From bec4d9f13271b59ecb174f02bbada9f3c87b3ec7 Mon Sep 17 00:00:00 2001 From: yuwenboj <1948505032@qq.com> Date: Sat, 10 Apr 2021 22:03:44 +0800 Subject: [PATCH 1/2] =?UTF-8?q?gs=5Fom=E5=88=9D=E5=A7=8B=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- LICENSE | 127 + build.sh | 260 + build/get_PlatForm_str.sh | 37 + other/transfer.py | 198 + script/__init__.py | 0 script/config/leap-seconds.conf | 250 + script/config/logrotate.conf | 56 + script/gs_backup | 251 + script/gs_check | 1764 +++++ script/gs_checkos | 1571 ++++ script/gs_checkperf | 307 + script/gs_collector | 397 ++ script/gs_dropnode | 339 + script/gs_expansion | 368 + script/gs_install | 317 + script/gs_om | 770 ++ script/gs_postuninstall | 414 ++ script/gs_preinstall | 483 ++ script/gs_ssh | 224 + script/gs_sshexkey | 1217 ++++ script/gs_uninstall | 185 + script/gs_upgradectl | 292 + script/gspylib/__init__.py | 0 script/gspylib/common/CheckPythonVersion.py | 43 + script/gspylib/common/CommandLineParser.py | 407 ++ script/gspylib/common/Common.py | 5433 ++++++++++++++ script/gspylib/common/DbClusterInfo.py | 6340 +++++++++++++++++ script/gspylib/common/DbClusterStatus.py | 1009 +++ script/gspylib/common/ErrorCode.py | 1288 ++++ script/gspylib/common/GaussLog.py | 1851 +++++ script/gspylib/common/GaussStat.py | 3045 ++++++++ script/gspylib/common/LocalBaseOM.py | 166 + script/gspylib/common/OMCommand.py | 376 + script/gspylib/common/ParallelBaseOM.py | 945 +++ script/gspylib/common/ParameterParsecheck.py | 680 ++ script/gspylib/common/SSDFaultInfo.py | 204 + script/gspylib/common/Signal.py | 133 + script/gspylib/common/Sql.py | 5653 +++++++++++++++ script/gspylib/common/SqlResult.py | 70 + script/gspylib/common/VersionInfo.py | 126 + script/gspylib/common/__init__.py | 0 script/gspylib/component/BaseComponent.py | 201 + .../component/Kernel/DN_OLAP/DN_OLAP.py | 448 ++ .../component/Kernel/DN_OLAP/__init__.py | 0 script/gspylib/component/Kernel/Kernel.py | 493 ++ script/gspylib/component/Kernel/__init__.py | 0 script/gspylib/component/__init__.py | 0 script/gspylib/etc/conf/check_list.conf | 67 + script/gspylib/etc/conf/check_list_dws.conf | 65 + .../etc/conf/cluster_config_template.xml | 36 + script/gspylib/etc/conf/gs-OS-set.service | 12 + script/gspylib/etc/conf/gs_collector.json | 9 + script/gspylib/etc/conf/guc_cloud_list.xml | 39 + script/gspylib/etc/conf/guc_list.xml | 26 + script/gspylib/etc/controller/gs_install.xml | 0 .../gspylib/etc/controller/gs_preinstall.xml | 75 + script/gspylib/etc/sql/pmk_schema.sql | 3215 +++++++++ .../etc/sql/pmk_schema_single_inst.sql | 2680 +++++++ script/gspylib/etc/sql/test_data_node.sql | 2 + script/gspylib/etc/sql/test_pmk.sql | 17 + script/gspylib/etc/sql/unlock_cluster.sql | 33 + script/gspylib/hardware/__init__.py | 0 script/gspylib/hardware/gscpu.py | 86 + script/gspylib/hardware/gsdisk.py | 298 + script/gspylib/hardware/gsmemory.py | 101 + script/gspylib/inspection/__init__.py | 0 script/gspylib/inspection/common/CheckItem.py | 472 ++ .../gspylib/inspection/common/CheckResult.py | 252 + script/gspylib/inspection/common/Exception.py | 184 + script/gspylib/inspection/common/Log.py | 181 + .../gspylib/inspection/common/ProgressBar.py | 112 + .../gspylib/inspection/common/SharedFuncs.py | 982 +++ script/gspylib/inspection/common/TaskPool.py | 204 + script/gspylib/inspection/common/__init__.py | 0 .../inspection/config/check_list_V1R6C10.conf | 62 + .../inspection/config/check_list_V1R7C00.conf | 181 + 
script/gspylib/inspection/config/items.xml | 1982 ++++++ .../config/scene_binary_upgrade.xml | 9 + .../inspection/config/scene_health.xml | 22 + .../inspection/config/scene_inspect.xml | 66 + .../inspection/config/scene_install.xml | 56 + .../inspection/config/scene_longtime.xml | 12 + .../inspection/config/scene_slow_node.xml | 19 + .../inspection/config/scene_upgrade.xml | 60 + .../inspection/items/cluster/CheckCatchup.py | 39 + .../items/cluster/CheckClusterState.py | 81 + .../items/cluster/CheckCollector.py | 99 + .../inspection/items/cluster/CheckDBParams.py | 267 + .../items/cluster/CheckDebugSwitch.py | 146 + .../items/cluster/CheckDilateSysTab.py | 95 + .../items/cluster/CheckDirPermissions.py | 187 + .../items/cluster/CheckEnvProfile.py | 151 + .../inspection/items/cluster/CheckGaussVer.py | 47 + .../items/cluster/CheckIntegrity.py | 38 + .../items/cluster/CheckLargeFile.py | 96 + .../items/cluster/CheckMpprcFile.py | 97 + .../items/cluster/CheckPortRange.py | 78 + .../items/cluster/CheckProStartTime.py | 127 + .../items/cluster/CheckProcessStatus.py | 45 + .../items/cluster/CheckReadonlyMode.py | 36 + .../items/cluster/CheckSpecialFile.py | 206 + .../inspection/items/cluster/CheckUpVer.py | 45 + .../items/database/CheckArchiveParameter.py | 70 + .../items/database/CheckCreateView.py | 103 + .../items/database/CheckCurConnCount.py | 52 + .../items/database/CheckCursorNum.py | 33 + .../items/database/CheckDBConnection.py | 61 + .../inspection/items/database/CheckDNSkew.py | 89 + .../items/database/CheckDiskFailure.py | 90 + .../items/database/CheckDropColumn.py | 53 + .../items/database/CheckGUCConsistent.py | 242 + .../items/database/CheckGUCValue.py | 81 + .../items/database/CheckHashIndex.py | 65 + .../items/database/CheckIdleSession.py | 45 + .../inspection/items/database/CheckLockNum.py | 33 + .../items/database/CheckLockState.py | 39 + .../items/database/CheckMaxDatanode.py | 87 + .../items/database/CheckNextvalInDefault.py | 79 + .../items/database/CheckNodeGroupName.py | 72 + .../inspection/items/database/CheckPMKData.py | 46 + .../items/database/CheckPgPreparedXacts.py | 42 + .../items/database/CheckPgxcRedistb.py | 67 + .../items/database/CheckPgxcgroup.py | 52 + .../items/database/CheckReturnType.py | 41 + .../items/database/CheckSysTabSize.py | 144 + .../items/database/CheckSysTable.py | 145 + .../items/database/CheckSysadminUser.py | 40 + .../inspection/items/database/CheckTDDate.py | 74 + .../items/database/CheckTableSkew.py | 139 + .../items/database/CheckTableSpace.py | 113 + .../items/database/CheckUnAnalyzeTable.py | 173 + .../inspection/items/database/CheckXid.py | 39 + .../inspection/items/device/CheckBlockdev.py | 114 + .../items/device/CheckDiskConfig.py | 41 + .../items/device/CheckDiskFormat.py | 69 + .../items/device/CheckIOConfigure.py | 180 + .../items/device/CheckIOrequestqueue.py | 150 + .../items/device/CheckInodeUsage.py | 133 + .../items/device/CheckLogicalBlock.py | 90 + .../items/device/CheckMaxAsyIOrequests.py | 107 + .../inspection/items/device/CheckSlowDisk.py | 72 + .../items/device/CheckSpaceUsage.py | 177 + .../items/device/CheckSwapMemory.py | 82 + .../inspection/items/network/CheckBond.py | 83 + .../inspection/items/network/CheckMTU.py | 95 + .../items/network/CheckMultiQueue.py | 226 + .../inspection/items/network/CheckNICModel.py | 85 + .../inspection/items/network/CheckNetSpeed.py | 243 + .../items/network/CheckNetWorkDrop.py | 162 + .../items/network/CheckNoCheckSum.py | 71 + .../inspection/items/network/CheckPing.py | 55 + 
.../inspection/items/network/CheckRXTX.py | 104 + .../inspection/items/network/CheckRouting.py | 75 + .../inspection/items/network/CheckUsedPort.py | 83 + .../inspection/items/os/CheckBootItems.py | 47 + .../gspylib/inspection/items/os/CheckCPU.py | 70 + .../inspection/items/os/CheckCpuCount.py | 58 + .../inspection/items/os/CheckCrondService.py | 49 + .../inspection/items/os/CheckCrontabLeft.py | 86 + .../inspection/items/os/CheckDirLeft.py | 87 + .../inspection/items/os/CheckDropCache.py | 40 + .../inspection/items/os/CheckEncoding.py | 35 + .../inspection/items/os/CheckEtcHosts.py | 91 + .../inspection/items/os/CheckFilehandle.py | 120 + .../inspection/items/os/CheckFirewall.py | 67 + .../inspection/items/os/CheckHyperThread.py | 57 + .../inspection/items/os/CheckKernelVer.py | 35 + .../inspection/items/os/CheckKeyProAdj.py | 59 + .../inspection/items/os/CheckMaxHandle.py | 108 + .../inspection/items/os/CheckMaxProcMemory.py | 166 + .../inspection/items/os/CheckMemInfo.py | 32 + .../gspylib/inspection/items/os/CheckNTPD.py | 122 + .../gspylib/inspection/items/os/CheckOSVer.py | 106 + .../inspection/items/os/CheckOmmUserExist.py | 46 + .../inspection/items/os/CheckPortConflict.py | 65 + .../inspection/items/os/CheckProcMem.py | 114 + .../inspection/items/os/CheckProcessLeft.py | 58 + .../inspection/items/os/CheckSctpService.py | 108 + .../inspection/items/os/CheckSshdConfig.py | 118 + .../inspection/items/os/CheckSshdService.py | 35 + .../gspylib/inspection/items/os/CheckStack.py | 83 + .../inspection/items/os/CheckSysParams.py | 215 + .../inspection/items/os/CheckSysPortRange.py | 61 + .../gspylib/inspection/items/os/CheckTHP.py | 70 + .../inspection/items/os/CheckTimeZone.py | 32 + .../items/other/CheckDataDiskUsage.py | 86 + .../items/other/CheckInstallDiskUsage.py | 65 + .../items/other/CheckLogDiskUsage.py | 68 + .../items/other/CheckTmpDiskUsage.py | 69 + script/gspylib/inspection/lib/__init__.py | 0 .../inspection/lib/checkblacklist/DBCheck.sh | 142 + .../lib/checkblacklist/ExecuteSQLOnAllDB.sh | 100 + .../checkblacklist/GetDroppedColumnTable.sql | 40 + .../lib/checkblacklist/GetInheritTable.sql | 20 + .../lib/checkblacklist/GetSequenceName.sql | 6 + .../checkblacklist/GetTableRecurrentGrant.sql | 46 + .../lib/checkblacklist/GetTableSkew.sql | 98 + .../lib/checkblacklist/GetTableUseRule.sql | 15 + .../lib/checkblacklist/GetTableUseToGroup.sql | 16 + .../lib/checkblacklist/GetTableUseTonode.sql | 16 + .../GetTableUseUnsupportConstraint.sql | 18 + .../GetTableUseUnsupportIndex.sql | 5 + .../GetTableUseUnsupportLocatortype.sql | 24 + .../lib/checkblacklist/GetTableWithOids.sql | 6 + .../GetTable_ProcUseUnsupportDataType.sql | 57 + .../GetTable_unsupportHDFSForeignTable.sql | 16 + .../checkblacklist/GetThirdPartExtension.sql | 7 + .../GetUserDefinedAggregate.sql | 6 + .../GetUserDefinedConversion.sql | 7 + .../checkblacklist/GetUserDefinedDataType.sql | 19 + .../checkblacklist/GetUserDefinedLanguage.sql | 1 + .../GetUserDefinedNodeGroup.sql | 5 + .../inspection/lib/checkblacklist/Note.txt | 29 + .../lib/checkblacklist/blacklist_check.sql | 263 + .../lib/checkcollector/databaseinfo.sql | 15 + .../lib/checkcollector/dndatabaseinfo.sql | 4 + .../lib/checkcollector/getClusterInfo.sh | 252 + .../lib/checkcollector/getOSInfo.sh | 65 + .../inspection/lib/checkcollector/hostfile | 0 .../inspection/lib/checkcollector/readme.txt | 12 + .../lib/checkcreateview/check_viewdef.sql | 5 + .../inspection/lib/checknetspeed/speed_test | 103 + script/gspylib/inspection/readme.txt | 52 + 
script/gspylib/os/__init__.py | 0 script/gspylib/os/gsOSlib.py | 562 ++ script/gspylib/os/gsfile.py | 977 +++ script/gspylib/os/gsnetwork.py | 226 + script/gspylib/os/gsplatform.py | 1999 ++++++ script/gspylib/os/gsservice.py | 106 + script/gspylib/os/gssysctl.py | 102 + script/gspylib/pssh/bin/TaskPool.py | 335 + script/gspylib/pssh/bin/pscp | 179 + script/gspylib/pssh/bin/pssh | 182 + script/gspylib/threads/SshTool.py | 860 +++ script/gspylib/threads/__init__.py | 0 script/gspylib/threads/parallelTool.py | 95 + script/impl/__init__.py | 0 script/impl/backup/BackupImpl.py | 86 + script/impl/backup/OLAP/BackupImplOLAP.py | 223 + script/impl/backup/OLAP/__init__.py | 0 script/impl/backup/__init__.py | 0 script/impl/checkperf/CheckperfImpl.py | 130 + .../impl/checkperf/OLAP/CheckperfImplOLAP.py | 1842 +++++ script/impl/checkperf/OLAP/__init__.py | 0 script/impl/checkperf/__init__.py | 0 script/impl/collect/CollectImpl.py | 65 + script/impl/collect/OLAP/CollectImplOLAP.py | 927 +++ script/impl/collect/OLAP/__init__.py | 0 script/impl/collect/__init__.py | 0 script/impl/dropnode/DropnodeImpl.py | 797 +++ script/impl/dropnode/__init__.py | 0 script/impl/expansion/ExpansionImpl.py | 1519 ++++ script/impl/expansion/__init__.py | 0 script/impl/install/InstallImpl.py | 583 ++ script/impl/install/OLAP/InstallImplOLAP.py | 490 ++ script/impl/install/OLAP/__init__.py | 0 script/impl/install/__init__.py | 0 script/impl/om/OLAP/OmImplOLAP.py | 350 + script/impl/om/OLAP/__init__.py | 0 script/impl/om/OmImpl.py | 949 +++ script/impl/om/__init__.py | 0 .../OLAP/PostUninstallImplOLAP.py | 58 + script/impl/postuninstall/OLAP/__init__.py | 0 .../impl/postuninstall/PostUninstallImpl.py | 781 ++ script/impl/postuninstall/__init__.py | 0 .../preinstall/OLAP/PreinstallImplOLAP.py | 609 ++ script/impl/preinstall/OLAP/__init__.py | 0 script/impl/preinstall/PreinstallImpl.py | 1941 +++++ script/impl/preinstall/__init__.py | 0 .../impl/uninstall/OLAP/UninstallImplOLAP.py | 96 + script/impl/uninstall/OLAP/__init__.py | 0 script/impl/uninstall/UninstallImpl.py | 362 + script/impl/uninstall/__init__.py | 0 script/impl/upgrade/OLAP/UpgradeImplOLAP.py | 34 + script/impl/upgrade/OLAP/__init__.py | 0 script/impl/upgrade/UpgradeConst.py | 162 + script/impl/upgrade/UpgradeImpl.py | 5842 +++++++++++++++ script/impl/upgrade/__init__.py | 0 script/killall | 169 + script/local/Backup.py | 544 ++ script/local/CheckConfig.py | 397 ++ script/local/CheckInstall.py | 640 ++ script/local/CheckPreInstall.py | 115 + script/local/CheckUninstall.py | 248 + script/local/CheckUpgrade.py | 549 ++ script/local/CleanInstance.py | 290 + script/local/CleanOsUser.py | 210 + script/local/ConfigHba.py | 271 + script/local/ConfigInstance.py | 334 + script/local/CreatePath.py | 175 + script/local/ExecuteSql.py | 138 + script/local/InitInstance.py | 243 + script/local/Install.py | 693 ++ script/local/KerberosUtility.py | 1310 ++++ script/local/LocalCheck.py | 804 +++ script/local/LocalCheckOS.py | 2316 ++++++ script/local/LocalCollect.py | 1674 +++++ script/local/LocalPerformanceCheck.py | 275 + script/local/PreInstallUtility.py | 2979 ++++++++ script/local/Resetreplconninfo.py | 199 + script/local/Restore.py | 533 ++ script/local/StartInstance.py | 168 + script/local/StopInstance.py | 159 + script/local/UnPreInstallUtility.py | 732 ++ script/local/Uninstall.py | 350 + script/local/UpgradeUtility.py | 4105 +++++++++++ script/local/__init__.py | 0 simpleInstall/README.md | 154 + simpleInstall/common.sh | 73 + simpleInstall/finance.sql | 147 + 
simpleInstall/install.sh | 412 ++ simpleInstall/one_master_one_slave.sh | 400 ++ .../one_master_one_slave_template.xml | 33 + simpleInstall/requirements_centos_x86_64 | 14 + simpleInstall/requirements_openEuler_aarch64 | 12 + simpleInstall/requirements_openEuler_x86_64 | 13 + simpleInstall/school.sql | 214 + simpleInstall/template.xml | 28 + 318 files changed, 106928 insertions(+) create mode 100644 LICENSE create mode 100644 build.sh create mode 100644 build/get_PlatForm_str.sh create mode 100644 other/transfer.py create mode 100644 script/__init__.py create mode 100644 script/config/leap-seconds.conf create mode 100644 script/config/logrotate.conf create mode 100644 script/gs_backup create mode 100644 script/gs_check create mode 100644 script/gs_checkos create mode 100644 script/gs_checkperf create mode 100644 script/gs_collector create mode 100644 script/gs_dropnode create mode 100644 script/gs_expansion create mode 100644 script/gs_install create mode 100644 script/gs_om create mode 100644 script/gs_postuninstall create mode 100644 script/gs_preinstall create mode 100644 script/gs_ssh create mode 100644 script/gs_sshexkey create mode 100644 script/gs_uninstall create mode 100644 script/gs_upgradectl create mode 100644 script/gspylib/__init__.py create mode 100644 script/gspylib/common/CheckPythonVersion.py create mode 100644 script/gspylib/common/CommandLineParser.py create mode 100644 script/gspylib/common/Common.py create mode 100644 script/gspylib/common/DbClusterInfo.py create mode 100644 script/gspylib/common/DbClusterStatus.py create mode 100644 script/gspylib/common/ErrorCode.py create mode 100644 script/gspylib/common/GaussLog.py create mode 100644 script/gspylib/common/GaussStat.py create mode 100644 script/gspylib/common/LocalBaseOM.py create mode 100644 script/gspylib/common/OMCommand.py create mode 100644 script/gspylib/common/ParallelBaseOM.py create mode 100644 script/gspylib/common/ParameterParsecheck.py create mode 100644 script/gspylib/common/SSDFaultInfo.py create mode 100644 script/gspylib/common/Signal.py create mode 100644 script/gspylib/common/Sql.py create mode 100644 script/gspylib/common/SqlResult.py create mode 100644 script/gspylib/common/VersionInfo.py create mode 100644 script/gspylib/common/__init__.py create mode 100644 script/gspylib/component/BaseComponent.py create mode 100644 script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py create mode 100644 script/gspylib/component/Kernel/DN_OLAP/__init__.py create mode 100644 script/gspylib/component/Kernel/Kernel.py create mode 100644 script/gspylib/component/Kernel/__init__.py create mode 100644 script/gspylib/component/__init__.py create mode 100644 script/gspylib/etc/conf/check_list.conf create mode 100644 script/gspylib/etc/conf/check_list_dws.conf create mode 100644 script/gspylib/etc/conf/cluster_config_template.xml create mode 100644 script/gspylib/etc/conf/gs-OS-set.service create mode 100644 script/gspylib/etc/conf/gs_collector.json create mode 100644 script/gspylib/etc/conf/guc_cloud_list.xml create mode 100644 script/gspylib/etc/conf/guc_list.xml create mode 100644 script/gspylib/etc/controller/gs_install.xml create mode 100644 script/gspylib/etc/controller/gs_preinstall.xml create mode 100644 script/gspylib/etc/sql/pmk_schema.sql create mode 100644 script/gspylib/etc/sql/pmk_schema_single_inst.sql create mode 100644 script/gspylib/etc/sql/test_data_node.sql create mode 100644 script/gspylib/etc/sql/test_pmk.sql create mode 100644 script/gspylib/etc/sql/unlock_cluster.sql create mode 100644 
script/gspylib/hardware/__init__.py create mode 100644 script/gspylib/hardware/gscpu.py create mode 100644 script/gspylib/hardware/gsdisk.py create mode 100644 script/gspylib/hardware/gsmemory.py create mode 100644 script/gspylib/inspection/__init__.py create mode 100644 script/gspylib/inspection/common/CheckItem.py create mode 100644 script/gspylib/inspection/common/CheckResult.py create mode 100644 script/gspylib/inspection/common/Exception.py create mode 100644 script/gspylib/inspection/common/Log.py create mode 100644 script/gspylib/inspection/common/ProgressBar.py create mode 100644 script/gspylib/inspection/common/SharedFuncs.py create mode 100644 script/gspylib/inspection/common/TaskPool.py create mode 100644 script/gspylib/inspection/common/__init__.py create mode 100644 script/gspylib/inspection/config/check_list_V1R6C10.conf create mode 100644 script/gspylib/inspection/config/check_list_V1R7C00.conf create mode 100644 script/gspylib/inspection/config/items.xml create mode 100644 script/gspylib/inspection/config/scene_binary_upgrade.xml create mode 100644 script/gspylib/inspection/config/scene_health.xml create mode 100644 script/gspylib/inspection/config/scene_inspect.xml create mode 100644 script/gspylib/inspection/config/scene_install.xml create mode 100644 script/gspylib/inspection/config/scene_longtime.xml create mode 100644 script/gspylib/inspection/config/scene_slow_node.xml create mode 100644 script/gspylib/inspection/config/scene_upgrade.xml create mode 100644 script/gspylib/inspection/items/cluster/CheckCatchup.py create mode 100644 script/gspylib/inspection/items/cluster/CheckClusterState.py create mode 100644 script/gspylib/inspection/items/cluster/CheckCollector.py create mode 100644 script/gspylib/inspection/items/cluster/CheckDBParams.py create mode 100644 script/gspylib/inspection/items/cluster/CheckDebugSwitch.py create mode 100644 script/gspylib/inspection/items/cluster/CheckDilateSysTab.py create mode 100644 script/gspylib/inspection/items/cluster/CheckDirPermissions.py create mode 100644 script/gspylib/inspection/items/cluster/CheckEnvProfile.py create mode 100644 script/gspylib/inspection/items/cluster/CheckGaussVer.py create mode 100644 script/gspylib/inspection/items/cluster/CheckIntegrity.py create mode 100644 script/gspylib/inspection/items/cluster/CheckLargeFile.py create mode 100644 script/gspylib/inspection/items/cluster/CheckMpprcFile.py create mode 100644 script/gspylib/inspection/items/cluster/CheckPortRange.py create mode 100644 script/gspylib/inspection/items/cluster/CheckProStartTime.py create mode 100644 script/gspylib/inspection/items/cluster/CheckProcessStatus.py create mode 100644 script/gspylib/inspection/items/cluster/CheckReadonlyMode.py create mode 100644 script/gspylib/inspection/items/cluster/CheckSpecialFile.py create mode 100644 script/gspylib/inspection/items/cluster/CheckUpVer.py create mode 100644 script/gspylib/inspection/items/database/CheckArchiveParameter.py create mode 100644 script/gspylib/inspection/items/database/CheckCreateView.py create mode 100644 script/gspylib/inspection/items/database/CheckCurConnCount.py create mode 100644 script/gspylib/inspection/items/database/CheckCursorNum.py create mode 100644 script/gspylib/inspection/items/database/CheckDBConnection.py create mode 100644 script/gspylib/inspection/items/database/CheckDNSkew.py create mode 100644 script/gspylib/inspection/items/database/CheckDiskFailure.py create mode 100644 script/gspylib/inspection/items/database/CheckDropColumn.py create mode 100644 
script/gspylib/inspection/items/database/CheckGUCConsistent.py create mode 100644 script/gspylib/inspection/items/database/CheckGUCValue.py create mode 100644 script/gspylib/inspection/items/database/CheckHashIndex.py create mode 100644 script/gspylib/inspection/items/database/CheckIdleSession.py create mode 100644 script/gspylib/inspection/items/database/CheckLockNum.py create mode 100644 script/gspylib/inspection/items/database/CheckLockState.py create mode 100644 script/gspylib/inspection/items/database/CheckMaxDatanode.py create mode 100644 script/gspylib/inspection/items/database/CheckNextvalInDefault.py create mode 100644 script/gspylib/inspection/items/database/CheckNodeGroupName.py create mode 100644 script/gspylib/inspection/items/database/CheckPMKData.py create mode 100644 script/gspylib/inspection/items/database/CheckPgPreparedXacts.py create mode 100644 script/gspylib/inspection/items/database/CheckPgxcRedistb.py create mode 100644 script/gspylib/inspection/items/database/CheckPgxcgroup.py create mode 100644 script/gspylib/inspection/items/database/CheckReturnType.py create mode 100644 script/gspylib/inspection/items/database/CheckSysTabSize.py create mode 100644 script/gspylib/inspection/items/database/CheckSysTable.py create mode 100644 script/gspylib/inspection/items/database/CheckSysadminUser.py create mode 100644 script/gspylib/inspection/items/database/CheckTDDate.py create mode 100644 script/gspylib/inspection/items/database/CheckTableSkew.py create mode 100644 script/gspylib/inspection/items/database/CheckTableSpace.py create mode 100644 script/gspylib/inspection/items/database/CheckUnAnalyzeTable.py create mode 100644 script/gspylib/inspection/items/database/CheckXid.py create mode 100644 script/gspylib/inspection/items/device/CheckBlockdev.py create mode 100644 script/gspylib/inspection/items/device/CheckDiskConfig.py create mode 100644 script/gspylib/inspection/items/device/CheckDiskFormat.py create mode 100644 script/gspylib/inspection/items/device/CheckIOConfigure.py create mode 100644 script/gspylib/inspection/items/device/CheckIOrequestqueue.py create mode 100644 script/gspylib/inspection/items/device/CheckInodeUsage.py create mode 100644 script/gspylib/inspection/items/device/CheckLogicalBlock.py create mode 100644 script/gspylib/inspection/items/device/CheckMaxAsyIOrequests.py create mode 100644 script/gspylib/inspection/items/device/CheckSlowDisk.py create mode 100644 script/gspylib/inspection/items/device/CheckSpaceUsage.py create mode 100644 script/gspylib/inspection/items/device/CheckSwapMemory.py create mode 100644 script/gspylib/inspection/items/network/CheckBond.py create mode 100644 script/gspylib/inspection/items/network/CheckMTU.py create mode 100644 script/gspylib/inspection/items/network/CheckMultiQueue.py create mode 100644 script/gspylib/inspection/items/network/CheckNICModel.py create mode 100644 script/gspylib/inspection/items/network/CheckNetSpeed.py create mode 100644 script/gspylib/inspection/items/network/CheckNetWorkDrop.py create mode 100644 script/gspylib/inspection/items/network/CheckNoCheckSum.py create mode 100644 script/gspylib/inspection/items/network/CheckPing.py create mode 100644 script/gspylib/inspection/items/network/CheckRXTX.py create mode 100644 script/gspylib/inspection/items/network/CheckRouting.py create mode 100644 script/gspylib/inspection/items/network/CheckUsedPort.py create mode 100644 script/gspylib/inspection/items/os/CheckBootItems.py create mode 100644 script/gspylib/inspection/items/os/CheckCPU.py create mode 
100644 script/gspylib/inspection/items/os/CheckCpuCount.py create mode 100644 script/gspylib/inspection/items/os/CheckCrondService.py create mode 100644 script/gspylib/inspection/items/os/CheckCrontabLeft.py create mode 100644 script/gspylib/inspection/items/os/CheckDirLeft.py create mode 100644 script/gspylib/inspection/items/os/CheckDropCache.py create mode 100644 script/gspylib/inspection/items/os/CheckEncoding.py create mode 100644 script/gspylib/inspection/items/os/CheckEtcHosts.py create mode 100644 script/gspylib/inspection/items/os/CheckFilehandle.py create mode 100644 script/gspylib/inspection/items/os/CheckFirewall.py create mode 100644 script/gspylib/inspection/items/os/CheckHyperThread.py create mode 100644 script/gspylib/inspection/items/os/CheckKernelVer.py create mode 100644 script/gspylib/inspection/items/os/CheckKeyProAdj.py create mode 100644 script/gspylib/inspection/items/os/CheckMaxHandle.py create mode 100644 script/gspylib/inspection/items/os/CheckMaxProcMemory.py create mode 100644 script/gspylib/inspection/items/os/CheckMemInfo.py create mode 100644 script/gspylib/inspection/items/os/CheckNTPD.py create mode 100644 script/gspylib/inspection/items/os/CheckOSVer.py create mode 100644 script/gspylib/inspection/items/os/CheckOmmUserExist.py create mode 100644 script/gspylib/inspection/items/os/CheckPortConflict.py create mode 100644 script/gspylib/inspection/items/os/CheckProcMem.py create mode 100644 script/gspylib/inspection/items/os/CheckProcessLeft.py create mode 100644 script/gspylib/inspection/items/os/CheckSctpService.py create mode 100644 script/gspylib/inspection/items/os/CheckSshdConfig.py create mode 100644 script/gspylib/inspection/items/os/CheckSshdService.py create mode 100644 script/gspylib/inspection/items/os/CheckStack.py create mode 100644 script/gspylib/inspection/items/os/CheckSysParams.py create mode 100644 script/gspylib/inspection/items/os/CheckSysPortRange.py create mode 100644 script/gspylib/inspection/items/os/CheckTHP.py create mode 100644 script/gspylib/inspection/items/os/CheckTimeZone.py create mode 100644 script/gspylib/inspection/items/other/CheckDataDiskUsage.py create mode 100644 script/gspylib/inspection/items/other/CheckInstallDiskUsage.py create mode 100644 script/gspylib/inspection/items/other/CheckLogDiskUsage.py create mode 100644 script/gspylib/inspection/items/other/CheckTmpDiskUsage.py create mode 100644 script/gspylib/inspection/lib/__init__.py create mode 100644 script/gspylib/inspection/lib/checkblacklist/DBCheck.sh create mode 100644 script/gspylib/inspection/lib/checkblacklist/ExecuteSQLOnAllDB.sh create mode 100644 script/gspylib/inspection/lib/checkblacklist/GetDroppedColumnTable.sql create mode 100644 script/gspylib/inspection/lib/checkblacklist/GetInheritTable.sql create mode 100644 script/gspylib/inspection/lib/checkblacklist/GetSequenceName.sql create mode 100644 script/gspylib/inspection/lib/checkblacklist/GetTableRecurrentGrant.sql create mode 100644 script/gspylib/inspection/lib/checkblacklist/GetTableSkew.sql create mode 100644 script/gspylib/inspection/lib/checkblacklist/GetTableUseRule.sql create mode 100644 script/gspylib/inspection/lib/checkblacklist/GetTableUseToGroup.sql create mode 100644 script/gspylib/inspection/lib/checkblacklist/GetTableUseTonode.sql create mode 100644 script/gspylib/inspection/lib/checkblacklist/GetTableUseUnsupportConstraint.sql create mode 100644 script/gspylib/inspection/lib/checkblacklist/GetTableUseUnsupportIndex.sql create mode 100644 
script/gspylib/inspection/lib/checkblacklist/GetTableUseUnsupportLocatortype.sql create mode 100644 script/gspylib/inspection/lib/checkblacklist/GetTableWithOids.sql create mode 100644 script/gspylib/inspection/lib/checkblacklist/GetTable_ProcUseUnsupportDataType.sql create mode 100644 script/gspylib/inspection/lib/checkblacklist/GetTable_unsupportHDFSForeignTable.sql create mode 100644 script/gspylib/inspection/lib/checkblacklist/GetThirdPartExtension.sql create mode 100644 script/gspylib/inspection/lib/checkblacklist/GetUserDefinedAggregate.sql create mode 100644 script/gspylib/inspection/lib/checkblacklist/GetUserDefinedConversion.sql create mode 100644 script/gspylib/inspection/lib/checkblacklist/GetUserDefinedDataType.sql create mode 100644 script/gspylib/inspection/lib/checkblacklist/GetUserDefinedLanguage.sql create mode 100644 script/gspylib/inspection/lib/checkblacklist/GetUserDefinedNodeGroup.sql create mode 100644 script/gspylib/inspection/lib/checkblacklist/Note.txt create mode 100644 script/gspylib/inspection/lib/checkblacklist/blacklist_check.sql create mode 100644 script/gspylib/inspection/lib/checkcollector/databaseinfo.sql create mode 100644 script/gspylib/inspection/lib/checkcollector/dndatabaseinfo.sql create mode 100644 script/gspylib/inspection/lib/checkcollector/getClusterInfo.sh create mode 100644 script/gspylib/inspection/lib/checkcollector/getOSInfo.sh create mode 100644 script/gspylib/inspection/lib/checkcollector/hostfile create mode 100644 script/gspylib/inspection/lib/checkcollector/readme.txt create mode 100644 script/gspylib/inspection/lib/checkcreateview/check_viewdef.sql create mode 100644 script/gspylib/inspection/lib/checknetspeed/speed_test create mode 100644 script/gspylib/inspection/readme.txt create mode 100644 script/gspylib/os/__init__.py create mode 100644 script/gspylib/os/gsOSlib.py create mode 100644 script/gspylib/os/gsfile.py create mode 100644 script/gspylib/os/gsnetwork.py create mode 100644 script/gspylib/os/gsplatform.py create mode 100644 script/gspylib/os/gsservice.py create mode 100644 script/gspylib/os/gssysctl.py create mode 100644 script/gspylib/pssh/bin/TaskPool.py create mode 100644 script/gspylib/pssh/bin/pscp create mode 100644 script/gspylib/pssh/bin/pssh create mode 100644 script/gspylib/threads/SshTool.py create mode 100644 script/gspylib/threads/__init__.py create mode 100644 script/gspylib/threads/parallelTool.py create mode 100644 script/impl/__init__.py create mode 100644 script/impl/backup/BackupImpl.py create mode 100644 script/impl/backup/OLAP/BackupImplOLAP.py create mode 100644 script/impl/backup/OLAP/__init__.py create mode 100644 script/impl/backup/__init__.py create mode 100644 script/impl/checkperf/CheckperfImpl.py create mode 100644 script/impl/checkperf/OLAP/CheckperfImplOLAP.py create mode 100644 script/impl/checkperf/OLAP/__init__.py create mode 100644 script/impl/checkperf/__init__.py create mode 100644 script/impl/collect/CollectImpl.py create mode 100644 script/impl/collect/OLAP/CollectImplOLAP.py create mode 100644 script/impl/collect/OLAP/__init__.py create mode 100644 script/impl/collect/__init__.py create mode 100644 script/impl/dropnode/DropnodeImpl.py create mode 100644 script/impl/dropnode/__init__.py create mode 100644 script/impl/expansion/ExpansionImpl.py create mode 100644 script/impl/expansion/__init__.py create mode 100644 script/impl/install/InstallImpl.py create mode 100644 script/impl/install/OLAP/InstallImplOLAP.py create mode 100644 script/impl/install/OLAP/__init__.py create mode 100644 
script/impl/install/__init__.py create mode 100644 script/impl/om/OLAP/OmImplOLAP.py create mode 100644 script/impl/om/OLAP/__init__.py create mode 100644 script/impl/om/OmImpl.py create mode 100644 script/impl/om/__init__.py create mode 100644 script/impl/postuninstall/OLAP/PostUninstallImplOLAP.py create mode 100644 script/impl/postuninstall/OLAP/__init__.py create mode 100644 script/impl/postuninstall/PostUninstallImpl.py create mode 100644 script/impl/postuninstall/__init__.py create mode 100644 script/impl/preinstall/OLAP/PreinstallImplOLAP.py create mode 100644 script/impl/preinstall/OLAP/__init__.py create mode 100644 script/impl/preinstall/PreinstallImpl.py create mode 100644 script/impl/preinstall/__init__.py create mode 100644 script/impl/uninstall/OLAP/UninstallImplOLAP.py create mode 100644 script/impl/uninstall/OLAP/__init__.py create mode 100644 script/impl/uninstall/UninstallImpl.py create mode 100644 script/impl/uninstall/__init__.py create mode 100644 script/impl/upgrade/OLAP/UpgradeImplOLAP.py create mode 100644 script/impl/upgrade/OLAP/__init__.py create mode 100644 script/impl/upgrade/UpgradeConst.py create mode 100644 script/impl/upgrade/UpgradeImpl.py create mode 100644 script/impl/upgrade/__init__.py create mode 100644 script/killall create mode 100644 script/local/Backup.py create mode 100644 script/local/CheckConfig.py create mode 100644 script/local/CheckInstall.py create mode 100644 script/local/CheckPreInstall.py create mode 100644 script/local/CheckUninstall.py create mode 100644 script/local/CheckUpgrade.py create mode 100644 script/local/CleanInstance.py create mode 100644 script/local/CleanOsUser.py create mode 100644 script/local/ConfigHba.py create mode 100644 script/local/ConfigInstance.py create mode 100644 script/local/CreatePath.py create mode 100644 script/local/ExecuteSql.py create mode 100644 script/local/InitInstance.py create mode 100644 script/local/Install.py create mode 100644 script/local/KerberosUtility.py create mode 100644 script/local/LocalCheck.py create mode 100644 script/local/LocalCheckOS.py create mode 100644 script/local/LocalCollect.py create mode 100644 script/local/LocalPerformanceCheck.py create mode 100644 script/local/PreInstallUtility.py create mode 100644 script/local/Resetreplconninfo.py create mode 100644 script/local/Restore.py create mode 100644 script/local/StartInstance.py create mode 100644 script/local/StopInstance.py create mode 100644 script/local/UnPreInstallUtility.py create mode 100644 script/local/Uninstall.py create mode 100644 script/local/UpgradeUtility.py create mode 100644 script/local/__init__.py create mode 100644 simpleInstall/README.md create mode 100644 simpleInstall/common.sh create mode 100644 simpleInstall/finance.sql create mode 100644 simpleInstall/install.sh create mode 100644 simpleInstall/one_master_one_slave.sh create mode 100644 simpleInstall/one_master_one_slave_template.xml create mode 100644 simpleInstall/requirements_centos_x86_64 create mode 100644 simpleInstall/requirements_openEuler_aarch64 create mode 100644 simpleInstall/requirements_openEuler_x86_64 create mode 100644 simpleInstall/school.sql create mode 100644 simpleInstall/template.xml diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..9e32cde --- /dev/null +++ b/LICENSE @@ -0,0 +1,127 @@ + 木兰宽松许可证, 第2版 + + 木兰宽松许可证, 第2版 + 2020年1月 http://license.coscl.org.cn/MulanPSL2 + + + 您对“软件”的复制、使用、修改及分发受木兰宽松许可证,第2版(“本许可证”)的如下条款的约束: + + 0. 
定义 + + “软件”是指由“贡献”构成的许可在“本许可证”下的程序和相关文档的集合。 + + “贡献”是指由任一“贡献者”许可在“本许可证”下的受版权法保护的作品。 + + “贡献者”是指将受版权法保护的作品许可在“本许可证”下的自然人或“法人实体”。 + + “法人实体”是指提交贡献的机构及其“关联实体”。 + + “关联实体”是指,对“本许可证”下的行为方而言,控制、受控制或与其共同受控制的机构,此处的控制是指有受控方或共同受控方至少50%直接或间接的投票权、资金或其他有价证券。 + + 1. 授予版权许可 + + 每个“贡献者”根据“本许可证”授予您永久性的、全球性的、免费的、非独占的、不可撤销的版权许可,您可以复制、使用、修改、分发其“贡献”,不论修改与否。 + + 2. 授予专利许可 + + 每个“贡献者”根据“本许可证”授予您永久性的、全球性的、免费的、非独占的、不可撤销的(根据本条规定撤销除外)专利许可,供您制造、委托制造、使用、许诺销售、销售、进口其“贡献”或以其他方式转移其“贡献”。前述专利许可仅限于“贡献者”现在或将来拥有或控制的其“贡献”本身或其“贡献”与许可“贡献”时的“软件”结合而将必然会侵犯的专利权利要求,不包括对“贡献”的修改或包含“贡献”的其他结合。如果您或您的“关联实体”直接或间接地,就“软件”或其中的“贡献”对任何人发起专利侵权诉讼(包括反诉或交叉诉讼)或其他专利维权行动,指控其侵犯专利权,则“本许可证”授予您对“软件”的专利许可自您提起诉讼或发起维权行动之日终止。 + + 3. 无商标许可 + + “本许可证”不提供对“贡献者”的商品名称、商标、服务标志或产品名称的商标许可,但您为满足第4条规定的声明义务而必须使用除外。 + + 4. 分发限制 + + 您可以在任何媒介中将“软件”以源程序形式或可执行形式重新分发,不论修改与否,但您必须向接收者提供“本许可证”的副本,并保留“软件”中的版权、商标、专利及免责声明。 + + 5. 免责声明与责任限制 + + “软件”及其中的“贡献”在提供时不带任何明示或默示的担保。在任何情况下,“贡献者”或版权所有者不对任何人因使用“软件”或其中的“贡献”而引发的任何直接或间接损失承担责任,不论因何种原因导致或者基于何种法律理论,即使其曾被建议有此种损失的可能性。 + + 6. 语言 + “本许可证”以中英文双语表述,中英文版本具有同等法律效力。如果中英文版本存在任何冲突不一致,以中文版为准。 + + 条款结束 + + 如何将木兰宽松许可证,第2版,应用到您的软件 + + 如果您希望将木兰宽松许可证,第2版,应用到您的新软件,为了方便接收者查阅,建议您完成如下三步: + + 1, 请您补充如下声明中的空白,包括软件名、软件的首次发表年份以及您作为版权人的名字; + + 2, 请您在软件包的一级目录下创建以“LICENSE”为名的文件,将整个许可证文本放入该文件中; + + 3, 请将如下声明文本放入每个源文件的头部注释中。 + + Copyright (c) [Year] [name of copyright holder] + [Software Name] is licensed under Mulan PSL v2. + You can use this software according to the terms and conditions of the Mulan PSL v2. + You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 + THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + See the Mulan PSL v2 for more details. + + + Mulan Permissive Software License,Version 2 + + Mulan Permissive Software License,Version 2 (Mulan PSL v2) + January 2020 http://license.coscl.org.cn/MulanPSL2 + + Your reproduction, use, modification and distribution of the Software shall be subject to Mulan PSL v2 (this License) with the following terms and conditions: + + 0. Definition + + Software means the program and related documents which are licensed under this License and comprise all Contribution(s). + + Contribution means the copyrightable work licensed by a particular Contributor under this License. + + Contributor means the Individual or Legal Entity who licenses its copyrightable work under this License. + + Legal Entity means the entity making a Contribution and all its Affiliates. + + Affiliates means entities that control, are controlled by, or are under common control with the acting entity under this License, ‘control’ means direct or indirect ownership of at least fifty percent (50%) of the voting power, capital or other securities of controlled or commonly controlled entity. + + 1. Grant of Copyright License + + Subject to the terms and conditions of this License, each Contributor hereby grants to you a perpetual, worldwide, royalty-free, non-exclusive, irrevocable copyright license to reproduce, use, modify, or distribute its Contribution, with modification or not. + + 2. 
Grant of Patent License + + Subject to the terms and conditions of this License, each Contributor hereby grants to you a perpetual, worldwide, royalty-free, non-exclusive, irrevocable (except for revocation under this Section) patent license to make, have made, use, offer for sale, sell, import or otherwise transfer its Contribution, where such patent license is only limited to the patent claims owned or controlled by such Contributor now or in future which will be necessarily infringed by its Contribution alone, or by combination of the Contribution with the Software to which the Contribution was contributed. The patent license shall not apply to any modification of the Contribution, and any other combination which includes the Contribution. If you or your Affiliates directly or indirectly institute patent litigation (including a cross claim or counterclaim in a litigation) or other patent enforcement activities against any individual or entity by alleging that the Software or any Contribution in it infringes patents, then any patent license granted to you under this License for the Software shall terminate as of the date such litigation or activity is filed or taken. + + 3. No Trademark License + + No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, except as required to fulfill notice requirements in Section 4. + + 4. Distribution Restriction + + You may distribute the Software in any medium with or without modification, whether in source or executable forms, provided that you provide recipients with a copy of this License and retain copyright, patent, trademark and disclaimer statements in the Software. + + 5. Disclaimer of Warranty and Limitation of Liability + + THE SOFTWARE AND CONTRIBUTION IN IT ARE PROVIDED WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED. IN NO EVENT SHALL ANY CONTRIBUTOR OR COPYRIGHT HOLDER BE LIABLE TO YOU FOR ANY DAMAGES, INCLUDING, BUT NOT LIMITED TO ANY DIRECT, OR INDIRECT, SPECIAL OR CONSEQUENTIAL DAMAGES ARISING FROM YOUR USE OR INABILITY TO USE THE SOFTWARE OR THE CONTRIBUTION IN IT, NO MATTER HOW IT’S CAUSED OR BASED ON WHICH LEGAL THEORY, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. + + 6. Language + + THIS LICENSE IS WRITTEN IN BOTH CHINESE AND ENGLISH, AND THE CHINESE VERSION AND ENGLISH VERSION SHALL HAVE THE SAME LEGAL EFFECT. IN THE CASE OF DIVERGENCE BETWEEN THE CHINESE AND ENGLISH VERSIONS, THE CHINESE VERSION SHALL PREVAIL. + + END OF THE TERMS AND CONDITIONS + + How to Apply the Mulan Permissive Software License,Version 2 (Mulan PSL v2) to Your Software + + To apply the Mulan PSL v2 to your work, for easy identification by recipients, you are suggested to complete following three steps: + + i Fill in the blanks in following statement, including insert your software name, the year of the first publication of your software, and your name identified as the copyright owner; + + ii Create a file named “LICENSE” which contains the whole context of this License in the first directory of your software package; + + iii Attach the statement to the appropriate annotated syntax at the beginning of each source file. + + + Copyright (c) [Year] [name of copyright holder] + [Software Name] is licensed under Mulan PSL v2. + You can use this software according to the terms and conditions of the Mulan PSL v2. 
+ You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 + THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + See the Mulan PSL v2 for more details. diff --git a/build.sh b/build.sh new file mode 100644 index 0000000..65672e0 --- /dev/null +++ b/build.sh @@ -0,0 +1,260 @@ +#!/bin/bash + +declare binarylib_dir='None' +declare module_name="openGauss" +declare version_number='2.0.0' +declare version_Kernel='92.298' +ROOT_DIR=$(cd $(dirname "${BASH_SOURCE[0]}") && pwd) +echo "ROOT_DIR : $ROOT_DIR" +declare ERR_MKGS_FAILED=1 +declare LOG_FILE="${ROOT_DIR}/build.log" +declare PKG_DIR="${ROOT_DIR}/package" +declare PKG_TMP_DIR="${ROOT_DIR}/package/temp" +declare version_string="${module_name}-${version_number}" + +######################################################################### +##read command line paramenters +####################################################################### + +function print_help() +{ + echo "Usage: $0 [OPTION] + -h|--help show help information + -3rd|--binarylib_dir the parent directory of binarylibs + " +} + +while [ $# -gt 0 ]; do + case "$1" in + -h|--help) + print_help + exit 1 + ;; + -3rd|--binarylib_dir) + if [ "$2"X = X ]; then + echo "no given binarylib directory values" + exit 1 + fi + binarylib_dir=$2 + shift 2 + ;; + *) + echo "Internal Error: option processing error: $1" 1>&2 + echo "please input right paramtenter, the following command may help you" + echo "./build.sh --help or ./build.sh -h" + exit 1 + esac +done + +PLAT_FORM_STR=$(sh "${ROOT_DIR}/build/get_PlatForm_str.sh") +if [ "${PLAT_FORM_STR}"x == "Failed"x ]; then + echo "We only support openEuler(aarch64), EulerOS(aarch64), CentOS platform." + exit 1; +fi + +PLATFORM=32 +bit=$(getconf LONG_BIT) +if [ "$bit" -eq 64 ]; then + PLATFORM=64 +fi + +if [ X$(echo $PLAT_FORM_STR | grep "centos") != X"" ]; then + dist_version="CentOS" +elif [ X$(echo $PLAT_FORM_STR | grep "openeuler") != X"" ]; then + dist_version="openEuler" +elif [ X$(echo $PLAT_FORM_STR | grep "euleros") != X"" ]; then + dist_version="EulerOS" +else + echo "We only support openEuler(aarch64), EulerOS(aarch64), CentOS platform." + echo "Kernel is $kernel" + exit 1 +fi + +declare package_pre_name="${version_string}-${dist_version}-${PLATFORM}bit-om" +declare package_name="${package_pre_name}.tar.gz" +declare sha256_name="${package_pre_name}.sha256" + +if [ ${binarylib_dir} != 'None' ] && [ -d ${binarylib_dir} ]; then + BINARYLIBS_PATH="${binarylib_dir}/dependency/${PLAT_FORM_STR}" + BUILD_TOOLS_PATH="${binarylib_dir}/buildtools/${PLAT_FORM_STR}" + BINARYLIBS_PATH_INSTALL_TOOLS="${binarylib_dir}/dependency/install_tools_${PLAT_FORM_STR}" +else + BINARYLIBS_PATH="${ROOT_DIR}/binarylibs/dependency/${PLAT_FORM_STR}" + BUILD_TOOLS_PATH="${ROOT_DIR}/binarylibs/buildtools/${PLAT_FORM_STR}" + BINARYLIBS_PATH_INSTALL_TOOLS="${ROOT_DIR}/dependency/install_tools_${PLAT_FORM_STR}" +fi + +log() +{ + echo "[makegaussdb] $(date +%y-%m-%d' '%T): $@" + echo "[makegaussdb] $(date +%y-%m-%d' '%T): $@" >> "$LOG_FILE" 2>&1 +} + +die() +{ + log "$@" + echo "$@" + exit $ERR_MKGS_FAILED +} + +function env_check() +{ + if [ -d "$PKG_DIR" ]; then + rm -rf ${PKG_DIR} + fi + mkdir -p ${PKG_TMP_DIR} + if [ -d "$LOG_FILE" ]; then + rm -rf $LOG_FILE + fi + if [ $? -eq 0 ]; then + echo "Everything is ready." + else + echo "clean enviroment failed." 
+ exit 1 + fi +} + +function copy_script_file() +{ + cp -rf $ROOT_DIR/script $PKG_TMP_DIR/ && + cp -rf $ROOT_DIR/other/transfer.py $PKG_TMP_DIR/script/ && + find $PKG_TMP_DIR/script/ -type f -print0 | xargs -0 -n 10 -r dos2unix > /dev/null 2>&1 && + find $PKG_TMP_DIR/script/gspylib/inspection/ -name d2utmp* -print0 | xargs -0 rm -rf && + if [ $? -ne 0 ]; then + die "cp -r $ROOT_DIR/script $PKG_TMP_DIR failed " + fi + chmod -R +x $PKG_TMP_DIR/script/ + + cp -rf $ROOT_DIR/simpleInstall $PKG_TMP_DIR/ + if [ $? -ne 0 ]; then + die "cp -r $ROOT_DIR/simpleInstall $PKG_TMP_DIR/ failed " + fi +} + +function version_cfg() +{ + gitversion=$(git log | grep commit | head -1 | awk '{print $2}' | cut -b 1-8) + commits=$(git log | grep "See in merge request" | wc -l) + mrid=$(git log | grep "See in merge request" | head -1 | awk -F! '{print $2}' | grep -o '[0-9]\+') + om_version="(openGauss OM 2.0.0 build $gitversion) compiled at `date -d today +\"%Y-%m-%d %H:%M:%S\"` commit $commits last mr $mrid" + version_file=${PKG_TMP_DIR}/version.cfg + touch ${version_file} + echo "${module_name}-${version_number}">${version_file} + echo "${version_Kernel}" >>${version_file} + echo "${gitversion}" >>${version_file} + + if [ -f ${PKG_TMP_DIR}/script/gspylib/common/VersionInfo.py ] ; then + sed -i -e "s/COMMON_VERSION = \"Gauss200 OM VERSION\"/COMMON_VERSION = \"$(echo ${om_version})\"/g" -e "s/__GAUSS_PRODUCT_STRING__/$module_name/g" ${PKG_TMP_DIR}/script/gspylib/common/VersionInfo.py + if [ $? -ne 0 ]; then + die "Failed to replace OM tools version number." + fi + else + sed -i "s/COMMON_VERSION = \"Gauss200 OM VERSION\"/COMMON_VERSION = \"$(echo ${om_version})\"/g" ${PKG_TMP_DIR}/script/gspylib/os/gsOSlib.py + if [ $? -ne 0 ]; then + die "Failed to replace OM tools version number." 
+ fi + fi +} + +function clib_copy() +{ + rm -rf $PKG_TMP_DIR/script/gspylib/clib + mkdir -p $PKG_TMP_DIR/script/gspylib/clib + cp $BUILD_TOOLS_PATH/gcc7.3/gcc/lib64/libstdc++.so.6 $PKG_TMP_DIR/script/gspylib/clib + cp $BINARYLIBS_PATH/openssl/comm/lib/libssl.so.1.1 $PKG_TMP_DIR/script/gspylib/clib + cp $BINARYLIBS_PATH/openssl/comm/lib/libcrypto.so.1.1 $PKG_TMP_DIR/script/gspylib/clib + if [ -f $BINARYLIBS_PATH_INSTALL_TOOLS/libpython3.*m.so.1.0 ] + then + cp $BINARYLIBS_PATH_INSTALL_TOOLS/libpython3.*m.so.1.0 $PKG_TMP_DIR/script/gspylib/clib + fi + #cp $BUILD_DIR/bin/encrypt $BUILD_DIR/script/gspylib/clib +} + +function lib_copy() +{ + mkdir -p ${PKG_TMP_DIR}/script/gspylib/inspection/output/log/ && + mkdir -p ${PKG_TMP_DIR}/script/gspylib/inspection/output/nodes/ && + mkdir -p ${PKG_TMP_DIR}/script/gspylib/inspection/lib/asn1crypto/ && + mkdir -p ${PKG_TMP_DIR}/script/gspylib/inspection/lib/bcrypt/ && + mkdir -p ${PKG_TMP_DIR}/script/gspylib/inspection/lib/cffi/ && + mkdir -p ${PKG_TMP_DIR}/script/gspylib/inspection/lib/cryptography/ && + mkdir -p ${PKG_TMP_DIR}/script/gspylib/inspection/lib/idna/ && + mkdir -p ${PKG_TMP_DIR}/script/gspylib/inspection/lib/nacl/ && + mkdir -p ${PKG_TMP_DIR}/script/gspylib/inspection/lib/pyasn1/ && + mkdir -p ${PKG_TMP_DIR}/script/gspylib/inspection/lib/pycparser/ && + mkdir -p ${PKG_TMP_DIR}/script/gspylib/inspection/lib/OpenSSL/ && + mkdir -p ${PKG_TMP_DIR}/script/gspylib/inspection/lib/psutil/ && + mkdir -p ${PKG_TMP_DIR}/script/gspylib/inspection/lib/netifaces/ && + mkdir -p ${PKG_TMP_DIR}/script/gspylib/inspection/lib/paramiko/ && + + cp -rf ${BINARYLIBS_PATH_INSTALL_TOOLS}/asn1crypto/ ${PKG_TMP_DIR}/script/gspylib/inspection/lib/ + cp -rf ${BINARYLIBS_PATH_INSTALL_TOOLS}/bcrypt/ ${PKG_TMP_DIR}/script/gspylib/inspection/lib/ + cp -rf ${BINARYLIBS_PATH_INSTALL_TOOLS}/cffi/ ${PKG_TMP_DIR}/script/gspylib/inspection/lib/ + cp -rf ${BINARYLIBS_PATH_INSTALL_TOOLS}/cryptography/ ${PKG_TMP_DIR}/script/gspylib/inspection/lib/ + cp -rf ${BINARYLIBS_PATH_INSTALL_TOOLS}/idna/ ${PKG_TMP_DIR}/script/gspylib/inspection/lib/ + cp -rf ${BINARYLIBS_PATH_INSTALL_TOOLS}/nacl/ ${PKG_TMP_DIR}/script/gspylib/inspection/lib/ + cp -rf ${BINARYLIBS_PATH_INSTALL_TOOLS}/pyasn1/ ${PKG_TMP_DIR}/script/gspylib/inspection/lib/ + cp -rf ${BINARYLIBS_PATH_INSTALL_TOOLS}/pycparser/ ${PKG_TMP_DIR}/script/gspylib/inspection/lib/ + cp -rf ${BINARYLIBS_PATH_INSTALL_TOOLS}/OpenSSL/ ${PKG_TMP_DIR}/script/gspylib/inspection/lib/ + cp -rf ${BINARYLIBS_PATH_INSTALL_TOOLS}/ipaddress.py ${PKG_TMP_DIR}/script/gspylib/inspection/lib/ + cp -rf ${BINARYLIBS_PATH_INSTALL_TOOLS}/six.py ${PKG_TMP_DIR}/script/gspylib/inspection/lib/ + cp -rf ${BINARYLIBS_PATH_INSTALL_TOOLS}/_cffi_backend.py ${PKG_TMP_DIR}/script/gspylib/inspection/lib/ + cp -rf ${BINARYLIBS_PATH_INSTALL_TOOLS}/_cffi_backend.so* ${PKG_TMP_DIR}/script/gspylib/inspection/lib/ + cp -rf ${BINARYLIBS_PATH_INSTALL_TOOLS}/psutil/ ${PKG_TMP_DIR}/script/gspylib/inspection/lib/ + cp -rf ${BINARYLIBS_PATH_INSTALL_TOOLS}/netifaces/ ${PKG_TMP_DIR}/script/gspylib/inspection/lib/ + cp -rf ${BINARYLIBS_PATH_INSTALL_TOOLS}/paramiko/ ${PKG_TMP_DIR}/script/gspylib/inspection/lib/ + + mkdir -p ${PKG_TMP_DIR}/lib + cp -rf ${BINARYLIBS_PATH_INSTALL_TOOLS}/asn1crypto ${PKG_TMP_DIR}/lib + cp -rf ${BINARYLIBS_PATH_INSTALL_TOOLS}/bcrypt ${PKG_TMP_DIR}/lib + cp -rf ${BINARYLIBS_PATH_INSTALL_TOOLS}/cffi ${PKG_TMP_DIR}/lib + cp -rf ${BINARYLIBS_PATH_INSTALL_TOOLS}/cryptography ${PKG_TMP_DIR}/lib + cp -rf ${BINARYLIBS_PATH_INSTALL_TOOLS}/idna ${PKG_TMP_DIR}/lib + cp 
-rf ${BINARYLIBS_PATH_INSTALL_TOOLS}/nacl ${PKG_TMP_DIR}/lib + cp -rf ${BINARYLIBS_PATH_INSTALL_TOOLS}/pyasn1 ${PKG_TMP_DIR}/lib + cp -rf ${BINARYLIBS_PATH_INSTALL_TOOLS}/pycparser ${PKG_TMP_DIR}/lib + cp -rf ${BINARYLIBS_PATH_INSTALL_TOOLS}/OpenSSL ${PKG_TMP_DIR}/lib + cp -rf ${BINARYLIBS_PATH_INSTALL_TOOLS}/ipaddress.py ${PKG_TMP_DIR}/lib + cp -rf ${BINARYLIBS_PATH_INSTALL_TOOLS}/six.py ${PKG_TMP_DIR}/lib + cp -rf ${BINARYLIBS_PATH_INSTALL_TOOLS}/_cffi_backend.py ${PKG_TMP_DIR}/lib + cp -rf ${BINARYLIBS_PATH_INSTALL_TOOLS}/_cffi_backend.so* ${PKG_TMP_DIR}/lib + cp -rf ${BINARYLIBS_PATH_INSTALL_TOOLS}/paramiko ${PKG_TMP_DIR}/lib + cp -rf ${BINARYLIBS_PATH_INSTALL_TOOLS}/psutil ${PKG_TMP_DIR}/lib + cp -rf ${BINARYLIBS_PATH_INSTALL_TOOLS}/netifaces ${PKG_TMP_DIR}/lib +} + +function main() +{ + # 1. clean install path and log file + env_check + + # 2. copy script file + copy_script_file + + # 3. copy clib file + clib_copy + + # 4. copy lib file + lib_copy + + # 5. make version file + version_cfg + + cd $PKG_TMP_DIR + tar -zvcf "${package_name}" ./* >>"$LOG_FILE" 2>&1 + if [ $? -ne 0 ]; then + die "$package_command ${package_name} failed" + fi + + sha256sum "${package_name}" | awk -F" " '{print $1}' > "$sha256_name" + if [ $? -ne 0 ]; then + die "generate sha256 file failed." + fi + mv $package_name $sha256_name ../ + cd $PKG_DIR + rm -rf $PKG_TMP_DIR + echo "success!" +} + +main +exit 0 \ No newline at end of file diff --git a/build/get_PlatForm_str.sh b/build/get_PlatForm_str.sh new file mode 100644 index 0000000..a2cef1f --- /dev/null +++ b/build/get_PlatForm_str.sh @@ -0,0 +1,37 @@ +#!/bin/bash +# ************************************************************************* +# Copyright: (c) Huawei Technologies Co., Ltd. 2020. All rights reserved +# +# description: the script is to get platform string value +# date: 2020-06-01 +# version: 1.0 +# history: +# +# ************************************************************************* + +function get_os_str() { + if [ -f "/etc/os-release" ]; then + os_name=$(source /etc/os-release; echo $ID) + else + echo "Can not get /etc/os-release file, please check it!" + exit 1 + fi + + cpu_arc=$(uname -p) + + if [ "$os_name"x = "centos"x ] && [ "$cpu_arc"x = "x86_64"x ]; then + os_str=centos7.6_x86_64 + elif [ "$os_name"x = "euleros"x ] && [ "$cpu_arc"x = "aarch64"x ]; then + os_str=euleros2.0_sp8_aarch64 + elif [ "$os_name"x = "openEuler"x ] && [ "$cpu_arc"x = "aarch64"x ]; then + os_str=openeuler_aarch64 + elif [ "$os_name"x = "openEuler"x ] && [ "$cpu_arc"x = "x86_64"x ]; then + os_str=openeuler_x86_64 + else + os_str="Failed" + fi + + echo $os_str +} + +get_os_str \ No newline at end of file diff --git a/other/transfer.py b/other/transfer.py new file mode 100644 index 0000000..7f5e30c --- /dev/null +++ b/other/transfer.py @@ -0,0 +1,198 @@ +#!/usr/bin/env python3 +#Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +#openGauss is licensed under Mulan PSL v2. +#You can use this software according to the terms and conditions of the Mulan PSL v2. +#You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +#------------------------------------------------------------------------- +# +# transfer.py +# relfilenode to oid mapping cache. 
+# +# IDENTIFICATION +# src/manager/om/other/transfer.py +# +#------------------------------------------------------------------------- + +import os +import sys +import pwd +import getopt + +from script.gspylib.common.DbClusterInfo import dbClusterInfo +from script.gspylib.common.Common import DefaultValue +GPPATH = os.getenv("GPHOME") +DefaultValue.checkPathVaild(GPPATH) +sys.path.insert(0, GPPATH) +from script.gspylib.common.GaussLog import GaussLog +from script.gspylib.common.ErrorCode import ErrorCode +from script.gspylib.threads.SshTool import SshTool + +# source file path +SRCFILEPATH = "" +DRCPATH = "" +DNINSTANCEID = [] +ISALLHOSTS = False +g_logger = None +g_clusterUser = "" +g_clusterInfo = None +g_sshTool = None + + +def usage(): + """ +transfer.py is a utility to transfer C function lib file to all nodes or standy node. + +Usage: + transfer.py -? | --help + transfer.py 1 sourcefile destinationpath copy sourcefile to Cluster all nodes. + transfer.py 2 sourcefile pgxc_node_name copy sourcefile to the same path of node contain pgxc_node_name standy instance. + """ + + print (usage.__doc__) + + +def initGlobals(): + global g_logger + global g_clusterUser + global g_clusterInfo + global g_sshTool + + if os.getuid() == 0: + GaussLog.exitWithError(ErrorCode.GAUSS_501["GAUSS_50105"]) + sys.exit(1) + # Init user + g_clusterUser = pwd.getpwuid(os.getuid()).pw_name + # Init logger + logFile = DefaultValue.getOMLogPath(DefaultValue.LOCAL_LOG_FILE, g_clusterUser, "", "") + g_logger = GaussLog(logFile, "Transfer_C_function_file") + # Init ClusterInfo + g_clusterInfo = dbClusterInfo() + g_clusterInfo.initFromStaticConfig(g_clusterUser) + # Init sshtool + g_sshTool = SshTool(g_clusterInfo.getClusterNodeNames(), g_logger.logFile) + + +def checkSrcFile(srcFile): + g_logger.log("Check whether the source file exists.") + if not os.path.isfile(srcFile): + g_logger.debug("The %s does not exist. 
" % srcFile) + return False + else: + g_logger.log("The source file exists.") + return True + + +def parseCommandLine(): + g_logger.log("Start parse parameter.") + try: + opts, args = getopt.getopt(sys.argv[1:], "") + if len(args) != 3: + raise getopt.GetoptError("The number of parameters is not equal to 3.") + except getopt.GetoptError as e: + g_logger.logExit("Parameter error, Error:\n%s" % str(e)) + + global SRCFILEPATH + global DRCPATH + global DNINSTANCEID + global ISALLHOSTS + + if args[0] not in ['1', '2']: + g_logger.logExit("Parameter error.") + if args[0] == "1": + ISALLHOSTS = True + if not checkSrcFile(args[1]): + g_logger.logExit("Parameter error.") + SRCFILEPATH = args[1] + DRCPATH = args[2] + elif args[0] == "2": + if not checkSrcFile(args[1]): + g_logger.logExit("Parameter error.") + SRCFILEPATH = args[1] + nodenamelst = args[2].split("_") + # when the clustertype is primary-standy-dummy,the standby DNinstence ID is the third arg in "nodenamelst" + if len(nodenamelst) == 3: + DNINSTANCEID.append(nodenamelst[2]) + return + # when the clustertype is primary-multi-standby,the standby DNinstence IDs are following the third parameter + for dnId in nodenamelst[2:]: + DNINSTANCEID.append(dnId) + else: + g_logger.logExit("Parameter error.") + g_logger.log("Successfully parse parameter.") + + +def scpFileToAllHost(srcFile, drcpath): + try: + g_logger.log("Transfer C function file to all hosts.") + g_sshTool.scpFiles(srcFile, drcpath, g_clusterInfo.getClusterNodeNames()) + cmd = "chmod 600 '%s'" % drcpath + g_sshTool.executeCommand(cmd, + "Transfer C function file to all hosts.", + DefaultValue.SUCCESS, + g_clusterInfo.getClusterNodeNames()) + except Exception as e: + raise Exception(ErrorCode.GAUSS_536["GAUSS_53611"] % str(e)) + + +def scpFileToStandy(srcFile, InstanceID): + try: + g_logger.log("Transfer C function file to standy node.") + + mirrorID = 0 + peerNode = [] + # Get instance mirrorID by InstanceID + for dbNode in g_clusterInfo.dbNodes: + for dbInst in dbNode.datanodes: + if str(dbInst.instanceId) == InstanceID: + mirrorID = dbInst.mirrorId + + if mirrorID == 0: + g_logger.logExit("Failed to find primary instance mirrorId.") + + # Get standy instance + for node in g_clusterInfo.dbNodes: + for instance in node.datanodes: + if instance.mirrorId == mirrorID and (instance.instanceType == 1 or instance.instanceType == 0): + peerNode.append(node.name) + + # send SOFile to peerInstance + (despath, sofile) = os.path.split(srcFile) + for deshost in peerNode: + status = g_sshTool.checkRemoteFileExist(deshost, srcFile, "") + if not status: + g_sshTool.scpFiles(srcFile, despath, [deshost]) + except Exception as e: + raise Exception(ErrorCode.GAUSS_536["GAUSS_53611"] % str(e)) + + +if __name__ == '__main__': + # help info + if "-?" in sys.argv[1:] or "--help" in sys.argv[1:]: + usage() + sys.exit(0) + + # Init globle + initGlobals() + g_logger.log("Start transfer C function file.") + + # parse command line + parseCommandLine() + # start send soFile + try: + if ISALLHOSTS: + scpFileToAllHost(SRCFILEPATH, DRCPATH) + else: + for dnInstanceId in DNINSTANCEID: + scpFileToStandy(SRCFILEPATH, dnInstanceId) + except Exception as e: + g_logger.logExit("Failed to transfer C function file. 
Error:%s" % str(e)) + g_logger.log("Successfully transfer C function file.") + sys.exit(0) diff --git a/script/__init__.py b/script/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/script/config/leap-seconds.conf b/script/config/leap-seconds.conf new file mode 100644 index 0000000..835ec5d --- /dev/null +++ b/script/config/leap-seconds.conf @@ -0,0 +1,250 @@ +# +# In the following text, the symbol '#' introduces +# a comment, which continues from that symbol until +# the end of the line. A plain comment line has a +# whitespace character following the comment indicator. +# There are also special comment lines defined below. +# A special comment will always have a non-whitespace +# character in column 2. +# +# A blank line should be ignored. +# +# The following table shows the corrections that must +# be applied to compute International Atomic Time (TAI) +# from the Coordinated Universal Time (UTC) values that +# are transmitted by almost all time services. +# +# The first column shows an epoch as a number of seconds +# since 1 January 1900, 00:00:00 (1900.0 is also used to +# indicate the same epoch.) Both of these time stamp formats +# ignore the complexities of the time scales that were +# used before the current definition of UTC at the start +# of 1972. (See note 3 below.) +# The second column shows the number of seconds that +# must be added to UTC to compute TAI for any timestamp +# at or after that epoch. The value on each line is +# valid from the indicated initial instant until the +# epoch given on the next one or indefinitely into the +# future if there is no next line. +# (The comment on each line shows the representation of +# the corresponding initial epoch in the usual +# day-month-year format. The epoch always begins at +# 00:00:00 UTC on the indicated day. See Note 5 below.) +# +# Important notes: +# +# 1. Coordinated Universal Time (UTC) is often referred to +# as Greenwich Mean Time (GMT). The GMT time scale is no +# longer used, and the use of GMT to designate UTC is +# discouraged. +# +# 2. The UTC time scale is realized by many national +# laboratories and timing centers. Each laboratory +# identifies its realization with its name: Thus +# UTC(NIST), UTC(USNO), etc. The differences among +# these different realizations are typically on the +# order of a few nanoseconds (i.e., 0.000 000 00x s) +# and can be ignored for many purposes. These differences +# are tabulated in Circular T, which is published monthly +# by the International Bureau of Weights and Measures +# (BIPM). See www.bipm.org for more information. +# +# 3. The current definition of the relationship between UTC +# and TAI dates from 1 January 1972. A number of different +# time scales were in use before that epoch, and it can be +# quite difficult to compute precise timestamps and time +# intervals in those "prehistoric" days. For more information, +# consult: +# +# The Explanatory Supplement to the Astronomical +# Ephemeris. +# or +# Terry Quinn, "The BIPM and the Accurate Measurement +# of Time," Proc. of the IEEE, Vol. 79, pp. 894-905, +# July, 1991. +# +# 4. The decision to insert a leap second into UTC is currently +# the responsibility of the International Earth Rotation and +# Reference Systems Service. (The name was changed from the +# International Earth Rotation Service, but the acronym IERS +# is still used.) +# +# Leap seconds are announced by the IERS in its Bulletin C. +# +# See www.iers.org for more details. 
+# +# Every national laboratory and timing center uses the +# data from the BIPM and the IERS to construct UTC(lab), +# their local realization of UTC. +# +# Although the definition also includes the possibility +# of dropping seconds ("negative" leap seconds), this has +# never been done and is unlikely to be necessary in the +# foreseeable future. +# +# 5. If your system keeps time as the number of seconds since +# some epoch (e.g., NTP timestamps), then the algorithm for +# assigning a UTC time stamp to an event that happens during a positive +# leap second is not well defined. The official name of that leap +# second is 23:59:60, but there is no way of representing that time +# in these systems. +# Many systems of this type effectively stop the system clock for +# one second during the leap second and use a time that is equivalent +# to 23:59:59 UTC twice. For these systems, the corresponding TAI +# timestamp would be obtained by advancing to the next entry in the +# following table when the time equivalent to 23:59:59 UTC +# is used for the second time. Thus the leap second which +# occurred on 30 June 1972 at 23:59:59 UTC would have TAI +# timestamps computed as follows: +# +# ... +# 30 June 1972 23:59:59 (2287785599, first time): TAI= UTC + 10 seconds +# 30 June 1972 23:59:60 (2287785599,second time): TAI= UTC + 11 seconds +# 1 July 1972 00:00:00 (2287785600) TAI= UTC + 11 seconds +# ... +# +# If your system realizes the leap second by repeating 00:00:00 UTC twice +# (this is possible but not usual), then the advance to the next entry +# in the table must occur the second time that a time equivalent to +# 00:00:00 UTC is used. Thus, using the same example as above: +# +# ... +# 30 June 1972 23:59:59 (2287785599): TAI= UTC + 10 seconds +# 30 June 1972 23:59:60 (2287785600, first time): TAI= UTC + 10 seconds +# 1 July 1972 00:00:00 (2287785600,second time): TAI= UTC + 11 seconds +# ... +# +# in both cases the use of timestamps based on TAI produces a smooth +# time scale with no discontinuity in the time interval. However, +# although the long-term behavior of the time scale is correct in both +# methods, the second method is technically not correct because it adds +# the extra second to the wrong day. +# +# This complexity would not be needed for negative leap seconds (if they +# are ever used). The UTC time would skip 23:59:59 and advance from +# 23:59:58 to 00:00:00 in that case. The TAI offset would decrease by +# 1 second at the same instant. This is a much easier situation to deal +# with, since the difficulty of unambiguously representing the epoch +# during the leap second does not arise. +# +# Some systems implement leap seconds by amortizing the leap second +# over the last few minutes of the day. The frequency of the local +# clock is decreased (or increased) to realize the positive (or +# negative) leap second. This method removes the time step described +# above. Although the long-term behavior of the time scale is correct +# in this case, this method introduces an error during the adjustment +# period both in time and in frequency with respect to the official +# definition of UTC. +# +# Questions or comments to: +# Judah Levine +# Time and Frequency Division +# NIST +# Boulder, Colorado +# Judah.Levine@nist.gov +# +# Last Update of leap second values: 8 July 2016 +# +# The following line shows this last update date in NTP timestamp +# format. This is the date on which the most recent change to +# the leap second data was added to the file. 
This line can +# be identified by the unique pair of characters in the first two +# columns as shown below. +# +#$ 3676924800 +# +# The NTP timestamps are in units of seconds since the NTP epoch, +# which is 1 January 1900, 00:00:00. The Modified Julian Day number +# corresponding to the NTP time stamp, X, can be computed as +# +# X/86400 + 15020 +# +# where the first term converts seconds to days and the second +# term adds the MJD corresponding to the time origin defined above. +# The integer portion of the result is the integer MJD for that +# day, and any remainder is the time of day, expressed as the +# fraction of the day since 0 hours UTC. The conversion from day +# fraction to seconds or to hours, minutes, and seconds may involve +# rounding or truncation, depending on the method used in the +# computation. +# +# The data in this file will be updated periodically as new leap +# seconds are announced. In addition to being entered on the line +# above, the update time (in NTP format) will be added to the basic +# file name leap-seconds to form the name leap-seconds.. +# In addition, the generic name leap-seconds.list will always point to +# the most recent version of the file. +# +# This update procedure will be performed only when a new leap second +# is announced. +# +# The following entry specifies the expiration date of the data +# in this file in units of seconds since the origin at the instant +# 1 January 1900, 00:00:00. This expiration date will be changed +# at least twice per year whether or not a new leap second is +# announced. These semi-annual changes will be made no later +# than 1 June and 1 December of each year to indicate what +# action (if any) is to be taken on 30 June and 31 December, +# respectively. (These are the customary effective dates for new +# leap seconds.) This expiration date will be identified by a +# unique pair of characters in columns 1 and 2 as shown below. +# In the unlikely event that a leap second is announced with an +# effective date other than 30 June or 31 December, then this +# file will be edited to include that leap second as soon as it is +# announced or at least one month before the effective date +# (whichever is later). +# If an announcement by the IERS specifies that no leap second is +# scheduled, then only the expiration date of the file will +# be advanced to show that the information in the file is still +# current -- the update time stamp, the data and the name of the file +# will not change. +# +# Updated through IERS Bulletin C54 +# File expires on: 28 June 2018 +# +#@ 3739132800 +# +2272060800 10 # 1 Jan 1972 +2287785600 11 # 1 Jul 1972 +2303683200 12 # 1 Jan 1973 +2335219200 13 # 1 Jan 1974 +2366755200 14 # 1 Jan 1975 +2398291200 15 # 1 Jan 1976 +2429913600 16 # 1 Jan 1977 +2461449600 17 # 1 Jan 1978 +2492985600 18 # 1 Jan 1979 +2524521600 19 # 1 Jan 1980 +2571782400 20 # 1 Jul 1981 +2603318400 21 # 1 Jul 1982 +2634854400 22 # 1 Jul 1983 +2698012800 23 # 1 Jul 1985 +2776982400 24 # 1 Jan 1988 +2840140800 25 # 1 Jan 1990 +2871676800 26 # 1 Jan 1991 +2918937600 27 # 1 Jul 1992 +2950473600 28 # 1 Jul 1993 +2982009600 29 # 1 Jul 1994 +3029443200 30 # 1 Jan 1996 +3076704000 31 # 1 Jul 1997 +3124137600 32 # 1 Jan 1999 +3345062400 33 # 1 Jan 2006 +3439756800 34 # 1 Jan 2009 +3550089600 35 # 1 Jul 2012 +3644697600 36 # 1 Jul 2015 +3692217600 37 # 1 Jan 2017 +# +# the following special comment contains the +# hash value of the data in this file computed +# use the secure hash algorithm as specified +# by FIPS 180-1. 
See the files in ~/pub/sha for +# the details of how this hash value is +# computed. Note that the hash computation +# ignores comments and whitespace characters +# in data lines. It includes the NTP values +# of both the last modification time and the +# expiration time of the file, but not the +# white space on those lines. +# the hash line is also ignored in the +# computation. +# +#h 5101445a 69948b51 9153e2b 2086e3d8 d54561a3 \ No newline at end of file diff --git a/script/config/logrotate.conf b/script/config/logrotate.conf new file mode 100644 index 0000000..ced56d3 --- /dev/null +++ b/script/config/logrotate.conf @@ -0,0 +1,56 @@ +compress +/var/log/gaussdb/cm/cm_agent/*.log +{ + dateext + dateformat -%Y-%m-%d + extension .log + missingok + copytruncate + rotate 16 + size 16M + noolddir + } +/var/log/gaussdb/cm/cm_server/*.log +{ + dateext + dateformat -%Y-%m-%d + extension .log + missingok + copytruncate + rotate 16 + size 16M + noolddir + } + /var/log/gaussdb/cm/*.log +{ + dateext + dateformat -%Y-%m-%d + extension .log + missingok + copytruncate + rotate 16 + size 16M + noolddir + } +/var/log/gaussdb/om/*.log +{ + dateext + dateformat -%Y-%m-%d + extension .log + missingok + copytruncate + rotate 16 + size 16M + noolddir + } +/var/log/gaussdb/alarm/CM/*.log +{ + dateext + dateformat -%Y-%m-%d + extension .log + missingok + nocopytruncate + rotate 16 + size 16M + noolddir + } diff --git a/script/gs_backup b/script/gs_backup new file mode 100644 index 0000000..bf16965 --- /dev/null +++ b/script/gs_backup @@ -0,0 +1,251 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +############################################################################# +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +# Description : gs_backup is a utility to back up or restore binary files and parameter files. +############################################################################# + +import os +import sys +import pwd +from gspylib.common.GaussLog import GaussLog +from gspylib.common.Common import DefaultValue +from gspylib.common.ParallelBaseOM import ParallelBaseOM +from gspylib.common.ErrorCode import ErrorCode +from gspylib.common.ParameterParsecheck import Parameter +from impl.backup.OLAP.BackupImplOLAP import BackupImplOLAP + +ACTION_BACKUP = "backup" +ACTION_RESTORE = "restore" + + +class Backup(ParallelBaseOM): + ''' + classdocs + input : NA + output: NA + ''' + + def __init__(self): + ''' + function: Constructor + input : NA + output: NA + ''' + ParallelBaseOM.__init__(self) + self.nodename = "" + self.backupDir = "" + self.isParameter = False + self.isBinary = False + self.isForce = False + + #################################################################################### + # Help context. U:R:oC:v: + #################################################################################### + def usage(self): + """ +gs_backup is a utility to back up or restore binary files and parameter files. + +Usage: + gs_backup -? 
| --help
+    gs_backup -V | --version
+    gs_backup -t backup --backup-dir=BACKUPDIR [-h HOSTNAME] [--parameter]
+              [--binary] [--all] [-l LOGFILE]
+    gs_backup -t restore --backup-dir=BACKUPDIR [-h HOSTNAME] [--parameter]
+              [--binary] [--all] [-l LOGFILE] [--force]
+
+General options:
+  -t                         Operation type. It can be backup or restore.
+  --backup-dir=BACKUPDIR     Backup or restore directory.
+  -h                         The node that stores the backup files. It must
+                             be specified when restoring. If the node name
+                             is not specified, the backup sets are stored
+                             on each node.
+  --parameter                Back up or restore parameter files only.
+                             (This option is used by default.)
+  --binary                   Back up or restore binary files only.
+  --all                      Back up or restore both parameter files and
+                             binary files.
+  --force                    Force the restoration of binary files even if
+                             the cluster_static_config is lost.
+  -l                         Path of the log file.
+  -?, --help                 Show help information for this utility,
+                             and exit the command line mode.
+  -V, --version              Show version information.
+
+    """
+
+        print(self.usage.__doc__)
+
+    def checkAction(self):
+        """
+        function: check action;
+        if action is not in (ACTION_BACKUP, ACTION_RESTORE), throw an error
+        input : NA
+        output: NA
+        """
+        if (self.action == ""):
+            GaussLog.exitWithError(
+                ErrorCode.GAUSS_500["GAUSS_50001"] % 't' + ".")
+        if self.action not in (ACTION_BACKUP, ACTION_RESTORE):
+            GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50004"] % 't'
+                                   + " The value of the '-t' parameter:"
+                                     " backup or restore.")
+
+    def checkUserParameter(self):
+        """
+        function: check the user parameter;
+        if the user name is empty, use the current user;
+        if it is still empty, throw an error
+        input : NA
+        output: NA
+        """
+        if (self.user == ""):
+            self.user = pwd.getpwuid(os.getuid()).pw_name
+        if (self.user == ""):
+            GaussLog.exitWithError(
+                ErrorCode.GAUSS_500["GAUSS_50001"] % 'U' + ".")
+        # check if the user exists and is the right user
+        DefaultValue.checkUser(self.user)
+
+    def checkBackupPara(self):
+        """
+        function: check the backup parameters;
+        if backupDir is empty, throw an error;
+        if backupDir is not absolute, throw an error
+        input : NA
+        output: NA
+        """
+        if (self.backupDir == ""):
+            GaussLog.exitWithError(
+                ErrorCode.GAUSS_500["GAUSS_50001"] % '-backup-dir' + ".")
+        if (not os.path.isabs(self.backupDir)):
+            GaussLog.exitWithError(
+                ErrorCode.GAUSS_502["GAUSS_50213"] % self.backupDir)
+
+    def checkLogFilePara(self):
+        """
+        function: check the log file parameter
+        input : NA
+        output: NA
+        """
+        # check log file
+        if (self.logFile == ""):
+            self.logFile = DefaultValue.getOMLogPath(
+                DefaultValue.GS_BACKUP_LOG_FILE, self.user, "")
+        if (not os.path.isabs(self.logFile)):
+            GaussLog.exitWithError(
+                ErrorCode.GAUSS_502["GAUSS_50213"] % "log file")
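+
+    # Illustrative invocations (the directory and node name below are
+    # hypothetical examples; see usage() above for the full option list):
+    #   gs_backup -t backup --backup-dir=/opt/backup --all
+    #   gs_backup -t restore --backup-dir=/opt/backup -h node1 --parameter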
+    def parseCommandLine(self):
+        """
+        function: Parse the command line and save to global variables
+        input : NA
+        output: NA
+        """
+        ParaObj = Parameter()
+        # get the dict of parameters
+        ParaDict = ParaObj.ParameterCommandLine("backup")
+        # check if it has '--help'
+        if (ParaDict.__contains__("helpFlag")):
+            self.usage()
+            sys.exit(0)
+        # parse the --all parameter
+        backupAll = False
+        forceRestore = False
+        parameter_map = {"action": self.action,
+                         "backupDir": self.backupDir,
+                         "isBinary": self.isBinary,
+                         "isParameter": self.isParameter,
+                         "logFile": self.logFile,
+                         "all": backupAll,
+                         "force": forceRestore}
+        parameter_keys = parameter_map.keys()
+
+        for key in parameter_keys:
+            if (ParaDict.__contains__(key)):
+                parameter_map[key] = ParaDict.get(key)
+
+        self.action = parameter_map["action"]
+        self.backupDir = parameter_map["backupDir"]
+        self.isBinary = parameter_map["isBinary"]
+        self.isParameter = parameter_map["isParameter"]
+        self.logFile = parameter_map["logFile"]
+        self.isForce = parameter_map["force"]
+
+        if (parameter_map["all"]):
+            self.isBinary = True
+            self.isParameter = True
+
+        if (ParaDict.__contains__("nodename")):
+            nodename = ParaDict.get("nodename")
+            if (len(nodename) != 1):
+                GaussLog.exitWithError(
+                    ErrorCode.GAUSS_500["GAUSS_50004"] % "h" + \
+                    " The number of nodes must be equal to 1."
+                    " Please check it.")
+            self.nodename = nodename[0]
+
+    def checkParameter(self):
+        """
+        function: Check the parameters from the command line
+        input : NA
+        output: NA
+        """
+        self.checkAction()
+        # check if the user exists and is the right user
+        self.checkUserParameter()
+        self.checkLogFilePara()
+        # check backupDir
+        self.checkBackupPara()
+        # check the backup content parameters
+        if (self.isParameter == False and self.isBinary == False):
+            GaussLog.printMessage("Hint: Parameters '--parameter',"
+                                  " '--binary' and '--all' were not"
+                                  " specified."
+                                  " Only parameter files will be backed up.")
+            self.isParameter = True
+
+
+
+if __name__ == '__main__':
+    """
+    function: main
+    input : NA
+    output: NA
+    """
+    # check if running as the root user
+    if (os.getuid() == 0):
+        GaussLog.exitWithError(ErrorCode.GAUSS_501["GAUSS_50105"])
+    try:
+        # Instantiate the Backup class
+        backupObj = Backup()
+        # Parse the command line and save the options
+        backupObj.parseCommandLine()
+
+        # check that the parameters are OK
+        backupObj.checkParameter()
+
+        # set the action flag file
+        DefaultValue.setActionFlagFile("gs_backup")
+
+        # get the cluster type
+        impl = BackupImplOLAP(backupObj)
+        impl.run()
+    except Exception as e:
+        GaussLog.exitWithError(str(e))
+    finally:
+        DefaultValue.setActionFlagFile("gs_backup", None, False)
+        sys.exit(0)
diff --git a/script/gs_check b/script/gs_check
new file mode 100644
index 0000000..05d5625
--- /dev/null
+++ b/script/gs_check
@@ -0,0 +1,1764 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+#############################################################################
+# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+#
+# openGauss is licensed under Mulan PSL v2.
+# You can use this software according to the terms
+# and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+#
+# http://license.coscl.org.cn/MulanPSL2
+#
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details. 
+# ---------------------------------------------------------------------------- +# Description : gs_check is a utility to check cluster and database status +############################################################################# + +import subprocess +import os +import sys +import re +import getopt +import getpass +import time +import pwd +import grp +import pickle +package_path = os.path.dirname(os.path.realpath(__file__)) +ld_path = package_path + "/gspylib/clib" +if 'LD_LIBRARY_PATH' not in os.environ: + os.environ['LD_LIBRARY_PATH'] = ld_path + os.execve(os.path.realpath(__file__), sys.argv, os.environ) +if not os.environ.get('LD_LIBRARY_PATH').startswith(ld_path): + os.environ['LD_LIBRARY_PATH'] = \ + ld_path + ":" + os.environ['LD_LIBRARY_PATH'] + os.execve(os.path.realpath(__file__), sys.argv, os.environ) + +import xml.etree.cElementTree as ETree +from itertools import combinations +from datetime import datetime, timedelta +from multiprocessing.dummy import Pool as ThreadPool +from gspylib.inspection.common.Exception import CheckException, \ + ParameterException, UnknownParameterException, \ + EmptyParameterException, \ + UseBothParameterException, AvailableParameterException, \ + SceneNotFoundException, ParseItemException, \ + NotEmptyException, \ + NotExistException, InterruptException, ThreadCheckException, \ + ContextDumpException, ContextLoadException, \ + TimeoutException +from gspylib.common.Common import DefaultValue +from gspylib.common.GaussLog import GaussLog +from gspylib.common.ErrorCode import ErrorCode +from gspylib.common.ParameterParsecheck import Parameter +from gspylib.inspection.common import SharedFuncs +from gspylib.inspection.common.Log import LoggerFactory +from gspylib.inspection.common.TaskPool import Watcher, CheckThread +from gspylib.inspection.common.CheckResult import CheckResult, ItemResult +from gspylib.inspection.common.CheckItem import CheckItemFactory +from gspylib.inspection.common.ProgressBar import MultiProgressManager, \ + LineProgress +from gspylib.os.gsfile import g_file +from gspylib.os.gsplatform import g_Platform +from gspylib.common.VersionInfo import VersionInfo +from gspylib.common.DbClusterInfo import dbClusterInfo + + +############################################################################# +# Global variables +# g_opts: global option +# g_logger: global logger +# g_context :global context +# g_result : global result +# g_endTime : global endTime +# DIRECTORY_MODE: global directory mode +# MPPDB_VERSION_R5 : mppdb version +# DEFAULT_TIMEOUT : time out +############################################################################# +g_logger = None +g_opts = None +g_context = None +g_result = None +g_endTime = None +g_mtuMap = {} +g_itemResult = {} + +DEFAULT_TIMEOUT = 1500 +# single cluster will skip these items +# because single clusters don't need to perform consistency checks and +# internal communication class checks +SINGLE_SKIP = ["CheckTimeZone", "CheckEncoding", "CheckKernelVer", + "CheckNTPD", "CheckNoCheckSum", "CheckCpuCount", + "CheckMemInfo", "CheckDiskConfig", + "CheckUpVer", "CheckPgxcgroup", "CheckPing", + "CheckNetWorkDrop", "CheckNetSpeed"] + +SETITEM_SKIP = ["CheckCPU", "CheckTimeZone", "CheckOSVer", "CheckNTPD", + "CheckSshdService", "CheckNoCheckSum", "CheckEtcHosts", + "CheckCpuCount", "CheckHyperThread", "CheckMemInfo", + "CheckKernelVer", "CheckEncoding", "CheckBootItems", + "CheckDropCache", "CheckFilehandle", "CheckKeyProAdj", + "CheckDiskFormat", "CheckInodeUsage", "CheckSpaceUsage", + 
"CheckDiskConfig", "CheckXid", "CheckSysTabSize", + "CheckClusterState", "CheckConfigFileDiff", "CheckUpVer", + "CheckEnvProfile", "CheckGaussVer", "CheckPortRange", + "CheckReadonlyMode", "CheckCatchup", "CheckProcessStatus", + "CheckSpecialFile", "CheckCollector", "CheckLargeFile", + "CheckProStartTime", "CheckMpprcFile", "CheckLockNum", + "CheckCurConnCount", "CheckCursorNum", "CheckPgxcgroup", + "CheckLockState", "CheckIdleSession", "CheckDBConnection", + "CheckSysTable", "CheckSysTabSize", "CheckTableSpace", + "CheckTableSkew", "CheckDNSkew", "CheckCreateView", + "CheckHashIndex", "CheckNextvalInDefault", "CheckPgxcRedistb", + "CheckReturnType", "CheckSysadminUser", "CheckTDDate", + "CheckDropColumn", "CheckDiskFailure", "CheckPing", + "CheckNetWorkDrop", "CheckUsedPort", "CheckNICModel", + "CheckRouting", "CheckNetSpeed", "CheckDataDiskUsage"] + + +class CmdOptions(): + """ + command-line options + """ + + def __init__(self): + # initialize variable + self.user = None + self.localMode = False + self.distributing = False + self.skipRootItems = False + self.set = False + self.language = 'zh' + self.format = 'default' + self.scene = None + self.items = None + self.nodes = [] + self.cluster = None + self.timeout = DEFAULT_TIMEOUT + self.pwdMap = {} + self.thresholdDn = None + self.outPath = None + self.logFile = None + self.isSingle = False + self.routing = None + self.skipItems = [] + self.LCName = None + self.ShrinkNodes = None + + +class CheckContext(): + """ + check execution context + """ + + def __init__(self): + """ + Constructor + """ + # Initialize the self.clusterInfo variable + self.basePath = os.path.join( + os.path.split(os.path.realpath(__file__))[0], 'gspylib', + 'inspection') + self.user = None + self.set = None + self.log = None + self.postAnalysis = False + self.supportItems = {} + self.supportScenes = {} + self.items = [] + self.rootItems = [] + self.cluster = None + self.nodes = [] + self.mpprc = None + self.checkID = self.genCheckID() + self.thresholdDn = None + self.outPath = os.path.join(self.basePath, "output") + self.logFile = None + self.tmpPath = None + self.hostMapping = None + self.routing = None + self.skipSetItem = [] + self.oldNodes = [] + self.newNodes = [] + self.oldItems = [] + self.newItems = [] + self.LCName = None + self.ShrinkNodes = None + + def genCheckID(self): + ''' + function : generate the check ID which is unique for once checking + input : NA + output : checkID + ''' + # Get Time + t = time.localtime(time.time()) + dateString = time.strftime("%Y%m%d", t) + seconds = timedelta(hours=t.tm_hour, minutes=t.tm_min, + seconds=t.tm_sec).seconds + pidString = str(os.getpid()) + return dateString + str(seconds) + pidString + + def setCheckID(self, checkID): + ''' + function : set check id + ''' + self.checkID = checkID + + def getCacheFile(self): + return "%s/context_%s.cache" % (self.tmpPath, self.checkID) + + def checkMPPDBVersion(self): + ''' + function : check mppdb version + input : NA + output : NA + ''' + # check the version number + cmd = "gsql -V" + output = SharedFuncs.runShellCmd(cmd, self.user, self.mpprc) + return re.compile(r'V[0-9]{3}R[0-9]{3}C[0-9]{2}').search( + output).group() + + def loadClusterInfo(self, user=None): + ''' + function : load cluster info from static config file + input : user + output : NA + ''' + # Get the user + u = user if user is not None else self.user + if (u is None): + return None + try: + # Init cluster info + clusterInfo = dbClusterInfo() + # Initialize the self.clusterInfo variable + 
clusterInfo.initFromStaticConfig(u) + return clusterInfo + except Exception: + return None + + def loadSupportItems(self): + ''' + function : load support items by scanning the disk files + input : NA + output : NA + ''' + # Get check items + itemPath = "%s/items/" % self.basePath + for (dirpath, dirnames, filenames) in os.walk(itemPath): + for f in filenames: + (fileName, suffix) = os.path.splitext(f) + if (fileName.find("Check") == 0 and suffix == ".py"): + self.supportItems[fileName] = os.path.join(dirpath, f) + if (not self.supportItems): + raise NotEmptyException("support items") + + def loadSupportScene(self): + ''' + function : load support scene by scanning the scene + configuration files in config folder + input : NA + output : NA + ''' + configPath = "%s/config/" % self.basePath + for (dirpath, dirnames, filenames) in os.walk(configPath): + for f in filenames: + (fileName, suffix) = os.path.splitext(f) + if (fileName.find("scene_") == 0 and suffix == ".xml"): + self.supportScenes[fileName[6:]] =\ + os.path.join(dirpath, f) + if (not self.supportScenes): + raise NotEmptyException("support scenes") + + def loadSceneConfiguration(self, scene): + ''' + function : load certain scene configuration in xml file + input : NA + output : NA + ''' + # Get scene xml + configFile = "%s/config/scene_%s.xml" % (self.basePath, scene) + if not os.path.isfile(configFile): + raise SceneNotFoundException(scene, self.supportScenes) + # root node + rootNode = ETree.parse(configFile).getroot() + configElem = rootNode.find('configuration') + if configElem is not None: + for elem in list(configElem): + setattr(self, elem.tag, elem.text.strip()) + + def isCached(self): + ''' + function : whether the context was serialized to disk + input : NA + output : boolean + ''' + # Check if Cache file exists + if os.path.isfile(self.getCacheFile()): + return True + else: + return False + + def clean(self): + ''' + function : clean the cache file + input : NA + output : boolean + ''' + # Delete Cache files + cmd = "rm -rf %s" % self.getCacheFile() + SharedFuncs.runShellCmd(cmd) + + def dump(self): + ''' + function : serialize the check context to disk + input : NA + output : NA + ''' + self.clean() + try: + pickle.dump(self, open(self.getCacheFile(), "wb"), True) + SharedFuncs.chmodFile(self.getCacheFile(), + DefaultValue.KEY_FILE_MODE) + except Exception as e: + raise ContextDumpException(e) + + def load(self, fileName=None): + ''' + function : load the check context from disk + input : path of the context file + output : CheckContext + ''' + f = fileName if fileName is not None else self.getCacheFile() + result = None + if self.isCached(): + try: + result = pickle.load(open(f, "rb")) + except Exception as e: + raise ContextLoadException(e) + return result + + def getNodeName(self, host): + if "HOST_IP" in list(os.environ.keys()): + cmd = "echo $HOST_IP" + else: + cmd = "hostname" + if SharedFuncs.is_local_node(host): + output = SharedFuncs.runShellCmd(cmd) + else: + output = SharedFuncs.runSshCmd(cmd, host, self.user) + hostname = output.strip().split('\n')[-1].strip() + self.hostMapping[host] = hostname + + def getMapping(self): + ''' + function : get the ip to hostname mapping with all host + input : remote host name and password map + output : NA + ''' + self.hostMapping = {} + if (not self.nodes): + return + try: + pool = ThreadPool(DefaultValue.getCpuSet()) + results = pool.map(self.getNodeName, self.nodes) + pool.close() + pool.join() + except Exception as e: + raise Exception(str(e)) + + def sendTmpFile(self, 
host): + + cmd = "if [ ! -d %s ]; then mkdir %s -p -m %s;fi" % ( + self.tmpPath, self.tmpPath, DefaultValue.KEY_DIRECTORY_MODE) + SharedFuncs.runSshCmd(cmd, host, self.user) + SharedFuncs.sendFile(self.getCacheFile(), host, self.user, + self.tmpPath) + + def dispatch(self, hosts): + ''' + function : send the serialized context file to remote host + input : remote host name and password map + output : NA + ''' + if len(hosts) == 0 or g_opts.isSingle: + return + fileName = self.getCacheFile() + if not os.path.isfile(fileName): + raise CheckException("File %s is not exist or invalid" % fileName) + try: + pool = ThreadPool(DefaultValue.getCpuSet()) + results = pool.map(self.sendTmpFile, hosts) + pool.close() + pool.join() + except Exception as e: + raise Exception(str(e)) + + +############################################################################# +# Parse and check parameters +############################################################################# +def usage(): + """ +gs_check is a utility to check the health status of a cluster. + +Usage: + gs_check -? | --help + Example: + gs_check -i ITEM [...] [-U USER] [-L] [-l LOGFILE] [-o OUTPUTDIR] + [--skip-root-items] [--set] [--routing] + gs_check -e SCENE_NAME [-U USER] [-L] [-l LOGFILE] [-o OUTPUTDIR] + [--skip-root-items] [--set] [--time-out=SECS] + [--routing] [--skip-items] + +General options: + -i Health check item number. + OLAP Example: -i CheckCPU,CheckMTU, + CheckPing. + -e Health check scene name. + OLAP Example: -e inspect/upgrade/slow_node/ + binary_upgrade/health/install/longtime + -U Cluster user. + -L Run the command as local mode. + -l Path of log file. + -o Save the result to the specified directory. + --cid The check ID used for identify a check + process, only for internal use. + --skip-root-items Skip the items with root privileges. + --disk-threshold Set disk threshold for checking disk usage, + only for CheckDataDiskUsage. + --format Set the format of the result report. + --set Set abnormal items if supported + --time-out Set the timeout for scene check, default + 1500 seconds. + --routing The network segment with business ip, + example: 192.168.1.1:255.255.255.0 + --skip-items Skip the specified check item or setting + item with scene check + Example: --skip-items CheckCPU,CheckMTU + -?, --help Show help information for this utility, + and exit the command line mode. + -V, --version Show version information. 
+    """
+    print(usage.__doc__)
+
+
+def version():
+    '''
+    function : get the version of the check tool
+    input : NA
+    output: NA
+    '''
+    print(SharedFuncs.getVersion())
+
+
+#########################################################
+# Init global log
+#########################################################
+def initGlobal():
+    """
+    function: initialize the global variables
+    input : NA
+    output: NA
+    """
+    # declare the global variables
+    global g_opts, g_context, g_result
+    g_opts = CmdOptions()
+    g_context = CheckContext()
+    g_result = CheckResult()
+
+
+def parseCommandLine():
+    """
+    function: Parse the command line and save to global variables
+    input : NA
+    output: NA
+    """
+    # Resolve the command line
+    global g_opts
+    g_opts = CmdOptions()
+    ParaObj = Parameter()
+    ParaDict = ParaObj.ParameterCommandLine("check")
+    if "helpFlag" in list(ParaDict.keys()):
+        usage()
+        sys.exit(0)
+
+    # command line parameter group definition for gs_check
+    irrelevantPara = {"scenes": "itemstr", "time_out": "itemstr",
+                      "skipItems": "itemstr",
+                      "cid": "scenes", "nodegroup_name": "scenes",
+                      "shrinkNodes": "scenes"}
+    paraNameMap = {"itemstr": "i", "scenes": "e", "time_out": "-time-out",
+                   "skipItems": "-skip-items",
+                   "cid": "-cid", "nodegroup_name": "-nodegroup-name",
+                   "shrinkNodes": "-ShrinkNodes"}
+    formatList = ['default', 'json']
+
+    # these parameters cannot be set at the same time
+    for para in list(irrelevantPara.keys()):
+        if (para in list(ParaDict.keys()) and
+                irrelevantPara[para] in list(ParaDict.keys())):
+            raise UseBothParameterException(
+                (paraNameMap[para], paraNameMap[irrelevantPara[para]]))
+
+    if "itemstr" in list(ParaDict.keys()):
+        g_opts.items = ParaDict["itemstr"]
+    if "scenes" in list(ParaDict.keys()):
+        g_opts.scene = ParaDict["scenes"]
+    if "outFile" in list(ParaDict.keys()):
+        g_context.outPath = ParaDict["outFile"]
+    if "logFile" in list(ParaDict.keys()):
+        g_opts.logFile = ParaDict["logFile"]
+    if "user" in list(ParaDict.keys()):
+        g_context.user = ParaDict["user"]
+    if "hostfile" in list(ParaDict.keys()):
+        for node in g_file.readFile(ParaDict["hostfile"]):
+            g_opts.nodes.append(node.strip())
+    if "cid" in list(ParaDict.keys()):
+        g_context.setCheckID(ParaDict["cid"])
+        g_opts.distributing = True
+    if "localMode" in list(ParaDict.keys()):
+        g_opts.localMode = True
+    if "skipRootItems" in list(ParaDict.keys()):
+        g_opts.skipRootItems = True
+    if "disk-threshold" in list(ParaDict.keys()):
+        g_context.thresholdDn = ParaDict["disk-threshold"]
+    if "set" in list(ParaDict.keys()):
+        g_context.set = True
+    if "routing" in list(ParaDict.keys()):
+        g_opts.routing = ParaDict["routing"]
+    if "skipItems" in list(ParaDict.keys()):
+        g_opts.skipItems = ParaDict["skipItems"]
+    if "nodegroup_name" in list(ParaDict.keys()):
+        g_context.LCName = ParaDict["nodegroup_name"]
+    if "shrinkNodes" in list(ParaDict.keys()):
+        g_context.ShrinkNodes = ParaDict["shrinkNodes"]
+    if "time_out" in list(ParaDict.keys()):
+        try:
+            g_opts.timeout = int(ParaDict["time_out"])
+        except Exception:
+            raise CheckException(
+                "The timeout parameter is set to an invalid value")
+        if g_opts.timeout < DEFAULT_TIMEOUT:
+            raise CheckException(
+                "The timeout parameter must not be smaller than the "
+                "default value of 1500 seconds")
+    setTimeOut()
+    if "format" in list(ParaDict.keys()):
+        g_opts.format = ParaDict["format"]
+        if g_opts.format not in formatList:
+            raise CheckException(
+                "Format %s is not available, the valid formats are %s" % (
+                    g_opts.format, ",".join(formatList)))
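+
+
+# The irrelevantPara/paraNameMap tables above encode mutually exclusive
+# options: concretely, "-e", "--time-out" and "--skip-items" cannot be
+# combined with "-i", and "--cid", "--nodegroup-name" and "--ShrinkNodes"
+# cannot be combined with "-e".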
+def checkParameter():
+    ##########################################################
+    if g_opts.nodes:
+        raise CheckException("The --hosts parameter is not available")
+    if __isRoot() and not __isDistributing():
+        if not g_opts.localMode:
+            raise CheckException(
+                "The command must be run as the cluster user")
+    ########################################################
+    # Get the -U parameter
+    ########################################################
+    checkuser()
+
+    if (g_opts.outPath and not g_opts.localMode):
+        ########################################################
+        # create output path
+        ########################################################
+        createPath(g_opts.outPath, g_context.user)
+
+
+def checkuser():
+    # The new node scenario does not need the -U parameter
+    if __isRoot() and not g_opts.localMode:
+        g_context.user = None
+        return
+    # Default mode: -U defaults to the current user
+    if not __isRoot() and not g_context.user:
+        g_context.user = SharedFuncs.getCurrentUser()
+    if g_context.user:
+        if not __isRoot() and g_context.user != SharedFuncs.getCurrentUser():
+            raise CheckException(
+                "The user %s is not the current user" % g_context.user)
+        try:
+            user_uid = pwd.getpwnam(g_context.user).pw_uid
+        except Exception:
+            raise CheckException(
+                "The user %s is not an effective user." % g_context.user)
+        if user_uid == 0:
+            raise CheckException("The -U parameter cannot be the root user.")
+        isClusterUser = SharedFuncs.checkClusterUser(g_context.user,
+                                                     __getMpprcFile())
+        if isClusterUser:
+            # get cluster information
+            g_context.mpprc = __getMpprcFile()
+            clusterInfo = g_context.loadClusterInfo(g_context.user)
+            if clusterInfo:
+                g_opts.cluster = clusterInfo
+            else:
+                isClusterUser = False
+    if not isClusterUser:
+        raise CheckException(
+            "The user %s is not a valid cluster user" % g_context.user)
+    if g_opts.localMode or g_opts.distributing:
+        return
+
+    # Check cluster user trust
+    dbNameList = g_opts.cluster.getClusterNodeNames()
+    if (len(dbNameList) == 1 and
+            dbNameList[0] == DefaultValue.GetHostIpOrName()):
+        return
+    appPath = DefaultValue.getEnv('GPHOME', g_opts.cluster.appPath)
+    psshPath = os.path.join(appPath, 'script/gspylib/pssh/bin/pssh')
+    cmd = "%s -H %s 'id' " % (psshPath, " -H ".join(dbNameList))
+    (status, output) = subprocess.getstatusoutput(cmd)
+    if status != 0:
+        errorNode = []
+        for result in output.split('\n'):
+            if result.strip() == "":
+                continue
+            resultInfo = result.split()
+            # Analyze the results
+            if len(resultInfo) > 3 and resultInfo[2] == "[SUCCESS]":
+                continue
+            elif (len(resultInfo) > 3 and resultInfo[2] == "[FAILURE]" and
+                  resultInfo[3] in dbNameList):
+                errorNode.append(resultInfo[3])
+            else:
+                raise CheckException(
+                    "Failed to check user trust. Command: %s Error:\n%s"
+                    % (cmd, output))
+        if errorNode:
+            raise CheckException(
+                "Failed to check user trust with %s" % errorNode)
+        else:
+            raise CheckException(
+                "Failed to check user trust. Error:\n%s" % output)
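+
+
+# For reference, checkuser() above parses pssh output of its usual form
+# (the host names and timestamps here are illustrative):
+#   [1] 12:00:00 [SUCCESS] node1
+#   [2] 12:00:01 [FAILURE] node2 Exited with error code 255
+# i.e. the third field is the status tag and the fourth is the node name.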
+def createPath(path, user=""):
+    if path == "/dev/null":
+        return
+    if os.path.isdir(path):
+        # test write permissions
+        if not g_file.checkDirWriteable(path):
+            raise CheckException(
+                "Failed to create or delete file in the [%s]." % path)
+    elif os.path.isfile(path):
+        raise CheckException("The out path [%s] must be a directory." % path)
+    else:
+        # the path does not exist; create it recursively
+        g_file.createDirectory(path, True, DefaultValue.KEY_DIRECTORY_MODE)
+        # Modify the file owner
+        if __isRoot() and user:
+            g_file.changeOwner(user, path)
+
+
+def getTmpPath():
+    """
+    function: Get and return the temporary directory.
+    input : NA
+    output: String
+    """
+    tmpPath = os.path.join("/tmp", "check_%s" % g_context.checkID)
+    # Get the tmp file path
+    createPath(tmpPath, g_context.user)
+    createPath(os.path.join(tmpPath, "log"), g_context.user)
+    createPath(os.path.join(tmpPath, "nodes"), g_context.user)
+    return tmpPath
+
+
+def initLogFile():
+    """
+    function: Initialize the log file and load the cached check context.
+    input : NA
+    output: NA
+    """
+    global g_context, g_logger
+    # load the context when the script is running in local mode and the
+    # context was cached before
+    g_context.tmpPath = getTmpPath()
+    if g_context.isCached():
+        g_context = g_context.load()
+        if __getLocalNode(g_context.nodes) in g_context.newNodes:
+            g_context.mpprc = None
+            g_context.user = None
+            g_context.cluster = None
+        (g_logger, logFile) = LoggerFactory.getLogger('gs_check',
+                                                      g_context.logFile,
+                                                      g_context.user)
+        g_context.log = g_logger.debug
+        g_logger.debug("Load check context from cache file")
+    else:
+        # Parameter specified first, followed by the default GAUSSLOG,
+        # last the temporary directory
+        if g_opts.logFile:
+            g_context.logFile = os.path.realpath(g_opts.logFile)
+        elif g_opts.cluster:
+            g_context.logFile = os.path.join(g_opts.cluster.logPath,
+                                             '%s/om/gs_check.log'
+                                             % g_context.user)
+        else:
+            g_context.logFile = os.path.join(g_context.tmpPath,
+                                             'log/gs_check.log')
+        (g_logger, g_context.logFile) = LoggerFactory.getLogger(
+            'gs_check', g_context.logFile, g_context.user)
+        # clean the cache files so the command can be re-entered
+        g_context.clean()
+        # set mpprc file
+        g_context.mpprc = __getMpprcFile()
+        # Load supported scenes by parsing the project folder
+        g_context.loadSupportScene()
+        # Load supported check items by parsing the project folder
+        g_context.loadSupportItems()
+        # load the scene configuration
+        if g_opts.scene:
+            g_context.loadSceneConfiguration(g_opts.scene)
+        # load cluster info
+        if g_opts.cluster:
+            g_context.cluster = g_opts.cluster
+            g_context.oldNodes = g_opts.cluster.getClusterSshIps()[0]
+        # load nodes
+        if g_opts.nodes:
+            for node in g_opts.nodes:
+                if node not in g_context.oldNodes:
+                    g_context.newNodes.append(node)
+            g_context.nodes = g_context.oldNodes + g_context.newNodes
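+
+
+# getTmpPath() above creates a per-run scratch tree; with a checkID produced
+# by CheckContext.genCheckID it looks like:
+#   /tmp/check_<checkID>/         temporary root for this run
+#   /tmp/check_<checkID>/log/     fallback location for gs_check.log
+#   /tmp/check_<checkID>/nodes/   additional per-run working space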
+def getRootUserPwd():
+    # ask the user for the root password interactively in the new node
+    # scenario or when the check contains items that need root permission
+    if __hasRootItems() and not __isRoot():
+        rootItems = [i['name'] for i in g_context.rootItems]
+        __printOnScreen(
+            "The below items require root privileges to execute:[%s]"
+            % " ".join(rootItems))
+        rootuser = input("Please enter root privileges user[root]:")\
+                   or "root"
+        rootpwd = getpass.getpass("Please enter password for user[%s]:"
+                                  % rootuser)
+        g_logger.debug("Ask the user to input the password interactively")
+        for host in g_context.nodes:
+            isPwdOk = SharedFuncs.verifyPasswd(host, rootuser, rootpwd)
+            if not isPwdOk:
+                # try to connect to the remote node again
+                rootpwd = __retryConnection(host, rootuser)
+            g_opts.pwdMap[host] = (rootuser, rootpwd)
+            if pwd.getpwnam(rootuser).pw_uid != 0:
+                raise CheckException("The user [%s] entered does not have"
+                                     " root privileges." % rootuser)
+        # print message on screen
+        __printOnScreen("Check root password connection successfully")
+
+
+def parseCheckContext():
+    """
+    function: Parse the check context and initialize all the context values
+    input : NA
+    output: NA
+    """
+    global g_context
+    initLogFile()
+    if g_context.isCached():
+        return
+    g_logger.debug("Start to parse the check items config file")
+    items_all = []
+    items_oldNode = []
+    items_newNode = []
+    failedItems = []
+    singleSkipList = []
+    # generate the items from the scene configuration
+    if g_opts.scene:
+        items_oldNode, failedItems = __parseScene(g_opts.scene)
+        items_all += items_oldNode
+    # generate the items from the -i parameter value
+    elif (g_opts.items):
+        for i in g_opts.items:
+            item = __parseOneItem(i)
+            if (not item):
+                failedItems.append(i)
+            else:
+                items_all.append(item)
+    for item in items_all[:]:
+        if not g_context.set and item['name'] in g_opts.skipItems:
+            items_all.remove(item)
+            continue
+        if g_context.set and item['set_permission'] == 'root':
+            g_context.rootItems.append(item)
+        if g_opts.skipRootItems and item['permission'] == 'root':
+            items_all.remove(item)
+            continue
+        if item['permission'] == 'root':
+            g_context.rootItems.append(item)
+        if g_opts.isSingle and item['name'] in SINGLE_SKIP:
+            singleSkipList.append(item['name'])
+            continue
+        if item['name'] == "CheckRouting":
+            if g_opts.routing:
+                g_context.routing = g_opts.routing
+            elif g_opts.cluster:
+                workIP = g_opts.cluster.getDbNodeByName(
+                    DefaultValue.GetHostIpOrName()).backIps[0]
+                g_context.routing = "%s:%s" % (
+                    workIP, SharedFuncs.getMaskByIP(workIP))
+            else:
+                raise CheckException(
+                    "The --routing parameter is required when the cluster"
+                    " doesn't exist")
+        g_context.items.append(item)
+    if len(singleSkipList) != 0:
+        __printOnScreen(
+            "The following items are skipped when the type of cluster is"
+            " single:\n[%s]" % ",".join(singleSkipList))
+    if not items_newNode:
+        g_context.oldItems = g_context.items
+    else:
+        g_context.oldItems = items_oldNode
+        g_context.newItems = items_newNode
+    if g_context.set and items_all:
+        # Settings will have a big impact and need to be confirmed
+        confirmItem = {
+            "CheckCrontabLeft": "Clear om_monitor in crond service",
+            "CheckDirLeft": "Delete all file in '/opt/huawei/Bigdata/',"
+                            "'/var/log/Bigdata/','/home/omm/'",
+            "CheckProcessLeft": "Kill all process with gaussdb and omm user",
+            "CheckOmmUserExist": "Delete system user omm",
+            "CheckPortConflict": "kill all process with occupies "
+                                 "the 25xxx port"
+        }
+        confirmMsg = ""
+        for item in items_all:
+            if item['name'] in list(confirmItem.keys()):
+                confirmMsg += confirmItem[item['name']] + "\n"
+            if item['name'] in SETITEM_SKIP:
+                g_context.skipSetItem.append(item['name'])
+
+        if confirmMsg:
+            confirmMsg = "Warning: Executing the settings will do " \
+                         "the following at the [%s] node:\n" % \
+                         ','.join(g_context.newNodes) + confirmMsg
+            __printOnScreen(confirmMsg)
+            flag = input("Execute settings? (Y/N):")
+            while True:
+                # keep asking until the answer is yes or no
+                if not flag.upper() in ("Y", "N", "YES", "NO"):
+                    flag = input("Please type 'yes' or 'no': ")
+                    continue
+                break
+            if flag.upper() in ("Y", "YES"):
+                pass
+            if flag.upper() in ("N", "NO"):
+                for Item in g_context.newItems:
+                    if Item['name'] in list(confirmItem.keys()):
+                        g_context.newItems.remove(Item)
+                        g_context.skipSetItem.append(Item['name'])
+                __printOnScreen(
+                    'Skip the settings for [%s]'
+                    % ','.join(g_context.skipSetItem))
+    if failedItems:
+        raise ParseItemException(failedItems)
+    if not g_context.items:
+        raise CheckException("No check item can be performed,"
+                             " please confirm the input parameters.")
+
+    # print message on screen
+    __printOnScreen("Parsing the check items config file successfully")
+    getRootUserPwd()
+    g_context.getMapping()
+    g_context.dump()
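+
+
+# Outline of a distributed run: parseCheckContext() above dumps the context
+# to /tmp/check_<checkID>/context_<checkID>.cache (see getCacheFile), and
+# dispatchCached() below copies that file to every remote host; the remote
+# side is then presumably re-invoked with the internal --cid <checkID>
+# option so that it loads the same context from its cache.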
+def dispatchCached():
+    # dispatch the context file to the remote nodes
+    if (not g_opts.localMode and not g_opts.distributing and
+            not g_opts.isSingle):
+        g_logger.debug("Start to distribute the check context dump file")
+        g_context.dispatch(__getRemoteNodes(g_context.nodes))
+        # print message on screen
+        __printOnScreen(
+            "Distribute the context file to remote hosts successfully")
+
+
+def __printOnScreen(msg):
+    """
+    function: print a message on the screen
+    """
+    if g_opts.localMode or g_opts.distributing:
+        return
+    g_logger.info(msg)
+
+
+def __isRoot():
+    """
+    function: whether the tool is running under the root user
+    """
+    return os.getuid() == 0
+
+
+def __hasRootItems():
+    """
+    function: whether any items require root privileges
+    """
+    return g_context.rootItems is not None and len(g_context.rootItems) > 0
+
+
+def __isDistributing():
+    """
+    function: whether the execution is distributing
+    """
+    return g_opts.distributing
+
+
+def __getLocalNode(nodes):
+    """
+    function: get the local node
+    """
+    if nodes:
+        for n in nodes:
+            if SharedFuncs.is_local_node(n):
+                return n
+    return DefaultValue.GetHostIpOrName()
+
+
+def __getSeparatedValue(value, separator=","):
+    '''
+    get a command line value whose items are separated by ","
+    '''
+    if separator not in value:
+        return [value]
+    return value.split(separator)
+
+
+def __getNodesFromFile(fileName):
+    """
+    function: get node information from the host file
+    """
+    lines = []
+    try:
+        with open(fileName, 'r') as fp:
+            for line in [line.strip().rstrip('\n') for line in fp]:
+                if not line or line in lines or line.startswith('#'):
+                    continue
+                lines.append(line.strip())
+    except Exception as e:
+        raise Exception(str(e))
+    return lines
+
+
+def __retryConnection(host, user):
+    """
+    function: try to connect to the remote node again
+    """
+    # Try connecting to the remote node three times
+    for i in range(3):
+        passwd = getpass.getpass(
+            "Please enter password for user[%s] on the node[%s]:"
+            % (user, host))
+        isOK = SharedFuncs.verifyPasswd(host, user, passwd)
+        if isOK:
+            return passwd
+        else:
+            continue
+    raise CheckException(
+        "Verify password failed for user[%s] on the node[%s]" % (user, host))
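+
+
+# Resolution order implemented by __getMpprcFile() below, for reference:
+#   1. $MPPDB_ENV_SEPARATE_PATH, when it points to an existing file;
+#   2. ~/.bashrc of the current user, when GAUSS_ENV is set (non-root);
+#   3. ~/.bashrc of the -U user, resolved via "su - <user>" (root);
+#   4. "" for root without -U; any other case raises an exception.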
+def __getMpprcFile():
+    """
+    function: get the separated environment variable file
+    """
+    # get mpprc file
+    envValue = DefaultValue.getEnv("MPPDB_ENV_SEPARATE_PATH")
+    if envValue is not None and os.path.isfile(envValue):
+        return envValue
+    elif not __isRoot() and DefaultValue.getEnv('GAUSS_ENV'):
+        cmd = "echo ~ 2>/dev/null"
+        (status, output) = subprocess.getstatusoutput(cmd)
+        if status != 0:
+            raise CheckException(
+                "Fetching the user environment variable file failed."
+                " Please set up environment variables."
+                + " The cmd is %s" % cmd)
+        else:
+            return os.path.join(output, ".bashrc")
+    elif __isRoot() and g_context.user:
+        cmd = "su - %s -c 'echo ~ 2>/dev/null'" % g_context.user
+        (status, output) = subprocess.getstatusoutput(cmd)
+        if status != 0:
+            raise CheckException(
+                "Failed to get user [%s] home directory. Error: %s\n" % (
+                    g_context.user, output) + "The cmd is %s" % cmd)
+        else:
+            return os.path.join(output, ".bashrc")
+    elif __isRoot():
+        return ""
+    else:
+        raise CheckException("The separated mpprc file was not found."
+                             " Please set up environment variables")
+
+
+def __getUserAndPwd(node):
+    """
+    function: get the username and password for a certain node
+    """
+    if __hasRootItems() and not __isRoot():
+        return (g_opts.pwdMap[node][0], g_opts.pwdMap[node][1])
+    else:
+        return (g_context.user, None)
+
+
+def __getRemoteNodes(hosts):
+    '''
+    function: get the remote hosts, ignoring the local host
+    '''
+    return [h for h in hosts if not SharedFuncs.is_local_node(h)]
+
+
+def __parseScene(sceneName):
+    '''
+    function: parse a scene configuration file
+    '''
+    if not sceneName:
+        raise NotEmptyException("scene name")
+    # Get the scene xml
+    xmlFile = "%s/config/scene_%s.xml" % (g_context.basePath, sceneName)
+    if not os.path.isfile(xmlFile):
+        raise SceneNotFoundException(sceneName, g_context.supportScenes)
+
+    domTree = ETree.parse(xmlFile)
+    rootNode = domTree.getroot()
+
+    itemNames = []
+    thresholds = {}
+
+    # parse the allowed items
+    for elem in rootNode.findall('allowitems/item'):
+        elemName = elem.attrib['name']
+        # check whether the check item exists
+        if elemName not in list(g_context.supportItems.keys()):
+            raise NotExistException(elemName, "support items")
+        # save the threshold as text and parse it later
+        subElem = elem.find('threshold')
+        if subElem is not None:
+            thresholds[elemName] = subElem.text.strip()
+        itemNames.append(elemName)
+
+    # parse the categories and get all their items
+    for category in rootNode.findall('allowcategories/category'):
+        cpath = "%s/items/%s" % (g_context.basePath, category.attrib['name'])
+        if os.path.isdir(cpath):
+            itemNames.extend(x[:-3] for x in os.listdir(cpath) if
+                             x[:-3] not in itemNames and x.endswith(".py"))
+
+    # parse the denied items
+    for elem in rootNode.findall('denyitems/item'):
+        elemName = elem.attrib['name']
+        if elemName in itemNames:
+            itemNames.remove(elemName)
+
+    items = []
+    failedItems = []
+    for i in itemNames:
+        item = __parseOneItem(i)
+        if (not item):
+            failedItems.append(i)
+
+        # overwrite the threshold parameters
+        if thresholds and i in list(thresholds.keys()):
+            # parse the threshold of the check item
+            sceneThreshold = __parseThreshold(thresholds[i])
+            if item['threshold']:
+                item['threshold'] = dict(item['threshold'], **sceneThreshold)
+            else:
+                item['threshold'] = sceneThreshold
+        items.append(item)
+    return (items, failedItems)
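+
+
+# For reference, the two XML inputs parsed here have roughly the following
+# shape; these are trimmed, illustrative sketches (the root tag, item name,
+# id and threshold values are examples, not taken from the real files):
+#
+# scene_<name>.xml, consumed by __parseScene() above:
+#   <scene>
+#     <allowitems>
+#       <item name="CheckCPU">
+#         <threshold>StandardCPUIdle=30</threshold>
+#       </item>
+#     </allowitems>
+#     <allowcategories>
+#       <category name="device"/>
+#     </allowcategories>
+#     <denyitems>
+#       <item name="CheckPing"/>
+#     </denyitems>
+#   </scene>
+#
+# config/items.xml entry, consumed by __parseOneItem() below:
+#   <checkitem id="10001" name="CheckCPU">
+#     <title><en>CPU usage</en></title>
+#     <category>os</category>
+#     <permission>user</permission>
+#     <threshold>StandardCPUIdle=30</threshold>
+#   </checkitem>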
+def __parseOneItem(itemName):
+    '''
+    function: parse one check item and get its full information
+    '''
+    if not itemName:
+        raise NotEmptyException("Item name")
+    item = {}
+    # try to load the check item configuration from the xml file
+    xmlFile = "%s/config/items.xml" % g_context.basePath
+    for event, elem in ETree.iterparse(xmlFile):
+        if event == 'end':
+            if elem.tag == 'checkitem' and elem.attrib['name'] == itemName:
+                # Parse the xml file
+                item['id'] = elem.attrib['id']
+                item['name'] = elem.attrib['name']
+
+                item['title_zh'] = __parseAttr(elem, "title", "zh")
+                item['title_en'] = __parseAttr(elem, "title", "en")
+                item['suggestion_zh'] = __parseAttr(elem, "suggestion", "zh")
+                item['suggestion_en'] = __parseAttr(elem, "suggestion", "en")
+                item['standard_zh'] = __parseAttr(elem, "standard", "zh")
+                item['standard_en'] = __parseAttr(elem, "standard", "en")
+                item['category'] = __parseProperty(elem, 'category', 'other')
+                item['permission'] = __parseProperty(elem, 'permission',
+                                                     'user')
+                item['set_permission'] = __parseProperty(elem,
+                                                         'set_permission',
+                                                         'user')
+                item['scope'] = __parseProperty(elem, 'scope', 'all')
+                item['analysis'] = __parseProperty(elem, 'analysis',
+                                                   'default')
+                # Get the threshold
+                threshold = elem.find('threshold')
+                if threshold is not None and threshold.text is not None:
+                    # parse the threshold of the check item
+                    item["threshold"] = __parseThreshold(
+                        threshold.text.strip())
+                break
+    return item
+
+
+def __parseAttr(elem, attr, language='zh'):
+    '''
+    function: parse the xml attribute for the given language
+    '''
+    val = elem.find('/'.join([attr, language]))
+    if val is not None and val.text is not None:
+        return val.text.strip().encode('utf-8')
+    return ""
+
+
+def __parseProperty(elem, propertyName, defaultValue):
+    '''
+    function: parse a property of the check item
+    '''
+    prop = elem.find(propertyName)
+    result = defaultValue
+    if prop is not None and prop.text is not None:
+        result = prop.text.strip()
+    return result
+
+
+def __parseThreshold(value, separator=";"):
+    '''
+    function: parse the threshold of the check item
+    '''
+    result = {}
+    if separator not in value and "=" not in value:
+        return result
+
+    if separator not in value and "=" in value:
+        d = value.strip().split('=')
+        result[d[0]] = d[1]
+    else:
+        for v in value.strip().split(separator):
+            d = v.strip().split('=')
+            result[d[0]] = d[1]
+    return result
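+
+
+# __parseThreshold() above turns a "key=value[;key=value...]" string into a
+# dict; for example (illustrative values):
+#   __parseThreshold("a=1")      returns {'a': '1'}
+#   __parseThreshold("a=1;b=2")  returns {'a': '1', 'b': '2'}
+#   __parseThreshold("noequals") returns {}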
+ % (node, networkNum)) + # The nodes are grouped by MTU value + if not mtuValue in list(g_mtuMap.keys()): + g_mtuMap[mtuValue] = ["%s-%s" % (node, networkNum)] + else: + g_mtuMap[mtuValue].append("%s-%s" % (node, networkNum)) + + +def preCheck(): + """ + function: preCheck for different scene + input : NA + output: NA + """ + # patch ssh config + if __isRoot(): + cmd = "grep -E '^MaxStartups[\ \t]+1000' /etc/ssh/sshd_config" + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + cmd = "sed -i '/MaxStartups/d' /etc/ssh/sshd_config &&" \ + " echo 'MaxStartups 1000' >> /etc/ssh/sshd_config &&" \ + " service sshd reload" + SharedFuncs.runShellCmd(cmd) + + if (g_opts.distributing or g_opts.localMode or + g_opts.isSingle or not g_context.nodes): + return + # Check all node MTU value + try: + pool = ThreadPool(DefaultValue.getCpuSet()) + results = pool.map(getMTUValue, g_context.nodes) + pool.close() + pool.join() + except Exception as e: + raise Exception(str(e)) + # According to the number of groups to determine whether the same + if len(list(g_mtuMap.keys())) > 1: + warningMsg = "Warning: The MTU value is inconsistent on all node," \ + " maybe checking will be slower or hang." + for mtuValue in list(g_mtuMap.keys()): + warningMsg += "\n%s: [%s]" % ( + mtuValue, ','.join(g_mtuMap[mtuValue])) + __printOnScreen(warningMsg) + + +def analysisResult(item): + global g_itemResult + outputPath = g_context.tmpPath + checkID = g_context.checkID + itemName = item['name'] + files = [] + content = "" + for n in g_context.nodes: + fileName = "%s/%s_%s_%s.out" % ( + outputPath, itemName, g_context.hostMapping[n], checkID) + files.append(fileName) + for f in files: + content += "".join(g_file.readFile(f)) + itemResult = __analysisResult(content, itemName) + g_itemResult[itemName] = [itemResult, itemResult.formatOutput()] + + +def doCheck(): + """ + function: do check process + input : NA + output: NA + """ + # Local mode + if g_opts.localMode: + if __isDistributing(): + # load check item dynamic and get the execute result + doRunCheck() + else: + if not __hasRootItems() or __isRoot(): + # load check item dynamic and get the execute result + doRunCheck() + else: + # check with root privileges + doRootCheck() + else: + # watching the threads and response for Ctrl+C signal + Watcher() + threads = [] + __printOnScreen( + "Start to health check for the cluster. 
Total Items:%s Nodes:%s" + % (len(g_context.items), len(g_context.nodes))) + for n in g_context.nodes: + t = CheckThread("%s Thread" % n, doLocalCheck, n) + threads.append(t) + + items = g_context.items + itemCount = len(items) + itemsName = [i['name'] for i in items] + outputPath = g_context.tmpPath + nodes = g_context.nodes[:] + checkID = g_context.checkID + # init progress display + progress_manager = MultiProgressManager() + progress_manager.put('Checking...', LineProgress(total=itemCount, + title='Checking...')) + # fix the display format for progress bar + newLine = '\n' + print(newLine) + # Check the number of completed nodes + overNodes = 0 + # Time to hit the log + LogCount = 0 + lastTimeProgress = -1 + while len(nodes) and datetime.now() <= g_endTime: + totleCount = 0 + slowNode = [] + for node in nodes: + # Get user and password + username, passwd = __getUserAndPwd(node) + if node in g_context.oldNodes: + itemCount_node = len(g_context.oldItems) + else: + itemCount_node = len(g_context.newItems) + # Local execution + if SharedFuncs.is_local_node(node): + checkCount = SharedFuncs.checkComplete( + checkID, node, g_context.hostMapping[node], + g_context.user, g_context.tmpPath) + # Executed in new node scene + elif node in g_context.newNodes: + checkCount = SharedFuncs.checkComplete( + checkID, node, g_context.hostMapping[node], username, + g_context.tmpPath, passwd) + else: + checkCount = SharedFuncs.checkComplete( + checkID, node, g_context.hostMapping[node], + g_context.user, g_context.tmpPath) + try: + checkCount = int(checkCount.strip()) + except Exception: + checkCount = 0 + # If there is a node check completed, + # some nodes just started,record slow node + if overNodes > 0 and checkCount < 2: + slowNode.append(node) + if checkCount == itemCount_node: + nodes.remove(node) + # Record the number of completed nodes + overNodes += 1 + if not SharedFuncs.is_local_node(node): + if node in g_context.newNodes: + outItems = [] + for i in itemsName: + outItems.append("%s/%s_%s_%s.out" % ( + outputPath, i, + g_context.hostMapping[node], + checkID)) + SharedFuncs.receiveFile(outItems, node, username, + outputPath, passwd) + else: + fileName = "%s/*_%s_%s.out" % ( + outputPath, g_context.hostMapping[node], + checkID) + # Delete Files + SharedFuncs.receiveFile(fileName, node, + g_context.user, + outputPath) + else: + totleCount += checkCount + # All nodes check the number of completed + totleCount += itemCount * overNodes + + # Timed and counted + time.sleep(1) + LogCount += 1 + # Update execution progress + progressInfo = totleCount // len(g_context.nodes) + # Refresh only as the schedule changes + if lastTimeProgress < progressInfo <= itemCount: + progress_manager.update("Checking...", progressInfo) + lastTimeProgress = progressInfo + # Suggest the slow node to log every 30 seconds + if slowNode and itemCount > 1 and LogCount % 30 == 0: + logMsg = "Warning: The node [%s] check progress" \ + " is slow." 
% ",".join(slowNode) + g_logger.debug(logMsg) + + for t in threads: + if t.exitcode == 1: + raise ThreadCheckException(t.name, t.exception) + + for t in threads: + t.join(1) + + if datetime.now() > g_endTime: + raise TimeoutException(nodes) + + __printOnScreen("Start to analysis the check result") + try: + pool = ThreadPool(DefaultValue.getCpuSet()) + results = pool.map(analysisResult, g_context.items) + pool.close() + pool.join() + except Exception as e: + raise Exception(str(e)) + for item in g_context.items: + g_result.append(g_itemResult[item['name']][0]) + print(g_itemResult[item['name']][1]) + + __printOnScreen("Analysis the check result successfully") + + +def doRunCheck(): + """ + function: load check item dynamic and get the execute result + input : NA + output: NA + """ + outputPath = g_context.tmpPath + localHost = __getLocalNode(g_context.nodes) + if localHost in g_context.newNodes: + items = g_context.newItems + else: + items = g_context.oldItems + if g_context.hostMapping: + localHost = g_context.hostMapping[localHost] + for item in items: + content = "" + modPath = g_context.supportItems[item['name']] + checker = CheckItemFactory.createItem(item['name'], modPath, + item['scope'], item['analysis']) + checker.runCheck(g_context, g_logger) + + # for local run get the content + fileName = "%s/%s_%s_%s.out" % ( + outputPath, item['name'], localHost, g_context.checkID) + + content += "".join(g_file.readFile(fileName)) + itemResult = __analysisResult(content, item['name']) + g_result.append(itemResult) + # run the check process distributing and no need to clean the resource + if __isDistributing(): + g_logger.debug("run check items done and exit the command") + if g_opts.format == 'default': + # Initialize the self.clusterInfo variable + print(g_result.outputRaw()) + + +def doRootCheck(): + """ + function: check with root privileges + input : NA + output: NA + """ + # get local node + host = __getLocalNode(g_context.nodes) + # prepare the command for running check + cmd = __prepareCmd(g_context.items, g_context.user, g_context.checkID) + # run root cmd + output = SharedFuncs.runRootCmd(cmd, g_opts.pwdMap[host][0], + g_opts.pwdMap[host][1], g_context.mpprc) + print(output) + + +def __prepareCmd(items, user, checkid): + """ + function: prepare the command for running check + """ + cmdPath = os.path.realpath(os.path.dirname(__file__)) + itemsName = [i['name'] for i in items] + userParam = "" + checkIdParam = "" + routingParam = "" + if user: + userParam = " -U %s " % user + if checkid: + checkIdParam = " --cid=%s " % checkid + if g_context.routing: + routingParam = "--routing %s" % g_context.routing + cmd = "%s/gs_check -i %s %s %s -L %s -o %s -l %s" % ( + cmdPath, ",".join(itemsName), userParam, checkIdParam, + routingParam, g_context.tmpPath, g_context.logFile) + return cmd + + +def doLocalCheck(host): + """ + function: running check on different threads + input : NA + output: NA + """ + # prepare the command for running check + if host in g_context.oldNodes: + cmd = __prepareCmd(g_context.oldItems, g_context.user, + g_context.checkID) + else: + cmd = __prepareCmd(g_context.newItems, "", g_context.checkID) + if SharedFuncs.is_local_node(host): + if __hasRootItems(): + SharedFuncs.runRootCmd(cmd, g_opts.pwdMap[host][0], + g_opts.pwdMap[host][1], g_context.mpprc) + else: + SharedFuncs.runShellCmd(cmd, g_context.user, g_context.mpprc) + else: + if not __hasRootItems(): + SharedFuncs.runSshCmd(cmd, host, g_context.user, g_context.mpprc) + else: + # get username and password for certain 
node + username, passwd = __getUserAndPwd(host) + if host in g_context.newNodes: + SharedFuncs.runSshCmdWithPwd(cmd, host, username, passwd) + else: + SharedFuncs.runSshCmdWithPwd(cmd, host, username, passwd, + g_context.mpprc) + + +def __analysisResult(output, itemName): + """ + function: analysis the check result + """ + item_result = ItemResult.parse(output) + if not item_result: + raise CheckException("analysis result occurs error") + try: + # load support item + mod_path = g_context.supportItems[itemName] + checker = CheckItemFactory.createFrom(itemName, mod_path, g_context) + # analysis the item result got from each node + item_result = checker.postAnalysis(item_result) + except Exception as e: + raise CheckException(str(e)) + return item_result + + +def moveLogFile(host): + tmpLog = os.path.join(g_context.tmpPath, "log/gs_check.log") + SharedFuncs.receiveFile(g_context.logFile, host, g_context.user, + tmpLog[:-4] + "_" + host + ".log") + + +def formatOutput(): + """ + function: format and zip the result package + input : NA + output: NA + """ + if g_opts.distributing or not g_result: + return + + try: + # output the result to a file + resultFile = os.path.join(g_context.tmpPath, + "CheckResult_%s" % g_context.checkID) + g_file.createFile(resultFile, True) + g_file.writeFile(resultFile, [g_result.outputResult()]) + except Exception as e: + if os.path.exists(resultFile): + g_file.removeFile(resultFile) + g_logger.info("Warning! Generate check result output file failed.") + g_logger.debug(str(e)) + + if g_opts.localMode: + return + + # export the check result to excel file in output folder, + # only export excel for certain scene + scene = '_' + g_opts.scene if g_opts.scene else "" + + # collect the log file from remote host + tmpLog = os.path.join(g_context.tmpPath, "log/gs_check.log") + # Get the log file + if g_opts.logFile or g_opts.cluster: + g_file.cpFile(g_context.logFile, tmpLog[:-4] + "_" + + DefaultValue.GetHostIpOrName() + ".log") + else: + g_file.moveFile(g_context.logFile, tmpLog[:-4] + "_" + + DefaultValue.GetHostIpOrName() + ".log") + hosts = __getRemoteNodes(g_context.nodes) + if hosts: + try: + pool = ThreadPool(DefaultValue.getCpuSet()) + results = pool.map(moveLogFile, hosts) + pool.close() + pool.join() + except Exception as e: + g_logger.info( + "Warning! Retrieve log file from remote host failed.") + g_logger.debug(str(e)) + + # move the *.out file to nodes folder + outputFolder = g_context.tmpPath + checkID = g_context.checkID + cmd = "cd %s; find . 
-name \'*%s.out\' -exec mv {} %s \;"\ + % (g_context.tmpPath, checkID, os.path.join(outputFolder, "nodes")) + SharedFuncs.runShellCmd(cmd, g_context.user) + + # No check result is generated when the output is specified as /dev/null + if g_context.outPath == "/dev/null": + print(g_result.outputStatistic()) + print("The inspection report has been cleared by /dev/null.") + return + tarFile = "%s/CheckReport%s_%s.tar.gz" %\ + (g_context.outPath, scene, g_context.checkID) + # tar the output for this check + tarFiles = '' + if (__checkFileExist(os.path.join(outputFolder, "nodes"), + '%s.out' % checkID)): + tarFiles += ' nodes ' + if __checkFileExist(os.path.join(outputFolder, "log"), '.log'): + tarFiles += ' log ' + if __checkFileExist(outputFolder, '%s.zip' % checkID): + tarFiles += ' *%s.zip ' % checkID + if __checkFileExist(outputFolder, 'CheckResult_%s' % checkID): + tarFiles += ' CheckResult_%s ' % checkID + tarcmd = "cd %s;tar -zcf %s %s 2>&1; chmod %s '%s'" \ + % (outputFolder, tarFile, tarFiles, + DefaultValue.KEY_FILE_MODE, tarFile) + SharedFuncs.runShellCmd(tarcmd, g_context.user) + + if g_opts.format == 'default': + print(g_result.outputStatistic()) + print("For more information please refer to %s" + % os.path.join(outputFolder, tarFile)) + + if g_opts.format == 'json': + print(g_result.outputJson()) + + +def __checkFileExist(path, filePattern): + # Check the file exists + cmd = "cd %s; ls | grep '%s' | wc -l" % (path, filePattern) + (status, output) = subprocess.getstatusoutput(cmd) + if status == 0 and output != "0": + return True + else: + return False + + +def killChildProcess(node): + checkID = g_context.checkID + # cmd with switch users + cmd_switch = """proc_pid_list=`ps -ef | grep 'cid=%s'| grep -v 'grep'""" \ + """|awk '{print \$2}'` """ % checkID + cmd_switch += """ && (if [ X\"$proc_pid_list\" != X\"\" ]; """ \ + """then echo \"$proc_pid_list\" | xargs kill -9 ; fi)""" + # cmd with not switch users + cmd_current = """proc_pid_list=`ps -ef | grep 'cid=%s'| grep -v 'grep'""" \ + """|awk "{print \\\$2}"` """ % checkID + cmd_current += """ && (if [ X"$proc_pid_list" != X"" ]; then """ \ + """echo "$proc_pid_list" | xargs kill -9 ; fi)""" + + username, passwd = __getUserAndPwd(node) + if SharedFuncs.is_local_node(node) and not __hasRootItems(): + SharedFuncs.runShellCmd(cmd_current) + elif __hasRootItems(): + SharedFuncs.runSshCmdWithPwd(cmd_switch, node, username, passwd) + else: + SharedFuncs.runSshCmd(cmd_current, node, g_context.user) + + +def cleanTmpDir(node): + # clean tmp files in all the nodes + cmd = r"rm -rf %s" % g_context.tmpPath + if SharedFuncs.is_local_node(node): + SharedFuncs.runShellCmd(cmd) + else: + SharedFuncs.runSshCmd(cmd, node, g_context.user) + + +def cleanEnvironment(skiplog=False): + """ + function: clean the environment + input : NA + output: NA + """ + if __isDistributing(): + return + if not g_context.tmpPath: + return + if not g_context.nodes: + return + + # kill child process on all hosts when exception(skip log) + if skiplog: + try: + pool = ThreadPool(DefaultValue.getCpuSet()) + results = pool.map(killChildProcess, g_context.nodes) + pool.close() + pool.join() + except Exception as e: + g_logger.info("Warning! 
Failed to kill child process.") + g_logger.debug(str(e)) + + # clean tmp files in all the nodes + cmd = r"rm -rf %s" % g_context.tmpPath + if g_opts.localMode: + SharedFuncs.runShellCmd(cmd) + else: + try: + pool = ThreadPool(DefaultValue.getCpuSet()) + results = pool.map(cleanTmpDir, g_context.nodes) + pool.close() + pool.join() + except Exception as e: + g_logger.info("Warning! Failed to clear tmp directory.") + g_logger.debug(str(e)) + + +def setTimeOut(): + """ + function: set time out + input : NA + output: NA + """ + global g_endTime + # end time + g_endTime = datetime.now() + timedelta(seconds=g_opts.timeout) + + +if __name__ == '__main__': + # main function + try: + initGlobal() + parseCommandLine() + checkParameter() + parseCheckContext() + preCheck() + dispatchCached() + doCheck() + formatOutput() + cleanEnvironment() + except (InterruptException, ThreadCheckException, TimeoutException) as e: + g_logger.error(str(e)) + # clean the environment and child process when using Ctrl+C force or + # except or timeout to exit the command + cleanEnvironment(True) + sys.exit(1) + except Exception as e: + if not g_logger: + sys.stdout = sys.stderr + print(str(e)) + else: + g_logger.error(str(e)) + cleanEnvironment() + sys.exit(1) + else: + sys.exit(0) diff --git a/script/gs_checkos b/script/gs_checkos new file mode 100644 index 0000000..d8f083b --- /dev/null +++ b/script/gs_checkos @@ -0,0 +1,1571 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +############################################################################# +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +# Description : gs_checkos is a utility to check and set cluster OS information. 
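+# Example     : illustrative invocations (the option list is documented in
+#               usage() below; file names here are placeholders):
+#                   gs_checkos -i A --detail
+#                   gs_checkos -i A1,A2,A3 -f hostfile
+#                   gs_checkos -i B -X cluster_config.xml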
+############################################################################# + +import os +import sys +import pwd +import time +import subprocess +from datetime import datetime, timedelta + +sys.path.append(sys.path[0] + '/../lib') +from gspylib.threads.SshTool import SshTool +from gspylib.common.GaussLog import GaussLog +from gspylib.common.Common import DefaultValue +from gspylib.common.OMCommand import OMCommand +from gspylib.common.DbClusterInfo import dbClusterInfo +from gspylib.common.ErrorCode import ErrorCode +from gspylib.common.ParameterParsecheck import Parameter +from gspylib.os.gsfile import g_file + + +############################################################################# +# Global variables +# gs_checkos_version: the gs_checkos verion number +# g_opts: globle option +# g_logger: globle logger +# g_sshTool: globle ssh interface +# g_OSCheckOpts: options about all checking and setting items +# DEFAULT_INTERVAL: default space number +# CHECK_ITEMNUMLIST: checking item list +# SET_ITEMNUMLIST: setting item list +# LOG_DIR: the log directory about gs_checkos +############################################################################# +g_opts = None +g_logger = None +g_sshTool = None +host = None +g_clusterInfo = None +g_OSCheckOpts = { + 'A1': ['Checking items', '[ OS version status ]', 'Normal', 'OK', 'OK'], + 'A2': ['Checking items', '[ Kernel version status ]', 'Normal', 'OK', + 'OK'], + 'A3': ['Checking items', '[ Unicode status ]', 'Normal', 'OK', 'OK'], + 'A4': ['Checking items', '[ Time zone status ]', 'Normal', 'OK', 'OK'], + 'A5': ['Checking items', '[ Swap memory status ]', 'Normal', 'OK', 'OK'], + 'A6': ['Checking items', '[ System control parameters status ]', 'Normal', + 'OK', 'OK'], + 'A7': ['Checking items', '[ File system configuration status ]', 'Normal', + 'OK', 'OK'], + 'A8': ['Checking items', '[ Disk configuration status ]', 'Normal', 'OK', + 'OK'], + 'A9': ['Checking items', '[ Pre-read block size status ]', 'Normal', 'OK', + 'OK'], + 'A10': ['Checking items', '[ IO scheduler status ]', 'Normal', 'OK', + 'OK'], + 'A11': ['Checking items', '[ Network card configuration status ]', + 'Normal', 'OK', 'OK'], + 'A12': ['Checking items', '[ Time consistency status ]', 'Normal', 'OK', + 'OK'], + 'A13': ['Checking items', '[ Firewall service status ]', 'Normal', 'OK', + 'OK'], + 'A14': ['Checking items', '[ THP service status ]', 'Normal', 'OK', 'OK'], + 'B1': ['Setting items', '[ Set system control parameters ]', 'Normal', + 'OK', 'OK'], + 'B2': ['Setting items', '[ Set file system configuration value ]', + 'Normal', 'OK', 'OK'], + 'B3': ['Setting items', '[ Set pre-read block size value ]', 'Normal', + 'OK', 'OK'], + 'B4': ['Setting items', '[ Set IO scheduler value ]', 'Normal', 'OK', + 'OK'], + 'B5': ['Setting items', '[ Set network card configuration value ]', + 'Normal', 'OK', 'OK'], + 'B6': ['Setting items', '[ Set THP service ]', 'Normal', 'OK', 'OK'], + 'B7': ['Setting items', '[ Set RemoveIPC value ]', 'Normal', 'OK', 'OK'], + 'B8': ['Setting items', '[ Set Session Process ]', 'Normal', 'OK', 'OK']} +DEFAULT_INTERVAL = 60 +CHECK_ITEMNUMLIST = ['A1', 'A2', 'A3', 'A4', 'A5', 'A6', 'A7', 'A8', 'A9', + 'A10', 'A11', 'A12', 'A13', 'A14'] +SET_ITEMNUMLIST = ['B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8'] +LOG_DIR = "/tmp/gs_checkos" +Local_CheckOs = "" +Local_Check = "" +####################################################### +# action option strings +ACTION_CHECK_OS_VERSION = "Check_OS_Version" +ACTION_CHECK_KERNEL_VERSION = "Check_Kernel_Version" 
+ACTION_CHECK_UNICODE = "Check_Unicode" +ACTION_CHECK_TIMEZONE = "Check_TimeZone" +ACTION_CHECK_SYSCTL_PARAMETER = "Check_SysCtl_Parameter" +ACTION_CHECK_DISK_CONFIGURE = "Check_Disk_Configure" +ACTION_CHECK_BLOCKDEV_CONFIGURE = "Check_BlockDev_Configure" +ACTION_CHECK_LOGICAL_BLOCK = "Check_Logical_Block" +ACTION_CHECK_IO_REQUEST = "Check_IO_Request" +ACTION_CHECK_ASYNCHRONOUS_IO_REQUEST = "Check_Asynchronous_IO_Request" +ACTION_CHECK_IO_CONFIGURE = "Check_IO_Configure" +ACTION_CHECK_NETWORK_CONFIGURE = "Check_Network_Configure" +ACTION_CHECK_NETWORK_BOND_MODE = "Check_Network_Bond_Mode" +ACTION_CHECK_SWAP_MEMORY_CONFIGURE = "Check_Swap_Memory_Configure" +ACTION_CHECK_FILESYSTEM_CONFIGURE = "Check_FileSystem_Configure" +ACTION_CHECK_TIME_CONSISTENCY = "Check_Time_Consistency" +ACTION_CHECK_FIREWALL_SERVICE = "Check_Firewall_Service" +ACTION_CHECK_THP_SERVICE = "Check_THP_Service" + +ACTION_SET_SYSCTL_PARAMETER = "Set_SysCtl_Parameter" +ACTION_SET_FILESYSTEM_CONFIGURE = "Set_FileSystem_Configure" +ACTION_SET_NETWORK_CONFIGURE = "Set_Network_Configure" +ACTION_SET_THP_SERVICE = "Set_THP_Service" +ACTION_SET_REMOVEIPC_VALUE = "Set_RemoveIPC_Value" +ACTION_SET_SESSION_PROCESS = "Set_Session_Process" +ACTION_SET_BLOCKDEV_CONFIGURE = "Set_BlockDev_Configure" +ACTION_SET_LOGICAL_BLOCK = "Set_Logical_Block" +ACTION_SET_IO_CONFIGURE = "Set_IO_Configure" +ACTION_SET_IO_REQUEST = "Set_IO_REQUEST" +ACTION_SET_ASYNCHRONOUS_IO_REQUEST = "Set_Asynchronous_IO_Request" + + +####################################################### +class CmdOptions(): + """ + init the command options + """ + + def __init__(self): + self.hostnamestr = "" + self.itemstr = "" + self.hostlistfile = "" + self.hostnameList = [] + self.outputfile = "" + self.logFile = "" + self.localLog = "" + self.set = False + self.detail = False + self.detail_all = False + self.item_detail = [] + self.confFile = "" + self.localMode = False + + ######################################################### + + +# Init global log +######################################################### +def initGlobals(): + """ + init the global parameter g_logger and g_sshTool + """ + global g_logger + global g_sshTool + global g_clusterInfo + g_logger = GaussLog(g_opts.logFile, "gs_checkos") + dirName = os.path.dirname(g_opts.logFile) + g_opts.localLog = os.path.join(dirName, DefaultValue.LOCAL_LOG_FILE) + g_sshTool = SshTool(g_opts.hostnameList, g_logger.logFile, + DefaultValue.TIMEOUT_PSSH_CHECK) + g_clusterInfo = dbClusterInfo() + if (g_opts.confFile != ""): + g_clusterInfo.initFromXml(g_opts.confFile) + + +############################################################################# +# Parse and check parameters +############################################################################# +def usage(): + """ +gs_checkos is a utility to check and set cluster OS information. + +Usage: + gs_checkos -? | --help + gs_checkos -V | --version + gs_checkos -i ITEM [-f HOSTFILE] [-h HOSTNAME] [-X XMLFILE] [--detail] [-o OUTPUT] [-l LOGFILE] + +General options: + -i Item number. To check all items, enter "-i A". To set all parameters, enter "-i B". + To check multiple status, enter the items in the following format: "-i A1,A2,A3". + -f File listing names of all the hosts to connect to. The host names are separated by line breaks. + -h Name of the host to connect to. + -X Configuration file of the cluster. + --detail Show detailed information. + -o Save the result to the specified file. + -l Path of log file. + -? 
--help Show help information for this utility, and exit the command line mode.
+    -V --version Show version information.
+
+
+Item number description:
+    'A1':[ OS version status ]
+    'A2':[ Kernel version status ]
+    'A3':[ Unicode status ]
+    'A4':[ Time zone status ]
+    'A5':[ Swap memory status ]
+    'A6':[ System control parameters status ]
+    'A7':[ File system configuration status ]
+    'A8':[ Disk configuration status ]
+    'A9':[ Pre-read block size status ]
+    'A10':[ IO scheduler status ]
+    'A11':[ Network card configuration status ]
+    'A12':[ Time consistency status ]
+    'A13':[ Firewall service status ]
+    'A14':[ THP service status ]
+    'B1':[ Set system control parameters ]
+    'B2':[ Set file system configuration value ]
+    'B3':[ Set pre-read block size value ]
+    'B4':[ Set IO scheduler value ]
+    'B5':[ Set network card configuration value ]
+    'B6':[ Set THP service ]
+    'B7':[ Set RemoveIPC value ]
+    'B8':[ Set Session Process ]
+    """
+    print(usage.__doc__)
+
+
+def parseHostnameOpts(value):
+    """
+    parse host names from the -h option value
+    """
+    if (len(value) > 1):
+        for val in value:
+            val = val.strip()
+            if val != "" and val not in g_opts.hostnameList:
+                g_opts.hostnameList.append(val)
+    else:
+        g_opts.hostnameList.append(value[0])
+
+
+def parseItemOpts(itemList):
+    """
+    parse item numbers from the -i option value
+    """
+    value = []
+    for val in itemList:
+        if (len(val.split(',')) > 1):
+            for i in val.split(','):
+                value.append(i)
+        else:
+            value.append(val)
+    if (len(value) > 1):
+        for val in value:
+            val = val.strip().upper()
+            if (val in CHECK_ITEMNUMLIST or val.upper() == "A" or
+                    val in SET_ITEMNUMLIST or val.upper() == "B"):
+                if val not in g_opts.item_detail:
+                    g_opts.item_detail.append(val)
+            else:
+                GaussLog.exitWithError(
+                    ErrorCode.GAUSS_500["GAUSS_50004"] % 'i')
+    else:
+        value = value[0].upper()
+        if (value in CHECK_ITEMNUMLIST or value == "A" or
+                value in SET_ITEMNUMLIST or value == "B"):
+            if value not in g_opts.item_detail:
+                g_opts.item_detail.append(value)
+        else:
+            GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50004"] % 'i')
+
+
+def parseCommandLine():
+    """
+    Parse command line and save to global variable
+    """
+    global g_opts
+    g_opts = CmdOptions()
+
+    ParaObj = Parameter()
+    ParaDict = ParaObj.ParameterCommandLine("checkos")
+    if (ParaDict.__contains__("helpFlag")):
+        usage()
+        sys.exit(0)
+    if (ParaDict.__contains__("logFile")):
+        g_opts.logFile = ParaDict.get("logFile")
+    if (ParaDict.__contains__("confFile")):
+        g_opts.confFile = ParaDict.get("confFile")
+    if (ParaDict.__contains__("nodename")):
+        g_opts.hostnamestr = ParaDict.get("nodename")
+    if (ParaDict.__contains__("hostfile")):
+        g_opts.hostlistfile = ParaDict.get("hostfile")
+    if (ParaDict.__contains__("outFile")):
+        g_opts.outputfile = ParaDict.get("outFile")
+    if (ParaDict.__contains__("itemstr")):
+        g_opts.itemstr = ParaDict.get("itemstr")
+    if (ParaDict.__contains__("show_detail")):
+        g_opts.detail = ParaDict.get("show_detail")
+
+
+def readHostFile(hostfile):
+    """
+    read the host file into the host name list
+    """
+    try:
+        with open(hostfile, "r") as fp:
+            for readline in fp:
+                hostname = readline.strip().split("\n")[0]
+                # deduplicate against the collected host name list,
+                # not against the host file path string
+                if hostname != "" and hostname not in g_opts.hostnameList:
+                    g_opts.hostnameList.append(hostname)
+    except Exception as e:
+        GaussLog.exitWithError(ErrorCode.GAUSS_502["GAUSS_50204"] %
+                               hostfile + " Error: \n%s" % str(e))
+
+
+def checkHostList():
+    """
+    check the -h and -f options and build the host name list
+    """
+    if (g_opts.hostnamestr == "" and g_opts.hostlistfile == ""):
+        g_opts.hostnameList = [DefaultValue.GetHostIpOrName()]
+    elif (g_opts.hostnamestr != "" and g_opts.hostlistfile != ""):
+ GaussLog.exitWithError( + ErrorCode.GAUSS_500["GAUSS_50005"] % ('h', 'f')) + elif (g_opts.hostnamestr == "" and g_opts.hostlistfile != ""): + if (not os.path.isfile(g_opts.hostlistfile)): + GaussLog.exitWithError( + ErrorCode.GAUSS_502["GAUSS_50201"] % g_opts.hostlistfile) + else: + readHostFile(g_opts.hostlistfile) + else: + parseHostnameOpts(g_opts.hostnamestr) + + +def checkConfigFile(): + """ + """ + if (g_opts.confFile != ""): + if (not os.path.isfile(g_opts.confFile)): + GaussLog.exitWithError( + ErrorCode.GAUSS_502["GAUSS_50201"] % g_opts.confFile) + + +def setLogFile(): + """ + """ + if (g_opts.logFile == ""): + cmd = "(if [ ! -d %s ]; then mkdir -p %s -m %s; fi)" % ( + LOG_DIR, LOG_DIR, DefaultValue.KEY_DIRECTORY_MODE) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + GaussLog.exitWithError(ErrorCode.GAUSS_502["GAUSS_50208"] + % "log of gs_checkos" + " Error: \n%s." + % output + "The cmd is %s" % cmd) + g_opts.logFile = os.path.join(LOG_DIR, "gs_checkos.log") + + +def checkItems(): + """ + """ + if (g_opts.itemstr == ""): + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50001"] % "i" + ".") + + +def checkOutputFile(): + """ + """ + try: + if (g_opts.outputfile != ""): + DefaultValue.checkOutputFile(g_opts.outputfile) + except Exception as e: + GaussLog.exitWithError(str(e)) + + +def checkParameter(): + """ + Check parameter from command line + """ + ############################################ + # check hostlist info + ########################################### + checkHostList() + # check config file + checkConfigFile() + if (len(g_opts.hostnameList) == 0): + g_opts.hostnameList = [DefaultValue.GetHostIpOrName()] + g_opts.hostnameList.sort() + checkHostnameList() + ########################################## + # set logfile + ############################################ + setLogFile() + ########################################## + # set items + ############################################ + checkItems() + + parseItemOpts(g_opts.itemstr) + if (("B" in g_opts.item_detail)): + g_opts.set = True + else: + for i in SET_ITEMNUMLIST: + for j in g_opts.item_detail: + if (j == i): + g_opts.set = True + break + if (g_opts.set == True): + if ("A" in g_opts.item_detail): + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50004"] % 'i' + + " Checking items and setting items" + " can't be used together.") + for i in CHECK_ITEMNUMLIST: + for j in g_opts.item_detail: + if (j == i): + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50004"] + % 'i' + " Checking items and " + "setting items can't be" + " used together.") + + ############################################ + # check output file + ############################################ + checkOutputFile() + + +def doCheckOS(itemNumber): + """ + do the checing ation by item + """ + if (itemNumber == 'A1'): + checkOSVersion() + elif (itemNumber == 'A2'): + checkKernelVersion() + elif (itemNumber == 'A3'): + checkUnicode() + elif (itemNumber == 'A4'): + checkTimeZone() + elif (itemNumber == 'A5'): + checkMemoryUsage() + elif (itemNumber == 'A6'): + checkSysCtlParameter() + elif (itemNumber == 'A7'): + checkFileSystemConfigure() + elif (itemNumber == 'A8'): + checkDiskConfigure() + elif (itemNumber == 'A9'): + checkBlockDevConfigure() + checkLogicalBlock() + elif (itemNumber == 'A10'): + checkIOrequestqueue() + checkMaxAsyIOrequests() + checkIOConfigure() + elif (itemNumber == 'A11'): + checkNetworkConfigure() + elif (itemNumber == 'A12'): + checkTimeConsistency() + elif (itemNumber == 'A13'): + checkFirewallService() + 
elif (itemNumber == 'A14'):
+        checkTHPService()
+
+
+def doSetOS(itemNumber):
+    """
+    do the setting action by item
+    """
+    if (itemNumber == 'B1'):
+        setSysCtlParameter()
+    elif (itemNumber == 'B2'):
+        setFileSystemConfigure()
+    elif (itemNumber == 'B3'):
+        setLogicalBlock()
+        setBlockDevConfigure()
+    elif (itemNumber == 'B4'):
+        setIOrequestqueue()
+        setMaxAsyIOrequests()
+        setIOConfigure()
+    elif (itemNumber == 'B5'):
+        setNetworkConfigure()
+    elif (itemNumber == 'B6'):
+        setTHPService()
+    elif (itemNumber == 'B7'):
+        setRemoveIPCValue()
+    elif (itemNumber == 'B8'):
+        setSessionProcess()
+
+
+def checkOSVersion():
+    """
+    check OS version
+    make sure the OS version is SuSE11 SP1/2/3/4, SuSE12 SP0/1/2/3 or
+    RedHat(CentOS) 6.4/6.5/6.6/6.7/6.8/6.9/7.0/7.1/7.2/7.3/7.4/7.5.
+    If running locally, skip the cross-node comparison; otherwise verify
+    that the OS versions of all nodes are the same.
+    The per-node output looks like this:
+    True SuSE11SP1 SuSE_11_SP1_64bit
+    False SuSE SuSE_10_SP4_64bit
+    """
+    g_logger.debug("Checking OS version.")
+    try:
+        cmd = "%s -t %s -l %s" % (
+            Local_CheckOs, ACTION_CHECK_OS_VERSION, g_opts.localLog)
+        (status, output, outputMap) = getCmdOutput(cmd)
+        parRes = ""
+        detail_msg = ""
+        for node in list(status.keys()):
+            outputMap[node] = outputMap[node].strip().split("\n")[0].strip()
+        for node in list(status.keys()):
+            if ((status[node] != DefaultValue.SUCCESS)):
+                g_logger.logExit("[%s]: \n" % node +
+                                 ErrorCode.GAUSS_516["GAUSS_51632"] % cmd +
+                                 " Error: \n%s" % outputMap[node].strip())
+            if ((status[node] == DefaultValue.SUCCESS) and
+                    (outputMap[node].strip().split(' ')[0].strip()
+                     == str(False))):
+                g_OSCheckOpts['A1'][2] = 'Abnormal'
+                parRes += "[%s]\n%s\n" % (
+                    node, outputMap[node].strip().split(' ')[2].strip())
+                detail_msg += "[%s]\n%s [Abnormal]\n" % (
+                    node, outputMap[node].strip().split(' ')[2].strip())
+
+        if ((parRes == "") and (len(list(status.keys())) > 1)):
+            nodeValue = list(status.keys())[0].strip()
+            mixedType = outputMap[nodeValue].strip().split(' ')[1].strip()
+            platformStr = outputMap[nodeValue].strip().split(' ')[2].strip()
+            sshresult = ""
+            if (mixedType == ""):
+                g_OSCheckOpts['A1'][2] = 'Abnormal'
+                parRes += "[%s]\n%s\n" % (
+                    nodeValue, "Failed to obtain platform information.")
+                detail_msg += "[%s]\n%s [Abnormal]\n" % (
+                    nodeValue, "Failed to obtain platform information.")
+            else:
+                for node in list(status.keys()):
+                    if ((status[node] == DefaultValue.SUCCESS) and (
+                            outputMap[node].strip().split(' ')[
+                                1].strip() != mixedType)):
+                        g_OSCheckOpts['A1'][2] = 'Abnormal'
+                        sshresult = "failed"
+                        parRes += "[%s]\n%s\n" % (
+                            node,
+                            outputMap[node].strip().split(' ')[2].strip())
+                        detail_msg += "[%s]\n%s [Abnormal]\n" % (
+                            node,
+                            outputMap[node].strip().split(' ')[2].strip())
+                if (sshresult == "failed"):
+                    parRes = "[%s]\n%s\n%s" % (nodeValue, platformStr, parRes)
+        if (g_OSCheckOpts['A1'][2] == 'Abnormal'):
+            g_OSCheckOpts['A1'][3] = "\n%s" % parRes
+        else:
+            parRes = ""
+            for node in list(status.keys()):
+                parRes += " [%s]\n %s\n" % (
+                    node, outputMap[node].strip().split(' ')[2].strip())
+                detail_msg += " [%s]\n %s [Normal]\n" % (
+                    node, outputMap[node].strip().split(' ')[2].strip())
+            g_OSCheckOpts['A1'][3] = "\n%s" % parRes
+        g_OSCheckOpts['A1'][4] = "\n%s" % detail_msg
+
+    except Exception as e:
+        g_logger.debug(str(e))
+    g_logger.debug("Successfully checked OS version.")
+
+
+def performEnvCheck(action_item, failed_mesg, success_mesg, item_num,
+                    warning_level, configFile=""):
+    """
+    action_item : action item
+    failed_mesg : failed message information
+    success_mesg :
success message information + item_num : item number + warning_level : warning level, warning or Abnormal + """ + ssh_config_tmp = "" + outputMap = {} + try: + if (configFile != ""): + cmd = "%s -t %s -X '%s' -l '%s'" % ( + Local_CheckOs, action_item, configFile, g_opts.localLog) + else: + cmd = "%s -t %s -l '%s'" % ( + Local_CheckOs, action_item, g_opts.localLog) + + if action_item == ACTION_CHECK_UNICODE: + try: + # Generate an empty file as a configuration file + ssh_config_tmp = getTmpFile() + g_file.createFile(ssh_config_tmp) + + # Execute the ssh command with an empty configuration file when checking the encoding format + (status, output, outputMap) = getCmdOutput(cmd, + ssh_config_tmp) + + # Clean up the generated empty configuration file + g_file.removeFile(ssh_config_tmp) + except Exception as e: + if os.path.exists(ssh_config_tmp): + g_file.removeFile(ssh_config_tmp) + g_logger.debug( + "Execute ssh cmd [%s] with ssh_config exception." % cmd) + raise Exception(str(e)) + else: + (status, output, outputMap) = getCmdOutput(cmd) + parRes = "" + detail_msg = "" + + for node in list(status.keys()): + if (status[node] != DefaultValue.SUCCESS): + g_logger.logExit("[%s]: \n" % node + + ErrorCode.GAUSS_516["GAUSS_51632"] % cmd + + " Error: \n%s" % outputMap[node].strip()) + if (outputMap[node].strip() == ''): + raise Exception("[%s]\n%s\n" % (node, "The result is Null")) + if ("not same" in outputMap[node].strip()): + raise Exception( + "[%s]\n%s\n" % (node, outputMap[node].strip())) + + if ((len(list(status.keys())) > 1)): + nodeValue = list(status.keys())[0].strip() + keystr = outputMap[nodeValue].strip().split()[1].strip() + sshresult = "" + if (keystr == ""): + g_OSCheckOpts[item_num][2] = '%s' % warning_level + parRes += " [%s]\n %s\n" % ( + nodeValue, failed_mesg) + detail_msg += " [%s]\n %s\n" % ( + nodeValue, failed_mesg) + else: + for node in list(status.keys()): + if ((status[node] == DefaultValue.SUCCESS) + and (outputMap[node].strip().split()[1].strip() + != keystr)): + g_OSCheckOpts[item_num][2] = warning_level + sshresult = "failed" + parRes += " [%s]\n %s\n" % ( + node, outputMap[node].strip().split()[1].strip()) + detail_msg += " [%s]\n %s [%s]\n" % ( + node, outputMap[node].strip().split()[1].strip(), + warning_level) + else: + detail_msg += " [%s]\n %s [%s]\n" % ( + node, outputMap[node].strip().split()[1].strip(), + "Normal") + if (sshresult == "failed"): + parRes = " [%s]\n %s\n%s" % ( + nodeValue, keystr, parRes) + + if (g_OSCheckOpts[item_num][2] == warning_level): + g_OSCheckOpts[item_num][3] = "\n%s" % parRes + keystr = "" + else: + nodeValue = list(status.keys())[0].strip() + keystr = outputMap[nodeValue].strip().split()[1].strip() + g_OSCheckOpts[item_num][3] = "\n %s The value is \"%s\"."\ + % (success_mesg, keystr) + + if g_opts.detail_all: + if (item_num == "A11") and (keystr == "Null"): + g_OSCheckOpts[item_num][4] = "OK" + else: + g_OSCheckOpts[item_num][4] = "\n%s" % detail_msg + + except Exception as e: + g_logger.debug(str(e)) + g_logger.debug("Output: \n%s" % outputMap) + g_OSCheckOpts[item_num][2] = warning_level + g_OSCheckOpts[item_num][3] = "\n %s" % failed_mesg + g_OSCheckOpts[item_num][4] = "\n %s" % failed_mesg + + +def checkKernelVersion(): + """ + Checking kernel version + """ + g_logger.debug("Checking kernel version.") + performEnvCheck(ACTION_CHECK_KERNEL_VERSION, + "Failed to obtain kernel version information.", + "The names about all kernel versions are same.", "A2", + "Warning") + g_logger.debug("Successfully checked kernel version.") + + 
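+# Note: performEnvCheck above assumes each node prints a single line whose
+# second whitespace-separated field is the value compared across nodes
+# (shape inferred from the split()[1] parsing; the sample value below is
+# illustrative only), e.g. for the kernel version check:
+#     True 3.10.0-957.el7.x86_64
+# If field 2 differs on any node, the item is flagged with the given
+# warning_level and the differing nodes are listed in the result.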
+def checkUnicode():
+    """
+    Checking unicode value
+    """
+    g_logger.debug("Checking unicode value.")
+    performEnvCheck(ACTION_CHECK_UNICODE, "Failed to obtain unicode value.",
+                    "The unicode values on all nodes are the same.", "A3",
+                    "Abnormal")
+    g_logger.debug("Successfully checked unicode value.")
+
+
+def checkTimeZone():
+    """
+    Checking timezone information
+    """
+    g_logger.debug("Checking timezone information.")
+    performEnvCheck(ACTION_CHECK_TIMEZONE,
+                    "Failed to obtain timezone information.",
+                    "The timezone information on all nodes is the same.",
+                    "A4", "Abnormal")
+    g_logger.debug("Successfully checked timezone information.")
+
+
+def performCheckorSetOS(action_item, flag_str, success_mesg, item_num,
+                        warning_level, configFile="", command="",
+                        parameters=""):
+    """
+    action_item : action item
+    flag_str : failed flag string
+    success_mesg : success message information
+    item_num : item number
+    warning_level : warning level, Warning or Abnormal
+    parameters : multiple strings of parameters
+    """
+    try:
+        if command == "":
+            command = Local_CheckOs
+        if (configFile != ""):
+            cmd = "%s -t %s -X '%s' -l '%s' %s" % (
+                command, action_item, configFile, g_opts.localLog, parameters)
+        else:
+            cmd = "%s -t %s -l '%s' %s" % (
+                command, action_item, g_opts.localLog, parameters)
+        (status, output, outputMap) = getCmdOutput(cmd)
+        parRes = ""
+        detail_msg = ""
+        for node in list(status.keys()):
+            if (status[node] != DefaultValue.SUCCESS):
+                g_logger.logExit(" [%s]: \n" % node +
+                                 ErrorCode.GAUSS_516["GAUSS_51632"] % cmd +
+                                 " Error: \n%s" % outputMap[node].strip())
+            if (outputMap[node].find(flag_str) >= 0):
+                g_OSCheckOpts[item_num][2] = warning_level
+                parRes += " [%s]\n%s\n" % (node, outputMap[node])
+            else:
+                detail_msg += " [%s]\n%s\n" % (node, outputMap[node])
+            if (outputMap[node].find("Warning reason") >= 0
+                    and outputMap[node].find("Abnormal") < 0):
+                if (g_OSCheckOpts[item_num][2] != "Abnormal"):
+                    g_OSCheckOpts[item_num][2] = "Warning"
+                parRes += " [%s]\n%s\n" % (node, outputMap[node])
+        if (parRes != ""):
+            raise Exception("%s" % parRes)
+        if (g_OSCheckOpts[item_num][2] != warning_level):
+            g_OSCheckOpts[item_num][3] = "\n %s" % success_mesg
+            if g_OSCheckOpts[item_num][4] != "OK":
+                g_OSCheckOpts[item_num][4] = formatResult(
+                    detail_msg, g_OSCheckOpts[item_num][4])
+            else:
+                g_OSCheckOpts[item_num][4] = "\n%s" % detail_msg
+    except Exception as e:
+        g_logger.debug(str(e))
+        if (g_OSCheckOpts[item_num][2] == "Normal"):
+            g_OSCheckOpts[item_num][2] = '%s' % warning_level
+        g_OSCheckOpts[item_num][3] = "\n%s" % str(e)
+        g_OSCheckOpts[item_num][4] = "\n%s" % str(e)
+
+
+def formatResult(result_Str1, result_Str2):
+    """
+    merge two per-node result strings, grouping the lines by node name
+    """
+    result = ""
+    result_list1 = result_Str1.strip("\n").split("\n\n")
+    result_list2 = result_Str2.strip("\n").split("\n\n")
+    for nodeInfo1 in result_list1:
+        result += ("\n" + nodeInfo1.split("\n")[0])
+        result += ("\n" + "\n".join(nodeInfo1.split("\n")[1:]))
+        for nodeInfo2 in result_list2:
+            if nodeInfo1.split("\n")[0].strip() \
+                    == nodeInfo2.split("\n")[0].strip():
+                result += ("\n" + "\n".join(nodeInfo2.split("\n")[1:]) + "\n")
+    return result
+
+
+def checkMemoryUsage():
+    """
+    Checking swap memory value
+    """
+    g_logger.debug("Checking swap memory value.")
+    performCheckorSetOS(ACTION_CHECK_SWAP_MEMORY_CONFIGURE, "SwapMemory",
+                        "The value about swap memory is correct.", "A5",
+                        "Warning")
+    g_logger.debug("Successfully checked swap memory value.")
+
+
+def checkSysCtlParameter():
+    """
+    Checking system control parameter value
+
""" + g_logger.debug("Checking system control parameter value.") + performCheckorSetOS(ACTION_CHECK_SYSCTL_PARAMETER, "Abnormal", + "All values about system control parameters are correct.", + "A6", "Abnormal", '', Local_Check) + g_logger.debug("Successfully checked system control parameter value.") + + +def checkFileSystemConfigure(): + """ + Checking file system configuration information + """ + g_logger.debug("Checking file system configuration information.") + performCheckorSetOS(ACTION_CHECK_FILESYSTEM_CONFIGURE, "Abnormal", + "Both soft nofile and hard nofile are correct.", "A7", + "Abnormal", '', Local_Check) + g_logger.debug( + "Successfully checked file system configuration information.") + + +def checkDiskConfigure(): + """ + Checking disk configuration value + """ + g_logger.debug("Checking disk configuration value.") + performCheckorSetOS(ACTION_CHECK_DISK_CONFIGURE, "filesystem", + "The value about XFS mount parameters is correct.", + "A8", "Warning") + g_logger.debug("Successfully checked disk configuration value.") + + +def checkBlockDevConfigure(): + """ + Checking Pre-read block size value + """ + g_logger.debug("Checking Pre-read block size value.") + performCheckorSetOS(ACTION_CHECK_BLOCKDEV_CONFIGURE, "blockdev", + "The value about Pre-read block size is correct.", + "A9", "Abnormal") + g_logger.debug("Successfully checked Pre-read block size value.") + + +def checkLogicalBlock(): + """ + Checking Logical Block size value + """ + g_logger.debug("Checking Logical Block size value.") + performCheckorSetOS(ACTION_CHECK_LOGICAL_BLOCK, "logical_block_size", + "The value about Logical block size is correct.", + "A9", "Abnormal") + g_logger.debug("Successfully checked Logical block size value.") + + +def checkIOrequestqueue(): + """ + Checking IO request queue value + """ + g_logger.debug("Checking IO request queue value.") + performCheckorSetOS(ACTION_CHECK_IO_REQUEST, "request", + "The value of IO request queue is correct.", "A10", + "Abnormal") + g_logger.debug("Successfully checked IO request queue value.") + + +def checkMaxAsyIOrequests(): + """ + Checking Asynchronous IO request queue value + """ + g_logger.debug("Checking Asynchronous IO request queue value.") + performCheckorSetOS(ACTION_CHECK_ASYNCHRONOUS_IO_REQUEST, "aio-max-nr", + "The value of Asynchronous IO request queue is correct.", + "A10", "Abnormal", g_opts.confFile) + g_logger.debug( + "Successfully checked Asynchronous IO request queue value.") + + +def checkIOConfigure(): + """ + Checking IO scheduler value + """ + g_logger.debug("Checking IO scheduler value.") + performCheckorSetOS(ACTION_CHECK_IO_CONFIGURE, "scheduler", + "The value of IO scheduler is correct.", "A10", + "Abnormal") + g_logger.debug("Successfully checked IO scheduler value.") + + +def checkNetworkConfigure(): + """ + Checking network card configuration + """ + g_logger.debug("Checking network card configuration.") + performEnvCheck(ACTION_CHECK_NETWORK_BOND_MODE, + "Failed to obtain the networks bond mode information.", + "The all networks bond mode are same.", "A11", "Warning", + g_opts.confFile) + if (g_OSCheckOpts["A11"][2] == "Warning"): + g_OSCheckOpts["A11"][3] += "\n Failed to check the network" \ + " bond mode." 
+ g_OSCheckOpts["A11"][4] = g_OSCheckOpts["A11"][3] + return + + nodeIp = getLocalIPAddr() + localMTU = DefaultValue.checkNetWorkMTU(nodeIp, False) + if (str(localMTU).find("Abnormal") >= 0 or localMTU == ""): + return + cmdInfo = "--hostname=%s " % DefaultValue.GetHostIpOrName() + if (not g_opts.localMode): + cmdInfo += "--MTUvalue=%s " % localMTU + if (g_opts.confFile != "" and g_opts.confFile != None): + cmdInfo += "--xmlfile=%s " % g_opts.confFile + performCheckorSetOS(ACTION_CHECK_NETWORK_CONFIGURE, "Abnormal", + "The configuration about network card is correct.", + "A11", "Abnormal", g_opts.confFile, "", cmdInfo) + g_logger.debug("Successfully checked network configuration.") + + +def getLocalIPAddr(): + ''' + function: get all ips from configuration file + input : NA + output: Ips + ''' + Ips = "" + + if (g_opts.confFile == ""): + localHostIp = DefaultValue.getIpByHostName() + Ips = localHostIp + else: + for node in g_clusterInfo.dbNodes: + if (node.name == DefaultValue.GetHostIpOrName()): + Ips = node.backIps[0] + return Ips + + +def checkTimeConsistency(): + """ + Checking system time consistency + """ + g_logger.debug("Checking system time consistency.") + try: + cmd = "%s -t %s -l %s" % ( + Local_CheckOs, ACTION_CHECK_TIME_CONSISTENCY, g_opts.localLog) + (status, output, outputMap) = getCmdOutput(cmd) + parRes = "" + detail_msg = "" + for node in list(status.keys()): + outputMap[node] = outputMap[node].strip().split("\n")[0].strip() + for node in list(status.keys()): + if (status[node] != DefaultValue.SUCCESS): + g_logger.logExit("[%s]: \n" % node + + ErrorCode.GAUSS_516["GAUSS_51632"] % cmd + + " Error: \n%s" % outputMap[node].strip()) + if ((status[node] == DefaultValue.SUCCESS) + and (outputMap[node].strip().split(',')[0].strip() + == str(False))): + g_OSCheckOpts['A12'][2] = 'Warning' + parRes += " [%s]\n The NTPD not detected" \ + " on machine and local time is \"%s\".\n"\ + % (node, + outputMap[node].strip().split(',')[1].strip()) + + if ((parRes == "") and (len(list(status.keys())) > 1)): + nodeValue = list(status.keys())[0].strip() + keystr = outputMap[nodeValue].strip().split(',')[1].strip() + if (keystr == ""): + g_OSCheckOpts['A12'][2] = 'Warning' + parRes += " [%s]\n %s\n" % ( + nodeValue, "Failed to obtain localtime information.") + else: + baseTime = datetime.strptime(keystr, "%Y-%m-%d %H:%M:%S") + startTime = baseTime - timedelta(seconds=DEFAULT_INTERVAL) + endTime = baseTime + timedelta(seconds=DEFAULT_INTERVAL) + for node in list(status.keys()): + if (status[node] == DefaultValue.SUCCESS): + tmpstr = outputMap[node].strip().split(',')[1].strip() + tmpTime = datetime.strptime(tmpstr, + "%Y-%m-%d %H:%M:%S") + if (tmpTime < startTime or tmpTime > endTime): + g_OSCheckOpts['A12'][2] = 'Warning' + parRes += " [%s]\n " \ + "The current system time = (%s)\n"\ + % (node, tmpstr) + detail_msg += " [%s]\n Variable:" \ + "'current system time' RealValue:" \ + "'%s' ExpectedValue:'%s' " \ + "[Warning]\n"\ + % (node, tmpstr, + baseTime.strftime("%Y-%m-%d %H:%M:%S")) + + if (g_OSCheckOpts['A12'][2] == 'Warning'): + g_OSCheckOpts['A12'][3] = "\n%s" % parRes + else: + nodeValue = list(status.keys())[0].strip() + keystr = outputMap[nodeValue].strip().split(',')[1].strip() + g_OSCheckOpts['A12'][3] = "\n The ntpd service is " \ + "started, local time is \"%s\"." 
% keystr + g_OSCheckOpts['A12'][4] = "\n%s" % detail_msg + + except Exception as e: + g_logger.debug(str(e)) + g_logger.debug("Successfully checked system time consistency.") + + +def checkFirewallService(): + """ + Checking firewall service + """ + g_logger.debug("Checking firewall service.") + performCheckorSetOS(ACTION_CHECK_FIREWALL_SERVICE, "firewall", + "The firewall service is stopped.", "A13", "Warning") + g_logger.debug("Successfully checked firewall service.") + + +def checkTHPService(): + """ + Checking THP service + """ + g_logger.debug("Checking THP service.") + performCheckorSetOS(ACTION_CHECK_THP_SERVICE, "THP", + "The THP service is stopped.", "A14", "Abnormal") + g_logger.debug("Successfully checked THP service.") + +def getSetRemoveIPCValue(action_item, flag_str, flag_error, success_mesg, + item_num, warning_level, configFile="", command=""): + """ + get the print information that is from setting removeipc value + """ + try: + if command == "": + command = Local_CheckOs + + if (configFile != ""): + cmd = "%s -t %s -X '%s' -l '%s'"\ + % (command, action_item, configFile, g_opts.localLog) + else: + cmd = "%s -t %s -l '%s'" % (command, action_item, g_opts.localLog) + (status, output, outputMap) = getCmdOutput(cmd) + message = "" + parRes = "" + for node in list(status.keys()): + if (status[node] != DefaultValue.SUCCESS): + g_logger.logExit("[%s]: \n" % node + + ErrorCode.GAUSS_516["GAUSS_51632"] % cmd + + " Error: \n%s" % outputMap[node].strip()) + + if (outputMap[node].find(flag_error) >= 0): + parRes += "[%s]\n%s\n" % (node, outputMap[node]) + + if (outputMap[node].find(flag_str) >= 0): + message += " [%s]\n%s\n" % (node, outputMap[node]) + + if (parRes != ""): + raise Exception("%s" % parRes) + + if (g_OSCheckOpts[item_num][2] != warning_level): + if (message != ""): + g_OSCheckOpts[item_num][3] = "\n%s %s" % ( + message, success_mesg) + else: + g_OSCheckOpts[item_num][3] = "\n %s" % success_mesg + + except Exception as e: + g_logger.debug(str(e)) + g_OSCheckOpts[item_num][2] = '%s' % warning_level + g_OSCheckOpts[item_num][3] = "\n %s" % str(e) + +def getSetSessionProcess(action_item, flag_str, flag_error, success_mesg, + item_num, warning_level, configFile="", command=""): + """ + get the print information that is from setting session process + """ + try: + if command == "": + command = Local_CheckOs + + if (configFile != ""): + cmd = "%s -t %s -X '%s' -l '%s'"\ + % (command, action_item, configFile, g_opts.localLog) + else: + cmd = "%s -t %s -l '%s'" % (command, action_item, g_opts.localLog) + (status, output, outputMap) = getCmdOutput(cmd) + message = "" + parRes = "" + for node in list(status.keys()): + if (status[node] != DefaultValue.SUCCESS): + g_logger.logExit("[%s]: \n" % node + + ErrorCode.GAUSS_516["GAUSS_51632"] % cmd + + " Error: \n%s" % outputMap[node].strip()) + + if (outputMap[node].find(flag_error) >= 0): + parRes += "[%s]\n%s\n" % (node, outputMap[node]) + + if (outputMap[node].find(flag_str) >= 0): + message += " [%s]\n%s\n" % (node, outputMap[node]) + + if (parRes != ""): + raise Exception("%s" % parRes) + + if (g_OSCheckOpts[item_num][2] != warning_level): + if (message != ""): + g_OSCheckOpts[item_num][3] = "\n%s %s" % ( + message, success_mesg) + else: + g_OSCheckOpts[item_num][3] = "\n %s" % success_mesg + + except Exception as e: + g_logger.debug(str(e)) + g_OSCheckOpts[item_num][2] = '%s' % warning_level + g_OSCheckOpts[item_num][3] = "\n %s" % str(e) + + +def getSetOSPrintInfo(action_item, flag_str, flag_error, success_mesg, + item_num, 
warning_level, configFile="", command=""):
+    """
+    get the print information that is from setting OS parameters
+    """
+    try:
+        if command == "":
+            command = Local_CheckOs
+
+        if (configFile != ""):
+            cmd = "%s -t %s -X '%s' -l '%s'"\
+                  % (command, action_item, configFile, g_opts.localLog)
+        else:
+            cmd = "%s -t %s -l '%s'" % (command, action_item, g_opts.localLog)
+        (status, output, outputMap) = getCmdOutput(cmd)
+        message = ""
+        parRes = ""
+        for node in list(status.keys()):
+            if (status[node] != DefaultValue.SUCCESS):
+                g_logger.logExit("[%s]: \n" % node +
+                                 ErrorCode.GAUSS_516["GAUSS_51632"] % cmd +
+                                 " Error: \n%s" % outputMap[node].strip())
+
+            if (outputMap[node].find(flag_error) >= 0):
+                parRes += "[%s]\n%s\n" % (node, outputMap[node])
+
+            if (outputMap[node].find(flag_str) >= 0):
+                message += " [%s]\n%s\n" % (node, outputMap[node])
+
+        if (parRes != ""):
+            raise Exception("%s" % parRes)
+
+        if (g_OSCheckOpts[item_num][2] != warning_level):
+            if (message != ""):
+                g_OSCheckOpts[item_num][3] = "\n%s %s" % (
+                    message, success_mesg)
+            else:
+                g_OSCheckOpts[item_num][3] = "\n %s" % success_mesg
+
+    except Exception as e:
+        g_logger.debug(str(e))
+        g_OSCheckOpts[item_num][2] = '%s' % warning_level
+        g_OSCheckOpts[item_num][3] = "\n %s" % str(e)
+
+
+def setSysCtlParameter():
+    """
+    Setting sysctl parameter value
+    """
+    g_logger.debug("Setting sysctl parameter value.")
+    getSetOSPrintInfo(ACTION_SET_SYSCTL_PARAMETER, "Set", "Failed",
+                      "Setting sysctl parameter values succeeded.", "B1",
+                      "Abnormal", g_opts.confFile, Local_Check)
+    g_logger.debug("Successfully set the sysctl configuration parameters.")
+
+
+def setFileSystemConfigure():
+    """
+    Setting file system configuration
+    """
+    g_logger.debug("Setting file system configuration.")
+    getSetOSPrintInfo(ACTION_SET_FILESYSTEM_CONFIGURE, "Set", "Failed",
+                      "Setting soft file and hard file parameters"
+                      " succeeded.", "B2", "Abnormal", g_opts.confFile,
+                      Local_Check)
+    g_logger.debug("Successfully set file system configuration.")
+
+
+def setBlockDevConfigure():
+    """
+    Setting Pre-read block size value
+    """
+    g_logger.debug("Setting Pre-read block size value.")
+    getSetOSPrintInfo(ACTION_SET_BLOCKDEV_CONFIGURE, "blockdev", "Failed",
+                      "Setting Pre-read block size value succeeded.", "B3",
+                      "Abnormal")
+    g_logger.debug("Successfully set Pre-read block size value.")
+
+
+def setLogicalBlock():
+    """
+    Setting logical block size value
+    """
+    g_logger.debug("Setting logical block size value.")
+    performCheckorSetOS(ACTION_SET_LOGICAL_BLOCK, "Failed",
+                        "Setting logical block size value succeeded.", "B3",
+                        "Abnormal")
+    g_logger.debug("Successfully set logical block size value.")
+
+
+def setRemoveIPCValue():
+    """
+    Setting RemoveIPC value
+    """
+    g_logger.debug("Setting RemoveIPC value.")
+    getSetRemoveIPCValue(ACTION_SET_REMOVEIPC_VALUE, "Set", "Failed",
+                         "Setting RemoveIPC value succeeded.", "B7",
+                         "Abnormal", g_opts.confFile, Local_CheckOs)
+    g_logger.debug("Successfully set RemoveIPC value.")
+
+
+def setSessionProcess():
+    """
+    Setting Session Process
+    """
+    g_logger.debug("Setting Session Process.")
+    getSetSessionProcess(ACTION_SET_SESSION_PROCESS, "Set", "Failed",
+                         "Setting Session Process succeeded.", "B8",
+                         "Abnormal", g_opts.confFile, Local_CheckOs)
+    g_logger.debug("Successfully set Session Process.")
+
+
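+# Note: getSetRemoveIPCValue, getSetSessionProcess and getSetOSPrintInfo
+# above share the same body and differ only in name. A sketch of the calling
+# convention the setters rely on (the flag values are what the local scripts
+# print; the summary message text is illustrative):
+#
+#     getSetOSPrintInfo(ACTION_SET_THP_SERVICE,  # -t action to run
+#                       "Set",      # flag_str: marks per-node success lines
+#                       "Failed",   # flag_error: marks per-node failures
+#                       "Setting THP service succeeded.",  # summary message
+#                       "B6", "Abnormal")  # result row key and severity
+#
+# Any node line containing flag_error raises, stores warning_level in
+# g_OSCheckOpts[item_num][2] and the error text in [3]; otherwise the
+# summary message (plus matching flag_str lines) is stored in [3].
+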
value succeed.", "B4", "Abnormal") + g_logger.debug("Successfully setted IO scheduler value.") + + +def setIOrequestqueue(): + """ + Setting IO request queue value + """ + g_logger.debug("Setting IO request queue value.") + performCheckorSetOS(ACTION_SET_IO_REQUEST, "failed", + "Setting IO request queue value succeed.", "B4", + "Abnormal") + g_logger.debug("Successfully setted IO request queue value.") + + +def setMaxAsyIOrequests(): + """ + Setting Maximumasynchronous IO request queue value + """ + g_logger.debug("Setting Maximumasynchronous IO request queue value.") + performCheckorSetOS(ACTION_SET_ASYNCHRONOUS_IO_REQUEST, "failed", + "Setting Maximumasynchronous IO request queue value succeed.", + "B4", "Abnormal", g_opts.confFile) + g_logger.debug( + "Successfully setted Maximumasynchronous IO request queue value.") + + +def setNetworkConfigure(): + """ + Setting network card configuration + """ + g_logger.debug("Setting network card configuration.") + getSetOSPrintInfo(ACTION_SET_NETWORK_CONFIGURE, "Set", "Failed", + "Seting network card configuration succeed.", "B5", + "Warning", g_opts.confFile) + g_logger.debug("Successfully setted network card configuration.") + + +def setTHPService(): + """ + Setting THP service + """ + g_logger.debug("Setting THP service.") + getSetOSPrintInfo(ACTION_SET_THP_SERVICE, "Set", "Failed", + "Setting THP server succeed.", "B6", "Abnormal") + g_logger.debug("Sucessfully setted THP service.") + +def DisplayResultInformation(Item, output): + """ + display the result information + """ + if ("A" in g_opts.item_detail) or ("B" in g_opts.item_detail): + if (Item in ("A1", "B1")): + if (g_opts.detail): + print("%s:\n %s: %s %s" % ( + g_OSCheckOpts[Item][0], + ("%s%s" % + (("%s." % Item).ljust(4), + g_OSCheckOpts[Item][1])).ljust(DEFAULT_INTERVAL), + g_OSCheckOpts[Item][2].ljust(10), + g_OSCheckOpts[Item][3].ljust(DEFAULT_INTERVAL)), + file=output) + elif (g_opts.detail_all): + print("%s:\n %s: %s %s" % + (g_OSCheckOpts[Item][0], + ("%s%s" % + (("%s." % Item).ljust(4), + g_OSCheckOpts[Item][1])).ljust(DEFAULT_INTERVAL), + g_OSCheckOpts[Item][2].ljust(10), + g_OSCheckOpts[Item][4].ljust(DEFAULT_INTERVAL)), + file=output) + else: + print("%s:\n %s: %s" % + (g_OSCheckOpts[Item][0], + ("%s%s" % + (("%s." % Item).ljust(4), + g_OSCheckOpts[Item][1])).ljust(DEFAULT_INTERVAL), + g_OSCheckOpts[Item][2]), file=output) + else: + if (g_opts.detail): + print(" %s: %s %s" % + (("%s%s" % + (("%s." % Item).ljust(4), + g_OSCheckOpts[Item][1])).ljust(DEFAULT_INTERVAL), + g_OSCheckOpts[Item][2].ljust(10), + g_OSCheckOpts[Item][3].ljust(DEFAULT_INTERVAL)), + file=output) + elif (g_opts.detail_all): + print(" %s: %s %s" % + (("%s%s" % + (("%s." % Item).ljust(4), + g_OSCheckOpts[Item][1])).ljust(DEFAULT_INTERVAL), + g_OSCheckOpts[Item][2].ljust(10), + g_OSCheckOpts[Item][4].ljust(DEFAULT_INTERVAL)), + file=output) + else: + print(" %s: %s" % + (("%s%s" % + (("%s." % Item).ljust(4), + g_OSCheckOpts[Item][1])).ljust(DEFAULT_INTERVAL), + g_OSCheckOpts[Item][2]), file=output) + else: + orderItems = [] + for i in [CHECK_ITEMNUMLIST, SET_ITEMNUMLIST]: + bb = [] + for j in g_opts.item_detail: + if j in i: + bb.append(j) + tmp = sorted(bb) + bb = tmp + if (bb != []): + orderItems.append(bb[0]) + if (Item in orderItems): + if (g_opts.detail): + print("%s\n %s: %s %s" % + (g_OSCheckOpts[Item][0], + ("%s%s" % + (("%s." 
+                        (("%s." % Item).ljust(4),
+                         g_OSCheckOpts[Item][1])).ljust(DEFAULT_INTERVAL),
+                       g_OSCheckOpts[Item][2].ljust(10),
+                       g_OSCheckOpts[Item][3].ljust(DEFAULT_INTERVAL)),
+                      file=output)
+            elif (g_opts.detail_all):
+                print("%s\n %s: %s %s" %
+                      (g_OSCheckOpts[Item][0],
+                       ("%s%s" %
+                        (("%s." % Item).ljust(4),
+                         g_OSCheckOpts[Item][1])).ljust(DEFAULT_INTERVAL),
+                       g_OSCheckOpts[Item][2].ljust(10),
+                       g_OSCheckOpts[Item][4].ljust(DEFAULT_INTERVAL)),
+                      file=output)
+            else:
+                print("%s\n %s: %s" %
+                      (g_OSCheckOpts[Item][0],
+                       ("%s%s" %
+                        (("%s." % Item).ljust(4),
+                         g_OSCheckOpts[Item][1])).ljust(DEFAULT_INTERVAL),
+                       g_OSCheckOpts[Item][2]), file=output)
+        else:
+            if (g_opts.detail):
+                print(" %s: %s %s" %
+                      (("%s%s" %
+                        (("%s." % Item).ljust(4),
+                         g_OSCheckOpts[Item][1])).ljust(DEFAULT_INTERVAL),
+                       g_OSCheckOpts[Item][2].ljust(10),
+                       g_OSCheckOpts[Item][3].ljust(DEFAULT_INTERVAL)),
+                      file=output)
+            elif (g_opts.detail_all):
+                print(" %s: %s %s" %
+                      (("%s%s" %
+                        (("%s." % Item).ljust(4),
+                         g_OSCheckOpts[Item][1])).ljust(DEFAULT_INTERVAL),
+                       g_OSCheckOpts[Item][2].ljust(10),
+                       g_OSCheckOpts[Item][4].ljust(DEFAULT_INTERVAL)),
+                      file=output)
+            else:
+                print(" %s: %s" %
+                      (("%s%s" %
+                        (("%s." % Item).ljust(4),
+                         g_OSCheckOpts[Item][1])).ljust(DEFAULT_INTERVAL),
+                       g_OSCheckOpts[Item][2]), file=output)
+
+
+def cmp_item(item1, item2):
+    """
+    compare two item numbers (letter part first, then numeric part)
+    """
+    if (item1[0] < item2[0]):
+        return -1
+    elif (item1[0] > item2[0]):
+        return 1
+    else:
+        if (int(item1[1:]) < int(item2[1:])):
+            return -1
+        elif (int(item1[1:]) > int(item2[1:])):
+            return 1
+        else:
+            return 0
+
+
+def checkHostnameList():
+    """
+    function: check whether g_opts.hostnameList contains only the local host
+    input: NA
+    output:NA
+    """
+
+    # get the local hostname
+    global host
+    host = DefaultValue.GetHostIpOrName()
+
+    # if g_opts.hostnameList has only one value,
+    # check whether it is the local hostname
+    if (len(g_opts.hostnameList) == 1):
+        if (g_opts.hostnameList[0] == DefaultValue.GetHostIpOrName()):
+            g_opts.localMode = True
+        return
+    try:
+        # check whether all g_opts.hostnameList values are local IPs
+        # obtain all the local IPs
+        IPlist = DefaultValue.getIpAddressList()
+        IPlist.append(host)
+
+        for ip in g_opts.hostnameList:
+            if ip not in IPlist:
+                return
+        g_opts.localMode = True
+
+    except Exception as ex:
+        GaussLog.exitWithError(str(ex))
+
+
+def getCmdOutput(cmd, ssh_conf=""):
+    """
+    function: execute the cmd and get the output
+    input: cmd
+    output:status, output, outputMap
+    """
+
+    if (g_opts.localMode == True):
+        status = {}
+        outputMap = {}
+        outputCollect = []
+        (statusStr, output) = subprocess.getstatusoutput(cmd)
+        if (statusStr != 0):
+            status[host] = "Failure"
+        else:
+            status[host] = "Success"
+        outputMap[host] = output
+    else:
+        gp_path = os.path.dirname(os.path.realpath(__file__))
+        (status, output) = g_sshTool.getSshStatusOutput(cmd, [], "",
+                                                        "%s/../" % gp_path,
+                                                        ssh_config=ssh_conf)
+        outputMap = g_sshTool.parseSshOutput(g_sshTool.hostNames)
+
+    return (status, output, outputMap)
+
+
+def getTmpFile():
+    '''
+    function : generate a unique temporary ssh_config file path
+               for the current check
+    input : NA
+    output : tmpFile
+    '''
+    # Get Time
+    while True:
+        t = time.localtime(time.time())
+        dateString = time.strftime("%Y%m%d", t)
+        seconds = timedelta(hours=t.tm_hour, minutes=t.tm_min,
+                            seconds=t.tm_sec).seconds
+        pidString = str(os.getpid())
+        tmpFileName = "ssh_config" + "_" + dateString + "_"\
+                      + str(seconds) + "_" + pidString
+        tmpFile = os.path.join("/tmp", tmpFileName)
+        if os.path.exists(tmpFile):
+            time.sleep(1)
+        else:
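+            # candidate path does not exist yet: safe to hand back as the
+            # per-run ssh_config file (date/seconds/pid make it unique)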
+            return tmpFile
+
+
+def main():
+    """
+    main function
+    """
+    if (os.getuid() != 0):
+        GaussLog.exitWithError(ErrorCode.GAUSS_501["GAUSS_50104"])
+
+    global Local_CheckOs
+    global Local_Check
+
+    try:
+        parseCommandLine()
+        checkParameter()
+        initGlobals()
+        gpHome = os.path.dirname(os.path.realpath(__file__))
+        Local_CheckOs = OMCommand.getLocalScript("Local_CheckOS")
+        Local_Check = OMCommand.getLocalScript("Local_Check")
+    except Exception as e:
+        GaussLog.exitWithError(str(e))
+
+    if ("A" in g_opts.item_detail):
+        itemList = CHECK_ITEMNUMLIST
+    elif ("B" in g_opts.item_detail):
+        itemList = SET_ITEMNUMLIST
+    else:
+        sortList = sorted(g_opts.item_detail)
+        itemList = sortList
+
+    fp = None
+    dirName = "%s/gspylib/etc/conf" % os.path.dirname(
+        os.path.realpath(__file__))
+    configFile = "%s/check_list.conf" % dirName
+    try:
+        if (DefaultValue.checkInList(['A6', 'A7', 'A11', 'B1', 'B2', 'B5'],
+                                     itemList) and
+                (os.path.isfile(configFile) != True)):
+            g_logger.logExit(ErrorCode.GAUSS_502["GAUSS_50201"] % configFile)
+        if ((g_opts.localMode != True) and
+                DefaultValue.checkInList(['A6', 'A7', 'A11', 'B1', 'B2', 'B5'],
+                                         itemList)):
+            g_sshTool.scpFiles(configFile, dirName, [], "", "%s/../" % gpHome)
+    except Exception as ex:
+        g_logger.logExit(str(ex))
+
+    try:
+        output = sys.stdout
+        if (g_opts.outputfile != ""):
+            basepath = os.path.dirname(g_opts.outputfile)
+            if (not os.path.isdir(basepath)):
+                os.makedirs(basepath, DefaultValue.KEY_DIRECTORY_PERMISSION)
+            g_file.createFileInSafeMode(g_opts.outputfile)
+            fp = open(g_opts.outputfile, "w")
+            output = fp
+            g_logger.log("Performing operating system check/set."
+                         " Output the result to the file %s."
+                         % g_opts.outputfile)
+
+        for item in itemList:
+            if (g_opts.set == False):
+                doCheckOS(item)
+            else:
+                doSetOS(item)
+            DisplayResultInformation(item, output)
+
+        if (fp):
+            fp.flush()
+            fp.close()
+        if g_opts.outputfile != "":
+            os.chmod(g_opts.outputfile, DefaultValue.KEY_FILE_PERMISSION)
+        g_logger.log("Operating system check/set is completed.")
+    except Exception as ex:
+        if fp:
+            fp.flush()
+            fp.close()
+        g_logger.logExit(ErrorCode.GAUSS_502["GAUSS_50205"] %
+                         g_opts.outputfile + "Error: %s" % str(ex))
+
+    totalNum = 0
+    abnormalNum = 0
+    warningNum = 0
+    for key in itemList:
+        totalNum += 1
+        if (g_OSCheckOpts[key][2] == "Abnormal"):
+            abnormalNum += 1
+        elif (g_OSCheckOpts[key][2] == "Warning"):
+            warningNum += 1
+    if (g_opts.set):
+        g_logger.log("NOTICE: MTU value and some warning items can NOT be set."
+                     " Please do it manually.")
+    g_logger.log("Total numbers:%d. Abnormal numbers:%d. Warning numbers:%d."
+                 % (totalNum, abnormalNum, warningNum))
+    if (abnormalNum > 0):
+        if (g_opts.set == False):
+            g_logger.log("Checking operation finished. Result: Abnormal.")
+        else:
+            g_logger.log("Setting operation finished. Result: Abnormal.")
+
+    g_logger.closeLog()
+    sys.exit(0)
+
+
+# the main entry for this script
+if __name__ == '__main__':
+    main()
diff --git a/script/gs_checkperf b/script/gs_checkperf
new file mode 100644
index 0000000..7623a97
--- /dev/null
+++ b/script/gs_checkperf
@@ -0,0 +1,307 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+#############################################################################
+# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+#
+# openGauss is licensed under Mulan PSL v2.
+# You can use this software according to the terms
+# and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+#
+#          http://license.coscl.org.cn/MulanPSL2
+#
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+# ----------------------------------------------------------------------------
+# Description : gs_checkperf is a utility to check the Gauss200 cluster
+# performance and SSD performance.
+#
+# PMK: collects and displays database performance statistics. It can only
+# be run by the cluster user, and it depends on a number of PL/SQL
+# procedures and tables/views (installed in the pmk schema of the postgres
+# database). PMK can display both brief and detailed information.
+#
+# SSD: checks SSD disk performance. It can only be run by a user with
+# root permission, and it depends on the binary of the SSD disk.
+#############################################################################
+
+import subprocess
+import os
+import sys
+import pwd
+import grp
+import time
+import threading
+import glob
+import shutil
+
+from gspylib.common.GaussLog import GaussLog
+from gspylib.common.DbClusterInfo import dbClusterInfo
+from gspylib.threads.SshTool import SshTool
+from gspylib.common.Common import ClusterCommand, DefaultValue
+from gspylib.common.OMCommand import OMCommand
+from gspylib.common.ErrorCode import ErrorCode
+from gspylib.common.ParameterParsecheck import Parameter
+from gspylib.os.gsfile import g_file
+from gspylib.os.gsOSlib import g_OSlib
+from impl.checkperf.OLAP.CheckperfImplOLAP import CheckperfImplOLAP
+from multiprocessing.dummy import Pool as ThreadPool
+
+#############################################################################
+# Global variables
+#   g_opts: global option
+#   g_logger: global logger
+#   g_sshTool: global ssh interface
+#############################################################################
+g_opts = None
+g_logger = None
+g_sshTool = None
+
+
+class CmdOptions():
+    """
+    init the command options
+    """
+
+    def __init__(self):
+        # initialize variables
+        self.show_detail = False
+        self.outFile = ""
+        self.outFile_tmp = ""
+        self.logFile = ""
+        self.localLog = ""
+        self.user = ""
+        self.mpprcFile = ""
+        self.checkItem = []
+        self.databaseSizeFile = ""
+        self.databaseSize = 0
+
+
+class Checkperf():
+    """
+    check cluster performance (PMK) and SSD performance
+    """
+
+    def __init__(self):
+        self.clusterInfo = dbClusterInfo()
+        self.DWS_mode = False
+
+    def usage(self):
+        """
+gs_checkperf is a utility to check the cluster performance and SSD performance.
+
+Usage:
+  gs_checkperf -? | --help
+  gs_checkperf -V | --version
+  gs_checkperf [-U USER] [-o OUTPUT] [-i ITEM] [--detail] [-l LOGFILE]
+
+General options:
+  -U                              Cluster user.
+  -o                              Save the result to the specified file.
+  -i                              PMK or SSD performance check items.
+                                  Example: -i PMK -i SSD.
+  --detail                        Show detailed information about the PMK check.
+  -l                              Path of log files.
+  -?, --help                      Show help information for this utility,
+                                  and exit the command line mode.
+  -V, --version                   Show version information.
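+
+Examples:
+  # Illustrative invocations; the user name and output path are placeholders.
+  # As the cluster user, check PMK statistics with detailed output:
+  gs_checkperf -i PMK --detail
+  # As root, check SSD performance for cluster user "omm", saving the result:
+  gs_checkperf -i SSD -U omm -o /home/omm/checkperf_result.txt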
+ """ + + print(self.usage.__doc__) + + def parseItem(self, value): + """ + function: parse items by value + input : value + output: NA + """ + # parse the parameter '-i' value + items = value + for val in items: + # remove space + val = val.strip() + # invert val into uppercase + item = val.upper() + if item in ("PMK", "SSD"): + if item not in g_opts.checkItem: + g_opts.checkItem.append(item) + else: + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50004"] + % "i" + " Error: %s." % value) + + def parseCommandLine(self): + """ + function: do parse command line + get user input and save to variable + input : NA + output: NA + """ + global g_opts + g_opts = CmdOptions() + ParaObj = Parameter() + ParaDict = ParaObj.ParameterCommandLine("checkperf") + if ("helpFlag" in ParaDict.keys()): + self.usage() + sys.exit(0) + # get parameter value + if ("logFile" in list(ParaDict.keys())): + g_opts.logFile = ParaDict.get("logFile") + if ("user" in list(ParaDict.keys())): + g_opts.user = ParaDict.get("user") + if ("outFile" in list(ParaDict.keys())): + g_opts.outFile = ParaDict.get("outFile") + if ("itemstr" in list(ParaDict.keys())): + self.parseItem(ParaDict.get("itemstr")) + if ("show_detail" in list(ParaDict.keys())): + g_opts.show_detail = ParaDict.get("show_detail") + + def checkParameter(self): + """ + function: Check parameter from command line + input : NA + output: NA + """ + # check outputFile if the parameter '-o' value is not none + if (g_opts.outFile != ''): + DefaultValue.checkOutputFile(g_opts.outFile) + # check mpprc file path + g_opts.mpprcFile = DefaultValue.getMpprcFile() + + # cannot check SSD by cluster user, + # and cannot check PMK by root permission user. + if (os.getuid() != 0): + # check if SSD exists + if ('SSD' in g_opts.checkItem): + GaussLog.exitWithError(ErrorCode.GAUSS_501["GAUSS_50109"]) + else: + if ("PMK" in g_opts.checkItem): + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50011"] + % ("-i", "PMK") + + " Only cluster user can check the PMK.") + + # default check PMK in cluster user or check SSD in root Permission user + # if the parameter '-i' value is none + if (not g_opts.checkItem): + if (os.getuid() == 0): + g_opts.checkItem.append('SSD') + else: + g_opts.checkItem.append('PMK') + + # check user is the right user + if (g_opts.user == ""): + # the parameter '-U' is required when + # the script is executed by root user + if (os.getuid() == 0): + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50001"] + % "U" + + " for a user with the root permission.") + else: + # get user + g_opts.user = pwd.getpwuid(os.getuid()).pw_name + + # check if user exists and if is the right user + DefaultValue.checkUser(g_opts.user) + + # Get the temporary directory from PGHOST + tmpDir = DefaultValue.getTmpDirFromEnv(g_opts.user) + + # check if tmpDir exists + if (not os.path.exists(tmpDir)): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] + % ("temporary directory[" + tmpDir + "]")) + + # check log file + if (g_opts.logFile == ""): + g_opts.logFile = DefaultValue.getOMLogPath( + DefaultValue.GS_CHECKPERF_LOG_FILE, g_opts.user, "") + + # PMK is required if the parameter '--detail' exists + if (g_opts.show_detail and "PMK" not in g_opts.checkItem): + GaussLog.exitWithError( + ErrorCode.GAUSS_500["GAUSS_50002"] % "-detail" + ".") + + def initGlobal(self): + """ + function: Init logger + input : NA + output: NA + """ + # state global variable + try: + global g_logger + global g_sshTool + # initialize log + g_logger = GaussLog(g_opts.logFile, "gs_checkperf") + # modify the 
file's owner + if (os.getuid() == 0): + g_file.changeOwner(g_opts.user, g_logger.logFile) + # Init cluster from static configuration file + self.clusterInfo.initFromStaticConfig(g_opts.user) + # get directory name + dirName = os.path.dirname(g_opts.logFile) + g_opts.localLog = os.path.join(dirName, + DefaultValue.LOCAL_LOG_FILE) + # check if appPath exists + if (not os.path.exists(self.clusterInfo.appPath)): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] + % ("local install path[" + + self.clusterInfo.appPath + "]")) + # initialize sshTool + g_sshTool = SshTool(self.clusterInfo.getClusterNodeNames(), + g_logger.logFile, + DefaultValue.TIMEOUT_PSSH_CHECK) + + binPath = os.path.join(self.clusterInfo.appPath, "bin") + g_opts.databaseSizeFile = os.path.join(binPath, + DefaultValue.DB_SIZE_FILE) + except Exception as e: + g_logger.logExit(str(e)) + + def checkUserInfo(self): + """ + function: Check user information + input : NA + output: NA + """ + # get user and group + (user, group) = g_file.getfileUser(self.clusterInfo.appPath) + # check if user right + if (user != g_opts.user): + g_logger.logExit( + ErrorCode.GAUSS_503["GAUSS_50304"] % (g_opts.user, user)) + + +if __name__ == '__main__': + # main function + try: + checkperf = Checkperf() + # do parse command line + checkperf.parseCommandLine() + # Check parameter from command line + checkperf.checkParameter() + # Init logger + checkperf.initGlobal() + except Exception as e: + GaussLog.exitWithError(str(e)) + + try: + # Check user information + checkperf.checkUserInfo() + + impl = CheckperfImplOLAP() + impl.opts = g_opts + impl.logger = g_logger + impl.clusterInfo = checkperf.clusterInfo + impl.sshTool = g_sshTool + impl.DWS_mode = checkperf.DWS_mode + # Perform the whole extand process + impl.run() + except Exception as e: + g_logger.error(str(e)) + sys.exit(1) + + sys.exit(0) diff --git a/script/gs_collector b/script/gs_collector new file mode 100644 index 0000000..58c8a73 --- /dev/null +++ b/script/gs_collector @@ -0,0 +1,397 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +############################################################################# +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +# Description : gs_collector is a utility +# to collect information about the cluster. 
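+#
+# A typical invocation (illustrative values; host names and the output
+# path are placeholders) might look like:
+#     gs_collector --begin-time="20210401 00:00" --end-time="20210402 00:00"
+#                  -h node1,node2 --keyword="FATAL" -o /tmp/collector_result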
+############################################################################# + +import os +import sys +import pwd +import time +import json +from datetime import datetime +from datetime import timedelta + +from gspylib.common.GaussLog import GaussLog +from gspylib.common.Common import DefaultValue +from gspylib.common.ParallelBaseOM import ParallelBaseOM +from gspylib.common.ErrorCode import ErrorCode +from gspylib.common.ParameterParsecheck import Parameter +from impl.collect.OLAP.CollectImplOLAP import CollectImplOLAP + +def my_obj_pairs_hook(lst): + result = {} + count = {} + for key, val in lst: + if key in count: + count[key] = 1 + count[key] + else: + count[key] = 1 + if key in result: + if count[key] >= 2: + GaussLog.exitWithError( + ErrorCode.GAUSS_512["GAUSS_51245"] % key) + else: + result[key] = [result[key], val] + else: + result[key] = val + return result + + +class Collect(ParallelBaseOM): + """ + define option + """ + + def __init__(self): + ParallelBaseOM.__init__(self) + # initialize variable + self.host = "" + self.inFile = "" + self.outFile = "" + self.nodeName = [] + self.config = {} + self.appPath = "" + + self.begintime = "" + self.endtime = "" + self.keyword = "" + # speed limit to copy/scp files, in MB/s + self.speedLimit = 1024 + self.speedLimitFlag = 0 + + # config file + self.configFile = "" + + # Our products may generate 200MB/(1DN per day), + # So max log size is (8DN * (1master+7standbys) + 1CN) * 200MB = 13GB/node + # Other logs, such as OM/CM/Audit we ignore them here, which are too small. + self.LOG_SIZE_PER_DAY_ONE_NODE = 1024 * 13 + + # As we test, the speed for packaging logs into a compressed tar file is 45MB/s. + self.TAR_SPEED = 45 + + # endtime - begintime, in days, rounded up. + self.duration = 0 + + ############################################################################# + # Parse and check parameters + ############################################################################# + def usage(self): + """ +gs_collector is a utility to collect information about the cluster. + +Usage: + gs_collector -? | --help + gs_collector -V | --version + gs_collector --begin-time="BEGINTIME" --end-time="ENDTIME" [-h HOSTNAME | -f HOSTFILE] + [--keyword=KEYWORD] [--speed-limit=SPEED] [-o OUTPUT] [-l LOGFILE] + +General options: + --begin-time=BEGINTIME Time to start log file collection. Pattern:yyyymmdd hh:mm. + --end-time=ENDTIME Time to end log file collection. Pattern:yyyymmdd hh:mm. + --speed-limit=SPEED Bandwidth to copy files, a nonnegative integer, in MByte/s. + 0 means unlimited. Only supported if rsync command exists. + -h Names of hosts whose information is to be collected. + Example: host1,host2. + -f File listing names of all the hosts to connect to. + --keyword=KEYWORD Save log files containing the keyword. + -o Save the result to the specified file. + -l Path of log file. + -?, --help Show help information for this utility, and exit the command line mode. + -V, --version Show version information. 
+    -C                              gs_collector config file, listing which info to collect
+                                    # gs_collector.json example
+                                    {
+                                        "Collect":
+                                        [
+                                            {"TypeName": "name", "Content": "value", "Interval": "seconds", "Count": "counts"}  # interval is in seconds
+                                        ]
+                                    }
+
+                                    # TypeName : content
+                                    COLLECT_INFO_MAP
+                                    {
+                                        "System": "HardWareInfo,RunTimeInfo",
+                                        "Database": "pg_locks,pg_stat_activity,pg_thread_wait_status",
+                                        "Log": "DataNode,ClusterManager",
+                                        "XLog": "DataNode",
+                                        "Config": "DataNode",
+                                        "Gstack": "DataNode",
+                                        "CoreDump": "gaussdb,GaussMaster,gs_ctl",
+                                        "Trace": "Dump",
+                                        "Plan": "*"  # any database name or the character "*"
+                                    }
+
+        """
+        print(self.usage.__doc__)
+
+    def dateCheck(self, datestr):
+        """
+        function: check whether the date format is correct
+        input : timedate
+        output: bool
+        """
+        # Check the time format
+        try:
+            time.strptime(datestr, "%Y%m%d %H:%M")
+            if (len(datestr.split(" ")[0]) != 8
+                    or len(datestr.split(" ")[1]) != 5):
+                return False
+            return True
+        except Exception:
+            return False
+
+    def parseCommandLine(self):
+        """
+        function: parse the command line
+        input : cmdCommand
+        output: help/version information
+        """
+        # Parse command
+        ParaObj = Parameter()
+        ParaDict = ParaObj.ParameterCommandLine("collector")
+
+        # If help is included in the parameters,
+        # print the help message and exit
+        if (ParaDict.__contains__("helpFlag")):
+            self.usage()
+            sys.exit(0)
+        # Save parameter nodename
+        if (ParaDict.__contains__("nodename")):
+            self.nodeName = ParaDict.get("nodename")
+        # Save parameter hostfile
+        if (ParaDict.__contains__("hostfile")):
+            self.inFile = ParaDict.get("hostfile")
+        # Save parameter begintime
+        if (ParaDict.__contains__("begintime")):
+            self.begintime = ParaDict.get("begintime")
+            # Check that the begin time parameter format is correct
+            if (not self.dateCheck(self.begintime)):
+                GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50003"] %
+                                       ('-begin-time',
+                                        "date") + " Pattern: yyyymmdd hh:mm.")
+        # Save parameter endtime
+        if (ParaDict.__contains__("endtime")):
+            self.endtime = ParaDict.get("endtime")
+            # Check that the end time parameter format is correct
+            if (not self.dateCheck(self.endtime)):
+                GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50003"] %
+                                       ('-end-time',
+                                        "date") + " Pattern: yyyymmdd hh:mm.")
+        # Save parameter keyword
+        if (ParaDict.__contains__("keyword")):
+            self.keyword = ParaDict.get("keyword")
+        # Save parameter outFile
+        if (ParaDict.__contains__("outFile")):
+            self.outFile = ParaDict.get("outFile")
+        # Save parameter logFile
+        if (ParaDict.__contains__("logFile")):
+            self.logFile = ParaDict.get("logFile")
+
+        # Get speed limit to copy/remote copy files.
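+        # A value of 0 means "unlimited" and is normalized to 1024 MB/s in
+        # checkParameter(). Sketch of the intended mapping, assuming rsync
+        # is the underlying transfer tool (rsync's --bwlimit is in KB/s):
+        #     --speed-limit=8  ->  rsync --bwlimit=8192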
+ if (ParaDict.__contains__("speedLimit")): + self.speedLimit = str(ParaDict.get("speedLimit")) + if (not self.speedLimit.isdigit() or int(self.speedLimit) < 0): + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50003"] % + ('-speed-limit', + 'a nonnegative integer')) + self.speedLimit = int(self.speedLimit) + self.speedLimitFlag = 1 + + # Save parameter configFile + if (ParaDict.__contains__("configFile")): + self.configFile = ParaDict.get("configFile") + + def checkParameter(self): + """ + function: do parameters checking + input : check parameters + output: ErrorCode + """ + + # The -h and -f parameters can not be specified at the same time + if (len(self.nodeName) != 0 and self.inFile != ""): + GaussLog.exitWithError( + ErrorCode.GAUSS_500["GAUSS_50005"] % ('h', 'f')) + + if (self.inFile != ""): + # Check if the hostname file exists + if (not os.path.exists(self.inFile)): + GaussLog.exitWithError( + ErrorCode.GAUSS_502["GAUSS_50201"] % self.inFile) + # Get the value in the hostname file + with open(self.inFile, "r") as fp: + for line in fp: + node = line.strip().split("\n")[0] + if node is not None and node != "" \ + and (node not in self.nodeName): + self.nodeName.append(node) + # An error exit if the node name is not available + if len(self.nodeName) == 0: + GaussLog.exitWithError( + ErrorCode.GAUSS_502["GAUSS_50203"] % self.inFile) + # check configFile + if self.configFile == "": + self.configFile = "%s/%s" % ( + os.path.dirname(os.path.realpath(__file__)), + DefaultValue.GS_COLLECTOR_CONFIG_FILE) + + if self.configFile != "": + # Check if the config file exists + if not os.path.exists(self.configFile): + GaussLog.exitWithError( + ErrorCode.GAUSS_502["GAUSS_50201"] % self.configFile) + # Get the value in the configFile file + try: + with open(self.configFile, "r") as fp: + config_json = json.loads(fp.read(), + object_pairs_hook=my_obj_pairs_hook) + items = config_json.items() + + for key, value in items: + if str(key) != "Collect": + GaussLog.exitWithError( + ErrorCode.GAUSS_512["GAUSS_51242"] % ( + self.configFile, str(key))) + for it in value: + d_c = "" + u_c = "" + for k, v in it.items(): + if k not in DefaultValue.COLLECT_CONF_JSON_KEY_LIST: + GaussLog.exitWithError( + ErrorCode.GAUSS_512["GAUSS_51242"] + % (self.configFile, str(k))) + if k == "TypeName": + d_c = DefaultValue.COLLECT_CONF_MAP[v] + elif k == "Content": + u_c = v + elif k == "Interval" or k == "Count": + if (not v.replace(" ", "").isdigit() + or int(v.replace(" ", "")) < 0): + GaussLog.exitWithError( + ErrorCode.GAUSS_512["GAUSS_51241"] + % (k, v)) + + if len(u_c) > 0 and len(d_c) > 0: + T_Name = it["TypeName"] + it["Content"] = "" + if T_Name in "Plan,Database": + it["Content"] = u_c + else: + uc = u_c.replace(" ", "").split(",") + for c in uc: + if c not in d_c: + GaussLog.exitWithError( + ErrorCode.GAUSS_512["GAUSS_51243"] + % (c, it['TypeName'], + self.configFile)) + elif DefaultValue.COLLECT_CONF_CONTENT_MAP.__contains__(c): + it["Content"] += \ + DefaultValue.COLLECT_CONF_CONTENT_MAP[c] + "," + else: + GaussLog.exitWithError( + ErrorCode.GAUSS_512["GAUSS_51244"] + % c) + if self.config.__contains__(T_Name): + self.config[T_Name].append(it) + else: + contentList = [it] + self.config[T_Name] = contentList + else: + GaussLog.exitWithError( + ErrorCode.GAUSS_512["GAUSS_51240"]) + + except Exception as e: + GaussLog.exitWithError( + ErrorCode.GAUSS_512["GAUSS_51239"] % self.configFile) + + if len(self.config) == 0: + GaussLog.exitWithError(ErrorCode.GAUSS_535["GAUSS_53516"]) + + # An error exit if the begin 
time parameter is not entered
+        if (not self.begintime):
+            GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50001"]
+                                   % '-begin-time' + " for [gs_collector].")
+        else:
+            # Strip the separators from --begin-time according to the format
+            self.begintime = self.begintime.replace(" ", "").replace(":", "")
+
+        # An error exit if the end time parameter is not entered
+        if (not self.endtime):
+            GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50001"]
+                                   % '-end-time' + " for [gs_collector].")
+        else:
+            # Strip the separators from --end-time according to the format
+            self.endtime = self.endtime.replace(" ", "").replace(":", "")
+
+        if self.endtime and self.begintime:
+            # The start time must be earlier than the end time.
+            # Note: this uses string comparison!
+            if (self.endtime < self.begintime):
+                GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50004"]
+                                       % "-end-time or --begin-time" +
+                                       "The value of '--end-time' must"
+                                       " be greater than the value "
+                                       "of '--begin-time'.")
+            datebegin = datetime.strptime(self.begintime, "%Y%m%d%H%M")
+            dateend = datetime.strptime(self.endtime, "%Y%m%d%H%M")
+            diff = dateend - datebegin
+            self.duration = diff.days + 1
+
+        # check mpprc file path
+        self.mpprcFile = DefaultValue.getMpprcFile()
+        # check that the user exists and is the right user
+        try:
+            self.user = pwd.getpwuid(os.getuid()).pw_name
+            DefaultValue.checkUser(self.user)
+        except Exception as e:
+            GaussLog.exitWithError(str(e))
+
+        # check log file
+        if (self.logFile == ""):
+            self.logFile = DefaultValue.getOMLogPath(
+                DefaultValue.GS_COLLECTOR_LOG_FILE, self.user, "")
+
+        if (self.speedLimit == 0):
+            self.speedLimit = 1024
+
+
+if __name__ == '__main__':
+    """
+    function: main
+    input : NA
+    output: NA
+    """
+    # check whether the current user is root
+    if (os.getuid() == 0):
+        GaussLog.exitWithError(ErrorCode.GAUSS_501["GAUSS_50105"])
+    try:
+        # Instantiate the Collect class
+        collectObj = Collect()
+
+        # Parse the command line and save to global variables
+        collectObj.parseCommandLine()
+        # check the parameters
+        collectObj.checkParameter()
+        impl = CollectImplOLAP(collectObj)
+        impl.run()
+    except Exception as e:
+        GaussLog.exitWithError(str(e))
+    sys.exit(0)
diff --git a/script/gs_dropnode b/script/gs_dropnode
new file mode 100644
index 0000000..9f4635f
--- /dev/null
+++ b/script/gs_dropnode
@@ -0,0 +1,339 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+#############################################################################
+# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+#
+# openGauss is licensed under Mulan PSL v2.
+# You can use this software according to the terms
+# and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+#
+#          http://license.coscl.org.cn/MulanPSL2
+#
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+# ----------------------------------------------------------------------------
+# Description : gs_dropnode is a utility to drop a standby node from the cluster
+#############################################################################
+
+import datetime
+import os
+import re
+import subprocess
+import sys
+import pwd
+import grp
+package_path = os.path.dirname(os.path.realpath(__file__))
+ld_path = package_path + "/gspylib/clib"
+if 'LD_LIBRARY_PATH' not in os.environ:
+    os.environ['LD_LIBRARY_PATH'] = ld_path
+    os.execve(os.path.realpath(__file__), sys.argv, os.environ)
+if not os.environ.get('LD_LIBRARY_PATH').startswith(ld_path):
+    os.environ['LD_LIBRARY_PATH'] = \
+        ld_path + ":" + os.environ['LD_LIBRARY_PATH']
+    os.execve(os.path.realpath(__file__), sys.argv, os.environ)
+
+sys.path.append(sys.path[0])
+from gspylib.common.DbClusterInfo import dbClusterInfo
+from gspylib.common.DbClusterStatus import DbClusterStatus
+from gspylib.common.GaussLog import GaussLog
+from gspylib.common.Common import DefaultValue, ClusterCommand
+from gspylib.common.ErrorCode import ErrorCode
+from gspylib.common.ParallelBaseOM import ParallelBaseOM
+from gspylib.common.ParameterParsecheck import Parameter
+from gspylib.threads.SshTool import SshTool
+from impl.dropnode.DropnodeImpl import DropnodeImpl
+
+ENV_LIST = ["MPPDB_ENV_SEPARATE_PATH", "GPHOME", "PATH",
+            "LD_LIBRARY_PATH", "PYTHONPATH", "GAUSS_WARNING_TYPE",
+            "GAUSSHOME", "S3_CLIENT_CRT_FILE", "GAUSS_VERSION", "PGHOST",
+            "GS_CLUSTER_NAME", "GAUSSLOG", "GAUSS_ENV", "umask"]
+
+
+class Dropnode(ParallelBaseOM):
+    """
+    drop one or more standby nodes from the cluster
+    """
+
+    def __init__(self):
+        """
+        initialize the parameters
+        """
+        ParallelBaseOM.__init__(self)
+        # back IP list of the standby nodes to be deleted
+        self.hostIpListForDel = []
+        self.hostMapForDel = {}
+        self.hostMapForExist = {}
+        self.clusterInfo = dbClusterInfo()
+        self.backIpNameMap = {}
+        self.failureHosts = []
+        self.flagOnlyPrimary = False
+        envFile = DefaultValue.getEnv("MPPDB_ENV_SEPARATE_PATH")
+        if envFile:
+            self.envFile = envFile
+            self.userProfile = envFile
+        else:
+            self.envFile = "/etc/profile"
+            cmd = "echo ~%s" % self.user
+            (status, output) = subprocess.getstatusoutput(cmd)
+            self.userProfile = os.path.join(output, ".bashrc")
+
+    def usage(self):
+        """
+gs_dropnode is a utility to delete standby nodes from a cluster.
+
+Usage:
+  gs_dropnode -? | --help
+  gs_dropnode -V | --version
+  gs_dropnode -U USER -G GROUP -h nodeList
+General options:
+  -U                                 Cluster user.
+  -G                                 Group of the cluster user.
+  -h                                 Back IP list of the standby nodes
+                                     to be deleted.
+                                     Separate multiple nodes with commas (,),
+                                     such as '-h 192.168.0.1,192.168.0.2'.
+  -?, --help                         Show help information for this
+                                     utility, and exit the command line mode.
+  -V, --version                      Show version information.
+        """
+        print(self.usage.__doc__)
+
+    def parseCommandLine(self):
+        """
+        parse parameters from the command line
+        """
+        ParaObj = Parameter()
+        ParaDict = ParaObj.ParameterCommandLine("dropnode")
+
+        # parameter -h or -?
+        if (ParaDict.__contains__("helpFlag")):
+            self.usage()
+            sys.exit(0)
+        # Resolve command line arguments
+        # parameter -U
+        if (ParaDict.__contains__("user")):
+            self.user = ParaDict.get("user")
+            DefaultValue.checkPathVaild(self.user)
+        # parameter -G
+        if (ParaDict.__contains__("group")):
+            self.group = ParaDict.get("group")
+        # parameter -h
+        if (ParaDict.__contains__("nodename")):
+            self.hostIpListForDel = ParaDict.get("nodename")
+
+    def checkParameters(self):
+        """
+        function: Check parameters from the command line
+        input: NA
+        output: NA
+        """
+
+        # check user | group | node
+        if len(self.user) == 0:
+            GaussLog.exitWithError(ErrorCode.GAUSS_358["GAUSS_35801"] % "-U")
+        if len(self.group) == 0:
+            GaussLog.exitWithError(ErrorCode.GAUSS_358["GAUSS_35801"] % "-G")
+        if len(self.hostIpListForDel) == 0:
+            GaussLog.exitWithError(ErrorCode.GAUSS_358["GAUSS_35801"] % "-h")
+        # check whether an upgrade is in progress
+        if DefaultValue.isUnderUpgrade(self.user):
+            GaussLog.exitWithError(ErrorCode.GAUSS_529["GAUSS_52936"])
+
+        try:
+            pw_user = pwd.getpwnam(self.user)
+            gr_group = grp.getgrnam(self.group)
+        except KeyError as e:
+            if self.user in e.args[0]:
+                GaussLog.exitWithError(
+                    ErrorCode.GAUSS_503["GAUSS_50300"] % self.user)
+            if self.group in e.args[0]:
+                self.logger.log("Group %s does not exist." % self.group)
+                sys.exit(1)
+
+        # get dbcluster info from the static config file
+        self.clusterInfo.initFromStaticConfig(self.user)
+        appPath = self.clusterInfo.appPath
+        db_uid = os.stat(appPath).st_uid
+        db_gid = os.stat(appPath).st_gid
+        if db_uid != pw_user.pw_uid or db_gid != gr_group.gr_gid:
+            GaussLog.exitWithError(
+                ErrorCode.GAUSS_503["GAUSS_50323"] % self.user)
+        self.backIpNameMap = {}
+        for node in self.clusterInfo.dbNodes:
+            self.backIpNameMap[node.name] = node.backIps[0]
+            if node.backIps[0] in self.hostIpListForDel:
+                self.hostMapForDel[node.name] = {'ipaddr': node.backIps[0],
+                                                 'datadir': [], 'dn_id': [],
+                                                 'port': []}
+                for i in node.datanodes:
+                    self.hostMapForDel[node.name]['datadir'].append(i.datadir)
+                    self.hostMapForDel[node.name]['dn_id'].append(
+                        'dn_' + str(i.instanceId))
+                    self.hostMapForDel[node.name]['port'].append(str(i.port))
+            else:
+                self.hostMapForExist[node.name] = {'ipaddr': node.backIps[0],
+                                                   'datadir': [], 'dn_id': [],
+                                                   'port': [],
+                                                   'replToBeDel': [],
+                                                   'syncStandbyDel': [],
+                                                   'pghbaDel': []}
+                for i in node.datanodes:
+                    self.hostMapForExist[node.name]['datadir'].append(i.datadir)
+                    self.hostMapForExist[node.name]['dn_id'].append(
+                        'dn_' + str(i.instanceId))
+                    self.hostMapForExist[node.name]['port'].append(str(i.port))
+        localIp = self.backIpNameMap[DefaultValue.GetHostIpOrName()]
+        if localIp in self.hostIpListForDel:
+            GaussLog.exitWithError(ErrorCode.GAUSS_358["GAUSS_35803"] % \
+                                   localIp)
+
+        for ipLoop in self.hostIpListForDel:
+            if ipLoop not in self.backIpNameMap.values():
+                GaussLog.exitWithError(ErrorCode.GAUSS_358["GAUSS_35802"] % \
+                                       self.hostIpListForDel)
+
+        if not self.hostMapForDel:
+            GaussLog.exitWithError(ErrorCode.GAUSS_358["GAUSS_35802"] % \
+                                   self.hostIpListForDel)
+
+    def check_repeat_process(self):
+        """
+        function: Check whether another gs_dropnode process for the same
+                  user and group is already running
+        """
+        cmd = "ps -ef | grep 'gs_dropnode -U %s -G %s' | grep -v grep" \
+              % (self.user, self.group)
+        (status, output) = subprocess.getstatusoutput(cmd)
+        if status == 0 and len(output.split('\n')) > 1:
+            GaussLog.exitWithError(ErrorCode.GAUSS_358["GAUSS_35810"])
+
+    def flagForOnlyPrimaryLeft(self):
+        """
+        function: Check whether only one node will be left in the cluster
+                  after the drop, and ask the user to confirm; set a flag
+        """
+        countClusterNodes = len(self.backIpNameMap.values())
+        if (countClusterNodes - len(self.hostIpListForDel)) == 1:
+            flag = input(
+                "The cluster will have only one standalone node left after the operation!"
+                "\nDo you want to continue to drop the target node (yes/no)? ")
+            count_f = 2
+            while count_f:
+                if (
+                        flag.upper() != "YES"
+                        and flag.upper() != "NO"
+                        and flag.upper() != "Y" and flag.upper() != "N"):
+                    count_f -= 1
+                    flag = input("Please type 'yes' or 'no': ")
+                    continue
+                break
+            if flag.upper() != "YES" and flag.upper() != "Y":
+                GaussLog.exitWithError(
+                    ErrorCode.GAUSS_358["GAUSS_35805"] % flag.upper())
+            self.flagOnlyPrimary = True
+
+    def check_cluster_status(self):
+        """
+        function: Check whether the status of the cluster is normal
+        input: NA
+        output: NA
+        """
+        tmpDir = DefaultValue.getTmpDirFromEnv()
+        tmpFile = os.path.join(tmpDir, "gauss_cluster_status.dat_" + \
+                               str(datetime.datetime.now().strftime(
+                                   '%Y%m%d%H%M%S')) + "_" + str(os.getpid()))
+        if not len(self.failureHosts):
+            cmd = ClusterCommand.getQueryStatusCmd(self.user, "", tmpFile, False)
+            (status, output) = subprocess.getstatusoutput(cmd)
+            if status != 0:
+                self.logger.debug("The cmd is %s " % cmd)
+                raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % \
+                                cmd + "Error: \n%s" % output)
+            clusterStatus = DbClusterStatus()
+            clusterStatus.initFromFile(tmpFile)
+            clsStatus = clusterStatus.clusterStatusDetail
+            if clsStatus in ["Unknown", "Unavailable"]:
+                GaussLog.exitWithError(
+                    ErrorCode.GAUSS_358["GAUSS_35806"] % clsStatus)
+
+        statusDelHost = "The target node to be dropped is %s \n" % str(
+            self.hostMapForDel.keys())[9:]
+        for dndir_loop in \
+                self.hostMapForExist[DefaultValue.GetHostIpOrName()]['datadir']:
+            cmd = "gs_ctl query -D %s|grep 'local_role'| " \
+                  "awk -F ':' '{print $2}'" % dndir_loop
+            (status, output) = subprocess.getstatusoutput(cmd)
+            if 'Primary' not in output:
+                GaussLog.exitWithError(ErrorCode.GAUSS_358["GAUSS_35804"])
+
+        flag = input(
+            statusDelHost + "Do you want to continue "
+                            "to drop the target node (yes/no)?
") + count_f = 2 + while count_f: + if ( + flag.upper() != "YES" + and flag.upper() != "NO" + and flag.upper() != "Y" and flag.upper() != "N"): + count_f -= 1 + flag = input("Please type 'yes' or 'no': ") + continue + break + if flag.upper() != "YES" and flag.upper() != "Y": + GaussLog.exitWithError( + ErrorCode.GAUSS_358["GAUSS_35805"] % flag.upper()) + + def checkConnection(self, hostnames, env): + """ + check the node connection, change the timeout to 30s as 330s is too long + if the node which will not be deleted can't be connected, report ERR + else continue + """ + command = "echo 1" + sshTool = SshTool(hostnames, None, -20) + resultMap, outputCollect = sshTool.getSshStatusOutput(command, + hostnames, env) + self.logger.debug(outputCollect) + self.failureHosts = '.'.join(re.findall(r"\[FAILURE\] .*:.*\n", + outputCollect)) + for host in list(self.hostMapForExist.keys()): + if host in self.failureHosts: + GaussLog.exitWithError( + ErrorCode.GAUSS_358["GAUSS_35807"] % host) + + def initLogs(self): + """ + init log file + """ + if not os.path.isfile(self.userProfile): + raise Exception( + ErrorCode.GAUSS_502["GAUSS_50210"] % self.userProfile) + log_path = DefaultValue.getEnvironmentParameterValue("GAUSSLOG", + self.user, + self.userProfile) + self.logFile = os.path.realpath( + "%s/om/%s" % (log_path, DefaultValue.DROPNODE_LOG_FILE)) + # if not absolute path + if not os.path.isabs(self.logFile): + GaussLog.exitWithError(ErrorCode.GAUSS_502["GAUSS_50213"] % "log") + self.initLogger("gs_dropnode") + self.logger.ignoreErr = True + + +if __name__ == "__main__": + # check if user is root + if (os.getuid() == 0): + GaussLog.exitWithError(ErrorCode.GAUSS_501["GAUSS_50105"]) + dropNode = Dropnode() + dropNode.parseCommandLine() + dropNode.initLogs() + dropNode.check_repeat_process() + dropNode.checkParameters() + dropNode.checkConnection(list(dropNode.backIpNameMap.keys()), + dropNode.envFile) + dropNode.check_cluster_status() + dropNode.flagForOnlyPrimaryLeft() + dropNodeImpl = DropnodeImpl(dropNode) + dropNodeImpl.run() diff --git a/script/gs_expansion b/script/gs_expansion new file mode 100644 index 0000000..435d9af --- /dev/null +++ b/script/gs_expansion @@ -0,0 +1,368 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +############################################################################# +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. 
+# ----------------------------------------------------------------------------
+# Description : gs_expansion is a utility to expand the cluster with
+#               standby nodes
+#############################################################################
+
+import os
+import sys
+import pwd
+import subprocess
+
+import socket
+package_path = os.path.dirname(os.path.realpath(__file__))
+ld_path = package_path + "/gspylib/clib"
+if 'LD_LIBRARY_PATH' not in os.environ:
+    os.environ['LD_LIBRARY_PATH'] = ld_path
+    os.execve(os.path.realpath(__file__), sys.argv, os.environ)
+if not os.environ.get('LD_LIBRARY_PATH').startswith(ld_path):
+    os.environ['LD_LIBRARY_PATH'] = \
+        ld_path + ":" + os.environ['LD_LIBRARY_PATH']
+    os.execve(os.path.realpath(__file__), sys.argv, os.environ)
+
+sys.path.append(sys.path[0])
+from gspylib.common.DbClusterInfo import dbClusterInfo, \
+    readOneClusterConfigItem, initParserXMLFile, dbNodeInfo, checkPathVaild
+from gspylib.common.GaussLog import GaussLog
+from gspylib.common.Common import DefaultValue
+from gspylib.common.ErrorCode import ErrorCode
+from gspylib.common.ParallelBaseOM import ParallelBaseOM
+from gspylib.common.ParameterParsecheck import Parameter
+from impl.preinstall.OLAP.PreinstallImplOLAP import PreinstallImplOLAP
+from gspylib.threads.SshTool import SshTool
+from impl.expansion.ExpansionImpl import ExpansionImpl
+
+ENV_LIST = ["MPPDB_ENV_SEPARATE_PATH", "GPHOME", "PATH",
+            "LD_LIBRARY_PATH", "PYTHONPATH", "GAUSS_WARNING_TYPE",
+            "GAUSSHOME", "S3_CLIENT_CRT_FILE", "GAUSS_VERSION", "PGHOST",
+            "GS_CLUSTER_NAME", "GAUSSLOG", "GAUSS_ENV", "umask"]
+
+
+class Expansion(ParallelBaseOM):
+    """
+    expand the cluster with new standby nodes
+    """
+
+    def __init__(self):
+        """
+        initialize the parameters
+        """
+        ParallelBaseOM.__init__(self)
+        # back IP list of the newly added standby nodes
+        self.newHostList = []
+        self.clusterInfoDict = {}
+        self.backIpNameMap = {}
+        self.newHostCasRoleMap = {}
+        self.hostAzNameMap = {}
+        self.packagepath = os.path.realpath(
+            os.path.join(os.path.realpath(__file__), "../../"))
+
+        self.standbyLocalMode = False
+        self.envFile = DefaultValue.getEnv("MPPDB_ENV_SEPARATE_PATH")
+
+    def usage(self):
+        """
+gs_expansion is a utility to expand a cluster with standby nodes.
+
+Usage:
+  gs_expansion -? | --help
+  gs_expansion -V | --version
+  gs_expansion -U USER -G GROUP -X XMLFILE -h nodeList [-L]
+General options:
+  -U                                 Cluster user.
+  -G                                 Group of the cluster user.
+  -X                                 Path of the XML configuration file.
+  -h                                 Back IP list of the new standby nodes.
+                                     Separate multiple nodes with commas (,),
+                                     such as '-h 192.168.0.1,192.168.0.2'.
+  -L                                 Install the standby database in
+                                     local mode.
+  -?, --help                         Show help information for this
+                                     utility, and exit the command line mode.
+  -V, --version                      Show version information.
+        """
+        print(self.usage.__doc__)
+
+    def parseCommandLine(self):
+        """
+        parse parameters from the command line
+        """
+        ParaObj = Parameter()
+        ParaDict = ParaObj.ParameterCommandLine("expansion")
+
+        # parameter -h or -?
+ if (ParaDict.__contains__("helpFlag")): + self.usage() + sys.exit(0) + # Resolves command line arguments + # parameter -U + if (ParaDict.__contains__("user")): + self.user = ParaDict.get("user") + DefaultValue.checkPathVaild(self.user) + # parameter -G + if (ParaDict.__contains__("group")): + self.group = ParaDict.get("group") + # parameter -X + if (ParaDict.__contains__("confFile")): + self.xmlFile = ParaDict.get("confFile") + # parameter -L + if (ParaDict.__contains__("localMode")): + self.localMode = ParaDict.get("localMode") + self.standbyLocalMode = ParaDict.get("localMode") + # parameter -l + if (ParaDict.__contains__("logFile")): + self.logFile = ParaDict.get("logFile") + #parameter -h + if (ParaDict.__contains__("nodename")): + self.newHostList = ParaDict.get("nodename") + + def checkParameters(self): + """ + function: Check parameter from command line + input: NA + output: NA + """ + + # check user | group | xmlfile | node + if len(self.user) == 0: + GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35701"] % "-U") + if len(self.group) == 0: + GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35701"] % "-G") + if len(self.xmlFile) == 0: + GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35701"] % "-X") + if len(self.newHostList) == 0: + GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35701"] % "-h") + # check if upgrade action is exist + if DefaultValue.isUnderUpgrade(self.user): + GaussLog.exitWithError(ErrorCode.GAUSS_529["GAUSS_52936"]) + + def _getClusterInfoDict(self): + clusterInfo = ExpansionClusterInfo() + self.clusterInfo = clusterInfo + hostNameIpDict = clusterInfo.initFromXml(self.xmlFile) + clusterDict = clusterInfo.getClusterDirectorys() + self.nodeNameList = clusterInfo.getClusterNodeNames() + + # get corepath and toolpath from xml file + corePath = clusterInfo.readClustercorePath(self.xmlFile) + toolPath = clusterInfo.getToolPath(self.xmlFile) + # parse xml file and cache node info + clusterInfoDict = {} + clusterInfoDict["appPath"] = clusterDict["appPath"][0] + clusterInfoDict["logPath"] = clusterDict["logPath"][0] + clusterInfoDict["corePath"] = corePath + clusterInfoDict["toolPath"] = toolPath + for nodeName in self.nodeNameList: + hostInfo = hostNameIpDict[nodeName] + ipList = hostInfo[0] + portList = hostInfo[1] + backIp = ipList[0] + sshIp = ipList[1] + port = portList[0] + cluster = clusterDict[nodeName] + dataNode = cluster[2] + dbNode = clusterInfo.getDbNodeByName(nodeName) + clusterInfoDict[nodeName] = { + "backIp": backIp, + "sshIp": sshIp, + "port": port, + "localport": int(port) + 1, + "localservice": int(port) + 4, + "heartBeatPort": int(port) + 3, + "dataNode": dataNode, + "instanceType": -1, + "azPriority": dbNode.azPriority + } + + nodeIdList = clusterInfo.getClusterNodeIds() + for id in nodeIdList: + insType = clusterInfo.getdataNodeInstanceType(id) + hostName = clusterInfo.getHostNameByNodeId(id) + clusterInfoDict[hostName]["instanceType"] = insType + self.clusterInfoDict = clusterInfoDict + + def initLogs(self): + """ + init log file + """ + # if no log file + if (self.logFile == ""): + self.logFile = DefaultValue.getOMLogPath( + DefaultValue.EXPANSION_LOG_FILE, self.user, "", + self.xmlFile) + # if not absolute path + if (not os.path.isabs(self.logFile)): + GaussLog.exitWithError(ErrorCode.GAUSS_502["GAUSS_50213"] % "log") + + self.initLogger("gs_expansion") + self.logger.ignoreErr = True + + def getExpansionInfo(self): + self._getClusterInfoDict() + self._getBackIpNameMap() + self._getHostAzNameMap() + self._getNewHostCasRoleMap() + + def 
checkXmlIncludeNewHost(self): + """ + check parameter node must in xml config file + """ + backIpList = self.clusterInfo.getClusterBackIps() + for nodeIp in self.newHostList: + if nodeIp not in backIpList: + GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35702"] % + nodeIp) + + def _getBackIpNameMap(self): + backIpList = self.clusterInfo.getClusterBackIps() + for backip in backIpList: + self.backIpNameMap[backip] = \ + self.clusterInfo.getNodeNameByBackIp(backip) + + def checkExecutingUser(self): + """ + check whether current user executing this command is root + """ + if os.getuid() != 0: + GaussLog.exitWithError(ErrorCode.GAUSS_501["GAUSS_50104"]) + + def checkExecutingHost(self): + """ + check whether current host is primary host + """ + currentHost = socket.gethostname() + primaryHost = "" + for nodeName in self.nodeNameList: + if self.clusterInfoDict[nodeName]["instanceType"] \ + == 0: + primaryHost = nodeName + break + if currentHost != primaryHost: + GaussLog.exitWithError(ErrorCode.GAUSS_501["GAUSS_50110"] % + (currentHost + ", which is not primary")) + + def checkTrust(self, hostList = None): + """ + check trust between primary/current host and every host in hostList + """ + if hostList == None: + hostList = self.nodeNameList + gpHome = DefaultValue.getEnv("GPHOME") + psshPath = "python3 %s/script/gspylib/pssh/bin/pssh" % gpHome + rootSSHExceptionHosts = [] + individualSSHExceptionHosts = [] + for host in hostList: + # check root's trust + checkRootTrustCmd = "%s -s -H %s 'pwd'" % (psshPath, host) + (status, output) = subprocess.getstatusoutput(checkRootTrustCmd) + if status != 0: + rootSSHExceptionHosts.append(host) + # check individual user's trust + checkUserTrustCmd = "su - %s -c '%s -s -H %s pwd'" % ( + self.user, psshPath, host) + (status, output) = subprocess.getstatusoutput(checkUserTrustCmd) + if status != 0: + individualSSHExceptionHosts.append(host) + # output ssh exception info if ssh connect failed + if rootSSHExceptionHosts or individualSSHExceptionHosts: + sshExceptionInfo = "" + if rootSSHExceptionHosts: + sshExceptionInfo += "\n" + sshExceptionInfo += ", ".join(rootSSHExceptionHosts) + sshExceptionInfo += " by root" + if individualSSHExceptionHosts: + sshExceptionInfo += "\n" + sshExceptionInfo += ", ".join(individualSSHExceptionHosts) + sshExceptionInfo += " by individual user" + GaussLog.exitWithError(ErrorCode.GAUSS_511["GAUSS_51100"] % + sshExceptionInfo) + + def checkEnvfile(self): + """ + check whether env file is sourced + check whether info in XML is consistent with environment variable + """ + self.logger.debug("Checking environment variable.") + if not DefaultValue.getEnv("GPHOME"): + GaussLog.exitWithError(ErrorCode.GAUSS_518["GAUSS_51802"] % ( + "\"GPHOME\", please import environment variable")) + if not DefaultValue.getEnv("GAUSSHOME"): + GaussLog.exitWithError(ErrorCode.GAUSS_518["GAUSS_51802"] % ( + "\"GAUSSHOME\", please import environment variable")) + if not DefaultValue.getEnv("PGHOST"): + GaussLog.exitWithError(ErrorCode.GAUSS_518["GAUSS_51802"] % ( + "\"PGHOST\", please import environment variable")) + clusterInfoDict = self.clusterInfoDict + toolPath = DefaultValue.getEnv("GPHOME") + appPath = DefaultValue.getEnv("GAUSSHOME") + if toolPath != clusterInfoDict["toolPath"]: + GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35711"] % "toolPath") + if appPath != clusterInfoDict["appPath"]: + GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35711"] % "appPath") + + def _getHostAzNameMap(self): + """ + get azName of all hosts + """ + for 
dbnode in self.clusterInfo.dbNodes: + self.hostAzNameMap[dbnode.backIps[0]] = dbnode.azName + + def _getNewHostCasRoleMap(self): + """ + get cascadeRole of newHosts + """ + for dbnode in self.clusterInfo.dbNodes: + if dbnode.backIps[0] in self.newHostList: + self.newHostCasRoleMap[dbnode.backIps[0]] = dbnode.cascadeRole + +class ExpansionClusterInfo(dbClusterInfo): + + def __init__(self): + dbClusterInfo.__init__(self) + + def getToolPath(self, xmlFile): + """ + function : Read tool path from default xml file + input : String + output : String + """ + self.setDefaultXmlFile(xmlFile) + # read gaussdb tool path from xml file + (retStatus, retValue) = readOneClusterConfigItem( + initParserXMLFile(xmlFile), "gaussdbToolPath", "cluster") + if retStatus != 0: + raise Exception(ErrorCode.GAUSS_512["GAUSS_51200"] + % "gaussdbToolPath" + " Error: \n%s" % retValue) + toolPath = os.path.normpath(retValue) + checkPathVaild(toolPath) + return toolPath + + +if __name__ == "__main__": + """ + """ + expansion = Expansion() + expansion.checkExecutingUser() + expansion.parseCommandLine() + expansion.checkParameters() + expansion.initLogs() + expansion.getExpansionInfo() + expansion.checkEnvfile() + expansion.checkXmlIncludeNewHost() + expansion.checkExecutingHost() + expansion.checkTrust() + expImpl = ExpansionImpl(expansion) + expImpl.run() diff --git a/script/gs_install b/script/gs_install new file mode 100644 index 0000000..1c60f8b --- /dev/null +++ b/script/gs_install @@ -0,0 +1,317 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +############################################################################# +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +# Description : gs_install is a utility to deploy a Gauss200 server. 
+#############################################################################
+
+import os
+import sys
+package_path = os.path.dirname(os.path.realpath(__file__))
+ld_path = package_path + "/gspylib/clib"
+if 'LD_LIBRARY_PATH' not in os.environ:
+    os.environ['LD_LIBRARY_PATH'] = ld_path
+    os.execve(os.path.realpath(__file__), sys.argv, os.environ)
+if not os.environ.get('LD_LIBRARY_PATH').startswith(ld_path):
+    os.environ['LD_LIBRARY_PATH'] = \
+        ld_path + ":" + os.environ['LD_LIBRARY_PATH']
+    os.execve(os.path.realpath(__file__), sys.argv, os.environ)
+
+sys.path.append(sys.path[0])
+from gspylib.common.GaussLog import GaussLog
+from gspylib.common.Common import DefaultValue, ClusterCommand
+from gspylib.common.DbClusterInfo import dbClusterInfo
+from gspylib.common.ParallelBaseOM import ParallelBaseOM
+from gspylib.common.ErrorCode import ErrorCode
+from gspylib.common.ParameterParsecheck import Parameter
+from gspylib.os.gsOSlib import g_OSlib
+from impl.install.OLAP.InstallImplOLAP import InstallImplOLAP
+
+# exit code
+EXEC_SUCCESS = 0
+ROLLBACK_FAILED = 3
+
+
+class Install(ParallelBaseOM):
+    """
+    The class is used to perform the installation
+    """
+
+    def __init__(self):
+        """
+        function: initialize the parameters
+        input : NA
+        output: NA
+        """
+        ParallelBaseOM.__init__(self)
+        self.time_out = None
+        self.alarm_component = ""
+        self.dbInitParam = []
+        self.dataGucParam = []
+        self.action = "gs_install"
+        self.initStep = "Init Install"
+
+    def usage(self):
+        """
+gs_install is a utility to deploy a cluster server.
+
+Usage:
+  gs_install -? | --help
+  gs_install -V | --version
+  gs_install -X XMLFILE [--gsinit-parameter="PARAMETER" [...]]
+             [--dn-guc="PARAMETER" [...]] [--alarm-component=ALARMCOMPONENT]
+             [--time-out=SECS] [-l LOGFILE]
+
+General options:
+  -X                                 Path of the XML configuration file.
+  -l                                 Path of log file.
+  -?, --help                         Show help information for this utility,
+                                     and exit the command line mode.
+  -V, --version                      Show version information.
+
+  --gsinit-parameter="PARAMETER"     Parameters to initialize DN and CN.
+                                     For more information, see "gs_initdb --help".
+  --dn-guc="PARAMETER"               Parameters to set the configuration of DN.
+                                     For more information, see "gs_guc --help".
+  --alarm-component=ALARMCOMPONENT   Path of the alarm component.
+  --time-out=SECS                    Maximum waiting time when starting the cluster.
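+
+Examples:
+  # Illustrative invocation; the XML path and parameter values below are
+  # placeholders, not required values.
+  gs_install -X /opt/software/cluster_config.xml
+      --gsinit-parameter="--encoding=UTF8" --time-out=300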
+ """ + print(self.usage.__doc__) + + def initGlobals(self): + """ + function: Init logger + input : NA + output: NA + """ + try: + self.initLogger(self.action) + self.logger.debug( + "gs_install execution takes %s steps in total" % ClusterCommand.countTotalSteps( + self.action, "", self.readOperateStep())) + self.logger.log("Parsing the configuration file.", "addStep") + # parsing the configuration file, Parameter [refreshCN] does not refresh the CN number + self.initClusterInfo(refreshCN=False) + self.initComponent() + # Initialize self.sshTool + self.initSshTool(self.clusterInfo.getClusterNodeNames(), + DefaultValue.TIMEOUT_PSSH_INSTALL) + if (len(self.clusterInfo.getClusterNodeNames()) == 1 and + self.clusterInfo.getClusterNodeNames()[0] + == DefaultValue.GetHostIpOrName()): + self.isSingle = True + self.localMode = True + except Exception as e: + # failed to parse cluster config file + raise Exception(str(e)) + # Successfully parsed the configuration file + self.logger.debug("Successfully parsed the configuration file.", + "constant") + + def checkParaList(self, specialStr): + """ + function: + input: + output: + """ + VALUE_CHECK_LIST = ["|", ";", "&", "$", "<", ">", "`", "\\", "{", "}", + "(", ")", "[", "]", "~", "*", "?", "!", "\n"] + VALUE_CHECK_GUC_PARA_LIST = ["client_encoding", "--encoding"] + VALUE_CHECK_ENCODING_LIST = ["LATIN5", "ISO_8859_7", "KOI8U", + "LATIN7", "EUC_TW", "WIN1251", "LATIN8", + "KOI8R", "UTF8", + "ISO_8859_5", "ISO_8859_8", "LATIN9", + "LATIN6", "EUC_JP", "EUC_KR", "WIN1255", + "EUC_CN", + "LATIN3", "LATIN1", "ISO_8859_6", "GBK"] + gs_checkStr = specialStr[0] + if (gs_checkStr.strip() == ""): + return + for rac in VALUE_CHECK_LIST: + flag = gs_checkStr.find(rac) + if flag >= 0: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50219"] + % specialStr + " There are illegal " + "characters in the parameter.") + if (len(gs_checkStr.split("=")) != 2): + return + if (gs_checkStr.split("=")[1].strip().startswith("\'") is True and + gs_checkStr.split("=")[1].strip().endswith("\'") is False) or \ + (gs_checkStr.split("=")[1].strip().startswith("\'") is False + and gs_checkStr.split("=")[1].strip().endswith( + "\'") is True): + raise Exception( + ErrorCode.GAUSS_502["GAUSS_50219"] + % specialStr + " Lack of Paired Single " + "Quotation Marks.value %s" % gs_checkStr) + if (gs_checkStr.split("=")[1].strip().startswith("\"") is True and + gs_checkStr.split("=")[1].strip().endswith("\"") is False) \ + or ( + gs_checkStr.split("=")[1].strip().startswith("\"") is False + and gs_checkStr.split("=")[1].strip().endswith("\"") is True): + raise Exception( + ErrorCode.GAUSS_502["GAUSS_50219"] % specialStr + + " Lack of double quotation marks.value %s" % gs_checkStr) + if gs_checkStr.split("=")[0].strip() in VALUE_CHECK_GUC_PARA_LIST and \ + (gs_checkStr.split("=")[1].strip().strip("\'").strip( + "\"").strip() not in VALUE_CHECK_ENCODING_LIST): + raise Exception( + ErrorCode.GAUSS_500["GAUSS_50011"] % ( + gs_checkStr.split("=")[0], + gs_checkStr.split("=")[1].strip("\'").strip("\"").strip()) + + "Please cheak parameter '--dn-guc' or '--gsinit-parameter'.") + + def parseCommandLine(self): + """ + function: Parse command line and save to global variable + input : NA + output: NA + """ + # init the ParaObj + ParaObj = Parameter() + ParaDict = ParaObj.ParameterCommandLine("install") + # parameter -h or -? 
+ if (ParaDict.__contains__("helpFlag")): + self.usage() + sys.exit(EXEC_SUCCESS) + + # parameter -X + if (ParaDict.__contains__("confFile")): + self.xmlFile = ParaDict.get("confFile") + # parameter -l + if (ParaDict.__contains__("logFile")): + self.logFile = ParaDict.get("logFile") + # parameter --gsinit-parameter + if (ParaDict.__contains__("dbInitParams")): + self.dbInitParam = ParaDict.get("dbInitParams") + self.checkParaList(self.dbInitParam) + # parameter --dn-guc + if (ParaDict.__contains__("dataGucParams")): + self.dataGucParam = ParaDict.get("dataGucParams") + self.checkParaList(self.dataGucParam) + # parameter --alarm-component + if (ParaDict.__contains__("alarm_component")): + self.alarm_component = ParaDict.get("alarm_component") + # parameter --time-out + if (ParaDict.__contains__("time_out")): + self.time_out = ParaDict.get("time_out") + + def checkUser(self): + """ + """ + # get user info + self.user = g_OSlib.getUserInfo()['name'] + # get the group info + self.group = g_OSlib.getUserInfo()['g_name'] + # check the user and group + if (self.user == "" or self.group == ""): + raise Exception(ErrorCode.GAUSS_503["GAUSS_50308"]) + if (self.user == "root" or self.group == "root"): + raise Exception(ErrorCode.GAUSS_501["GAUSS_50105"]) + + def checkConfigFile(self): + """ + """ + if (self.xmlFile == ""): + # there is no -X parameter + raise Exception(ErrorCode.GAUSS_500["GAUSS_50001"] % 'X' + + ' for the installation.') + if (not os.path.exists(self.xmlFile)): + # -X parameter value is not exists + raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % self.xmlFile) + if (not os.path.isabs(self.xmlFile)): + # -X parameter value is not absolute path + raise Exception( + ErrorCode.GAUSS_502["GAUSS_50213"] % "configuration file") + + def checkDNPara(self): + """ + """ + dnUnsupportedParameters = DefaultValue.findUnsupportedParameters( + self.dataGucParam) + if (len(dnUnsupportedParameters) != 0): + GaussLog.printMessage("The following parameters set for database node will" + " not take effect:\n%s" + % str(dnUnsupportedParameters)) + for param in dnUnsupportedParameters: + self.dataGucParam.remove(param) + + def checkAlarm(self): + """ + """ + if (self.alarm_component == ""): + self.alarm_component = DefaultValue.ALARM_COMPONENT_PATH + if (not os.path.isabs(self.alarm_component)): + raise Exception( + ErrorCode.GAUSS_502["GAUSS_50213"] % "alarm component") + + def checkLogFile(self): + """ + """ + if (self.logFile == ""): + # if -l parameter is null + self.logFile = DefaultValue.getOMLogPath( + DefaultValue.DEPLOY_LOG_FILE, self.user, "", self.xmlFile) + if (not os.path.isabs(self.logFile)): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50213"] % "log") + + def checkParameter(self): + """ + function: Check parameter from command line + input : NA + output: NA + """ + # check required parameters + self.checkUser() + # check mpprc file path + self.mpprcFile = DefaultValue.getMpprcFile() + # check config file + self.checkConfigFile() + # check unsupported -D parameter + self.checkDNPara() + # check alarm component + self.checkAlarm() + # check logFile + self.checkLogFile() + + +if __name__ == '__main__': + """ + main function + """ + # check if user is root + if (os.getuid() == 0): + GaussLog.exitWithError(ErrorCode.GAUSS_501["GAUSS_50105"]) + try: + REPEAT = False + # Objectize class + install = Install() + # Initialize self and Parse command line and save to global variable + install.parseCommandLine() + # check the parameters is not OK + install.checkParameter() + # Initialize globals 
parameters + install.initGlobals() + # set action flag file + DefaultValue.setActionFlagFile("gs_install") + + impl = InstallImplOLAP(install) + # Perform the whole install process + impl.run() + except Exception as e: + GaussLog.exitWithError(str(e)) + finally: + DefaultValue.setActionFlagFile("gs_install", None, False) diff --git a/script/gs_om b/script/gs_om new file mode 100644 index 0000000..e93fc74 --- /dev/null +++ b/script/gs_om @@ -0,0 +1,770 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +############################################################################# +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +# Description : gs_om is a utility to manage a Gauss200 cluster. +############################################################################# + +import subprocess +import os +import sys +import pwd + +from gspylib.common.DbClusterInfo import dbClusterInfo +from gspylib.common.GaussLog import GaussLog +from gspylib.common.ErrorCode import ErrorCode +from gspylib.common.ParameterParsecheck import Parameter +from gspylib.common.Common import DefaultValue +from gspylib.common.ParallelBaseOM import ParallelBaseOM +from gspylib.os.gsOSlib import g_OSlib +from gspylib.threads.SshTool import SshTool +from impl.om.OLAP.OmImplOLAP import OmImplOLAP +from gspylib.common.VersionInfo import VersionInfo + +# action type +ACTION_START = "start" +ACTION_STOP = "stop" +ACTION_RESTART = "restart" +ACTION_STATUS = "status" +ACTION_REBUID = "generateconf" +ACTION_CERT = "cert" +STOP_MODE_FAST = "fast" +STOP_MODE_IMMEDIATE = "immediate" +ACTION_VIEW = "view" +ACTION_QUERY = "query" +ACTION_KERBEROS = "kerberos" +ACTION_REFRESHCONF = "refreshconf" + +# postgis +ACTION_DEL_POSTGIs = "rmlib" + + +class CmdOptions(): + """ + define option + """ + + def __init__(self): + self.action = "" + # if action is "express", use this parameter to store the list of + # cluster node names passed by the + # command line option "-h". + self.nodeName = "" + self.time_out = None + # if action is "express", use this parameter to store whether to + # show the detail message of cluster + # node state. 
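+        # (show_detail and showAll below are only used by "gs_om -t status")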
+        self.show_detail = False
+        self.showAll = False
+        self.dataDir = ""
+        self.outFile = ""
+        self.logFile = ""
+        self.localLog = ""
+        self.reset = False
+        self.distribute = False
+        self.certFile = ""
+        self.certRollback = False
+        self.NormalCNNode = []
+        self.mode = ""
+
+        self.user = ""
+        self.group = ""
+        self.userInfo = ""
+        self.mpprcFile = ""
+        self.confFile = ""
+        self.localMode = False
+        self.instanceName = ""
+        self.azName = ""
+        self.nodeId = -1
+        self.clusterInfo = None
+        self.security_mode = "off"
+
+        # kerberos
+        self.kerberosMode = ""
+        self.clusterUser = ""
+        self.kerberosType = ""
+        self.clusterToolPath = ""
+
+
+###########################################
+class OperationManager(ParallelBaseOM):
+
+    def __init__(self):
+        """
+        init the command options
+        save command line parameter values
+        """
+        ParallelBaseOM.__init__(self)
+        # command line parameter collection
+        self.g_opts = CmdOptions()
+        self.OM_PARAMETER_DIR = ""
+
+    def usage(self):
+        """
+gs_om is a utility to manage a cluster.
+
+Usage:
+  gs_om -? | --help
+  gs_om -V | --version
+  OLAP scene:
+    gs_om -t start [-h HOSTNAME] [-D dataDir] [--time-out=SECS]
+        [--security-mode=MODE] [-l LOGFILE]
+    gs_om -t stop [-h HOSTNAME] [-D dataDir] [--time-out=SECS] [-m MODE]
+        [-l LOGFILE]
+    gs_om -t restart [-h HOSTNAME] [-D dataDir] [--time-out=SECS]
+        [--security-mode=MODE] [-l LOGFILE] [-m MODE]
+    gs_om -t status [-h HOSTNAME] [-o OUTPUT] [--detail] [--all] [-l LOGFILE]
+    gs_om -t generateconf -X XMLFILE [--distribute] [-l LOGFILE]
+    gs_om -t cert [--cert-file=CERTFILE | --rollback] [-L] [-l LOGFILE]
+    gs_om -t kerberos -m [install|uninstall] -U USER [-l LOGFILE]
+        [--krb-server|--krb-client]
+    gs_om -t view [-o OUTPUT]
+    gs_om -t query [-o OUTPUT]
+    gs_om -t refreshconf
+
+General options:
+  -t                    Type of the OM command.
+  -l                    Path of log file.
+  -?, --help            Show help information for this utility,
+                        and exit the command line mode.
+  -V, --version         Show version information.
+
+Options for start
+  -h                    Name of the host to be started.
+  -D                    Path of dn.
+  --time-out=SECS       Maximum waiting time when starting the cluster
+                        or node.
+  --security-mode=MODE  Start the database in security mode: on or off.
+                        on: start with security mode
+                        off: start without security mode
+
+Options for stop
+  -h                    Name of the host to be shut down.
+  -m, --mode=MODE       Shutdown mode. It can be f (fast) or i (immediate).
+  -D                    Path of dn.
+  --time-out=SECS       Maximum waiting time when stopping the cluster
+                        or node.
+
+Options for status
+  -h                    Name of the host whose status is to be queried.
+  --az                  Name of the single AZ whose status is to be queried.
+  -o                    Save the result to the specified file.
+  --detail              Show detailed status information.
+  --all                 Show all database node status information.
+
+Options for generating configuration files
+  -X                    Path of the XML configuration file.
+  --distribute          Distribute the static configuration file
+                        to the installation directory of cluster nodes.
+
+Options for cert
+  --cert-file           Path of cert file.
+  --rollback            Roll back the SSL cert files.
+  -L                    Local mode.
+
+Options for kerberos
+  -m                    Kerberos management mode. It can be
+                        install or uninstall.
+  -U                    Cluster user.
+Install options:
+  --krb-server          Execute install for the server. This parameter
+                        only works with install.
+  --krb-client          Execute install for the client.
This parameter + only work for install + + """ + + print(self.usage.__doc__) + + def initGlobal(self): + """ + function:Init logger + input:NA + output:NA + """ + try: + # Init logger + self.xmlFile = self.g_opts.confFile + self.logFile = self.g_opts.logFile + self.initLogger(self.g_opts.action) + if (os.getuid() != 0): + DefaultValue.modifyFileOwnerFromGPHOME(self.logger.logFile) + + dirName = os.path.dirname(self.g_opts.logFile) + self.g_opts.localLog = os.path.join(dirName, + DefaultValue.LOCAL_LOG_FILE) + + if (self.g_opts.action == ACTION_REBUID): + self.initClusterInfo() + else: + # Initialize the self.clusterInfo variable + if (self.g_opts.action == ACTION_STATUS): + self.initClusterInfoFromStaticFile(self.g_opts.user, False) + else: + self.initClusterInfoFromStaticFile(self.g_opts.user) + + # Check --az with single-primary-multi-standby + if (self.g_opts.azName): + raise Exception( + ErrorCode.GAUSS_500["GAUSS_50002"] % '-az' + + ". This parameter is used in single primary multi " + "standby.") + + # Obtain the owner and group of the cluster installation directory + if self.g_opts.action != ACTION_KERBEROS: + (self.g_opts.user, self.g_opts.group) = g_OSlib.getPathOwner( + self.clusterInfo.appPath) + if self.g_opts.user == "" or self.g_opts.group == "": + raise Exception(ErrorCode.GAUSS_503["GAUSS_50308"]) + # Init base member + self.user = self.g_opts.user + self.group = self.g_opts.group + + # init components + if self.g_opts.action != ACTION_STATUS: + self.initComponent() + except Exception as e: + GaussLog.exitWithError(str(e)) + + def checkAction(self, ParaDict): + """ + """ + if (ParaDict.__contains__("action")): + self.g_opts.action = ParaDict.get("action") + if (len(self.g_opts.action) == 0): + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50001"] + % "t" + ".") + + def parseNode(self, ParaDict): + """ + """ + if (self.g_opts.action == ACTION_START or + self.g_opts.action == ACTION_STOP or + self.g_opts.action == ACTION_STATUS): + if (ParaDict.__contains__("nodename")): + nodename = ParaDict.get("nodename") + self.g_opts.estimateNodeName = nodename + # Only one -h parameter can be entered + if (len(nodename) != 1): + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50006"] + % nodename[0] + " Please check it.") + self.g_opts.nodeName = nodename[0] + + if "nodeId" in ParaDict.keys(): + nodeId = int(ParaDict.get("nodeId")) + if nodeId < 1: + GaussLog.exitWithError( + ErrorCode.GAUSS_500["GAUSS_50004"] % + ParaDict.get("nodeId") + " Please check it.") + self.g_opts.nodeId = nodeId + + if ("dataDir" in ParaDict.keys()): + self.g_opts.dataDir = ParaDict.get("dataDir") + + def parseTimeOut(self, ParaDict): + """ + """ + if self.g_opts.action == ACTION_START or self.g_opts.action == \ + ACTION_STOP: + if (ParaDict.__contains__("time_out")): + self.g_opts.time_out = ParaDict.get("time_out") + + def parseMode(self, ParaDict): + """ + """ + if (self.g_opts.action == ACTION_STOP): + if (ParaDict.__contains__("Mode")): + self.g_opts.mode = ParaDict.get("Mode") + + def parseKerberosMode(self, ParaDict): + """ + """ + if (self.g_opts.action == ACTION_KERBEROS): + if ("Mode" in ParaDict): + self.g_opts.kerberosMode = ParaDict.get("Mode") + + def parseStatus(self, ParaDict): + """ + """ + if (self.g_opts.action == ACTION_STATUS): + # A status query can specify an out file + if (ParaDict.__contains__("outFile")): + self.g_opts.outFile = ParaDict.get("outFile") + # The status query can display detailed information + if (ParaDict.__contains__("show_detail")): + self.g_opts.show_detail = 
ParaDict.get("show_detail") + if (ParaDict.__contains__("all")): + self.g_opts.showAll = ParaDict.get("all") + + def parseView(self, ParaDict): + """ + """ + if (self.g_opts.action == ACTION_VIEW): + # A view can specify an out file + if ("outFile" in ParaDict.keys()): + self.g_opts.outFile = ParaDict.get("outFile") + + def parseQuery(self, ParaDict): + """ + """ + if (self.g_opts.action == ACTION_QUERY): + # A view can specify an out file + if ("outFile" in ParaDict.keys()): + self.g_opts.outFile = ParaDict.get("outFile") + + def parseStart(self, ParaDict): + """ + """ + if (self.g_opts.action == ACTION_START): + # The start query can specify az name + if ParaDict.__contains__("az_name"): + self.g_opts.azName = ParaDict.get("az_name") + if ParaDict.__contains__("security_mode"): + self.g_opts.security_mode = ParaDict.get("security_mode") + + def parseStop(self, ParaDict): + """ + """ + if (self.g_opts.action == ACTION_STOP): + # The start query can specify az name for OLAP + if (ParaDict.__contains__("az_name")): + self.g_opts.azName = ParaDict.get("az_name") + + def parseConFile(self, ParaDict): + """ + """ + if (self.g_opts.action == ACTION_REBUID): + # Changeip, managecn, and generateconf require the -X parameter + if (ParaDict.__contains__("confFile")): + self.g_opts.confFile = ParaDict.get("confFile") + + def parseGenerateconf(self, ParaDict): + """ + """ + if (self.g_opts.action == ACTION_REBUID): + # Generateconf can specify the distribution file + if (ParaDict.__contains__("distribute")): + self.g_opts.distribute = ParaDict.get("distribute") + + def parseCert(self, ParaDict): + """ + """ + if (self.g_opts.action == ACTION_CERT): + # cert can change cert file + if (ParaDict.__contains__("cert-file")): + self.g_opts.certFile = ParaDict.get("cert-file") + if (ParaDict.__contains__("rollback")): + self.g_opts.certRollback = ParaDict.get("rollback") + if (ParaDict.__contains__("localMode")): + self.g_opts.localMode = ParaDict.get("localMode") + if (ParaDict.__contains__("cert-file") and ParaDict.__contains__( + "rollback")): + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50005"] + % ("-cert-file", "-rollback")) + + def parseKerberos(self, ParaDict): + """ + """ + if self.g_opts.action == ACTION_KERBEROS: + if "user" in ParaDict: + self.g_opts.clusterUser = ParaDict.get("user") + if self.g_opts.kerberosMode == "install": + if "krb-server" in ParaDict and "krb-client" in ParaDict: + GaussLog.exitWithError( + ErrorCode.GAUSS_500["GAUSS_50005"] + % ("-krb-server", "-krb-client")) + if (("krb-server" not in ParaDict) and ( + "krb-client" not in ParaDict)): + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50001"] + % "-krb-server' or '--krb-client") + if "krb-server" in ParaDict: + self.g_opts.kerberosType = "krb-server" + if "krb-client" in ParaDict: + self.g_opts.kerberosType = "krb-client" + if self.g_opts.kerberosMode == "uninstall": + if "krb-server" in ParaDict: + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50002"] + % "-krb-server") + if "krb-client" in ParaDict: + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50002"] + % "-krb-client") + + + def parseLog(self, ParaDict): + """ + """ + if (ParaDict.__contains__("logFile")): + self.g_opts.logFile = ParaDict.get("logFile") + + def parseCommandLine(self): + """ + function:Parse command line and save to global variable + input:NA + output:NA + """ + # Parse command line + ParaObj = Parameter() + ParaDict = ParaObj.ParameterCommandLine("gs_om") + # If help is included in the parameter, + # the help message is printed and 
exited + if (ParaDict.__contains__("helpFlag")): + self.usage() + sys.exit(0) + # The -t parameter is required + self.checkAction(ParaDict) + # Starting a cluster, stopping a cluster, querying a state, + # and switching between active and standby devices require obtaining + # node information + self.parseNode(ParaDict) + # Starting a Cluster and Stopping a Cluster + # can specify a timeout period + self.parseTimeOut(ParaDict) + # Stop the cluster and managecn to specify the mode + self.parseMode(ParaDict) + # Kerberos to specify the mode + self.parseKerberosMode(ParaDict) + # Parse start parameter + self.parseStart(ParaDict) + # Parse stop parameter + self.parseStop(ParaDict) + # Parse status parameter + self.parseStatus(ParaDict) + # Parse view parameter + self.parseView(ParaDict) + # Parse query parameter + self.parseQuery(ParaDict) + # Parse -X parameter + self.parseConFile(ParaDict) + # Parse generateconf parameter + self.parseGenerateconf(ParaDict) + # Parse cert parameter + self.parseCert(ParaDict) + # Parse kerberos parameter + self.parseKerberos(ParaDict) + # Parse log parameter + self.parseLog(ParaDict) + + ########################################################################### + # Check parameters for all operations + ########################################################################### + def checkParameter(self): + """ + function:Check parameter from command line + input:NA + output:NA + """ + if (os.getuid() == 0): + GaussLog.exitWithError(ErrorCode.GAUSS_501["GAUSS_50105"] + \ + " When the parameter '-t' value is not " + "dailyAlarm or not estimate install " + "consume or not extension connector with " + "add, delete, restart, upgrade in -m " + "parameter.") + + if (self.g_opts.action == ACTION_START): + self.checkStartParameter() + elif (self.g_opts.action == ACTION_STOP): + self.checkStopParameter() + elif (self.g_opts.action == ACTION_RESTART): + pass + elif (self.g_opts.action == ACTION_STATUS): + self.checkOutFileParameter() + elif (self.g_opts.action == ACTION_REBUID): + self.checkGenerateConfParameter() + elif (self.g_opts.action == ACTION_CERT): + self.checkCertParameter() + elif (self.g_opts.action == ACTION_KERBEROS): + self.checkKerberosParameter() + elif (self.g_opts.action == ACTION_VIEW): + self.checkOutFileParameter() + elif (self.g_opts.action == ACTION_QUERY): + self.checkOutFileParameter() + elif (self.g_opts.action == ACTION_REFRESHCONF): + pass + else: + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50004"] % "t") + + # check mpprc file path + self.g_opts.mpprcFile = DefaultValue.getMpprcFile() + + # check if user exist and is the right user + if (self.g_opts.user == ""): + self.g_opts.user = pwd.getpwuid(os.getuid()).pw_name + if (self.g_opts.user == ""): + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50001"] % + "U" + ".") + + # Check user on installed cluster + DefaultValue.checkUser(self.g_opts.user) + # Check whether the current user is consistent with -U if no root + if (os.getuid() != 0): + cmd = "id -un" + (status, output) = subprocess.getstatusoutput(cmd) + if (output != self.g_opts.user): + GaussLog.exitWithError( + ErrorCode.GAUSS_530["GAUSS_53033"] % self.g_opts.user) + + self.OM_PARAMETER_DIR = "%s/om_parameter_dir" % \ + DefaultValue.getTmpDirFromEnv(self.g_opts.user) + + # check log file + if (self.g_opts.logFile == ""): + self.g_opts.logFile = DefaultValue.getOMLogPath( + DefaultValue.OM_LOG_FILE, self.g_opts.user, "", + action=self.g_opts.action) + if (not os.path.isabs(self.g_opts.logFile)): + 
GaussLog.exitWithError(ErrorCode.GAUSS_502["GAUSS_50213"] % "log")
+
+    def checkStartParameter(self):
+        """
+        Check parameters for starting the cluster or a node
+        input : NA
+        output: NA
+        """
+        # if the parameter -I exists, then the -h parameter is required.
+        if (self.g_opts.instanceName and (not self.g_opts.nodeName)):
+            GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50001"] % "h")
+        # If no time-out period is specified, the default is
+        # 300 seconds
+        if (self.g_opts.time_out is None):
+            self.g_opts.time_out = DefaultValue.TIMEOUT_CLUSTER_START
+        else:
+            # The timeout parameter must be a pure number
+            if (not str(self.g_opts.time_out).isdigit()):
+                GaussLog.exitWithError(
+                    ErrorCode.GAUSS_500["GAUSS_50003"] %
+                    ("-time-out", "a nonnegative integer"))
+            self.g_opts.time_out = int(self.g_opts.time_out)
+            # The timeout parameter must be greater than 0 and less
+            # than the maximum value of a signed 32-bit integer
+            if (self.g_opts.time_out <= 0 or self.g_opts.time_out
+                    >= 2147483647):
+                GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50004"]
+                                       % "-time-out")
+
+        if self.g_opts.security_mode != "off" and self.g_opts.security_mode \
+                != "on":
+            GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50004"]
+                                   % "-security-mode")
+
+    def checkStopParameter(self):
+        """
+        Check parameters for stopping the cluster or a node
+        input : NA
+        output: NA
+        """
+        # if the parameter -I exists, then the -h parameter is required.
+        if (self.g_opts.instanceName and (not self.g_opts.nodeName)):
+            GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50001"] % "h")
+        # If no stop type is specified, the default is fast
+        if (self.g_opts.mode == ""):
+            self.g_opts.mode = STOP_MODE_FAST
+        # The stop mode must be fast, immediate, f or i
+        if (self.g_opts.mode not in [STOP_MODE_FAST, STOP_MODE_IMMEDIATE,
+                                     "f", "i"]):
+            GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50004"] % "m")
+
+    def checkOutFileParameter(self):
+        """
+        Check the output file parameter for status, view and query
+        input : NA
+        output: NA
+        """
+        # Check the specified output file
+        if (self.g_opts.outFile != ''):
+            DefaultValue.checkOutputFile(self.g_opts.outFile)
+
+    def checkGenerateConfParameter(self):
+        """
+        Check parameters for generating configuration files
+        input : NA
+        output: NA
+        """
+        # check xml file
+        if (self.g_opts.confFile == ""):
+            GaussLog.exitWithError(
+                ErrorCode.GAUSS_500["GAUSS_50001"] % 'X' + ".")
+        if (not os.path.isfile(self.g_opts.confFile)):
+            GaussLog.exitWithError(
+                ErrorCode.GAUSS_502["GAUSS_50210"] % self.g_opts.confFile)
+
+    def checkCertParameter(self):
+        """
+        Check parameters for cert
+        input : NA
+        output: NA
+        """
+        # replacing a cert requires --cert-file; rollback requires --rollback
+        if (self.g_opts.certFile == "" and not self.g_opts.certRollback):
+            GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50001"] %
+                                   '-cert-file or --rollback')
+        # the cert file must be a zip package
+        if (self.g_opts.certFile != "" and self.g_opts.certFile[-4:]
+                != ".zip"):
+            GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50001"]
+                                   % "-cert-file type is not 'zip'")
+        # the cert file must exist
+        if (not os.path.isfile(self.g_opts.certFile)
+                and not self.g_opts.certRollback):
+            GaussLog.exitWithError(ErrorCode.GAUSS_502["GAUSS_50210"]
+                                   % self.g_opts.certFile)
+
+    def checkKerberosParameter(self):
+        """
+        Check parameters for kerberos
+        input : NA
+        output: NA
+        """
+        if (self.g_opts.kerberosMode != "install" and
+                self.g_opts.kerberosMode != "uninstall"):
+            GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50004"]
+                                   % 'm' + "Value: %s"
+                                   % self.g_opts.kerberosMode)
+
+        # get user info
+        
self.user = g_OSlib.getUserInfo()['name'] + if (self.g_opts.clusterUser == ""): + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50001"] % 'U') + if self.g_opts.clusterUser != self.user: + GaussLog.exitWithError(ErrorCode.GAUSS_503["GAUSS_50323"] + % self.g_opts.clusterUser) + + if (self.g_opts.kerberosMode == "install" and + self.g_opts.kerberosType == ""): + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50001"] + % "-krb-server' or '--krb-client") + + def checkDSN(self, dsnName): + """ + function: Check the path: + the path must be composed of letters, numbers, + underscores, slashes, hyphen, and spaces + input : path_type_in + output: NA + """ + nameLen = len(dsnName) + if (nameLen > 64): + GaussLog.exitWithError(ErrorCode.GAUSS_502["GAUSS_50219"] + % dsnName + + "Error:\nThe DSN name is too long.") + wrongChar = None + i = 0 + a_ascii = ord('a') + z_ascii = ord('z') + A_ascii = ord('A') + Z_ascii = ord('Z') + num0_ascii = ord('0') + num9_ascii = ord('9') + sep_ascii = ord('_') + for i in range(0, nameLen): + char_check = ord(dsnName[i]) + if (not (a_ascii <= char_check <= z_ascii or A_ascii <= + char_check <= Z_ascii or num0_ascii <= char_check <= + num9_ascii or char_check == sep_ascii)): + wrongChar = dsnName[i] + break + if (wrongChar != None): + return wrongChar + else: + return True + + +def main(): + """ + main function + """ + try: + REPEAT = False + # Objectize class + manager = OperationManager() + # parse cmd lines + manager.parseCommandLine() + # check parameters + manager.checkParameter() + + # init global variables + manager.initGlobal() + # set action flag file + DefaultValue.setActionFlagFile(manager.g_opts.action, manager.logger) + except Exception as e: + if (manager.g_opts.action in (ACTION_STATUS, ACTION_STOP, + ACTION_START)): + actionDict = {ACTION_STATUS: DefaultValue.TASK_QUERY_STATUS, + ACTION_STOP: DefaultValue.TASK_STOP, + ACTION_START: DefaultValue.TASK_START} + if REPEAT: + manager.sshTool = SshTool( + manager.clusterInfo.getClusterNodeNames(), manager.logFile, + timeout=DefaultValue.TIMEOUT_PSSH_COMMON) + manager.logger.logExit(str(e)) + + try: + impl = OmImplOLAP(manager) + + if (manager.g_opts.action not in [ACTION_START, + ACTION_STOP, + ACTION_RESTART, + ACTION_STATUS, + ACTION_REBUID, + ACTION_CERT, + ACTION_KERBEROS, + ACTION_VIEW, + ACTION_QUERY, + ACTION_REFRESHCONF + ]): + raise Exception(ErrorCode.GAUSS_531['GAUSS_53104'] + % ("gs_om -t " + manager.g_opts.action)) + elif (manager.g_opts.action == ACTION_CERT and + manager.g_opts.certRollback): + impl.doDNSSLCertRollback() + + # Depending on the function, different operations are performed + if (manager.g_opts.action == ACTION_START): + impl.doStart() + elif (manager.g_opts.action == ACTION_STOP): + impl.doStop() + elif (manager.g_opts.action == ACTION_RESTART): + impl.doStop(), impl.doStart() + elif (manager.g_opts.action == ACTION_STATUS): + impl.doStatus() + elif (manager.g_opts.action == ACTION_REBUID): + impl.doRebuildConf() + elif (manager.g_opts.action == ACTION_KERBEROS): + if DefaultValue.isUnderUpgrade(manager.user): + raise Exception(ErrorCode.GAUSS_529["GAUSS_52936"]) + impl.doKerberos() + elif (manager.g_opts.action == ACTION_CERT + and not manager.g_opts.certRollback): + impl.doReplaceSSLCert() + elif (manager.g_opts.action == ACTION_VIEW): + impl.doView() + elif (manager.g_opts.action == ACTION_QUERY): + impl.doQuery() + elif (manager.g_opts.action == ACTION_REFRESHCONF): + impl.doRefreshConf() + + manager.logger.closeLog() + except Exception as e: + 
manager.logger.logExit(str(e)) + finally: + DefaultValue.setActionFlagFile("gs_om", None, False) + + +if __name__ == '__main__': + main() diff --git a/script/gs_postuninstall b/script/gs_postuninstall new file mode 100644 index 0000000..beac8a2 --- /dev/null +++ b/script/gs_postuninstall @@ -0,0 +1,414 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +############################################################################# +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +# Description : gs_postuninstall is a utility to clean up the environment +# after uninstalling a Gauss200 server. +############################################################################# + +import os, sys +import subprocess +import grp +import pwd +import platform + +package_path = os.path.dirname(os.path.realpath(__file__)) +ld_path = package_path + "/gspylib/clib" +if 'LD_LIBRARY_PATH' not in os.environ: + os.environ['LD_LIBRARY_PATH'] = ld_path + os.execve(os.path.realpath(__file__), sys.argv, os.environ) +if not os.environ.get('LD_LIBRARY_PATH').startswith(ld_path): + os.environ['LD_LIBRARY_PATH'] = \ + ld_path + ":" + os.environ['LD_LIBRARY_PATH'] + os.execve(os.path.realpath(__file__), sys.argv, os.environ) + + +from gspylib.common.GaussLog import GaussLog +from gspylib.os.gsfile import g_Platform +from gspylib.common.Common import DefaultValue +from gspylib.common.ErrorCode import ErrorCode +from gspylib.common.ParallelBaseOM import ParallelBaseOM +from gspylib.common.ParameterParsecheck import Parameter +from impl.postuninstall.OLAP.PostUninstallImplOLAP import \ + PostUninstallImplOLAP + + +class Postuninstall(ParallelBaseOM): + """ + init the command options + input : NA + output: NA + """ + + def __init__(self): + """ + function: init parameters + input : NA + output: NA + """ + ParallelBaseOM.__init__(self) + self.deleteUser = False + self.deleteGroup = False + self.clean_gphome = False + self.clean_host = [] + self.sshpwd = "" + self.nodeList = [] + self.clusterToolPath = "" + self.userHome = "" + + def usage(self): + """ +gs_postuninstall is a utility to clean up the environment +after uninstalling the cluster. + +Usage: + gs_postuninstall -? |--help + gs_postuninstall -V |--version + gs_postuninstall -U USER -X XMLFILE [-L] [--delete-user] [--delete-group] + [-l LOGFILE] + +General options: + -U Cluster user. + -X Path of the XML configuration file. + -L Only clean up local nodes. + --delete-user Delete the OS user. + --delete-group Delete the group of the OS user. + -l Path of log file. + -?, --help Show help information for this utility, + and exit the command line mode. + -V, --version Show version information. 
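+
+Example (user name and path are illustrative):
+  gs_postuninstall -U omm -X /opt/software/cluster_config.xml
+      --delete-user --delete-group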
+ """ + print(self.usage.__doc__) + + def parseCommandLine(self): + """ + function: Parse command line and save to global variable + input : NA + output: NA + """ + ParaObj = Parameter() + ParaDict = ParaObj.ParameterCommandLine("postuninstall") + # check if has '--help' parameter + if (ParaDict.__contains__("helpFlag")): + self.usage() + sys.exit(0) + + # check the parameters of postuninstall command + if (ParaDict.__contains__("user")): + self.user = ParaDict.get("user") + if (ParaDict.__contains__("confFile")): + self.xmlFile = ParaDict.get("confFile") + if (ParaDict.__contains__("logFile")): + self.logFile = ParaDict.get("logFile") + + if (ParaDict.__contains__("delete-user")): + self.deleteUser = ParaDict.get("delete-user") + if (ParaDict.__contains__("delete-group")): + self.deleteGroup = ParaDict.get("delete-group") + if (ParaDict.__contains__("clean-gphome")): + self.clean_gphome = ParaDict.get("clean-gphome") + if (ParaDict.__contains__("nodename")): + if not "HOST_IP" in os.environ.keys(): + GaussLog.exitWithError( + ErrorCode.GAUSS_518["GAUSS_51801"] % "HOST_IP doesn't" + + " so -h parameter is not needed.") + self.clean_host = ParaDict.get("nodename") + if len(self.clean_host) == 0: + raise Exception(ErrorCode.GAUSS_500["GAUSS_50004"] % 'h') + + if (ParaDict.__contains__("localMode")): + self.localMode = ParaDict.get("localMode") + + if "HOST_IP" in os.environ.keys(): + if not ParaDict.__contains__("localMode"): + if not (ParaDict.__contains__("clean-gphome") + and ParaDict.__contains__("nodename")): + GaussLog.exitWithError( + ErrorCode.GAUSS_518["GAUSS_51801"] % "HOST_IP" + + " so you must specify the -L parameter or (-h and " + "--clean-gphome) parameters.") + if ParaDict.__contains__("clean-gphome"): + if ParaDict.__contains__("localMode") and\ + ParaDict.__contains__("nodename"): + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50005"] + % ("-L", "-h")) + + if (self.deleteGroup == True and self.deleteUser != True): + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50001"] + % "-delete-user" + ".") + + def checkParameter(self): + """ + function: check parameter + input : NA + output: NA + """ + # check user + self.checkUser() + # check config file + self.checkConfigFile() + # check log file + self.checkLogFile() + # check mpprc file if needed, should be done + # before check preinstall step + self.checkMpprcFile() + # check preInstall + self.checkPreInstall() + # check group for redhat + self.checkGroup() + + def checkUser(self): + """ + function: check the user + input : NA + output: NA + """ + # check if no user + if (self.user == ""): + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50001"] % "U" + + ".") + # check if is root user + if (self.user == "root"): + GaussLog.exitWithError(ErrorCode.GAUSS_503["GAUSS_50301"]) + + try: + DefaultValue.checkUser(self.user, False) + except Exception as e: + GaussLog.exitWithError(str(e)) + + if (pwd.getpwnam(self.user).pw_uid == 0): + GaussLog.exitWithError(ErrorCode.GAUSS_503["GAUSS_50302"]) + + def checkConfigFile(self): + """ + function: check Config File + input : NA + output: NA + """ + # if no config file + if (self.xmlFile == ""): + GaussLog.exitWithError( + ErrorCode.GAUSS_500["GAUSS_50001"] % "X" + ".") + # if path not exists + if (not os.path.exists(self.xmlFile)): + GaussLog.exitWithError(ErrorCode.GAUSS_502["GAUSS_50201"] + % "configuration file" + " %s." 
+ % self.xmlFile) + # if not absolute path + if (not os.path.isabs(self.xmlFile)): + GaussLog.exitWithError(ErrorCode.GAUSS_502["GAUSS_50213"] + % "configure file") + + def checkLogFile(self): + """ + function: check log File + input : NA + output: NA + """ + # if no log file + if (self.logFile == ""): + self.logFile = DefaultValue.getOMLogPath( + DefaultValue.UNPREINSTALL_LOG_FILE, self.user, "", + self.xmlFile) + # if not absolute path + if (not os.path.isabs(self.logFile)): + GaussLog.exitWithError(ErrorCode.GAUSS_502["GAUSS_50213"] % "log") + + def checkMpprcFile(self): + """ + function: check MpprcFile + input : NA + output: NA + """ + # get path of MpprcFile + self.mpprcFile = DefaultValue.getEnv(DefaultValue.MPPRC_FILE_ENV) + + try: + # get tool path + self.clusterToolPath = DefaultValue.getPreClusterToolPath( + self.user, self.xmlFile) + except Exception as e: + GaussLog.exitWithError(str(e)) + # if MpprcFile is null + if (self.mpprcFile == None): + self.mpprcFile = "" + # if MpprcFile is not null + if (self.mpprcFile != ""): + # if no MpprcFile + if (not os.path.exists(self.mpprcFile)): + GaussLog.exitWithError(ErrorCode.GAUSS_502["GAUSS_50201"] + % "MPPRC file" + " %s." + % self.mpprcFile) + # if is not absolute path + if (not os.path.isabs(self.mpprcFile)): + GaussLog.exitWithError(ErrorCode.GAUSS_512["GAUSS_51206"] + % self.mpprcFile) + + def checkPreInstall(self): + """ + function: check preInstall + input : NA + output: NA + """ + # check if agent-mode + if "HOST_IP" in os.environ.keys(): + # get om_agent path + agent_path_cmd = "ps aux | grep 'om_agent.py' | grep %s | grep " \ + "-v grep | head -n 1 | awk '{print $NF}'" % \ + self.user + (status, output) = subprocess.getstatusoutput(agent_path_cmd) + if (status != 0): + raise Exception(ErrorCode.GAUSS_535["GAUSS_53507"] + % agent_path_cmd) + agent_path = os.path.dirname(output.strip()) + agent_conf_file = os.path.join(agent_path, 'om_agent.conf') + if not os.path.exists(agent_conf_file): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] + % agent_conf_file) + + # get agent sep_env_file + with open(agent_conf_file) as fp: + recordLines = fp.readlines() + sep_env_file = "" + for tmp in recordLines: + if 'sep_env_file' in tmp: + sep_env_file = tmp.split("=")[-1].strip() + if not os.path.exists(sep_env_file): + raise Exception( + ErrorCode.GAUSS_502["GAUSS_50201"] % sep_env_file) + + cmd = "su - %s -c 'source %s && echo $GAUSS_ENV' 2>/dev/null"\ + % (self.user, sep_env_file) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + GaussLog.exitWithError(ErrorCode.GAUSS_518["GAUSS_51802"] + % "$GAUSS_ENV" + "Error: \n%s." % output + + "The cmd is %s" % cmd) + gaussEnv = output.strip() + else: + # check if has mpprcFile + if (self.mpprcFile != ""): + userprofile = self.mpprcFile + else: + userprofile = "/home/%s/.bashrc" % self.user + + cmd = "su - %s -c 'source %s && echo $GAUSS_ENV' 2>/dev/null"\ + % (self.user, userprofile) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + GaussLog.exitWithError(ErrorCode.GAUSS_518["GAUSS_51802"] + % "$GAUSS_ENV" + "Error: \n%s." 
% output + + "The cmd is %s" % cmd) + gaussEnv = output.strip() + + # if gaussEnv is 2, user do not do uninstall before + if (str(gaussEnv) == "2"): + GaussLog.exitWithError(ErrorCode.GAUSS_525["GAUSS_52501"] + % "gs_uninstall") + # if gaussEnv is not 1, user do not do preinstall before + elif (str(gaussEnv) != "1" and not self.clean_gphome): + GaussLog.exitWithError( + ErrorCode.GAUSS_525["GAUSS_52501"] % "gs_preinstall" + + "If you do preinstall with seperate file mode, please input " + "sep-env-file before postuninstall. ") + elif (str(gaussEnv) == "1" or str(gaussEnv) == "2")\ + and self.clean_gphome: + GaussLog.exitWithError(ErrorCode.GAUSS_525["GAUSS_52501"] + % "'gs_uninstall' or 'gs_postuninstall" + " no clean gphome'") + + def checkGroup(self): + """ + function: check user group + input : NA + output: NA + """ + try: + # get user group + group = grp.getgrgid(pwd.getpwnam(self.user).pw_gid).gr_name + distname, version, idnum = g_Platform.dist() + # check if OS version is redhat or Euler + if (distname in ("redhat", "euleros", "centos", "openEuler")): + if (self.deleteGroup != True and self.deleteUser == True + and self.user == group): + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50001"] % + "-delete-group" + ". You must " + "delete the " + "group when you " + "delete the " + "user which has " + "the same name " + "with the group " + "in redhat.") + except Exception as e: + GaussLog.exitWithError(ErrorCode.GAUSS_503["GAUSS_50308"] + + "Failed to obtain the group for %s" % + self.user + "Error:\n%s" % str(e)) + + def initGlobals(self): + """ + function: init Logg file + input : NA + output: NA + """ + self.initLogger("gs_postuninstall") + self.logger.ignoreErr = True + + try: + self.logger.log("Parsing the configuration file.", "addStep") + # get cluster info from xml file + # Initialize the self.clusterInfo variable + self.initClusterInfo() + # Initialize the self.sshTool variable + self.initSshTool(self.clusterInfo.getClusterNodeNames(), + DefaultValue.TIMEOUT_PSSH_POSTPREINSTALL) + self.logger.debug("The cluster's information:\n%s." + % str(self.clusterInfo)) + self.logger.log("Successfully parsed the configuration file.", + "constant") + except Exception as e: + self.logger.logExit(str(e)) + + dirName = os.path.dirname(self.logFile) + self.localLog = os.path.join(dirName, DefaultValue.LOCAL_LOG_FILE) + self.userHome = DefaultValue.getUserHome(self.user) + +if __name__ == '__main__': + """ + main function + input : NA + output: NA + """ + if (os.getuid() != 0): + GaussLog.exitWithError(ErrorCode.GAUSS_501["GAUSS_50104"]) + + try: + postuninstall = Postuninstall() + postuninstall.parseCommandLine() + postuninstall.checkParameter() + postuninstall.initGlobals() + + if len(postuninstall.clusterInfo.getClusterNodeNames()) == 1 and \ + postuninstall.clusterInfo.getClusterNodeNames()[ + 0] == DefaultValue.GetHostIpOrName(): + postuninstall.localMode = True + impl = PostUninstallImplOLAP(postuninstall) + + # Perform the whole extand process + impl.run() + except Exception as e: + GaussLog.exitWithError(str(e)) diff --git a/script/gs_preinstall b/script/gs_preinstall new file mode 100644 index 0000000..5cfdd2b --- /dev/null +++ b/script/gs_preinstall @@ -0,0 +1,483 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +############################################################################# +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. 
+# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +# Description : gs_preinstall is a utility to create an installation +# environment for a cluster. +############################################################################# + +import os +import pwd +import sys +import grp +import subprocess +from gspylib.common.CheckPythonVersion import checkPythonVersion +checkPythonVersion() +from subprocess import Popen, PIPE + +from gspylib.common.DbClusterInfo import dbClusterInfo, \ + readOneClusterConfigItem, initParserXMLFile +from gspylib.common.GaussLog import GaussLog +from gspylib.common.Common import DefaultValue +from gspylib.common.ErrorCode import ErrorCode +from gspylib.common.ParallelBaseOM import ParallelBaseOM +from gspylib.common.ParameterParsecheck import Parameter +from impl.preinstall.OLAP.PreinstallImplOLAP import PreinstallImplOLAP +from gspylib.threads.SshTool import SshTool + +############################################################################# +# Global variables +############################################################################# +userNameFirtChar = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '0'] + + +class Preinstall(ParallelBaseOM): + def __init__(self): + ParallelBaseOM.__init__(self) + self.password = "" + self.envParams = [] + self.rootUser = "" + self.rootPasswd = "" + self.createUserSshTrust = True + self.clusterToolPath = "" + self.needFixOwnerPaths = [] + self.preMode = False + self.skipOSSet = False + self.skipHostnameSet = False + self.passwordsec = "" + self.corePath = "" + self.is_new_root_path = False + + def usage(self): + """ +gs_preinstall is a utility to create an installation environment for a cluster. + +Usage: + gs_preinstall -? | --help + gs_preinstall -V | --version + gs_preinstall -U USER -G GROUP -X XMLFILE + [-L] [--skip-os-set] [--env-var="ENVVAR" [...]] + [--sep-env-file=ENVFILE] [--skip-hostname-set] [-l LOGFILE] + [--non-interactive] + +General options: + -U Cluster user. + -G Group of the cluster user. + -X Path of the XML configuration file. + -L Only perform preinstallation on local + nodes. + --skip-os-set Whether to skip OS parameter setting. + (The default value is set.) + --env-var="ENVVAR" OS user environment variables. + --sep-env-file=ENVFILE Path of the MPP environment file. + --skip-hostname-set Whether to skip hostname setting. + (The default value is set.) + -l Path of log file. + -?, --help Show help information for this + utility, and exit the command line mode. + -V, --version Show version information. + --non-interactive Pre-execution of non-secure mode. + If it is not specified, you can choose + whether create the SSH trust for root + user or cluster user. + If it is specified, you must ensure the + SSH trust for root user and cluster + user have been created. + """ + print(self.usage.__doc__) + + # get parameter from command + def parseCommandLine(self): + """ + function: Parse command line and save to global variable + input: NA + output: NA + """ + # init the ParaObj + ParaObj = Parameter() + ParaDict = ParaObj.ParameterCommandLine("preinstall") + # parameter -h or -? 
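+        # (helpFlag is set by the common Parameter parser when -? or
+        # --help is passed)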
+ if (ParaDict.__contains__("helpFlag")): + self.usage() + sys.exit(0) + + # Resolves command line arguments + # parameter -U + if (ParaDict.__contains__("user")): + self.user = ParaDict.get("user") + DefaultValue.checkPathVaild(self.user) + # parameter -G + if (ParaDict.__contains__("group")): + self.group = ParaDict.get("group") + # parameter -X + if (ParaDict.__contains__("confFile")): + self.xmlFile = ParaDict.get("confFile") + # parameter -L + if (ParaDict.__contains__("localMode")): + self.localMode = ParaDict.get("localMode") + # parameter -l + if (ParaDict.__contains__("logFile")): + self.logFile = ParaDict.get("logFile") + # parameter --env-var + if (ParaDict.__contains__("envparams")): + self.envParams = ParaDict.get("envparams") + # parameter --sep-env-file + if (ParaDict.__contains__("mpprcFile")): + self.mpprcFile = ParaDict.get("mpprcFile") + DefaultValue.checkPathVaild(self.mpprcFile) + # parameter --skip-hostname-set + if (ParaDict.__contains__("skipHostnameSet")): + self.skipHostnameSet = ParaDict.get("skipHostnameSet") + # parameter --skip-os-set + if (ParaDict.__contains__("skipOSSet")): + self.skipOSSet = ParaDict.get("skipOSSet") + # parameter --non-interactive + if (ParaDict.__contains__("preMode")): + self.preMode = ParaDict.get("preMode") + + def checkUserParameter(self, check_clusterInfo): + """ + """ + if (self.user == ""): + GaussLog.exitWithError( + ErrorCode.GAUSS_500["GAUSS_50001"] % 'U' + ".") + elif (":" in self.user): + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50004"] % 'U') + + # check group info + self.checkGroupParameter() + + # check if user exists + cmd = "cat /etc/passwd|grep -v nologin|grep -v halt|" \ + "grep -v shutdown|awk -F: '{ print $1 }'|" \ + " grep '^%s$' 2>/dev/null" % self.user + status = subprocess.getstatusoutput(cmd)[0] + if status == 0: + if pwd.getpwnam(self.user).pw_uid == 0: + # user exists and uid is 0, exit. + GaussLog.exitWithError(ErrorCode.GAUSS_503["GAUSS_50302"]) + + # check the local user and the localmode, + # if user not exist exit with error + if (self.localMode): + try: + DefaultValue.getUserId(self.user) + except Exception as e: + GaussLog.exitWithError(str(e)) + + def checkGroupParameter(self): + """ + """ + if (self.group == ""): + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50001"] + % 'G' + ".") + if (self.user == "root" or self.group == "root"): + GaussLog.exitWithError(ErrorCode.GAUSS_503["GAUSS_50301"] + + "User:Group[%s:%s]." 
+ % (self.user, self.group)) + + def checkUserAndGroup(self): + """ + """ + if (self.localMode): + usergroup = grp.getgrgid(pwd.getpwnam(self.user).pw_gid).gr_name + if (self.group != usergroup): + GaussLog.exitWithError(ErrorCode.GAUSS_503["GAUSS_50305"] + + "User:Group[%s:%s]" + % (self.user, self.group)) + + def checkConfigFile(self): + """ + """ + if (self.xmlFile == ""): + GaussLog.exitWithError( + ErrorCode.GAUSS_500["GAUSS_50001"] % 'X' + ".") + if (not os.path.exists(self.xmlFile)): + GaussLog.exitWithError( + ErrorCode.GAUSS_502["GAUSS_50201"] % self.xmlFile) + if (not os.path.isabs(self.xmlFile)): + GaussLog.exitWithError( + ErrorCode.GAUSS_502["GAUSS_50213"] % "configuration file") + + def checkEnvValueParameter(self): + """ + """ + for param in self.envParams: + # check environmental variables vaild + if (param.find("\'") >= 0 or param.find(";") >= 0): + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50004"] + % "-env-var" + " There are illegal" + " characters in the" + " parameter.") + + def checkLogFile(self): + """ + """ + if (self.logFile == ""): + self.logFile = self.getPreOMLogPath( + DefaultValue.PREINSTALL_LOG_FILE, self.xmlFile) + if (not os.path.isabs(self.logFile)): + GaussLog.exitWithError(ErrorCode.GAUSS_502["GAUSS_50213"] + % self.logFile) + + def checkMpprcFile(self): + """ + """ + if (self.mpprcFile == ""): + return + + if (not os.path.isabs(self.mpprcFile)): + GaussLog.exitWithError(ErrorCode.GAUSS_512["GAUSS_51206"] + % self.mpprcFile) + + # check mpprc file path + mpprcFilePath = os.path.normpath(self.mpprcFile) + if (mpprcFilePath == "/home/%s" % self.user): + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50004"] % \ + '-sep-env-file' + " The file [%s] can not" + " be a reserved home " + "directory." + % self.mpprcFile) + if (os.path.isdir(self.mpprcFile)): + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50004"] % \ + '-sep-env-file' + " The file [%s] can not " + "be a directory." + % self.mpprcFile) + + DefaultValue.checkMpprcFileChange(self.mpprcFile, "", self.mpprcFile) + (checkstatus, checkoutput) = DefaultValue.checkEnvFile(self.mpprcFile) + if (not checkstatus): + if (self.mpprcFile != ""): + envfile = self.mpprcFile + " and /etc/profile" + else: + envfile = "/etc/profile and ~/.bashrc" + GaussLog.exitWithError(ErrorCode.GAUSS_518["GAUSS_51808"] % \ + checkoutput + "Please check %s." 
% envfile) + + def checkParameter(self): + """ + function: Check parameter from command line + input: NA + output: NA + """ + # remove HOST_IP info with /etc/profile and environ + cmd = "sed -i '/^export[ ]*HOST_IP=/d' /etc/profile" + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + self.logger.logExit(ErrorCode.GAUSS_502["GAUSS_50205"] + % "/etc/profile" + "The cmd is %s" % cmd) + if "HOST_IP" in os.environ.keys(): + os.environ.pop("HOST_IP") + + # check config file + self.checkConfigFile() + check_clusterInfo = dbClusterInfo() + check_clusterInfo.initFromXml(self.xmlFile) + # check user info + self.checkUserParameter(check_clusterInfo) + # check user group match + self.checkUserAndGroup() + # check env-val + self.checkEnvValueParameter() + # check mpprc file + self.checkMpprcFile() + + # check log file + self.checkLogFile() + + # set LD_LIBRARY_PATH add local lib + def setLibPath(self): + package_path = os.path.dirname(os.path.realpath(__file__)) + ld_path = package_path + "/gspylib/clib" + rerun = True + + if 'LD_LIBRARY_PATH' not in os.environ: + os.environ['LD_LIBRARY_PATH'] = ld_path + elif not os.environ.get('LD_LIBRARY_PATH').startswith(ld_path): + os.environ['LD_LIBRARY_PATH'] = \ + ld_path + ":" + os.environ['LD_LIBRARY_PATH'] + else: + rerun = False + + if rerun: + try: + os.execve(os.path.realpath(__file__), sys.argv, os.environ) + except Exception as e: + GaussLog.exitWithError(str(e)) + + # decompress version.cfg from bz2 + def decompressVersioncfg(self): + package_path = os.path.dirname(os.path.realpath(__file__)) + toolpath = package_path + "/../" + cmd = "cd " + toolpath + \ + " && tar -xpf `head -1 version.cfg`*.tar.bz2 ./version.cfg" + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + GaussLog.exitWithError(ErrorCode.GAUSS_502["GAUSS_50217"] + % "version.cfg" + "The cmd is %s. " % cmd + + "The output is %s." 
% output) + + # init global variables + def initGlobals(self): + """ + function: init global parameters + input: NA + output: NA + """ + # init the log file + self.initLogger("gs_preinstall") + + # get the clusterToolPath + self.clusterToolPath = self.getPreClusterToolPath(self.xmlFile) + temp_nodes = self.getOneClusterConfigItem("nodeNames", self.xmlFile) + if len(temp_nodes.split(',')) < 2: + self.isSingle = True + os.environ[DefaultValue.TOOL_PATH_ENV] = self.clusterToolPath + + self.logger.log("Parsing the configuration file.", "addStep") + try: + # parse the configuration file + self.initClusterInfo() + self.sshTool = SshTool(self.clusterInfo.getClusterNodeNames(), + self.logFile, + DefaultValue.TIMEOUT_PSSH_PREINSTALL) + + except Exception as e: + self.logger.logExit(str(e)) + + # check the local hostname + if DefaultValue.GetHostIpOrName() not in \ + self.clusterInfo.getClusterNodeNames(): + self.logger.logExit(ErrorCode.GAUSS_516["GAUSS_51619"] + % DefaultValue.GetHostIpOrName()) + self.logger.log("Successfully parsed the configuration file.", + "constant") + + def getPreOMLogPath(self, logName, xml): + """ + function: get the OM log path + input: logName, xml + output: fullLogPath + """ + try: + fullLogPath = "" + # get the log path + configedLogPath = self.getOneClusterConfigItem("gaussdbLogPath", + xml) + DefaultValue.checkPathVaild(configedLogPath) + # check gaussdbLogPath is not null + if configedLogPath == "": + fullLogPath = "%s/%s/om/%s" % ( + DefaultValue.GAUSSDB_DIR, self.user, logName) + else: + fullLogPath = "%s/%s/om/%s" % ( + os.path.normpath(configedLogPath), self.user, logName) + + return fullLogPath + except Exception as e: + GaussLog.exitWithError(str(e)) + + def getOneClusterConfigItem(self, item_name, xml): + """ + function: get the item_name's value in xml file + input: item_name, xml + output: item_name's value in the xml + """ + try: + # set the environment variable + os.putenv("CLUSTERCONFIGFILE", xml) + # get the item_name's value in the xml + (retStatus, retValue) = readOneClusterConfigItem( + initParserXMLFile(xml), item_name, "cluster") + if (retStatus == 0): + return os.path.normpath(retValue) + elif (retStatus == 2): + return "" + else: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50204"] + % "the cluster configuration item file" + + " Error: \n%s." 
% retValue) + except Exception as e: + GaussLog.exitWithError(str(e)) + + def getPreClusterToolPath(self, xml): + """ + function: get the cluster tool path + input: xml + output: configedPath + """ + try: + # get the cluster tool path in the xml file + configedPath = self.getOneClusterConfigItem("gaussdbToolPath", xml) + DefaultValue.checkPathVaild(configedPath) + # check the gaussdbToolPath is not null + if configedPath == "": + configedPath = DefaultValue.CLUSTER_TOOL_PATH + return configedPath + except Exception as e: + self.context.logger.logExit(str(e)) + + def change_lib_path(self): + """ + if gs_preinstall current path is /root/gauss_om/username, + so change its lib path + :return: + """ + gsom_path = os.path.realpath( + os.path.join(os.path.realpath(__file__), "../../../")) + package_path = os.path.dirname(os.path.realpath(__file__)) + lib_path = os.path.join(package_path, "lib") + sys.path.insert(0, lib_path) + if gsom_path == DefaultValue.ROOT_SCRIPTS_PATH: + self.is_new_root_path = True + + +def clearHistTimeFormat(): + cmd = "sed -i '/HISTTIMEFORMAT=/d' /etc/profile" + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + GaussLog.exitWithError("Clear HISTTIMEFORMAT from /etc/profile " + "failed.\nError: %s\nThe cmd is: %s\n" % + (output,cmd)) + +if __name__ == '__main__': + """ + main function + """ + # check if user is root + if os.getuid() != 0: + GaussLog.exitWithError(ErrorCode.GAUSS_501["GAUSS_50104"]) + clearHistTimeFormat() + try: + # Objectize class + preinstall = Preinstall() + # set LD_LIBRARY_PATH + preinstall.setLibPath() + # decompress version.cfg + preinstall.decompressVersioncfg() + # parse cmd lines + preinstall.parseCommandLine() + # check parameters + preinstall.checkParameter() + # init global variables + preinstall.initGlobals() + preinstall.change_lib_path() + impl = PreinstallImplOLAP(preinstall) + # Perform the whole extand process + impl.run() + except Exception as e: + GaussLog.exitWithError(str(e)) diff --git a/script/gs_ssh b/script/gs_ssh new file mode 100644 index 0000000..bb15220 --- /dev/null +++ b/script/gs_ssh @@ -0,0 +1,224 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +############################################################################# +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +# Description : gs_ssh is a utility to execute one command on all nodes. 
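+#               The command is wrapped in a temporary shell script that is
+#               copied to each cluster node and executed there via SshTool.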
+############################################################################# +import os +import sys +package_path = os.path.dirname(os.path.realpath(__file__)) +ld_path = package_path + "/gspylib/clib" +if 'LD_LIBRARY_PATH' not in os.environ: + os.environ['LD_LIBRARY_PATH'] = ld_path + os.execve(os.path.realpath(__file__), sys.argv, os.environ) +if not os.environ.get('LD_LIBRARY_PATH').startswith(ld_path): + os.environ['LD_LIBRARY_PATH'] = \ + ld_path + ":" + os.environ['LD_LIBRARY_PATH'] + os.execve(os.path.realpath(__file__), sys.argv, os.environ) + +from gspylib.common.GaussLog import GaussLog +from gspylib.common.Common import DefaultValue +from gspylib.common.ErrorCode import ErrorCode +from gspylib.common.DbClusterInfo import dbClusterInfo +from gspylib.threads.SshTool import SshTool +from gspylib.common.ParameterParsecheck import Parameter +from gspylib.common.ParallelBaseOM import ParallelBaseOM +from gspylib.os.gsOSlib import g_OSlib +from gspylib.os.gsfile import g_file +from gspylib.common.VersionInfo import VersionInfo + +class ParallelSsh(ParallelBaseOM): + """ + The class is used to execute one command on all nodes. + """ + + def __init__(self): + """ + function: initialize the parameters + input : NA + output: NA + """ + ParallelBaseOM.__init__(self) + self.userInfo = "" + self.cmd = "" + + def usage(self): + """ +gs_ssh is a utility to execute one command on all %s cluster nodes. + +Usage: + gs_ssh -? | --help + gs_ssh -V | --version + gs_ssh -c COMMAND + +General options: + -c Command to be executed in cluster. + -?, --help Show help information for this utility, + and exit the command line mode. + -V, --version Show version information. + """ + print(self.usage.__doc__ % VersionInfo.PRODUCT_NAME) + + def parseCommandLine(self): + """ + function: parse command line + input : NA + output: NA + """ + ##Parse command + ParaObj = Parameter() + ParaDict = ParaObj.ParameterCommandLine("ssh") + # If help is included in the parameter, + # the help message is printed and exited + if (ParaDict.__contains__("helpFlag")): + self.usage() + sys.exit(0) + # Gets the cmd parameter + if (ParaDict.__contains__("cmd")): + self.cmd = ParaDict.get("cmd") + # The cmd parameter is required + if (self.cmd == ""): + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50001"] + % 'c' + ".") + + def initGlobal(self): + """ + function: Init global parameter + input : NA + output: NA + """ + try: + # Get user information + self.user = g_OSlib.getUserInfo()["name"] + self.clusterInfo = dbClusterInfo() + self.clusterInfo.initFromStaticConfig(self.user) + + nodeNames = self.clusterInfo.getClusterNodeNames() + self.sshTool = SshTool(nodeNames) + except Exception as e: + GaussLog.exitWithError(str(e)) + + def executeCommand(self): + """ + function: execute command + input : NA + output: NA + """ + failedNodes = "" + succeedNodes = "" + try: + # Queries the existence of objects that + # the command executes in all nodes + command = (self.cmd.strip()).split(" ") + checkCmd = g_file.SHELL_CMD_DICT["getFullPathForShellCmd"] % \ + command[0] + (status, output) = self.sshTool.getSshStatusOutput(checkCmd) + # Resolve all node execution results + for node in status.keys(): + if (status[node] != DefaultValue.SUCCESS): + failedNodes += "%s " % node + else: + succeedNodes += "%s " % node + if (failedNodes != ""): + GaussLog.exitWithError(ErrorCode.GAUSS_524["GAUSS_52403"] + % (command[0], failedNodes)) + failedNodes = "" + succeedNodes = "" + executeCmd = self.cmd + 
+            #############################################################
+            # Create a temporary shell file
+            cmdFile = "%s/ClusterCall_%d.sh"\
+                      % (DefaultValue.getTmpDirFromEnv(), os.getpid())
+
+            g_file.createFile(cmdFile, True, DefaultValue.FILE_MODE)
+
+            # Write the cmd command to the shell file
+            with open(cmdFile, "a") as fp:
+                fp.write("#!/bin/sh")
+                fp.write(os.linesep)
+                fp.write("%s" % executeCmd)
+                fp.write(os.linesep)
+                fp.flush()
+
+            ##############################################################
+            cmdDir = DefaultValue.getTmpDirFromEnv() + '/'
+            # Distribute the shell file to the temporary directory
+            # for each node
+            self.sshTool.scpFiles(cmdFile, cmdDir)
+            # Execute the shell file on all nodes
+            cmdExecute = g_file.SHELL_CMD_DICT["execShellFile"] % cmdFile
+            (status, output) = self.sshTool.getSshStatusOutput(cmdExecute)
+            # Resolve the execution results of all nodes
+            for node in status.keys():
+                if (status[node] != DefaultValue.SUCCESS):
+                    failedNodes += "%s " % node
+                else:
+                    succeedNodes += "%s " % node
+            # Some nodes failed to execute the command
+            if (failedNodes != "" and succeedNodes != ""):
+                GaussLog.printMessage(
+                    "Failed to execute command on %s." % failedNodes)
+                GaussLog.printMessage(
+                    "Successfully executed command on %s.\n" % succeedNodes)
+            # All nodes executed the command successfully
+            elif (failedNodes == ""):
+                GaussLog.printMessage(
+                    "Successfully executed command on all nodes.\n")
+            # All nodes failed to execute the command
+            elif (succeedNodes == ""):
+                GaussLog.printMessage(
+                    "Failed to execute command on all nodes.\n")
+            # Output the execution result
+            GaussLog.printMessage("Output:\n%s" % output)
+            # Delete the temporary shell file on all nodes
+            cmdFileRm = g_file.SHELL_CMD_DICT["deleteFile"]\
+                        % (cmdFile, cmdFile)
+            self.sshTool.executeCommand(cmdFileRm, "remove files")
+
+        except Exception as e:
+            cmdFileRm = g_file.SHELL_CMD_DICT["deleteFile"]\
+                        % (cmdFile, cmdFile)
+            self.sshTool.executeCommand(cmdFileRm, "remove files")
+            GaussLog.exitWithError(str(e))
+
+    def run(self):
+        """
+        function: Perform the whole process
+        input : NA
+        output: NA
+        """
+        # parse cmd lines
+        self.parseCommandLine()
+        # init globals
+        self.initGlobal()
+        # execute command
+        self.executeCommand()
+
+
+if __name__ == '__main__':
+    # main function
+    # Can not run as root
+    if (os.getuid() == 0):
+        GaussLog.exitWithError(ErrorCode.GAUSS_501["GAUSS_50105"])
+
+    try:
+        parallelSsh = ParallelSsh()
+        parallelSsh.run()
+    except Exception as e:
+        GaussLog.exitWithError(str(e))
+
+    sys.exit(0)
diff --git a/script/gs_sshexkey b/script/gs_sshexkey
new file mode 100644
index 0000000..514f58a
--- /dev/null
+++ b/script/gs_sshexkey
@@ -0,0 +1,1217 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+#############################################################################
+# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+#
+# openGauss is licensed under Mulan PSL v2.
+# You can use this software according to the terms
+# and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+#
+# http://license.coscl.org.cn/MulanPSL2
+#
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+# ----------------------------------------------------------------------------
+# Description : gs_sshexkey is a utility to create SSH trust among nodes in
+#               a cluster.
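+#
+# Example (illustrative), with a host file listing one node IP per line:
+#     gs_sshexkey -f /path/to/hostfile
+# The password of the current user is then read from the terminal or stdin.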
+############################################################################# + +import sys +import warnings + +warnings.simplefilter('ignore', DeprecationWarning) +sys.path.append(sys.path[0] + "/../lib") +import time +import os +import subprocess +import pwd +import grp +import socket +import getpass +import shutil +package_path = os.path.dirname(os.path.realpath(__file__)) +ld_path = package_path + "/gspylib/clib" +if 'LD_LIBRARY_PATH' not in os.environ: + os.environ['LD_LIBRARY_PATH'] = ld_path + os.execve(os.path.realpath(__file__), sys.argv, os.environ) +if not os.environ.get('LD_LIBRARY_PATH').startswith(ld_path): + os.environ['LD_LIBRARY_PATH'] = \ + ld_path + ":" + os.environ['LD_LIBRARY_PATH'] + os.execve(os.path.realpath(__file__), sys.argv, os.environ) + +from gspylib.common.GaussLog import GaussLog +from gspylib.common.ErrorCode import ErrorCode +from gspylib.threads.parallelTool import parallelTool +from gspylib.common.Common import DefaultValue, ClusterCommand +from gspylib.common.ParameterParsecheck import Parameter +from gspylib.os.gsfile import g_file +from gspylib.os.gsOSlib import g_OSlib + +DefaultValue.doConfigForParamiko() +import paramiko + +HOSTS_MAPPING_FLAG = "#Gauss OM IP Hosts Mapping" +ipHostInfo = "" +# the tmp path +tmp_files = "" +# tmp file name +TMP_TRUST_FILE = "step_preinstall_file.dat" + + +class PrintOnScreen(): + """ + class about print on screen + """ + + def __init__(self): + """ + function : Constructor + input: NA + output: NA + """ + pass + + def log(self, msg): + """ + function : print log + input: msg: str + output: NA + """ + print(msg) + + def debug(self, msg): + """ + function : debug + input: msg: debug message string + output: NA + """ + pass + + def logExit(self, msg): + """ + function : print log and exit + input: msg: str + output: NA + """ + print(msg) + sys.exit(1) + + +class GaussCreateTrust(): + """ + class about create trust for user + """ + log_list = ["addStep", + "constant", + "Checking network information.", + "Successfully checked network information.", + "Creating the local key file.", + "Successfully created the local key files.", + "Appending local ID to authorized_keys.", + "Successfully appended local ID to authorized_keys.", + "Updating the known_hosts file.", + "Successfully updated the known_hosts file.", + "Appending authorized_key on the remote node.", + "Successfully appended authorized_key on all remote node.", + "Checking common authentication file content.", + "Successfully checked common authentication content.", + "Distributing SSH trust file to all node.", + "Successfully distributed SSH trust file to all node.", + "Verifying SSH trust on all hosts.", + "Successfully verified SSH trust on all hosts.", + ] + + def __init__(self): + """ + function : Constructor + input: NA + output: NA + """ + self.logger = None + self.hostFile = "" + self.hostList = [] + self.passwd = [] + self.logFile = "" + self.localHost = "" + self.flag = False + self.logger = None + self.localID = "" + self.user = pwd.getpwuid(os.getuid()).pw_name + self.group = grp.getgrgid(os.getgid()).gr_name + self.incorrectPasswdInfo = "" + self.failedToAppendInfo = "" + self.homeDir = os.path.expanduser("~" + self.user) + self.sshDir = "%s/.ssh" % self.homeDir + self.authorized_keys_fname = '%s/.ssh/authorized_keys' % self.homeDir + self.known_hosts_fname = '%s/.ssh/known_hosts' % self.homeDir + self.id_rsa_fname = '%s/.ssh/id_rsa' % self.homeDir + self.id_rsa_pub_fname = self.id_rsa_fname + '.pub' + self.skipHostnameSet = False + 
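        # Whether the password was typed in interactively (see
+        # getUserPasswd), and whether de-duplication of existing /etc/hosts
+        # entries is disabled (the "noDeduplicate" option).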
+        self.isKeyboardPassword = False
+        self.nodeduplicate = False
+
+    def usage(self):
+        """
+gs_sshexkey is a utility to create SSH trust among nodes in a cluster.
+
+Usage:
+  gs_sshexkey -? | --help
+  gs_sshexkey -V | --version
+  gs_sshexkey -f HOSTFILE [-l LOGFILE] [--skip-hostname-set]
+  gs_sshexkey -h IPLIST [-l LOGFILE] [--skip-hostname-set]
+
+General options:
+  -f                             Host file containing the IP address of nodes.
+  -h                             Host IP list. Separate multiple nodes with
+                                 commas (,).
+  -l                             Path of log file.
+  --skip-hostname-set            Whether to skip hostname setting.
+                                 (By default, hostnames are set.)
+  -?, --help                     Show help information for this utility,
+                                 and exit the command line mode.
+  -V, --version                  Show version information.
+        """
+        print(self.usage.__doc__)
+
+    def parseCommandLine(self):
+        """
+        function: Parse parameters from the command line
+        input : NA
+        output: NA
+        """
+        paraObj = Parameter()
+        paraDict = paraObj.ParameterCommandLine("sshexkey")
+        if ("helpFlag" in paraDict.keys()):
+            self.usage()
+            sys.exit(0)
+
+        if ("hostfile" in paraDict.keys()):
+            self.hostFile = paraDict.get("hostfile")
+        if ("nodename" in paraDict.keys()):
+            self.hostList = paraDict.get("nodename")
+        if ("logFile" in paraDict.keys()):
+            self.logFile = paraDict.get("logFile")
+        if ("skipHostnameSet" in paraDict.keys()):
+            self.skipHostnameSet = paraDict.get("skipHostnameSet")
+        if ("noDeduplicate" in paraDict.keys()):
+            self.nodeduplicate = paraDict.get("noDeduplicate")
+
+    def checkParameter(self):
+        """
+        function: Check parameters from the command line
+        input : NA
+        output: NA
+        """
+        # check required parameters
+        if len(self.hostList) == 0:
+            if (self.hostFile == ""):
+                self.usage()
+                GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50001"]
+                                       % 'f' + ".")
+            if (not os.path.exists(self.hostFile)):
+                GaussLog.exitWithError(ErrorCode.GAUSS_502["GAUSS_50201"]
+                                       % self.hostFile)
+            if (not os.path.isabs(self.hostFile)):
+                GaussLog.exitWithError(ErrorCode.GAUSS_502["GAUSS_50213"]
+                                       % self.hostFile)
+
+            # read host file to hostList
+            self.readHostFile()
+
+            if (self.hostList == []):
+                GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50004"]
+                                       % 'f' + " It cannot be empty.")
+        else:
+            for temp_host in self.hostList:
+                if not DefaultValue.isIpValid(temp_host):
+                    GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50000"]
+                                           % temp_host)
+        # check logfile
+        if (self.logFile != ""):
+            if (not os.path.isabs(self.logFile)):
+                GaussLog.exitWithError(ErrorCode.GAUSS_502["GAUSS_50213"]
+                                       % self.logFile)
+
+        if (not self.passwd):
+            self.passwd = self.getUserPasswd()
+            self.isKeyboardPassword = True
+
+    def logOut(self, log_index1, log_index2):
+        """
+        function: log output
+        :param log_index1: index of the log
+        :param log_index2: index of the log
+        :return:
+        """
+        if (self.logFile != ""):
+            if (not os.path.exists(tmp_files)):
+                self.logger.log(GaussCreateTrust.log_list[log_index1],
+                                GaussCreateTrust.log_list[log_index2])
+            else:
+                self.logger.log(GaussCreateTrust.log_list[log_index1])
+        else:
+            self.logger.log(GaussCreateTrust.log_list[log_index1])
+
+    def readHostFile(self):
+        """
+        function: read host file to hostList
+        input : NA
+        output: NA
+        """
+        inValidIp = []
+        try:
+            with open(self.hostFile, "r") as f:
+                for readLine in f:
+                    hostname = readLine.strip().split("\n")[0]
+                    if hostname != "" and hostname not in self.hostList:
+                        if not DefaultValue.isIpValid(hostname):
+                            inValidIp.append(hostname)
+                            continue
+                        self.hostList.append(hostname)
+            if len(inValidIp) > 0:
+                GaussLog.exitWithError(ErrorCode.GAUSS_506["GAUSS_50603"]
+                                       + "The IP list is:%s."
+                                       % inValidIp)
+        except Exception as e:
+            raise Exception(ErrorCode.GAUSS_502["GAUSS_50204"] % "host file"
+                            + " Error: \n%s" % str(e))
+
+    def getAllHostsName(self, ip):
+        """
+        function:
+            Connect to all nodes, then get all hostnames by threading
+        precondition:
+            1.User's password is correct on each node
+        postcondition:
+            NA
+        input: ip
+        output: Dictionary ipHostname, key is IP and value is hostname
+        hideninfo:NA
+        """
+
+        ipHostname = {}
+        try:
+            ssh = paramiko.Transport((ip, 22))
+        except Exception as e:
+            raise Exception(ErrorCode.GAUSS_512["GAUSS_51220"] % ip
+                            + " Error: \n%s" % str(e))
+        try:
+            ssh.connect(username=self.user, password=self.passwd[0])
+        except Exception as e:
+            ssh.close()
+            raise Exception(ErrorCode.GAUSS_503["GAUSS_50306"] % ip)
+
+        check_channel = ssh.open_session()
+        cmd = "cd"
+        check_channel.exec_command(cmd)
+        env_msg = check_channel.recv_stderr(9999).decode()
+        while True:
+            channel_read = check_channel.recv(9999).decode().strip()
+            if (len(channel_read) != 0):
+                env_msg += str(channel_read)
+            else:
+                break
+        if (env_msg != ""):
+            ipHostname["Node[%s]" % ip] = "Output: [" + env_msg \
+                                          + " ] printed by /etc/profile or" \
+                                            " ~/.bashrc, please check it."
+            return ipHostname
+
+        channel = ssh.open_session()
+        cmd = "hostname"
+        channel.exec_command(cmd)
+        hostname = channel.recv(9999).decode().strip()
+        ipHostname[ip] = hostname
+        ssh.close()
+        return ipHostname
+
+    def verifyPasswd(self, ssh, pswd=None):
+        try:
+            ssh.connect(username=self.user, password=pswd)
+            return True
+        except Exception:
+            ssh.close()
+            return False
+
+    def parallelGetHosts(self, sshIps):
+        parallelResult = {}
+        ipHostname = parallelTool.parallelExecute(self.getAllHostsName, sshIps)
+
+        err_msg = ""
+        for i in ipHostname:
+            for (key, value) in i.items():
+                if (key.find("Node") >= 0):
+                    err_msg += str(i)
+                else:
+                    parallelResult[key] = value
+        if (len(err_msg) > 0):
+            raise Exception(ErrorCode.GAUSS_518["GAUSS_51808"] % err_msg)
+        return parallelResult
+
+    def serialGetHosts(self, sshIps):
+        serialResult = {}
+        invalidIP = ""
+        boolInvalidIp = False
+        for sshIp in sshIps:
+            isPasswdOK = False
+            for pswd in self.passwd:
+                try:
+                    ssh = paramiko.Transport((sshIp, 22))
+                except Exception as e:
+                    self.logger.debug(str(e))
+                    invalidIP += "Incorrect IP address: %s.\n" % sshIp
+                    boolInvalidIp = True
+                    break
+
+                isPasswdOK = self.verifyPasswd(ssh, pswd)
+                if (isPasswdOK):
+                    break
+
+            if (boolInvalidIp):
+                boolInvalidIp = False
+                continue
+
+            if (not isPasswdOK and self.isKeyboardPassword):
+                GaussLog.printMessage("Please enter password for current"
+                                      " user[%s] on the node[%s]."
+                                      % (self.user, sshIp))
+                # Try entering the password 3 times interactively
+                for i in range(3):
+                    KeyboardPassword = getpass.getpass()
+                    DefaultValue.checkPasswordVaild(KeyboardPassword)
+                    ssh = paramiko.Transport((sshIp, 22))
+                    isPasswdOK = self.verifyPasswd(ssh, KeyboardPassword)
+                    if (isPasswdOK):
+                        self.passwd.append(KeyboardPassword)
+                        break
+                    else:
+                        continue
+            # If isKeyboardPassword is true and the password is still wrong
+            # after 3 attempts, raise an exception and exit
+            if (not isPasswdOK):
+                raise Exception(ErrorCode.GAUSS_503["GAUSS_50306"] % sshIp)
+
+            cmd = "cd"
+            check_channel = ssh.open_session()
+            check_channel.exec_command(cmd)
+            check_result = check_channel.recv_stderr(9999).decode()
+            while True:
+                channel_read = check_channel.recv(9999).decode()
+                if (len(channel_read) != 0):
+                    check_result += str(channel_read)
+                else:
+                    break
+
+            if (check_result != ""):
+                raise Exception(ErrorCode.GAUSS_518["GAUSS_51808"]
+                                % check_result + "Please check %s node"
+                                                 " /etc/profile or ~/.bashrc"
+                                % sshIp)
+            else:
+                cmd = "hostname"
+                channel = ssh.open_session()
+                channel.exec_command(cmd)
+                while True:
+                    hostname = channel.recv(9999).decode().strip()
+                    if (len(hostname) != 0):
+                        serialResult[sshIp] = hostname
+                    else:
+                        break
+                ssh.close()
+
+        if (invalidIP):
+            raise Exception(
+                ErrorCode.GAUSS_511["GAUSS_51101"] % invalidIP.rstrip("\n"))
+
+        return serialResult
+
+    def getAllHosts(self, sshIps):
+        """
+        function:
+            Connect to all nodes, then get all hostnames
+        precondition:
+            1.User's password is correct on each node
+        postcondition:
+            NA
+        input: sshIps, username, passwd
+        output: Dictionary ipHostname, key is IP and value is hostname
+        hideninfo:NA
+        """
+        if (self.logFile != ""):
+            if (not os.path.exists(tmp_files)):
+                self.logger.debug("Get hostnames for all nodes.", "addStep")
+            else:
+                self.logger.debug("Get hostnames for all nodes.")
+        if (len(self.passwd) == 0):
+            self.isKeyboardPassword = True
+            GaussLog.printMessage("Please enter password for current user[%s]."
+                                  % self.user)
+            passwd = getpass.getpass()
+            self.passwd.append(passwd)
+
+        if (len(self.passwd) == 1):
+            try:
+                result = self.parallelGetHosts(sshIps)
+            except Exception as e:
+                if (self.isKeyboardPassword and str(e).startswith(
+                        "[GAUSS-50306] : The password of")):
+                    GaussLog.printMessage(
+                        "Notice: The password of some nodes is incorrect.")
+                    result = self.serialGetHosts(sshIps)
+                else:
+                    raise Exception(str(e))
+        else:
+            result = self.serialGetHosts(sshIps)
+        if (self.logFile != ""):
+            if (not os.path.exists(tmp_files)):
+                self.logger.debug("Successfully get hostnames for all nodes.",
+                                  "constant")
+            else:
+                self.logger.debug("Successfully get hostnames for all nodes.")
+        return result
+
+    def writeLocalHosts(self, result):
+        """
+        function:
+            Write hostname and IP into /etc/hosts when the same entry is
+            not already present in the /etc/hosts file
+        precondition:
+            NA
+        postcondition:
+            NA
+        input: Dictionary result, key is IP and value is hostname
+        output: NA
+        hideninfo:NA
+        """
+        if (self.logFile != ""):
+            if (not os.path.exists(tmp_files)):
+                self.logger.debug(
+                    "Write local hostname and Ip into /etc/hosts.", "addStep")
+            else:
+                self.logger.debug(
+                    "Write local hostname and Ip into /etc/hosts.")
+        hostIPInfo = ""
+        if (os.getuid() == 0):
+            tmpHostIpName = "./tmp_hostsiphostname_%d" % os.getpid()
+            # Check if /etc/hosts exists.
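+            # Old "#Gauss OM IP Hosts Mapping" entries are stripped first;
+            # the fresh IP/hostname pairs are appended with the mapping
+            # flag afterwards.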
+ if (not os.path.exists("/etc/hosts")): + raise Exception(ErrorCode.GAUSS_512["GAUSS_51221"] + + " Error: \nThe /etc/hosts does not exist.") + cmd = "grep -v '" + HOSTS_MAPPING_FLAG + "' /etc/hosts| grep -v '^$'" + (status, output) = subprocess.getstatusoutput(cmd) + try: + g_file.createFile(tmpHostIpName) + g_file.changeMode(DefaultValue.KEY_FILE_MODE, tmpHostIpName) + g_file.writeFile(tmpHostIpName, [output]) + shutil.copyfile(tmpHostIpName, '/etc/hosts') + g_file.removeFile(tmpHostIpName) + except Exception as e: + if os.path.exists(tmpHostIpName): + g_file.removeFile(tmpHostIpName) + raise Exception(str(e)) + if not self.nodeduplicate: + ipCompare = [] + for line in output.split("\n"): + if line: + ipCompare.append(line.replace("\t", " ").strip().split(' ')[0]) + tmpResult = {} + for s_key in list(result.keys()): + if s_key not in ipCompare: + tmpResult[s_key] = result[s_key] + for (key, value) in tmpResult.items(): + hostIPInfo += '%s %s %s\n' % (key, value, HOSTS_MAPPING_FLAG) + else: + for (key, value) in result.items(): + hostIPInfo += '%s %s %s\n' % (key, value, HOSTS_MAPPING_FLAG) + hostIPInfo = hostIPInfo[:-1] + ipInfoList = [hostIPInfo] + g_file.writeFile("/etc/hosts", ipInfoList) + if (self.logFile != ""): + if (not os.path.exists(tmp_files)): + self.logger.debug( + "Successfully write local hostname and Ip into " + "/etc/hosts.", + "constant") + else: + self.logger.debug( + "Successfully write local hostname and Ip into " + "/etc/hosts.") + + def writeRemoteHostName(self, ip): + """ + function: + Write hostname and Ip into /etc/hosts when there's not the same one + in /etc/hosts file by threading + precondition: + NA + postcondition: + NA + input: ip + output: NA + hideninfo:NA + """ + writeResult = [] + result = {} + tmpHostIpName = "./tmp_hostsiphostname_%d_%s" % (os.getpid(), ip) + username = pwd.getpwuid(os.getuid()).pw_name + global ipHostInfo + try: + ssh = paramiko.Transport((ip, 22)) + except Exception as e: + raise Exception(ErrorCode.GAUSS_511["GAUSS_51107"] + + " Error: \n%s" % str(e)) + try: + ssh.connect(username=username, password=self.passwd[0]) + except Exception as e: + ssh.close() + raise Exception(ErrorCode.GAUSS_503["GAUSS_50317"] + + " Error: \n%s" % str(e)) + cmd = "grep -v '%s' %s | grep -v '^$'" \ + % (" #Gauss.* IP Hosts Mapping", '/etc/hosts') + channel = ssh.open_session() + channel.exec_command(cmd) + ipHosts = channel.recv(9999).decode().strip() + errInfo = channel.recv_stderr(9999).decode().strip() + cmd = "echo \"%s\" > %s ; cp %s %s && rm -rf %s" \ + % (ipHosts, tmpHostIpName, tmpHostIpName, '/etc/hosts', tmpHostIpName) + channel = ssh.open_session() + channel.exec_command(cmd) + ipHosts1 = channel.recv(9999).decode().strip() + errInfo1 = channel.recv_stderr(9999).decode().strip() + if ((errInfo + errInfo1)): + writeResult.append(errInfo + errInfo1) + else: + if (not ipHosts1): + if not self.nodeduplicate: + ipCompare = [] + for line in ipHosts.split("\n"): + if line: + ipCompare.append(line.replace("\t", " ").strip().split(' ')[0]) + tmpIpHostInfo = "" + ipArray = ipHostInfo.split("\n") + for info in ipArray: + hostname = info.split(' ')[0] + if hostname not in ipCompare: + tmpIpHostInfo += info + "\n" + cmd = "echo '%s' >> /etc/hosts" % tmpIpHostInfo + else: + cmd = "echo '%s' >> /etc/hosts" % ipHostInfo + channel = ssh.open_session() + channel.exec_command(cmd) + errInfo = channel.recv_stderr(9999).decode().strip() + if (errInfo): + writeResult.append(errInfo) + if channel: + channel.close() + result[ip] = writeResult + if (len(writeResult) 
> 0): + return (False, result) + else: + return (True, result) + + def writeRemoteHosts(self, result, username, rootPasswd): + """ + function: + Write hostname and Ip into /etc/hosts when there's not the same one + in /etc/hosts file + precondition: + NA + postcondition: + NA + input: Dictionary result,key is IP and value is hostname + rootPasswd + output: NA + hideninfo:NA + """ + if (self.logFile != ""): + if (not os.path.exists(tmp_files)): + self.logger.debug( + "Write remote hostname and Ip into /etc/hosts.", "addStep") + else: + self.logger.debug( + "Write remote hostname and Ip into /etc/hosts.") + global ipHostInfo + boolInvalidIp = False + ipHostInfo = "" + if (os.getuid() == 0): + writeResult = [] + tmpHostIpName = "./tmp_hostsiphostname_%d" % os.getpid() + + if (len(rootPasswd) == 1): + result1 = {} + for (key, value) in result.items(): + ipHostInfo += '%s %s %s\n' % (key, value, + HOSTS_MAPPING_FLAG) + if (value != self.localHost): + if (not value in result1.keys()): + result1[value] = key + + sshIps = result1.keys() + ipHostInfo = ipHostInfo[:-1] + if (sshIps): + ipRemoteHostname = parallelTool.parallelExecute( + self.writeRemoteHostName, sshIps) + errorMsg = "" + for (key, value) in ipRemoteHostname: + if (not key): + errorMsg = errorMsg + '\n' + str(value) + if (errorMsg != ""): + raise Exception(ErrorCode.GAUSS_512["GAUSS_51221"] + + " Error: %s" % errorMsg) + else: + for (key, value) in result.items(): + if (value == self.localHost): + continue + for pswd in rootPasswd: + try: + ssh = paramiko.Transport((key, 22)) + except Exception as e: + self.logger.debug(str(e)) + boolInvalidIp = True + break + try: + ssh.connect(username=username, password=pswd) + break + except Exception as e: + self.logger.debug(str(e)) + continue + if (boolInvalidIp): + boolInvalidIp = False + continue + cmd = "grep -v '%s' %s | grep -v '^$'" % ( + " #Gauss.* IP Hosts Mapping", '/etc/hosts') + channel = ssh.open_session() + channel.exec_command(cmd) + ipHosts = channel.recv(9999).decode().strip() + errInfo = channel.recv_stderr(9999).decode().strip() + cmd = "echo \"%s\" > %s ; cp %s %s && rm -rf %s" % ( + ipHosts, tmpHostIpName, tmpHostIpName, + '/etc/hosts', tmpHostIpName) + channel = ssh.open_session() + channel.exec_command(cmd) + ipHosts1 = channel.recv(9999).decode().strip() + errInfo1 = channel.recv_stderr(9999).decode().strip() + + if (errInfo + errInfo1): + writeResult.append(errInfo + errInfo1) + else: + if (not ipHosts1): + ipHostInfo = "" + if not self.nodeduplicate: + ipCompare = [] + for line in ipHosts.split("\n"): + if line: + ipCompare.append(line.replace("\t", " ").strip().split(' ')[0]) + for (key1, value1) in result.items(): + if key1 not in ipCompare: + ipHostInfo += '%s %s %s\n' % ( + key1, value1, HOSTS_MAPPING_FLAG) + else: + for (key1, value1) in result.items(): + ipHostInfo += '%s %s %s\n' % ( + key1, value1, HOSTS_MAPPING_FLAG) + ipHostInfo = ipHostInfo[:-1] + cmd = "echo '%s' >> /etc/hosts" % ipHostInfo + channel = ssh.open_session() + channel.exec_command(cmd) + errInfo = channel.recv_stderr( + 9999).decode().strip() + if (errInfo): + writeResult.append(errInfo) + + if channel: + channel.close() + + if (len(writeResult) > 0): + raise Exception(ErrorCode.GAUSS_512["GAUSS_51221"] + + " Error: \n%s" % writeResult) + if (self.logFile != ""): + if (not os.path.exists(tmp_files)): + self.logger.debug( + "Successfully write remote hostname and Ip into " + "/etc/hosts.", + "constant") + else: + self.logger.debug( + "Successfully write remote hostname and Ip into " + 
"/etc/hosts.") + + def initLogger(self): + """ + function: Init logger + input : NA + output: NA + """ + if (self.logFile != ""): + self.logger = GaussLog(self.logFile, "gs_sshexkey") + else: + self.logger = PrintOnScreen() + + def checkNetworkInfo(self): + """ + function: check local node to other node Network Information + input : NA + output: NA + """ + self.logOut(2, 0) + try: + netWorkList = DefaultValue.checkIsPing(self.hostList) + if not netWorkList: + self.logger.log("All nodes in the network are Normal.") + else: + self.logger.logExit(ErrorCode.GAUSS_506["GAUSS_50600"] + + "The IP list is:%s." % netWorkList) + except Exception as e: + self.logger.logExit(str(e)) + self.logOut(3, 1) + + def run(self): + """ + function: Do create SSH trust + input : NA + output: NA + """ + self.parseCommandLine() + self.checkParameter() + self.localHost = socket.gethostname() + + self.initLogger() + global tmp_files + tmp_files = "/tmp/%s" % TMP_TRUST_FILE + if (self.logFile != ""): + if (not os.path.exists(tmp_files)): + self.logger.debug( + "gs_sshexkey execution takes %s steps in total" % + ClusterCommand.countTotalSteps("gs_sshexkey", "", + self.skipHostnameSet)) + Ips = [] + Ips.extend(self.hostList) + result = self.getAllHosts(Ips) + self.checkNetworkInfo() + + if not self.skipHostnameSet: + self.writeLocalHosts(result) + self.writeRemoteHosts(result, self.user, self.passwd) + + self.logger.log("Creating SSH trust.") + try: + self.localID = self.createPublicPrivateKeyFile() + self.addLocalAuthorized() + self.updateKnow_hostsFile(result) + self.addRemoteAuthorization() + self.determinePublicAuthorityFile() + self.synchronizationLicenseFile() + self.verifyTrust() + self.logger.log("Successfully created SSH trust.") + except Exception as e: + self.logger.logExit(str(e)) + + def createPublicPrivateKeyFile(self): + """ + function: create local public private key file + input : NA + output: NA + """ + self.logOut(4, 0) + + if not os.path.exists(self.id_rsa_pub_fname): + cmd = 'ssh-keygen -t rsa -N \"\" -f ~/.ssh/id_rsa < /dev/null' + cmd += "&& chmod %s %s %s" % (DefaultValue.KEY_FILE_MODE, + self.id_rsa_fname, + self.id_rsa_pub_fname) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + self.logger.log("The cmd is %s " % cmd) + raise Exception(ErrorCode.GAUSS_511["GAUSS_51108"] + + " Error:\n%s" % output) + try: + try: + with open(self.id_rsa_pub_fname, 'r') as f: + return f.readline().strip() + except IOError as e: + self.logger.debug(str(e)) + raise Exception(ErrorCode.GAUSS_511["GAUSS_51108"] + + " Unable to read the generated file." + + self.id_rsa_pub_fname) + finally: + self.logOut(5, 1) + + def addLocalAuthorized(self): + """ + function: append the local id_rsa.pub value provided to authorized_keys + input : NA + output: NA + """ + self.logOut(6, 0) + g_file.createFileInSafeMode(self.authorized_keys_fname) + with open(self.authorized_keys_fname, 'a+') as f: + for line in f: + if line.strip() == self.localID: + # The localID is already in authorizedKeys; no need to add + return + f.write(self.localID) + f.write('\n') + self.logOut(7, 1) + g_file.changeMode(DefaultValue.KEY_FILE_MODE, + self.authorized_keys_fname) + + def checkAuthentication(self, hostname): + """ + function: Ensure the proper password-less access to the remote host. 
+ input : hostname + output: True/False, hostname + """ + cmd = 'ssh -n %s %s true' % (DefaultValue.SSH_OPTION, hostname) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + self.logger.debug("The cmd is %s " % cmd) + self.logger.debug( + "Failed to check authentication. Hostname:%s. Error: \n%s" + % (hostname, output)) + return (False, hostname) + return (True, hostname) + + def updateKnow_hostsFile(self, result): + """ + function: keyscan all hosts and update known_hosts file + input : result + output: NA + """ + self.logOut(8, 0) + hostnameList = [] + hostnameList.extend(self.hostList) + for (key, value) in result.items(): + hostnameList.append(value) + for hostname in hostnameList: + cmd = 'ssh-keyscan -t rsa %s >> %s ' % (hostname, + self.known_hosts_fname) + cmd += "&& chmod %s %s" % (DefaultValue.KEY_FILE_MODE, + self.known_hosts_fname) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + " Error:\n%s" % output) + (status, output) = self.checkAuthentication(self.localHost) + if not status: + raise Exception( + ErrorCode.GAUSS_511["GAUSS_51100"] % self.localHost) + self.logOut(9, 1) + + def tryParamikoConnect(self, hostname, client, pswd=None, silence=False): + """ + function: try paramiko connect + input : hostname, client, pswd, silence + output: True/False + """ + try: + client.connect(hostname, password=pswd, allow_agent=False, + look_for_keys=False) + return True + except paramiko.AuthenticationException as e: + if not silence: + self.logger.debug("Incorrect password. Node: %s." % hostname + + " Error:\n%s" % str(e)) + client.close() + return False + except Exception as e: + if not silence: + self.logger.debug('[SSHException %s] %s' % (hostname, str(e))) + client.close() + raise Exception(str(e)) + + def addRemoteAuthorization(self): + """ + function: Send local ID to remote over SSH, and append to + authorized_key + input : NA + output: NA + """ + self.logOut(10, 0) + try: + parallelTool.parallelExecute(self.sendRemoteAuthorization, + self.hostList) + if (self.incorrectPasswdInfo != ""): + self.logger.logExit(ErrorCode.GAUSS_511["GAUSS_51101"] + % (self.incorrectPasswdInfo.rstrip("\n"))) + if (self.failedToAppendInfo != ""): + self.logger.logExit(ErrorCode.GAUSS_511["GAUSS_51101"] + % (self.failedToAppendInfo.rstrip("\n"))) + except Exception as e: + self.logger.logExit(ErrorCode.GAUSS_511["GAUSS_51111"] + + " Error:%s." 
+                                % str(e))
+        self.logOut(11, 1)
+
+    def sendRemoteAuthorization(self, hostname):
+        """
+        function: send remote authorization
+        input : hostname
+        output: NA
+        """
+        if (hostname != self.localHost):
+            p = None
+            cin = cout = cerr = None
+            try:
+                # ssh Remote Connection other node
+                p = paramiko.SSHClient()
+                p.load_system_host_keys()
+                ok = self.tryParamikoConnect(hostname, p, self.passwd[0],
+                                             silence=True)
+                if not ok:
+                    for pswd in self.passwd[1:]:
+                        ok = self.tryParamikoConnect(hostname, p, pswd,
+                                                     silence=True)
+                        if ok:
+                            break
+                    if not ok:
+                        self.incorrectPasswdInfo += "Could not get the " \
+                                                    "correct password of " \
+                                                    "node [%s].\n" % \
+                                                    hostname
+                        return
+                # Create .ssh directory and ensure content meets permission
+                # requirements for password-less SSH
+                cmd = ('mkdir -p .ssh; ' + "chown -R %s:%s %s; " %
+                       (self.user, self.group, self.sshDir) +
+                       'chmod %s .ssh; '
+                       % DefaultValue.KEY_DIRECTORY_MODE +
+                       'touch .ssh/authorized_keys; ' +
+                       'touch .ssh/known_hosts; ' +
+                       'chmod %s .ssh/auth* .ssh/id* .ssh/known_hosts; '
+                       % DefaultValue.KEY_FILE_MODE)
+                (cin, cout, cerr) = p.exec_command(cmd)
+                cin.close()
+                cout.close()
+                cerr.close()
+
+                # Append the ID to authorized_keys;
+                cnt = 0
+                cmd = 'echo \"%s\" >> .ssh/authorized_keys && echo ok ok ok' \
+                      % self.localID
+                (cin, cout, cerr) = p.exec_command(cmd)
+                cin.close()
+                # readline will read other msg.
+                line = cout.read().decode()
+                while (line.find("ok ok ok") < 0):
+                    time.sleep(cnt * 2)
+                    cmd = 'echo \"%s\" >> .ssh/authorized_keys && echo ok ok ' \
+                          'ok' % self.localID
+                    (cin, cout, cerr) = p.exec_command(cmd)
+                    cin.close()
+                    cnt += 1
+                    line = cout.readline()
+                    if (cnt >= 3):
+                        break
+                    if (line.find("ok ok ok") < 0):
+                        continue
+                    else:
+                        break
+
+                if (line.find("ok ok ok") < 0):
+                    self.failedToAppendInfo += "...send to %s\nFailed to " \
+                                               "append local ID to " \
+                                               "authorized_keys on remote " \
+                                               "node %s.\n" % (
+                                                   hostname, hostname)
+                    return
+                cout.close()
+                cerr.close()
+                self.logger.debug(
+                    "Send to %s\nSuccessfully appended authorized_key on "
+                    "remote node %s."
+                    % (hostname, hostname))
+            finally:
+                if cin:
+                    cin.close()
+                if cout:
+                    cout.close()
+                if cerr:
+                    cerr.close()
+                if p:
+                    p.close()
+
+    def determinePublicAuthorityFile(self):
+        '''
+        function: determine common authentication file content
+        input : NA
+        output: NA
+        '''
+        self.logOut(12, 0)
+        # eliminate duplicates in known_hosts file
+        try:
+            tab = self.readKnownHosts()
+            self.writeKnownHosts(tab)
+        except IOError as e:
+            self.logger.logExit(ErrorCode.GAUSS_502["GAUSS_50230"]
+                                % "known hosts file" + " Error:\n%s" % str(e))
+
+        # eliminate duplicates in authorized_keys file
+        try:
+            tab = self.readAuthorizedKeys()
+            self.writeAuthorizedKeys(tab)
+        except IOError as e:
+            self.logger.logExit(ErrorCode.GAUSS_502["GAUSS_50230"]
+                                % "authorized keys file" + " Error:\n%s"
+                                % str(e))
+        self.logOut(13, 1)
+
+    def addRemoteID(self, tab, line):
+        """
+        function: add remote node id
+        input : tab, line
+        output: True/False
+        """
+        IDKey = line.strip().split()
+        if not (len(IDKey) == 3 and line[0] != '#'):
+            return False
+        tab[IDKey[2]] = line
+        return True
+
+    def readAuthorizedKeys(self, tab=None, keysFile=None):
+        """
+        function: read authorized keys
+        input : tab, keysFile
+        output: tab
+        """
+        if not keysFile:
+            keysFile = self.authorized_keys_fname
+        if not tab:
+            tab = {}
+        with open(keysFile, 'r') as f:
+            for line in f:
+                self.addRemoteID(tab, line)
+        return tab
+
+    def writeAuthorizedKeys(self, tab, keysFile=None):
+        """
+        function: write authorized keys
+        input : tab, keysFile
+        output: True/False
+        """
+        if not keysFile:
+            keysFile = self.authorized_keys_fname
+        with open(keysFile, 'w') as f:
+            for IDKey in tab:
+                f.write(tab[IDKey])
+
+    def addKnownHost(self, tab, line):
+        """
+        function: add known host
+        input : tab, line
+        output: True/False
+        """
+        key = line.strip().split()
+        if not (len(key) == 3 and line[0] != '#'):
+            return False
+        tab[key[0]] = line
+        return True
+
+    def readKnownHosts(self, tab=None, hostsFile=None):
+        """
+        function: read known host
+        input : tab, hostsFile
+        output: tab
+        """
+        if not hostsFile:
+            hostsFile = self.known_hosts_fname
+        if not tab:
+            tab = {}
+        with open(hostsFile, 'r') as f:
+            for line in f:
+                self.addKnownHost(tab, line)
+        return tab
+
+    def writeKnownHosts(self, tab, hostsFile=None):
+        """
+        function: write known host
+        input : tab, hostsFile
+        output: NA
+        """
+        if not hostsFile:
+            hostsFile = self.known_hosts_fname
+        with open(hostsFile, 'w') as f:
+            for key in tab:
+                f.write(tab[key])
+
+    def sendTrustFile(self, hostname):
+        '''
+        function: Set or update the authentication files on hostname
+        input : hostname
+        output: NA
+        '''
+        cmd = ('scp -q -o "BatchMode yes" -o "NumberOfPasswordPrompts '
+               '0" '
+               '%s %s %s %s %s:.ssh/' % (
+                   self.authorized_keys_fname, self.known_hosts_fname,
+                   self.id_rsa_fname, self.id_rsa_pub_fname, hostname))
+        (status, output) = subprocess.getstatusoutput(cmd)
+        if (status != 0):
+            raise Exception(ErrorCode.GAUSS_502["GAUSS_50223"]
+                            % "the authentication" + " Node:%s. Error:\n%s."
+                            % (hostname, output) + "The cmd is %s " % cmd)
+
+    def synchronizationLicenseFile(self):
+        '''
+        function: Distribute the authentication files to all hosts through
+                  a concurrently executed thread pool.
+        input : NA
+        output: NA
+        '''
+        self.logOut(14, 0)
+        try:
+            parallelTool.parallelExecute(self.sendTrustFile, self.hostList)
+        except Exception as e:
+            self.logger.logExit(str(e))
+        self.logOut(15, 1)
+
+    def verifyTrust(self):
+        """
+        function: Verify creating SSH trust is successful
+        input : NA
+        output: NA
+        """
+        self.logOut(16, 0)
+        try:
+            results = parallelTool.parallelExecute(self.checkAuthentication,
+                                                   self.hostList)
+            hostnames = ""
+            for (key, value) in results:
+                if (not key):
+                    hostnames = hostnames + ',' + value
+            if (hostnames != ""):
+                raise Exception(ErrorCode.GAUSS_511["GAUSS_51100"]
+                                % hostnames.lstrip(','))
+        except Exception as e:
+            self.logger.logExit(str(e))
+        self.logOut(17, 1)
+
+    def getUserPasswd(self):
+        """
+        function: get the user password from the terminal or from stdin
+        input: NA
+        output: NA
+        """
+        user_passwd = []
+        if (sys.stdin.isatty()):
+            GaussLog.printMessage(
+                "Please enter password for current user[%s]." % self.user)
+            user_passwd.append(getpass.getpass())
+        else:
+            user_passwd.append(sys.stdin.readline().strip('\n'))
+
+        if (not user_passwd):
+            GaussLog.exitWithError("Password should not be empty")
+
+        return user_passwd
+
+
+if __name__ == '__main__':
+    # main function
+    createTrust = None
+    try:
+        createTrust = GaussCreateTrust()
+        createTrust.run()
+    except Exception as e:
+        if str(e).startswith("[GAUSS-"):
+            GaussLog.exitWithError(str(e))
+        else:
+            GaussLog.exitWithError("[GAUSS-50100]:" + str(e))
+
+    sys.exit(0)
diff --git a/script/gs_uninstall b/script/gs_uninstall
new file mode 100644
index 0000000..fe0cf78
--- /dev/null
+++ b/script/gs_uninstall
@@ -0,0 +1,185 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+#############################################################################
+# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+#
+# openGauss is licensed under Mulan PSL v2.
+# You can use this software according to the terms
+# and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+#
+# http://license.coscl.org.cn/MulanPSL2
+#
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+# ----------------------------------------------------------------------------
+# Description : gs_uninstall is a utility to uninstall a Gauss200 server.
+#############################################################################
+
+import subprocess
+import os
+import sys
+import pwd
+
+sys.path.append(sys.path[0] + "/../")
+
+from gspylib.common.GaussLog import GaussLog
+from gspylib.common.Common import DefaultValue
+from gspylib.common.OMCommand import OMCommand
+from gspylib.common.ErrorCode import ErrorCode
+from gspylib.common.ParallelBaseOM import ParallelBaseOM
+from gspylib.common.ParameterParsecheck import Parameter
+from gspylib.os.gsfile import g_file
+from impl.uninstall.OLAP.UninstallImplOLAP import UninstallImplOLAP
+
+#####################################################
+# Action type
+#####################################################
+ACTION_CLEAN_TEMP_DIR = "clean_tmp_dir"
+
+
+class Uninstall(ParallelBaseOM):
+    """
+    class about cmd options
+    """
+
+    def __init__(self):
+        """
+        function: init function
+        """
+        ParallelBaseOM.__init__(self)
+        self.cleanInstance = False
+
+    def usage(self):
+        """
+gs_uninstall is a utility to uninstall a cluster.
+
+Usage:
+  gs_uninstall -?
| --help + gs_uninstall -V | --version + gs_uninstall [--delete-data] [-L] [-l LOGFILE] + +General options: + --delete-data Clean up instance data files. + -L Only uninstall local nodes. + -l Path of log file. + -?, --help Show help information for this utility, + and exit the command line mode. + -V, --version Show version information. + """ + print(self.usage.__doc__) + + def parseCommandLine(self): + """ + function: Parse command line and save to global variable + """ + ParaObj = Parameter() + # Parse the parameter with uninstall + ParaDict = ParaObj.ParameterCommandLine("uninstall") + + # check if helpFlag exists + if (ParaDict.__contains__("helpFlag")): + self.usage() + sys.exit(0) + # check if -l parameter exists + if (ParaDict.__contains__("logFile")): + self.logFile = ParaDict.get("logFile") + # check if -L parameter exists + if (ParaDict.__contains__("localMode")): + self.localMode = ParaDict.get("localMode") + # check if need clean instances + if (ParaDict.__contains__("cleanInstance")): + self.cleanInstance = True + + def checkParameter(self): + """ + function: Check parameter from command line + """ + # check user + self.user = pwd.getpwuid(os.getuid()).pw_name + # if no user info, throw error + if (self.user == ""): + GaussLog.exitWithError(ErrorCode.GAUSS_503["GAUSS_50308"]) + # else check user + try: + DefaultValue.checkUser(self.user, False) + except Exception as e: + GaussLog.exitWithError(str(e)) + + # check log file + if (self.logFile == ""): + self.logFile = DefaultValue.getOMLogPath( + DefaultValue.UNINSTALL_LOG_FILE, self.user, "") + if (not os.path.isabs(self.logFile)): + GaussLog.exitWithError(ErrorCode.GAUSS_502["GAUSS_50213"] % "log") + + # get user env info + self.mpprcFile = DefaultValue.getMpprcFile() + + def initGlobals(self): + """ + function: init global parameters + """ + # init log file for uninstall + self.initLogger("gs_uninstall") + try: + # OLAP + self.initClusterInfoFromStaticFile(self.user) + # Initialize the self.sshTool variable + self.initSshTool(self.clusterInfo.getClusterNodeNames(), + DefaultValue.TIMEOUT_PSSH_UNINSTALL) + except Exception as e: + self.logger.logExit(str(e)) + + def checkLogFilePath(self): + """ + function: Check log file path + """ + clusterPath = [] + try: + # get tool path + clusterPath.append(DefaultValue.getClusterToolPath(self.user)) + # get tmp path + tmpDir = DefaultValue.getTmpDirFromEnv() + clusterPath.append(tmpDir) + # get cluster path + hostName = DefaultValue.GetHostIpOrName() + dirs = self.clusterInfo.getClusterDirectorys(hostName, False) + # loop all cluster path + for checkdir in dirs.values(): + clusterPath.extend(checkdir) + self.logger.debug("Cluster paths %s." 
+                              % clusterPath)
+
+            # check directory
+            g_file.checkIsInDirectory(self.logFile, clusterPath)
+        except Exception as e:
+            self.logger.logExit(str(e))
+
+
+if __name__ == '__main__':
+    """
+    main function
+    """
+    # check if user is root
+    if (os.getuid() == 0):
+        GaussLog.exitWithError(ErrorCode.GAUSS_501["GAUSS_50105"])
+
+    try:
+        # Instantiate the Uninstall class
+        uninstall = Uninstall()
+        uninstall.parseCommandLine()
+        uninstall.checkParameter()
+        uninstall.initGlobals()
+
+        if (uninstall.xmlFile):
+            pass
+        impl = UninstallImplOLAP(uninstall)
+
+        # Perform the whole uninstall process
+        impl.run()
+
+    except Exception as e:
+        GaussLog.exitWithError(str(e))
diff --git a/script/gs_upgradectl b/script/gs_upgradectl
new file mode 100644
index 0000000..7c4edd8
--- /dev/null
+++ b/script/gs_upgradectl
@@ -0,0 +1,292 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+#############################################################################
+# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+#
+# openGauss is licensed under Mulan PSL v2.
+# You can use this software according to the terms
+# and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+#
+# http://license.coscl.org.cn/MulanPSL2
+#
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+# ----------------------------------------------------------------------------
+# Description : gs_upgradectl is a utility to upgrade a Gauss200 application.
+#
+# gs_upgradectl is an upgrade framework that controls the upgrade process.
+# It supports binary upgrade, in-place upgrade and on-line binary upgrade:
+#
+# binary upgrade: stops the old cluster, replaces the binaries and starts
+# the new cluster; used only when no database objects changed between the
+# old cluster and the new cluster.
+#
+# on-line binary upgrade: rolling upgrade; upgrades the standby instances
+# first, switches over, and then upgrades the master instances; also used
+# only when no database objects changed between the old cluster and the
+# new cluster.
+#
+# in-place upgrade: binary upgrade plus an update of the database meta-data
+# (system tables, system views, functions, and so on); used when some
+# database objects have changed between the old cluster and the new cluster.
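+#
+# Typical invocations (illustrative; see "General options" below):
+#     gs_upgradectl -t chose-strategy
+#     gs_upgradectl -t auto-upgrade -X CLUSTER_XML [--grey]
+#     gs_upgradectl -t commit-upgrade -X CLUSTER_XML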
+############################################################################# + +import os +import sys +import pwd +import grp +import copy + +from gspylib.common.Common import DefaultValue +from gspylib.common.GaussLog import GaussLog +from gspylib.common.ParallelBaseOM import ParallelBaseOM +from gspylib.threads.SshTool import SshTool +from gspylib.common.ErrorCode import ErrorCode +from gspylib.common.ParameterParsecheck import Parameter +import impl.upgrade.UpgradeConst as Const +from impl.upgrade.OLAP.UpgradeImplOLAP import UpgradeImplOLAP + + +class Upgrade(ParallelBaseOM): + """ + The class about upgrade + """ + + def __init__(self): + ParallelBaseOM.__init__(self) + self.oldClusterInfo = "" + self.oldVersion = "" + # the directory that when do binary upgrade the information store + self.upgradeBackupPath = "" + self.userProfile = "" + self.tmpDir = "" + self.newClusterAppPath = "" + self.oldClusterAppPath = "" + self.clusterNodes = [] + self.nodesNum = -1 + self.nodeNames = [] + ##static parameter + self.binTarName = "binary_%s.tar" % DefaultValue.GetHostIpOrName() + self.rollback = False + self.is_inplace_upgrade = True + self.is_grey_upgrade = False + self.guc_paras = {} + self.newClusterVersion = None + self.newClusterNumber = None + self.oldclusterVersion = None + self.oldClusterNumber = None + self.forceRollback = False + self.upgrade_remain = False + + def usage(self): + """ +gs_upgradectl is a utility to upgrade a cluster. + +Usage: + gs_upgradectl -? | --help + gs_upgradectl -V | --version + gs_upgradectl -t chose-strategy [-l LOGFILE] + gs_upgradectl -t commit-upgrade -X XMLFILE [-l LOGFILE] + + gs_upgradectl -t auto-upgrade -X XMLFILE [-l LOGFILE] [--grey] + gs_upgradectl -t auto-rollback -X XMLFILE [-l LOGFILE] [--force] + +General options: + -?, --help Show help information for this utility, + and exit the command line mode. + -V, --version Show version information. + -t Subcommand for upgrade. It can be + chose-strategy, auto-upgrade, auto-rollback, + commit-upgrade. + -X Path of the XML configuration file of the + later version cluster. 
+ --force Force to rollback when cluster status is + not normal + --grey Use grey-binary-upgrade + + """ + + print(self.usage.__doc__) + + def parseCommandLine(self): + """ + Parse command line and save to global variable + """ + # Resolves incoming parameters + ParaObj = Parameter() + ParaDict = ParaObj.ParameterCommandLine("upgradectl") + if "helpFlag" in ParaDict.keys(): + self.usage() + sys.exit(0) + + # get action information + if "action" in ParaDict.keys(): + self.action = ParaDict.get("action") + if "confFile" in ParaDict.keys(): + self.xmlFile = ParaDict.get("confFile") + # get logFile information + if "logFile" in ParaDict.keys(): + self.logFile = ParaDict.get("logFile") + if "grey" in ParaDict.keys(): + self.is_grey_upgrade = True + self.is_inplace_upgrade = False + if "force" in ParaDict.keys(): + self.forceRollback = True + + def checkUser(self): + """ + function: check user + """ + # check user + # it will do more check about user after get the cluster config info + # get user information + self.user = pwd.getpwuid(os.getuid()).pw_name + # get group information + self.group = grp.getgrgid(pwd.getpwnam(self.user).pw_gid).gr_name + # if the user or group is null, exit + if self.user == "" or self.group == "": + raise Exception(ErrorCode.GAUSS_503["GAUSS_50308"]) + # if the user or group is 'root', exit + if self.user == "root" or self.group == "root": + raise Exception(ErrorCode.GAUSS_501["GAUSS_50105"]) + + # we must make sure the env 'GAUSSHOME', 'GS_CLUSTER_NAME', + # 'GAUSS_ENV' exists + if (DefaultValue.getEnvironmentParameterValue("GAUSSHOME", + self.user) == ""): + raise Exception(ErrorCode.GAUSS_518["GAUSS_51800"] % "$GAUSSHOME") + if (DefaultValue.getEnvironmentParameterValue("GS_CLUSTER_NAME", + self.user) == ""): + raise Exception( + ErrorCode.GAUSS_518["GAUSS_51800"] % "$GS_CLUSTER_NAME") + if (DefaultValue.getEnvironmentParameterValue("GAUSS_ENV", + self.user) == ""): + raise Exception(ErrorCode.GAUSS_518["GAUSS_51800"] % "$GAUSS_ENV") + + # depending on the environment variable GPHOME, access to the python + GPHOME = DefaultValue.getEnv(DefaultValue.TOOL_PATH_ENV) + if (GPHOME == None or GPHOME == ""): + raise Exception(ErrorCode.GAUSS_518["GAUSS_51800"] % "$GPHOME") + + def checkParameter(self): + """ + function: Check parameter from command line + """ + if self.action == "": + raise Exception(ErrorCode.GAUSS_500["GAUSS_50001"] % "t" + ".") + # when we do auto-upgrade, auto-rollback or commit-upgrade, + # we must incoming '-X' and make sure the xml file exists. + if self.action != Const.ACTION_CHOSE_STRATEGY: + if self.xmlFile == "": + raise Exception(ErrorCode.GAUSS_500["GAUSS_50001"] % 'X' + ".") + if not os.path.exists(self.xmlFile): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % + self.xmlFile) + # check mpprc file path + # get mpprcFile by MPPDB_ENV_SEPARATE_PATH. 
Even if the return value + # is "" or None, no need to pay attention + self.mpprcFile = DefaultValue.getEnv(DefaultValue.MPPRC_FILE_ENV) + + # make sure which env file we use + # If self.mpprcFile is not "" and None, return self.mpprcFile; else + # return '~/.bashrc' + self.userProfile = DefaultValue.getMpprcFile() + self.checkUser() + + # check log file + if self.logFile == "": + self.logFile = DefaultValue.getOMLogPath( + DefaultValue.UPGRADE_LOG_FILE, self.user, "", "") + if not os.path.isabs(self.logFile): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50213"] % "log") + + self.initLogger(self.action) + + def initGlobalInfos(self): + """ + function: init global infos + """ + self.logger.debug("Init global infos") + + # init cluster info + if self.xmlFile: + self.initClusterInfo() + else: + self.initClusterInfoFromStaticFile(self.user) + # init clusterNodes + for dbNode in self.clusterInfo.dbNodes: + self.clusterNodes.append(dbNode.name) + if len(self.clusterNodes) == 0: + raise Exception(ErrorCode.GAUSS_512["GAUSS_51201"]) + for nodeName in self.nodeNames: + if nodeName not in self.clusterNodes: + raise Exception(ErrorCode.GAUSS_516["GAUSS_51619"] % nodeName) + self.logger.debug("Successfully init global infos") + + def distributeFileToSpecialNode(self, file, destDir, hostList): + """ + distribute file to special node + :param file: + :param destDir: + :param hostList: + :return: + """ + if not hostList: + hostList = copy.deepcopy(self.clusterNodes) + else: + hostList = copy.deepcopy(hostList) + if DefaultValue.GetHostIpOrName() in hostList: + hostList.remove(DefaultValue.GetHostIpOrName()) + + self.logger.debug("Start copy file:{0} to hosts:{1}.".format( + file, hostList)) + if not os.path.exists(file): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % file) + self.logger.debug("Distribute the file %s" % file) + retry = True + count = 0 + while retry: + try: + if count > 4: + retry = False + self.sshTool.scpFiles(file, destDir, hostList) + retry = False + except Exception as e: + count += 1 + self.logger.debug("Retry distributing xml command, " + "the {0} time.".format(count)) + + +if __name__ == '__main__': + """ + main function + """ + if os.getuid() == 0: + GaussLog.exitWithError(ErrorCode.GAUSS_501["GAUSS_50105"]) + + try: + REPEAT = False + upgrade = Upgrade() + upgrade.parseCommandLine() + upgrade.checkParameter() + + # set action flag file + DefaultValue.setActionFlagFile("gs_upgradectl", upgrade.logger) + upgrade.initGlobalInfos() + impl = UpgradeImplOLAP(upgrade) + impl.run() + except Exception as e: + if REPEAT: + upgrade.sshTool = SshTool(upgrade.clusterNodes, upgrade.localLog, + DefaultValue.TIMEOUT_PSSH_COMMON) + GaussLog.exitWithError(str(e)) + finally: + DefaultValue.setActionFlagFile("gs_upgradectl", None, False) diff --git a/script/gspylib/__init__.py b/script/gspylib/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/script/gspylib/common/CheckPythonVersion.py b/script/gspylib/common/CheckPythonVersion.py new file mode 100644 index 0000000..7f0d9e2 --- /dev/null +++ b/script/gspylib/common/CheckPythonVersion.py @@ -0,0 +1,43 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. 
+# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +import sys +import platform +import re + + +def checkPythonVersion(): + pythonVersion = sys.version_info[0:2] + distName = platform.platform() + if re.search("oe1", distName) is not None: + if not pythonVersion == (3, 7): + raise Exception("[GAUSS-52200] : version of python" + " is not correct: %s." % + distName + " should use Python 3.7.*") + else: + if not pythonVersion >= (3, 6): + raise Exception("[GAUSS-52200] : version of python" + " is not correct: %s." % + distName + " should use Python 3.6.*") + return True + + +if __name__ == '__main__': + try: + checkPythonVersion() + except Exception as e: + raise Exception(e) diff --git a/script/gspylib/common/CommandLineParser.py b/script/gspylib/common/CommandLineParser.py new file mode 100644 index 0000000..0810917 --- /dev/null +++ b/script/gspylib/common/CommandLineParser.py @@ -0,0 +1,407 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +""" +@brief The command line parser module. +@details Parse command line parameters and store them as variables with + the same name. +""" + +# export module +__all__ = ["CommandLineParser", "Option"] + +# system import. +import sys as _sys +import optparse as _optparse + +# import typing for comment. +try: + from typing import Dict + from typing import List + from typing import Tuple + from typing import Any +except ImportError: + Dict = dict + List = list + Tuple = tuple + Any = str or int or complex or list or dict + +# local import +from gspylib.common.ErrorCode import ErrorCode + + +class Option(_optparse.Option, object): + """ + The command line option class which use to the "OptionParser" instance. + But this class does not accept the "dest" + parameter. + """ + + def __init__(self, *args, **kwargs): + """ + Initialize the command line option instance. + + :param args: The command line command string, maximum + length is 2. + :param kwargs: The command line command parameters. + :param action: The named parameter, specified command line + parameter action allowed by option parser. + :param type: The named parameter, specified command line + parameter type of current option. + :param default: The named parameter, specified command line + parameter default value. + :param nargs: The named parameter, specified the number of + the command line parameter value. + :param const: The named parameter, specified the const + value of the command line parameter. 
+ :param choices: The named parameter, specified the choice + range of the command line parameter, the item + in the choices list must be string type, + and must not set the "type" parameter. + Otherwise, it will lead to unexpected errors. + :param callback: The named parameter, specified the handler + function for the command line parameter. + :param callback_args: The named parameter, specified the unnamed + parameters of the handler function for the + command line parameter. + :param callback_kwargs: The named parameter, specified the named + parameters of the handler function for the + command line parameter. + :param help: The named parameter, the help string for the + command line parameter. + :param metavar: The named parameter, the display string for + the command line parameter value. + + :type args: str + :type kwargs: * + :type action: str + :type type: str + :type default: * + :type nargs: int + :type const: * + :type choices: List[str] + :type callback: function + :type callback_args: tuple + :type callback_kwargs: dict + :type help: str + :type metavar: str + """ + # Remove the "dest" parameter. + if "dest" in kwargs: + kwargs.pop("dest") + # Initialize the command line option instance. + _optparse.Option.__init__(self, *args, **kwargs) + + +class OptionParser(_optparse.OptionParser, object): + """ + The command line option parser. + """ + + def __init__(self, *args, **kwargs): + """ + Initialize the internal command line parser. + + :param args: The additional unnamed parameter of the + command line parser. + :param kwargs: The additional named parameter of the + command line parser. + :param usage: A usage string for your program. Before + it is displayed to the user, "%prog" will + be expanded to the name of your program + (prog or os.path.basename(sys.argv[0])). + :param option_list: A list of option instance for this parser. + :param option_class: The command line option class type, + default is "optparse.Option". + :param version: The version string for this scripts. + :param conflict_handler: The solutions after command line options + conflict, "resolve" will override before + option, "errors" will raise error, + default is "error". + :param description: A paragraph of text giving a brief + overview of your program. optparse re-formats + this paragraph to fit the current + terminal width and prints it when the user + requests help (after usage, but before + the list of options). + :param formatter: The formatter instance for the + description information. + :param add_help_option: Whether add the help option instance + automatically. + :param prog: The name of the current program (to + override os.path.basename(sys.argv[0])). + :param epilog: A paragraph of help text to print after + option help. + + :type args: str | list | bool | type | + _optparse.IndentedHelpFormatter + :type kwargs: str | list | bool | type | + _optparse.IndentedHelpFormatter + :type usage: str + :type option_list: List[Option] + :type option_class: type + :type version: str + :type conflict_handler: str + :type description: str + :type formatter: _optparse.IndentedHelpFormatter + :type add_help_option: bool + :type prog: str + :type epilog: str + """ + # Call the parent init function. + _optparse.OptionParser.__init__(self, *args, **kwargs) + + def print_help(self, _file=_sys.stderr): + """ + print_help(file : file = stderr) + + Print an extended help message, listing all options and any help + text provided with them, to 'file' (default + stderr). + + :param _file: The file descriptor instance. 
+        :type _file: file
+
+        :rtype: None
+        """
+        _optparse.OptionParser.print_help(self, _file)
+
+    def print_usage(self, _file=_sys.stderr):
+        """
+        print_usage(file : file = stderr)
+
+        Print the usage message for the current program (self.usage) to
+        'file' (default stderr). Any occurrence of the string "%prog" in
+        self.usage is replaced with the name of the current program
+        (basename of sys.argv[0]). Does nothing if self.usage is empty
+        or not defined.
+
+        :param _file: The file descriptor instance.
+        :type _file: file
+
+        :rtype: None
+        """
+        _optparse.OptionParser.print_usage(self, _file)
+
+    def print_version(self, _file=_sys.stderr):
+        """
+        print_version(file : file = stderr)
+
+        Print the version message for this program (self.version) to
+        'file' (default stderr). As with print_usage(), any occurrence
+        of "%prog" in self.version is replaced by the current program's
+        name. Does nothing if self.version is empty or undefined.
+
+        :param _file: The file descriptor instance.
+        :type _file: file
+
+        :rtype: None
+        """
+        _optparse.OptionParser.print_version(self, _file)
+
+    def error(self, _msg):
+        """
+        error(msg : string)
+
+        Raise an exception that incorporates 'msg', instead of printing a
+        usage message to stderr and exiting as the optparse default does.
+        If you override this in a subclass, it should not return -- it
+        should either exit or raise an exception.
+
+        :param _msg: The error message.
+        :type _msg: str
+
+        :rtype: None
+        """
+        raise Exception(ErrorCode.GAUSS_500["GAUSS_50015"] % _msg)
+
+
+class CommandLineMetaClass(type):
+    """
+    The command line parser metaclass.
+
+    Used to magically save the command line Option instances declared
+    on subclasses.
+    """
+
+    def __new__(mcs, name, bases, attrs):
+        """
+        Create a new command line parser class.
+
+        :param name:  The name of the current class.
+        :param bases: The parent instances of the current class.
+        :param attrs: The attribute dict of the current class.
+
+        :type name: str
+        :type bases: Tuple[type]
+        :type attrs: Dict[str, Any]
+        :return: The newly created class.
+        """
+        # If it is the base command line parser class, we will do nothing.
+        if name == "CommandLineOption":
+            return type.__new__(mcs, name, bases, attrs)
+
+        # Store the command line option instance mapping.
+        mappings = {}
+        # Store the attribute key-value pair list of the current class.
+        items = list(attrs.items())
+
+        # Store the command line option instances in the mapping, and
+        # remove them from the current class attributes.
+        for key, value in items:
+            if isinstance(value, Option):
+                mappings.setdefault(key, value)
+                attrs.pop(key)
+
+                # Add the ensure_value helper that optparse needs for
+                # cumulative option actions.
+                if value.action in ["append", "append_const", "count"]:
+                    def ensure_value(_self, _attr, _value):
+                        """
+                        Ensure that the object attribute exists: if it is
+                        missing or None, set it to the given value, then
+                        return the attribute value.
+
+                        :param _self:  The object instance.
+                        :param _attr:  The object attribute name.
+                        :param _value: The object attribute value.
+
+                        :type _self: CommandLineParser
+                        :type _attr: str
+                        :type _value: *
+
+                        :return: Return the object attribute value.
+                        :rtype: *
+                        """
+                        if not hasattr(_self, _attr) or getattr(
+                                _self, _attr) is None:
+                            setattr(_self, _attr, _value)
+                        return getattr(_self, _attr)
+
+                    # Add the function.
+                    attrs["ensure_value"] = ensure_value
+
+        # Store the mapping in a named attribute of the current class.
+        attrs["__mappings__"] = mappings
+
+        return type.__new__(mcs, name, bases, attrs)
+
+
+class CommandLineParser(object):
+    """
+    The base class of the command line parser.
+    """
+    # Set the metaclass type. Note that this assignment style only takes
+    # effect on Python 2.x; it is not supported on Python 3.x, which uses
+    # the "metaclass" keyword in the class statement instead.
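+    # A minimal usage sketch (illustrative only; "DemoParser" and its
+    # options are hypothetical, and the hookup assumes the Python 2
+    # metaclass semantics mentioned in the note above). Subclasses declare
+    # Option instances as class attributes; the metaclass collects them
+    # into __mappings__, and the parsed values then appear as instance
+    # attributes:
+    #
+    #     class DemoParser(CommandLineParser):
+    #         user = Option("-U", action="store", type="string",
+    #                       help="cluster user")
+    #         force = Option("-f", action="store_true", default=False,
+    #                        help="run without confirmation")
+    #
+    #     opts = DemoParser(["-U", "omm", "-f"])
+    #     # opts.user == "omm", opts.force is True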
+    __metaclass__ = CommandLineMetaClass
+
+    def __init__(self, _parameters=None, *args, **kwargs):
+        """
+        Initialize the command line parser.
+
+        :param _parameters:      The command line parameters list,
+                                 default is sys.argv.
+        :param args:             The additional unnamed parameters of the
+                                 command line parser.
+        :param kwargs:           The additional named parameters of the
+                                 command line parser.
+        :param usage:            A usage string for your program. Before
+                                 it is displayed to the user, "%prog" will
+                                 be expanded to the name of your program
+                                 (prog or os.path.basename(sys.argv[0])).
+        :param option_list:      A list of option instances for this
+                                 parser.
+        :param option_class:     The command line option class type,
+                                 default is "optparse.Option".
+        :param version:          The version string for this script.
+        :param conflict_handler: How to handle conflicting command line
+                                 options: "resolve" overrides the earlier
+                                 option, "error" raises an error; default
+                                 is "error".
+        :param description:      A paragraph of text giving a brief
+                                 overview of your program. optparse
+                                 re-formats this paragraph to fit the
+                                 current terminal width and prints it when
+                                 the user requests help (after usage, but
+                                 before the list of options).
+        :param formatter:        The formatter instance for the
+                                 description information.
+        :param add_help_option:  Whether to add the help option instance
+                                 automatically.
+        :param prog:             The name of the current program (to
+                                 override os.path.basename(sys.argv[0])).
+        :param epilog:           A paragraph of help text to print after
+                                 option help.
+
+        :type _parameters: List[str] | None
+        :type args: str | list | bool | type |
+                    _optparse.IndentedHelpFormatter
+        :type kwargs: str | list | bool | type |
+                      _optparse.IndentedHelpFormatter
+        :type usage: str
+        :type option_list: List[Option]
+        :type option_class: type
+        :type version: str
+        :type conflict_handler: str
+        :type description: str
+        :type formatter: _optparse.IndentedHelpFormatter
+        :type add_help_option: bool
+        :type prog: str
+        :type epilog: str
+        """
+        # Create a new command line parser.
+        opt = OptionParser(*args, **kwargs)
+
+        # Add the "dest" attribute to each command line option instance,
+        # and add the option instance to the parser.
+        # noinspection PyUnresolvedReferences
+        for key, value in list(self.__mappings__.items()):
+            setattr(value, "dest", key)
+            opt.add_option(value)
+
+        # Parse the command line parameters.
+        if not _parameters:
+            _parameters = _sys.argv[1:]
+        _, unknown_args = opt.parse_args(_parameters, self)
+
+        # If a command line parameter is not supplied by the user, fall
+        # back to its "default" value, then its "const" value, and
+        # finally "None".
+        # noinspection PyUnresolvedReferences
+        for key in list(self.__mappings__.keys()):
+            if not hasattr(self, key):
+                # noinspection PyUnresolvedReferences
+                value = self.__mappings__.get(key)
+                if hasattr(value, "default") and \
+                        getattr(value, "default") != _optparse.NO_DEFAULT:
+                    setattr(self, key, getattr(value, "default"))
+                elif hasattr(value, "const") and \
+                        getattr(value, "const") != _optparse.NO_DEFAULT:
+                    setattr(self, key, getattr(value, "const"))
+                else:
+                    setattr(self, key, None)
+
+        # If there are arguments that could not be resolved, save them.
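+        # (For example, a stray positional argument such as "extra" in
+        # ["-U", "omm", "extra"] would be preserved in self.unknown_args.)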
+ if unknown_args: + setattr(self, "unknown_args", unknown_args) + + +class ExecuteCommand(object): + """ + + """ + + def __init__(self): + """ + + """ + pass diff --git a/script/gspylib/common/Common.py b/script/gspylib/common/Common.py new file mode 100644 index 0000000..60f1f99 --- /dev/null +++ b/script/gspylib/common/Common.py @@ -0,0 +1,5433 @@ +# -*- coding:utf-8 -*- +############################################################################# +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +# Description : Common is a utility with a lot of common functions +############################################################################# +import sys +import subprocess +import os +import platform +import socket +import types +import re +import time +import configparser +import multiprocessing +import _thread as thread +import pwd +import base64 +import struct +import binascii +import json + +# The installation starts, but the package is not decompressed completely. +# The lib64/libz.so.1 file is incomplete, and the hashlib depends on the +# libz.so.1 file. +num = 0 +while num < 10: + try: + import hashlib + + break + except ImportError: + num += 1 + time.sleep(1) + +from random import sample +import csv +import shutil +import string +import traceback +from ctypes import * +from multiprocessing.dummy import Pool as ThreadPool +from datetime import datetime + +localDirPath = os.path.dirname(os.path.realpath(__file__)) +sys.path.insert(0, localDirPath + "/../../../lib") +try: + import psutil +except ImportError as e: + # mv psutil mode .so file by python version + pythonVer = sys.version[:3] + psutilLinux = os.path.join(localDirPath, + "./../../../lib/psutil/_psutil_linux.so") + psutilPosix = os.path.join(localDirPath, + "./../../../lib/psutil/_psutil_posix.so") + psutilLinuxBak = "%s_%s" % (psutilLinux, pythonVer) + psutilPosixBak = "%s_%s" % (psutilPosix, pythonVer) + + glo_cmd = "rm -rf '%s' && cp -r '%s' '%s' " % (psutilLinux, + psutilLinuxBak, + psutilLinux) + glo_cmd += " && rm -rf '%s' && cp -r '%s' '%s' " % (psutilPosix, + psutilPosixBak, + psutilPosix) + psutilFlag = True + for psutilnum in range(3): + (status_mvPsutil, output_mvPsutil) = subprocess.getstatusoutput( + glo_cmd) + if (status_mvPsutil != 0): + psutilFlag = False + time.sleep(1) + else: + psutilFlag = True + break + if (not psutilFlag): + print("Failed to execute cmd: %s. 
Error:\n%s" % (glo_cmd, + output_mvPsutil)) + sys.exit(1) + # del error import and reload psutil + del sys.modules['psutil._common'] + del sys.modules['psutil._psposix'] + import psutil + +sys.path.append(localDirPath + "/../../") +from gspylib.common.DbClusterInfo import dbClusterInfo, \ + readOneClusterConfigItem, initParserXMLFile +from gspylib.common.ErrorCode import ErrorCode +from gspylib.os.gsplatform import g_Platform +from gspylib.os.gsfile import g_file +from gspylib.os.gsOSlib import g_OSlib +from gspylib.os.gsservice import g_service +from gspylib.hardware.gsmemory import g_memory +from gspylib.threads.parallelTool import parallelTool +from gspylib.common.VersionInfo import VersionInfo +from cryptography.hazmat.backends import default_backend +from cryptography.hazmat.primitives.ciphers import Cipher, \ + algorithms, modes +import impl.upgrade.UpgradeConst as Const + +noPassIPs = [] +g_lock = thread.allocate_lock() + + +def check_content_key(content, key): + if not (type(content) == bytes): + raise Exception(ErrorCode.GAUSS_530["GAUSS_53025"]) + elif not (type(key) in (bytes, str)): + raise Exception(ErrorCode.GAUSS_530["GAUSS_53026"]) + + iv_len = 16 + if not (len(content) >= (iv_len + 16)): + raise Exception(ErrorCode.GAUSS_530["GAUSS_53027"]) + + +class DefaultValue(): + """ + Default value of some variables + """ + + def __init__(self): + pass + + TASK_INSTALL = "installation" + TASK_UPGRADE = "upgrade" + TASK_EXPAND = "expansion" + TASK_REPLACE = "replacement" + TASK_REPAIR = "repair" + TASK_QUERY_STATUS = "status" + TASK_START_STOP = "startup" + TASK_START = "startup" + TASK_STOP = "shutdown" + TASK_SWITCH = "switching" + TASK_GAUSSROACH = "GaussRoach" + TASK_GAUSSROACH_SHOW = "roach_show" + TASK_GAUSSROACH_STOP = "roach_stop_backup" + TASK_GAUSSROACH_BACKUP = "roach_backup" + TASK_GAUSSROACH_RESTORE = "roach_restore" + TASK_GAUSSROACH_DELETE = "roach_delete" + TASK_SUCCESS_FLAG = "SUCCESS" + ########################### + # DWS path info + ########################### + DWS_IMAGE_PATH = "/opt/dws/image" + DWS_PACKAGE_PATH = "/opt/dws/package" + DWS_APP_PAHT = "/opt/dws/app" + + # CM reload signal + SIGNAL_RELOAD_PARA = 1 + SIGNAL_RELOAD_FILE = 9 + + ########################### + # init action timeout value + ########################### + # start timeout value + TIMEOUT_CLUSTER_START = 300 + # restart nodegroup timeout value + TIMEOUT_NODEGROUP_RESTART = 1800 + # stop timeout value + TIMEOUT_CLUSTER_STOP = 300 + # failover timeout value + TIMEOUT_CLUSTER_FAILOVER = 300 + # syc timeout value + TIMEOUT_CLUSTER_SYNC = 1800 + # switch reset timeout value + TIMEOUT_CLUSTER_SWITCHRESET = 300 + + ## + TIMEOUT_PSSH_COMMON = 80 + ########################### + # pssh redis timeout value + TIMEOUT_PSSH_REDIS = 604800 + ########################### + # preinstall timeoutvalue + TIMEOUT_PSSH_PREINSTALL = 1800 + # install timeout value + TIMEOUT_PSSH_INSTALL = 1800 + # uninstall timeout value + TIMEOUT_PSSH_UNINSTALL = 43200 + # postpreinstall timeout value + TIMEOUT_PSSH_POSTPREINSTALL = 1800 + # binary-upgrade and rollback timeout value + TIMEOUT_PSSH_BINARY_UPGRADE = 14400 + # expend timeout value + TIMEOUT_PSSH_EXPEND = 43200 + # replace timeout value + TIMEOUT_PSSH_REPLACE = 86400 + # check timeout value + TIMEOUT_PSSH_CHECK = 1800 + # backup timeout value + TIMEOUT_PSSH_BACKUP = 1800 + # sshexkey timeout value + TIMEOUT_PSSH_SSHEXKEY = 1800 + # collector timeout value + TIMEOUT_PSSH_COLLECTOR = 1800 + # start etcd timeout value + TIMEOUT_PSSH_STARTETCD = 600 + # delCN timeout value 
+ TIMEOUT_PSSH_DELCN = 1800 + # addCN timeout value + TIMEOUT_PSSH_ADDCN = 86400 + # estimate timeout value + TIMEOUT_PSSH_ESTIMATE = 1800 + # changeip timeout value + TIMEOUT_PSSH_CHANGEIP = 1800 + # extension connector timeout value + TIMEOUT_PSSH_EXTENSION = 1800 + # VC mode timeout value + TIMEOUT_PSSH_VC = 43200 + + ########################### + # init authority parameter + ########################### + # directory mode + DIRECTORY_MODE = 750 + # directory permission + DIRECTORY_PERMISSION = 0o750 + # file node + FILE_MODE = 640 + FILE_MODE_PERMISSION = 0o640 + KEY_DIRECTORY_PERMISSION = 0o700 + KEY_FILE_MODE = 600 + MIN_FILE_MODE = 400 + MIN_FILE_PERMISSION = 0o400 + SPE_FILE_MODE = 500 + KEY_FILE_PERMISSION = 0o600 + KEY_DIRECTORY_MODE = 700 + MAX_DIRECTORY_MODE = 755 + TMP_EXE_FILE_MODE = 0o700 + SQL_FILE_MODE = 644 + # the host file permission. Do not changed it. + HOSTS_FILE = 644 + KEY_HOSTS_FILE = 0o644 + + # The available size of install app directory + APP_DISK_SIZE = 100 + # in grey upgrade, need to install new bin instead of replacing + # old bin in inplace upgrade + # so need 10G to guarantee enough space + GREY_DISK_SIZE = 10 + # The remaining space of device + INSTANCE_DISK_SIZE = 200 + # lock cluster time + CLUSTER_LOCK_TIME = 43200 + # lock cluster time for waiting mode + CLUSTER_LOCK_TIME_WAIT = 3600 + + # the guc paramter max_wal_senders's max value + MAX_WAL_SENDERS = 100 + + # env parameter + MPPRC_FILE_ENV = "MPPDB_ENV_SEPARATE_PATH" + MPPDB_TMP_PATH_ENV = "PGHOST" + TOOL_PATH_ENV = "GPHOME" + SUCCESS = "Success" + FAILURE = "Failure" + # tablespace version directory name + # it is from gaussdb kernel code + TABLESPACE_VERSION_DIRECTORY = "PG_9.2_201611171" + # gauss log dir + GAUSSDB_DIR = "/var/log/gaussdb" + # default database name + DEFAULT_DB_NAME = "postgres" + # database size file + DB_SIZE_FILE = "total_database_size" + + # current directory path + GURRENT_DIR_FILE = "." 
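+    # Note on the *_MODE / *_PERMISSION pairs above: the plain integers
+    # such as DIRECTORY_MODE = 750 are intended to be interpolated into
+    # shell commands (e.g. "chmod 750 <path>"), while the 0o-prefixed
+    # octal values such as DIRECTORY_PERMISSION = 0o750 are for direct
+    # Python calls, e.g.:
+    #
+    #     os.chmod(path, DefaultValue.KEY_FILE_PERMISSION)   # i.e. 0o600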
+ # om_monitor log directory + OM_MONITOR_DIR_FILE = "../cm/om_monitor" + # om_kerberos log directory + OM_KERBEROS_DIR_FILE = "../cm/kerberos_monitor" + # action flag file name + ACTION_FLAG_FILE = ".action_flag_file" + # action log file name + DEFAULT_LOG_FILE = "gaussdb.log" + LOCAL_LOG_FILE = "gs_local.log" + PREINSTALL_LOG_FILE = "gs_preinstall.log" + DEPLOY_LOG_FILE = "gs_install.log" + REPLACE_LOG_FILE = "gs_replace.log" + UNINSTALL_LOG_FILE = "gs_uninstall.log" + OM_LOG_FILE = "gs_om.log" + UPGRADE_LOG_FILE = "gs_upgradectl.log" + CONTRACTION_LOG_FILE = "gs_shrink.log" + DILATAION_LOG_FILE = "gs_expand.log" + UNPREINSTALL_LOG_FILE = "gs_postuninstall.log" + GSROACH_LOG_FILE = "gaussdb_roach.log" + MANAGE_CN_LOG_FILE = "gs_om.log" + GS_CHECK_LOG_FILE = "gs_check.log" + GS_CHECKPERF_LOG_FILE = "gs_checkperf.log" + GS_BACKUP_LOG_FILE = "gs_backup.log" + GS_COLLECTOR_LOG_FILE = "gs_collector.log" + GS_COLLECTOR_CONFIG_FILE = "./gspylib/etc/conf/gs_collector.json" + GAUSS_REPLACE_LOG_FILE = "GaussReplace.log" + GAUSS_OM_LOG_FILE = "GaussOM.log" + TPCDS_INSTALL_LOG_FILE = "tpcd_install.log" + LCCTL_LOG_FILE = "gs_lcctl.log" + RESIZE_LOG_FILE = "gs_resize.log" + HOTPATCH_LOG_FILE = "gs_hotpatch.log" + EXPANSION_LOG_FILE = "gs_expansion.log" + DROPNODE_LOG_FILE = "gs_dropnode.log" + # hotpatch action + HOTPATCH_ACTION_LIST = ["load", "unload", "active", "deactive", + "info", "list"] + # cluster lock file + CLUSTER_LOCK_PID = "gauss_cluster_lock.pid" + # dump file for cn instance + SCHEMA_COORDINATOR = "schema_coordinator.sql" + # dump file for job data + COORDINATOR_JOB_DATA = "schema_coordinator_job_data.sql" + # dump file for statistics data + COORDINATOR_STAT_DATA = "schema_coordinator_statistics_data.sql" + # dump global info file for DB instance + SCHEMA_DATANODE = "schema_datanode.sql" + # record default group table info + DUMP_TABLES_DATANODE = "dump_tables_datanode.dat" + # dump default group table info file for DB instance + DUMP_Output_DATANODE = "dump_output_datanode.sql" + # default cluster config xml + CLUSTER_CONFIG_PATH = "/opt/huawei/wisequery/clusterconfig.xml" + # default alarm tools + ALARM_COMPONENT_PATH = "/opt/huawei/snas/bin/snas_cm_cmd" + # GPHOME + CLUSTER_TOOL_PATH = "/opt/huawei/wisequery" + # root scripts path + ROOT_SCRIPTS_PATH = "/root/gauss_om" + + # package bak file name list + PACKAGE_BACK_LIST = ["Gauss200-OLAP-Package-bak.tar.gz", + "Gauss200-Package-bak.tar.gz", + "GaussDB-Kernel-Package-bak.tar.gz"] + # network scripts file for RHEL + REDHAT_NETWORK_PATH = "/etc/sysconfig/network-scripts" + # cert files list,the order of these files SHOULD NOT be modified + CERT_FILES_LIST = ["cacert.pem", + "server.crt", + "server.key", + "server.key.cipher", + "server.key.rand", + "sslcrl-file.crl"] + SSL_CRL_FILE = CERT_FILES_LIST[5] + CERT_ROLLBACK_LIST = ["cacert.pem", + "server.crt", + "server.key", + "server.key.cipher", + "server.key.rand", + "sslcrl-file.crl", + "gsql_cert_backup.tar.gz", + "certFlag"] + CLIENT_CERT_LIST = ["client.crt", + "client.key", + "client.key.cipher", + "client.key.rand"] + GDS_CERT_LIST = ["cacert.pem", + "server.crt", + "server.key", + "server.key.cipher", + "server.key.rand", + "client.crt", + "client.key", + "client.key.cipher", + "client.key.rand"] + GRPC_CERT_LIST = ["clientnew.crt", + "clientnew.key", + "cacertnew.pem", + "servernew.crt", + "servernew.key", + "openssl.cnf", + "client.key.cipher", + "client.key.rand", + "server.key.cipher", + "server.key.rand"] + SERVER_CERT_LIST = ["client.crt", + "client.key", + "cacert.pem", + 
"server.crt", + "server.key", + "openssl.cnf", + "client.key.cipher", + "client.key.rand", + "server.key.cipher", + "server.key.rand", + "client.key.pk8"] + BIN_CERT_LIST = ["server.key.cipher", + "server.key.rand"] + CERT_BACKUP_FILE = "gsql_cert_backup.tar.gz" + PATH_CHECK_LIST = ["|", ";", "&", "$", "<", ">", "`", "\\", "'", "\"", + "{", "}", "(", ")", "[", "]", "~", "*", "?", " ", "!", + "\n"] + PASSWORD_CHECK_LIST = [";", "'", "$"] + # The xml file path is needed by kerberos in FI_librA + # FI_KRB_XML is used in mppdb + FI_KRB_XML = "auth_config/mppdb-site.xml" + # FI_ELK_KRB_XML is used in elk + FI_ELK_KRB_XML = "auth_config/elk-krb-site.xml" + FI_KRB_CONF = "krb5.conf" + ########################### + # instance role + ########################### + # init value + INSTANCE_ROLE_UNDEFINED = -1 + # cm_server + INSTANCE_ROLE_CMSERVER = 0 + # gtm + INSTANCE_ROLE_GTM = 1 + # etcd + INSTANCE_ROLE_ETCD = 2 + # cn + INSTANCE_ROLE_COODINATOR = 3 + # dn + INSTANCE_ROLE_DATANODE = 4 + # cm_agent + INSTANCE_ROLE_CMAGENT = 5 + + ########################### + # instance type. only for CN/DN + ########################### + # master + MASTER_INSTANCE = 0 + # standby + STANDBY_INSTANCE = 1 + # dummy standby + DUMMY_STANDBY_INSTANCE = 2 + # cascade standby + CASCADE_STANDBY = 3 + + ########################### + # parallel number + ########################### + DEFAULT_PARALLEL_NUM = 12 + DEFAULT_PARALLEL_NUM_UPGRADE = 6 + + # SQL_EXEC_COMMAND + SQL_EXEC_COMMAND_WITHOUT_USER = "%s -p %s -d %s -h %s " + SQL_EXEC_COMMAND_WITH_USER = "%s -p %s -d %s -U %s -W %s -h %s " + SQL_EXEC_COMMAND_WITHOUT_HOST_WITHOUT_USER = "%s -p %s -d %s " + SQL_EXEC_COMMAND_WITHOUT_HOST_WITH_USER = "%s -p %s -d %s -U %s -W %s " + + # cluster type + CLUSTER_TYPE_SINGLE = "single" + CLUSTER_TYPE_SINGLE_PRIMARY_MULTI_STANDBY = "single-primary-multi-standby" + CLUSTER_TYPE_SINGLE_INST = "single-inst" + + # ssh option + SSH_OPTION = " -o BatchMode=yes -o TCPKeepAlive=yes -o " \ + "ServerAliveInterval=30 -o ServerAliveCountMax=10 -o " \ + "ConnectTimeout=30 -o ConnectionAttempts=10 " + # base64 option + BASE_ENCODE = "encode" + BASE_DECODE = "decode" + + # Default name of the byte stream file which contain the disabled features. + DEFAULT_DISABLED_FEATURE_FILE_NAME = "gaussdb.version" + # Default license control file name. 
+ DEFAULT_LICENSE_FILE_NAME = "gaussdb.license" + + COLLECT_CONF_JSON_KEY_LIST = [ + "Content", + "TypeName", + "Interval", + "Count" + ] + COLLECT_CONF_CONTENT_MAP = { + # System check config + # cat /proc/cpuinfo; + "HardWareInfo": "cpuInfo,memInfo,disk", + # cat /proc/meminfo df -h + # top; ps ux; iostat + "RunTimeInfo": "ps,ioStat,netFlow,spaceUsage", + # -xm 2 3; netstat; free -m du -sh + # Log & Conf_Gstack check config + "Coordinator": "CN", + "DataNode": "DN", + "Gtm": "GTM", + # Log check config + "ClusterManager": "cm,om,bin", + # Core Dump check + "gaussdb": "gaussdb", + "gs_gtm": "gs_gtm", + "gs_rewind": "gs_rewind", + "cm_server": "cm_server", + "cm_agent": "cm_agent", + "gs_ctl": "gs_ctl", + "gaussdb_stack": "gaussdb_stack", + "gs_gtm_stack": "gs_gtm_stack", + "gs_rewind_stack": "gs_rewind_stack", + "cm_server_stack": "cm_server_stack", + "cm_agent_stack": "cm_agent_stack", + "gs_ctl_stack": "gs_ctl_stack", + "AioWorker": "AioWorker", + "AlarmChecker": "AlarmChecker", + "Archiver": "Archiver", + "Auditor": "Auditor", + "AutoVacLauncher": "AutoVacLauncher", + "AutoVacWorker": "AutoVacWorker", + "AuxMain": "AuxMain", + "BackendMode": "BackendMode", + "BgWriter": "BgWriter", + "BootStrap": "BootStrap", + "Catchup": "Catchup", + "CBMWriter": "CBMWriter", + "Checkpointer": "Checkpointer", + "CommAuxStream": "CommAuxStream", + "CommPoolCleaner": "CommPoolCleaner", + "CommRcvStream": "CommRcvStream", + "CommRcvWorker": "CommRcvWorker", + "CommSendStream": "CommSendStream", + "CpMonitor": "CpMonitor", + "DataRcvWriter": "DataRcvWriter", + "DataReceiver": "DataReceiver", + "DataSender": "DataSender", + "ExtremeRTO": "ExtremeRTO", + "FencedUDFMaster": "FencedUDFMaster", + "GaussMaster": "GaussMaster", + "Heartbeater": "Heartbeater", + "JobExecutor": "JobExecutor", + "LWLockMonitor": "LWLockMonitor", + "PageWriter": "PageWriter", + "ParallelRecov": "ParallelRecov", + "PercentileJob": "PercentileJob", + "Reaper": "Reaper", + "RemoteSrv": "RemoteSrv", + "StartupProcess": "StartupProcess", + "StatCollector": "StatCollector", + "Stream": "Stream", + "SysLogger": "SysLogger", + "ThdPoolListener": "ThdPoolListener", + "TwoPhaseCleaner": "TwoPhaseCleaner", + "WalRcvWriter": "WalRcvWriter", + "WalReceiver": "WalReceiver", + "WalSender": "WalSender", + "WalWriter": "WalWriter", + "WDRSnapshot": "WDRSnapshot", + "WlmArbiter": "WlmArbiter", + "WlmCollector": "WlmCollector", + "WlmMonitor": "WlmMonitor" + } + + COLLECT_CONF_MAP = { + "System": "HardWareInfo,RunTimeInfo", + "Database": "*", + "Log": "Coordinator,DataNode,Gtm,ClusterManager,FFDC,AWRReport", + "XLog": "Coordinator,DataNode", + "Config": "Coordinator,DataNode,Gtm", + "Gstack": "Coordinator,DataNode,Gtm", + "CoreDump": "gaussdb,gs_gtm,gs_rewind,cm_server,cm_agent,gs_ctl," + "gaussdb_stack,gs_gtm_stack,gs_rewind_stack," + "cm_server_stack,cm_agent_stack,cm_server_stack," + "gs_ctl_stack,AioWorker,AlarmChecker,Archiver,Auditor," + "AutoVacLauncher,AutoVacWorker,AuxMain,BackendMode," + "BgWriter,BootStrap,Catchup,CBMWriter,Checkpointer," + "CommAuxStream,CommPoolCleaner,CommRcvStream,CommRcvWorker," + "CommSendStream,CpMonitor,DataRcvWriter,DataReceiver," + "DataSender,ExtremeRTO,FencedUDFMaster,GaussMaster," + "Heartbeater,JobExecutor,JobScheduler,LWLockMonitor," + "PageWriter,ParallelRecov,PercentileJob,Reaper,RemoteSrv," + "StartupProcess,StatCollector,Stream,SysLogger," + "ThdPoolListener,TwoPhaseCleaner,WalRcvWriter,WalReceiver," + "WalSender,WalWriter,WDRSnapshot,WlmArbiter,WlmCollector," + "WlmMonitor", + "Trace": "Dump", + "Plan": 
"*" + } + + DATABASE_CHECK_WHITE_LIST = ["dbe_perf", "pg_catalog"] + + SYSTEM_CHECK_COMMAND_MAP = { + "cpuInfo": "cat /proc/cpuinfo", + "memInfo": "cat /proc/meminfo", + "disk": "df -h", + "ps": "ps ux", + "ioStat": "iostat -xm 2 3", + "netFlow": "cat /proc/net/dev", + "spaceUsage": "free -m" + } + + # Default retry times of SQL query attempts after successful + # operation "gs_ctl start". + DEFAULT_RETRY_TIMES_GS_CTL = 20 + CORE_PATH_DISK_THRESHOLD = 50 + + @staticmethod + def get_package_back_name(): + package_back_name = "%s-Package-bak_%s.tar.gz" % ( + VersionInfo.PRODUCT_NAME_PACKAGE, VersionInfo.getCommitid()) + return package_back_name + + @staticmethod + def aes_cbc_decrypt(content, key): + check_content_key(content, key) + if type(key) == str: + key = bytes(key) + iv_len = 16 + # pre shared key iv + iv = content[16 + 1 + 16 + 1:16 + 1 + 16 + 1 + 16] + + # pre shared key enctryt + enc_content = content[:iv_len] + backend = default_backend() + cipher = Cipher(algorithms.AES(key), modes.CBC(iv), backend=backend) + decrypter = cipher.decryptor() + dec_content = decrypter.update(enc_content) + decrypter.finalize() + dec_content = dec_content.rstrip('\0') + server_decipher_key = dec_content[:len(dec_content) - 1] + return server_decipher_key + + @staticmethod + def aes_cbc_decrypt_with_path(path): + with open(path + '/client.key.cipher', 'r') as f: + cipher_txt = f.read() + with open(path + '/client.key.rand', 'r') as f: + rand_txt = f.read() + + if cipher_txt is None or cipher_txt == "": + return None + + server_vector_cipher_vector = cipher_txt[16 + 1:16 + 1 + 16] + # pre shared key rand + server_key_rand = rand_txt[:16] + + # worker key + server_decrypt_key = hashlib.pbkdf2_hmac('sha256', server_key_rand, + server_vector_cipher_vector, + 10000, 16) + + enc = DefaultValue.aes_cbc_decrypt(cipher_txt, server_decrypt_key) + return enc + + # Cert type + GRPC_CA = "grpc" + SERVER_CA = "server" + + @staticmethod + def encodeParaline(cmd, keyword): + """ + """ + if (keyword == "encode"): + cmd = base64.b64encode(cmd.encode()).decode() + return cmd + if (keyword == "decode"): + cmd = base64.b64decode(cmd.encode()).decode() + return cmd + + @staticmethod + def checkBondMode(bondingConfFile, isCheckOS=True): + """ + function : Check Bond mode + input : String, bool + output : List + """ + netNameList = [] + + cmd = "grep -w 'Bonding Mode' %s | awk -F ':' '{print $NF}'" % \ + bondingConfFile + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0 or output.strip() == ""): + raise Exception(ErrorCode.GAUSS_506["GAUSS_50611"] + + " Command:%s. Error:\n%s" % (cmd, output)) + + if (isCheckOS): + print("BondMode %s" % output.strip()) + + cmd = "grep -w 'Slave Interface' %s | awk -F ':' '{print $NF}'" % \ + bondingConfFile + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_506["GAUSS_50611"] + + " Command:%s. 
Error:\n%s" % (cmd, output)) + for networkname in output.split('\n'): + netNameList.append(networkname.strip()) + return netNameList + + @staticmethod + def getNetWorkBondFlag(networkCardNum): + """ + function: Check if the network interface card number is bondCard + by psutil module + input: network interface card number + output: FLAG, netcardList + """ + try: + FLAG = False + nicAddr = "" + netcardList = [] + netWorkInfo = psutil.net_if_addrs() + for snic in netWorkInfo[networkCardNum]: + if snic.family == 17: + nicAddr = snic.address + if nicAddr == "": + return FLAG, netcardList + for net_num in netWorkInfo.keys(): + if net_num == networkCardNum: + continue + for netInfo in netWorkInfo[net_num]: + if netInfo.address == nicAddr: + netcardList.append(net_num) + if len(netcardList) >= 2: + FLAG = True + for net_num in netcardList: + cmd = "ip link | grep '%s'" % net_num + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception((ErrorCode.GAUSS_514["GAUSS_51400"] % + cmd) + "\nError: %s" % output) + if str(output).find("master %s" % networkCardNum) == -1: + FLAG = False + netcardList = [] + break + return FLAG, netcardList + except Exception as e: + raise Exception(ErrorCode.GAUSS_530["GAUSS_53011"] % ( + "if the netcardNum[%s] is bondCard" % networkCardNum) + + " Error: \n%s" % str(e)) + + @staticmethod + def CheckNetWorkBonding(serviceIP, isCheckOS=True): + """ + function : Check NetWork ConfFile + input : String, bool + output : List + """ + networkCardNum = DefaultValue.getNICNum(serviceIP) + NetWorkConfFile = DefaultValue.getNetWorkConfFile(networkCardNum) + bondingConfFile = "/proc/net/bonding/%s" % networkCardNum + networkCardNumList = [] + networkCardNumList.append(networkCardNum) + if os.path.exists(NetWorkConfFile): + cmd = "grep -i 'BONDING_OPTS\|BONDING_MODULE_OPTS' %s" % \ + NetWorkConfFile + (status, output) = subprocess.getstatusoutput(cmd) + if ((status == 0) and (output.strip() != "")): + if ((output.find("mode") > 0) and os.path.exists( + bondingConfFile)): + networkCardNumList = networkCardNumList + \ + DefaultValue.checkBondMode( + bondingConfFile, isCheckOS) + else: + raise Exception(ErrorCode.GAUSS_506["GAUSS_50611"] + + " Command:%s. Error:\n%s" % (cmd, output)) + elif isCheckOS: + print("BondMode Null") + else: + (flag, netcardList) = DefaultValue.getNetWorkBondFlag( + networkCardNum) + if flag: + if os.path.exists(bondingConfFile): + networkCardNumList = networkCardNumList + \ + DefaultValue.checkBondMode( + bondingConfFile, isCheckOS) + else: + sys.exit(ErrorCode.GAUSS_506["GAUSS_50611"] + + "Without NetWorkConfFile mode.") + else: + print("BondMode Null") + if (len(networkCardNumList) != 1): + del networkCardNumList[0] + return networkCardNumList + + @staticmethod + def checkNetWorkMTU(nodeIp, isCheckOS=True): + """ + function: gs_check check NetWork card MTU parameters + input: string, string + output: int + """ + try: + networkCardNum = DefaultValue.CheckNetWorkBonding(nodeIp, + isCheckOS) + mtuValue = psutil.net_if_stats()[networkCardNum[0]].mtu + if (not mtuValue): + return " Abnormal reason: Failed to obtain " \ + "network card MTU value." + return mtuValue + except Exception as e: + return " Abnormal reason: Failed to obtain the " \ + "networkCard parameter [MTU]. 
Error: \n %s" % str(e)
+
+    @staticmethod
+    def getNetWorkConfFile(networkCardNum):
+        """
+        function : Get NetWork ConfFile
+        input : int
+        output : String
+        """
+        SuSENetWorkConfPath = "/etc/sysconfig/network"
+        RedHatNetWorkConfPath = "/etc/sysconfig/network-scripts"
+        NetWorkConfFile = ""
+        distname, version, idnum = g_Platform.dist()
+        distname = distname.lower()
+        if (distname in ("redhat", "centos", "euleros", "openeuler")):
+            NetWorkConfFile = "%s/ifcfg-%s" % (RedHatNetWorkConfPath,
+                                               networkCardNum)
+        else:
+            NetWorkConfFile = "%s/ifcfg-%s" % (SuSENetWorkConfPath,
+                                               networkCardNum)
+
+        if (not os.path.exists(NetWorkConfFile)):
+            if (distname in (
+                    "redhat", "centos", "euleros", "openeuler")):
+                cmd = "find %s -iname 'ifcfg-*-%s' -print" % (
+                    RedHatNetWorkConfPath, networkCardNum)
+            else:
+                cmd = "find %s -iname 'ifcfg-*-%s' -print" % (
+                    SuSENetWorkConfPath, networkCardNum)
+            (status, output) = subprocess.getstatusoutput(cmd)
+            if (status != 0 and DefaultValue.checkDockerEnv()):
+                return output.strip()
+            if (status != 0):
+                raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd)
+            if (len(output.split('\n')) != 1):
+                raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] %
+                                NetWorkConfFile)
+
+            NetWorkConfFile = output.strip()
+
+        return NetWorkConfFile
+
+    @staticmethod
+    def getNICNum(ipAddress):
+        """
+        function: Obtain the network interface card number by the psutil
+                  module
+        input: ipAddress
+        output: netWorkNum
+        """
+        try:
+            netWorkNum = ""
+            netWorkInfo = psutil.net_if_addrs()
+            for nic_num in netWorkInfo.keys():
+                for netInfo in netWorkInfo[nic_num]:
+                    if netInfo.address == ipAddress:
+                        netWorkNum = nic_num
+                        break
+            if netWorkNum == "":
+                raise Exception(ErrorCode.GAUSS_506["GAUSS_50604"] %
+                                ipAddress)
+            return netWorkNum
+        except Exception as e:
+            raise Exception(ErrorCode.GAUSS_506["GAUSS_50604"] % ipAddress +
+                            " Error: \n%s" % str(e))
+
+    @staticmethod
+    def getIpAddressList():
+        """
+        Obtain all IPv4 addresses by the psutil module.
+        """
+        try:
+            ipAddressList = []
+            netWorkInfo = psutil.net_if_addrs()
+            for per_num in netWorkInfo.keys():
+                netInfo = netWorkInfo[per_num][0]
+                if (len(netInfo.address.split('.')) == 4):
+                    ipAddressList.append(netInfo.address)
+            if (len(ipAddressList) == 0):
+                raise Exception(ErrorCode.GAUSS_506["GAUSS_50616"])
+            return ipAddressList
+        except Exception as e:
+            raise Exception(ErrorCode.GAUSS_506["GAUSS_50616"] +
+                            " Error: \n%s" % str(e))
+
+    @staticmethod
+    def getIpByHostName():
+        '''
+        function: get the local host IP by the hostname
+        input : NA
+        output: hostIp
+        '''
+        # get hostname
+        hostname = socket.gethostname()
+
+        # get the local host from /etc/hosts
+        cmd = "grep -E \"^[1-9 \\t].*%s[ \\t]*#Gauss.* IP Hosts Mapping$\" " \
+              "/etc/hosts | grep -E \" %s \"" % (hostname, hostname)
+        (status, output) = subprocess.getstatusoutput(cmd)
+        if (status == 0 and output != ""):
+            hostIp = output.strip().split(' ')[0].strip()
+            return hostIp
+
+        # get the local host by the os function
+        hostIp = socket.gethostbyname(hostname)
+        return hostIp
+
+    @staticmethod
+    def GetHostIpOrName():
+        """
+        function: Obtain the local host name
+        input: NA
+        output: NA
+        """
+        return g_OSlib.getHostName()
+
+    @staticmethod
+    def GetPythonUCS():
+        """
+        function: get the python3 unicode value, and use it to choose
+                  which Crypto we need:
+                  1114111 is Crypto_UCS4
+                  65535 is Crypto_UCS2
+                  the value 0 is returned only as a fallback.
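+                  (For reference: sys.maxunicode is always 1114111 on
+                  Python 3.3 and later, so this normally returns 4; the
+                  65535 branch corresponds to legacy narrow/UCS2 builds.)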
+ input: NA + output: NA + """ + if sys.maxunicode == 1114111: + return 4 + elif sys.maxunicode == 65535: + return 2 + else: + return 0 + + @staticmethod + def checkPythonVersion(): + """ + function : Check system comes with Python version + input : NA + output: list + """ + (major, minor, patchlevel) = platform.python_version_tuple() + if (str(major) == '3' and (str(minor) in ['6', '7'])): + if (str(minor) == '6'): + return (True, "3.6") + else: + return (True, "3.7") + else: + return (False, "%s.%s.%s" % (str(major), str(minor), + str(patchlevel))) + + @staticmethod + def getUserId(user): + """ + function : get user id + input : user + output : user id + """ + try: + pwd.getpwnam(user).pw_uid + except Exception as e: + raise Exception(ErrorCode.GAUSS_503["GAUSS_50300"] % user + + "Detail msg: %s" % str(e)) + + @staticmethod + def checkUser(user, strict=True): + """ + function : Check if user exists and if is the right user + input : String,boolean + output : NA + """ + # get group + try: + DefaultValue.getUserId(user) + except Exception as e: + raise Exception(str(e)) + + # if not strict, skip + if (not strict): + return + + # get $GAUSS_ENV, and makesure the result is correct. + mpprcFile = DefaultValue.getEnv(DefaultValue.MPPRC_FILE_ENV) + if (mpprcFile != "" and mpprcFile is not None): + gaussEnv = DefaultValue.getEnvironmentParameterValue("GAUSS_ENV", + user, + mpprcFile) + else: + gaussEnv = DefaultValue.getEnvironmentParameterValue("GAUSS_ENV", + user, + "~/.bashrc") + if not gaussEnv or str(gaussEnv) != "2": + raise Exception(ErrorCode.GAUSS_503["GAUSS_50300"] % + ("installation path of designated user %s" % user) + + " Maybe the user is not right.") + + @staticmethod + def getMpprcFile(): + """ + function : get mpprc file + input : NA + output : String + """ + try: + # get mpp file by env parameter MPPDB_ENV_SEPARATE_PATH + mpprcFile = DefaultValue.getEnv(DefaultValue.MPPRC_FILE_ENV) + if (mpprcFile != "" and mpprcFile is not None): + userProfile = mpprcFile + if (not os.path.isabs(userProfile)): + raise Exception(ErrorCode.GAUSS_512["GAUSS_51206"] % + userProfile) + if (not os.path.exists(userProfile)): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % + userProfile) + elif (os.getuid() == 0): + return "/etc/profile" + else: + userAbsoluteHomePath = g_Platform.getUserHomePath() + userProfile = os.path.join(userAbsoluteHomePath, ".bashrc") + if (not os.path.isfile(userProfile)): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50210"] % + userProfile) + return userProfile + except Exception as e: + raise Exception(str(e)) + + @staticmethod + def isIpValid(ip): + """ + function : check if the input ip address is valid + input : String + output : NA + """ + Valid = re.match("^(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]" + "{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|" + "[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|" + "[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\." + "(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}" + "[0-9]{1}|[0-9])$", ip) + if Valid: + if (Valid.group() == ip): + return True + return False + + @staticmethod + def doConfigForParamiko(): + """ + function: Config depend file for pramiko 2.4.2. 
wen only support 2.7.x + input : NA + output: NA + """ + (result, version) = DefaultValue.checkPythonVersion() + if not result: + print(ErrorCode.GAUSS_522["GAUSS_52201"] % version + + " It must be 3.6.x or 3.7.x.") + sys.exit(1) + else: + localDir = os.path.dirname(os.path.realpath(__file__)) + omToolsCffiPath = os.path.join(localDir, + "./../../../lib/_cffi_backend.so") + inspectToolsCffiPath = os.path.join( + localDir, "./../../../script/gspylib/inspection/" + "lib/_cffi_backend.so") + + """ + Never remove _cffi_backend.so_UCS4 folder, as there maybe + multi-version pythons on the platform + (V1R8C10 is with its own python, but now, we don't package + python any more). + """ + try: + flagNum = int(DefaultValue.GetPythonUCS()) + # clean the old path info + g_file.removeFile(omToolsCffiPath) + g_file.removeFile(inspectToolsCffiPath) + # copy the correct version + newPythonDependCryptoPath = "%s_UCS%d_%s" % (omToolsCffiPath, + flagNum, version) + if os.path.exists(newPythonDependCryptoPath): + g_file.cpFile(newPythonDependCryptoPath, omToolsCffiPath, + "shell") + g_file.cpFile(newPythonDependCryptoPath, inspectToolsCffiPath, + "shell") + else: + newPythonDependCryptoPath = "%s_UCS%d" % (omToolsCffiPath, + flagNum) + g_file.cpFile(newPythonDependCryptoPath, omToolsCffiPath, + "shell") + g_file.cpFile(newPythonDependCryptoPath, inspectToolsCffiPath, + "shell") + except Exception as e: + print(ErrorCode.GAUSS_516["GAUSS_51632"] % + ("config depend file for paramiko 2.6.0. " + "Error:\n%s" % str(e))) + sys.exit(1) + sys.path.insert(0, os.path.join(localDir, "./../../lib")) + + @staticmethod + def getInstallDir(user): + """ + function : Get the installation directory for user + input : NA + output : String + """ + # get the installation directory for user by $GAUSSHOME + gaussHome = DefaultValue.getEnvironmentParameterValue("GAUSSHOME", + user) + return gaussHome + + @staticmethod + def getTmpDir(user, xml_path): + """ + function : Get the temporary directory for user + input : NA + output : String + """ + return dbClusterInfo.readClusterTmpMppdbPath(user, xml_path) + + @staticmethod + def getTmpDirFromEnv(user=""): + """ + function : Get the temporary directory from PGHOST + precondition: only root user or install user can call this function + input : String + output : String + """ + tmpDir = "" + if (os.getuid() == 0 and user == ""): + return tmpDir + # get the temporary directory from PGHOST + tmpDir = DefaultValue.getEnvironmentParameterValue("PGHOST", user) + return tmpDir + + @staticmethod + def getTmpFileFromEnv(fileName="", user="", desc=""): + """ + function : Get the temporary directory from PGHOST + precondition: only root user or install user can call this function + input : String + output : String + """ + tmpDir = DefaultValue.getTmpDirFromEnv(user) + + # get current time + currentTime = time.strftime("%Y-%m-%d_%H%M%S") + # split the log file by '.' 
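+        # (e.g. a hypothetical "dump.sql" splits into prefix "dump" and
+        # suffix "sql", yielding "dump-<time>-<pid>[-<desc>].sql" below)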
+ # rebuild the file name + # before rebuild: prefix.suffix + # after rebuild: prefix-currentTime-pid-desc.suffix + if fileName.find(".") >= 0: + tmpList = fileName.split(".") + prefix = tmpList[0] + suffix = tmpList[1] + if (desc == ""): + tmpFile = os.path.join(tmpDir, "%s-%s-%d.%s" % ( + prefix, currentTime, os.getpid(), suffix)) + else: + tmpFile = os.path.join(tmpDir, "%s-%s-%d-%s.%s" % ( + prefix, currentTime, os.getpid(), desc, suffix)) + else: + tmpFile = os.path.join(tmpDir, "%s-%s-%d" % (fileName, currentTime, + os.getpid())) + return tmpFile + + @staticmethod + def getTmpDirAppendMppdb(user): + """ + function : Get the user's temporary directory + input : String + output : String + """ + # get the user's temporary directory + tmpDir = DefaultValue.getTmpDirFromEnv(user) + # if the env paramter not exist, return "" + if (tmpDir == ""): + return tmpDir + # modify tmp dir + forbidenTmpDir = "/tmp/%s" % user + if (tmpDir == forbidenTmpDir): + tmpDir = os.path.join(DefaultValue.getEnv("GPHOME"), + "%s_mppdb" % user) + return tmpDir + + @staticmethod + def getUserFromXml(xml_path): + """ + function : Get the user from xml file + input : String + output : String + """ + # the function must return a value. no matter it is correct or not + try: + bin_path = dbClusterInfo.readClusterAppPath(xml_path) + DefaultValue.checkPathVaild(bin_path) + user = g_OSlib.getPathOwner(bin_path)[0] + except Exception as e: + user = "" + + return user + + @staticmethod + def getEnvironmentParameterValue(environmentParameterName, user, + env_file=None): + """ + function : Get the environment parameter value from user + input : String,String + output : String + """ + userFlag = False + cmd = "cat /etc/passwd|grep -v nologin|grep -v halt|grep -v " \ + "shutdown|" \ + "awk -F: '{ print $1 }'| grep '^%s$' 2>/dev/null" % user + status, output = subprocess.getstatusoutput(cmd) + if output and status == 0: + DefaultValue.getUserId(user) + # User exists, need to check passwd. + userFlag = True + + if userFlag and os.getuid() == 0: + # Only user with root permission need check if password must + # change. + DefaultValue.checkPasswdForceChange(user) + + if (env_file is not None): + userProfile = env_file + else: + userProfile = DefaultValue.getMpprcFile() + # buid the shell command + executeCmd = "echo $%s" % environmentParameterName + cmd = g_Platform.getExecuteCmdWithUserProfile(user, userProfile, + executeCmd) + (status, output) = subprocess.getstatusoutput(cmd) + if (status == 0): + EnvValue = output.split("\n")[0] + EnvValue = EnvValue.replace("\\", "\\\\").replace('"', '\\"\\"') + DefaultValue.checkPathVaild(EnvValue) + return EnvValue + else: + return "" + + @staticmethod + def checkPasswdForceChange(checkUser): + """ + function: Check if user password is forced to change at next login. + input : user name + output: NA + """ + distname, version, currentid = g_Platform.dist() + if (distname.lower() in ("suse", "redhat", "centos", "euleros", + "openeuler")): + cmd = g_file.SHELL_CMD_DICT["checkPassword"] % (checkUser, + "'^Last.*Change'") + else: + return + (timestatus, output) = subprocess.getstatusoutput(cmd) + if (timestatus != 0): + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + " Error:\n%s" % output) + if (output == ""): + return + result = output.split(":")[1].strip() + # If passwd is forced to change. Throw error code. 
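+        # (Illustrative: when the password has been expired, e.g. via
+        # "chage -d 0 <user>", the field checked here typically reads
+        # "Last password change : password must be changed".)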
+ if (distname.lower() == "suse"): + if (version == '11'): + if ("password is forced to change at next login" in result): + raise Exception(ErrorCode.GAUSS_503["GAUSS_50307"]) + elif (version == '12'): + if ("password must be changed" in result): + raise Exception(ErrorCode.GAUSS_503["GAUSS_50307"]) + if (distname.lower() in ("redhat", "centos", "euleros", + "openeuler")): + if ("password must be changed" in result): + raise Exception(ErrorCode.GAUSS_503["GAUSS_50307"]) + + @staticmethod + def getClusterToolPath(user): + """ + function : Get the value of cluster's tool path. + The value can't be None or null + input : NA + output : String + """ + mpprcFile = DefaultValue.getEnv(DefaultValue.MPPRC_FILE_ENV) + echoEnvCmd = "echo $%s" % DefaultValue.TOOL_PATH_ENV + if not mpprcFile: + userpath = pwd.getpwnam(user).pw_dir + mpprcFile = os.path.join(userpath, ".bashrc") + cmd = g_Platform.getExecuteCmdWithUserProfile("", mpprcFile, + echoEnvCmd) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + raise Exception(ErrorCode.GAUSS_518["GAUSS_51802"] % + DefaultValue.TOOL_PATH_ENV + + " Command:%s. Error:\n%s" % (cmd, output)) + + clusterToolPath = output.split("\n")[0] + if not clusterToolPath: + raise Exception(ErrorCode.GAUSS_518["GAUSS_51800"] % + DefaultValue.TOOL_PATH_ENV + "Value: %s." % + clusterToolPath) + + # Check if the path contains illegal characters + DefaultValue.checkPathVaild(clusterToolPath) + + return clusterToolPath + + @staticmethod + def getPreClusterToolPath(user, xml): + """ + function: get the cluster tool path + input : NA + output: NA + """ + try: + configedPath = DefaultValue.getOneClusterConfigItem( + "gaussdbToolPath", user, xml) + if (configedPath == ""): + configedPath = DefaultValue.CLUSTER_TOOL_PATH + DefaultValue.checkPathVaild(configedPath) + return configedPath + except Exception as e: + raise Exception(str(e)) + + @staticmethod + def getOneClusterConfigItem(item_name, user, xml): + """ + function: get the OM log path + input : NA + output: NA + """ + try: + # set env paramter CLUSTERCONFIGFILE + os.putenv("CLUSTERCONFIGFILE", xml) + # read one cluster configuration item "cluster" + (retStatus, retValue) = readOneClusterConfigItem( + initParserXMLFile(xml), item_name, "cluster") + if (retStatus == 0): + return os.path.normpath(retValue) + else: + return "" + except Exception as e: + raise Exception(str(e)) + + @staticmethod + def getUserLogDirWithUser(user): + """ + function : Get the log directory from user + input : String + output : String + """ + log_path = "" + try: + log_path = DefaultValue.getEnvironmentParameterValue("GAUSSLOG", + user) + except Exception as e: + log_path = "%s/%s" % (DefaultValue.GAUSSDB_DIR, user) + return log_path + + @staticmethod + def getOMLogPath(logName, user="", appPath="", xml="", action=""): + """ + function : Get the OM log path from xml file + input : String + output : String + """ + logPath = "" + try: + if (user != "" and xml != ""): + logPath = "%s" % dbClusterInfo.readClusterLogPath(xml) + path = "%s/%s/om/%s" % (logPath, user, logName) + elif (action == "virtualip"): + path = "/var/log/gs_virtualip/%s" % (logName) + elif (user != ""): + logPath = DefaultValue.getUserLogDirWithUser(user) + path = "%s/om/%s" % (logPath, logName) + elif (appPath != ""): + user = g_OSlib.getPathOwner(appPath)[0] + if (user == ""): + user = "." 
+ if (user == "."): + logPath = DefaultValue.GAUSSDB_DIR + else: + logPath = DefaultValue.getUserLogDirWithUser(user) + path = "%s/om/%s" % (logPath, logName) + elif (xml != ""): + try: + appPath = dbClusterInfo.readClusterAppPath(xml) + user = g_OSlib.getPathOwner(appPath)[0] + except Exception as e: + user = "." + if (user == ""): + user = "." + if (user == "."): + logPath = DefaultValue.GAUSSDB_DIR + else: + logPath = DefaultValue.getUserLogDirWithUser(user) + path = "%s/om/%s" % (logPath, logName) + else: + logPath = DefaultValue.GAUSSDB_DIR + path = "%s/om/%s" % (logPath, logName) + except Exception as e: + logPath = DefaultValue.GAUSSDB_DIR + path = "%s/om/%s" % (logPath, DefaultValue.LOCAL_LOG_FILE) + + return os.path.realpath(path) + + @staticmethod + def getBackupDir(user, subDir=""): + """ + function : Get the cluster's default backup directory for upgrade + input : String + output : String + """ + bakDir = "%s/backup" % DefaultValue.getClusterToolPath(user) + if (subDir != ""): + bakDir = os.path.join(bakDir, subDir) + + return bakDir + + @staticmethod + def getAppVersion(appPath=""): + """ + function : Get the version of application by $GAUSS_VERSION + input : String + output : String + """ + # get user and group + (user, group) = g_OSlib.getPathOwner(appPath) + if (user == "" or group == ""): + return "" + + # build shell command + # get the version of application by $GAUSS_VERSION + gaussVersion = DefaultValue.getEnvironmentParameterValue( + "GAUSS_VERSION", user) + return gaussVersion + + @staticmethod + def getUserHome(user=""): + """ + function :Get the user Home + input : String + output : String + """ + cmd = "su - %s -c \"echo ~\" 2>/dev/null" % user + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0 or output.strip() == "": + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + " Error:\n%s" % output) + return output.strip() + + + @staticmethod + def getAppBVersion(appPath=""): + """ + function :Get the version of application by $GAUSS_VERSION + input : String + output : String + """ + # get user and group + (user, group) = g_OSlib.getPathOwner(appPath) + if (user == "" or group == ""): + return "" + # build shell command + userProfile = DefaultValue.getMpprcFile() + executeCmd = "gaussdb -V" + cmd = g_Platform.getExecuteCmdWithUserProfile(user, userProfile, + executeCmd, False) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + return "" + return output.replace('gaussdb ', '').strip() + + @staticmethod + def getOSInitFile(): + """ + function : Get the OS initialization file + input : NA + output : String + """ + distname, version, currentid = g_Platform.dist() + systemDir = "/usr/lib/systemd/system/" + systemFile = "/usr/lib/systemd/system/gs-OS-set.service" + # OS init file + # now we only support SuSE and RHEL/CentOS + initFileSuse = "/etc/init.d/boot.local" + initFileRedhat = "/etc/rc.d/rc.local" + # system init file + initSystemFile = "/usr/local/gauss/script/gauss-OS-set.sh" + initSystemPath = "/usr/local/gauss/script" + dirName = os.path.dirname(os.path.realpath(__file__)) + + # Get the startup file of suse or redhat os + if (os.path.isdir(systemDir)): + # Judge if cgroup para 'Delegate=yes' is written in systemFile + cgroup_gate = False + cgroup_gate_para = "Delegate=yes" + if os.path.exists(systemFile): + with open(systemFile, 'r') as fp: + retValue = fp.readlines() + for line in retValue: + if line.strip() == cgroup_gate_para: + cgroup_gate = True + break + + if (not os.path.exists(systemFile) or not 
cgroup_gate): + srcFile = "%s/../etc/conf/gs-OS-set.service" % dirName + g_file.cpFile(srcFile, systemFile) + g_file.changeMode(DefaultValue.KEY_FILE_MODE, systemFile) + # only support RHEL/Centos/Euler + if (distname != "SuSE"): + # enable gs-OS-set.service + (status, output) = g_service.manageOSService("gs-OS-set", + "enable") + if (status != 0): + raise Exception(ErrorCode.GAUSS_508["GAUSS_50802"] % + "enable gs-OS-set" + " Error: \n%s" % + output) + + if (not os.path.exists(initSystemPath)): + g_file.createDirectory(initSystemPath) + if (not os.path.exists(initSystemFile)): + g_file.createFile(initSystemFile, False) + g_file.writeFile(initSystemFile, ["#!/bin/bash"], "w") + g_file.changeMode(DefaultValue.KEY_DIRECTORY_MODE, initSystemFile) + return initSystemFile + if (distname == "SuSE" and os.path.isfile(initFileSuse)): + initFile = initFileSuse + elif (distname in ("redhat", "centos", "euleros", "openEuler") and + os.path.isfile(initFileRedhat)): + initFile = initFileRedhat + else: + initFile = "" + + return initFile + + @staticmethod + def getNetworkConfiguredFile(ip): + """ + function: get network configuration file + input: ip + output: networkFile + """ + pattern = re.compile("ifcfg-.*:.*") + networkFile = "" + try: + for filename in os.listdir(DefaultValue.REDHAT_NETWORK_PATH): + result = pattern.match(filename) + if (result is None): + continue + paramfile = "%s/%s" % (DefaultValue.REDHAT_NETWORK_PATH, + filename) + with open(paramfile, "r") as fp: + fileInfo = fp.readlines() + # The current opened file is generated while configing + # virtual IP, + # there are 3 lines in file, and the second line is IPADDR=IP + if len(fileInfo) == 3 and \ + fileInfo[1].find("IPADDR=%s" % ip) >= 0: + networkFile += "%s " % paramfile + return networkFile + except Exception as e: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50219"] % + "network configuration file" + + " Error: \n%s " % str(e)) + + @staticmethod + def getMatchingResult(matchExpression, fileMatching, remoteHostName=""): + """ + """ + cmd = "%s -E '%s' %s" % (g_Platform.getGrepCmd(), + matchExpression, fileMatching) + if ("" != remoteHostName and remoteHostName != + DefaultValue.GetHostIpOrName()): + cmd = g_OSlib.getSshCommand(remoteHostName, cmd) + (status, output) = subprocess.getstatusoutput(cmd) + return (status, output) + + @staticmethod + def preConfigFile(filename): + """ + function: pretreatment configuration file, delete the ' ' or + '\t' when they top of line + input: filename + output: NA + """ + try: + (status, output) = DefaultValue.getMatchingResult("^[ \\t]", + filename) + if (status != 0): + return + listLine = output.split('\n') + for strline in listLine: + g_file.replaceFileLineContent("^%s$" % strline, + strline.strip(), filename) + + except Exception as e: + raise Exception(str(e)) + + @staticmethod + def getConfigFilePara(configFile, section, checkList=None, + optionsName=None): + """ + function: get the configuration file(check_list.conf) + input: section: the section in check_list.conf will be get + optionsName: the parameter list will be get, if parameter + is NULL, then get all + output: dist + """ + if checkList is None: + checkList = [] + if optionsName is None: + optionsName = [] + try: + DefaultValue.preConfigFile(configFile) + + # read the check_list.conf + data = {} + fp = configparser.RawConfigParser() + fp.read(configFile) + + # get the sections then check the section whether or not + # in check_list.conf + secs = fp.sections() + if section not in secs: + return data + + # get the parameters then 
check options whether or not in + # section parameters + optionList = fp.options(section) + if (len(optionsName) != 0 and optionsName not in optionList): + return data + elif (len(optionsName) != 0): + optionList = optionsName + + # get th parameter values + for key in optionList: + value = fp.get(section, key) + if (len(value.split()) == 0): + raise Exception(ErrorCode.GAUSS_500["GAUSS_50012"] % key) + value = value.split('#')[0] + if (key in checkList and not value.isdigit()): + raise Exception(ErrorCode.GAUSS_500["GAUSS_50003"] + % (key, "digit")) + if (section == '/etc/security/limits.conf' and not + value.isdigit() and value != 'unlimited'): + raise Exception(ErrorCode.GAUSS_500["GAUSS_50004"] % key) + data[key] = value + + if ("vm.min_free_kbytes" in list(data.keys())): + swapTotalSize = g_memory.getMemTotalSize() // 1024 + multiple = data["vm.min_free_kbytes"].split('*')[1].split('%')[ + 0].strip() + val = int(swapTotalSize) * int(multiple) // 100 + data["vm.min_free_kbytes"] = str(val) + + return data + except Exception as e: + raise Exception(ErrorCode.GAUSS_512["GAUSS_51234"] % configFile + + " Error: \n%s" % str(e)) + + @staticmethod + def checkInList(listsrc, listdest): + """ + function: check the listsrc element is not in listdest + input: listsrc, listdest + output: True or False + """ + if (listsrc == [] or listdest == []): + return False + + for key in listsrc: + if (key in listdest): + return True + return False + + @staticmethod + def checkSSDInstalled(): + """ + function: check SSD + input: NA + output: True/False + """ + cmd = "hio_info" + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + return False + return True + + @staticmethod + def Deduplication(listname): + """ + function: Deduplication the list + input : NA + output: NA + """ + listname.sort() + for i in range(len(listname) - 2, -1, -1): + if listname.count(listname[i]) > 1: + del listname[i] + return listname + + @staticmethod + def getEnv(envparam, default_value=None): + """ + function: get the filter environment variable + input:envparam: String + default_value: String + output:envValue + """ + try: + envValue = os.getenv(envparam) + + if envValue is None: + if default_value: + return default_value + else: + return envValue + + envValue = envValue.replace("\\", "\\\\").replace('"', '\\"\\"') + + DefaultValue.checkPathVaild(envValue) + + return envValue + except Exception as e: + raise Exception(str(e)) + + @staticmethod + def checkPathVaild(envValue): + """ + function: check path vaild + input : envValue + output: NA + """ + if (envValue.strip() == ""): + return + for rac in DefaultValue.PATH_CHECK_LIST: + flag = envValue.find(rac) + if flag >= 0: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50219"] % envValue + + " There are illegal characters in the path.") + + @staticmethod + def checkPasswordVaild(password, user="", clusterInfo=None): + """ + function: check password vaild + input : password + output: NA + """ + # rule1: check if the password contains illegal characters + for rac in DefaultValue.PASSWORD_CHECK_LIST: + flag = password.find(rac) + if flag >= 0: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50219"] % + "the password" + " The password contains " + "illegal characters.") + + @staticmethod + def getPathFileOfENV(envName): + """ + function : Get the env. 
+ input : envName + output + """ + value = DefaultValue.getEnv(envName) + if (value and not g_file.checkClusterPath(value)): + raise Exception(ErrorCode.GAUSS_518["GAUSS_51805"] % envName + + "It may have been modified after the cluster " + "installation is complete.") + return value + + @staticmethod + def checkPackageOS(): + """ + function : get and check binary file + input : NA + output : boolean + """ + try: + (fileSHA256, sha256Value) = g_OSlib.getFileSHA256Info() + if (fileSHA256 != sha256Value): + raise Exception(ErrorCode.GAUSS_516["GAUSS_51635"] + + "The SHA256 value is different. \nBin file: " + "%s\nSHA256 file: %s." % (fileSHA256, + sha256Value)) + return True + except Exception as e: + raise Exception(str(e)) + + @staticmethod + def removeTmpMpp(mpprcFile): + mppTmp_rm = os.path.dirname(mpprcFile) + "/mpprcfile_tmp" + if (os.path.exists(mppTmp_rm)): + g_file.removeDirectory(mppTmp_rm) + + @staticmethod + def checkRemoteDir(g_sshTool, remoteDir, hostname, mpprcFile="", + localMode=False): + ''' + function: check the remoteDir is existing on hostname + input: remoteDir, hostname, mpprcFile + output:NA + ''' + try: + # check package dir + # package path permission can not change to 750, or it will have + # permission issue. + toolpath = remoteDir.split("/") + toolpath[0] = "/" + toolpath[0] + pathcmd = "" + for path in toolpath: + if (path == ""): + continue + cmd = g_file.SHELL_CMD_DICT["createDir"] % \ + (path, path, DefaultValue.MAX_DIRECTORY_MODE) + pathcmd += "%s; cd '%s';" % (cmd, path) + pathcmd = pathcmd[:-1] + DefaultValue.execCommandWithMode(pathcmd, + "check package directory", + g_sshTool, + localMode, + mpprcFile, + hostname) + except Exception as e: + raise Exception(str(e)) + + @staticmethod + def checkAllNodesMpprcFile(hostList, appPath, mpprcFile): + """ + function:check All Nodes MpprcFile + input: hostList, appPath, mpprcFile + output:NA + """ + # get mppfile, make sure it exists + if mpprcFile is None or mpprcFile == "/etc/profile" or mpprcFile == \ + "~/.bashrc" or \ + not os.path.exists(mpprcFile): + return + if (len(hostList) == 0): + raise Exception(ErrorCode.GAUSS_512["GAUSS_51203"] % "hostanme") + mppTmp = os.path.dirname(mpprcFile) + "/mpprcfile_tmp" + # Clean old tmp dir + DefaultValue.removeTmpMpp(mpprcFile) + # Create tmp dir for all mppfile + g_file.createDirectory(mppTmp) + # Copy every mppfile, rename them by hostname + for host in hostList: + catCmd = "%s %s > /dev/null 2>&1" % (g_Platform.getCatCmd(), + mpprcFile) + cmd = g_OSlib.getSshCommand(host, catCmd) + (status, output) = subprocess.getstatusoutput(cmd) + if (status == 0): + tmpEnv = "%s/%s_env" % (mppTmp, host) + scpCmd = g_Platform.getRemoteCopyCmd(mpprcFile, tmpEnv, host, + False) + (status, output) = subprocess.getstatusoutput(scpCmd) + DefaultValue.execCommandLocally(scpCmd) + DefaultValue.checkMpprcFileChange(tmpEnv, host, mpprcFile) + + # remove tmp dir + DefaultValue.removeTmpMpp(mpprcFile) + + @staticmethod + def checkMpprcFileChange(mpprcFile, host="local host", mpprcFile_rm=""): + """ + function:Check if mppfile has been changed + input: mppfile + output:NA + """ + # get mppfile, make sure it exists + if mpprcFile == "" or mpprcFile is None or mpprcFile == \ + "/etc/profile" or mpprcFile == "~/.bashrc" or \ + not os.path.exists(mpprcFile): + DefaultValue.removeTmpMpp(mpprcFile) + return + + if host == "" or host is None: + host = "local host" + + # read the content of mppfile + with open(mpprcFile, 'r') as fp: + mpp_content = fp.read() + env_list = mpp_content.split('\n') + 
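# --- editorial sketch, not part of the patch --------------------------------
# The validation pass that follows screens every line of the copied mpprc
# file against a whitelist of expected export names and a blacklist of shell
# metacharacters before the file is ever sourced. A standalone sketch of that
# screen, assuming the same lists (subset shown; is_env_line_safe is a
# hypothetical helper, not part of the OM code):
def is_env_line_safe(line,
                     whitelist=("GPHOME", "PATH", "LD_LIBRARY_PATH",
                                "GAUSSHOME", "GAUSSLOG", "umask"),
                     blacklist=("|", ";", "&", "<", ">", "`", "\\", "!")):
    line = line.strip()
    if not line:
        return True
    if not (line.startswith("export")
            or any(name in line for name in whitelist)):
        return False                      # not an expected entry
    return not any(ch in line for ch in blacklist)  # reject metacharacters

# is_env_line_safe("export GAUSSHOME=/opt/gauss")    -> True
# is_env_line_safe("export PATH=$PATH; rm -rf /tmp") -> False
# -----------------------------------------------------------------------------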
while '' in env_list: + env_list.remove('') + # remove ec content from list + for env in env_list: + if re.match("^if \[ -f .*\/env_ec", env): + env_list.remove(env) + break + + # white elements + list_white = ["ELK_CONFIG_DIR", "ELK_SYSTEM_TABLESPACE", + "MPPDB_ENV_SEPARATE_PATH", "GPHOME", "PATH", + "LD_LIBRARY_PATH", "PYTHONPATH", "GAUSS_WARNING_TYPE", + "GAUSSHOME", "PATH", "LD_LIBRARY_PATH", + "S3_CLIENT_CRT_FILE", "GAUSS_VERSION", "PGHOST", + "GS_CLUSTER_NAME", "GAUSSLOG", "GAUSS_ENV", "umask"] + # black elements + list_black = ["|", ";", "&", "<", ">", "`", "\\", "!", "\n"] + + # check mpprcfile + for env in env_list: + env = env.strip() + if (env == ""): + continue + for white in list_white: + flag_white = 0 + flag = env.find(white) + if (env.startswith('export') or flag >= 0): + flag_white = 1 + break + if (flag_white == 0): + DefaultValue.removeTmpMpp(mpprcFile_rm) + raise Exception(ErrorCode.GAUSS_502["GAUSS_50219"] % env + + " There are illegal characters in %s." % host) + for black in list_black: + flag = env.find(black) + if (flag >= 0 and env != ""): + DefaultValue.removeTmpMpp(mpprcFile_rm) + raise Exception(ErrorCode.GAUSS_502["GAUSS_50219"] % env + + " There are illegal characters in %s." % + host) + + @staticmethod + def sourceEnvFile(file_env): + """ + """ + cmd = "%s '%s'" % (g_Platform.getSourceCmd(), file_env) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0 or output.strip() != ""): + return (False, output) + return (True, "") + + @staticmethod + def checkEnvFile(mpprcFile="", user=""): + """ + function: check if the env file contains msg which may cause the + program failed. + input: NA + output: NA + """ + (status, output) = DefaultValue.sourceEnvFile("/etc/profile") + if (status != True): + return (False, output) + + if (mpprcFile != "" and os.path.isfile(mpprcFile)): + (status, output) = DefaultValue.sourceEnvFile(mpprcFile) + if (status != True): + return (False, output) + + if ((user != "") and (os.getuid() == 0)): + executeCmd = "%s '%s' && %s '%s'" % (g_Platform.getSourceCmd(), + "/etc/profile", + g_Platform.getSourceCmd(), + "~/.bashrc") + if (mpprcFile != ""): + remoteSourceCmd = "if [ -f '%s' ] ; then %s '%s'; fi" % \ + (mpprcFile, g_Platform.getSourceCmd(), + mpprcFile) + executeCmd = "%s && %s" % (executeCmd, remoteSourceCmd) + cmd = g_Platform.getExecuteCmdWithUserProfile(user, "~/.bashrc", + executeCmd, False) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0 or output.strip() != ""): + return (False, output) + return (True, "") + + @staticmethod + def createPathUnderRoot(newPath, permission, user="", group=""): + """ + function: 1.create path using root user 2.modify the path permission + notice: this function only can be called by root, and user and group + should be exist + input : newPath: the path we want to create. + permission: the permission of the path. + user: the user of the created path. + group: the group of the input user. 
+        output: NA
+        """
+        # check if the path exists, and create the new path if needed
+        ownerPath = newPath
+        if (not os.path.exists(ownerPath)):
+            ownerPath = DefaultValue.getTopPathNotExist(ownerPath)
+
+        if (not os.path.isdir(newPath)):
+            g_file.createDirectory(newPath, True, permission)
+        g_file.changeMode(permission, ownerPath, True)
+        if (user != ""):
+            g_file.changeOwner(user, ownerPath, True)
+
+        # check enter permission
+        if ((user != "") and (os.getuid() == 0)):
+            g_file.cdDirectory(newPath, user)
+
+    @staticmethod
+    def obtainInstStr(objectList):
+        """
+        function : Obtain the message from the objectList
+        input : List
+        output : String
+        """
+        info = ""
+        # types.ListType does not exist in Python 3; test against list
+        if (isinstance(objectList, list)):
+            for obj in objectList:
+                info += "%s\n" % str(obj)
+        return info
+
+    @staticmethod
+    def findUnsupportedParameters(parameterList):
+        """
+        function : find unsupported configuration parameters and
+                   ignore other invalid parameters.
+                   if no unsupported configuration parameter is
+                   found, return [].
+        input : List
+        output : []
+        """
+        # init the unsupported args list
+        unsupportedArgs = ["support_extended_features"]
+        inputedUnsupportedParameters = []
+        for param in parameterList:
+            # split it by '='
+            keyValue = param.split("=")
+            if (len(keyValue) != 2):
+                continue
+            if (keyValue[0].strip() in unsupportedArgs):
+                inputedUnsupportedParameters.append(param)
+
+        return inputedUnsupportedParameters
+
+    @staticmethod
+    def judgePathUser(tempPath):
+        """
+        function: check whether the owner of the path still exists
+        input: tempPath
+        output: True/False
+        """
+        try:
+            pwd.getpwuid(os.stat(tempPath).st_uid).pw_name
+            return True
+        except Exception as e:
+            # the owning user no longer exists
+            if (str(e).find("uid not found") >= 0):
+                return False
+            else:
+                raise Exception(ErrorCode.GAUSS_502["GAUSS_50219"] %
+                                ("the owner of %s" % tempPath) +
+                                " Error: \n%s" % str(e))
+
+    @staticmethod
+    def checkPathandChangeOwner(onePath, user, group, permission):
+        """
+        function: walk each layer of the path; if the owning user of a
+                  layer no longer exists, reset its mode and owner
+        input: onePath---the specified path; user---the user of cluster;
+               group---the group of cluster
+        output: the owner of path
+        precondition: the path exists
+        """
+        pathlist = []
+        try:
+            if (not os.path.exists(onePath)):
+                raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % onePath)
+
+            ownerPath = onePath
+            while True:
+                # collect each parent layer of the path
+                (ownerPath, dirName) = os.path.split(ownerPath)
+                if (os.path.exists(ownerPath) and dirName != ""):
+                    pathlist.append(os.path.join(ownerPath, dirName))
+                else:
+                    break
+
+            for tempPath in pathlist:
+                # the owning user does not exist: reset mode and owner
+                if (not DefaultValue.judgePathUser(tempPath)):
+                    g_file.changeMode(permission, tempPath)
+                    g_file.changeOwner(user, tempPath)
+        except Exception as e:
+            raise Exception(str(e))
+
+    @staticmethod
+    def checkOsVersion():
+        """
+        function : Check whether the OS version is supported
+        input : NA
+        output : boolean
+        """
+        # supported platforms:
+        # RHEL/CentOS "6.4", "6.5", "6.6", "6.7", "6.8", "6.9",
+        #             "7.0", "7.1", "7.2", "7.3", "7.4", "7.5" 64bit
+        # SuSE11 sp1/2/3/4 64bit
+        # EulerOS '2.0' 64bit
+        # SuSE12 sp0/1/2/3 64bit
+        try:
+            g_Platform.getCurrentPlatForm()
+            return True
+        except Exception:
+            return False
+
+    @staticmethod
+    def checkPreInstallFlag(user):
+        """
+        function : check whether the preinstall script has been executed
+        input : String
+        output : boolean
+        """
+        gaussEnv = DefaultValue.getEnvironmentParameterValue("GAUSS_ENV", user)
+        if ("" == gaussEnv):
+            return False
+        # GAUSS_ENV is "1" after preinstall and "2" after install; the
+        # original "!= 1 or != 2" test was always true, so test membership
+        if (str(gaussEnv) in ("1", "2")):
+ return True + else: + return False + + @staticmethod + def cleanTmpFile(path, fp=None): + """ + function : close and remove temporary file + input : String,file + output : NA + """ + if (fp): + fp.close() + if (os.path.exists(path)): + os.remove(path) + + @staticmethod + def distributeDatasourceFiles(sshTool, appPath, hostList): + """ + function : distribute datasource files of datasource.key.cipher + and datasource.key.rand to remote host + input : String,String + output : NA + """ + # init datasource file + clusterBinPath = "%s/bin" % appPath + datasourceCipherFile = "%s/datasource.key.cipher" % clusterBinPath + datasourceRandFile = "%s/datasource.key.rand" % clusterBinPath + tde_key_cipher = "%s/gs_tde_keys.cipher" % clusterBinPath + + # If the file exists. Remote copy datasource cipher file to new nodes. + if (os.path.isfile(datasourceCipherFile)): + sshTool.scpFiles(datasourceCipherFile, clusterBinPath, hostList) + cmd = g_Platform.getChmodCmd(str(DefaultValue.KEY_FILE_MODE), + datasourceCipherFile) + sshTool.executeCommand( + cmd, "change the datasource cipher file permission", + DefaultValue.SUCCESS, hostList) + # If the file exists. Remote copy datasource rand file to new nodes. + if (os.path.isfile(datasourceRandFile)): + sshTool.scpFiles(datasourceRandFile, clusterBinPath, hostList) + cmd = g_Platform.getChmodCmd(str(DefaultValue.KEY_FILE_MODE), + datasourceRandFile) + sshTool.executeCommand(cmd, "change the datasource " + "rand file permission", + DefaultValue.SUCCESS, hostList) + # If the file exists. Remote copy gs_tde_keys.cipher to new nodes. + if (os.path.isfile(tde_key_cipher)): + sshTool.scpFiles(tde_key_cipher, clusterBinPath, hostList) + cmd = g_Platform.getChmodCmd(str(DefaultValue.KEY_FILE_MODE), + tde_key_cipher) + sshTool.executeCommand(cmd, "change the gs_tde_keys.cipher " + "permission", DefaultValue.SUCCESS, + hostList) + + @staticmethod + def distributeUtilslibDir(sshTool, user, appPath, hostList): + """ + function : distribute utilslib dir to remote host + input : String,String + output : NA + """ + localHostName = DefaultValue.GetHostIpOrName() + # init utilslib dir + datasourceLibPath = "%s/utilslib" % appPath + if (os.path.exists(datasourceLibPath)): + srcPath = "'%s'/*" % datasourceLibPath + destPath = "'%s'/" % datasourceLibPath + sshTool.scpFiles(srcPath, destPath, hostList) + + # init java UDF lib dir + javaUDFLibPath = "%s/lib/postgresql/java" % appPath + if (os.path.isdir(javaUDFLibPath)): + udfFiles = g_file.getDirectoryList(javaUDFLibPath) + if (len(udfFiles) > 0): + srcPath = "'%s'/*" % javaUDFLibPath + destPath = "'%s'/" % javaUDFLibPath + sshTool.scpFiles(srcPath, destPath, hostList) + + # init postgis lib dir + fileLocation = {} + fileLocation["'%s'/lib/postgresql/" % appPath] = "postgis-*.*.so" + fileLocation["'%s'/lib/" % appPath] = \ + "(libgeos_c.so.*|libproj.so.*|libjson-c.so.*|" \ + "libgeos-*.*.*so|libstdc++.*|libgcc_s.so.*)" + fileLocation["'%s'/share/postgresql/extension/" % appPath] = \ + "(postgis--*.*.*.sql|postgis.control)" + fileLocation["'%s'/bin/" % appPath] = "(pgsql2shp|shp2pgsql|" \ + "logic_cluster_name.txt|" \ + "[a-zA-Z0-9_]{1,64}." 
\ + "cluster_static_config)" + fileLocation["'%s'/etc/" % appPath] = "*.gscgroup_.*.cfg" + for (gisLibPath, pattarn) in fileLocation.items(): + gisFiles = g_file.getDirectoryList(gisLibPath, pattarn) + if (len(gisFiles) > 0): + if (len(gisFiles) > 1): + srcPath = "%s/{%s}" % (gisLibPath, ",".join(gisFiles)) + else: + srcPath = "%s/%s" % (gisLibPath, gisFiles[0]) + sshTool.scpFiles(srcPath, destPath, hostList) + + @staticmethod + def distributeRackFile(sshTool, hostList): + """ + function: Distributing the rack Information File + input : NA + output: NA + """ + rack_conf_file = os.path.realpath(os.path.join( + DefaultValue.getEnv("GPHOME"), + "script/gspylib/etc/conf/rack_info.conf")) + rack_info_temp = os.path.realpath(os.path.join( + DefaultValue.getEnv("GPHOME"), + "script/gspylib/etc/conf/rack_temp.conf")) + if os.path.isfile(rack_info_temp): + shutil.move(rack_info_temp, rack_conf_file) + if os.path.isfile(rack_conf_file): + sshTool.scpFiles(rack_conf_file, rack_conf_file, hostList) + + @staticmethod + def cleanFile(fileName, hostname=""): + """ + function : remove file + input : String,hostname + output : NA + """ + fileList = fileName.split(",") + + cmd = "" + for fileName in fileList: + deleteCmd = g_file.SHELL_CMD_DICT["deleteFile"] % (fileName, + fileName) + if cmd != "": + cmd += ';%s' % deleteCmd + else: + cmd = deleteCmd + + if ("" != hostname and DefaultValue.GetHostIpOrName() != hostname): + cmd = g_OSlib.getSshCommand(hostname, cmd) + DefaultValue.execCommandLocally(cmd) + + @staticmethod + def cleanUserEnvVariable(userProfile, cleanGAUSS_WARNING_TYPE=False, + cleanGS_CLUSTER_NAME=True): + """ + function : Clean the user environment variable + input : String,boolean + output : NA + """ + try: + # check use profile + if os.path.isfile(userProfile): + # clean version + g_file.deleteLine(userProfile, "^\\s*export\\" + "s*GAUSS_VERSION=.*$") + # clean lib + g_file.deleteLine(userProfile, + "^\\s*export\\s*LD_LIBRARY_PATH=\\" + "$GAUSSHOME\\/lib:\\$LD_LIBRARY_PATH$") + g_file.deleteLine(userProfile, + "^\\s*export\\s*LD_LIBRARY_PATH=\\" + "$GAUSSHOME\\/lib\\/libsimsearch:\\" + "$LD_LIBRARY_PATH$") + g_file.deleteLine(userProfile, + "^\\s*export\\s*LD_LIBRARY_PATH=\\$GPHOME\\" + "/script\\/gspylib\\/clib:\\" + "$LD_LIBRARY_PATH$") + # clean bin + g_file.deleteLine(userProfile, + "^\\s*export\\s*PATH=\\$GAUSSHOME\\" + "/bin:\\$PATH$") + # clean GAUSSHOME + g_file.deleteLine(userProfile, + "^\\s*export\\s*GAUSSHOME=.*$") + g_file.deleteLine(userProfile, + "^\\s*export\\s*PGHOST=.*$") + # clean GAUSSLOG + g_file.deleteLine(userProfile, + "^\\s*export\\s*GAUSSLOG=.*$") + # clean S3_ACCESS_KEY_ID + g_file.deleteLine(userProfile, + "^\\s*export\\s*S3_ACCESS_KEY_ID=.*$") + # clean S3_SECRET_ACCESS_KEY + g_file.deleteLine(userProfile, + "^\\s*export\\s*S3_SECRET_ACCESS_KEY=.*$") + # clean S3_CLIENT_CRT_FILE + g_file.deleteLine(userProfile, + "^\\s*export\\s*S3_CLIENT_CRT_FILE=.*$") + # clean ETCD_UNSUPPORTED_ARCH + g_file.deleteLine(userProfile, + "^\\s*export\\s*ETCD_UNSUPPORTED_ARCH=.*$") + + if (cleanGAUSS_WARNING_TYPE): + # clean extension connector environment variable + # because only deleting env_ec in postinstall, put it with + # GAUSS_WARNING_TYPE + g_file.deleteLine(userProfile, "^if \[ -f .*\/env_ec") + # clean GAUSS_WARNING_TYPE + g_file.deleteLine(userProfile, "^\\s*export\\" + "s*GAUSS_WARNING_TYPE=.*$") + + if (cleanGS_CLUSTER_NAME): + # clean GS_CLUSTER_NAME + g_file.deleteLine(userProfile, "^\\s*export\\" + "s*GS_CLUSTER_NAME=.*$") + + # clean AGENTPATH + 
g_file.deleteLine(userProfile, "^\\s*export\\s*AGENTPATH=.*$") + # clean AGENTLOGPATH + g_file.deleteLine(userProfile, "^\\s*export\\s*AGENTLOGPATH=" + ".*$") + # clean umask + g_file.deleteLine(userProfile, "^\\s*umask\\s*.*$") + + except Exception as e: + raise Exception(str(e)) + + @staticmethod + def setComponentEnvVariable(userProfile, envList): + """ + funciton: Set component environment variable + input: userProfile- env file, envList - environment variable list + output: NA + """ + try: + g_file.createFileInSafeMode(userProfile) + with open(userProfile, "a") as fp: + for inst_env in envList: + fp.write(inst_env) + fp.write(os.linesep) + fp.flush() + except Exception as e: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50205"] % + userProfile + " Error: \n%s" % str(e)) + + @staticmethod + def setUserEnvVariable(userProfile, installPath, tmpPath, logPath, + agentPath, agentLogPath): + """ + function : Set the user environment variable + input : String,String,String,String,String,String + output : NA + """ + envList = ["export GAUSSHOME=%s" % installPath, \ + "export PATH=$GAUSSHOME/bin:$PATH", \ + "export LD_LIBRARY_PATH=$GAUSSHOME/lib:$LD_LIBRARY_PATH", \ + "export S3_CLIENT_CRT_FILE=$GAUSSHOME/lib/client.crt", \ + "export GAUSS_VERSION=%s" % + VersionInfo.getPackageVersion(), \ + "export PGHOST=%s" % tmpPath, \ + "export GAUSSLOG=%s" % logPath, + "umask 077"] + if agentPath != '': + envList.append("export AGENTPATH=%s" % agentPath) + if agentLogPath != '': + envList.append("export AGENTLOGPATH=%s" % agentLogPath) + DefaultValue.setComponentEnvVariable(userProfile, envList) + + @staticmethod + def cleanComponentEnvVariable(userProfile, envNames): + """ + function : Clean the user environment variable + input : String,boolean + output : NA + """ + try: + if (os.path.exists(userProfile) and os.path.isfile(userProfile)): + for envName in envNames: + g_file.deleteLine(userProfile, "^\\s*export\\s*%s=.*$" % + envName) + if (envName == "GAUSSHOME"): + g_file.deleteLine(userProfile, + "^\\s*export\\s*LD_LIBRARY_PATH" + "=\\$GAUSSHOME\\/lib:" + "\\$LD_LIBRARY_PATH$") + g_file.deleteLine(userProfile, + "^\\s*export\\s*LD_LIBRARY_PATH" + "=\\$GAUSSHOME\\/add-ons:" + "\\$LD_LIBRARY_PATH$") + # clean bin + g_file.deleteLine(userProfile, + "^\\s*export\\s*PATH" + "=\\$GAUSSHOME\\/bin:\\$PATH$") + elif (envName == "CM_HOME"): + # clean cm path + g_file.deleteLine(userProfile, + "^\\s*export\\s*PATH" + "=\\$CM_HOME:\\$PATH$") + elif (envName == "ETCD_HOME"): + # clean etcd path + g_file.deleteLine( + userProfile, "^\\s*export\\s*PATH" + "=\\$ETCD_HOME\\/bin:\\$PATH$") + + except Exception as e: + raise Exception(str(e)) + + @staticmethod + def updateUserEnvVariable(userProfile, variable, value): + """ + function : Update the user environment variable + input : String,String,String + output : NA + """ + try: + # delete old env information + deleteContent = "^\\s*export\\s*%s=.*$" % variable + g_file.deleteLine(userProfile, deleteContent) + # write the new env information into userProfile + writeContent = ['export %s=%s' % (variable, value)] + g_file.writeFile(userProfile, writeContent) + except Exception as e: + raise Exception(str(e)) + + @staticmethod + def createCADir(sshTool, caDir, hostList): + """ + function : create the dir of ca file + input : config file path and ca dir path + output : NA + """ + opensslFile = os.path.join(caDir, "openssl.cnf") + tmpFile = os.path.join(os.path.realpath( + os.path.join(caDir, "..")), "openssl.cnf") + if (not os.path.isfile(opensslFile)): + raise 
Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % opensslFile) + + # not rename file, just move it out and clean the dir, then move back + cmd = g_file.SHELL_CMD_DICT["renameFile"] % (opensslFile, + opensslFile, + tmpFile) + cmd += " && " + g_file.SHELL_CMD_DICT["cleanDir"] % (caDir, + caDir, + caDir) + cmd += " && " + g_file.SHELL_CMD_DICT["renameFile"] % (tmpFile, + tmpFile, + opensslFile) + sshTool.executeCommand(cmd, "move file and clean dir", + DefaultValue.SUCCESS, hostList) + # create ./demoCA/newcerts ./demoCA/private + newcertsPath = os.path.join(caDir, "demoCA/newcerts") + g_file.createDirectory(newcertsPath) + privatePath = os.path.join(caDir, "demoCA/private") + g_file.createDirectory(privatePath) + # touch files: ./demoCA/serial ./demoCA/index.txt + serFile = os.path.join(caDir, "demoCA/serial") + g_file.createFile(serFile) + g_file.writeFile(serFile, ["01"]) + indexFile = os.path.join(caDir, "demoCA/index.txt") + g_file.createFile(indexFile) + + @staticmethod + def createServerCA(caType, caDir, logger): + """ + function : create ca file + input : ca file type and ca dir path + output : NA + """ + if (caType == DefaultValue.SERVER_CA): + logger.log("The sslcert will be generated in %s" % caDir) + randpass = DefaultValue.getRandStr() + confFile = caDir + "/openssl.cnf" + if not os.path.isfile(confFile): + raise Exception(ErrorCode.GAUSS_502 + ["GAUSS_50201"] % confFile) + cmd = g_Platform.getCdCmd(caDir) + cmd += " && openssl genrsa -aes256 -passout pass:%s -out " % \ + (randpass) + cmd += "demoCA/private/cakey.pem 2048" + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_514 + ["GAUSS_51402"] + "Error:\n%s" % output) + cmd = g_Platform.getCdCmd(caDir) + cmd += " && openssl req -config openssl.cnf -new " + cmd += "-key demoCA/private/cakey.pem -passin pass:%s " \ + "-out " % (randpass) + cmd += "demoCA/careq.pem -subj " + cmd += "'/C=CN/ST=Beijing/L=Beijing/" + cmd += "O=huawei/OU=gauss/CN=root'" + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_514 + ["GAUSS_51402"] + "Error:\n%s" % output) + g_file.replaceFileLineContent("CA:FALSE", + "CA:TRUE", + confFile) + cmd = g_Platform.getCdCmd(caDir) + cmd += " && openssl ca -config openssl.cnf " + cmd += "-batch -passin pass:%s -out demoCA/cacert.pem " \ + "-keyfile " % (randpass) + cmd += "demoCA/private/cakey.pem " + cmd += "-selfsign -infiles demoCA/careq.pem " + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_514 + ["GAUSS_51402"] + "Error:\n%s" % output) + cmd = g_Platform.getCdCmd(caDir) + cmd += " && openssl genrsa -aes256 -passout pass:%s -out " \ + "server.key 2048" % (randpass) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_514 + ["GAUSS_51402"] + "Error:\n%s" % output) + cmd = g_Platform.getCdCmd(caDir) + cmd += " && openssl req -config openssl.cnf -new " + cmd += "-key server.key -passin pass:%s -out server.req " \ + "-subj " % (randpass) + cmd += "'/C=CN/ST=Beijing/L=Beijing/" + cmd += "O=huawei/OU=gauss/CN=server'" + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + raise Exception(ErrorCode.GAUSS_514 + ["GAUSS_51402"] + "Error:\n%s" % output) + g_file.replaceFileLineContent("CA:TRUE", + "CA:FALSE", + confFile) + indexAttrFile = caDir + "/demoCA/index.txt.attr" + if os.path.isfile(indexAttrFile): + g_file.replaceFileLineContent("unique_subject = yes", + "unique_subject = no", + 
indexAttrFile) + else: + raise Exception(ErrorCode.GAUSS_502 + ["GAUSS_50201"] % indexAttrFile) + cmd = g_Platform.getCdCmd(caDir) + cmd += " && openssl ca -config openssl.cnf -batch -in " + cmd += "server.req -passin pass:%s -out server.crt " \ + "-days 3650 -md sha256 -subj " % (randpass) + cmd += "'/C=CN/ST=Beijing/L=Beijing/" + cmd += "O=huawei/OU=gauss/CN=server'" + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_514 + ["GAUSS_51402"] + "Error:\n%s" % output) + cmd = g_Platform.getCdCmd(caDir) + cmd += " && gs_guc encrypt -M server -K %s -D ./ " % randpass + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_514 + ["GAUSS_51402"] + "Error:\n%s" % output) + # client key + randpassClient = DefaultValue.getRandStr() + cmd = g_Platform.getCdCmd(caDir) + cmd += " && openssl genrsa -aes256 -passout pass:%s -out " \ + "client.key 2048" % (randpassClient) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_514 + ["GAUSS_51402"] + "Error:\n%s" % output) + cmd = g_Platform.getCdCmd(caDir) + cmd += " && openssl req -config openssl.cnf " + cmd += "-new -key client.key -passin pass:%s " \ + "-out client.req -subj " % (randpassClient) + cmd += "'/C=CN/ST=Beijing/L=Beijing/" + cmd += "O=huawei/OU=gauss/CN=client'" + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_514 + ["GAUSS_51402"] + "Error:\n%s" % output) + cmd = g_Platform.getCdCmd(caDir) + cmd += " && openssl ca -config openssl.cnf " + cmd += "-batch -in client.req -passin pass:%s -out " % \ + (randpass) + cmd += "client.crt -days 3650 -md sha256" + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_514 + ["GAUSS_51402"] + "Error:\n%s" % output) + cmd = g_Platform.getCdCmd(caDir) + cmd += " && gs_guc encrypt -M client -K %s -D ./ " % randpassClient + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_514 + ["GAUSS_51402"] + "Error:\n%s" % output) + cmd = g_Platform.getCdCmd(caDir) + cmd += " && openssl pkcs8 -topk8 -outform DER" + cmd += " -passin pass:%s " % randpassClient + cmd += " -in client.key -out client.key.pk8 -nocrypt" + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_514 + ["GAUSS_51402"] + "Error:\n%s" % output) + randpass = "" + randpassClient = "" + + @staticmethod + def changeOpenSslConf(confFile, hostList): + """ + function : change the openssl.cnf file + input : confFile, hostList + output : NA + """ + # Clean the old content. + lineList = g_file.readFile(confFile) + for i in range(len(lineList)): + if ("[" in lineList[i] and + "alt_names" in lineList[i] and + "]" in lineList[i]): + row = i + 1 + g_file.deleteLineByRowNum(confFile, row) + if ("DNS." in lineList[i] and "=" in lineList[i]): + g_file.deleteLineByRowNum(confFile, row) + # Add new one. + dnsList = [] + dnsList.append("\n") + dnsList.append("[ alt_names ]") + dnsList.append("DNS.1 = localhost") + cont = 2 + for host in hostList: + dns = "DNS." 
+ str(cont) + " = " + host + dnsList.append(dns) + cont = cont + 1 + g_file.writeFile(confFile, dnsList) + + @staticmethod + def getRandStr(): + with open("/dev/random", 'rb') as fp: + srp = fp.read(4) + salt = srp.hex() + salt = "%s%s" % (salt, "aA0") + return salt + + @staticmethod + def createCA(caType, caDir): + """ + function : create ca file + input : ca file type and ca dir path + output : NA + """ + if (caType == DefaultValue.GRPC_CA): + randpass = DefaultValue.getRandStr() + confFile = caDir + "/openssl.cnf" + if (os.path.isfile(confFile)): + g_file.replaceFileLineContent("cakey.pem", + "cakeynew.pem", + confFile) + g_file.replaceFileLineContent("careq.pem", + "careqnew.pem", + confFile) + g_file.replaceFileLineContent("cacert.pem", + "cacertnew.pem", + confFile) + g_file.replaceFileLineContent("server.key", + "servernew.key", + confFile) + g_file.replaceFileLineContent("server.req", + "servernew.req", + confFile) + g_file.replaceFileLineContent("server.crt", + "servernew.crt", + confFile) + g_file.replaceFileLineContent("client.key", + "clientnew.key", + confFile) + g_file.replaceFileLineContent("client.req", + "clientnew.req", + confFile) + g_file.replaceFileLineContent("client.crt", + "clientnew.crt", + confFile) + else: + raise Exception(ErrorCode.GAUSS_502 + ["GAUSS_50201"] % confFile) + cmd = g_Platform.getCdCmd(caDir) + cmd += " && openssl genrsa -aes256 -passout pass:%s -out " % \ + (randpass) + cmd += "demoCA/private/cakeynew.pem 2048" + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_514 + ["GAUSS_51402"] + "Error:\n%s" % output) + cmd = g_Platform.getCdCmd(caDir) + cmd += " && openssl req -config openssl.cnf -new " + cmd += "-key demoCA/private/cakeynew.pem -passin pass:%s " \ + "-out " % (randpass) + cmd += "demoCA/careqnew.pem -subj " + cmd += "'/C=CN/ST=Beijing/L=Beijing/" + cmd += "O=huawei/OU=gauss/CN=root'" + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_514 + ["GAUSS_51402"] + "Error:\n%s" % output) + cmd = g_Platform.getCdCmd(caDir) + cmd += " && openssl ca -config openssl.cnf -days 7300 " + cmd += "-batch -passin pass:%s -out demoCA/cacertnew.pem " \ + "-md sha512 -keyfile " % (randpass) + cmd += "demoCA/private/cakeynew.pem " + cmd += "-selfsign -infiles demoCA/careqnew.pem " + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_514 + ["GAUSS_51402"] + "Error:\n%s" % output) + cmd = g_Platform.getCdCmd(caDir) + cmd += " && openssl genrsa -aes256 -passout pass:%s -out " \ + "servernew.key 2048" % (randpass) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_514 + ["GAUSS_51402"] + "Error:\n%s" % output) + cmd = g_Platform.getCdCmd(caDir) + cmd += " && openssl req -config openssl.cnf -new " + cmd += "-key servernew.key -passin pass:%s -out servernew.req " \ + "-subj " % (randpass) + cmd += "'/C=CN/ST=Beijing/L=Beijing/" + cmd += "O=huawei/OU=gauss/CN=root'" + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_514 + ["GAUSS_51402"] + "Error:\n%s" % output) + indexAttrFile = caDir + "/demoCA/index.txt.attr" + if (os.path.isfile(indexAttrFile)): + g_file.replaceFileLineContent("unique_subject = yes", + "unique_subject = no", + indexAttrFile) + else: + raise Exception(ErrorCode.GAUSS_502 + ["GAUSS_50201"] % indexAttrFile) + cmd = g_Platform.getCdCmd(caDir) + cmd += " && openssl ca -config openssl.cnf 
-batch -in " + cmd += "servernew.req -passin pass:%s -out servernew.crt " \ + "-days 7300 -md sha512" % (randpass) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_514 + ["GAUSS_51402"] + "Error:\n%s" % output) + cmd = g_Platform.getCdCmd(caDir) + cmd += " && openssl genrsa -aes256 -passout pass:%s -out " \ + "clientnew.key 2048" % (randpass) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_514 + ["GAUSS_51402"] + "Error:\n%s" % output) + cmd = g_Platform.getCdCmd(caDir) + cmd += " && openssl req -config openssl.cnf " + cmd += "-new -key clientnew.key -passin pass:%s " \ + "-out clientnew.req -subj " % (randpass) + cmd += "'/C=CN/ST=Beijing/L=Beijing/" + cmd += "O=huawei/OU=gauss/CN=root'" + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_514 + ["GAUSS_51402"] + "Error:\n%s" % output) + cmd = g_Platform.getCdCmd(caDir) + cmd += " && openssl ca -config openssl.cnf " + cmd += "-batch -in clientnew.req -passin pass:%s -out " % \ + (randpass) + cmd += "clientnew.crt -days 7300 -md sha512" + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_514 + ["GAUSS_51402"] + "Error:\n%s" % output) + cmd = g_Platform.getCdCmd(caDir) + cmd += " && gs_guc encrypt -M server -K %s -D ./ " % randpass + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_514 + ["GAUSS_51402"] + "Error:\n%s" % output) + cmd = g_Platform.getCdCmd(caDir) + cmd += " && gs_guc encrypt -M client -K %s -D ./ " % randpass + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_514 + ["GAUSS_51402"] + "Error:\n%s" % output) + randpass = 0 + + @staticmethod + def cleanServerCaDir(caDir): + """ + function : clean the dir of ca file and change mode of ca files + input : ca dir path + output : NA + """ + certFile = caDir + "/demoCA/cacert.pem" + if os.path.exists(certFile): + g_file.moveFile(certFile, caDir) + clientReq = caDir + "/server.req" + g_file.removeFile(clientReq) + clientReq = caDir + "/client.req" + g_file.removeFile(clientReq) + demoCA = caDir + "/demoCA" + g_file.removeDirectory(demoCA) + allCerts = caDir + "/*" + g_file.changeMode(DefaultValue.KEY_FILE_MODE, allCerts) + + @staticmethod + def cleanCaDir(caDir): + """ + function : clean the dir of ca file and change mode of ca files + input : ca dir path + output : NA + """ + certFile = caDir + "/demoCA/cacertnew.pem" + if os.path.exists(certFile): + g_file.moveFile(certFile, caDir) + clientReq = caDir + "/clientnew.req" + g_file.removeFile(clientReq) + clientReq = caDir + "/servernew.req" + g_file.removeFile(clientReq) + demoCA = caDir + "/demoCA" + g_file.removeDirectory(demoCA) + allCerts = caDir + "/*" + g_file.changeMode(DefaultValue.KEY_FILE_MODE, allCerts) + + @staticmethod + def modifyFileOwner(user, currentfile): + """ + function : Modify the file's owner + input : String,String + output : String + """ + # only root user can run this function + if (os.getuid() == 0): + try: + group = g_OSlib.getGroupByUser(user) + except Exception as e: + raise Exception(str(e)) + if os.path.exists(currentfile): + g_file.changeOwner(user, currentfile) + + @staticmethod + def modifyFileOwnerFromGPHOME(currentfile): + """ + function : Modify the file's owner to the GPHOME's user + input : String,String + output : String + """ + GPHOME = 
DefaultValue.getEnv(DefaultValue.TOOL_PATH_ENV) + if not GPHOME: + raise Exception(ErrorCode.GAUSS_518["GAUSS_51802"] % "GPHOME") + (user, group) = g_OSlib.getPathOwner(GPHOME) + if (user == "" or group == ""): + raise Exception(ErrorCode.GAUSS_503["GAUSS_50308"]) + DefaultValue.modifyFileOwner(user, currentfile) + + @staticmethod + def obtainSSDDevice(): + """ + function : Obtain the SSD device + input : NA + output : [] + """ + devList = [] + cmd = "ls -ll /dev/hio? | awk '{print $10}'" + (status, output) = subprocess.getstatusoutput(cmd) + if (status == 0 and output.find("No such file or directory") < 0): + devList = output.split("\n") + else: + raise Exception(ErrorCode.GAUSS_530["GAUSS_53005"] + + " Command:%s. Error:\n%s" % (cmd, output)) + return devList + + @staticmethod + def checkOutputFile(outputFile): + """ + function : check the output file + input : String + output : NA + """ + if (os.path.isdir(outputFile)): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50210"] % "output file") + # get parent directory of output file + parent_dir = os.path.dirname(outputFile) + if (os.path.isfile(parent_dir)): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50211"] % + "base directory of output file") + + @staticmethod + def getAllIP(g_dbNodes): + """ + function : Get all node IP + input : list + output : list + """ + allIP = [] + for dbNode in g_dbNodes: + allIP += dbNode.backIps + allIP += dbNode.sshIps + for dbInstance in dbNode.cmservers: + allIP += dbInstance.haIps + allIP += dbInstance.listenIps + for dbInstance in dbNode.coordinators: + allIP += dbInstance.haIps + allIP += dbInstance.listenIps + for dbInstance in dbNode.datanodes: + allIP += dbInstance.haIps + allIP += dbInstance.listenIps + for dbInstance in dbNode.gtms: + allIP += dbInstance.haIps + allIP += dbInstance.listenIps + for etcdInst in dbNode.etcds: + allIP += etcdInst.haIps + allIP += etcdInst.listenIps + + return allIP + + @staticmethod + def KillAllProcess(userName, procName): + """ + function : Kill all processes by userName and procName. + input : userName, procName + output : boolean + """ + return g_OSlib.killallProcess(userName, procName, "9") + + @staticmethod + def sendNetworkCmd(ip): + """ + function : Send the network command of ping. + input : String + output : NA + """ + cmd = "%s |%s ttl |%s -l" % (g_Platform.getPingCmd(ip, "5", "1"), + g_Platform.getGrepCmd(), + g_Platform.getWcCmd()) + (status, output) = subprocess.getstatusoutput(cmd) + if (str(output) == '0' or status != 0): + g_lock.acquire() + noPassIPs.append(ip) + g_lock.release() + + @staticmethod + def checkIsPing(ips): + """ + function : Check the connection status of network. + input : [] + output : [] + """ + global noPassIPs + noPassIPs = [] + results = parallelTool.parallelExecute(DefaultValue.sendNetworkCmd, + ips) + return noPassIPs + + @staticmethod + def retryGetstatusoutput(cmd, retryTime=3, sleepTime=1): + """ + function : retry getStatusoutput + @param cmd: command going to be execute + @param retryTime: default retry 3 times after execution failure + @param sleepTime: default sleep 1 second then start retry + """ + retryTime += 1 + for i in range(retryTime): + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + time.sleep(sleepTime) + else: + break + return status, output + + @staticmethod + def killInstProcessCmd(instName, isRemote=False, signal=9, + isExactMatch=True, instType="", + procAbsPath="", instDir=""): + """ + instName: process name + isRemote: do it under remote machine. 
default is false
+        signal : kill signal. default is 9
+        isExactMatch: the match rule. default is exact match
+        instType: instance type. default is "", now only supported for
+                  getting the coordinator instance
+        procAbsPath: process abs path. default is ""
+        instDir: instance data directory. default is ""
+        """
+        pstree = "python3 %s -sc" % os.path.realpath(os.path.dirname(
+            os.path.realpath(__file__)) + "/../../py_pstree.py")
+        # only cm_server needs all child processes killed when doing kill -9
+        if instName == "cm_server" and signal == 9:
+            if isRemote:
+                cmd = "pidList=\`ps ux | grep '\<cm_server\>' | grep -v " \
+                      "'grep' " \
+                      "| awk '{print \$2}' | xargs \`; for pid in \$pidList;" \
+                      " do %s \$pid | xargs -r -n 100 kill -9; echo " \
+                      "'SUCCESS'; " \
+                      "done" % pstree
+                # only try to kill -9 the cm_server process itself
+                cmd += "; ps ux | grep '\<cm_server\>' | grep -v grep | awk " \
+                       "'{print \$2}' | xargs -r kill -9; echo 'SUCCESS'"
+            else:
+                cmd = "pidList=`ps ux | grep '\<cm_server\>' | grep -v " \
+                      "'grep' |" \
+                      " awk '{print $2}' | xargs `; for pid in $pidList; " \
+                      "do %s $pid | xargs -r -n 100 kill -9; echo 'SUCCESS';" \
+                      " done" % pstree
+                cmd += "; ps ux | grep '\<cm_server\>' | grep -v grep | " \
+                       "awk '{print $2}' | xargs -r kill -9; echo 'SUCCESS'"
+            return cmd
+
+        if "" != instType and "" != procAbsPath and "" != instDir:
+            if isRemote:
+                cmd = "ps ux | grep '\<%s\>' | grep '%s' | grep '%s' | " \
+                      "grep -v grep | awk '{print \$2}' | xargs -r kill " \
+                      "-%d " % (instType, procAbsPath, instDir, signal)
+            else:
+                cmd = "ps ux | grep '\<%s\>' | grep '%s' | grep '%s' | " \
+                      "grep -v grep | awk '{print $2}' | xargs -r kill " \
+                      "-%d " % (instType, procAbsPath, instDir, signal)
+        else:
+            if (isExactMatch):
+                if (isRemote):
+                    cmd = "ps ux | grep '\<%s\>' | grep -v grep | awk " \
+                          "'{print \$2}' | xargs -r kill -%d " % (instName,
+                                                                  signal)
+                else:
+                    cmd = "ps ux | grep '\<%s\>' | grep -v grep | awk " \
+                          "'{print $2}' | xargs -r kill -%d " % (instName,
+                                                                 signal)
+            else:
+                if (isRemote):
+                    cmd = "ps ux | grep '%s' | grep -v grep | awk " \
+                          "'{print \$2}' | xargs -r kill -%d " % (instName,
+                                                                  signal)
+                else:
+                    cmd = "ps ux | grep '%s' | grep -v grep | " \
+                          "awk '{print $2}' | xargs -r kill -%d " % (instName,
+                                                                     signal)
+        return cmd
+
+    @staticmethod
+    def getRuningInstNum(procAbsPath, instDir=""):
+        """
+        function: build a command that counts running instance processes
+        """
+        if (instDir):
+            cmd = "ps ux | grep '%s' | grep '%s' | grep -v grep | wc -l" % \
+                  (procAbsPath, instDir)
+        else:
+            cmd = "ps ux | grep '%s' | grep -v grep | wc -l" % (procAbsPath)
+        return cmd
+
+    @staticmethod
+    def killCmserverProcess(sshTool, cmsInsts):
+        # restart the cm_server instances by sending them SIGHUP (kill -1)
+        failedNodes = []
+        if (len(cmsInsts) == 1 and cmsInsts[0].hostname ==
+                DefaultValue.GetHostIpOrName()):
+            cmd = DefaultValue.killInstProcessCmd("cm_server", False, 1)
+            (status, output) = DefaultValue.retryGetstatusoutput(cmd)
+            if (status != 0):
+                raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd +
+                                "Error:\n%s" % output)
+        else:
+            cmd = DefaultValue.killInstProcessCmd("cm_server", True, 1)
+            (status, output) = sshTool.getSshStatusOutput(
+                cmd, [cmsInst.hostname for cmsInst in cmsInsts])
+            for cmNodeName in status.keys():
+                if (status[cmNodeName] != DefaultValue.SUCCESS):
+                    failedNodes.append(cmNodeName)
+
+            # retry once on the failed nodes
+            if (len(failedNodes)):
+                time.sleep(1)
+                (status, output) = sshTool.getSshStatusOutput(cmd, failedNodes)
+                for cmNodeName in failedNodes:
+                    if (status[cmNodeName] != DefaultValue.SUCCESS):
+                        raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"]
+                                        % cmd + "Error:\n%s" % output)
+
+        time.sleep(10)
+
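# --- editorial sketch, not part of the patch --------------------------------
# getParaValueFromConfigFile() below extracts a GUC value from a
# cm_server.conf/gtm.conf style file: skip blank lines and comments, cut off
# any trailing '#' comment, then take the right-hand side of '='. A
# self-contained sketch of that per-line parse (parse_guc_value is a
# hypothetical helper):
def parse_guc_value(conf_text, para):
    for raw in conf_text.split("\n"):
        line = raw.strip()
        if line == "" or line.startswith("#"):
            continue                      # skip blanks and comment lines
        if line.startswith(para):
            # drop an inline comment, then read the value after '='
            return line.split("#")[0].split("=")[1].strip().lower()
    return None

# parse_guc_value("log_dir = /cm/log  # comment\n", "log_dir") -> "/cm/log"
# -----------------------------------------------------------------------------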
@staticmethod + def getParaValueFromConfigFile(paraList, instList, instType="cm_server"): + """ + function : Get guc parameter from config file for cm_server or gtm. + input : paraList, instList, instType + output : paraMap + """ + paraMap = {} + for para in paraList: + for inst in instList: + configPath = os.path.join(inst.datadir, "%s.conf" % instType) + (status, output) = DefaultValue.getMatchingResult( + "\<'%s'\>" % para, configPath, inst.hostname) + if (status != 0 and status != 256): + if (instType == "gtm"): + output = "" + else: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50219"] % + configPath + " Error:%s." % output) + configValue = "" + for line in output.split('\n'): + confInfo = line.strip() + if (confInfo.startswith('#') or confInfo == ""): + continue + elif (confInfo.startswith(para)): + configValue = \ + confInfo.split('#')[0].split('=')[ + 1].strip().lower() + if (paraMap.__contains__(para) and paraMap[para] != + configValue): + raise Exception( + ErrorCode.GAUSS_530["GAUSS_53011"] % + "Parameter '%s', it is different in " + "same level instance." % para) + paraMap[para] = configValue + break + return paraMap + + @staticmethod + def retry_gs_guc(cmd): + """ + function : Retry 3 times when HINT error + input : cmd + output : NA + """ + retryTimes = 0 + while True: + (status, output) = subprocess.getstatusoutput(cmd) + if (status == 0): + break + if (retryTimes > 1): + raise Exception(ErrorCode.GAUSS_500["GAUSS_50008"] + + " Command:%s. Error:\n%s" % (cmd, output)) + retryTimes = retryTimes + 1 + time.sleep(3) + + @staticmethod + def distributePackagesToRemote(g_sshTool, srcPackageDir, destPackageDir, + hostname=None, mpprcFile="", + clusterInfo=None): + ''' + function: distribute the package to remote nodes + input: g_sshTool, hostname, srcPackageDir, destPackageDir, mpprcFile, + clusterType + output:NA + ''' + if hostname is None: + hostname = [] + try: + # check the destPackageDir is existing on hostname + DefaultValue.checkRemoteDir(g_sshTool, destPackageDir, hostname, + mpprcFile) + + # Send compressed package to every host + g_sshTool.scpFiles("%s/%s" % ( + srcPackageDir, DefaultValue.get_package_back_name()), + destPackageDir, hostname, mpprcFile) + # Decompress package on every host + srcPackage = "'%s'/'%s'" % (destPackageDir, + DefaultValue.get_package_back_name()) + cmd = g_Platform.getDecompressFilesCmd(srcPackage, destPackageDir) + g_sshTool.executeCommand(cmd, "extract %s server package" % + VersionInfo.PRODUCT_NAME, + DefaultValue.SUCCESS, hostname, mpprcFile) + + # change owner and mode of packages + destPath = "'%s'/*" % destPackageDir + cmd = g_Platform.getChmodCmd(str(DefaultValue.MAX_DIRECTORY_MODE), + destPath, True) + g_sshTool.executeCommand(cmd, "change permission", + DefaultValue.SUCCESS, hostname, mpprcFile) + + except Exception as e: + raise Exception(str(e)) + + @staticmethod + def distributeTransEncryptFile(appPath, sshTool, hostList): + ''' + function: Distribute trans encrypt file to the node of hostList + input : appPath, sshTool, hostList + output: NA + ''' + try: + installBinPath = "%s/bin" % appPath + transEncryptKeyCipher = "%s/trans_encrypt.key.cipher" % \ + installBinPath + transEncryptKeyRand = "%s/trans_encrypt.key.rand" % installBinPath + transEncryptKeyAkSk = "%s/trans_encrypt_ak_sk.key" % installBinPath + + if (os.path.exists(transEncryptKeyCipher)): + # MIN_FILE_MODE can not be scp, so expand the permission. 
+ cmd = g_Platform.getChmodCmd( + str(DefaultValue.KEY_DIRECTORY_MODE), + transEncryptKeyCipher) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + "Error:\n%s" % str(output)) + sshTool.scpFiles(transEncryptKeyCipher, installBinPath, + hostList) + cmd = g_Platform.getChmodCmd( + str(DefaultValue.MIN_FILE_MODE), transEncryptKeyCipher) + sshTool.executeCommand( + cmd, "change permission", DefaultValue.SUCCESS, hostList) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + "Error:\n%s" % str(output)) + + if (os.path.exists(transEncryptKeyRand)): + # MIN_FILE_MODE can not be scp, so expand the permission. + cmd = g_Platform.getChmodCmd( + str(DefaultValue.KEY_DIRECTORY_MODE), transEncryptKeyRand) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + "Error:\n%s" % str(output)) + sshTool.scpFiles(transEncryptKeyRand, installBinPath, hostList) + cmd = g_Platform.getChmodCmd(str(DefaultValue.MIN_FILE_MODE), + transEncryptKeyRand) + sshTool.executeCommand( + cmd, "change permission", DefaultValue.SUCCESS, hostList) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + "Error:\n%s" % str(output)) + + if (os.path.exists(transEncryptKeyAkSk)): + # MIN_FILE_MODE can not be scp, so expand the permission. + cmd = g_Platform.getChmodCmd( + str(DefaultValue.KEY_DIRECTORY_MODE), transEncryptKeyAkSk) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + "Error:\n%s" % str(output)) + sshTool.scpFiles(transEncryptKeyAkSk, installBinPath, hostList) + cmd = g_Platform.getChmodCmd(str(DefaultValue.MIN_FILE_MODE), + transEncryptKeyAkSk) + sshTool.executeCommand(cmd, "change permission", + DefaultValue.SUCCESS, hostList) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + "Error:\n%s" % str(output)) + except Exception as e: + raise Exception(str(e)) + + @staticmethod + def distributeXmlConfFile(g_sshTool, confFile, hostname=None, + mpprcFile="", localMode=False): + ''' + function: distribute the confFile to remote nodes + input: g_sshTool, hostname, confFile, mpprcFile + output:NA + ''' + if hostname is None: + hostname = [] + try: + # distribute xml file + # check and create xml file path + xmlDir = os.path.dirname(confFile) + xmlDir = os.path.normpath(xmlDir) + DefaultValue.checkRemoteDir(g_sshTool, xmlDir, hostname, mpprcFile, + localMode) + local_node = DefaultValue.GetHostIpOrName() + # Skip local file overwriting + if not hostname: + hostname = g_sshTool.hostNames[:] + if local_node in hostname: + hostname.remove(local_node) + if (not localMode): + # Send xml file to every host + g_sshTool.scpFiles(confFile, xmlDir, hostname, mpprcFile) + # change owner and mode of xml file + cmd = g_Platform.getChmodCmd(str(DefaultValue.FILE_MODE), confFile) + DefaultValue.execCommandWithMode(cmd, + "change permission", + g_sshTool, + localMode, + mpprcFile, + hostname) + except Exception as e: + raise Exception(str(e)) + + @staticmethod + def cleanFileDir(dirName, g_sshTool=None, hostname=None): + ''' + function: clean directory or file + input: dirName, g_sshTool, hostname + output:NA + ''' + if hostname is None: + hostname = [] + 
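# --- editorial sketch, not part of the patch --------------------------------
# distributeTransEncryptFile() above repeats one pattern for every key file:
# temporarily widen the file mode so scp can read it, copy it to all hosts,
# then put the restrictive mode back. A hedged standalone sketch of that
# pattern, assuming 0o700/0o400-style modes and with scp_to_hosts() as a
# hypothetical stand-in for sshTool.scpFiles:
import os
import stat

def distribute_key_file(path, hosts, scp_to_hosts):
    if not os.path.exists(path):
        return
    os.chmod(path, stat.S_IRWXU)          # widen to 0700 so scp can read it
    try:
        scp_to_hosts(path, os.path.dirname(path), hosts)
    finally:
        os.chmod(path, stat.S_IRUSR)      # restore read-only 0400
# -----------------------------------------------------------------------------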
+        try:
+            cmd = g_file.SHELL_CMD_DICT["deleteDir"] % (dirName, dirName)
+            # clean the file or directory on the local node
+            if (g_sshTool is None):
+                (status, output) = subprocess.getstatusoutput(cmd)
+                if (status != 0):
+                    raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd +
+                                    "Error:\n%s" % str(output))
+            else:
+                # clean the directory or file on the specified remote nodes
+                if hostname == []:
+                    g_sshTool.executeCommand(cmd, "clean directory or file ")
+                else:
+                    g_sshTool.executeCommand(cmd, "clean directory or file ",
+                                             DefaultValue.SUCCESS, hostname)
+        except Exception as e:
+            raise Exception(str(e))
+
+    @staticmethod
+    def execCommandLocally(cmd):
+        """
+        function: execute the command only on the local node
+        input: cmd
+        output: NA
+        """
+        # execute the cmd
+        (status, output) = subprocess.getstatusoutput(cmd)
+        # if the cmd failed, then raise
+        if (status != 0 and "[GAUSS-5" in str(output)):
+            raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd +
+                            "Error:\n%s" % str(output))
+        elif (status != 0):
+            raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % str(cmd) +
+                            " Error: \n%s" % str(output))
+
+    @staticmethod
+    def execCommandWithMode(cmd, descript, g_sshTool, localMode=False,
+                            mpprcFile='', hostList=None):
+        """
+        function: check the mode; if local mode, execute only on the local
+                  node, else execute on all nodes
+        input: cmd, descript, g_sshTool, localMode, mpprcFile
+        output: NA
+        """
+        if hostList is None:
+            hostList = []
+        # check the localMode
+        if localMode:
+            # local mode
+            DefaultValue.execCommandLocally(cmd)
+        else:
+            # non-local mode: execute on all nodes
+            g_sshTool.executeCommand(cmd, descript, DefaultValue.SUCCESS,
+                                     hostList, mpprcFile)
+
+    @staticmethod
+    def getDevices():
+        """
+        function: get the disk device name list
+        input: NA
+        output: device name list
+        """
+        cmd = "fdisk -l 2>/dev/null | grep \"Disk /dev/\" | " \
+              "grep -Ev \"/dev/mapper/|loop\" | awk '{ print $2 }' | " \
+              "awk -F'/' '{ print $NF }' | sed s/:$//g"
+        (status, output) = subprocess.getstatusoutput(cmd)
+        if (status != 0):
+            raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd +
+                            " Error: \n%s" % output)
+        devList = output.split('\n')
+        return devList
+
+    @staticmethod
+    def copyCAfile(sshTool, hostList):
+        """
+        function: copy the etcd CA files to every host
+        input: sshTool, hostList
+        output: NA
+        """
+        try:
+            user = pwd.getpwuid(os.getuid()).pw_name
+            gaussHome = DefaultValue.getInstallDir(user)
+            sslpath = "%s/share/sslcert/etcd/" % gaussHome
+            caKeyFile = "%s/ca.key" % sslpath
+            caCrtFile = "%s/etcdca.crt" % sslpath
+            clientKeyFile = "%s/client.key" % sslpath
+            clientCrtFile = "%s/client.crt" % sslpath
+            etcdKeyRand = "%s/etcd.key.rand" % sslpath
+            etcdKeyCipher = "%s/etcd.key.cipher" % sslpath
+            clientKeyRand = "%s/client.key.rand" % sslpath
+            clientKeyCipher = "%s/client.key.cipher" % sslpath
+
+            if (os.path.exists(caKeyFile)):
+                mkdirCmd = g_Platform.getMakeDirCmd(sslpath, True)
+                changModeCmd = g_Platform.getChmodCmd(
+                    str(DefaultValue.KEY_DIRECTORY_MODE), sslpath)
+                cmd = "%s && %s" % (mkdirCmd, changModeCmd)
+                sshTool.executeCommand(cmd, "create CA path",
+                                       DefaultValue.SUCCESS, hostList)
+                sshTool.scpFiles(caKeyFile, sslpath, hostList)
+                sshTool.scpFiles(caCrtFile, sslpath, hostList)
+                sshTool.scpFiles(clientKeyFile, sslpath, hostList)
+                sshTool.scpFiles(clientCrtFile, sslpath, hostList)
+                sshTool.scpFiles(etcdKeyRand, sslpath, hostList)
+                sshTool.scpFiles(etcdKeyCipher, sslpath, hostList)
+                sshTool.scpFiles(clientKeyRand, sslpath, hostList)
+                sshTool.scpFiles(clientKeyCipher, sslpath, hostList)
+                cmd = g_Platform.getChmodCmd(str(DefaultValue.KEY_FILE_MODE),
+                                             "%s %s" % (caKeyFile, caCrtFile))
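# --- editorial sketch, not part of the patch --------------------------------
# execCommandWithMode() above is the single switch between "run this command
# on the local node only" and "fan it out to every node over ssh". A minimal
# equivalent of that dispatch, with run_remote() as a hypothetical stand-in
# for sshTool.executeCommand:
import subprocess

def exec_with_mode(cmd, local_mode, hosts, run_remote):
    if local_mode:
        status, output = subprocess.getstatusoutput(cmd)
        if status != 0:
            raise Exception("command failed: %s\nError:\n%s" % (cmd, output))
    else:
        run_remote(cmd, hosts)
# -----------------------------------------------------------------------------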
sshTool.executeCommand(cmd, "change permission", + DefaultValue.SUCCESS, hostList) + except Exception as e: + raise Exception(str(e)) + + @staticmethod + def genCert(nodeip, etcddir, remoteip=""): + """ + function: generate a certificate file for ETCD + input : nodeip:backip, etcddir: the dir of etcd, remoteip:sship + output: NA + """ + try: + user = pwd.getpwuid(os.getuid()).pw_name + ###############1.Save the openssl.cnf under + # $GAUSSHOME/share/sslcert/etcd + gaussHome = DefaultValue.getInstallDir(user) + sslpath = "%s/share/sslcert/etcd" % gaussHome + sslcfg = "%s/openssl.cnf" % sslpath + tmp_Dir = "%s/demoCA" % sslpath + etcdKeyFile = "%s/etcd.key" % sslpath + etcdCsrFile = "%s/etcd.csr" % sslpath + etcdCrtFile = "%s/etcd.crt" % sslpath + etcdKeyRand = "%s/etcd.key.rand" % sslpath + etcdKeyCipher = "%s/etcd.key.cipher" % sslpath + + ###############2.clean file + DefaultValue.cleanFileDir(tmp_Dir) + + ###############3.generate server certificate and sign it + # create directory and copy files + ###############3.generate server certificate and sign it + randpass = DefaultValue.aes_cbc_decrypt_with_path(sslpath) + cmd = "%s" % g_Platform.getCdCmd(sslpath) + # Create paths and files + cmd += " && %s" % g_Platform.getMakeDirCmd("demoCA/newcerts", True) + cmd += " && %s" % g_Platform.getMakeDirCmd("demoCA/private", True) + cmd += " && %s" % g_Platform.getChmodCmd( + str(DefaultValue.KEY_DIRECTORY_MODE), "demoCA/newcerts") + cmd += " && %s" % g_Platform.getChmodCmd( + str(DefaultValue.KEY_DIRECTORY_MODE), "demoCA/private") + cmd += " && %s" % g_Platform.getTouchCmd("demoCA/index.txt") + cmd += " && echo '01' > demoCA/serial" + cmd += " && export SAN=\"IP:%s\"" % nodeip + cmd += " && %s " % g_Platform.getCopyCmd("ca.key", + "demoCA/private/") + cmd += " && %s " % g_Platform.getCopyCmd("etcdca.crt", "demoCA/") + + cmd += " && openssl req -config '%s' -newkey rsa:4096 -keyout " \ + "'%s' -passout pass:%s -out '%s' -subj '/CN=cn'" % \ + (sslcfg, etcdKeyFile, randpass, etcdCsrFile) + cmd += " && %s" % g_Platform.getCdCmd("demoCA") + cmd += " && openssl ca -startdate 200101000000Z -config " \ + "'%s' -extensions etcd_server -batch -keyfile " \ + "'%s/demoCA/private/ca.key' -passin pass:%s -cert " \ + "'%s/demoCA/etcdca.crt' -out '%s' -infiles '%s'" % \ + (sslcfg, sslpath, randpass, sslpath, etcdCrtFile, + etcdCsrFile) + cmd += " && cd ../ && find . 
-type f | xargs chmod %s" % \ + DefaultValue.KEY_FILE_MODE + DefaultValue.execCommandLocally(cmd) + ############4.copy etcd.srt to the ETCD directory + # copy the file to the ETCD directory + etcddir = "%s/" % etcddir + if (remoteip): + g_OSlib.scpFile(remoteip, etcdKeyFile, etcddir) + g_OSlib.scpFile(remoteip, etcdCrtFile, etcddir) + g_OSlib.scpFile(remoteip, etcdKeyRand, etcddir) + g_OSlib.scpFile(remoteip, etcdKeyCipher, etcddir) + else: + g_file.cpFile(etcdKeyFile, etcddir) + g_file.cpFile(etcdCrtFile, etcddir) + g_file.cpFile(etcdKeyRand, etcddir) + g_file.cpFile(etcdKeyCipher, etcddir) + cmd = "unset SAN" + DefaultValue.execCommandLocally(cmd) + DefaultValue.cleanFileDir(tmp_Dir) + except Exception as e: + DefaultValue.cleanFileDir(tmp_Dir) + raise Exception(str(e)) + + @staticmethod + def replaceCertFilesToRemoteNode(cnNodeName, instanceList, cnInstDir): + """ + function: This method is for replace SSL cert files + input : cnNodeName, instanceList, cnInstDir + output: NA + """ + fileList = DefaultValue.CERT_ROLLBACK_LIST[:] + for file_inx in range(len(fileList)): + fileList[file_inx] = os.path.join(cnInstDir, fileList[file_inx]) + + # copy encrypt file to host + for instInfo in instanceList: + for certfile in fileList: + # scp certfile from cnNodeName to instInfo.hostname + sshCmd = g_Platform.getSshCmd(cnNodeName) + scpCmd = g_Platform.getRemoteCopyCmd(certfile, "%s/" % + instInfo.datadir, + instInfo.hostname, + otherHost=cnNodeName) + cmd = "%s \"if [ -f '%s' ]; then %s; fi\"" % (sshCmd, certfile, + scpCmd) + DefaultValue.execCommandLocally(cmd) + # change the certfile under instInfo.hostname + sshCmd = g_Platform.getSshCmd(instInfo.hostname) + chmodCmd = g_Platform.getChmodCmd( + str(DefaultValue.KEY_FILE_MODE), certfile) + cmd = "%s \"if [ -f '%s' ]; then %s; fi\"" % (sshCmd, certfile, + chmodCmd) + DefaultValue.execCommandLocally(cmd) + + @staticmethod + def getSecurityMode(): + """ + function:to set security mode,if security_mode is not in config + file,return off. 
+ input:String + output:String + """ + securityModeValue = "off" + try: + cmd = "ps -ux | grep \"\\-\\-securitymode\" | grep -v \"grep\"" + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0 and output != "": + raise Exception( + (ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + "Error: \n %s" % output)) + if output != "": + securityModeValue = "on" + return securityModeValue + except Exception as ex: + raise Exception(str(ex)) + + @staticmethod + def syncDependLibsAndEtcFiles(sshTool, nodeName): + """ + function: Distribute etc file and libsimsearch libs to new node + input : NA + output: NA + """ + try: + # distribute etc file to new node + gaussHome = DefaultValue.getEnv("GAUSSHOME") + + searchConfigFile = "%s/etc/searchletConfig.yaml" % gaussHome + searchIniFile = "%s/etc/searchServer.ini" % gaussHome + if (os.path.exists(searchConfigFile)): + sshTool.scpFiles(searchConfigFile, searchConfigFile, nodeName) + if (os.path.exists(searchIniFile)): + sshTool.scpFiles(searchIniFile, searchIniFile, nodeName) + + # distribute libsimsearch libs to new node + libPath = "%s/lib" % gaussHome + libsimsearchPath = "%s/libsimsearch" % libPath + if (not os.path.isdir(libsimsearchPath)): + return + + for node in nodeName: + cmd = "pscp -H %s '%s' '%s' " % (node, libsimsearchPath, + libPath) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50214"] % cmd + + " Error: \n%s" % str(output)) + + except Exception as e: + raise Exception(str(e)) + + @staticmethod + def checkTransactionReadonly(user, DbclusterInfo, normalCNList=None): + """ + function : check the CN's parameter default_transaction_read_only is on + if eques on, return 1 and error info + input : user, DbclusterInfo, normalCNList + output : 0/1 + """ + cnList = [] + if normalCNList is None: + normalCNList = [] + localhost = DefaultValue.GetHostIpOrName() + sql = "show default_transaction_read_only;" + try: + if (len(normalCNList)): + cnList = normalCNList + else: + # Find CN instance in cluster + for dbNode in DbclusterInfo.dbNodes: + if (len(dbNode.coordinators) != 0): + cnList.append(dbNode.coordinators[0]) + + nodeInfo = DbclusterInfo.getDbNodeByName( + DefaultValue.GetHostIpOrName()) + security_mode_value = DefaultValue.getSecurityMode() + # Execute sql on every CN instance + if (security_mode_value == "on"): + for cooInst in cnList: + if (localhost == cooInst.hostname): + (status, result, error_output) = \ + ClusterCommand.excuteSqlOnLocalhost(cooInst.port, + sql) + if (status != 2): + return 1, "[%s]: Error: %s result: %s status: " \ + "%s" % \ + (cooInst.hostname, error_output, + result, status) + if (result[0][0].strip().lower() == "on"): + return 1, "The database is in read only mode." + else: + currentTime = time.strftime("%Y-%m-%d_%H:%M:%S") + pid = os.getpid() + outputfile = "metadata_%s_%s_%s.json" % ( + cooInst.hostname, pid, currentTime) + tmpDir = DefaultValue.getTmpDirFromEnv() + filepath = os.path.join(tmpDir, outputfile) + ClusterCommand.executeSQLOnRemoteHost(cooInst.hostname, + cooInst.port, + sql, + filepath) + (status, result, error_output) = \ + ClusterCommand.getSQLResult(cooInst.hostname, + outputfile) + if (status != 2): + return 1, "[%s]: Error: %s result: %s status: " \ + "%s" % \ + (cooInst.hostname, error_output, result, + status) + if (result[0][0].strip().lower() == "on"): + return 1, "The database is in read only mode." 
+            else:
+                for cooInst in cnList:
+                    (status, output) = ClusterCommand.remoteSQLCommand(
+                        sql, user, cooInst.hostname, cooInst.port)
+                    resList = output.split('\n')
+                    if (status != 0 or len(resList) < 1):
+                        return 1, "[%s]: %s" % (cooInst.hostname, output)
+                    if (resList[0].strip() == "on"):
+                        return 1, "The database is in read only mode."
+            return 0, "success"
+        except Exception as e:
+            return 1, str(e)
+
+    @staticmethod
+    def makeCompressedToolPackage(packageDir):
+        """
+        function : make the compressed OM tool package (excluding *.log)
+        input : String (packageDir)
+        output : NA
+        """
+        # init bin file name, integrity file name and tar list names
+        packageDir = os.path.normpath(packageDir)
+        bz2FileName = g_OSlib.getBz2FilePath()
+        integrityFileName = g_OSlib.getSHA256FilePath()
+
+        tarLists = "--exclude=script/*.log --exclude=*.log script " \
+                   "version.cfg lib"
+        upgrade_sql_file_path = os.path.join(packageDir,
+                                             Const.UPGRADE_SQL_FILE)
+        if os.path.exists(upgrade_sql_file_path):
+            tarLists += " %s %s" % (Const.UPGRADE_SQL_SHA,
+                                    Const.UPGRADE_SQL_FILE)
+        if "HOST_IP" in os.environ.keys():
+            tarLists += " cluster_default_agent.xml"
+        try:
+            # make compressed tool package
+            cmd = "%s && " % g_Platform.getCdCmd(packageDir)
+            # do not tar *.log files
+            cmd += g_Platform.getCompressFilesCmd(
+                DefaultValue.get_package_back_name(), tarLists)
+            cmd += " %s %s " % (os.path.basename(bz2FileName),
+                                os.path.basename(integrityFileName))
+            cmd += "&& %s " % g_Platform.getChmodCmd(
+                str(DefaultValue.KEY_FILE_MODE),
+                DefaultValue.get_package_back_name())
+            cmd += "&& %s " % g_Platform.getCdCmd("-")
+            (status, output) = DefaultValue.retryGetstatusoutput(cmd)
+            if (status != 0):
+                raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd +
+                                " Error: \n%s" % output)
+        except Exception as e:
+            raise Exception(str(e))
+
+    @staticmethod
+    def getCpuSet():
+        """
+        function: get the CPU count of the current board, used as the
+                  default parallel number
+        input: NA
+        output: cpuSet
+        """
+        # do this function to get the parallel number
+        cpuSet = multiprocessing.cpu_count()
+        if (cpuSet > 1):
+            return cpuSet
+        else:
+            return DefaultValue.DEFAULT_PARALLEL_NUM
+
+    @staticmethod
+    def getTopPathNotExist(topDirPath):
+        """
+        function : walk up topDirPath and return the topmost directory
+                   that does not exist yet (the first one to be created)
+        input : String
+        output : String
+        """
+        tmpDir = topDirPath
+        while True:
+            # find the top path to be created
+            (tmpDir, topDirName) = os.path.split(tmpDir)
+            if os.path.exists(tmpDir) or topDirName == "":
+                tmpDir = os.path.join(tmpDir, topDirName)
+                break
+        return tmpDir
+
+    @staticmethod
+    def checkSHA256(binFile, sha256File):
+        """
+        function : verify that the SHA256 digest of binFile matches the
+                   digest recorded in sha256File; raise on mismatch
+        input : binFile, sha256File
+        output : NA
+        """
+        if binFile == "":
+            raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % "bin file")
+        if sha256File == "":
+            raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"]
+                            % "verification file")
+
+        sha256Obj = hashlib.sha256()
+        if not sha256Obj:
+            raise Exception(ErrorCode.GAUSS_502["GAUSS_50238"] %
+                            binFile + "can not get verification Obj.")
+        with open(binFile, "rb") as filebin:
+            while True:
+                strRead = filebin.read(8096)
+                if not strRead:
+                    break
+                sha256Obj.update(strRead)
+        strSHA256 = sha256Obj.hexdigest()
+        with open(sha256File, "r") as fileSHA256:
+            strRead = fileSHA256.readline()
+            oldSHA256 = strRead.strip()
+        if strSHA256 != oldSHA256:
+            raise Exception(ErrorCode.GAUSS_502["GAUSS_50238"] % binFile)
+
+    @staticmethod
+    def checkDirSize(path, needSize, g_logger):
+        """
+        function: Check the free size of the file system holding path
+        input : path, needSize (MB), g_logger
+        output: diskSizeInfo
+        """
+        # The file system of directory
+        diskSizeInfo = {}
+        dfCmd = "%s | head -2 |tail -1 | %s -F\" \" '{print $1}'" % \
(g_Platform.getDiskFreeCmd(path), g_Platform.getAwkCmd())
+        (status, output) = subprocess.getstatusoutput(dfCmd)
+        if (status != 0):
+            g_logger.logExit(ErrorCode.GAUSS_502["GAUSS_50219"] %
+                             "the system file directory" +
+                             " Command:%s. Error:\n%s" % (dfCmd, output))
+
+        fileSysName = str(output)
+        diskSize = diskSizeInfo.get(fileSysName)
+        if (diskSize is None):
+            vfs = os.statvfs(path)
+            diskSize = vfs.f_bavail * vfs.f_bsize // (1024 * 1024)
+            diskSizeInfo[fileSysName] = diskSize
+
+        # an instance needs at least needSize (about 200M) of free space
+        if (diskSize < needSize):
+            g_logger.logExit(ErrorCode.GAUSS_504["GAUSS_50400"] % (fileSysName,
+                                                                   needSize))
+
+        diskSizeInfo[fileSysName] -= needSize
+        return diskSizeInfo
+
+    @staticmethod
+    def kill_process(process_name):
+        """
+        function: kill all processes matching process_name (signal 9)
+        input : process_name
+        output: NA
+        """
+        dfCmd = DefaultValue.killInstProcessCmd(process_name, False, 9, False)
+        DefaultValue.execCommandLocally(dfCmd)
+
+    @staticmethod
+    def updateRemoteUserEnvVariable(userProfile, variable, value, ssh_tool,
+                                    hostnames=None):
+        """
+        function : Update remote user environment variable
+        input : String, String, String, SshTool, list
+        output : NA
+        """
+        cmd = "sed -i '\\\/^\\\s*export\\\s*%s=.*$/d' %s;" % (variable,
+                                                              userProfile)
+        cmd += 'echo \\\"export %s=%s\\\" >> %s' % (variable, value,
+                                                    userProfile)
+        if hostnames and isinstance(hostnames, list):
+            ssh_tool.executeCommand(cmd, "", DefaultValue.SUCCESS, hostnames)
+        elif hostnames:
+            raise Exception("updateRemoteUserEnvVariable: %s" % (
+                    ErrorCode.GAUSS_500["GAUSS_50003"] % (hostnames, "list")))
+        else:
+            ssh_tool.executeCommand(cmd, "")
+
+    @staticmethod
+    def getInstBackupName(inst):
+        """
+        function : get backup file name (prefix) for the instance
+        input : instance object
+        output : backup file name for this instance
+        """
+        MAX_BACKUP_FILE_LEN = 128
+
+        backup_name = ''
+
+        if not inst:
+            return None
+        if not inst.datadir:
+            return None
+
+        datadir = inst.datadir
+        if len(datadir) < MAX_BACKUP_FILE_LEN:
+            backup_name = datadir.lstrip('/').strip('/').replace('/', '_')
+        else:
+            sha256Obj = hashlib.sha256()
+            if not sha256Obj:
+                raise Exception(ErrorCode.GAUSS_529["GAUSS_52939"]
+                                % "verification Obj.")
+            # hashlib requires bytes in python3, so encode the path first
+            sha256Obj.update(datadir.encode("utf-8"))
+            backup_name = sha256Obj.hexdigest()
+
+        return backup_name
+
+    @staticmethod
+    def getPrimaryDnNum(dbClusterInfoGucDnPr):
+        """
+        function : count the primary datanodes over all cluster nodes
+        input : cluster info object
+        output : total primary datanode number
+        """
+        dataCount = 0
+        dbNodeList = dbClusterInfoGucDnPr.dbNodes
+        for dbNode in dbNodeList:
+            dataCount = dataCount + dbNode.dataNum
+        return dataCount
+
+    @staticmethod
+    def getPhysicMemo(PhsshTool, instaLocalMode):
+        """
+        function : get the physical memory size in GB; in local mode read
+                   it from the local node, otherwise return the minimum
+                   over all nodes reached through PhsshTool
+        input : PhsshTool, instaLocalMode
+        output : memory size in GB
+        """
+        if instaLocalMode:
+            cmd = g_file.SHELL_CMD_DICT["physicMemory"]
+            (status, output) = subprocess.getstatusoutput(cmd)
+            if (status != 0):
+                raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd +
+                                "Error:\n%s" % str(output))
+            else:
+                memTotalList = output.split("\n")
+                for content in memTotalList:
+                    if ("MemTotal" in content):
+                        memoList = content.split(":")
+                        memo = memoList[1]
+                        memo = memo.replace("kB", "")
+                        memo = memo.replace("\n", "")
+                        memo = memo.strip()
+                        memo = int(memo) / 1024 / 1024
+                        return memo
+        physicMemo = []
+        cmd = g_file.SHELL_CMD_DICT["physicMemory"]
+        (status, output) = PhsshTool.getSshStatusOutput(cmd)
+        for ret in status.values():
+            if (ret != DefaultValue.SUCCESS):
+                raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd +
+                                "Error:\n%s" % str(output))
+        memTotalList = output.split("\n")
+        for content in memTotalList:
+            if ("MemTotal" in content):
+                memoList = content.split(":")
+                memo = memoList[1]
+                memo = memo.replace("kB", "")
+                memo = memo.strip()
+                memo = int(memo) / 1024 / 1024
+                physicMemo.append(memo)
+        minPhysicMemo = min(physicMemo)
+        return minPhysicMemo
+
+    @staticmethod
+    def getDataNodeNum(dbClusterInfoGucDn):
+        """
+        function : get the maximum datanode number on a single node
+        input : cluster info object
+        output : max datanode number
+        """
+        dataNodeNum = []
+        dbNodeList = dbClusterInfoGucDn.dbNodes
+        for dbNode in dbNodeList:
+            dataNodeNum.append(dbNode.dataNum)
+        maxDataNodeNum = max(dataNodeNum)
+        return maxDataNodeNum
+
+    @staticmethod
+    def dynamicGuc(user, logger, instanceType, tmpGucFile, gucXml=False):
+        """
+        function: load the dynamic GUC template for instanceType from
+                  guc_list.xml (or guc_cloud_list.xml) and compute the
+                  concrete values from the parameters in tmpGucFile
+        input : user, logger, instanceType, tmpGucFile, gucXml
+        output: gucParaDict
+        """
+        try:
+            instance = instanceType
+            gucList = g_file.readFile(tmpGucFile)
+            gucStr = gucList[0].replace("\n", "")
+            dynamicParaList = gucStr.split(",")
+            for guc in dynamicParaList:
+                if (guc == ""):
+                    raise Exception(ErrorCode.GAUSS_502["GAUSS_50203"] %
+                                    gucStr)
+
+            # getting the path of guc_list.xml.
+            dirName = os.path.dirname(os.path.realpath(__file__))
+            if gucXml:
+                gucFile = os.path.join(dirName,
+                                       "./../etc/conf/guc_cloud_list.xml")
+            else:
+                gucFile = os.path.join(dirName, "./../etc/conf/guc_list.xml")
+            gucFile = os.path.normpath(gucFile)
+
+            # reading xml.
+            gucDict = {}
+            rootNode = initParserXMLFile(gucFile)
+            instanceEle = rootNode.find(instance)
+            instanceList = instanceEle.findall("PARAM")
+            for gucElement in instanceList:
+                DefaultValue.checkGuc(gucElement.attrib['VALUE'], logger)
+                gucDict[gucElement.attrib['KEY']] = gucElement.attrib['VALUE']
+            gucParaDict = DefaultValue.initGuc(gucDict, logger,
+                                               dynamicParaList, gucXml)
+
+            return gucParaDict
+        except Exception as e:
+            raise Exception(str(e))
+
+    @staticmethod
+    def checkGuc(gucValue, logger):
+        """
+        function: check that the GUC value contains no illegal characters
+        input : gucValue, logger
+        output: NA (raises on illegal characters)
+        """
+        gucCheckList = ["|", ";", "&", "$", "<", ">", "`", "{", "}", "[", "]",
+                        "~", "?", " ", "!"]
+        if (gucValue.strip() == ""):
+            return
+        for rac in gucCheckList:
+            flag = gucValue.find(rac)
+            if gucValue.strip() == "%x %a %m %u %d %h %p %S" and rac == " ":
+                continue
+            if flag >= 0:
+                raise Exception(ErrorCode.GAUSS_502["GAUSS_50219"] % gucValue +
+                                " There are illegal characters %s "
+                                "in the content."
% rac) + + @staticmethod + def initGuc(gucDict, logger, dynamicParaList, gucXml=False): + """ + """ + for guc in gucDict: + if (guc == "comm_max_datanode" and not gucXml): + if (int(dynamicParaList[0]) < 256): + gucDict[guc] = 256 + elif (int(dynamicParaList[0]) < 512): + gucDict[guc] = 512 + elif (int(dynamicParaList[0]) < 1024): + gucDict[guc] = 1024 + elif (int(dynamicParaList[0]) < 2048): + gucDict[guc] = 2048 + else: + gucDict[guc] = 4096 + continue + elif (guc == "max_process_memory"): + if (gucDict[guc] == "80GB"): + continue + if (int(dynamicParaList[0]) < 256): + ratioNum = 1 + elif (int(dynamicParaList[0]) < 512): + ratioNum = 2 + else: + ratioNum = 3 + gucDict[guc] = gucDict[guc].replace( + "PHYSIC_MEMORY", dynamicParaList[1]) + gucDict[guc] = gucDict[guc].replace( + "MAX_MASTER_DATANUM_IN_ONENODE", dynamicParaList[2]) + gucDict[guc] = gucDict[guc].replace("N", str(ratioNum)) + try: + gucDict[guc] = eval(gucDict[guc]) + except Exception as e: + raise Exception(ErrorCode.GAUSS_516["GAUSS_51632"] % + "calculate: %s" % gucDict[guc]) + gucDict[guc] = int(gucDict[guc]) + if (gucDict[guc] >= 2 and gucDict[guc] <= 2047): + gucDict[guc] = str(gucDict[guc]) + "GB" + elif (gucDict[guc] < 2): + gucDict[guc] = "2GB" + else: + gucDict[guc] = "2047GB" + continue + elif guc == "shared_buffers": + if (int(dynamicParaList[0]) < 256): + ratioNum = 1 + elif (int(dynamicParaList[0]) < 512): + ratioNum = 2 + else: + ratioNum = 3 + gucDict[guc] = gucDict[guc].replace( + "PHYSIC_MEMORY", dynamicParaList[1]) + gucDict[guc] = gucDict[guc].replace( + "MAX_MASTER_DATANUM_IN_ONENODE", dynamicParaList[2]) + gucDict[guc] = gucDict[guc].replace("N", str(ratioNum)) + try: + gucDict[guc] = eval(gucDict[guc]) + except Exception as e: + raise Exception(ErrorCode.GAUSS_516["GAUSS_51632"] % + "calculate: %s" % gucDict[guc]) + gucDict[guc] = int(gucDict[guc] * 1024) + if gucDict[guc] >= 1024: + gucDict[guc] = "1GB" + else: + gucDict[guc] = str(gucDict[guc]) + "MB" + return gucDict + + @staticmethod + def getPrivateGucParamList(): + """ + function : Get the private guc parameter list. 
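+        These deliberately small settings (e.g. shared_buffers at 32MB)
+        suit the dummy standby, which in this deployment only receives
+        and stores xlog rather than serving client queries.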
+        input : NA
+        output : dict of parameter name -> value
+        """
+        # These fixed values are only used by the dummy standby instance;
+        # the dictionary below is authoritative.
+        privateGucParamDict = {}
+        privateGucParamDict["max_connections"] = "100"
+        privateGucParamDict["memorypool_enable"] = "false"
+        privateGucParamDict["shared_buffers"] = "32MB"
+        privateGucParamDict["bulk_write_ring_size"] = "32MB"
+        privateGucParamDict["max_prepared_transactions"] = "10"
+        privateGucParamDict["cstore_buffers"] = "16MB"
+        privateGucParamDict["autovacuum_max_workers"] = "0"
+        privateGucParamDict["wal_buffers"] = "-1"
+        privateGucParamDict["max_locks_per_transaction"] = "64"
+        privateGucParamDict["sysadmin_reserved_connections"] = "3"
+        privateGucParamDict["max_wal_senders"] = "4"
+        return privateGucParamDict
+
+    @staticmethod
+    def checkKerberos(mpprcFile):
+        """
+        function : check whether kerberos authentication is configured
+        input : mpprc file absolute path
+        output : True/False
+        """
+        krb5Conf = os.path.join(os.path.dirname(mpprcFile),
+                                DefaultValue.FI_KRB_CONF)
+        tablespace = DefaultValue.getEnv("ELK_SYSTEM_TABLESPACE")
+        if (tablespace is not None and tablespace != ""):
+            xmlfile = os.path.join(os.path.dirname(mpprcFile),
+                                   DefaultValue.FI_ELK_KRB_XML)
+        else:
+            xmlfile = os.path.join(os.path.dirname(mpprcFile),
+                                   DefaultValue.FI_KRB_XML)
+        if (os.path.exists(xmlfile) and os.path.exists(krb5Conf) and
+                DefaultValue.getEnv("PGKRBSRVNAME")):
+            return True
+        return False
+
+    @staticmethod
+    def get_max_wal_senders_value(max_connections):
+        """
+        function : Get guc max_wal_senders value by max_connections.
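+        Example: max_connections=100 yields 99; once max_connections - 1
+        reaches DefaultValue.MAX_WAL_SENDERS, the result is capped there.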
+ input : NA + output + """ + value = int(max_connections) - 1 + if (value >= DefaultValue.MAX_WAL_SENDERS): + return DefaultValue.MAX_WAL_SENDERS + else: + return value + + @staticmethod + def setActionFlagFile(module="", logger=None, mode=True): + """ + function: Set action flag file + input : module + output: NAself + """ + if os.getuid() == 0: + return + # Get the temporary directory from PGHOST + tmpDir = DefaultValue.getTmpDirFromEnv() + if not tmpDir: + raise Exception(ErrorCode.GAUSS_518["GAUSS_51802"] % "PGHOST") + # check if tmp dir exists + if not os.path.exists(tmpDir): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % + tmpDir + " Please check it.") + if not os.access(tmpDir, os.R_OK | os.W_OK | os.X_OK): + raise Exception(ErrorCode.GAUSS_501["GAUSS_50103"] % tmpDir) + actionFlagFile = os.path.join(tmpDir, + DefaultValue.ACTION_FLAG_FILE + "_%s" + % os.getpid()) + if mode: + g_file.createFileInSafeMode(actionFlagFile) + with open(actionFlagFile, "w") as fp: + fp.write(module) + fp.flush() + os.chmod(actionFlagFile, DefaultValue.KEY_FILE_PERMISSION) + else: + if os.path.exists(actionFlagFile): + os.remove(actionFlagFile) + + @staticmethod + def isUnderUpgrade(user): + tempPath = DefaultValue.getTmpDirFromEnv(user) + bakPath = os.path.join(tempPath, "binary_upgrade") + if os.path.isdir(bakPath): + if os.listdir(bakPath): + return True + return False + + @staticmethod + def enableWhiteList(sshTool, mpprcFile, nodeNames, logger): + """ + function: write environment value WHITELIST_ENV for agent mode + input : sshTool, mpprcFile, nodeNames, logger + output: NA + """ + env_dist = os.environ + if "HOST_IP" in env_dist.keys(): + cmd = "sed -i '/WHITELIST_ENV=/d' %s ; " \ + "echo 'export WHITELIST_ENV=1' >> %s" % (mpprcFile, + mpprcFile) + sshTool.executeCommand(cmd, "Add WHITELIST_ENV", + DefaultValue.SUCCESS, nodeNames) + logger.debug("Successfully write $WHITELIST_ENV in %s" % mpprcFile) + + @staticmethod + def disableWhiteList(sshTool, mpprcFile, nodeNames, logger): + """ + function: delete environment value WHITELIST_ENV for agent mode + input : NA + output: NA + """ + env_dist = os.environ + if "HOST_IP" in env_dist.keys(): + cmd = "sed -i '/WHITELIST_ENV=/d' %s && unset WHITELIST_ENV" % \ + mpprcFile + sshTool.executeCommand(cmd, "Clear WHITELIST_ENV", + DefaultValue.SUCCESS, nodeNames) + logger.debug( + "Successfully clear $WHITELIST_ENV in %s." % mpprcFile) + + @staticmethod + def checkDockerEnv(): + cmd = "egrep '^1:.+(docker|lxc|kubepods)' /proc/1/cgroup" + (status, output) = subprocess.getstatusoutput(cmd) + if output: + return True + else: + return False + + @staticmethod + def getPrimaryNode(userProfile): + """ + :param + :return: PrimaryNode + """ + try: + primaryFlag = "Primary" + count = 0 + while count < 30: + cmd = "source {0} && gs_om -t status --detail".format( + userProfile) + (status, output) = subprocess.getstatusoutput(cmd) + if status == 0: + break + time.sleep(10) + count += 1 + if status != 0: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % + "Command:%s. 
Error:\n%s" % (cmd, output)) + targetString = output.split("Datanode")[1] + dnPrimary = [x for x in re.split(r"[|\n]", targetString) + if primaryFlag in x] + primaryList = [] + for dn in dnPrimary: + primaryList.append(list(filter(None, dn.split(" ")))[1]) + return primaryList, output + except Exception as e: + raise Exception(str(e)) + + +class ClusterCommand(): + ''' + Common for cluster command + ''' + + def __init__(self): + pass + + # gs_sshexkey execution takes total steps + TOTAL_STEPS_SSHEXKEY = 11 + # gs_preinstall -L execution takes total steps + TOTAL_STEPS_PREINSTALL_L = 14 + # gs_preinstall execution takes total steps + TOTAL_STEPS_PREINSTALL = 17 + # gs_install execution takes total steps + TOTAL_STEPS_INSTALL = 7 + # gs_om -t managecn -m add execution takes total steps + TOTAL_STEPS_OM_ADD = 20 + # gs_om -t managecn -m delete execution takes total steps + TOTAL_STEPS_OM_DELETE = 16 + # gs_om -t changeip execution takes total steps + TOTAL_STEPS_OM_CHANGEIP = 11 + # gs_expand -t dilatation execution takes total steps + TOTAL_STEPS_EXPAND_DILA = 17 + # gs_expand -t redistribute execution takes total steps + TOTAL_STEPS_EXPAND_REDIS = 6 + # gs_shrink -t entry1_percontraction execution takes total steps + TOTAL_STEPS_SHRINK_FIRST = 9 + # gs_shrink -t entry2_redistributre execution takes total steps + TOTAL_STEPS_SHRINK_SECOND = 8 + # gs_shrink -t entry3_postcontraction execution takes total steps + TOTAL_STEPS_SHRINK_THIRD = 7 + # gs_replace -t warm-standby execution takes total steps + TOTAL_STEPS_REPLACE_WARM_STANDBY = 11 + # gs_replace -t warm-standby rollback replace execution takes total steps + TOTAL_STEPS_REPLACE_WARM_STANDBY_REPLACE = 9 + # gs_replace -t warm-standby rollback install execution takes total steps + TOTAL_STEPS_REPLACE_WARM_STANDBY_INSTALL = 7 + # gs_replace -t warm-standby rollback config execution takes total steps + TOTAL_STEPS_REPLACE_WARM_STANDBY_CONFIG = 6 + # gs_replace -t install execution takes total steps + TOTAL_STEPS_REPLACE_INSTALL = 6 + # gs_replace -t config execution takes total steps + TOTAL_STEPS_REPLACE_CONFIG = 6 + # gs_replace -t start execution takes total steps + TOTAL_STEPS_REPLACE_START = 3 + # gs_uninstall execution takes total steps + TOTAL_STEPS_UNINSTALL = 8 + # gs_upgradectl -t auto-upgrade execution takes total steps + TOTAL_STEPS_GREY_UPGRADECTL = 12 + # gs_upgradectl -t auto-upgrade --inplace execution takes total steps + TOTAL_STEPS_INPLACE_UPGRADECTL = 15 + # gs_postuninstall execution takes total steps + TOTAL_STEPS_POSTUNINSTALL = 3 + # warm-standby rollback to flag of begin warm standby + WARM_STEP_INIT = "Begin warm standby" + # warm-standby rollback to flag of replace IP finished + WARM_STEP_REPLACEIPS = "Replace IP finished" + # warm-standby rollback to flag of install warm standby nodes finished + WARM_STEP_INSTALL = "Install warm standby nodes finished" + # warm-standby rollback to flag of configure warm standby nodes finished + WARM_STEP_CONFIG = "Configure warm standby nodes finished" + # rollback to flag of start cluster + INSTALL_STEP_CONFIG = "Config cluster" + # rollback to flag of start cluster + INSTALL_STEP_START = "Start cluster" + + @staticmethod + def getRedisCmd(user, port, jobs=1, timeout=None, enableVacuum="", + enableFast="", redisRetry="", buildTable=False, + mode="", host="", database="postgres"): + """ + funciton : Get the command of gs_redis with password for redisuser + input : user: data redis_user + port: the port redis_user connect to server + jobs: data redis parallel nums + enableVacuum: 
is need vacuum + enableFast: doing fast data redistribution or not + redisRetry: retry to excute data redis + buildTable: create pgxc_redistb or not + mode: insert or read-only mode + database: database which need to data redis + output : String + """ + userProfile = DefaultValue.getMpprcFile() + database = database.replace('$', '\$') + cmd = "%s %s ; gs_redis -u %s -p %s -d %s -j %d %s %s %s" % \ + (g_Platform.getSourceCmd(), userProfile, user, str(port), + database, jobs, enableVacuum, enableFast, redisRetry) + # check timeout + if (timeout is not None): + cmd += " -t %d" % timeout + # check buildTable + if buildTable: + cmd += " -v" + else: + cmd += " -r" + # check mode + if (len(mode)): + cmd += " -m %s" % mode + + return cmd + + @staticmethod + def getQueryStatusCmd(user, hostName="", outFile="", showAll=True): + """ + function : Get the command of querying status of cluster or node + input : String + output : String + """ + userProfile = DefaultValue.getMpprcFile() + cmd = "%s %s ; gs_om -t status" % (g_Platform.getSourceCmd(), + userProfile) + # check node id + if (hostName != ""): + cmd += " -h %s" % hostName + else: + if (showAll): + cmd += " --all" + # check out put file + if (outFile != ""): + cmd += " > %s" % outFile + + return cmd + + @staticmethod + def findErrorInSqlFile(sqlFile, output): + """ + function : Find error in the sql file + input : String,String + output : String + """ + GSQL_BIN_FILE = "gsql" + # init flag + ERROR_MSG_FLAG = "(ERROR|FATAL|PANIC)" + GSQL_ERROR_PATTERN = "^%s:%s:(\d*): %s:.*" % \ + (GSQL_BIN_FILE, sqlFile, ERROR_MSG_FLAG) + pattern = re.compile(GSQL_ERROR_PATTERN) + for line in output.split("\n"): + line = line.strip() + result = pattern.match(line) + if (result is not None): + return True + return False + + @staticmethod + def findErrorInSql(output): + """ + function : Find error in sql + input : String + output : boolean + """ + # init flag + ERROR_MSG_FLAG = "(ERROR|FATAL|PANIC)" + ERROR_PATTERN = "^%s:.*" % ERROR_MSG_FLAG + pattern = re.compile(ERROR_PATTERN) + + for line in output.split("\n"): + line = line.strip() + result = pattern.match(line) + if (result is not None): + return True + return False + + @staticmethod + def getSQLCommand(port, database=DefaultValue.DEFAULT_DB_NAME, + gsqlBin="gsql", host=""): + """ + function : get SQL command + input : port, database + output : cmd + """ + cmd = DefaultValue.SQL_EXEC_COMMAND_WITHOUT_HOST_WITHOUT_USER % \ + (gsqlBin, str(int(port) + 1), database) + return cmd + + @staticmethod + def getSQLCommandForInplaceUpgradeBackup( + port, database=DefaultValue.DEFAULT_DB_NAME, gsqlBin="gsql"): + """ + function: get SQL command for Inplace + Upgrade backupOneInstanceOldClusterDBAndRel + input: port, database + output: cmd + """ + cmd = DefaultValue.SQL_EXEC_COMMAND_WITHOUT_HOST_WITHOUT_USER % ( + gsqlBin, port, database) + return cmd + + @staticmethod + def execSQLCommand(sql, user, host, port, database="postgres", + dwsFlag=False, option="", IsInplaceUpgrade=False): + """ + function : Execute sql command + input : String,String,String,int + output : String + """ + database = database.replace('$', '\$') + currentTime = datetime.utcnow().strftime("%Y-%m-%d_%H%M%S%f") + pid = os.getpid() + # init SQL query file + sqlFile = os.path.join( + DefaultValue.getTmpDirFromEnv(user), + "gaussdb_query.sql_%s_%s_%s" % (str(port), str(currentTime), + str(pid))) + # init SQL result file + queryResultFile = os.path.join( + DefaultValue.getTmpDirFromEnv(user), + "gaussdb_result.sql_%s_%s_%s" % (str(port), 
str(currentTime), + str(pid))) + if os.path.exists(sqlFile) or os.path.exists(queryResultFile): + DefaultValue.cleanFile("%s,%s" % (queryResultFile, sqlFile)) + # create an empty sql query file + try: + g_file.createFile(sqlFile, DefaultValue.KEY_FILE_MODE) + except Exception as e: + if os.path.exists(sqlFile): + os.remove(sqlFile) + return 1, str(e) + + # witer the SQL command into sql query file + try: + g_file.createFileInSafeMode(sqlFile) + with open(sqlFile, 'w') as fp: + fp.writelines(sql) + except Exception as e: + DefaultValue.cleanFile(sqlFile) + return 1, str(e) + try: + # init hostPara + userProfile = DefaultValue.getMpprcFile() + hostPara = ("-h %s" % host) if host != "" else "" + # build shell command + # if the user is root, switch the user to execute + if (IsInplaceUpgrade): + gsqlCmd = ClusterCommand.getSQLCommandForInplaceUpgradeBackup( + port, database) + else: + gsqlCmd = ClusterCommand.getSQLCommand( + port, database, host=host) + executeCmd = "%s %s -f '%s' --output '%s' -t -A -X %s" % ( + gsqlCmd, hostPara, sqlFile, queryResultFile, option) + cmd = g_Platform.getExecuteCmdWithUserProfile(user, userProfile, + executeCmd, False) + (status, output) = subprocess.getstatusoutput(cmd) + if ClusterCommand.findErrorInSqlFile(sqlFile, output): + status = 1 + if (status != 0): + DefaultValue.cleanFile("%s,%s" % (queryResultFile, sqlFile)) + return (status, output) + # read the content of query result file. + except Exception as e: + DefaultValue.cleanFile("%s,%s" % (queryResultFile, sqlFile)) + raise Exception(str(e)) + try: + with open(queryResultFile, 'r') as fp: + rowList = fp.readlines() + except Exception as e: + DefaultValue.cleanFile("%s,%s" % (queryResultFile, sqlFile)) + return 1, str(e) + + # remove local sqlFile + DefaultValue.cleanFile("%s,%s" % (queryResultFile, sqlFile)) + + return (0, "".join(rowList)[:-1]) + + @staticmethod + def findTupleErrorInSqlFile(sqlFile, output): + """ + function : find tuple concurrently updated error in file + input : sqlFile, output + output : True, False + """ + ERROR_TUPLE_PATTERN = "^gsql:(.*)tuple concurrently updated(.*)" + pattern = re.compile(ERROR_TUPLE_PATTERN) + for line in output.split("\n"): + line = line.strip() + result = pattern.match(line) + if (result is not None): + return True + return False + + @staticmethod + def remoteSQLCommand(sql, user, host, port, ignoreError=True, + database="postgres", dwsFlag=False, useTid=False, + IsInplaceUpgrade=False): + """ + function : Execute sql command on remote host + input : String,String,String,int + output : String,String + """ + database = database.replace('$', '\$') + currentTime = datetime.utcnow().strftime("%Y-%m-%d_%H%M%S%f") + pid = os.getpid() + # clean old sql file + # init SQL query file + sqlFile = os.path.join(DefaultValue.getTmpDirFromEnv(user), + "gaussdb_remote_query.sql_%s_%s_%s" % ( + str(port), + str(currentTime), + str(pid))) + # init SQL result file + queryResultFile = os.path.join(DefaultValue.getTmpDirFromEnv(user), + "gaussdb_remote_result.sql_%s_%s_%s" % ( + str(port), + str(currentTime), + str(pid))) + RE_TIMES = 3 + if useTid: + threadPid = CDLL('libc.so.6').syscall(186) + sqlFile = sqlFile + str(threadPid) + queryResultFile = queryResultFile + str(threadPid) + if (os.path.exists(sqlFile) or os.path.exists(queryResultFile)): + DefaultValue.cleanFile("%s,%s" % (queryResultFile, sqlFile)) + + # create new sql file + if (os.getuid() == 0): + cmd = "su - %s -c 'touch %s && chmod %s %s'" % ( + user, sqlFile, DefaultValue.KEY_FILE_MODE, sqlFile) + else: + 
cmd = "touch %s && chmod %s %s" % (sqlFile, + DefaultValue.KEY_FILE_MODE, + sqlFile) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + output = "%s\n%s" % (cmd, output) + if (os.path.exists(sqlFile) or os.path.exists(queryResultFile)): + DefaultValue.cleanFile("%s,%s" % (queryResultFile, sqlFile)) + return (status, output) + + # witer the SQL command into sql query file + try: + g_file.createFileInSafeMode(sqlFile) + with open(sqlFile, 'w') as fp: + fp.writelines(sql) + except Exception as e: + DefaultValue.cleanFile(sqlFile) + return (1, str(e)) + + # send new sql file to remote node if needed + localHost = DefaultValue.GetHostIpOrName() + if str(localHost) != str(host): + cmd = g_Platform.getRemoteCopyCmd(sqlFile, sqlFile, host) + if os.getuid() == 0 and user != "": + cmd = "su - %s \"%s\"" % (user, cmd) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + DefaultValue.cleanFile("%s,%s" % (queryResultFile, sqlFile)) + output = "%s\n%s" % (cmd, output) + return (status, output) + + # execute sql file + mpprcFile = DefaultValue.getMpprcFile() + if IsInplaceUpgrade: + gsql_cmd = ClusterCommand.getSQLCommandForInplaceUpgradeBackup( + port, database) + else: + gsql_cmd = ClusterCommand.getSQLCommand(port, database, host=host) + + if str(localHost) != str(host): + sshCmd = g_Platform.getSshCmd(host) + if os.getuid() == 0 and user != "": + cmd = " %s 'su - %s -c \"" % (sshCmd, user) + if mpprcFile != "" and mpprcFile is not None: + cmd += "source %s;" % mpprcFile + cmd += "%s -f %s --output %s -t -A -X \"'" % (gsql_cmd, + sqlFile, + queryResultFile) + if ignoreError: + cmd += " 2>/dev/null" + else: + cmd = "%s '" % sshCmd + if mpprcFile != "" and mpprcFile is not None: + cmd += "source %s;" % mpprcFile + cmd += "%s -f %s --output %s -t -A -X '" % (gsql_cmd, + sqlFile, + queryResultFile) + if ignoreError: + cmd += " 2>/dev/null" + for i in range(RE_TIMES): + (status1, output1) = subprocess.getstatusoutput(cmd) + if ClusterCommand.findErrorInSqlFile(sqlFile, output1): + if (ClusterCommand.findTupleErrorInSqlFile(sqlFile, + output1)): + time.sleep(1) # find tuple error --> retry + else: # find error not tuple error + status1 = 1 + break + else: # not find error + break + # if failed to execute gsql, then clean the sql query file on + # current node and other node + if (status1 != 0): + DefaultValue.cleanFile("%s,%s" % (queryResultFile, sqlFile)) + DefaultValue.cleanFile("%s,%s" % (queryResultFile, sqlFile), + host) + return (status1, output1) + else: + if (os.getuid() == 0 and user != ""): + cmd = "su - %s -c \"" % user + if (mpprcFile != "" and mpprcFile is not None): + cmd += "source %s;" % mpprcFile + cmd += "%s -f %s --output %s -t -A -X \"" % (gsql_cmd, + sqlFile, + queryResultFile) + if (ignoreError): + cmd += " 2>/dev/null" + else: + cmd = "" + if (mpprcFile != "" and mpprcFile is not None): + cmd += "source %s;" % mpprcFile + cmd += "%s -f %s --output %s -t -A -X " % (gsql_cmd, + sqlFile, + queryResultFile) + if (ignoreError): + cmd += " 2>/dev/null" + for i in range(RE_TIMES): + (status1, output1) = subprocess.getstatusoutput(cmd) + if ClusterCommand.findErrorInSqlFile(sqlFile, output1): + if (ClusterCommand.findTupleErrorInSqlFile(sqlFile, + output1)): + time.sleep(1) # find tuple error --> retry + else: # find error not tuple error + status1 = 1 + break + else: # not find error + break + # if failed to execute gsql, then clean the sql query file + # on current node and other node + if (status1 != 0): + DefaultValue.cleanFile("%s,%s" % 
(queryResultFile, sqlFile)) + return (status1, output1) + + if (str(localHost) != str(host)): + remoteCmd = g_Platform.getRemoteCopyCmd( + queryResultFile, + DefaultValue.getTmpDirFromEnv(user) + "/", str(localHost)) + cmd = "%s \"%s\"" % (sshCmd, remoteCmd) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + output = "%s\n%s" % (cmd, output) + DefaultValue.cleanFile(sqlFile) + DefaultValue.cleanFile("%s,%s" % (queryResultFile, sqlFile), + host) + return (status, output) + + # read the content of query result file. + try: + with open(queryResultFile, 'r') as fp: + rowList = fp.readlines() + except Exception as e: + DefaultValue.cleanFile("%s,%s" % (queryResultFile, sqlFile)) + if (str(localHost) != str(host)): + DefaultValue.cleanFile("%s,%s" % (queryResultFile, sqlFile), + host) + return (1, str(e)) + + # remove local sqlFile + DefaultValue.cleanFile("%s,%s" % (queryResultFile, sqlFile)) + # remove remote sqlFile + if (str(localHost) != str(host)): + DefaultValue.cleanFile("%s,%s" % (queryResultFile, sqlFile), host) + + return (0, "".join(rowList)[:-1]) + + @staticmethod + def checkSqlConnect(user, host, port, + retryTimes=DefaultValue.DEFAULT_RETRY_TIMES_GS_CTL, + sql=None, dwsFlag=False): + """ + After the operation "gs_ctl start" has returned the success + information, we will try to connect the database + and execute some sql to check the connection. + + :param user: The input database user. + :param host: The input database host or ip address. + :param port: The input database port. + :param retryTimes: The times of attempts to retry the operation. + :param sql: The SQL statements used in retry operation. + :param dwsFlag: Whether the cluster is in the dws mode. + + :type user: str + :type host: str + :type port: int + :type retryTimes: int + :type sql: str | None + :type dwsFlag: bool + + :return: Return the query result. + :rtype: str + """ + # Set default query sql string. 
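+        # "select version();" is the cheapest meaningful probe: it needs
+        # no user tables and succeeds as soon as the instance accepts
+        # connections and executes SQL.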
+ if sql is None: + sql = "select version();" + + for i in range(0, retryTimes): + status, output = ClusterCommand.remoteSQLCommand(sql, user, host, + port, False, + dwsFlag=dwsFlag) + if status == 0 and output != "": + return output + + time.sleep(2) + + raise Exception(ErrorCode.GAUSS_516["GAUSS_51632"] % + "check instance connection.") + + @staticmethod + def remoteShellCommand(shell, user, hostname): + """ + function : Execute shell command on remote host + input : String,String,String + output : String,String + """ + currentTime = datetime.utcnow().strftime("%Y-%m-%d_%H%M%S%f") + randomnum = ''.join(sample('0123456789', 3)) + pid = os.getpid() + shFile = os.path.join(DefaultValue.getTmpDirFromEnv(user), + "gaussdb_remote_shell.sh_%s_%s_%s_%s" % \ + (str(hostname), str(currentTime), str(pid), + str(randomnum))) + if (os.path.exists(shFile)): + DefaultValue.cleanFile(shFile) + + # create new sh file + if (os.getuid() == 0): + cmd = "su - %s -c 'touch %s && chmod %s %s'" % \ + (user, shFile, DefaultValue.KEY_FILE_MODE, shFile) + else: + cmd = "touch %s && chmod %s %s" % \ + (shFile, DefaultValue.KEY_FILE_MODE, shFile) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + if (os.path.exists(shFile)): + DefaultValue.cleanFile(shFile) + output = "%s\n%s" % (cmd, output) + return (status, output) + + try: + with open(shFile, 'w') as fp: + fp.writelines(shell) + except Exception as e: + if (fp): + fp.close() + DefaultValue.cleanFile(shFile) + return (1, str(e)) + + # send new sh file to remote node if needed + localHost = DefaultValue.GetHostIpOrName() + if (str(localHost) != str(hostname)): + if (os.getuid() == 0): + cmd = """su - %s -c "pscp -H %s '%s' '%s'" """ % \ + (user, hostname, shFile, shFile) + else: + cmd = "pscp -H %s '%s' '%s'" % (hostname, shFile, shFile) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + DefaultValue.cleanFile(shFile) + output = "%s\n%s" % (cmd, output) + return (status, output) + + # execute sh file + if (str(localHost) != str(hostname)): + mpprcFile = DefaultValue.getMpprcFile() + if (os.getuid() == 0): + cmd = "pssh -s -H %s 'su - %s -c \"" % \ + (hostname, user) + if (mpprcFile != "" and mpprcFile is not None): + cmd += "source %s;" % mpprcFile + cmd += "sh %s\"'" % shFile + else: + cmd = "pssh -s -H %s '" % hostname + if (mpprcFile != "" and mpprcFile is not None): + cmd += "source %s;" % mpprcFile + cmd += "sh %s'" % shFile + else: + mpprcFile = DefaultValue.getMpprcFile() + if (os.getuid() == 0): + cmd = "su - %s -c '" % user + if (mpprcFile != "" and mpprcFile is not None): + cmd += "source %s;" % mpprcFile + cmd += "sh %s'" % shFile + else: + cmd = "" + if (mpprcFile != "" and mpprcFile is not None): + cmd += "source %s;" % mpprcFile + cmd += "sh %s" % shFile + + (status, output) = subprocess.getstatusoutput(cmd) + + # clean tmp file + DefaultValue.cleanFile(shFile) + if (str(localHost) != str(hostname)): + DefaultValue.cleanFile(shFile, hostname) + + return (status, output) + + @staticmethod + def CopyClusterStatic(): + """ + function : Copy cluster_static_config_bak file to cluster_static_config + input : NA + output: NA + """ + gaussHome = DefaultValue.getEnv("GAUSSHOME") + staticConfig = "%s/bin/cluster_static_config" % gaussHome + staticConfig_bak = "%s/bin/cluster_static_config_bak" % gaussHome + if (os.path.exists(staticConfig_bak) and not + os.path.exists(staticConfig)): + g_file.cpFile(staticConfig_bak, staticConfig) + + @staticmethod + def getchangeDirModeCmd(user_dir): + """ + function : change 
directory permission + input : user_dir + output: NA + """ + # Use "find -exec" to mask special characters + cmdDir = "find '%s' -type d -exec chmod '%s' {} \;" % \ + (user_dir, DefaultValue.KEY_DIRECTORY_MODE) + (status, diroutput) = subprocess.getstatusoutput(cmdDir) + if (status != 0): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % user_dir + + " Command:%s. Error:\n%s" % (cmdDir, diroutput)) + + @staticmethod + def getchangeFileModeCmd(user_dir): + """ + function : change log file permission + input : user_dir + output: NA + """ + # Use "find -exec" to mask special characters + cmdFile = "find '%s' -type f -name '*.log' -exec chmod '%s' {} \;" % \ + (user_dir, DefaultValue.KEY_FILE_MODE) + (status, fileoutput) = subprocess.getstatusoutput(cmdFile) + if (status != 0): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % + "log file" + " Directory:%s." % user_dir + + " Command:%s. Error:\n%s" % (cmdFile, fileoutput)) + + @staticmethod + def countTotalSteps(script, act="", model=""): + """ + function: get script takes steps in total + input: + script: command name + act: the type of command + model: mode setting + """ + try: + totalSteps = 0 + if (script == "gs_preinstall"): + if model: + totalSteps = ClusterCommand.TOTAL_STEPS_PREINSTALL_L + else: + totalSteps = ClusterCommand.TOTAL_STEPS_PREINSTALL + elif (script == "gs_install"): + if (model == ClusterCommand.INSTALL_STEP_CONFIG): + totalSteps = ClusterCommand.TOTAL_STEPS_INSTALL - 1 + elif (model == ClusterCommand.INSTALL_STEP_START): + totalSteps = ClusterCommand.TOTAL_STEPS_INSTALL - 2 + else: + totalSteps = ClusterCommand.TOTAL_STEPS_INSTALL + elif (script == "gs_om"): + if (act == "managecn"): + if (model == "add"): + totalSteps = ClusterCommand.TOTAL_STEPS_OM_ADD + if (model == "delete"): + totalSteps = ClusterCommand.TOTAL_STEPS_OM_DELETE + if (act == "changeip"): + totalSteps = ClusterCommand.TOTAL_STEPS_OM_CHANGEIP + elif (script == "gs_expand"): + if (act == "dilatation"): + totalSteps = ClusterCommand.TOTAL_STEPS_EXPAND_DILA + if (act == "redistribute"): + totalSteps = ClusterCommand.TOTAL_STEPS_EXPAND_REDIS + elif (script == "gs_shrink"): + if (act == "entry1"): + totalSteps = ClusterCommand.TOTAL_STEPS_SHRINK_FIRST + if (act == "entry2"): + totalSteps = ClusterCommand.TOTAL_STEPS_SHRINK_SECOND + if (act == "entry3"): + totalSteps = ClusterCommand.TOTAL_STEPS_SHRINK_THIRD + elif (script == "gs_sshexkey"): + if model: + totalSteps = ClusterCommand.TOTAL_STEPS_SSHEXKEY - 2 + else: + totalSteps = ClusterCommand.TOTAL_STEPS_SSHEXKEY + elif (script == "gs_replace"): + if (act == "warm-standby"): + if (model == ClusterCommand.WARM_STEP_INIT): + totalSteps = ClusterCommand. \ + TOTAL_STEPS_REPLACE_WARM_STANDBY + if (model == ClusterCommand.WARM_STEP_REPLACEIPS): + totalSteps = ClusterCommand. \ + TOTAL_STEPS_REPLACE_WARM_STANDBY_REPLACE + if (model == ClusterCommand.WARM_STEP_INSTALL): + totalSteps = ClusterCommand. \ + TOTAL_STEPS_REPLACE_WARM_STANDBY_INSTALL + if (model == ClusterCommand.WARM_STEP_CONFIG): + totalSteps = ClusterCommand. 
\ + TOTAL_STEPS_REPLACE_WARM_STANDBY_CONFIG + if (act == "install"): + totalSteps = ClusterCommand.TOTAL_STEPS_REPLACE_INSTALL + if (act == "config"): + totalSteps = ClusterCommand.TOTAL_STEPS_REPLACE_CONFIG + if (act == "start"): + totalSteps = ClusterCommand.TOTAL_STEPS_REPLACE_START + elif (script == "gs_upgradectl"): + if (act == "small-binary-upgrade" or act == + "large-binary-upgrade"): + totalSteps = ClusterCommand.TOTAL_STEPS_GREY_UPGRADECTL + if (act == "inplace-binary-upgrade"): + totalSteps = ClusterCommand.TOTAL_STEPS_INPLACE_UPGRADECTL + elif (script == "gs_uninstall"): + totalSteps = ClusterCommand.TOTAL_STEPS_UNINSTALL + elif (script == "gs_postuninstall"): + totalSteps = ClusterCommand.TOTAL_STEPS_POSTUNINSTALL + return totalSteps + except Exception as e: + raise Exception(str(e)) + + @staticmethod + def check_input(jsonFilePath): + """ + function: check the input, and load the backup JSON file. + @param: N/A. + @return: return [OK, para], if the backup JSON file is loaded + successfully. + """ + try: + with open(jsonFilePath) as jsonFile: + para = json.load(jsonFile) + return [0, para] + except TypeError as err: + ERR_MSG = "input para is not json_string. %s" % err + return [1, ERR_MSG] + + @staticmethod + def executeSQLOnRemoteHost(hostName, port, sql, outputfile, + snapid="defaultNone", database="postgres"): + """ + function: execute SQL on remote host + input :hostName, port, sql, outputfile, database + output: NA + """ + from gspylib.threads.SshTool import SshTool + from gspylib.common.OMCommand import OMCommand + hosts = [] + hosts.append(hostName) + gs_sshTool = SshTool(hosts) + currentTime = datetime.utcnow().strftime("%Y-%m-%d_%H%M%S%f") + pid = os.getpid() + sqlfile = "%s_%s_%s.sql" % (hostName, pid, currentTime) + tmpDir = DefaultValue.getTmpDirFromEnv() + "/" + sqlfilepath = os.path.join(tmpDir, sqlfile) + g_file.createFileInSafeMode(sqlfilepath) + try: + with open(sqlfilepath, "w") as fp: + fp.write(sql) + fp.flush() + + g_OSlib.scpFile(hostName, sqlfilepath, tmpDir) + cmd = "%s -p %s -S %s -f %s -s %s -d %s" % ( + OMCommand.getLocalScript("Local_Execute_Sql"), port, + sqlfilepath, outputfile, snapid, database) + gs_sshTool.executeCommand(cmd, "execute SQL on remote host") + cmd = "%s %s" % (g_Platform.getRemoveCmd("directory"), sqlfilepath) + (status, output) = subprocess.getstatusoutput(cmd) + except Exception as e: + cmd = "%s %s" % (g_Platform.getRemoveCmd("directory"), sqlfilepath) + (status, output) = subprocess.getstatusoutput(cmd) + raise Exception(str(e)) + + @staticmethod + def excuteSqlOnLocalhost(port, sql, database="postgres"): + ''' + function: write output message + input : sql + output: NA + ''' + tmpresult = None + conn = None + try: + from gspylib.common.SqlResult import sqlResult + libpath = os.path.join(DefaultValue.getEnv("GAUSSHOME"), "lib") + sys.path.append(libpath) + libc = cdll.LoadLibrary("libpq.so.5.5") + conn_opts = "dbname = '%s' application_name = 'OM' " \ + "options='-c xc_maintenance_mode=on' port = %s " % \ + (database, port) + conn_opts = conn_opts.encode(encoding='utf-8') + err_output = "" + libc.PQconnectdb.argtypes = [c_char_p] + libc.PQconnectdb.restype = c_void_p + libc.PQclear.argtypes = [c_void_p] + libc.PQfinish.argtypes = [c_void_p] + libc.PQerrorMessage.argtypes = [c_void_p] + libc.PQerrorMessage.restype = c_char_p + libc.PQresultStatus.argtypes = [c_void_p] + libc.PQresultStatus.restype = c_int + libc.PQexec.argtypes = [c_void_p, c_char_p] + libc.PQexec.restype = c_void_p + conn = libc.PQconnectdb(conn_opts) + if 
not conn: + raise Exception(ErrorCode.GAUSS_513["GAUSS_51310"] + % ("by options: %s." % conn_opts)) + sql = sql.encode(encoding='utf-8') + libc.PQstatus.argtypes = [c_void_p] + if (libc.PQstatus(conn) != 0): + raise Exception(ErrorCode.GAUSS_513["GAUSS_51310"] % ".") + tmpresult = libc.PQexec(conn, sql) + if not tmpresult: + raise Exception(ErrorCode.GAUSS_513["GAUSS_51309"] % sql) + status = libc.PQresultStatus(tmpresult) + + resultObj = sqlResult(tmpresult) + resultObj.parseResult() + Error = libc.PQerrorMessage(conn) + if (Error is not None): + err_output = string_at(Error).decode() + result = resultObj.resSet + libc.PQclear(tmpresult) + libc.PQfinish(conn) + return status, result, err_output + except Exception as e: + libc.PQclear.argtypes = [c_void_p] + libc.PQfinish.argtypes = [c_void_p] + if tmpresult: + libc.PQclear(tmpresult) + if conn: + libc.PQfinish(conn) + raise Exception(str(e)) + + @staticmethod + def getSQLResult(hostName, jsonFile): + """ + function: get sql result from jsonFile + input : hostName,jsonFile + output: status, result, error_output + """ + # copy json file from remote host + tmpDir = DefaultValue.getTmpDirFromEnv() + "/" + filepath = os.path.join(tmpDir, jsonFile) + scpCmd = g_Platform.getRemoteCopyCmd(filepath, tmpDir, hostName, + False, "directory") + DefaultValue.execCommandLocally(scpCmd) + # parse json file + status = "" + result = [] + error_output = "" + (ret, para) = ClusterCommand.check_input(filepath) + if (ret != 0): + raise Exception(ErrorCode.GAUSS_513["GAUSS_51308"]) + + if "status" not in para: + raise Exception(ErrorCode.GAUSS_513["GAUSS_51307"]) + else: + status = para["status"] + + if "result" not in para: + raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % "") + else: + result = para["result"] + if "error_output" in para: + error_output = para["error_output"] + + # remove json file from remote host and localhost + g_file.removeDirectory(filepath) + + remoteCmd = g_Platform.getSshCmd(hostName) + cmd = "%s \"%s '%s'\"" % (remoteCmd, + g_Platform.getRemoveCmd("directory"), + filepath) + DefaultValue.execCommandLocally(cmd) + + return status, result, error_output + + @staticmethod + def checkInstStatusByGsctl(instdir, retryCount=100): + """ + function: check single instance status for local instance. + Wait for 5 minutes. If the instance status is still Catchup, + the instance status is Normal. 
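+        Polls "gs_ctl query" every 3 seconds for at most retryCount
+        rounds, so the default retryCount of 100 gives the 5-minute
+        window described above.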
+ input: NA + output: (status, output) + """ + count = 0 + while (count < retryCount): + time.sleep(3) + count += 1 + cmd = "gs_ctl query -D %s|grep '\'| " \ + "awk -F ':' '{print $2}'" % instdir + (status, output) = subprocess.getstatusoutput(cmd) + if (status == 0 and output.strip() == "Normal"): + break + elif (status == 0 and count == retryCount and output.strip() == + "Catchup"): + output = "Normal" + return (status, output) + + +class ClusterInstanceConfig(): + """ + Set Instance Config + """ + + def __init__(self): + pass + + @staticmethod + def setConfigItem(typename, datadir, configFile, parmeterDict): + """ + function: Modify a parameter + input : typename, datadir, configFile, parmeterDict + output: NA + """ + # check mpprc file path + mpprcFile = DefaultValue.getMpprcFile() + + # comment out any existing entries for this setting + if (typename == DefaultValue.INSTANCE_ROLE_CMSERVER or typename == + DefaultValue.INSTANCE_ROLE_CMAGENT): + # gs_guc only support for DB instance + # if the type is cm_server or cm_agent, we will use sed to + # instead of it + for entry in parmeterDict.items(): + key = entry[0] + value = entry[1] + # delete the old parameter information + cmd = "sed -i 's/^.*\(%s.*=.*\)/#\\1/g' %s" % (key, configFile) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + raise Exception(ErrorCode.GAUSS_500["GAUSS_50008"] + + " Command:%s. Error:\n%s" % (cmd, output)) + + # append new config to file + cmd = 'echo " " >> %s' % (configFile) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + " Error: \n%s" % output) + + cmd = 'echo "%s = %s" >> %s' % (key, value, configFile) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + " Error: \n%s" % output) + else: + # build GUC parameter string + gucstr = "" + for entry in parmeterDict.items(): + gucstr += " -c \"%s=%s\"" % (entry[0], entry[1]) + # check the GUC parameter string + if (gucstr == ""): + return + cmd = "source %s; gs_guc set -D %s %s" % \ + (mpprcFile, datadir, gucstr) + DefaultValue.retry_gs_guc(cmd) + + @staticmethod + def setReplConninfo(dbInst, peerInsts, clusterInfo): + """ + function: Modify replconninfo for datanode + input : dbInst + output: NA + """ + masterInst = None + standbyInst = None + dummyStandbyInst = None + nodename = "" + # init masterInst, standbyInst and dummyStandbyInst + for pi in iter(peerInsts): + if (pi.instanceType == DefaultValue.MASTER_INSTANCE): + masterInst = pi + elif (pi.instanceType == DefaultValue.STANDBY_INSTANCE): + standbyInst = pi + elif (pi.instanceType == + DefaultValue.DUMMY_STANDBY_INSTANCE): + dummyStandbyInst = pi + + if (dbInst.instanceType == DefaultValue.MASTER_INSTANCE): + masterInst = dbInst + nodename = "dn_%d_%d" % (masterInst.instanceId, + standbyInst.instanceId) + elif (dbInst.instanceType == DefaultValue.STANDBY_INSTANCE): + standbyInst = dbInst + nodename = "dn_%d_%d" % (masterInst.instanceId, + standbyInst.instanceId) + elif (dbInst.instanceType == DefaultValue.DUMMY_STANDBY_INSTANCE): + dummyStandbyInst = dbInst + nodename = "dn_%d_%d" % (masterInst.instanceId, + dummyStandbyInst.instanceId) + if (len(masterInst.haIps) == 0 or len(standbyInst.haIps) == 0): + raise Exception(ErrorCode.GAUSS_516["GAUSS_51621"] + + " Data directory: %s." 
% dbInst.datadir) + if (dummyStandbyInst is not None and len(dummyStandbyInst.haIps) == 0): + raise Exception(ErrorCode.GAUSS_516["GAUSS_51621"] + + " Data directory: %s." % dbInst.datadir) + + connInfo1 = "" + connInfo2 = "" + channelCount = len(masterInst.haIps) + # get master instance number + masterDbNode = clusterInfo.getDbNodeByName(masterInst.hostname) + if masterDbNode is None: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50204"] % + ("database node configuration on host [%s]" + % masterInst.hostname)) + masterDataNum = masterDbNode.getDnNum(masterInst.instanceType) + # get standby instance number + standbyDbNode = clusterInfo.getDbNodeByName(standbyInst.hostname) + if standbyDbNode is None: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50204"] % + ("database node configuration on host [%s]" + % standbyInst.hostname)) + standbyDataNum = standbyDbNode.getDnNum(standbyInst.instanceType) + # get dummy instance number + if dummyStandbyInst is not None: + dummyDbNode = clusterInfo.getDbNodeByName( + dummyStandbyInst.hostname) + if dummyDbNode is None: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50204"] % + ("database node configuration on host [%s]" + % dummyStandbyInst.hostname)) + dummyDataNum = dummyDbNode.getDnNum(dummyStandbyInst.instanceType) + for i in range(channelCount): + if (dbInst.instanceType == DefaultValue.MASTER_INSTANCE): + if (i > 0): + connInfo1 += "," + connInfo1 += "localhost=%s localport=%d localservice=%s " \ + "remotehost=%s remoteport=%d remoteservice=%s" % \ + (dbInst.haIps[i], dbInst.haPort, + (dbInst.port + masterDataNum * 4), + standbyInst.haIps[i], + standbyInst.haPort, (standbyInst.port + + standbyDataNum * 4)) + if dummyStandbyInst is not None: + if (i > 0): + connInfo2 += "," + connInfo2 += "localhost=%s localport=%d localservice=%s " \ + "remotehost=%s remoteport=%d " \ + "remoteservice=%s" % \ + (dbInst.haIps[i], dbInst.haPort, + (dbInst.port + masterDataNum * 4), + dummyStandbyInst.haIps[i], + dummyStandbyInst.haPort, + (dummyStandbyInst.port + dummyDataNum * 4)) + elif dbInst.instanceType == DefaultValue.STANDBY_INSTANCE: + if i > 0: + connInfo1 += "," + connInfo1 += "localhost=%s localport=%d " \ + "localservice=%s remotehost=%s remoteport=%d " \ + "remoteservice=%s" % \ + (dbInst.haIps[i], dbInst.haPort, + (dbInst.port + standbyDataNum * 4), + masterInst.haIps[i], masterInst.haPort, + (masterInst.port + masterDataNum * 4)) + if (dummyStandbyInst is not None): + if i > 0: + connInfo2 += "," + connInfo2 += "localhost=%s localport=%d localservice=%s " \ + "remotehost=%s remoteport=%d " \ + "remoteservice=%s" % \ + (dbInst.haIps[i], dbInst.haPort, + (dbInst.port + standbyDataNum * 4), + dummyStandbyInst.haIps[i], + dummyStandbyInst.haPort, + (dummyStandbyInst.port + dummyDataNum * 4)) + elif (dbInst.instanceType == DefaultValue.DUMMY_STANDBY_INSTANCE): + if i > 0: + connInfo1 += "," + connInfo1 += "localhost=%s localport=%d localservice=%s " \ + "remotehost=%s remoteport=%d remoteservice=%s" % \ + (dbInst.haIps[i], dbInst.haPort, + (dbInst.port + dummyDataNum * 4), + masterInst.haIps[i], + masterInst.haPort, + (masterInst.port + masterDataNum * 4)) + if i > 0: + connInfo2 += "," + connInfo2 += "localhost=%s localport=%d " \ + "localservice=%s remotehost=%s remoteport=%d " \ + "remoteservice=%s" % \ + (dbInst.haIps[i], dbInst.haPort, + (dbInst.port + dummyDataNum * 4), + standbyInst.haIps[i], standbyInst.haPort, + (standbyInst.port + standbyDataNum * 4)) + + return connInfo1, connInfo2, dummyStandbyInst, nodename + + @staticmethod + def 
getInstanceInfoForSinglePrimaryMultiStandbyCluster(dbInst, peerInsts): + """ + function: get the instance name, master instance and standby + instance list + input : dbInst + output: NA + """ + masterInst = None + standbyInstIdLst = [] + instancename = "" + # init masterInst, standbyInst + for pi in iter(peerInsts): + if pi.instanceType == DefaultValue.MASTER_INSTANCE: + masterInst = pi + elif pi.instanceType == DefaultValue.STANDBY_INSTANCE or \ + dbInst.instanceType == DefaultValue.CASCADE_STANDBY: + standbyInstIdLst.append(pi.instanceId) + + if dbInst.instanceType == DefaultValue.MASTER_INSTANCE: + masterInst = dbInst + instancename = "dn_%d" % masterInst.instanceId + standbyInstIdLst.sort() + for si in iter(standbyInstIdLst): + instancename += "_%d" % si + elif dbInst.instanceType == DefaultValue.STANDBY_INSTANCE or \ + dbInst.instanceType == DefaultValue.CASCADE_STANDBY: + instancename = "dn_%d" % masterInst.instanceId + standbyInstIdLst.append(dbInst.instanceId) + standbyInstIdLst.sort() + for si in iter(standbyInstIdLst): + instancename += "_%d" % si + return (instancename, masterInst, standbyInstIdLst) + + @staticmethod + def setReplConninfoForSinglePrimaryMultiStandbyCluster(dbInst, + peerInsts, + clusterInfo): + """ + function: Modify replconninfo for datanode + input : dbInst + output: NA + """ + masterInst = None + standbyInstIdLst = [] + nodename = "" + connInfo1 = [] + (nodename, masterInst, standbyInstIdLst) = ClusterInstanceConfig. \ + getInstanceInfoForSinglePrimaryMultiStandbyCluster(dbInst, + peerInsts) + if len(masterInst.haIps) == 0: + raise Exception(ErrorCode.GAUSS_516["GAUSS_51621"] + + " Data directory: %s." % dbInst.datadir) + if len(standbyInstIdLst) == 0: + return connInfo1, nodename + + dbNode = clusterInfo.getDbNodeByName(dbInst.hostname) + if dbNode is None: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50204"] % + ("database node configuration on host [%s]" + % dbInst.hostname)) + + channelCount = len(masterInst.haIps) + if dbInst.instanceType == DefaultValue.MASTER_INSTANCE: + for pj in iter(peerInsts): + peerDbNode = clusterInfo.getDbNodeByName(pj.hostname) + if peerDbNode is None: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50204"] % + ("database node configuration on host [%s]" + % pj.hostname)) + chanalInfo = "" + for i in range(channelCount): + if i > 0: + chanalInfo += "," + chanalInfo += "localhost=%s localport=%d " \ + "localheartbeatport=%d localservice=%s " \ + "remotehost=%s remoteport=%d " \ + "remoteheartbeatport=%d remoteservice=%s" % \ + (dbInst.haIps[i], dbInst.haPort, + dbInst.port + 5, + (dbInst.port + 4), pj.haIps[i], + pj.haPort, pj.port + 5, + pj.port + 4) + if pj.instanceType == DefaultValue.CASCADE_STANDBY: + chanalInfo += " iscascade=true" + + connInfo1.append(chanalInfo) + else: + for pj in iter(peerInsts): + peerDbNode = clusterInfo.getDbNodeByName(pj.hostname) + if peerDbNode is None: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50204"] % + ("database node configuration on host [%s]" + % pj.hostname)) + chanalInfo = "" + for i in range(channelCount): + if i > 0: + chanalInfo += "," + chanalInfo += "localhost=%s localport=%d " \ + "localheartbeatport=%d localservice=%s " \ + "remotehost=%s remoteport=%d " \ + "remoteheartbeatport=%d remoteservice=%s" % \ + (dbInst.haIps[i], dbInst.haPort, + dbInst.port + 5, + (dbInst.port + 4), pj.haIps[i], + pj.haPort, pj.port + 5, + (pj.port + 4)) + if pj.instanceType == DefaultValue.CASCADE_STANDBY: + chanalInfo += " iscascade=true" + connInfo1.append(chanalInfo) + + return connInfo1, nodename + 
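+# Illustrative shape of a single replconninfo channel produced by
+# setReplConninfoForSinglePrimaryMultiStandbyCluster above (values shown
+# symbolically, not literal defaults):
+#   localhost=<local haIp> localport=<local haPort>
+#   localheartbeatport=<local port+5> localservice=<local port+4>
+#   remotehost=<peer haIp> remoteport=<peer haPort>
+#   remoteheartbeatport=<peer port+5> remoteservice=<peer port+4>
+#   iscascade=true        (appended only for cascade standby peers)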
+ +class TempfileManagement(): + """ + create and remove temp file or directory + """ + + def __init__(self): + """ + function: init function + input: NA + output: NA + """ + pass + + @staticmethod + def getTempDir(dirName): + """ + function: create temp directory in PGHOST + input: dirName + output: + pathName + """ + tmpPath = DefaultValue.getTmpDirFromEnv() + pathName = os.path.join(tmpPath, dirName) + return pathName + + @staticmethod + def removeTempFile(filename, Fuzzy=False): + """ + function: remove temp files in PGHOST + input: + fileName string Specified file name or keywords + Fuzzy bool Whether to remove files with the same prefix, + default is False + output: NA + """ + + if Fuzzy: + keywords = filename + "*" + g_file.removeFile(keywords, "shell") + else: + g_file.removeFile(filename) diff --git a/script/gspylib/common/DbClusterInfo.py b/script/gspylib/common/DbClusterInfo.py new file mode 100644 index 0000000..97a7a5b --- /dev/null +++ b/script/gspylib/common/DbClusterInfo.py @@ -0,0 +1,6340 @@ +# -*- coding:utf-8 -*- +############################################################################# +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +# Description : DbClusterInfo.py is a utility to get cluster information +############################################################################# +import binascii +import os +import subprocess +import struct +import time +import types +import sys +import re +import pwd +import xml.dom.minidom +import xml.etree.cElementTree as ETree +import json +import socket +import copy + +sys.path.append(os.path.split(os.path.realpath(__file__))[0] + "/../../") +from gspylib.os.gsfile import g_file +from gspylib.common.ErrorCode import ErrorCode +from gspylib.common.VersionInfo import VersionInfo + +########################### +# instance role +########################### +# init value +INSTANCE_ROLE_UNDEFINED = -1 +# cm_server +INSTANCE_ROLE_CMSERVER = 0 +# gtm +INSTANCE_ROLE_GTM = 1 +# etcd +INSTANCE_ROLE_ETCD = 2 +# cn +INSTANCE_ROLE_COODINATOR = 3 +# dn +INSTANCE_ROLE_DATANODE = 4 +# cm_agent +INSTANCE_ROLE_CMAGENT = 5 + +########################### +# ID num +########################### +BASE_ID_CMSERVER = 1 +BASE_ID_GTM = 1001 +BASE_ID_CMAGENT = 10001 +BASE_ID_DUMMYDATANODE = 3001 +BASE_ID_COORDINATOR = 5001 +BASE_ID_DATANODE = 6001 +BASE_ID_ETCD = 7001 +DIRECTORY_PERMISSION = 0o750 +KEY_FILE_PERMISSION = 0o600 + +# For primary/standby instance When the ID > 7000 , +# the new id is start from 40001 +OLD_LAST_PRIMARYSTANDBY_BASEID_NUM = 7000 +NEW_FIRST_PRIMARYSTANDBY_BASEID_NUM = 40000 +# For salve instance When the ID > 5000 , the new id is start from 20001 +OLD_LAST_DUMMYNODE_BASEID_NUM = 5000 +NEW_FIRST_DUMMYNODE_BASEID_NUM = 20000 + +# master instance default port +MASTER_BASEPORT_CMS = 5000 +MASTER_BASEPORT_GTM = 6000 +# cm agent has no port, just occupancy index 5 +MASTER_BASEPORT_CMAGENT = 0 +MASTER_BASEPORT_COO = 8000 +MASTER_BASEPORT_DATA = 40000 +MASTER_BASEPORT_ETCD = 
2379
+# standby instance default port
+STANDBY_BASEPORT_CMS = 5500
+STANDBY_BASEPORT_GTM = 6500
+# cm agent has no port, just occupancy index 5
+STANDBY_BASEPORT_CMAGENT = 0
+STANDBY_BASEPORT_COO = 8500
+STANDBY_BASEPORT_DATA = 45000
+STANDBY_BASEPORT_ETCD = 2380
+# dummy standby instance default port
+DUMMY_STANDBY_BASEPORT_DATA = 50000
+
+###########################
+# instance type. only for CN/DN
+###########################
+INSTANCE_TYPE_UNDEFINED = -1
+# master
+MASTER_INSTANCE = 0
+# standby
+STANDBY_INSTANCE = 1
+# dummy standby
+DUMMY_STANDBY_INSTANCE = 2
+# cascade standby
+CASCADE_STANDBY = 3
+
+###########################
+# instance number
+###########################
+# cm: cm_server, cm_agent
+MIRROR_COUNT_CMS = 2
+# gtm: gtm_server, gtm_agent
+MIRROR_COUNT_GTM = 2
+# ssd: ssd_server, ssd_agent
+MIRROR_COUNT_SSD = 2
+# minimum number of nodes
+MIRROR_COUNT_DATA = 3
+# etcd number >= 3 and <= 7
+MIRROR_COUNT_ETCD_MIN = 3
+MIRROR_COUNT_ETCD_MAX = 7
+# max number of CN instances
+MIRROR_COUNT_CN_MAX = 16
+# max number of nodes
+MIRROR_COUNT_NODE_MAX = 1024
+# max number of DB instances (primary instances)
+MIRROR_COUNT_DN_MAX = 4096
+# min number of replications for CLUSTER_TYPE_SINGLE_PRIMARY_MULTI_STANDBY
+MIRROR_COUNT_REPLICATION_MIN = 2
+# max number of replications for CLUSTER_TYPE_SINGLE_PRIMARY_MULTI_STANDBY
+MIRROR_COUNT_REPLICATION_MAX = 8
+# max azPriority value for CLUSTER_TYPE_SINGLE_PRIMARY_MULTI_STANDBY
+AZPRIORITY_MAX = 10
+# min azPriority value for CLUSTER_TYPE_SINGLE_PRIMARY_MULTI_STANDBY
+AZPRIORITY_MIN = 1
+# DB port set step size for CLUSTER_TYPE_SINGLE_PRIMARY_MULTI_STANDBY
+PORT_STEP_SIZE = 20
+
+MIRROR_ID_COO = -1
+MIRROR_ID_AGENT = -3
+MIRROR_ID_ETCD = -5
+
+# cluster type
+CLUSTER_TYPE_SINGLE = "single"
+CLUSTER_TYPE_SINGLE_PRIMARY_MULTI_STANDBY = "single-primary-multi-standby"
+CLUSTER_TYPE_SINGLE_INST = "single-inst"
+
+# env parameter
+ENV_CLUSTERCONFIG = "CLUSTERCONFIGFILE"
+
+# default config version, it is used by gs_upgrade
+BIN_CONFIG_VERSION = 2
+BIN_CONFIG_VERSION_SINGLE = 101
+BIN_CONFIG_VERSION_SINGLE_PRIMARY_MULTI_STANDBY = 201
+BIN_CONFIG_VERSION_SINGLE_INST = 301
+
+# page size
+PAGE_SIZE = 8192
+MAX_IP_NUM = 3
+CONFIG_IP_NUM = 1
+
+NODE_ID_LEN = 2
+INSTANCE_ID_LEN = 8
+SPACE_LEN = 1
+STATE_LEN = 17
+SEPERATOR_LEN = 1
+IP_LEN = 16
+
+# GPHOME
+CLUSTER_TOOL_PATH = "/opt/huawei/wisequery"
+
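+# Editorial example: a minimal sketch (the helper name is ours, not part
+# of the OM code) of how the BIN_CONFIG_VERSION_* values above anchor the
+# version ranges that classify a static-config file into a cluster type;
+# dbClusterInfo.__readStaticConfigFile below applies the same
+# 101-200 / 201-300 / 301-400 ranges when it reads the file header.
+def _example_cluster_type_from_version(version):
+    """Map a static-config version number to a cluster type, else None."""
+    if 101 <= version <= 200:
+        return CLUSTER_TYPE_SINGLE
+    if 201 <= version <= 300:
+        return CLUSTER_TYPE_SINGLE_PRIMARY_MULTI_STANDBY
+    if 301 <= version <= 400:
+        return CLUSTER_TYPE_SINGLE_INST
+    return None
+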
+# keywords for the json configure file
+# Globalinfo
+JSON_GLOBALINFO = "Globalinfo"
+JSON_TOOL_PATH = "gaussdbToolPath"
+JSON_CLUSTER_NAME = "ClusterName"
+JSON_LOGPATH = "gaussdbLogPath"
+JSON_TMPPATH = "gaussdbTmpPath"
+JSON_MANAGER_PATH = "gaussdbManagerPath"
+JSON_APPPATH = "gaussdbAppPath"
+QUORUMMODE = "quorumMode"
+REPLICATIONCOUNT = "replicationCount"
+
+# keywords for layouts in json file
+JSON_LAYOUTS = "Layouts"
+JSON_AZNAME = "AZName"
+JSON_HOSTS = "Hosts"
+JSON_IP = "IP"
+JSON_CHANNEL_PORT = "channelPort"
+JSON_INSTANCES = "Instances"
+JSON_ID = "Id"
+JSON_SCRIPTS = "Scripts"
+JSON_CHECK = "check"
+JSON_FAILOVER = "failover"
+JSON_RESTART = "restart"
+JSON_START = "start"
+JSON_STOP = "stop"
+JSON_SWITCHOVER = "switchover"
+JSON_BUILD = "build"
+JSON_KILL = "kill"
+JSON_GETPASSWD = "getpasswd"
+JSON_CHECK_PGXC = "check_pgxc"
+JSON_CHECK_PGXC_GROUP = "check_pgxc_group"
+JSON_CREATE_PGXC_NODE = "create_pgxc_node"
+JSON_CREATE_PGXC_GROUP = "create_pgxc_group"
+JSON_CHECK_PGXC_GROUP_EXPAND = "check_pgxc_group_expand"
+JSON_UPDATE_PGXC_GROUP = "update_pgxc_group"
+CHANGE_PGXC_NODE = "change_pgxc_node"
+DELETE_PGXC_NODE = "delete_pgxc_node"
+JSON_EXEC_WITH_TRANSACTION = "execute_with_transaction"
+JSON_CHECK_SYNCHRONOUS_STANDY = "check_synchronous_standby"
+JSON_CHANGE_SYNCHRONOUS_STANDBY = "change_synchronous_standby"
+JSON_TYPE_NAME = "TypeName"
+JSON_ATTRIBUTES = "Attributes"
+JSON_DATA_DIR = "DataDir"
+JSON_GROUP = "Group"
+JSON_PORT = "Port"
+JSON_REPLPORT = "ReplPort"
+JSON_PEER_PORT = "PeerPort"
+JSON_CLIENT_PORT = "ClientPort"
+JSON_ETCD_DATA_DIR = "EtcdDataDir"
+JSON_ETCD_CLUSTER_NAME = "ClusterName"
+JSON_SCTP_PORT = "SctpPort"
+JSON_CONTROL_PORT = "ControlPort"
+# keywords for groups in json file
+JSON_GROUPS = "Groups"
+JSON_GROUP_TYPE = "GroupType"
+JSON_GROUP_ID = "GroupId"
+JSON_PARENT_NODE = "ParentNode"
+JSON_ROLE = "Role"
+
+# keywords for StaticConfig in json file
+JSON_STATIC_CONFIG = "StaticConfig"
+JSON_NUM_PRIMARYAZ = "NumPrimaryAZ"
+JSON_PRIMARY_AZ = "PrimaryAZ"
+JSON_SYNC_AZ = "SyncAZ"
+JSON_THIRDPART_AZ = "ThirdPartAZ"
+
+g_dom = None
+
+# The default network type is single plane
+g_networkType = 0
+
+# OLTP instance types
+# etcd
+ETCD = 'etcd'
+# cm
+CLUSTER_MANAGER = 'cluster_manager'
+DN_ZENITH_ZPAXOS = "DN_ZENITH_ZPAXOS"
+DN_ZENITH_ZPAXOS_V2 = "DN_ZENITH_ZPAXOS_V2"
+DN_ZENITH_HA = "DN_ZENITH_HA"
+COORDINATOR = "coordinator"
+CN_ZENITH_ZSHARDING = "CN_ZENITH_ZSHARDING"
+GTS_ZENITH = "GTS_ZENITH"
+OLTP_DN_TYPES = [DN_ZENITH_ZPAXOS, DN_ZENITH_ZPAXOS_V2, DN_ZENITH_HA]
+OLTP_CN_TYPES = [CN_ZENITH_ZSHARDING]
+# TP AZ names
+azName1 = "AZ1"
+azName2 = "AZ2"
+azName3 = "AZ3"
+AZNMAE_LIST = [azName1, azName2, azName3]
+DN_ROLE_MAP = {"Primary": "P", "Standby": "S", "Normal": "P", "Secondary": "R"}
+
+
+def InstanceIgnore_haPort(Object):
+    """
+    function : Analyze the current instance role: CN or CMAGENT.
+    input : Object
+    output : boolean
+    """
+    # we only support CN/cm_agent
+    return (Object.instanceRole == INSTANCE_ROLE_COODINATOR or
+            Object.instanceRole == INSTANCE_ROLE_CMAGENT)
+
+
+def InstanceIgnore_isMaster(Object):
+    """
+    function : Analyze the current instance role: GTM or DN.
+    input : Object
+    output : boolean
+    """
+    # we only support DN/gtm
+    return (Object.instanceRole != INSTANCE_ROLE_GTM and
+            Object.instanceRole != INSTANCE_ROLE_DATANODE)
+
+
+def ignoreCheck(Object, member, model):
+    """
+    function : Ignore checking the instance information of the table.
+    input : Object, member, model
+    output : boolean
+    """
+    INSTANCEINFO_IGNORE_TABLE = {}
+    if model == "replace":
+        # init instance ignore table for replace
+        INSTANCEINFO_IGNORE_TABLE = {"listenIps": None,
+                                     "haIps": None,
+                                     "hostname": None,
+                                     "mirrorId": None
+                                     }
+    elif model == "changeIP":
+        # init instance ignore table for changeip
+        INSTANCEINFO_IGNORE_TABLE = {"listenIps": None,
+                                     "haIps": None,
+                                     "hostname": None,
+                                     "port": None,
+                                     "haPort": None,
+                                     "mirrorId": None
+                                     }
+    elif model == "upgradectl":
+        # init instance ignore table for upgradectl
+        INSTANCEINFO_IGNORE_TABLE = {
+            "instanceRole": None,
+            "instanceId": None,
+            "mirrorId": None
+        }
+    elif model == "manageCN":
+        # init instance ignore table for manageCN
+        INSTANCEINFO_IGNORE_TABLE = {
+            "instanceId": None,
+            "mirrorId": None
+        }
+    elif model == "expand":
+        # init instance ignore table for expand
+        INSTANCEINFO_IGNORE_TABLE = {
+            "mirrorId": None
+        }
+    elif model == "compareCluster":
+        INSTANCEINFO_IGNORE_TABLE = {
+            "listenIps": None,
+            "haIps": None,
+            "hostname": None,
+            "port": None,
+            "haPort": None,
+            "mirrorId": None
+        }
+        if (hasattr(Object, "instanceRole") and
+                Object.instanceRole == INSTANCE_ROLE_COODINATOR):
+            INSTANCEINFO_IGNORE_TABLE["instanceId"] = None
+    # init node ignore table
+    DBNODEINFO_IGNORE_TABLE = {
+        "backIps": None,
+        "sshIps": None,
+        "masterBasePorts": None,
+        "standbyBasePorts": None,
+        "dummyStandbyBasePort": None,
+        "cmsNum": None,
+        "cooNum": None,
+        "dataNum": None,
+        "gtmNum": None,
+        "name": None,
+        "virtualIp": None
+    }
+    # init cluster ignore table
+    DBCLUSTERINFO_IGNORE_TABLE = {
+        "xmlFile": None,
+        "newNodes": None,
+        "clusterRings": None
+    }
+
+    if model == "upgradectl":
+        DBNODEINFO_IGNORE_TABLE.pop("backIps")
+        DBNODEINFO_IGNORE_TABLE.pop("sshIps")
+        DBNODEINFO_IGNORE_TABLE.pop("name")
+        DBCLUSTERINFO_IGNORE_TABLE.pop("clusterRings")
+    elif model == "manageCN":
+        DBNODEINFO_IGNORE_TABLE.pop("backIps")
+        DBNODEINFO_IGNORE_TABLE.pop("sshIps")
+        DBNODEINFO_IGNORE_TABLE.pop("name")
+        DBNODEINFO_IGNORE_TABLE["id"] = None
+    if isinstance(Object, instanceInfo):
+        if member not in list(INSTANCEINFO_IGNORE_TABLE.keys()):
+            return False
+        elif (INSTANCEINFO_IGNORE_TABLE[member] is None or not callable(
+                INSTANCEINFO_IGNORE_TABLE[member])):
+            return True
+        else:
+            return INSTANCEINFO_IGNORE_TABLE[member](Object)
+    elif isinstance(Object, dbNodeInfo):
+        if member not in list(DBNODEINFO_IGNORE_TABLE.keys()):
+            return False
+        elif (DBNODEINFO_IGNORE_TABLE[member] is None or not callable(
+                DBNODEINFO_IGNORE_TABLE[member])):
+            return True
+        else:
+            return DBNODEINFO_IGNORE_TABLE[member](Object)
+    elif isinstance(Object, dbClusterInfo):
+        if member not in list(DBCLUSTERINFO_IGNORE_TABLE.keys()):
+            return False
+        elif (DBCLUSTERINFO_IGNORE_TABLE[member] is None or not callable(
+                DBCLUSTERINFO_IGNORE_TABLE[member])):
+            return True
+        else:
+            return DBCLUSTERINFO_IGNORE_TABLE[member](Object)
+    else:
+        return False
+
+
+def checkPathVaild(obtainpath):
+    """
+    function: check whether the given path is valid
+    input : obtainpath
+    output: NA
+    """
+    PATH_CHECK_LIST = [" ", "|", ";", "&", "$", "<", ">", "`", "\\", "'", "\"",
+                       "{", "}", "(", ")", "[", "]", "~", "*", "?", "!", "\n"]
+    if obtainpath.strip() == "":
+        return
+    for rac in PATH_CHECK_LIST:
+        flag = obtainpath.find(rac)
+        if flag >= 0:
+            raise Exception(ErrorCode.GAUSS_502["GAUSS_50219"] % obtainpath +
+                            " There are illegal characters in the path.")
+
+
+def obtainInstStr(objectList):
+    '''
+    function : Obtain information of
instance. + input : [] + output : String + ''' + info = "" + if (isinstance(objectList, list)): + for obj in objectList: + info += "%s\n" % str(obj) + return info + + +def compareObject(Object_A, Object_B, instName, tempbuffer=None, model=None, + manageCNinfo=None): + ''' + function : Compare object_A and Object_B. + input : Object, Object, instName, tempbuffer, model, manageCNinfo + output : boolean, tempbuffer + ''' + if tempbuffer is None: + tempbuffer = [] + if isinstance(Object_A, bytes) or isinstance(Object_A, str): + if (Object_A != Object_B): + tempbuffer.append(instName) + tempbuffer.append(Object_A) + tempbuffer.append(Object_B) + return False, tempbuffer + ### not the same type + elif (type(Object_A) != type(Object_B)): + tempbuffer.append(instName) + tempbuffer.append(str(Object_A)) + tempbuffer.append(str(Object_B)) + return False, tempbuffer + ### string, int, long, float, bool type + elif (isinstance(Object_A, bytes)): + if (Object_A != Object_B): + tempbuffer.append(instName) + tempbuffer.append(Object_A) + tempbuffer.append(Object_B) + return False, tempbuffer + elif (isinstance(Object_A, type(None))): + if (Object_A != Object_B): + tempbuffer.append(instName) + tempbuffer.append(Object_A) + tempbuffer.append(Object_B) + return False, tempbuffer + elif (isinstance(Object_A, int) or isinstance(Object_A, int) + or isinstance(Object_A, float) or isinstance(Object_A, bool)): + if (Object_A != Object_B): + tempbuffer.append(instName) + tempbuffer.append(Object_A) + tempbuffer.append(Object_B) + return False, tempbuffer + ### list type + elif (isinstance(Object_A, list)): + if (model == "manageCN"): + if (len(Object_A) != len(Object_B)): + theSame, tempbuffer = checkObject(Object_A, Object_B, instName, + tempbuffer, manageCNinfo) + if (not theSame): + return False, tempbuffer + if (len(Object_A) != 0 and len(Object_B) != 0): + Object_A1 = [] + Object_B1 = [] + for Obj_A in Object_A: + for Obj_B in Object_B: + if (Obj_A.name == Obj_B.name): + Object_A1.append(Obj_A) + Object_B1.append(Obj_B) + continue + for idx in range(len(Object_A1)): + result, tempbuffer = compareObject(Object_A1[idx], + Object_B1[idx], + "%s[%d]" % ( + instName, idx), + tempbuffer, + model, + manageCNinfo) + if (not result): + return False, tempbuffer + else: + for idx in range(len(Object_A)): + result, tempbuffer = compareObject(Object_A[idx], + Object_B[idx], + "%s[%d]" % ( + instName, idx), + tempbuffer, + model, + manageCNinfo) + if (not result): + return False, tempbuffer + else: + if (len(Object_A) != len(Object_B)): + instmap = {obtainInstStr(Object_A): obtainInstStr(Object_B)} + tempbuffer.append(instName) + tempbuffer.append(obtainInstStr(Object_A)) + tempbuffer.append(obtainInstStr(Object_B)) + return False, tempbuffer + + for idx in range(len(Object_A)): + result, tempbuffer = compareObject(Object_A[idx], + Object_B[idx], + "%s[%d]" % (instName, idx), + tempbuffer, + model, + manageCNinfo) + if (not result): + return False, tempbuffer + ### function type + elif isinstance(Object_A, types.FunctionType) or \ + isinstance(Object_A, types.MethodType): + return True, tempbuffer + elif isinstance(Object_A, type(dbClusterInfo())) or \ + isinstance(Object_A, type(dbNodeInfo())) or \ + isinstance(Object_A, type(instanceInfo())): + Object_A_list = dir(Object_A) + Object_B_list = dir(Object_B) + if (len(Object_A_list) != len(Object_B_list)): + tempbuffer.append(instName) + tempbuffer.append(str(Object_A)) + tempbuffer.append(str(Object_B)) + return False, tempbuffer + for i in Object_A_list: + if 
(i.startswith("_") or ignoreCheck(Object_A, i, model)): + continue + Inst_A = getattr(Object_A, i) + try: + Inst_B = getattr(Object_B, i) + except Exception as e: + tempbuffer.append(instName) + tempbuffer.append(str(Object_A)) + tempbuffer.append(str(Object_B)) + return False, tempbuffer + result, tempbuffer = compareObject(Inst_A, Inst_B, i, tempbuffer, + model, manageCNinfo) + if (not result): + return False, tempbuffer + else: + tempbuffer.append(instName) + tempbuffer.append(str(Object_A)) + tempbuffer.append(str(Object_B)) + return False, tempbuffer + return True, tempbuffer + + +def checkObject(Object_A, Object_B, instName, checkbuffer, manageCNinfo): + """ + """ + Join = [] + if (len(Object_A)): + Join.extend(Object_A) + if (len(Object_B)): + Join.extend(Object_B) + + # CN instance + if (isinstance(Join[0], instanceInfo)): + + # check instance role + if (Join[0].instanceRole != 3): + raise Exception(ErrorCode.GAUSS_528["GAUSS_52809"]) + # xml must match action + if (len(Object_A) == 1 and len(Object_B) == 0): + if (manageCNinfo.mode != "delete"): + raise Exception( + ErrorCode.GAUSS_528["GAUSS_52808"] % ("deletion", "add")) + elif (len(Object_A) == 0 and len(Object_B) == 1): + if (manageCNinfo.mode != "add"): + raise Exception(ErrorCode.GAUSS_528["GAUSS_52808"] % ( + "increased", "delete")) + else: + raise Exception(ErrorCode.GAUSS_528["GAUSS_52809"]) + + # at most add or delete one CN + if (len(manageCNinfo.nodeInfo) != 0 or len(manageCNinfo.cooInfo) != 0): + raise Exception(ErrorCode.GAUSS_528["GAUSS_52809"]) + + manageCNinfo.cooInfo.extend(Join) + # GaussDB nodes + elif (isinstance(Join[0], dbNodeInfo)): + # get added or deleted node + oa_names = [Obj_A.name for Obj_A in Object_A] + ob_names = [Obj_B.name for Obj_B in Object_B] + Object_AA = [Obj_A for Obj_A in Object_A if Obj_A.name not in ob_names] + Object_BB = [Obj_B for Obj_B in Object_B if Obj_B.name not in oa_names] + + # xml must match action + if (len(Object_AA) == 1 and len(Object_BB) == 0): + if (manageCNinfo.mode != "delete"): + raise Exception( + ErrorCode.GAUSS_528["GAUSS_52808"] % ("deletion", "add")) + elif (len(Object_AA) == 0 and len(Object_BB) == 1): + if (manageCNinfo.mode != "add"): + raise Exception(ErrorCode.GAUSS_528["GAUSS_52808"] % ( + "increased", "delete")) + else: + raise Exception(ErrorCode.GAUSS_528["GAUSS_52809"]) + + # at most add or delete one node + if (len(manageCNinfo.nodeInfo) != 0 or len(manageCNinfo.cooInfo) != 0): + raise Exception(ErrorCode.GAUSS_528["GAUSS_52809"]) + + if (len(Object_AA)): + manageCNinfo.nodeInfo.extend(Object_AA) + if (len(Object_BB)): + manageCNinfo.nodeInfo.extend(Object_BB) + else: + raise Exception(ErrorCode.GAUSS_528["GAUSS_52809"]) + + return True, checkbuffer + + +#################################################################### +##read cluster functions +#################################################################### + +xmlRootNode = None + + +def checkXMLFile(xmlFile): + """ + function : check XML contain DTDs + input : String + output : NA + """ + # Check xml for security requirements + # if it have " 0 or name.find( + "dataNode") == 0) and returnValue != ""): + returnValue = os.path.normpath(returnValue) + return (returnStatus, returnValue) + + +#################################################################### + + +class queryCmd(): + def __init__(self, outputFile="", dataPathQuery=False, portQuery=False, + azNameQuery=False): + self.outputFile = outputFile + self.dataPathQuery = dataPathQuery + self.portQuery = portQuery + self.azNameQuery = 
azNameQuery + self.clusterStateQuery = False + + +class peerInstanceInfo(): + """ + Peer instance information + """ + + def __init__(self): + self.peerDataPath = "" + self.peerHAIPs = [] + self.peerHAPort = 0 + self.peerRole = 0 + self.peer2DataPath = "" + self.peer2HAIPs = [] + self.peer2HAPort = 0 + self.peer2Role = 0 + + def __str__(self): + """ + Construct a printable string representation of a instanceInfo + """ + ret = "peerDataPath=%s,peerHAPort=%d,peerRole=%d" % ( + self.peerDataPath, self.peerHAPort, self.peerRole) + if self.peer2DataPath: + ret += ",peer2DataPath=%s" % self.peer2DataPath + if self.peer2HAPort: + ret += ",peer2HAPort=%d" % self.peer2HAPort + if self.peer2Role: + ret += ",peer2Role=%d" % self.peer2Role + return ret + +class instanceInfo(): + """ + Instance information + """ + + def __init__(self, instId=0, mirrorId=0): + """ + Constructor + """ + # instance id + self.instanceId = instId + self.mirrorId = mirrorId + # host name + self.hostname = "" + # listen ip + self.listenIps = [] + # ha ip + self.haIps = [] + # port + self.port = 0 + # It's pool port for coordinator, and ha port for other instance + self.haPort = 0 + # data directory + self.datadir = "" + # xlog directory + self.xlogdir = "" + # ssd data directory + self.ssdDir = "" + # instance type + self.instanceType = INSTANCE_TYPE_UNDEFINED + # instance role + self.instanceRole = INSTANCE_ROLE_UNDEFINED + # instance rack info + self.rack = "" + # oltp zpaxos sub instance type + self.subInstanceType = INSTANCE_ROLE_UNDEFINED + + self.level = 1 + # we use port and haPort to save peerPort/clientPort for etcd + # datanode: use haPort to save replport + # repl port + self.replport = 0 + # sctp port + self.sctpPort = 0 + # control port + self.controlPort = 0 + # az name + self.azName = "" + self.clusterName = "" + # peer port etcd + self.peerPort = 0 + # client port etcd + self.clientPort = 0 + # instance name + self.name = "" + # DB state Normal or other, use to save dynamic info + self.state = "" + # get staticConnections from database,use to save dynamic info + self.staticConnections = "" + # DB role such as Primary, Standby + self.localRole = "" + self.peerInstanceInfos = [] + self.syncNum = -1 + self.cascadeRole = "off" + + def __cmp__(self, target): + """ + Type compare + """ + if (type(self) != type(target)): + return 1 + if (not isinstance(target, instanceInfo)): + return 1 + if (not hasattr(target, "instanceId")): + return 1 + else: + return self.instanceId - target.instanceId + + def __str__(self): + """ + Construct a printable string representation of a instanceInfo + """ + ret = "InstanceId=%s,MirrorId=%s,Host=%s,Port=%s,DataDir=%s," \ + "XlogDir=%s,SsdDir=%s,InstanceType=%s,Role=%s,ListenIps=%s," \ + "HaIps=%s" % ( + self.instanceId, self.mirrorId, self.hostname, self.port, + self.datadir, self.xlogdir, self.ssdDir, self.instanceType, + self.instanceRole, self.listenIps, self.haIps) + if self.rack: + ret += ",rack=%s" % self.rack + if self.replport: + ret += ",replport=%s" % self.replport + if self.sctpPort: + ret += ",sctpPort=%s" % self.sctpPort + if self.controlPort: + ret += ",controlPort=%s" % self.controlPort + if self.azName: + ret += ",azName=%s" % self.azName + if self.clusterName: + ret += ",clusterName=%s" % self.clusterName + if self.peerPort: + ret += ",peerPort=%s" % self.peerPort + if self.clientPort: + ret += ",clientPort=%s" % self.clientPort + if self.name: + ret += ",name=%s" % self.name + return ret + + +class dbNodeInfo(): + """ + Instance info on a node + """ + + def 
__init__(self, nodeId=0, name=""): + """ + Constructor + """ + # node id + self.id = nodeId + # node name + self.name = name + self.backIps = [] + self.virtualIp = [] + self.sshIps = [] + # instance number + self.cmsNum = 0 + self.cooNum = 0 + self.dataNum = 0 + self.gtmNum = 0 + self.etcdNum = 0 + # cm_servers instance + self.cmservers = [] + # cn instance + self.coordinators = [] + # DB instance + self.datanodes = [] + # gtm instance + self.gtms = [] + # cm_agent instance + self.cmagents = [] + # etcd instance + self.etcds = [] + # cm_server/cm_agent data directory + self.cmDataDir = "" + self.dummyStandbyBasePort = 0 + self.masterBasePorts = [MASTER_BASEPORT_CMS, MASTER_BASEPORT_GTM, + MASTER_BASEPORT_COO, + MASTER_BASEPORT_DATA, MASTER_BASEPORT_ETCD, + MASTER_BASEPORT_CMAGENT] + self.standbyBasePorts = [STANDBY_BASEPORT_CMS, STANDBY_BASEPORT_GTM, + STANDBY_BASEPORT_COO, + STANDBY_BASEPORT_DATA, STANDBY_BASEPORT_ETCD, + STANDBY_BASEPORT_CMAGENT] + # azName + self.azName = "" + self.azPriority = 1 + self.standbyDnNum = 0 + self.dummyStandbyDnNum = 0 + self.cascadeRole = "off" + + def __cmp__(self, target): + """ + Type compare + """ + if (type(self) != type(target)): + return 1 + if (not isinstance(target, dbNodeInfo)): + return 1 + if (not hasattr(target, "id")): + return 1 + else: + return self.id - target.id + + def __str__(self): + """ + function : Construct a printable string representation of a dbNodeInfo + input : NA + output : String + """ + retStr = "HostName=%s,backIps=%s" % (self.name, self.backIps) + # cm_server instance information + for cmsInst in self.cmservers: + retStr += "\n%s" % str(cmsInst) + # cm_agent instance information + for cmaInst in self.cmagents: + retStr += "\n%s" % str(cmaInst) + # gtm instance information + for gtmInst in self.gtms: + retStr += "\n%s" % str(gtmInst) + # cn instance information + for cooInst in self.coordinators: + retStr += "\n%s" % str(cooInst) + # DB instance information + for dataInst in self.datanodes: + retStr += "\n%s" % str(dataInst) + # etcd instance information + for dataInst in self.etcds: + retStr += "\n%s" % str(dataInst) + + return retStr + + def setDnDetailNum(self): + self.dataNum = self.getDnNum(MASTER_INSTANCE) + self.standbyDnNum = self.getDnNum(STANDBY_INSTANCE) + self.dummyStandbyDnNum = self.getDnNum(DUMMY_STANDBY_INSTANCE) + + def getDnNum(self, dntype): + """ + function: get DB num + input: dntype + output:dn num + """ + count = 0 + for dnInst in self.datanodes: + if (dnInst.instanceType == dntype): + count += 1 + return count + + def appendInstance(self, instId, mirrorId, instRole, instanceType, + listenIps=None, + haIps=None, datadir="", ssddir="", level=1, + clusterType=CLUSTER_TYPE_SINGLE_INST, xlogdir="", + syncNum=-1): + """ + function : Classify the instance of cmserver/gtm + input : int,int,String,String + output : NA + """ + if not self.__checkDataDir(datadir, instRole): + raise Exception(ErrorCode.GAUSS_516["GAUSS_51638"] % \ + self.name + " Data directory[%s] is " + "conflicting." 
% datadir) + + dbInst = instanceInfo(instId, mirrorId) + + dbInst.hostname = self.name + dbInst.datadir = os.path.realpath(datadir) + + if (instRole == INSTANCE_ROLE_DATANODE): + dbInst.xlogdir = xlogdir + else: + dbInst.xlogdir = "" + dbInst.instanceType = instanceType + dbInst.instanceRole = instRole + if (listenIps is not None): + if (len(listenIps) == 0): + dbInst.listenIps = self.backIps[:] + else: + dbInst.listenIps = listenIps[:] + + if (haIps is not None): + if (len(haIps) == 0): + dbInst.haIps = self.backIps[:] + else: + dbInst.haIps = haIps[:] + # cm_server + if (instRole == INSTANCE_ROLE_CMSERVER): + dbInst.datadir = os.path.join(self.cmDataDir, "cm_server") + dbInst.port = self.__assignNewInstancePort(self.cmservers, + instRole, instanceType) + dbInst.level = level + dbInst.haPort = dbInst.port + 1 + self.cmservers.append(dbInst) + # gtm + elif (instRole == INSTANCE_ROLE_GTM): + dbInst.port = self.__assignNewInstancePort(self.gtms, instRole, + instanceType) + dbInst.haPort = dbInst.port + 1 + self.gtms.append(dbInst) + # cn + elif (instRole == INSTANCE_ROLE_COODINATOR): + dbInst.port = self.__assignNewInstancePort(self.coordinators, + instRole, instanceType) + dbInst.haPort = dbInst.port + 1 + dbInst.ssdDir = ssddir + self.coordinators.append(dbInst) + # dn + elif (instRole == INSTANCE_ROLE_DATANODE): + dbInst.port = self.__assignNewInstancePort(self.datanodes, + instRole, instanceType) + dbInst.haPort = dbInst.port + 1 + dbInst.ssdDir = ssddir + dbInst.syncNum = syncNum + self.datanodes.append(dbInst) + # cm_agent + elif (instRole == INSTANCE_ROLE_CMAGENT): + dbInst.datadir = os.path.join(self.cmDataDir, "cm_agent") + self.cmagents.append(dbInst) + # etcd + elif (instRole == INSTANCE_ROLE_ETCD): + dbInst.port = self.__assignNewInstancePort(self.etcds, instRole, + instanceType) + dbInst.haPort = self.__assignNewInstancePort(self.etcds, instRole, + STANDBY_INSTANCE) + self.etcds.append(dbInst) + + def __checkDataDir(self, datadir, instRole): + """ + function : Check whether the instance path is the same as with the + parameter of datadir + input : String,String + output : boolean + """ + if (datadir == ""): + return ( + instRole == INSTANCE_ROLE_CMSERVER or instRole == + INSTANCE_ROLE_CMAGENT) + checkPathVaild(datadir) + # cm_server + for cmsInst in self.cmservers: + if (cmsInst.datadir == datadir): + return False + # cn + for cooInst in self.coordinators: + if (cooInst.datadir == datadir): + return False + # dn + for dataInst in self.datanodes: + if (dataInst.datadir == datadir): + return False + # gtm + for gtmInst in self.gtms: + if (gtmInst.datadir == datadir): + return False + # etcd + for etcd in self.etcds: + if (etcd.datadir == datadir): + return False + # cm_agent + for cmaInst in self.cmagents: + if (cmaInst.datadir == datadir): + return False + + return True + + def assignNewInstancePort(self, instList, instRole, instanceType): + return self.__assignNewInstancePort(instList, instRole, instanceType) + + def __assignNewInstancePort(self, instList, instRole, instanceType): + """ + function : Assign a new port for the instance + input : [],String ,String + output : int + """ + port = 0 + # master instance + if instanceType == MASTER_INSTANCE: + port = self.masterBasePorts[instRole] + # standby instance + elif instanceType == STANDBY_INSTANCE: + port = self.standbyBasePorts[instRole] + # DB dummy standby instance + elif instanceType == DUMMY_STANDBY_INSTANCE: + port = self.dummyStandbyBasePort + # cn and cm_agent instance + elif instanceType == INSTANCE_TYPE_UNDEFINED: + 
port = self.masterBasePorts[instRole] + return port + for inst in instList: + if (inst.instanceType == instanceType): + port += 2 + + return port + + +class dbClusterInfo(): + """ + Cluster info + """ + + def __init__(self, checkSctpPort=False): + """ + Constructor + """ + self.name = "" + self.appPath = "" + self.logPath = "" + self.xmlFile = "" + self.dbNodes = [] + self.newNodes = [] + self.cmsFloatIp = "" + self.__newInstanceId = [BASE_ID_CMSERVER, BASE_ID_GTM, BASE_ID_ETCD, + BASE_ID_COORDINATOR, BASE_ID_DATANODE, + BASE_ID_CMAGENT] + self.__newDummyStandbyId = BASE_ID_DUMMYDATANODE + self.__newMirrorId = 0 + self.clusterRings = [] + self.clusterType = CLUSTER_TYPE_SINGLE_INST + self.checkSctpPort = checkSctpPort + self.clusterName = "" + self.toolPath = "" + self.agentPath = "" + self.agentLogPath = "" + self.tmpPath = "" + self.managerPath = "" + self.replicaNum = 0 + self.corePath = "" + + # add azName + self.azName = "" + self.cascadeRole = "off" + + self.version = 0 + self.installTime = 0 + self.localNodeId = 0 + self.nodeCount = 0 + # cluster properties + self.replicationCount = 0 + self.quorumMode = "" + self.gtmcount = 0 + self.etcdcount = 0 + self.cmscount = 0 + self.__newGroupId = 0 + self.cncount = 0 + self.masterDnCount = 0 + self.standbyDnCount = 0 + self.dummyStandbyDnCount = 0 + + def __str__(self): + """ + function : Construct a printable string representation of a + dbClusterInfo + input : NA + output : String + """ + retStr = "ClusterName=%s,AppPath=%s,LogPath=%s,ClusterType=%s" % \ + (self.name, self.appPath, self.logPath, self.clusterType) + + for dbNode in self.dbNodes: + retStr += "\n%s" % str(dbNode) + + return retStr + + @staticmethod + def setDefaultXmlFile(xmlFile): + """ + function : Set the default xml file + input : String + output : NA + """ + if not os.path.exists(xmlFile): + raise Exception( + ErrorCode.GAUSS_502["GAUSS_50201"] % "XML configuration") + + os.putenv(ENV_CLUSTERCONFIG, xmlFile) + + @staticmethod + def readClusterHosts(xmlFile=""): + """ + function : Read cluster node name from xml file + input : String + output : String + """ + if (xmlFile != ""): + dbClusterInfo.setDefaultXmlFile(xmlFile) + + # read cluster node name from xml file + (retStatus, retValue) = readOneClusterConfigItem( + initParserXMLFile(xmlFile), "nodeNames", "cluster") + if (retStatus != 0): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50204"] + % "node names" + " Error: \n%s" % retValue) + nodeNames = [] + nodeNames_tmp = retValue.split(",") + for nodename in nodeNames_tmp: + nodeNames.append(nodename.strip()) + if (len(nodeNames) == 0): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50204"] % \ + "XML file" + " There is no nodes in cluster " + "configuration file.") + + return nodeNames + + @staticmethod + def readClustercorePath(xmlFile): + """ + function : Read corefile path from default xml file + input : String + output : String + """ + dbClusterInfo.setDefaultXmlFile(xmlFile) + # read corefile path from xml file + (retStatus, retValue) = readOneClusterConfigItem( + initParserXMLFile(xmlFile), "corePath", "cluster") + if retStatus != 0: + raise Exception(ErrorCode.GAUSS_512["GAUSS_51200"] + % "corePath" + " Error: \n%s" % retValue) + corepath = os.path.normpath(retValue) + checkPathVaild(corepath) + return corepath + + @staticmethod + def readClusterAppPath(xmlFile): + """ + function : Read the cluster's application path from xml file + input : String + output : String + """ + dbClusterInfo.setDefaultXmlFile(xmlFile) + # read the cluster's application path from xml 
file
+        (retStatus, retValue) = readOneClusterConfigItem(
+            initParserXMLFile(xmlFile), "gaussdbAppPath", "cluster")
+        if retStatus != 0:
+            raise Exception(ErrorCode.GAUSS_512["GAUSS_51200"]
+                            % "gaussdbAppPath" + " Error: \n%s" % retValue)
+
+        appPath = os.path.normpath(retValue)
+        checkPathVaild(appPath)
+        return appPath
+
+    @staticmethod
+    def readClusterTmpMppdbPath(user, xmlFile):
+        """
+        function : Read the temporary mppdb path from the xml file
+        input : String, String
+        output : String
+        """
+        dbClusterInfo.setDefaultXmlFile(xmlFile)
+        # read temporary mppdb path from xml file
+        (retStatus, retValue) = readOneClusterConfigItem(
+            initParserXMLFile(xmlFile), "tmpMppdbPath", "cluster")
+        if retStatus != 0:
+            (retToolPathStatus, retToolPathValue) = readOneClusterConfigItem(
+                initParserXMLFile(xmlFile), "gaussdbToolPath", "cluster")
+            if retToolPathStatus != 0:
+                retToolPathValue = CLUSTER_TOOL_PATH
+            retValue = os.path.join(retToolPathValue, "%s_mppdb" % user)
+
+        tmppath = os.path.normpath(retValue)
+        checkPathVaild(tmppath)
+        return tmppath
+
+    @staticmethod
+    def readClusterLogPath(xmlFile):
+        """
+        function : Read the log path from the xml file
+        input : String
+        output : String
+        """
+        dbClusterInfo.setDefaultXmlFile(xmlFile)
+        # read log path from xml file
+        (retStatus, retValue) = readOneClusterConfigItem(
+            initParserXMLFile(xmlFile), "gaussdbLogPath", "cluster")
+        if retStatus == 0:
+            tmppath = os.path.normpath(retValue)
+            checkPathVaild(tmppath)
+            return tmppath
+        elif retStatus == 2:
+            return "/var/log/gaussdb"
+        else:
+            raise Exception(ErrorCode.GAUSS_512["GAUSS_51200"]
+                            % "gaussdbLogPath" + " Error: \n%s" % retValue)
+
+    def initFromStaticConfig(self, user, static_config_file="",
+                             isLCCluster=False, ignoreLocalEnv=False):
+        """
+        function : Init the cluster from the static configuration file
+        input : String, String, boolean, boolean
+        output : NA
+        """
+        # check OS user
+        self.__checkOsUser(user)
+        # get static_config_file
+        if static_config_file == "":
+            staticConfigFile = self.__getStaticConfigFilePath(user)
+        else:
+            staticConfigFile = static_config_file
+        # read static_config_file
+        self.__readStaticConfigFile(staticConfigFile, user, isLCCluster,
+                                    ignoreLocalEnv=ignoreLocalEnv)
+
+    def getClusterVersion(self, staticConfigFile):
+        """
+        function : get the cluster version information
+                   from the static configuration file
+        input : String
+        output : version
+        """
+        try:
+            with open(staticConfigFile, "rb") as fp:
+                info = fp.read(28)
+                (crc, lenth, version, currenttime, nodeNum,
+                 localNodeId) = struct.unpack("=IIIqiI", info)
+        except Exception as e:
+            raise Exception(
+                ErrorCode.GAUSS_512["GAUSS_51236"] + " Error: \n%s."
% str(e)) + + return version + + def isMiniaturizedDeployment(self, cluster_version): + """ + function: judge whether is the miniaturized deployment + input : Int + output : bool value + """ + if (cluster_version >= 101 and cluster_version <= 200): + return True + return False + + def isSinglePrimaryMultiStandbyDeployment(self, cluster_version): + """ + judge whether is the single primary multi standby deployment + """ + if (cluster_version >= 201 and cluster_version <= 300): + return True + return False + + def queryNodeInfo(self, sshtool, localHostName, nodeId, fileName=""): + """ + get cluster node info, if nodeid is 0, we get all node info, + else ony get one node info + """ + i = 0 + (clusterState, syncInfo) = self.__getDnSenderStatus(sshtool, + localHostName) + outText = \ + "--------------------------------------------------------------" \ + "---------\n\n" + outText = outText + ("cluster_state : %s\n" % clusterState) + outText = outText + "redistributing : No\n\n" + outText = outText + \ + "-------------------------------------" \ + "----------------------------------\n\n" + for dbNode in self.dbNodes: + if dbNode.id == nodeId or nodeId == 0: + for dnInst in dbNode.datanodes: + outText = outText + ( + "node : %u\n" % dbNode.id) + outText = outText + ( + "node_name : %s\n" % dbNode.name) + outText = outText + ( + "instance_id : %u\n" % + dnInst.instanceId) + outText = outText + ("node_ip : %s\n" % + dnInst.listenIps[0]) + outText = outText + ( + "data_path : %s\n" % + dnInst.datadir) + outText = outText + "type : " \ + "Datanode\n" + outText = outText + ( + "instance_state : %s\n" % + dnInst.state) + outText = outText + ( + "az_name : %s\n" % + dnInst.azName) + if dnInst.localRole == "Primary": + outText = outText + ( + "static_connections : %s\n" % + dnInst.staticConnections) + outText = outText + ( + "HA_state : %s\n" % + clusterState) + outText = outText + ( + "instance_role : %s\n" % + dnInst.localRole) + if dnInst.localRole == "Primary": + outText = outText + "\n------------------------" \ + "---------------" \ + "--------------------------------\n\n" + continue + for i_loop in syncInfo: + if i_loop[0] == dnInst.listenIps[0]: + if i_loop[11] == '': + i_loop[11] = 'Unknown' + outText = outText + ( + "HA_state : %s\n" % + i_loop[1]) + outText = outText + ( + "sender_sent_location : %s\n" % + i_loop[2]) + outText = outText + ( + "sender_write_location : %s\n" % + i_loop[3]) + outText = outText + ( + "sender_flush_location : %s\n" % + i_loop[4]) + outText = outText + ( + "sender_replay_location : %s\n" % + i_loop[5]) + outText = outText + ( + "receiver_received_location: %s\n" % + i_loop[6]) + outText = outText + ( + "receiver_write_location : %s\n" % + i_loop[7]) + outText = outText + ( + "receiver_flush_location : %s\n" % + i_loop[8]) + outText = outText + ( + "receiver_replay_location : %s\n" % + i_loop[9]) + outText = outText + ( + "sync_percent : %s\n" % + i_loop[10]) + outText = outText + ( + "sync_state : %s\n" % + i_loop[11]) + if dnInst.localRole == "Cascade Standby": + outText = outText + ( + "upstream_nodeIp : %s\n" % + i_loop[12]) + break + outText = outText + "\n------------------------" \ + "---------------" \ + "--------------------------------\n\n" + if nodeId != 0: + break + else: + i += 1 + continue + if i >= len(self.dbNodes): + raise Exception(ErrorCode.GAUSS_516["GAUSS_51612"] % nodeId) + self.__fprintContent(outText, fileName) + + def printStaticConfig(self, user, fileName="", isLCCluster=False): + """ + function : printStaticConfig + input : String + output : 
NA + """ + try: + # read static_config_file + outText = "NodeHeader:\n" + outText = outText + ("version:%u\n" % self.version) + outText = outText + ("time:%ld\n" % self.installTime) + outText = outText + ("nodeCount:%u\n" % self.nodeCount) + outText = outText + ("node:%u\n" % self.localNodeId) + dnTotalNum = self.__getDnInstanceNum() + for dbNode in self.dbNodes: + if self.clusterType == \ + CLUSTER_TYPE_SINGLE_PRIMARY_MULTI_STANDBY or \ + self.clusterType == CLUSTER_TYPE_SINGLE_INST: + outText = outText + ("azName:%s\n" % dbNode.azName) + outText = outText + ("azPriority:%u\n" % dbNode.azPriority) + outText = outText + ("node :%u\n" % dbNode.id) + outText = outText + ("nodeName:%s\n" % dbNode.name) + + outText = outText + "ssh channel :\n" + j = 0 + for sshIp in dbNode.sshIps: + outText = outText + ("sshChannel %u:%s\n" % ( + j + 1, dbNode.sshIps[j])) + j = j + 1 + outText = outText + ( + "datanodeCount :%u\n" % len(dbNode.datanodes)) + j = 0 + for dnInst in dbNode.datanodes: + j = j + 1 + outText = outText + ("datanode %u:\n" % j) + outText = outText + ( + "datanodeLocalDataPath :%s\n" % dnInst.datadir) + outText = outText + ( + "datanodeXlogPath :%s\n" % dnInst.xlogdir) + k = 0 + for listenIp in dnInst.listenIps: + k = k + 1 + outText = outText + ( + "datanodeListenIP %u:%s\n" % (k, listenIp)) + outText = outText + ("datanodePort :%u\n" % dnInst.port) + k = 0 + for haIp in dnInst.haIps: + k = k + 1 + outText = outText + ( + "datanodeLocalHAIP %u:%s\n" % (k, haIp)) + outText = outText + ( + "datanodeLocalHAPort :%u\n" % dnInst.haPort) + outText = outText + ( + "dn_replication_num: %u\n" % dnTotalNum) + k = 0 + if self.clusterType == \ + CLUSTER_TYPE_SINGLE_PRIMARY_MULTI_STANDBY or \ + self.clusterType == CLUSTER_TYPE_SINGLE_INST: + maxPeerNum = MIRROR_COUNT_REPLICATION_MAX if \ + self.nodeCount > MIRROR_COUNT_REPLICATION_MAX \ + else self.nodeCount + for k in range(maxPeerNum - 1): + outText = outText + ( + "datanodePeer%uDataPath :%s\n" % ( + k, dnInst.peerInstanceInfos[k].peerDataPath)) + m = 0 + for peerHaIP in dnInst.peerInstanceInfos[ + k].peerHAIPs: + m += 1 + outText = outText + ( + "datanodePeer%uHAIP %u:%s\n" % ( + k, m, peerHaIP)) + outText = outText + ( + "datanodePeer%uHAPort :%u\n" % ( + k, dnInst.peerInstanceInfos[k].peerHAPort)) + else: + outText = outText + ("datanodePeerDataPath :%s\n" % + dnInst.peerInstanceInfos[ + 0].peerDataPath) + m = 0 + for peerHaIP in dnInst.peerInstanceInfos[k].peerHAIPs: + m += 1 + outText = outText + ( + "datanodePeer2HAIP %u:%s\n" % ( + m, peerHaIP)) + outText = outText + ("datanodePeerHAPort :%u\n" % + dnInst.peerInstanceInfos[ + 0].peerHAPort) + outText = outText + ("datanodePeer2DataPath :%s\n" % + dnInst.peerInstanceInfos[ + 0].peer2DataPath) + m = 0 + for peer2HaIP in dnInst.peerInstanceInfos[ + 0].peer2HAIPs: + m += 1 + outText = outText + ( + "datanodePeer2HAIP %u:%s\n" % ( + m, peer2HaIP)) + outText = outText + ("datanodePeer2HAPort :%u\n" % + dnInst.peerInstanceInfos[ + 0].peer2HAPort) + + self.__fprintContent(outText, fileName) + except Exception as e: + raise Exception(ErrorCode.GAUSS_516["GAUSS_51652"] % str(e)) + + def queryClsInfo(self, hostName, sshtool, mpprcFile, cmd): + try: + clusterState = 'Normal' + roleStatusArray = [] + dbStateArray = [] + maxNodeNameLen = 0 + maxDataPathLen = 0 + maxAzNameLen = 0 + dnNodeCount = 0 + roleStatus = "" + dbState = "" + primaryDbNum = 0 + primaryDbState = "" + for dbNode in self.dbNodes: + for dnInst in dbNode.datanodes: + sshcmd = "gs_ctl query -D %s" % dnInst.datadir + output = "" + if 
(dbNode.name != hostName): + (statusMap, output) = sshtool.getSshStatusOutput( + sshcmd, [dbNode.name], mpprcFile) + if statusMap[dbNode.name] != 'Success' or \ + output.find("exc_sql failed") > 0: + if output.find( + "could not connect to the local server") \ + > 0 or output.find( + "Is server running") > 0: + roleStatus = "Down" + dbState = "Manually stopped" + else: + roleStatus = "Unknown" + dbState = "Unknown" + else: + res = re.findall(r'local_role\s*:\s*(\w+)', output) + roleStatus = res[0] + res = re.findall(r'db_state\s*:\s*(\w+)', output) + dbState = res[0] + else: + (status, output) = subprocess.getstatusoutput(sshcmd) + if status != 0 or output.find("exc_sql failed") > 0: + if output.find( + "could not connect to the local server") \ + > 0 or output.find( + "Is server running") > 0: + roleStatus = "Down" + dbState = "Manually stopped" + else: + roleStatus = "Unknown" + dbState = "Unknown" + else: + res = re.findall(r'local_role\s*:\s*(\w+)', output) + roleStatus = res[0] + res = re.findall(r'db_state\s*:\s*(\w+)', output) + dbState = res[0] + if (dbState == "Need"): + detailInformation = re.findall( + r'detail_information\s*:\s*(\w+)', output) + dbState = "Need repair(%s)" % detailInformation[0] + roleStatusArray.append(roleStatus) + dbStateArray.append(dbState) + nodeNameLen = len(dbNode.name) + dataPathLen = len(dbNode.datanodes[0].datadir) + azNameLen = len(dbNode.azName) + maxNodeNameLen = maxNodeNameLen if maxNodeNameLen > \ + nodeNameLen else \ + nodeNameLen + maxDataPathLen = maxDataPathLen if maxDataPathLen > \ + dataPathLen else \ + dataPathLen + maxAzNameLen = maxAzNameLen if maxAzNameLen > azNameLen \ + else azNameLen + dnNodeCount += 1 + if roleStatus == "Primary": + primaryDbNum += 1 + primaryDbState = dbState + else: + if roleStatus != "Standby" and \ + roleStatus != "Secondary" and \ + roleStatus != "Cascade": + clusterState = 'Degraded' + if dbState != "Normal": + clusterState = 'Degraded' + if dnNodeCount == 1: + clusterState = "Unavailable" if dbState != "Normal" \ + else "Normal" + else: + if primaryDbState != "Normal" or primaryDbNum != 1: + clusterState = "Unavailable" + outText = "" + if cmd.clusterStateQuery: + outText = \ + "-------------------------------------------------" \ + "----------------------\n\n" \ + "cluster_name : %s\ncluster_state : %s\nredistributing : No\n\n" % \ + (self.name, clusterState) + outText = outText + \ + "-------------------------------------------" \ + "----------------------------\n" + self.__fprintContent(outText, cmd.outputFile) + return + outText = "[ Cluster State ]\n\ncluster_state : " \ + "%s\nredistributing : No\n" % clusterState + outText = outText + "current_az : AZ_ALL\n\n[ Datanode " \ + "State ]\n\n" + nodeLen = NODE_ID_LEN + SPACE_LEN + maxNodeNameLen + SPACE_LEN + instanceLen = INSTANCE_ID_LEN + SPACE_LEN + ( + maxDataPathLen if cmd.dataPathQuery else 4) + if cmd.azNameQuery: + nodeLen += maxAzNameLen + SPACE_LEN + if cmd.portQuery: + instanceLen += 7 + for i in range(dnNodeCount - 1): + outText = outText + ("%-*s%-*s%-*s%-*s| " % (nodeLen, + "node", + IP_LEN, + "node_ip", + instanceLen, + "instance", + STATE_LEN, + "state")) + outText = outText + "%-*s%-*s%-*s%s\n" % ( + nodeLen, "node", IP_LEN, "node_ip", instanceLen, "instance", + "state") + maxLen = self.nodeCount * ( + nodeLen + instanceLen + IP_LEN + SPACE_LEN + STATE_LEN + + SPACE_LEN + SEPERATOR_LEN) + seperatorLine = "-" * maxLen + outText = outText + seperatorLine + "\n" + i = 0 + for dbNode in self.dbNodes: + for dnInst in dbNode.datanodes: + if 
cmd.azNameQuery: + outText = outText + ( + "%-*s " % (maxAzNameLen, dbNode.azName)) + outText = outText + ("%-2u " % dbNode.id) + outText = outText + ( + "%-*s " % (maxNodeNameLen, dbNode.name)) + outText = outText + ("%-15s " % dnInst.listenIps[0]) + outText = outText + ("%u " % dnInst.instanceId) + if cmd.portQuery: + outText = outText + ("%-*u " % (6, dnInst.port)) + if cmd.dataPathQuery: + outText = outText + ( + "%-*s " % (maxDataPathLen, dnInst.datadir)) + else: + outText = outText + " " + outText = outText + ( + "%s " % self.__getDnRole(dnInst.instanceType)) + if dnNodeCount == 1: + outText = outText + ("%-7s" % "Primary") + else: + outText = outText + ("%-7s" % roleStatusArray[i]) + outText = outText + (" %s" % dbStateArray[i]) + if i < (dnNodeCount - 1): + outText = outText + " | " + else: + outText = outText + "\n" + i += 1 + self.__fprintContent(outText, cmd.outputFile) + except Exception as e: + raise Exception(ErrorCode.GAUSS_516["GAUSS_51652"] % str(e)) + + def __getDnRole(self, instanceType): + """ + function : Get DnRole by instanceType + input : Int + output : String + """ + if instanceType == MASTER_INSTANCE: + return "P" + elif instanceType == STANDBY_INSTANCE: + return "S" + elif instanceType == CASCADE_STANDBY: + return "C" + elif instanceType == DUMMY_STANDBY_INSTANCE: + return "R" + else: + return "" + + def __getDnInstanceNum(self): + dnInsNum = 0 + for dbNode in self.dbNodes: + dnInsNum += len(dbNode.datanodes) + return dnInsNum + + def __getDnSenderStatus(self, sshtool, localHostName): + sql_get = "select a.client_addr, b.state, b.sender_sent_location," \ + "b.sender_write_location, b.sender_flush_location," \ + "b.sender_replay_location, b.receiver_received_location," \ + "b.receiver_write_location, b.receiver_flush_location," \ + "b.receiver_replay_location, b.sync_percent, b.sync_state " \ + "from pg_stat_replication a inner join " \ + "pg_stat_get_wal_senders() b on a.pid = b.pid;" + syncInfo = [] + clusterState = "Normal" + primaryDbState = "Normal" + primaryDbNum = 0 + dnNodeCount = 0 + for dbNode in self.dbNodes: + for dnInst in dbNode.datanodes: + dnNodeCount += 1 + self.__getDnState(dnInst, dbNode, localHostName, sshtool) + if dnInst.localRole == "Primary": + primaryDbState = dnInst.state + primaryDbNum += 1 + output = "" + if dbNode.name != localHostName: + cmd = "[need_replace_quotes] gsql -m -d postgres -p " \ + "%s -A -t -c \"%s\"" % \ + (dnInst.port, sql_get) + (statusMap, output) = sshtool.getSshStatusOutput(cmd, [ + dbNode.name]) + if statusMap[dbNode.name] != 'Success' or output.find( + "failed to connect") >= 0: + continue + else: + output = output.split('\n')[1:-1] + else: + cmd = "gsql -m -d postgres -p %s -A -t -c \"%s\"" % ( + dnInst.port, sql_get) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0 or output.find( + "failed to connect") >= 0: + continue + else: + output = output.split('\n') + if not len(output): + continue + for col_loop in output: + syncInfo.append(col_loop.split('|')) + elif dnInst.localRole == "Cascade Standby": + subsql = "select state, sender_sent_location, sender_write_location," \ + "sender_flush_location, sender_replay_location," \ + "receiver_received_location, receiver_write_location," \ + "receiver_flush_location, receiver_replay_location," \ + "sync_percent, channel from pg_stat_get_wal_receiver();" + if dbNode.name != localHostName: + cmd = "[need_replace_quotes] gsql -m -d postgres -p " \ + "%s -A -t -c \"%s\"" % \ + (dnInst.port, subsql) + (statusMap, cascadeOutput) = 
sshtool.getSshStatusOutput(cmd, [ + dbNode.name]) + if statusMap[dbNode.name] != 'Success' or cascadeOutput.find( + "failed to connect") >= 0: + continue + else: + cascadeOutput = cascadeOutput.split('\n')[1:-1] + else: + cmd = "gsql -m -d postgres -p %s -A -t -c \"%s\"" % ( + dnInst.port, subsql) + (status, cascadeOutput) = subprocess.getstatusoutput(cmd) + if status != 0 or cascadeOutput.find( + "failed to connect") >= 0: + continue + else: + cascadeOutput = cascadeOutput.split('\n') + if not len(cascadeOutput): + continue + for col_loop in cascadeOutput: + col_loop = col_loop.split('|') + cascadeIps = col_loop[-1].split('<--') + col_loop.insert(0, cascadeIps[0].split(':')[0]) + col_loop.insert(11, "Async") + col_loop[-1] = cascadeIps[-1] + syncInfo.append(col_loop) + else: + if dnInst.localRole != "Standby" and \ + dnInst.localRole != "Secondary": + clusterState = "Degraded" + if dnInst.state != "Normal": + clusterState = "Degraded" + if dnNodeCount == 1: + clusterState = "Unavailable" if dnInst.state != "Normal" \ + else "Normal" + else: + if primaryDbState != "Normal" or primaryDbNum != 1: + clusterState = "Unavailable" + return (clusterState, syncInfo) + + def __getDnState(self, dnInst, dbNode, localHostName, sshtool): + sql = "select local_role, static_connections, db_state from " \ + "pg_stat_get_stream_replications();" + if dbNode.name != localHostName: + # [SUCCESS] hostname:\n when ssh, The third line is the sql result + minValidLine = 3 + cmd = "[need_replace_quotes] gsql -m -d postgres -p %s -c " \ + "\"%s\"" % ( + dnInst.port, sql) + (statusMap, output) = sshtool.getSshStatusOutput(cmd, + [dbNode.name]) + dnDown = output.find("failed to connect") >= 0 + if statusMap[dbNode.name] != 'Success' or dnDown: + dnInst.localRole = "Down" if dnDown else "Unknown" + dnInst.staticConnections = 0 + dnInst.state = "Manually stopped" if dnDown else "Unknown" + else: + lineSplitRes = output.split("\n") + if len(lineSplitRes) <= minValidLine or len( + lineSplitRes[minValidLine].split("|")) != 3: + dnInst.localRole = "Unknown" + dnInst.staticConnections = 0 + dnInst.state = "Unknown" + else: + columnRes = lineSplitRes[minValidLine].split("|") + dnInst.localRole = columnRes[0].strip() + dnInst.staticConnections = columnRes[1].strip() + dnInst.state = columnRes[2].strip() + else: + # The second line is the sql result + minValidLine = 2 + cmd = "gsql -m -d postgres -p %s -c \"%s\"" % (dnInst.port, sql) + (status, output) = subprocess.getstatusoutput(cmd) + dnDown = output.find("failed to connect") >= 0 + if status != 0 or dnDown: + dnInst.localRole = "Down" if dnDown else "Unknown" + dnInst.staticConnections = 0 + dnInst.state = "Manually stopped" if dnDown else "Unknown" + else: + lineSplitRes = output.split("\n") + if len(lineSplitRes) <= minValidLine or len( + lineSplitRes[minValidLine].split("|")) != 3: + dnInst.localRole = "Unknown" + dnInst.staticConnections = 0 + dnInst.state = "Unknown" + else: + columnRes = lineSplitRes[minValidLine].split("|") + dnInst.localRole = columnRes[0].strip() + dnInst.staticConnections = columnRes[1].strip() + dnInst.state = columnRes[2].strip() + + def __fprintContent(self, content, fileName): + if fileName != "": + g_file.createFileInSafeMode(fileName) + with open(fileName, "a") as fp: + fp.write(content) + fp.flush() + sys.stdout.write(content) + + def __checkOsUser(self, user): + """ + function : Check os user + input : String + output : NA + """ + try: + user = pwd.getpwnam(user).pw_gid + except Exception as e: + raise 
Exception(ErrorCode.GAUSS_503["GAUSS_50300"] % user) + + def __getStaticConfigFilePath(self, user): + """ + function : get the path of static configuration file. + input : String + output : String + """ + gaussHome = self.__getEnvironmentParameterValue("GAUSSHOME", user) + if (gaussHome == ""): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % \ + ("installation path of designated user [%s]" % + user)) + # if under upgrade, and use chose strategy, we may get a wrong path, + # so we will use the realpath of gausshome + commitid = VersionInfo.getCommitid() + appPath = gaussHome + "_" + commitid + staticConfigFile = "%s/bin/cluster_static_config" % appPath + staticConfigBak = "%s/bin/cluster_static_config_bak" % appPath + staticConfig = "%s/bin/cluster_static_config" % os.path.realpath( + gaussHome) + if os.path.exists(staticConfig): + return staticConfig + elif (os.path.exists(staticConfigFile)): + return staticConfigFile + elif (os.path.exists(staticConfigBak)): + return staticConfigBak + + else: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % \ + ("static configuration file [%s] of " + "designated user [%s]" % (staticConfig, user))) + + def __getEnvironmentParameterValue(self, environmentParameterName, user): + """ + function :Get the environment parameter. + !!!!Do not call this function in preinstall.py script. + because we determine if we are using env separate version by the + value of MPPDB_ENV_SEPARATE_PATH + input : String,String + output : String + """ + # get mpprc file + mpprcFile = os.getenv('MPPDB_ENV_SEPARATE_PATH') + if mpprcFile is not None and mpprcFile != "": + mpprcFile = mpprcFile.replace("\\", "\\\\").replace('"', '\\"\\"') + checkPathVaild(mpprcFile) + userProfile = mpprcFile + else: + userProfile = "~/.bashrc" + # build shell command + if (os.getuid() == 0): + cmd = "su - %s -c 'source %s;echo $%s' 2>/dev/null" % ( + user, userProfile, environmentParameterName) + else: + cmd = "source %s;echo $%s 2>/dev/null" % (userProfile, + environmentParameterName) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] + % cmd + " Error: \n%s" % output) + return output.split("\n")[0] + + def __getStatusByOM(self, user): + """ + function :Get the environment parameter. + !!!!Do not call this function in preinstall.py script. 
+ because we determine if we are using env separate version by the + value of MPPDB_ENV_SEPARATE_PATH + input : String,String + output : String + """ + # get mpprc file + mpprcFile = os.getenv('MPPDB_ENV_SEPARATE_PATH') + if mpprcFile is not None and mpprcFile != "": + mpprcFile = mpprcFile.replace("\\", "\\\\").replace('"', '\\"\\"') + checkPathVaild(mpprcFile) + userProfile = mpprcFile + else: + userProfile = "~/.bashrc" + # build shell command + if os.getuid() == 0: + cmd = "su - %s -c 'source %s;gs_om -t status --detail|tail -1" % ( + user, userProfile) + else: + cmd = "source %s;gs_om -t status --detail|tail -1" % (userProfile) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] + % cmd + " Error: \n%s" % output) + return output.split("\n")[0] + + def __readStaticConfigFile(self, staticConfigFile, user, isLCCluster=False, + ignoreLocalEnv=False): + """ + function : read cluster information from static configuration file + input : String,String + output : NA + """ + fp = None + try: + # get env parameter + gauss_env = self.__getEnvironmentParameterValue("GAUSS_ENV", user) + self.name = self.__getEnvironmentParameterValue("GS_CLUSTER_NAME", + user) + self.appPath = self.__getEnvironmentParameterValue("GAUSSHOME", + user) + logPathWithUser = self.__getEnvironmentParameterValue("GAUSSLOG", + user) + + if not ignoreLocalEnv: + if gauss_env == "2" and self.name == "": + raise Exception(ErrorCode.GAUSS_503["GAUSS_50300"] + % ("cluster name of designated user" + " [%s]" % user)) + if self.appPath == "": + raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % \ + ("installation path of designated user " + "[%s]" % user)) + if logPathWithUser == "": + raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % \ + ("log path of designated user [%s]" % + user)) + + splitMark = "/%s" % user + # set log path without user + # find the path from right to left + self.logPath = logPathWithUser[ + 0:(logPathWithUser.rfind(splitMark))] + staticConfigFilePath = os.path.split(staticConfigFile)[0] + versionFile = os.path.join( + staticConfigFilePath, "upgrade_version") + version, number, commitid = VersionInfo.get_version_info( + versionFile) + try: + # read static_config_file + fp = open(staticConfigFile, "rb") + if float(number) <= 92.200: + info = fp.read(32) + (crc, lenth, version, currenttime, nodeNum, + localNodeId) = struct.unpack("=qIIqiI", info) + else: + info = fp.read(28) + (crc, lenth, version, currenttime, nodeNum, + localNodeId) = struct.unpack("=IIIqiI", info) + self.version = version + self.installTime = currenttime + self.localNodeId = localNodeId + self.nodeCount = nodeNum + except Exception as e: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50204"] + % staticConfigFile + " Error:\n" + str(e)) + if version <= 100: + raise Exception(ErrorCode.GAUSS_516["GAUSS_51637"] % \ + ("cluster static config version[%s]" % version, + "the new version[%s]" % BIN_CONFIG_VERSION)) + elif version >= 101 and version <= 200: + self.clusterType = CLUSTER_TYPE_SINGLE + if BIN_CONFIG_VERSION_SINGLE != version: + raise Exception(ErrorCode.GAUSS_516["GAUSS_51637"] % \ + ("cluster static config version[%s]" + % version, "the new version[%s]" + % BIN_CONFIG_VERSION_SINGLE)) + elif version >= 201 and version <= 300: + # single primary multi standy + self.clusterType = CLUSTER_TYPE_SINGLE_PRIMARY_MULTI_STANDBY + if (BIN_CONFIG_VERSION_SINGLE_PRIMARY_MULTI_STANDBY + != version): + raise Exception( + ErrorCode.GAUSS_516["GAUSS_51637"] + % ("cluster static 
config version[%s]" % version, + "the new version[%s]" + % BIN_CONFIG_VERSION_SINGLE_PRIMARY_MULTI_STANDBY)) + elif version >= 301 and version <= 400: + # single inst + self.clusterType = CLUSTER_TYPE_SINGLE_INST + if BIN_CONFIG_VERSION_SINGLE_INST != version: + raise Exception(ErrorCode.GAUSS_516["GAUSS_51637"] % \ + ("cluster static config version[%s]" + % version, "the new version[%s]" + % BIN_CONFIG_VERSION_SINGLE_INST)) + + self.dbNodes = [] + try: + for i in range(nodeNum): + offset = (fp.tell() // PAGE_SIZE + 1) * PAGE_SIZE + fp.seek(offset) + dbNode = self.__unPackNodeInfo(fp, number, isLCCluster) + self.dbNodes.append(dbNode) + fp.close() + except Exception as e: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50204"] % \ + staticConfigFile + " Error:\nThe content is " + "not correct.") + except Exception as e: + if (fp): + fp.close() + raise Exception(str(e)) + + def __unPackNodeInfo(self, fp, number, isLCCluster=False): + """ + function : unpack a node config info + input : file + output : Object + """ + if float(number) <= 92.200: + info = fp.read(76) + (crc, nodeId, nodeName) = struct.unpack("=qI64s", info) + else: + info = fp.read(72) + (crc, nodeId, nodeName) = struct.unpack("=II64s", info) + nodeName = nodeName.decode().strip('\x00') + dbNode = dbNodeInfo(nodeId, nodeName) + info = fp.read(68) + (azName, azPriority) = struct.unpack("=64sI", info) + dbNode.azName = azName.decode().strip('\x00') + dbNode.azPriority = azPriority + + # get backIps + self.__unPackIps(fp, dbNode.backIps) + # get sshIps + self.__unPackIps(fp, dbNode.sshIps) + if (not isLCCluster): + # get cm_server information + self.__unPackCmsInfo(fp, dbNode) + # get cm_agent information + self.__unpackAgentInfo(fp, dbNode) + # get gtm information + self.__unpackGtmInfo(fp, dbNode) + info = fp.read(404) + # get cn information + self.__unpackCooInfo(fp, dbNode) + # get DB information + self.__unpackDataNode(fp, dbNode) + if (not isLCCluster): + # get etcd information + self.__unpackEtcdInfo(fp, dbNode) + info = fp.read(8) + # set DB azName for OLAP + for inst in dbNode.datanodes: + inst.azName = dbNode.azName + + return dbNode + + def __unpackEtcdInfo(self, fp, dbNode): + """ + function : unpack the info of etcd + input : file,Object + output : NA + """ + etcdInst = instanceInfo() + etcdInst.instanceRole = INSTANCE_ROLE_ETCD + etcdInst.hostname = dbNode.name + etcdInst.instanceType = INSTANCE_TYPE_UNDEFINED + info = fp.read(1100) + (etcdNum, etcdInst.instanceId, etcdInst.mirrorId, etcdhostname, + etcdInst.datadir) = struct.unpack("=IIi64s1024s", info) + etcdInst.datadir = etcdInst.datadir.decode().strip('\x00') + self.__unPackIps(fp, etcdInst.listenIps) + info = fp.read(4) + (etcdInst.port,) = struct.unpack("=I", info) + self.__unPackIps(fp, etcdInst.haIps) + info = fp.read(4) + (etcdInst.haPort,) = struct.unpack("=I", info) + if (etcdNum == 1): + dbNode.etcdNum = 1 + dbNode.etcds.append(etcdInst) + self.etcdcount += 1 + else: + dbNode.etcdNum = 0 + dbNode.etcds = [] + + def __unPackIps(self, fp, ips): + """ + function : Unpack the info of ips + input : file,[] + output : NA + """ + info = fp.read(4) + (n,) = struct.unpack("=i", info) + for i in range(int(n)): + info = fp.read(128) + (currentIp,) = struct.unpack("=128s", info) + currentIp = currentIp.decode().strip('\x00') + ips.append(str(currentIp.strip())) + info = fp.read(128 * (MAX_IP_NUM - n)) + + def __unPackCmsInfo(self, fp, dbNode): + """ + function : Unpack the info of CMserver + input : file Object + output : NA + """ + cmsInst = instanceInfo() + 
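+        # Editorial note: the 1164-byte fixed-width record read below
+        # decomposes as instanceId (=I, 4 bytes) + mirrorId (=I, 4) +
+        # cmDataDir (1024s) + level (=I, 4) + cmsFloatIp (128s) = 1164,
+        # matching the "=II1024sI128s" format passed to struct.unpack().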
cmsInst.instanceRole = INSTANCE_ROLE_CMSERVER + cmsInst.hostname = dbNode.name + info = fp.read(1164) + (cmsInst.instanceId, cmsInst.mirrorId, dbNode.cmDataDir, cmsInst.level, + self.cmsFloatIp) = struct.unpack("=II1024sI128s", info) + dbNode.cmDataDir = dbNode.cmDataDir.decode().strip('\x00') + self.cmsFloatIp = self.cmsFloatIp.decode().strip('\x00') + cmsInst.datadir = "%s/cm_server" % dbNode.cmDataDir + self.__unPackIps(fp, cmsInst.listenIps) + info = fp.read(4) + (cmsInst.port,) = struct.unpack("=I", info) + self.__unPackIps(fp, cmsInst.haIps) + info = fp.read(8) + (cmsInst.haPort, cmsInst.instanceType) = struct.unpack("=II", info) + if (cmsInst.instanceType == MASTER_INSTANCE): + dbNode.cmsNum = 1 + elif (cmsInst.instanceType == STANDBY_INSTANCE): + dbNode.cmsNum = 0 + else: + raise Exception(ErrorCode.GAUSS_512["GAUSS_51204"] + % ("CMServer", cmsInst.instanceType)) + info = fp.read(4 + 128 * MAX_IP_NUM + 4) + + if (cmsInst.instanceId): + dbNode.cmservers.append(cmsInst) + self.cmscount += 1 + else: + dbNode.cmservers = [] + + def __unpackAgentInfo(self, fp, dbNode): + """ + function : Unpack the info of agent. It should be called after + __unPackCmsInfo, because dbNode.cmDataDir + get value in __unPackCmsInfo + input : file Object + output : NA + """ + cmaInst = instanceInfo() + cmaInst.instanceRole = INSTANCE_ROLE_CMAGENT + cmaInst.hostname = dbNode.name + cmaInst.instanceType = INSTANCE_TYPE_UNDEFINED + info = fp.read(8) + (cmaInst.instanceId, cmaInst.mirrorId) = struct.unpack("=Ii", info) + self.__unPackIps(fp, cmaInst.listenIps) + cmaInst.datadir = "%s/cm_agent" % dbNode.cmDataDir + dbNode.cmagents.append(cmaInst) + + def __unpackGtmInfo(self, fp, dbNode): + """ + function : Unpack the info of gtm + input : file Object + output : NA + """ + gtmInst = instanceInfo() + gtmInst.instanceRole = INSTANCE_ROLE_GTM + gtmInst.hostname = dbNode.name + info = fp.read(1036) + (gtmInst.instanceId, gtmInst.mirrorId, gtmNum, + gtmInst.datadir) = struct.unpack("=III1024s", info) + gtmInst.datadir = gtmInst.datadir.decode().strip('\x00') + self.__unPackIps(fp, gtmInst.listenIps) + info = fp.read(8) + (gtmInst.port, gtmInst.instanceType) = struct.unpack("=II", info) + if (gtmInst.instanceType == MASTER_INSTANCE): + dbNode.gtmNum = 1 + elif (gtmInst.instanceType == STANDBY_INSTANCE): + dbNode.gtmNum = 0 + else: + raise Exception(ErrorCode.GAUSS_512["GAUSS_51204"] % ( + "GTM", gtmInst.instanceType)) + self.__unPackIps(fp, gtmInst.haIps) + info = fp.read(4) + (gtmInst.haPort,) = struct.unpack("=I", info) + info = fp.read(1024 + 4 + 128 * MAX_IP_NUM + 4) + + if (gtmNum == 1): + dbNode.gtms.append(gtmInst) + self.gtmcount += 1 + else: + dbNode.gtms = [] + + def __unpackCooInfo(self, fp, dbNode): + """ + function : Unpack the info of coordinator + input : file Object + output : NA + """ + cooInst = instanceInfo() + cooInst.instanceRole = INSTANCE_ROLE_COODINATOR + cooInst.hostname = dbNode.name + cooInst.instanceType = INSTANCE_TYPE_UNDEFINED + info = fp.read(2060) + (cooInst.instanceId, cooInst.mirrorId, cooNum, cooInst.datadir, + cooInst.ssdDir) = struct.unpack("=IiI1024s1024s", info) + cooInst.datadir = cooInst.datadir.decode().strip('\x00') + cooInst.ssdDir = cooInst.ssdDir.decode().strip('\x00') + self.__unPackIps(fp, cooInst.listenIps) + info = fp.read(8) + (cooInst.port, cooInst.haPort) = struct.unpack("=II", info) + if (cooNum == 1): + dbNode.cooNum = 1 + dbNode.coordinators.append(cooInst) + else: + dbNode.cooNum = 0 + dbNode.coordinators = [] + + def __unpackDataNode(self, fp, dbNode): + """ + 
function : Unpack the info of datanode + input : file Object + output : NA + """ + info = fp.read(4) + (dataNodeNums,) = struct.unpack("=I", info) + dbNode.dataNum = 0 + + dbNode.datanodes = [] + for i in range(dataNodeNums): + dnInst = instanceInfo() + dnInst.instanceRole = INSTANCE_ROLE_DATANODE + dnInst.hostname = dbNode.name + # In the upgrade scenario, there are two different read formats + # for the static config file. + # First use the new read mode; if the new read mode turns out + # to be wrong, + # roll back with fp.seek() and exchange the xlogdir value + # with ssddir. + info = fp.read(2056) + (dnInst.instanceId, dnInst.mirrorId, dnInst.datadir, + dnInst.xlogdir) = struct.unpack("=II1024s1024s", info) + dnInst.datadir = dnInst.datadir.decode().strip('\x00') + dnInst.xlogdir = dnInst.xlogdir.decode().strip('\x00') + + info = fp.read(1024) + (dnInst.ssdDir) = struct.unpack("=1024s", info) + dnInst.ssdDir = dnInst.ssdDir[0].decode().strip('\x00') + # If xlogdir was not set (old format), the bytes read as ssdDir + # are not an absolute path; roll back and reuse them. Used by + # upgrade. + if dnInst.ssdDir != "" and dnInst.ssdDir[0] != '/': + fp.seek(fp.tell() - 1024) + dnInst.ssdDir = dnInst.xlogdir + dnInst.xlogdir = "" + + self.__unPackIps(fp, dnInst.listenIps) + info = fp.read(8) + (dnInst.port, dnInst.instanceType) = struct.unpack("=II", info) + if (dnInst.instanceType == MASTER_INSTANCE): + dbNode.dataNum += 1 + elif (dnInst.instanceType in [STANDBY_INSTANCE, + DUMMY_STANDBY_INSTANCE, CASCADE_STANDBY]): + pass + else: + raise Exception(ErrorCode.GAUSS_512["GAUSS_51204"] + % ("DN", dnInst.instanceType)) + self.__unPackIps(fp, dnInst.haIps) + info = fp.read(4) + (dnInst.haPort,) = struct.unpack("=I", info) + if ( + self.clusterType == + CLUSTER_TYPE_SINGLE_PRIMARY_MULTI_STANDBY or + self.clusterType == CLUSTER_TYPE_SINGLE_INST): + maxStandbyCount = MIRROR_COUNT_REPLICATION_MAX - 1 + for j in range(maxStandbyCount): + peerDbInst = peerInstanceInfo() + info = fp.read(1024) + (peerDbInst.peerDataPath,) = struct.unpack("=1024s", info) + peerDbInst.peerDataPath = \ + peerDbInst.peerDataPath.decode().strip('\x00') + self.__unPackIps(fp, peerDbInst.peerHAIPs) + info = fp.read(8) + (peerDbInst.peerHAPort, + peerDbInst.peerRole) = struct.unpack("=II", info) + dnInst.peerInstanceInfos.append(peerDbInst) + else: + peerDbInst = peerInstanceInfo() + info = fp.read(1024) + (peerDbInst.peerDataPath,) = struct.unpack("=1024s", info) + peerDbInst.peerDataPath = \ + peerDbInst.peerDataPath.decode().strip('\x00') + self.__unPackIps(fp, peerDbInst.peerHAIPs) + info = fp.read(8) + (peerDbInst.peerHAPort, peerDbInst.peerRole) = \ + struct.unpack("=II", info) + info = fp.read(1024) + (peerDbInst.peerData2Path,) = struct.unpack("=1024s", info) + peerDbInst.peerData2Path = \ + peerDbInst.peerData2Path.decode().strip('\x00') + self.__unPackIps(fp, peerDbInst.peer2HAIPs) + info = fp.read(8) + (peerDbInst.peer2HAPort, peerDbInst.peer2Role) = \ + struct.unpack("=II", info) + dnInst.peerInstanceInfos.append(peerDbInst) + dbNode.datanodes.append(dnInst) + + def initFromStaticConfigWithoutUser(self, staticConfigFile): + """ + function : Init cluster from static config without user + input : file Object + output : NA + """ + fp = None + try: + staticConfigFilePath = os.path.split(staticConfigFile)[0] + versionFile = os.path.join( + staticConfigFilePath, "upgrade_version") + version, number, commitid = VersionInfo.get_version_info( + versionFile) + # read cluster info from static config file + fp = open(staticConfigFile, "rb") + if float(number) <= 92.200: + info = fp.read(32) + (crc,
lenth, version, currenttime, nodeNum, + localNodeId) = struct.unpack("=qIIqiI", info) + else: + info = fp.read(28) + (crc, lenth, version, currenttime, nodeNum, + localNodeId) = struct.unpack("=IIIqiI", info) + if (version <= 100): + raise Exception(ErrorCode.GAUSS_516["GAUSS_51637"] + % ("cluster static config version[%s]" + % version, "the new version[%s]" + % BIN_CONFIG_VERSION)) + elif (version >= 101 and version <= 200): + self.clusterType = CLUSTER_TYPE_SINGLE + if (BIN_CONFIG_VERSION_SINGLE != version): + raise Exception(ErrorCode.GAUSS_516["GAUSS_51637"] + % ("cluster static config version[%s]" + % version, "the new version[%s]" + % BIN_CONFIG_VERSION_SINGLE)) + elif (version >= 201 and version <= 300): + self.clusterType = CLUSTER_TYPE_SINGLE_PRIMARY_MULTI_STANDBY + if ( + BIN_CONFIG_VERSION_SINGLE_PRIMARY_MULTI_STANDBY != + version): + raise Exception(ErrorCode.GAUSS_516["GAUSS_51637"] % ( + "cluster static config version[%s]" % version, + "the new version[%s]" + % BIN_CONFIG_VERSION_SINGLE_PRIMARY_MULTI_STANDBY)) + elif (version >= 301 and version <= 400): + self.clusterType = CLUSTER_TYPE_SINGLE_INST + if (BIN_CONFIG_VERSION_SINGLE_INST != version): + raise Exception(ErrorCode.GAUSS_516["GAUSS_51637"] + % ("cluster static config version[%s]" + % version, "the new version[%s]" + % BIN_CONFIG_VERSION_SINGLE_INST)) + + self.dbNodes = [] + for i in range(nodeNum): + offset = (fp.tell() // PAGE_SIZE + 1) * PAGE_SIZE + fp.seek(offset) + dbNode = self.__unPackNodeInfo(fp, number) + self.dbNodes.append(dbNode) + fp.close() + except Exception as e: + if (fp): + fp.close() + raise Exception(ErrorCode.GAUSS_512["GAUSS_51203"] + % "cluster" + " Error: \n%s" % str(e)) + + def __appendInstanceId(self, static_config_file): + """ + function : append instance ids to the old cluster. + input : file Object + output : NA + """ + try: + # init oldClusterInfo + oldClusterInfo = dbClusterInfo() + oldClusterInfo.initFromStaticConfigWithoutUser(static_config_file) + + # get max CN/CMA/master-standby DN/dummy DN instanceId of old + # cluster. + # CMS/GTM/ETCD instanceId and nodeId will not be changed. + maxCNInstanceId = 0 + maxCMAInstanceId = 0 + maxMasterDNInstanceId = 0 + maxDummyDNInstanceId = 0 + # new DB mirrorId should be refreshed. + # CN mirrorId is const -1, so no need to refresh. + # CMA mirrorId is const -3, so no need to refresh. + # ETCD mirrorId is const -5, so no need to refresh. + # CMS and GTM of the new cluster will not exist simultaneously + # with the old cluster, + # so no need to refresh.
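+ # Illustration with hypothetical ids: if the old cluster's largest + # CN instanceId were 5003 and its largest master DN instanceId 6001, + # the loops below would leave maxCNInstanceId = 5004 and + # maxMasterDNInstanceId = 6003 after the increments, and these become + # the first ids handed to the new nodes.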
+ maxMirrorId = 0 + for olddbNode in oldClusterInfo.dbNodes: + for oldcnInst in olddbNode.coordinators: + if (oldcnInst.instanceId > maxCNInstanceId): + maxCNInstanceId = oldcnInst.instanceId + for oldcmaInst in olddbNode.cmagents: + if (oldcmaInst.instanceId > maxCMAInstanceId): + maxCMAInstanceId = oldcmaInst.instanceId + for olddnInst in olddbNode.datanodes: + if (olddnInst.instanceType == MASTER_INSTANCE and + olddnInst.instanceId > maxMasterDNInstanceId): + maxMasterDNInstanceId = olddnInst.instanceId + elif (olddnInst.instanceType == DUMMY_STANDBY_INSTANCE and + olddnInst.instanceId > maxDummyDNInstanceId): + maxDummyDNInstanceId = olddnInst.instanceId + if (olddnInst.mirrorId > maxMirrorId): + maxMirrorId = olddnInst.mirrorId + for oldcmsInst in olddbNode.cmservers: + if (oldcmsInst.mirrorId > maxMirrorId): + maxMirrorId = oldcmsInst.mirrorId + for oldetcdInst in olddbNode.etcds: + if (oldetcdInst.mirrorId > maxMirrorId): + maxMirrorId = oldetcdInst.mirrorId + + maxCNInstanceId += 1 + maxCMAInstanceId += 1 + maxMasterDNInstanceId += 2 + maxDummyDNInstanceId += 1 + maxMirrorId += 1 + mirrorIdDict = {} + + for newdbNode in self.dbNodes: + if (len(newdbNode.coordinators) > 0): + ## refresh CN instanceId here + newdbNode.coordinators[0].instanceId = maxCNInstanceId + maxCNInstanceId += 1 + + if (len(newdbNode.cmagents) > 0): + ## refresh CMA instanceId here + newdbNode.cmagents[0].instanceId = maxCMAInstanceId + maxCMAInstanceId += 1 + + for dnInst in newdbNode.datanodes: + if (dnInst.instanceType == MASTER_INSTANCE): + masterInst = dnInst + ## refresh master instanceId here + dnInst.instanceId = maxMasterDNInstanceId + maxMasterDNInstanceId += 1 + + ## get related standby and dummy-standby instances + for dbNode in self.dbNodes: + for inst in dbNode.datanodes: + if (inst.mirrorId == dnInst.mirrorId and + inst.instanceType == STANDBY_INSTANCE): + standbyInst = inst + ## refresh related standby instanceId here + inst.instanceId = maxMasterDNInstanceId + maxMasterDNInstanceId += 1 + + elif (inst.mirrorId == dnInst.mirrorId and + inst.instanceType == + DUMMY_STANDBY_INSTANCE): + dummyInst = inst + ## refresh related dummy-standby + # instanceId here + inst.instanceId = maxDummyDNInstanceId + maxDummyDNInstanceId += 1 + + ## refresh mirrorId here; it must be refreshed last.
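+ # Illustration: the renumbered master, standby and dummy-standby of + # one DN group are recorded below as mirrorIdDict[maxMirrorId] = + # [masterInst, standbyInst, dummyInst]; the loop that follows then + # stamps the same new mirrorId on all three members.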
+ mirrorIdDict[maxMirrorId] = [masterInst, standbyInst, + dummyInst] + maxMirrorId += 1 + + for mirrorId in list(mirrorIdDict.keys()): + mirrorIdDict[mirrorId][0].mirrorId = mirrorId + mirrorIdDict[mirrorId][1].mirrorId = mirrorId + mirrorIdDict[mirrorId][2].mirrorId = mirrorId + except Exception as e: + raise Exception(str(e)) + + def setInstId(self, instList, nodeIdInstIdDict, newNodeId, newInstId): + """ + instList instance list + nodeIdInstIdDict node id and instance id dict + newNodeId new node id + newInstId new instance id + + """ + for inst in instList: + if (newNodeId in list(nodeIdInstIdDict.keys())): + inst.instanceId = nodeIdInstIdDict[newNodeId] + # the new agent instance + else: + inst.instanceId = newInstId + newInstId += 1 + return newInstId + + def refreshInstIdByInstType(self, oldNodesList, newNodesList, + instType="cmagent"): + """ + """ + nodeIdInstanceIdDict = {} + # get the node id and cmagent/cmserver/gtm/etcd/cn instance id dict + for oldNode in oldNodesList: + if (instType == "cmagent"): + for cmaInst in oldNode.cmagents: + nodeIdInstanceIdDict[oldNode.id] = cmaInst.instanceId + elif (instType == "cmserver"): + for cmsInst in oldNode.cmservers: + nodeIdInstanceIdDict[oldNode.id] = cmsInst.instanceId + elif (instType == "gtm"): + for gtmInst in oldNode.gtms: + nodeIdInstanceIdDict[oldNode.id] = gtmInst.instanceId + elif (instType == "etcd"): + for etcdInst in oldNode.etcds: + nodeIdInstanceIdDict[oldNode.id] = etcdInst.instanceId + elif (instType == "cn"): + for cnInst in oldNode.coordinators: + # warm-standby: the number of nodes is the same, so + # refresh by id + # add CN outside cluster: refresh by id or nodename + # add CN inside cluster: refresh by id or nodename + # delete CN outside cluster: refresh by nodename + # delete CN inside cluster: refresh by id or nodename + # expand: refresh by id or nodename + # shrink at tail: refresh by id or nodename + # shrink in mid: refresh by nodename + if (len(oldNodesList) == len(newNodesList)): + nodeIdInstanceIdDict[oldNode.id] = cnInst.instanceId + else: + nodeIdInstanceIdDict[oldNode.name] = cnInst.instanceId + + # sort instance id lists and set newInstId = the max ID num + 1 + instIDList = list(nodeIdInstanceIdDict.values()) + instIDList.sort() + if (len(instIDList) > 0): + newInstId = instIDList[-1] + 1 + else: + newInstId = 1 + + # refresh instance id by oldClusterInfo + for newNode in newNodesList: + if (instType == "cmagent"): + newInstId = self.setInstId(newNode.cmagents, + nodeIdInstanceIdDict, newNode.id, + newInstId) + elif (instType == "cmserver"): + newInstId = self.setInstId(newNode.cmservers, + nodeIdInstanceIdDict, newNode.id, + newInstId) + elif (instType == "gtm"): + newInstId = self.setInstId(newNode.gtms, nodeIdInstanceIdDict, + newNode.id, newInstId) + elif (instType == "etcd"): + newInstId = self.setInstId(newNode.etcds, nodeIdInstanceIdDict, + newNode.id, newInstId) + elif (instType == "cn"): + if (len(oldNodesList) == len(newNodesList)): + newInstId = self.setInstId(newNode.coordinators, + nodeIdInstanceIdDict, + newNode.id, newInstId) + else: + newInstId = self.setInstId(newNode.coordinators, + nodeIdInstanceIdDict, + newNode.name, newInstId) + + def flushCNInstanceId(self, oldNodesList, newNodesList): + """ + function : Refresh CN instance id + input : oldNodesList: The cluster nodes list from + static_config_file + newNodesList: The cluster nodes list of + the new nodes + output : NA + """ + self.refreshInstIdByInstType(oldNodesList, newNodesList, "cn") + + def getMaxStandbyAndDummyDNInstanceId(self, oldNodesList): + """ +
function : get max standby and dummy DB instanceId of the old cluster. + input : oldNodesList: The cluster nodes list from + static_config_file + output : NA + """ + # get max standby and dummy DB instanceId of old cluster. + maxStandbyDNInstanceId = 0 + maxDummyDNInstanceId = 0 + for oldNode in oldNodesList: + for olddnInst in oldNode.datanodes: + if (olddnInst.instanceType == STANDBY_INSTANCE and + olddnInst.instanceId > maxStandbyDNInstanceId): + maxStandbyDNInstanceId = olddnInst.instanceId + elif (olddnInst.instanceType == DUMMY_STANDBY_INSTANCE and + olddnInst.instanceId > maxDummyDNInstanceId): + maxDummyDNInstanceId = olddnInst.instanceId + return (maxStandbyDNInstanceId, maxDummyDNInstanceId) + + def flushDNInstanceId(self, oldNodesList, newNodesList): + """ + function : Refresh DB instance ids. By the time DB ids are + refreshed, the node ids have already been refreshed. + input : oldNodesList: The cluster nodes list from + static_config_file + newNodesList: The cluster nodes list of + the new nodes + output : NA + """ + # get all old node id list + oldNodeIdList = [] + for oldNode in oldNodesList: + oldNodeIdList.append(oldNode.id) + + # get max standby and dummy DB instanceId of old cluster. + (maxStandbyDNInstanceId, + maxDummyDNInstanceId) = self.getMaxStandbyAndDummyDNInstanceId( + oldNodesList) + # set next primary/standby and dummy DB instanceId + maxMasterDNInstanceId = maxStandbyDNInstanceId + 1 + maxDummyDNInstanceId += 1 + + # refresh DB instance id of new nodes by oldNodesList and + # maxMasterDNInstanceId/maxDummyDNInstanceId + oldLen = len(oldNodesList) + newLen = len(newNodesList) + minLen = 0 + maxLen = 0 + if (oldLen > newLen): + maxLen = oldLen + minLen = newLen + else: + maxLen = newLen + minLen = oldLen + + # refresh DB ids one by one against the old nodes + i = 0 + for newNode in newNodesList[0:minLen]: + # stop copying DB instanceIds if the DB numbers are not equal. + # Only happens when moving DB instances + if (len(oldNodesList[i].datanodes) != len(newNode.datanodes)): + break + else: + # refresh DB instanceId one by one (primary/standby/dummy in + # cluster_static_config ) + instid = 0 + for dnInst in newNode.datanodes: + dnInst.instanceId = oldNodesList[i].datanodes[ + instid].instanceId + instid += 1 + i += 1 + + # refresh the new node DB ids + for newNode in newNodesList[minLen:maxLen]: + for dnInst in newNode.datanodes: + if (dnInst.instanceType == MASTER_INSTANCE): + ## get standby/dummy instances + standbyInsts = [] + dummyStandbyInsts = [] + peerInsts = self.getPeerInstance(dnInst) + for inst in peerInsts: + if (inst.instanceType == STANDBY_INSTANCE): + standbyInsts.append(inst) + elif (inst.instanceType == DUMMY_STANDBY_INSTANCE): + dummyStandbyInsts.append(inst) + + ## refresh master instanceId here + dnInst.instanceId = maxMasterDNInstanceId + maxMasterDNInstanceId += 1 + + ## refresh standby/dummy instanceId here.
Only do it + # under the new dbnodes list + for tmpNode in newNodesList[minLen:maxLen]: + for tmpdnInst in tmpNode.datanodes: + if (tmpdnInst.instanceType == STANDBY_INSTANCE): + for standbyInst in standbyInsts: + if (tmpdnInst.instanceId == + standbyInst.instanceId): + ## refresh standby instanceId here + tmpdnInst.instanceId = \ + maxMasterDNInstanceId + maxMasterDNInstanceId += 1 + elif ( + tmpdnInst.instanceType == + DUMMY_STANDBY_INSTANCE): + for dummyStandbyInst in dummyStandbyInsts: + if (tmpdnInst.instanceId == + dummyStandbyInst.instanceId): + ## refresh dummy-standby instanceId here + tmpdnInst.instanceId = \ + maxDummyDNInstanceId + maxDummyDNInstanceId += 1 + + def initFromXml(self, xmlFile, static_config_file="", mode="inherit"): + """ + function : Init cluster from xml config file + input : file Object for OLAP + dbClusterInfo instance + inherit: instance ids are inherited from the old cluster. + append: instance ids are appended to the old cluster. + output : NA + """ + if (not os.path.exists(xmlFile)): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] + % "XML configuration file") + + self.xmlFile = xmlFile + + # Set the environment variable, then the readcluster command can + # read from it. + os.putenv(ENV_CLUSTERCONFIG, xmlFile) + # parse xml file + global xmlRootNode + try: + xmlRootNode = initParserXMLFile(xmlFile) + except Exception as e: + raise Exception(ErrorCode.GAUSS_512["GAUSS_51234"] + % xmlFile + " Error:\n%s" % str(e)) + + self.__readClusterGlobalInfo() + if "HOST_IP" in list(os.environ.keys()): + self.__readAgentConfigInfo() + self.__readClusterNodeInfo() + self.__checkAZForSingleInst() + IpPort = self.__checkInstancePortandIP() + return IpPort + + def getClusterNodeNames(self): + """ + function : Get the cluster's node names. + input : NA + output : NA + """ + return [dbNode.name for dbNode in self.dbNodes] + + def getClusterNodeIds(self): + """ + function : Get the cluster's node ids. + input : NA + output : NA + """ + return [dbNode.id for dbNode in self.dbNodes] + + def getdataNodeInstanceType(self, nodeId=-1): + """ + function: get the dataNode's instanceType + input: nodeId + output: instanceType + """ + for dbNode in self.dbNodes: + if nodeId == dbNode.id: + for dataNode in dbNode.datanodes: + return dataNode.instanceType + + def getDataDir(self, nodeId=-1): + """ + function: get the dataNode's data path + input: nodeId + output: data path + """ + for dbNode in self.dbNodes: + if nodeId == dbNode.id: + for dataNode in dbNode.datanodes: + return dataNode.datadir + + def getHostNameByNodeId(self, nodeId=-1): + """ + function: get the dataNode's name by nodeId + input: nodeId + output: node name + """ + for dbNode in self.dbNodes: + if nodeId == dbNode.id: + return dbNode.name + + def getClusterNewNodeNames(self): + """ + function : Get the cluster's new node names.
+ input : NA + output : NA + """ + return [dbNode.name for dbNode in self.newNodes] + + def getClusterDirectorys(self, hostName="", ignore=True): + """ + function : Get all directories of the cluster + input : NA + output : Dict + """ + clusterDirs = {} + clusterDirs["appPath"] = [self.appPath] + if (ignore): + clusterDirs["logPath"] = [self.logPath] + # get all cluster directories + for dbNode in self.dbNodes: + nodeName = dbNode.name + if (hostName != ""): + if (hostName != nodeName): + continue + nodeDirs = [] + # including cm_server, cm_agent, cn, dn, gtm, etcd, ssd + nodeDirs.append(dbNode.cmDataDir) + for dbInst in dbNode.cmservers: + nodeDirs.append(dbInst.datadir) + for dbInst in dbNode.cmagents: + nodeDirs.append(dbInst.datadir) + for dbInst in dbNode.gtms: + nodeDirs.append(dbInst.datadir) + for dbInst in dbNode.coordinators: + nodeDirs.append(dbInst.datadir) + if (len(dbInst.ssdDir) != 0): + nodeDirs.append(dbInst.ssdDir) + for dbInst in dbNode.datanodes: + nodeDirs.append(dbInst.datadir) + nodeDirs.append(dbInst.xlogdir) + if (len(dbInst.ssdDir) != 0): + nodeDirs.append(dbInst.ssdDir) + for dbInst in dbNode.etcds: + nodeDirs.append(dbInst.datadir) + clusterDirs[nodeName] = nodeDirs + return clusterDirs + + def getDbNodeByName(self, name): + """ + function : Get node by name. + input : nodename + output : dbNodeInfo or None + """ + for dbNode in self.dbNodes: + if (dbNode.name == name): + return dbNode + + return None + + def getDbNodeByID(self, inputid): + """ + function : Get node by id. + input : node id + output : dbNodeInfo or None + """ + for dbNode in self.dbNodes: + if (dbNode.id == inputid): + return dbNode + + return None + + def getMirrorInstance(self, mirrorId): + """ + function : Get the primary instance and standby instances. + input : mirrorId + output : [] + """ + instances = [] + + for dbNode in self.dbNodes: + for inst in dbNode.cmservers: + if (inst.mirrorId == mirrorId): + instances.append(inst) + + for inst in dbNode.gtms: + if (inst.mirrorId == mirrorId): + instances.append(inst) + + for inst in dbNode.coordinators: + if (inst.mirrorId == mirrorId): + instances.append(inst) + + for inst in dbNode.datanodes: + if (inst.mirrorId == mirrorId): + instances.append(inst) + + return instances + + def getPeerInstance(self, dbInst): + """ + function : Get peer instances of the specified instance. + input : [] + output : [] + """ + instances = [] + for dbNode in self.dbNodes: + for inst in dbNode.datanodes: + if (inst.mirrorId == dbInst.mirrorId and + inst.instanceId != dbInst.instanceId): + instances.append(inst) + + return instances + + def getClusterBackIps(self): + """ + function : Get cluster back IPs. + input : NA + output : [] + """ + backIps = [] + backIpNum = [] + # get backIp number + for dbNode in self.dbNodes: + backIpNum.append(len(dbNode.backIps)) + if max(backIpNum) != min(backIpNum): + raise Exception(ErrorCode.GAUSS_512["GAUSS_51227"] % "backIps") + for num in range(backIpNum[0]): + ips = [] + for dbNode in self.dbNodes: + ips.append(dbNode.backIps[num]) + backIps.extend(ips) + return backIps + + def getClusterSshIps(self): + """ + function : Get cluster ssh IPs.
+ input : NA + output : [] + """ + sshIps = [] + sshIpNum = [] + # get sshIp number + for dbNode in self.dbNodes: + sshIpNum.append(len(dbNode.sshIps)) + if max(sshIpNum) != min(sshIpNum): + raise Exception(ErrorCode.GAUSS_512["GAUSS_51227"] % "sshIps") + for num in range(sshIpNum[0]): + ips = [] + for dbNode in self.dbNodes: + ips.append(dbNode.sshIps[num]) + sshIps.append(ips) + return sshIps + + def getazNames(self): + """ + """ + azMap = {} + azNames = [] + for dbNode in self.dbNodes: + azMap[dbNode.azName] = [] + if (dbNode.azName not in azNames): + azNames.append(dbNode.azName) + for dbNode in self.dbNodes: + azMap[dbNode.azName].append(dbNode.azPriority) + for azName in azNames: + azMap[azName] = max(azMap[azName]) + azNames = sorted(azMap, key=lambda x: azMap[x]) + return azNames + + def getNodeNameByBackIp(self, backIp): + """ + function : Get Nodename by backip. + input : String + output : String + """ + nodeName = "" + for dbNode in self.dbNodes: + if (backIp in dbNode.backIps): + nodeName = dbNode.name + break + return nodeName + + def __checkInstancePortandIP(self): + """ + function : Check instance Port and IP. + input : NA + output : NA + """ + nodeipport = {} + for dbNode in self.dbNodes: + nodeips = [] + nodeports = [] + cmsListenIPs = [] + ipCheckMap = {} + backIP1 = dbNode.backIps[0] + nodeips.extend(dbNode.backIps) + nodeips.extend(dbNode.sshIps) + # get node ip and node port from cmserver + for cmsInst in dbNode.cmservers: + nodeips.extend(cmsInst.listenIps) + nodeips.extend(cmsInst.haIps) + cmsListenIPs = cmsInst.listenIps + ipCheckMap["cmServerListenIp1"] = cmsInst.listenIps[0] + ipCheckMap["cmServerHaIp1"] = cmsInst.haIps[0] + nodeports.append(cmsInst.port) + nodeports.append(cmsInst.haPort) + # get node ip and node port from gtm + for gtmInst in dbNode.gtms: + nodeips.extend(gtmInst.listenIps) + nodeips.extend(gtmInst.haIps) + nodeports.append(gtmInst.port) + nodeports.append(gtmInst.haPort) + # get node ip and node port from cn + for cooInst in dbNode.coordinators: + nodeips.extend(cooInst.listenIps) + nodeips.extend(cooInst.haIps) + nodeports.append(cooInst.port) + nodeports.append(cooInst.haPort) + # get node ip and node port from dn + for dnInst in dbNode.datanodes: + nodeips.extend(dnInst.listenIps) + nodeips.extend(dnInst.haIps) + nodeports.append(dnInst.port) + nodeports.append(dnInst.haPort) + if (self.checkSctpPort): + nodeports.append(dnInst.port + + dbNode.getDnNum(dnInst.instanceType) * 2) + # get node ip and node port from etcd + for etcdInst in dbNode.etcds: + nodeips.extend(etcdInst.listenIps) + nodeips.extend(etcdInst.haIps) + nodeports.append(etcdInst.port) + nodeports.append(etcdInst.haPort) + ipCheckMap["etcdListenIp1"] = etcdInst.listenIps[0] + ipCheckMap["etcdHaIp1"] = etcdInst.haIps[0] + if (len(etcdInst.listenIps) > 1): + etcdListenIp2 = etcdInst.listenIps[1] + if (etcdListenIp2 != backIP1): + raise Exception(ErrorCode.GAUSS_512["GAUSS_51220"] % ( + "%s with etcdListenIp2" % etcdListenIp2) + + " Error: \nThe IP address must be " + "the same as the backIP1 %s." % + backIP1) + + # CMS IP must be consistent with CMA IP + cmaListenIPs = dbNode.cmagents[0].listenIps + if (cmsListenIPs and cmsListenIPs != cmaListenIPs): + raise Exception(ErrorCode.GAUSS_512["GAUSS_51220"] % ( + "%s with cm_server" % cmsListenIPs) + + " Error: \nThe IP address must be the same " + "as the cm_agent %s." 
% cmaListenIPs) + if (g_networkType == 1): + # Check + ipCheckMap["cmAgentConnectIp1"] = cmaListenIPs[0] + if (len(set(ipCheckMap.values())) != 1): + errMsg = " Error: \nThe following IPs must be consistent:" + for ipConfigItem in list(ipCheckMap.keys()): + errMsg += "\n%s: %s" % ( + ipConfigItem, ipCheckMap[ipConfigItem]) + raise Exception(ErrorCode.GAUSS_512["GAUSS_51220"] % ( + "with cm and etcd") + errMsg) + # create a dictionary + nodeipport[dbNode.name] = [nodeips, nodeports] + # check port and ip + self.__checkPortandIP(nodeips, nodeports, dbNode.name) + return nodeipport + + def __checkPortandIP(self, ips, ports, name): + """ + function : Check port and IP. + input : String,int,string + output : NA + """ + ipsCopy = list(set(ips)) + portsCopy = list(set(ports)) + for port in portsCopy: + if (not self.__isPortValid(port)): + raise Exception(ErrorCode.GAUSS_512["GAUSS_51233"] + % (port, name) + " Please check it.") + + for ip in ipsCopy: + if (not self.__isIpValid(ip)): + raise Exception(ErrorCode.GAUSS_506["GAUSS_50603"] + \ + "The IP address is: %s." % ip + " Please " + "check it.") + + def __readAgentConfigInfo(self): + """ + Read agent config info from xml config's tag + :return: NA + """ + # Read agent tag + (retStatus, retValue) = readOneClusterConfigItem(xmlRootNode, + "agentToolPath", + "CLUSTER") + if (retStatus == 0): + self.agentPath = retValue.strip() + checkPathVaild(self.agentPath) + + (retStatus, retValue) = readOneClusterConfigItem(xmlRootNode, + "agentLogPath", + "CLUSTER") + if (retStatus == 0): + self.agentLogPath = retValue.strip() + checkPathVaild(self.agentLogPath) + + def __readClusterGlobalInfo(self): + """ + Read cluster info from xml config's tag except nodeNames, + clusterRings and sqlExpandNames info + :return: NA + """ + global g_networkType + self.clusterType = CLUSTER_TYPE_SINGLE_INST + + # Read cluster name + (retStatus, retValue) = readOneClusterConfigItem(xmlRootNode, + "clusterName", + "cluster") + if (retStatus != 0): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50204"] + % "cluster name" + " Error: \n%s" % retValue) + self.name = retValue.strip() + checkPathVaild(self.name) + + # Read application install path + (retStatus, retValue) = readOneClusterConfigItem(xmlRootNode, + "gaussdbAppPath", + "cluster") + if (retStatus != 0): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50204"] % \ + "application installation path" + " Error: \n%s" + % retValue) + self.appPath = os.path.normpath(retValue) + checkPathVaild(self.appPath) + + # Read application log path + (retStatus, retValue) = readOneClusterConfigItem(xmlRootNode, + "gaussdbLogPath", + "cluster") + if (retStatus == 0): + self.logPath = os.path.normpath(retValue) + checkPathVaild(self.logPath) + elif (retStatus == 2): + self.logPath = "" + else: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50204"] % \ + "application log path" + " Error: \n%s" % retValue) + if (self.logPath == ""): + self.logPath = "/var/log/gaussdb" + if (not os.path.isabs(self.logPath)): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50213"] % \ + ("%s log path(%s)" % ( + VersionInfo.PRODUCT_NAME, self.logPath))) + + # Read network type + (retStatus, retValue) = readOneClusterConfigItem(xmlRootNode, + "networkType", + "cluster") + if (retStatus == 0): + if (retValue.isdigit() and int(retValue) in [0, 1]): + g_networkType = int(retValue) + else: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50204"] % \ + "cluster network type" + " Error: \nThe " + "parameter value " + "must be 0 or 1.") + elif (retStatus == 2): + g_networkType = 0 + 
else: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50204"] % \ + "cluster network type" + " Error: \n%s" % retValue) + + if "HOST_IP" in list(os.environ.keys()): + (retStatus, retValue) = readOneClusterConfigItem(xmlRootNode, + "corePath", + "cluster") + self.corePath = retValue + + def __getAllHostnamesFromDEVICELIST(self): + """ + function : Read all host names from the DEVICELIST element + input : NA + output : [] + """ + if not xmlRootNode.findall('DEVICELIST'): + raise Exception(ErrorCode.GAUSS_512["GAUSS_51200"] % 'DEVICELIST') + DeviceArray = xmlRootNode.findall('DEVICELIST')[0] + DeviceNodeList = DeviceArray.findall('DEVICE') + allNodeName = [] + for dev in DeviceNodeList: + paramList = dev.findall('PARAM') + for param in paramList: + thisname = param.attrib['name'] + if (thisname == 'name'): + value = param.attrib['value'] + allNodeName.append(value) + return allNodeName + + def __readClusterNodeInfo(self): + """ + function : Read cluster node info. + input : NA + output : NA + """ + # read cluster node info. + (retStatus, retValue) = readOneClusterConfigItem(xmlRootNode, + "nodeNames", + "cluster") + if (retStatus != 0): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50204"] + % "node names" + " Error: \n%s" % retValue) + nodeNames = [] + nodeNames_tmp = retValue.split(",") + for nodename in nodeNames_tmp: + nodeNames.append(nodename.strip()) + if (len(nodeNames) == 0): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50204"] % \ + "cluster configuration" + " There is no node in the " + "cluster configuration" + " file.") + + if (len(nodeNames) != len(list(set(nodeNames)))): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50204"] % \ + "cluster configuration" + " There are " + "repeated nodes in the " + "cluster configuration " + "file.") + + # Check node names + nodeNameList = self.__getAllHostnamesFromDEVICELIST() + if len(nodeNameList) != len(nodeNames): + raise Exception(ErrorCode.GAUSS_512["GAUSS_51236"] + \ + " The numbers of nodeNames and DEVICE are not " + "the same.") + for nodeName in nodeNames: + if nodeName not in nodeNameList: + raise Exception(ErrorCode.GAUSS_512["GAUSS_51236"] + \ + " Cannot find DEVICE for [%s]." % nodeName) + # Get basic info of node: name, ip and master instance number etc. + self.dbNodes = [] + i = 1 + for name in nodeNames: + dbNode = dbNodeInfo(i, name) + self.__readNodeBasicInfo(dbNode, nodeNames) + self.dbNodes.append(dbNode) + i += 1 + + # Get cm server info + for dbNode in self.dbNodes: + self.__readCmsConfig(dbNode) + + # Get datanode info + for dbNode in self.dbNodes: + self.__readDataNodeConfig(dbNode) + + # Get cm agent info + for dbNode in self.dbNodes: + self.__readCmaConfig(dbNode) + + # set DB port for OLAP + for node in self.dbNodes: + for inst in node.datanodes: + inst.azName = node.azName + self.__setNodePortForSinglePrimaryMultiStandby() + + def __getPeerInstance(self, dbInst): + """ + function : Get peer instances of the specified instance.
+ input : [] + output : [] + """ + instances = [] + if (dbInst.instanceRole == INSTANCE_ROLE_CMSERVER): + for dbNode in self.dbNodes: + for inst in dbNode.cmservers: + if (inst.mirrorId == dbInst.mirrorId and + inst.instanceId != dbInst.instanceId): + instances.append(inst) + elif (dbInst.instanceRole == INSTANCE_ROLE_GTM): + for dbNode in self.dbNodes: + for inst in dbNode.gtms: + if (inst.mirrorId == dbInst.mirrorId and + inst.instanceId != dbInst.instanceId): + instances.append(inst) + elif (dbInst.instanceRole == INSTANCE_ROLE_COODINATOR): + for dbNode in self.dbNodes: + for inst in dbNode.coordinators: + if (inst.mirrorId == dbInst.mirrorId and + inst.instanceId != dbInst.instanceId): + instances.append(inst) + elif (dbInst.instanceRole == INSTANCE_ROLE_DATANODE): + for dbNode in self.dbNodes: + for inst in dbNode.datanodes: + if (inst.mirrorId == dbInst.mirrorId and + inst.instanceId != dbInst.instanceId): + instances.append(inst) + + return instances + + def __setNodePortForSinglePrimaryMultiStandby(self): + """ + function : set the standby DB port. + input : NA + output : NA + """ + for dbNode in self.dbNodes: + i = 0 + for dbInst in dbNode.datanodes: + if (dbInst.instanceType == MASTER_INSTANCE): + dbInst.port = dbNode.masterBasePorts[ + INSTANCE_ROLE_DATANODE] + i * \ + PORT_STEP_SIZE + dbInst.haPort = dbInst.port + 1 + peerInsts = self.__getPeerInstance(dbInst) + for j in range(len(peerInsts)): + peerInsts[j].port = dbInst.port + peerInsts[j].haPort = peerInsts[j].port + 1 + i += 1 + # flush CMSERVER instance port + i = 0 + cmsbaseport = 0 + for dbInst in dbNode.cmservers: + if (dbInst.instanceType == MASTER_INSTANCE): + cmsbaseport = dbNode.masterBasePorts[ + INSTANCE_ROLE_CMSERVER] + dbInst.port = cmsbaseport + i * PORT_STEP_SIZE + dbInst.haPort = dbInst.port + 1 + peerInsts = self.__getPeerInstance(dbInst) + for j in range(len(peerInsts)): + peerInsts[j].port = cmsbaseport + peerInsts[j].haPort = peerInsts[j].port + 1 + i += 1 + # flush GTM instance port + i = 0 + gtmbaseport = 0 + for dbInst in dbNode.gtms: + if (dbInst.instanceType == MASTER_INSTANCE): + gtmbaseport = dbNode.masterBasePorts[INSTANCE_ROLE_GTM] + dbInst.port = gtmbaseport + i * PORT_STEP_SIZE + dbInst.haPort = dbInst.port + 1 + peerInsts = self.__getPeerInstance(dbInst) + for j in range(len(peerInsts)): + peerInsts[j].port = gtmbaseport + peerInsts[j].haPort = peerInsts[j].port + 1 + i += 1 + + def __readExpandNodeInfo(self): + """ + function : Read expand node info. + input : NA + output : NA + """ + # read expand node info. + (retStatus, retValue) = readOneClusterConfigItem(xmlRootNode, + "sqlExpandNames", + "cluster") + if (retStatus != 0 or retValue.strip() == ""): + return + nodeNames = [] + nodeNames_tmp = retValue.split(",") + for nodename in nodeNames_tmp: + nodeNames.append(nodename.strip()) + if (len(nodeNames) == 0): + return + + for nodeName in nodeNames: + dbNode = self.getDbNodeByName(nodeName) + if (dbNode is not None): + self.newNodes.append(dbNode) + else: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50204"] + % "expand nodes configuration" + + " There is no node [%s] in the cluster " + "configuration file." % nodeName) + + def __readClusterRingsInfo(self): + """ + function : Read cluster rings info. + input : NA + output : NA + """ + # read cluster rings info.
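+ # Illustration with a hypothetical value: a clusterRings string such + # as "hostA,hostB,hostC;hostD,hostE,hostF" is split on ";" into rings + # and on "," into host names, so self.clusterRings becomes + # [['hostA', 'hostB', 'hostC'], ['hostD', 'hostE', 'hostF']].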
+ (retStatus, retValue) = readOneClusterConfigItem(xmlRootNode, + "clusterRings", + "cluster") + if (retStatus != 0 or retValue.strip() == ""): + return + rings = retValue.split(";") + if (len(rings) == 0): + return + for ring in rings: + ring_tmp = [] + ring_new = ring.strip().split(",") + for ring_one in ring_new: + ring_tmp.append(ring_one.strip()) + self.clusterRings.append(ring_tmp) + + def __readNodeBasicInfo(self, dbNode, nodenames): + """ + function : Read basic info of specified node. + input : [] + output : NA + """ + # get backIp + dbNode.backIps = self.__readNodeIps(dbNode.name, "backIp") + if (len(dbNode.backIps) == 0): + raise Exception(ErrorCode.GAUSS_512["GAUSS_51207"] % dbNode.name) + # get sshIp + dbNode.sshIps = self.__readNodeIps(dbNode.name, "sshIp") + if (len(dbNode.sshIps) == 0): + dbNode.sshIps = dbNode.backIps[:] + # get virtualIp + dbNode.virtualIp = self.__readVirtualIp(dbNode.name, "virtualIp") + + # Get cm_server number + dbNode.cmsNum = self.__readNodeIntValue(dbNode.name, "cmsNum", True, 0) + # Get gtm number + dbNode.gtmNum = self.__readNodeIntValue(dbNode.name, "gtmNum", True, 0) + # Get etcd number + dbNode.etcdNum = self.__readNodeIntValue(dbNode.name, "etcdNum", True, + 0) + # Get cn number + dbNode.cooNum = self.__readNodeIntValue(dbNode.name, "cooNum", True, 0) + # Get DB number + dbNode.dataNum = self.__readNodeIntValue(dbNode.name, "dataNum", True, + 0) + + # check dataNum + if (dbNode.dataNum < 0): + raise Exception( + ErrorCode.GAUSS_512["GAUSS_51208"] % ("dn", dbNode.dataNum)) + + # Get base port + if dbNode.dataNum > 0: + dbNode.masterBasePorts[INSTANCE_ROLE_DATANODE] = \ + self.__readNodeIntValue(dbNode.name, "dataPortBase", + True, MASTER_BASEPORT_DATA) + dbNode.standbyBasePorts[INSTANCE_ROLE_DATANODE] = \ + dbNode.masterBasePorts[INSTANCE_ROLE_DATANODE] + + # Get az name + dbNode.azName = self.__readNodeStrValue(dbNode.name, "azName") + # check azName + # Get az Priority + dbNode.azPriority = self.__readNodeIntValue(dbNode.name, "azPriority", + True, 0) + #get cascadeRole + dbNode.cascadeRole = self.__readNodeStrValue(dbNode.name, "cascadeRole", + True, "off") + if (dbNode.azPriority < AZPRIORITY_MIN or + dbNode.azPriority > AZPRIORITY_MAX): + raise Exception(ErrorCode.GAUSS_532["GAUSS_53206"] % "azPriority") + + if (dbNode.azName == ""): + raise Exception(ErrorCode.GAUSS_512["GAUSS_51212"] % ("azName")) + if (dbNode.azPriority < 1): + raise Exception(ErrorCode.GAUSS_512["GAUSS_51208"] + % ("azPriority", dbNode.azPriority)) + + def __getCmsCountFromWhichConfiguredNode(self, masterNode): + """ + function : get the count of cmservers if current node configured + cmserver + input : masterNode + output : cmsCount + """ + cmsList = self.__readNodeStrValue(masterNode.name, "cmServerRelation", + True, "").split(",") + if (len(cmsList) == 0): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50204"] + % ("CMServer configuration on host [%s]" + % str(masterNode.name)) + + " The information of %s is wrong." + % "cmServerRelation") + cmsCount = len(cmsList) + return cmsCount + + def __readCmsConfig(self, masterNode): + """ + function : Read cm server config on node. 
+ input : [] + output : NA + """ + self.__readCmsConfigForMutilAZ(masterNode) + + def __readCmsConfigForMutilAZ(self, masterNode): + """ + Read the cm server configuration for the multi-AZ cluster. + """ + cmsListenIps = None + cmsHaIps = None + if (masterNode.cmsNum > 0): + self.cmscount = self.__getCmsCountFromWhichConfiguredNode( + masterNode) + cmsListenIps = self.__readInstanceIps(masterNode.name, + "cmServerListenIp", + self.cmscount) + cmsHaIps = self.__readInstanceIps(masterNode.name, "cmServerHaIp", + self.cmscount) + + for i in range(masterNode.cmsNum): + level = self.__readNodeIntValue(masterNode.name, "cmServerlevel") + hostNames = [] + hostNames_tmp = \ + self.__readNodeStrValue(masterNode.name, + "cmServerRelation").split(",") + for hostname in hostNames_tmp: + hostNames.append(hostname.strip()) + + instId = self.__assignNewInstanceId(INSTANCE_ROLE_CMSERVER) + mirrorId = self.__assignNewMirrorId() + instIndex = i * self.cmscount + masterNode.appendInstance(instId, mirrorId, INSTANCE_ROLE_CMSERVER, + MASTER_INSTANCE, cmsListenIps[instIndex], + cmsHaIps[instIndex], "", "", level, + clusterType=self.clusterType) + + for j in range(1, self.cmscount): + dbNode = self.getDbNodeByName(hostNames[j]) + if dbNode is None: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50204"] + % ("CMServer configuration on host [%s]" + % masterNode.name) + + " There is no host named %s." + % hostNames[j]) + instId = self.__assignNewInstanceId(INSTANCE_ROLE_CMSERVER) + instIndex += 1 + dbNode.appendInstance(instId, mirrorId, INSTANCE_ROLE_CMSERVER, + STANDBY_INSTANCE, + cmsListenIps[instIndex], + cmsHaIps[instIndex], "", "", level, + clusterType=self.clusterType) + + def __getDataNodeCount(self, masterNode): + """ + function : get the count of data nodes + input : masterNode + output : dataNodeCount + """ + dataNodeList = self.__readNodeStrValue(masterNode.name, + "dataNode1", + True, "").split(",") + dnListLen = len(dataNodeList) + dataNodeCount = (dnListLen + 1) // 2 + return dataNodeCount + + def __readDataNodeConfig(self, masterNode): + """ + function : Read datanode config on node. + input : [] + output : NA + """ + self.__readDataNodeConfigForMutilAZ(masterNode) + + def __readDataNodeConfigForMutilAZ(self, masterNode): + """ + Read the datanode configuration for the multi-AZ cluster. + """ + dnListenIps = None + dnHaIps = None + mirror_count_data = self.__getDataNodeCount(masterNode) + if (masterNode.dataNum > 0): + dnListenIps = self.__readInstanceIps(masterNode.name, + "dataListenIp", + masterNode.dataNum * + mirror_count_data, + True) + dnHaIps = self.__readInstanceIps(masterNode.name, "dataHaIp", + masterNode.dataNum * + mirror_count_data, + True) + + dnInfoLists = [[] for row in range(masterNode.dataNum)] + xlogInfoLists = [[] for row in range(masterNode.dataNum)] + ssdInfoList = [[] for row in range(masterNode.dataNum)] + syncNumList = [-1 for row in range(masterNode.dataNum)] + totalDnInstanceNum = 0 + # Whether an xlog path is set must be consistent between the + # primary and its standbys + has_xlog_path = 0 + for i in range(masterNode.dataNum): + dnInfoList = [] + key = "dataNode%d" % (i + 1) + dnInfoList_tmp = self.__readNodeStrValue(masterNode.name, + key).split(",") + for dnInfo in dnInfoList_tmp: + dnInfoList.append(dnInfo.strip()) + dnInfoListLen = len(dnInfoList) + if ((dnInfoListLen != 2 * mirror_count_data - 1)): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50204"] % \ + ("database node configuration on host [%s]" + % masterNode.name) + + " The information of [%s] is wrong."
% key) + totalDnInstanceNum += (dnInfoListLen + 1) // 2 + dnInfoLists[i].extend(dnInfoList) + + # If dataNodeXlogPath is not set in the xml file, just set + # xlogInfoListLen = 0; it is used for the judgment below. + # If dataNodeXlogPath is set in the xml file, each datanode needs + # a corresponding xlogdir. + xlogInfoList = [] + xlogkey = "dataNodeXlogPath%d" % (i + 1) + xlogInfoList_tmp = self.__readNodeStrValue(masterNode.name, + xlogkey).split(",") + for xlogInfo in xlogInfoList_tmp: + xlogInfoList.append(xlogInfo.strip()) + + # This judgment is necessary: if dataNodeXlogPath is not set, + # xlogInfoListLen would otherwise equal 1. + # Because dnInfoList must be set, it needs no extra judgment. + if xlogInfoList_tmp == ['']: + xlogInfoListLen = 0 + else: + xlogInfoListLen = len(xlogInfoList) + + if (i == 0): + has_xlog_path = xlogInfoListLen + + if (xlogInfoListLen != has_xlog_path): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50204"] % \ + ("database node configuration on host [%s]" + % masterNode.name) + + " The information of [%s] is wrong." + % xlogkey) + + if (xlogInfoListLen != 0 and xlogInfoListLen != ( + dnInfoListLen + 1) // 2): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50204"] % \ + ("database node configuration on host [%s]" + % masterNode.name) + + " The information of [%s] is wrong." + % xlogkey) + xlogInfoLists[i].extend(xlogInfoList) + + key = "ssdDNDir%d" % (i + 1) + # SSD is not supported, so leave the ssddir list empty + ssddirList = [] + ssdInfoList[i].extend(ssddirList) + + # dataNode syncNum + key = "dataNode%d_syncNum" % (i + 1) + syncNum_temp = self.__readNodeStrValue(masterNode.name, key) + if syncNum_temp is not None: + syncNum = int(syncNum_temp) + if syncNum < 0 or syncNum >= totalDnInstanceNum: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50204"] % \ + ("database node configuration on host [%s]" + % masterNode.name) + + " The information of [%s] is wrong."
+ % key) + syncNumList[i] = syncNum + + # check ip num + if (dnListenIps is not None and len(dnListenIps[0]) != 0): + colNum = len(dnListenIps[0]) + rowNum = len(dnListenIps) + for col in range(colNum): + ipNum = 0 + for row in range(rowNum): + if (dnListenIps[row][col] != ""): + ipNum += 1 + else: + break + if (ipNum != totalDnInstanceNum): + raise Exception(ErrorCode.GAUSS_516["GAUSS_51637"] % \ + ("IP number of dataListenIp", + "instance number")) + + if (dnHaIps is not None and len(dnHaIps[0]) != 0): + colNum = len(dnHaIps[0]) + rowNum = len(dnHaIps) + for col in range(colNum): + ipNum = 0 + for row in range(rowNum): + if (dnHaIps[row][col] != ""): + ipNum += 1 + else: + break + if (ipNum != totalDnInstanceNum): + raise Exception(ErrorCode.GAUSS_516["GAUSS_51637"] % \ + ("IP number of dataHaIps", + "instance number")) + + instIndex = 0 + for i in range(masterNode.dataNum): + dnInfoList = dnInfoLists[i] + + # Because xlog may not be set to prevent the array from crossing + # the boundary + if xlogInfoListLen != 0: + xlogInfoList = xlogInfoLists[i] + groupId = self.__assignNewGroupId() + if (len(ssdInfoList[i]) > 1): + ssddirList = ssdInfoList[i] + # master datanode + instId = self.__assignNewInstanceId(INSTANCE_ROLE_DATANODE) + # ssd doesn't supply ,this branch will not arrive when len( + # ssdInfoList[i]) is 0 + if (len(ssdInfoList[i]) > 1): + if (xlogInfoListLen == 0): + masterNode.appendInstance(instId, groupId, + INSTANCE_ROLE_DATANODE, + MASTER_INSTANCE, + dnListenIps[instIndex], + dnHaIps[instIndex], + dnInfoList[0], ssddirList[0], + clusterType=self.clusterType, + syncNum=syncNumList[i]) + else: + masterNode.appendInstance(instId, groupId, + INSTANCE_ROLE_DATANODE, + MASTER_INSTANCE, + dnListenIps[instIndex], + dnHaIps[instIndex], + dnInfoList[0], ssddirList[0], + clusterType=self.clusterType, + xlogdir=xlogInfoList[0], + syncNum=syncNumList[i]) + else: + if (xlogInfoListLen == 0): + masterNode.appendInstance(instId, groupId, + INSTANCE_ROLE_DATANODE, + MASTER_INSTANCE, + dnListenIps[instIndex], + dnHaIps[instIndex], + dnInfoList[0], + clusterType=self.clusterType, + syncNum=syncNumList[i]) + else: + masterNode.appendInstance(instId, groupId, + INSTANCE_ROLE_DATANODE, + MASTER_INSTANCE, + dnListenIps[instIndex], + dnHaIps[instIndex], + dnInfoList[0], + clusterType=self.clusterType, + xlogdir=xlogInfoList[0], + syncNum=syncNumList[i]) + + instIndex += 1 + + for nodeLen in range((len(dnInfoList) + 1) // 2 - 1): + dbNode = self.getDbNodeByName(dnInfoList[nodeLen * 2 + 1]) + if dbNode is None: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50204"] + % ("database node configuration on " + "host [%s]" % str(masterNode.name)) + + " There is no host named %s." 
+ % dnInfoList[nodeLen * 2 + 1]) + instId = self.__assignNewInstanceId(INSTANCE_ROLE_DATANODE) + + # ssd doesn't supply ,this branch will not arrive when len( + # ssdInfoList[i]) is 0 + if (len(ssdInfoList[i]) > 1): + if (xlogInfoListLen == 0): + dbNode.appendInstance(instId, groupId, + INSTANCE_ROLE_DATANODE, + STANDBY_INSTANCE, + dnListenIps[instIndex], + dnHaIps[instIndex], + dnInfoList[nodeLen * 2 + 2], + ssddirList[nodeLen * 2 + 1], + clusterType=self.clusterType, + syncNum=syncNumList[i]) + else: + dbNode.appendInstance(instId, groupId, + INSTANCE_ROLE_DATANODE, + STANDBY_INSTANCE, + dnListenIps[instIndex], + dnHaIps[instIndex], + dnInfoList[nodeLen * 2 + 2], + ssddirList[nodeLen * 2 + 1], + clusterType=self.clusterType, + xlogdir=xlogInfoList[ + nodeLen + 1], + syncNum=syncNumList[i]) + else: + if (xlogInfoListLen == 0): + dbNode.appendInstance(instId, groupId, + INSTANCE_ROLE_DATANODE, + STANDBY_INSTANCE, + dnListenIps[instIndex], + dnHaIps[instIndex], + dnInfoList[nodeLen * 2 + 2], + clusterType=self.clusterType, + syncNum=syncNumList[i]) + else: + dbNode.appendInstance(instId, groupId, + INSTANCE_ROLE_DATANODE, + STANDBY_INSTANCE, + dnListenIps[instIndex], + dnHaIps[instIndex], + dnInfoList[nodeLen * 2 + 2], + clusterType=self.clusterType, + xlogdir=xlogInfoList[ + nodeLen + 1], + syncNum=syncNumList[i]) + if dbNode.cascadeRole == "on": + for inst in dbNode.datanodes: + inst.instanceType = CASCADE_STANDBY + + instIndex += 1 + + for inst in masterNode.datanodes: + inst.azName = masterNode.azName + + def __readCmaConfig(self, dbNode): + """ + function : Read cm agent config on node. + input : [] + output : NA + """ + agentIps = self.__readInstanceIps(dbNode.name, "cmAgentConnectIp", 1) + instId = self.__assignNewInstanceId(INSTANCE_ROLE_CMAGENT) + dbNode.appendInstance(instId, MIRROR_ID_AGENT, INSTANCE_ROLE_CMAGENT, + INSTANCE_TYPE_UNDEFINED, agentIps[0], None, "", + clusterType=self.clusterType) + + def newInstanceId(self, instRole): + return self.__assignNewInstanceId(instRole) + + def newMirrorId(self): + return self.__assignNewMirrorId() + + def __assignNewInstanceId(self, instRole): + """ + function : Assign a new id for instance. + input : String + output : NA + """ + newId = self.__newInstanceId[instRole] + if (INSTANCE_ROLE_DATANODE == instRole): + if (newId == OLD_LAST_PRIMARYSTANDBY_BASEID_NUM): + self.__newInstanceId[instRole] = \ + self.__newInstanceId[instRole] + 1 + \ + (NEW_FIRST_PRIMARYSTANDBY_BASEID_NUM + - OLD_LAST_PRIMARYSTANDBY_BASEID_NUM) + else: + self.__newInstanceId[instRole] += 1 + else: + self.__newInstanceId[instRole] += 1 + return newId + + def __assignNewDummyInstanceId(self): + """ + function : Assign a new dummy standby instance id. + input : NA + output : NA + """ + if (self.__newDummyStandbyId == OLD_LAST_DUMMYNODE_BASEID_NUM): + self.__newDummyStandbyId = self.__newDummyStandbyId + 1 + ( + NEW_FIRST_DUMMYNODE_BASEID_NUM - + OLD_LAST_DUMMYNODE_BASEID_NUM) + else: + self.__newDummyStandbyId += 1 + return self.__newDummyStandbyId + + def __assignNewMirrorId(self): + """ + function : Assign a new mirror id. + input : NA + output : NA + """ + self.__newMirrorId += 1 + + return self.__newMirrorId + + def __assignNewGroupId(self): + """""" + self.__newGroupId += 1 + return self.__newGroupId + + def __readNodeIps(self, nodeName, prefix): + """ + function : Read ip for node, such as backIp1, sshIp1 etc.. 
+ input : String,String + output : NA + """ + ipList = [] + n = 1 + + if (prefix == "cooListenIp"): + n = 3 + elif (prefix == "etcdListenIp"): + n = 2 + + for i in range(1, CONFIG_IP_NUM + n): + key = "%s%d" % (prefix, i) + value = self.__readNodeStrValue(nodeName, key, True, "") + if (value == ""): + break + ipList.append(value) + + return ipList + + def __readVirtualIp(self, nodeName, prefix): + """ + function : Read virtual ip only for node. + input : String,String + output : NA + """ + ipList = [] + value = self.__readNodeStrValue(nodeName, prefix, True, "") + if (value != ""): + valueIps = value.split(",") + for ip in valueIps: + ip = ip.strip() + if ip not in ipList: + ipList.append(ip) + return ipList + + def __isIpValid(self, ip): + """ + function : check if the input ip address is valid + input : String + output : NA + """ + IpValid = re.match( + "^(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[" + "1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[" + "1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{" + "1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[" + "0-9]{1}|[0-9])$", + ip) + if IpValid: + if (IpValid.group() == ip): + return True + else: + return False + else: + return False + + def __isPortValid(self, port): + """ + function :Judge if the port is valid + input : int + output : boolean + """ + if (port < 0 or port > 65535): + return False + elif (port >= 0 and port <= 1023): + return False + else: + return True + + def __readInstanceIps(self, nodeName, prefix, InstCount, isDataNode=False): + """ + function :Read instance ips + input : String,String,int + output : NA + """ + multiIpList = self.__readNodeIps(nodeName, prefix) + + mutilIpCount = len(multiIpList) + if (mutilIpCount == 0): + return [[] for row in range(InstCount)] + + instanceIpList = [["" for col in range(mutilIpCount)] for row in + range(InstCount)] + for i in range(mutilIpCount): + ipList = [] + ipList_tmp = multiIpList[i].split(",") + for ip in ipList_tmp: + ipList.append(ip.strip()) + ipNum = len(ipList) + if (ipNum != InstCount): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50204"] + % ("[%s] of node [%s]" % (prefix, nodeName)) + + " The count of IP is wrong.") + for j in range(ipNum): + instanceIpList[j][i] = ipList[j] + + return instanceIpList + + def __readNodeIntValue(self, nodeName, key, nullable=False, defValue=0): + """ + function :Read integer value of specified node + input : String,int + output : NA + """ + value = defValue + + strValue = self.__readNodeStrValue(nodeName, key, nullable, "") + if (strValue != ""): + value = int(strValue) + return value + + def __readNodeStrValue(self, nodeName, key, nullable=False, defValue=""): + """ + function : Read string of specified node + input : String,int + output : defValue + """ + (retStatus, retValue) = readOneClusterConfigItem(xmlRootNode, key, + "node", nodeName) + if (retStatus == 0): + return str(retValue).strip() + elif (retStatus == 2 and nullable): + return defValue + # When upgrade,may be not set XLOGPATH in xml.Make special judgment + # for xlog scenario. + elif (retStatus == 2 and "dataNodeXlogPath" in key): + return defValue + elif (retStatus == 2 and "syncNum" in key): + return None + else: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50204"] % \ + ("[%s] of node [%s]" % (key, nodeName)) + \ + " Return status: %d. value: %s. Check whether " + "the dataNum is correct first." 
+ % (retStatus, retValue)) + + def __checkVirtualIp(self, clusterVirtualIp, dbNode): + """ + function : Check virtual ip + input : [], Object + output : NA + """ + allIps = dbNode.virtualIp[:] + allIps.extend(dbNode.backIps) + tempIps = [] + for ip in allIps: + if (not self.__isIpValid(ip)): + raise Exception(ErrorCode.GAUSS_506["GAUSS_50603"] + \ + "The IP address is: %s" % ip + " Please " + "check it.") + if ip in tempIps: + raise Exception(ErrorCode.GAUSS_512["GAUSS_51220"] % \ + ip + " Virtual IP(s) cannot be the same as back " + "IP(s).") + tempIps.append(ip) + + for ip in allIps: + if ip in clusterVirtualIp: + raise Exception(ErrorCode.GAUSS_512["GAUSS_51224"] % ip) + clusterVirtualIp.extend(allIps) + + for dnInstance in dbNode.datanodes: + for dnIp in dnInstance.listenIps: + if (dnIp not in allIps): + raise Exception(ErrorCode.GAUSS_512["GAUSS_51229"] % \ + (dnIp, dbNode.name) + "Please check it.") + + def checkDbNodes(self): + """ + """ + if (len(self.dbNodes) > MIRROR_COUNT_NODE_MAX): + raise Exception(ErrorCode.GAUSS_512["GAUSS_51230"] % \ + ("nodes", + "be less than or equal to %s" % + MIRROR_COUNT_NODE_MAX) + " Please set it.") + + def checkCmsNumForMutilAZ(self, cmsNum): + """ + """ + if (cmsNum != 1): + raise Exception(ErrorCode.GAUSS_512["GAUSS_51230"] % \ + ("CMServer group", + "equal to 1") + " Please set it.") + + def checkGtmNumForMutilAZ(self, gtmNum): + """ + """ + if (gtmNum < 0): + raise Exception(ErrorCode.GAUSS_512["GAUSS_51230"] % \ + ("GTM", "be greater than or equal to 0") + " Please set it.") + + def checkCooNumForMutilAZ(self, cooNum): + """ + """ + if (cooNum <= 0): + raise Exception(ErrorCode.GAUSS_512["GAUSS_51230"] % \ + ("CN", "be greater than 0") + " Please set it.") + + def checkDataNumForMutilAZ(self, dataNum): + """ + """ + if (dataNum <= 0 or dataNum > MIRROR_COUNT_DN_MAX): + raise Exception(ErrorCode.GAUSS_512["GAUSS_51230"] % \ + ("DN", + "be greater than 0 and less than or equal to " + "%s" % MIRROR_COUNT_DN_MAX) + " Please set it.") + + def checkEtcdNumForMutilAZ(self, etcdNum): + """ + """ + if (etcdNum > 0): + if ( + etcdNum < MIRROR_COUNT_ETCD_MIN or etcdNum > + MIRROR_COUNT_ETCD_MAX): + raise Exception(ErrorCode.GAUSS_512["GAUSS_51230"] % \ + ("ETCD", + "be greater than 2 and less than 8") + + " Please set it.") + + ###################################################### + def checkDnIp(self, networkSegment): + """ + """ + for dbNode in self.dbNodes: + if (dbNode.dataNum > 0): + for dn in dbNode.datanodes: + if (dn.listenIps[0].split(".")[0] != networkSegment): + raise Exception(ErrorCode.GAUSS_512["GAUSS_51220"] + % dn.listenIps[0] + + "\nAll datanodes must be on " + "the same network segment.") + + def checkNewNodes(self): + """ + """ + if (len(self.dbNodes) - len(self.newNodes) <= 1): + raise Exception(ErrorCode.GAUSS_512["GAUSS_51231"] + + " Please check the cluster configuration file.") + for dbNode in self.newNodes: + if (len(dbNode.cmservers) > 0 or len(dbNode.gtms) > 0 or + len(dbNode.etcds) > 0): + raise Exception( + ErrorCode.GAUSS_512["GAUSS_51215"] % dbNode.name + \ + " Please check the cluster configuration file.") + if (len(dbNode.coordinators) == 0 and len(dbNode.datanodes) == 0): + raise Exception( + ErrorCode.GAUSS_512["GAUSS_51216"] % dbNode.name + \ + " Please check the cluster configuration file.") + + def __checkAZForSingleInst(self): + """ + function : check az names and DB replication + input : NA + output : NA + """ + + # Get DB standbys num + # The number of standbys for each DB instance must be the same + peerNum = 0 + for dbNode in
+            for inst in dbNode.datanodes:
+                if (inst.instanceType == MASTER_INSTANCE):
+                    peerInsts = self.getPeerInstance(inst)
+                    if (peerNum == 0):
+                        peerNum = len(peerInsts)
+                    elif (peerNum != len(peerInsts)):
+                        raise Exception(ErrorCode.GAUSS_532["GAUSS_53200"])
+
+        if peerNum > 8:
+            raise Exception(ErrorCode.GAUSS_512["GAUSS_51230"] % (
+                "database node standbys", "be less than 9")
+                            + " Please set it.")
+
+    def __checkAZNamesWithDNReplication(self):
+        """
+        function : check az names and DB replication
+        input : NA
+        output : NA
+        """
+        # AZ map: name to priorities
+        azMap = {}
+        # Get DB standbys num
+        peerNum = 0
+        for dbNode in self.dbNodes:
+            for inst in dbNode.datanodes:
+                if (inst.instanceType == MASTER_INSTANCE):
+                    peerInsts = self.getPeerInstance(inst)
+                    # The number of standbys for each DB instance must be
+                    # the same
+                    if (peerNum == 0):
+                        peerNum = len(peerInsts)
+                    elif (peerNum != len(peerInsts)):
+                        raise Exception(ErrorCode.GAUSS_532["GAUSS_53200"])
+
+        # Get AZ names in cluster
+        azNames = self.getazNames()
+        if (peerNum < 2 or peerNum > 7):
+            raise Exception(ErrorCode.GAUSS_512["GAUSS_51230"]
+                            % ("database node standbys",
+                               "be greater than 1 and less than 8")
+                            + " Please set it.")
+        # Check az names and DB replication
+        # When the number of standbys is less than 3, the AZ num must be 1
+        # When the number of standbys is equal to 3, the AZ num must be 2 or 3
+        # When the number of standbys is equal to 4, the AZ num must be 3
+        # When the number of standbys is greater than 4 and less than 8,
+        # the AZ num must be 3
+        if (len(azNames) != 1 and peerNum <= 2):
+            raise Exception(ErrorCode.GAUSS_532["GAUSS_53201"])
+        elif (len(azNames) == 1 and peerNum == 3):
+            raise Exception(ErrorCode.GAUSS_532["GAUSS_53201"])
+        elif (len(azNames) != 3 and peerNum == 4):
+            raise Exception(ErrorCode.GAUSS_532["GAUSS_53201"])
+        elif (len(azNames) != 3 and peerNum <= 7 and peerNum > 4):
+            raise Exception(ErrorCode.GAUSS_532["GAUSS_53201"])
+
+        # Check AZ replication
+        self.__checkAzInfoForSinglePrimaryMultiStandby(azNames)
+        # Check DB peerInsts num of configuration in each az zone
+        self.__checkAzSycNumforDnpeerInsts(azNames)
+
+    def __checkAzInfoForSinglePrimaryMultiStandby(self, azNames):
+        """
+        1. Check if AZ info with etcd number is set correctly.
+        2. Check if the azPriority value is set correctly.
+        return: NA
+        """
+        az1_etcd = 0
+        az2_etcd = 0
+        az3_etcd = 0
+        az1Priority_max = 0
+        az1Priority_min = 0
+        az2Priority_max = 0
+        az2Priority_min = 0
+        az3Priority_max = 0
+        az3Priority_min = 0
+        az1PriorityLst = []
+        az2PriorityLst = []
+        az3PriorityLst = []
+        syncAz = False
+        thirdPartAZ = False
+
+        for dbNode in self.dbNodes:
+            if dbNode.azName == azNames[0]:
+                az1_etcd += len(dbNode.etcds)
+                az1PriorityLst.append(dbNode.azPriority)
+            if len(azNames) > 1 and dbNode.azName == azNames[1]:
+                syncAz = True
+                az2_etcd += len(dbNode.etcds)
+                az2PriorityLst.append(dbNode.azPriority)
+            if len(azNames) > 2 and dbNode.azName == azNames[2]:
+                thirdPartAZ = True
+                az3_etcd += len(dbNode.etcds)
+                az3PriorityLst.append(dbNode.azPriority)
+
+        # In a primary multi-standby cluster, AZ1 has a higher priority than
+        # AZ2 and AZ2 has a higher priority than AZ3.
+        az1Priority_max = max(az1PriorityLst)
+        az1Priority_min = min(az1PriorityLst)
+
+        # Each AZ requires at least one or more ETCDs.
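+        # Illustration of the thresholds enforced below (the counts are
+        # hypothetical; azNames is sorted by priority): with three AZs,
+        # az1_etcd=2, az2_etcd=2, az3_etcd=1 passes, while
+        # az1_etcd=1, az2_etcd=2, az3_etcd=2 fails because the two
+        # higher-priority AZs each need at least two ETCDs; with a single
+        # AZ, exactly three ETCDs are required.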
+        if (az1_etcd != 3 and not syncAz and not thirdPartAZ):
+            raise Exception(
+                ErrorCode.GAUSS_532["GAUSS_53203"] % "AZ1 must be 3")
+        if (syncAz):
+            if (az1_etcd < 2 or az2_etcd < 1):
+                raise Exception(ErrorCode.GAUSS_532["GAUSS_53203"] %
+                                "AZ1 must be at least 2 and the number "
+                                "of ETCD in AZ2 must be at least 1")
+            # check az2 priority
+            az2Priority_max = max(az2PriorityLst)
+            az2Priority_min = min(az2PriorityLst)
+            if (az1Priority_max >= az2Priority_min):
+                raise Exception(ErrorCode.GAUSS_532["GAUSS_53205"]
+                                % (azNames[0], azNames[1]))
+        if (thirdPartAZ):
+            if (az1_etcd < 2 or az2_etcd < 2 or az3_etcd < 1):
+                raise Exception(ErrorCode.GAUSS_532["GAUSS_53203"] %
+                                "%s and %s must be at least 2 and the "
+                                "number of ETCD in %s must be at least "
+                                "1" % (azNames[0], azNames[1], azNames[2]))
+            # check az3 priority
+            az3Priority_max = max(az3PriorityLst)
+            az3Priority_min = min(az3PriorityLst)
+            if (az2Priority_max >= az3Priority_min):
+                raise Exception(ErrorCode.GAUSS_532["GAUSS_53205"]
+                                % (azNames[1], azNames[2]))
+
+    def __checkAzSycNumforDnpeerInsts(self, azNames):
+        """
+        function : Check if AZ info with DB number is set correctly.
+        input : azName List sorted by azPriority
+        output : NA
+        """
+        az1_datanode_num = 0
+        az2_datanode_num = 0
+        az3_datanode_num = 0
+        syncAz = False
+        thirdPartAZ = False
+
+        for dbNode in self.dbNodes:
+            if dbNode.azName == azNames[0]:
+                az1_datanode_num += len(dbNode.datanodes)
+            if len(azNames) > 1 and dbNode.azName == azNames[1]:
+                syncAz = True
+                az2_datanode_num += len(dbNode.datanodes)
+            if len(azNames) > 2 and dbNode.azName == azNames[2]:
+                thirdPartAZ = True
+                az3_datanode_num += len(dbNode.datanodes)
+
+        # A higher-priority AZ must contain datanodes whenever a
+        # lower-priority AZ does.
+        if (syncAz):
+            if az2_datanode_num != 0 and az1_datanode_num == 0:
+                errmsg = ErrorCode.GAUSS_532["GAUSS_53201"]
+                errmsg += " The datanodes num in the highest priority " \
+                          "az[%s] should not be 0 " % azNames[0]
+                errmsg += "when there are database node instances in the" \
+                          " lowest priority az[%s]." % azNames[1]
+                raise Exception(errmsg)
+        if (thirdPartAZ):
+            if az3_datanode_num != 0 and (
+                    az1_datanode_num == 0 or az2_datanode_num == 0):
+                errmsg = ErrorCode.GAUSS_532["GAUSS_53201"]
+                errmsg += " The datanodes num in each of the two higher " \
+                          "priority azs[%s,%s] should not be 0 " \
+                          % (azNames[0], azNames[1])
+                errmsg += "when there are database node instances in the" \
+                          " lowest priority az[%s]." % azNames[-1]
+                raise Exception(errmsg)
+
+    def __getDNPeerInstance(self, dbInst):
+        """
+        function : Get DB peer instances of the specified instance when
+                   writing the static configuration file.
+        input : instance
+        output : instance list
+        """
+        instances = []
+        instIdLst = []
+
+        for dbNode in self.dbNodes:
+            for inst in dbNode.datanodes:
+                if (inst.mirrorId == dbInst.mirrorId and inst.instanceId !=
+                        dbInst.instanceId):
+                    instances.append(inst)
+                    instIdLst.append(inst.instanceId)
+
+        # In a primary multi-standby cluster,
+        # since the CM update system table depends on the DB read/write
+        # sequence in the static configuration file,
+        # we must sort the DN's standby list by instanceId.
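+        # For example (hypothetical ids): peers collected as
+        # [6004, 6002, 6003] are returned as [6002, 6003, 6004] for a
+        # master instance; for non-master instances the discovery order
+        # is kept unchanged.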
+ if dbInst.instanceType == MASTER_INSTANCE: + instIdLst.sort() + instanceLst = [] + for instId in instIdLst: + for inst in instances: + if (inst.instanceId == instId): + instanceLst.append(inst) + return instanceLst + else: + return instances + + def saveToStaticConfig(self, filePath, localNodeId, dbNodes=None, + upgrade=False): + """ + function : Save cluster info into to static config + input : String,int + output : NA + """ + fp = None + number = None + if upgrade: + staticConfigFilePath = os.path.split(filePath)[0] + versionFile = os.path.join( + staticConfigFilePath, "upgrade_version") + version, number, commitid = VersionInfo.get_version_info( + versionFile) + try: + if (dbNodes is None): + dbNodes = self.dbNodes + g_file.createFileInSafeMode(filePath) + fp = open(filePath, "wb") + # len + info = struct.pack("I", 28) + # version + info += struct.pack("I", BIN_CONFIG_VERSION_SINGLE_INST) + # time + info += struct.pack("q", int(time.time())) + # node count + info += struct.pack("I", len(dbNodes)) + # local node + info += struct.pack("I", localNodeId) + + crc = binascii.crc32(info) + if upgrade: + if float(number) <= 92.200: + info = struct.pack("q", crc) + info + else: + info = struct.pack("I", crc) + info + else: + info = struct.pack("I", crc) + info + fp.write(info) + + for dbNode in dbNodes: + offset = (fp.tell() // PAGE_SIZE + 1) * PAGE_SIZE + fp.seek(offset) + + info = self.__packNodeInfo(dbNode, number, upgrade=upgrade) + fp.write(info) + endBytes = PAGE_SIZE - fp.tell() % PAGE_SIZE + if (endBytes != PAGE_SIZE): + info = struct.pack("%dx" % endBytes) + fp.write(info) + fp.flush() + fp.close() + os.chmod(filePath, DIRECTORY_PERMISSION) + except Exception as e: + if fp: + fp.close() + raise Exception(ErrorCode.GAUSS_502["GAUSS_50205"] % \ + "static configuration file" + + " Error: \n%s" % str(e)) + + def __packNodeInfo(self, dbNode, number, upgrade=False): + """ + function : Pack the info of node + input : [] + output : String + """ + # node id + info = struct.pack("I", dbNode.id) + # node name + info += struct.pack("64s", dbNode.name.encode("utf-8")) + # az info + info += struct.pack("64s", dbNode.azName.encode("utf-8")) + info += struct.pack("I", dbNode.azPriority) + # backIp + info += self.__packIps(dbNode.backIps) + # sshIp + info += self.__packIps(dbNode.sshIps) + # cm_server + info += self.__packCmsInfo(dbNode) + # cm_agent + info += self.__packAgentInfo(dbNode) + # gtm + info += self.__packGtmInfo(dbNode) + # cancel save gtmProxy info,need a placeholder + info += self.__packGtmProxyInfo(dbNode) + # cn + info += self.__packCooInfo(dbNode) + # dn + info += self.__packDataNode(dbNode) + # etcd + info += self.__packEtcdInfo(dbNode) + # cancel save sctp begin/end port,need a placeholder + info += struct.pack("I", 0) + info += struct.pack("I", 0) + crc = binascii.crc32(info) + + if upgrade: + if float(number) <= 92.200: + return struct.pack("q", crc) + info + else: + return struct.pack("I", crc) + info + else: + return struct.pack("I", crc) + info + + def __packNodeInfoForLC(self, dbNode): + """ + function : Pack the info of node for the logic cluster + input : [] + output : String + """ + # node id + info = struct.pack("I", dbNode.id) + # node name + info += struct.pack("64s", dbNode.name.encode("utf-8")) + # backIp + info += self.__packIps(dbNode.backIps) + # sshIp + info += self.__packIps(dbNode.sshIps) + # dn + info += self.__packDataNode(dbNode) + # cancel save sctp begin/end port,need a placeholder + info += struct.pack("I", 0) + info += struct.pack("I", 0) + crc = 
binascii.crc32(info) + + return struct.pack("I", crc) + info + + def __packEtcdInfo(self, dbNode): + """ + function : Pack the info of etcd + input : [] + output : String + """ + n = len(dbNode.etcds) + + info = "".encode() + if (n == 0): + # etcd count + info += struct.pack("I", 0) + # etcd id + info += struct.pack("I", 0) + # etcd mirror id + info += struct.pack("i", 0) + # etcd name + info += struct.pack("64x") + # datadir + info += struct.pack("1024x") + # listen ip + info += self.__packIps([]) + # listn port + info += struct.pack("I", 0) + # ha ip + info += self.__packIps([]) + # ha port + info += struct.pack("I", 0) + elif (n == 1): + etcdInst = dbNode.etcds[0] + # etcd count + info += struct.pack("I", 1) + # etcd id + info += struct.pack("I", etcdInst.instanceId) + # etcd mirror id + info += struct.pack("i", etcdInst.mirrorId) + # etcd name + info += struct.pack("64s", "etcd_%d".encode( + "utf-8") % etcdInst.instanceId) + # datadir + info += struct.pack("1024s", etcdInst.datadir.encode("utf-8")) + # listen ip + info += self.__packIps(etcdInst.listenIps) + # listn port + info += struct.pack("I", etcdInst.port) + # ha ip + info += self.__packIps(etcdInst.haIps) + # ha port + info += struct.pack("I", etcdInst.haPort) + else: + pass + + return info + + def __packCmsInfo(self, dbNode): + """ + function : Pack the info of cm server + input : [] + output : String + """ + n = len(dbNode.cmservers) + + info = "".encode() + if (n == 0): + # cm server id + info += struct.pack("I", 0) + # cm_server mirror id + info += struct.pack("I", 0) + # datadir + info += struct.pack("1024s", dbNode.cmDataDir.encode("utf-8")) + # cm server level + info += struct.pack("I", 0) + # float ip + info += struct.pack("128x") + # listen ip + info += self.__packIps([]) + # listen port + info += struct.pack("I", 0) + # local ha ip + info += self.__packIps([]) + # local ha port + info += struct.pack("I", 0) + # is primary + info += struct.pack("I", 0) + # peer ha ip + info += self.__packIps([]) + # peer ha port + info += struct.pack("I", 0) + elif (n == 1): + cmsInst = dbNode.cmservers[0] + # cm server id + info += struct.pack("I", cmsInst.instanceId) + # cm_server mirror id + info += struct.pack("I", cmsInst.mirrorId) + # datadir + info += struct.pack("1024s", dbNode.cmDataDir.encode("utf-8")) + # cm server level + info += struct.pack("I", cmsInst.level) + info += struct.pack("128s", self.cmsFloatIp.encode("utf-8")) + # listen ip + info += self.__packIps(cmsInst.listenIps) + # listen port + info += struct.pack("I", cmsInst.port) + # local ha ip + info += self.__packIps(cmsInst.haIps) + # local ha port + info += struct.pack("I", cmsInst.haPort) + # instance type + info += struct.pack("I", cmsInst.instanceType) + instances = self.getPeerInstance(cmsInst) + peerInst = instances[0] + # peer ha ip + info += self.__packIps(peerInst.haIps) + # peer ha port + info += struct.pack("I", peerInst.haPort) + else: + pass + + return info + + def __packAgentInfo(self, dbNode): + """ + function : Pack the info of agent + input : [] + output : String + """ + n = len(dbNode.cmagents) + + info = "".encode() + if (n == 1): + cmaInst = dbNode.cmagents[0] + # Agent id + info += struct.pack("I", cmaInst.instanceId) + # Agent mirror id + info += struct.pack("i", cmaInst.mirrorId) + # agent ips + info += self.__packIps(cmaInst.listenIps) + + return info + + def __packGtmInfo(self, dbNode): + """ + function : Pack the info of gtm + input : [] + output : String + """ + n = len(dbNode.gtms) + + info = "".encode() + if (n == 0): + # gtm id + info 
+= struct.pack("I", 0)
+            # gtm mirror id
+            info += struct.pack("I", 0)
+            # gtm count
+            info += struct.pack("I", 0)
+            # datadir
+            info += struct.pack("1024x")
+            # listen ip
+            info += self.__packIps([])
+            # listen port
+            info += struct.pack("I", 0)
+            # instance type
+            info += struct.pack("I", 0)
+            # local ha ip
+            info += self.__packIps([])
+            # local ha port
+            info += struct.pack("I", 0)
+            # peer gtm datadir
+            info += struct.pack("1024x")
+            # peer ha ip
+            info += self.__packIps([])
+            # peer ha port
+            info += struct.pack("I", 0)
+        elif (n == 1):
+            gtmInst = dbNode.gtms[0]
+            # gtm id
+            info += struct.pack("I", gtmInst.instanceId)
+            # gtm mirror id
+            info += struct.pack("I", gtmInst.mirrorId)
+            # gtm count
+            info += struct.pack("I", 1)
+            # datadir
+            info += struct.pack("1024s", gtmInst.datadir.encode("utf-8"))
+            # listen ip
+            info += self.__packIps(gtmInst.listenIps)
+            # listen port
+            info += struct.pack("I", gtmInst.port)
+            # instance type
+            info += struct.pack("I", gtmInst.instanceType)
+            # local ha ip
+            info += self.__packIps(gtmInst.haIps)
+            # local ha port
+            info += struct.pack("I", gtmInst.haPort)
+            # peer gtm datadir
+            info += struct.pack("1024x")
+            # peer ha ip
+            info += self.__packIps([])
+            # peer ha port
+            info += struct.pack("I", 0)
+
+        else:
+            pass
+
+        return info
+
+    def __packGtmProxyInfo(self, dbNode):
+        """
+        function : Pack the info of gtm proxy
+        input : dbNodeInfo
+        output : bytes
+        """
+        info = "".encode()
+        info += struct.pack("I", 0)
+        info += struct.pack("I", 0)
+        info += struct.pack("I", 0)
+        info += self.__packIps([])
+        info += struct.pack("I", 0)
+        return info
+
+    def __packCooInfo(self, dbNode):
+        """
+        function : Pack the info of coordinator
+        input : dbNodeInfo
+        output : bytes
+        """
+        n = len(dbNode.coordinators)
+
+        info = "".encode()
+        if (n == 0):
+            # coordinator id
+            info += struct.pack("I", 0)
+            # coordinator mirror id
+            info += struct.pack("i", 0)
+            # coordinator count
+            info += struct.pack("I", 0)
+            # datadir
+            info += struct.pack("1024x")
+            # ssdDir
+            info += struct.pack("1024x")
+            # listen ip
+            info += self.__packIps([])
+            # listen port
+            info += struct.pack("I", 0)
+            # ha port
+            info += struct.pack("I", 0)
+        elif (n == 1):
+            cooInst = dbNode.coordinators[0]
+            # coordinator id
+            info += struct.pack("I", cooInst.instanceId)
+            # coordinator mirror id
+            info += struct.pack("i", cooInst.mirrorId)
+            # coordinator count
+            info += struct.pack("I", 1)
+            # datadir
+            info += struct.pack("1024s", cooInst.datadir.encode("utf-8"))
+            # ssdDir
+            info += struct.pack("1024s", cooInst.ssdDir.encode("utf-8"))
+            # listen ip
+            info += self.__packIps(cooInst.listenIps)
+            # listen port
+            info += struct.pack("I", cooInst.port)
+            # ha port
+            info += struct.pack("I", cooInst.haPort)
+        else:
+            pass
+
+        return info
+
+    def __packDataNode(self, dbNode):
+        """
+        function : Pack the info of datanode
+        input : dbNodeInfo
+        output : bytes
+        """
+
+        info = struct.pack("I", len(dbNode.datanodes))
+        for dnInst in dbNode.datanodes:
+            instances = self.__getDNPeerInstance(dnInst)
+            # datanode id
+            info += struct.pack("I", dnInst.instanceId)
+            # datanode mirror id
+            info += struct.pack("I", dnInst.mirrorId)
+            # datadir
+            info += struct.pack("1024s", dnInst.datadir.encode("utf-8"))
+            # xlogdir
+            info += struct.pack("1024s", dnInst.xlogdir.encode("utf-8"))
+            # ssdDir
+            info += struct.pack("1024s", dnInst.ssdDir.encode("utf-8"))
+            # listen ip
+            info += self.__packIps(dnInst.listenIps)
+            # port
+            info += struct.pack("I", dnInst.port)
+            # instance type
+            info += struct.pack("I", dnInst.instanceType)
+            # local ha ip
+            info +=
self.__packIps(dnInst.haIps) + # local ha port + info += struct.pack("I", dnInst.haPort) + + maxStandbyCount = MIRROR_COUNT_REPLICATION_MAX - 1 + + n = len(instances) + for i in range(n): + peerInst = instances[i] + # peer1 datadir + info += struct.pack("1024s", peerInst.datadir.encode("utf-8")) + # peer1 ha ip + info += self.__packIps(peerInst.haIps) + # peer1 ha port + info += struct.pack("I", peerInst.haPort) + # instance type + info += struct.pack("I", peerInst.instanceType) + for i in range(n, maxStandbyCount): + # peer1 datadir + info += struct.pack("1024x") + # peer1 ha ip + info += self.__packIps([]) + # peer1 ha port + info += struct.pack("I", 0) + # instance type + info += struct.pack("I", 0) + return info + + def __packIps(self, ips): + """ + function : Pack the info of ips + input : [] + output : String + """ + n = len(ips) + + info = struct.pack("I", n) + for i in range(n): + info += struct.pack("128s", ips[i].encode("utf-8")) + for i in range(n, MAX_IP_NUM): + info += struct.pack("128x") + + return info + + def saveClusterLevelData(self, rootNode, user): + """ + function : save cluster level data info. + input : documentElement, string + output : NA + """ + # Add XML comments + # Create a cluster-level information to add to the root node + clusterInfo = g_dom.createElement("CLUSTER") + rootNode.appendChild(clusterInfo) + clusterMap = {} + # get clusterInfo + clusterMap["clusterName"] = self.__getEnvironmentParameterValue( + "GS_CLUSTER_NAME", user) + clusterMap["nodeNames"] = ",".join(self.getClusterNodeNames()) + clusterMap["gaussdbAppPath"] = self.appPath + clusterMap["gaussdbLogPath"] = self.logPath + clusterMap["gaussdbToolPath"] = self.__getEnvironmentParameterValue( + "GPHOME", user) + clusterMap["tmpMppdbPath"] = self.__getEnvironmentParameterValue( + "PGHOST", user) + if len(self.newNodes) > 0: + clusterMap["sqlExpandNames"] = ",".join( + [dbNode.name for dbNode in self.newNodes]) + # save clusterInfo + for (key, value) in clusterMap.items(): + clusterInfo.appendChild(self.saveOneClusterConfigItem(key, value)) + + def saveNodeLevelData(self, rootNode): + """ + function : save node level data info. + input : documentElement + output : NA + """ + # add node-level information + # Node deployment information on each server + devlistInfo = g_dom.createElement("DEVICELIST") + rootNode.appendChild(devlistInfo) + (cmInfoMap, gtmInfoMap) = self.getCmAndGtmInfo() + i = 100000 + for dbNode in self.dbNodes: + i += 1 + # Node deployment information on the dbNode + perDevInfo = g_dom.createElement("DEVICE") + perDevInfo.setAttribute("sn", "%d" % i) + devlistInfo.appendChild(perDevInfo) + # save name, backIp, sshIp on the dbNode + perDevInfo.appendChild( + self.saveOneClusterConfigItem("name", dbNode.name)) + self.saveIPsItem(perDevInfo, "backIp", dbNode.backIps) + self.saveIPsItem(perDevInfo, "sshIp", dbNode.sshIps) + + # save CM info + self.saveCmsInfo(perDevInfo, dbNode, cmInfoMap) + # save GTM info + + self.savegGtmsInfo(perDevInfo, dbNode, gtmInfoMap) + # save CN info + + self.saveCnInfo(perDevInfo, dbNode) + # save ETCD info + + self.saveEtcdInfo(perDevInfo, dbNode) + # save DB info + self.saveDnInfo(perDevInfo, dbNode) + + def saveCmsInfo(self, devInfo, dbNode, cmInfoMap): + """ + function : get GTM instance info. 
+        input : NA
+        output : NA
+        """
+        # CM deployment information
+        cms_num = len(dbNode.cmservers)
+        # Save the CM main information on the CM master node
+        if cms_num > 0 and dbNode.cmservers[0].instanceType == MASTER_INSTANCE:
+            for key in list(cmInfoMap.keys()):
+                # if the key is ip info, it has already been saved as IPs
+                if key in ("cmServerListenIp", "cmServerHaIp"):
+                    self.saveIPsItem(devInfo, key, cmInfoMap[key])
+                else:
+                    devInfo.appendChild(
+                        self.saveOneClusterConfigItem(key, cmInfoMap[key]))
+        else:
+            # Save the cmsNum,cmDir,cmServerPortBase,cmServerPortStandby of
+            # CM information on the other nodes
+            devInfo.appendChild(self.saveOneClusterConfigItem("cmsNum", "0"))
+            for key in ("cmDir", "cmServerPortBase", "cmServerPortStandby"):
+                devInfo.appendChild(
+                    self.saveOneClusterConfigItem(key, cmInfoMap[key]))
+
+    def savegGtmsInfo(self, devInfo, dbNode, gtmInfoMap):
+        """
+        function : save GTM instance info.
+        input : NA
+        output : NA
+        """
+        # GTM deployment information
+        gtm_num = len(dbNode.gtms)
+        # Save the gtm main information on the gtm master node
+        if gtm_num > 0 and dbNode.gtms[0].instanceType == MASTER_INSTANCE:
+            for key in list(gtmInfoMap.keys()):
+                if key in ("gtmListenIp", "gtmHaIp"):
+                    # if the key is ip info, it has already been saved as IPs
+                    self.saveIPsItem(devInfo, key, gtmInfoMap[key])
+                else:
+                    devInfo.appendChild(
+                        self.saveOneClusterConfigItem(key, gtmInfoMap[key]))
+        else:
+            # Save the gtmNum,gtmPortBase,gtmPortStandby of gtm information
+            # on the other nodes
+            devInfo.appendChild(self.saveOneClusterConfigItem("gtmNum", "0"))
+            for key in ("gtmPortBase", "gtmPortStandby"):
+                devInfo.appendChild(
+                    self.saveOneClusterConfigItem(key, gtmInfoMap[key]))
+
+    def saveCnInfo(self, devInfo, dbNode):
+        """
+        function : save CN instance info.
+        input : NA
+        output : NA
+        """
+        if len(dbNode.coordinators) == 0:
+            return
+        # CN deployment information
+        # get CN instance
+        cnInst = dbNode.coordinators[0]
+        cnInfoMap = {}
+        # get CN instance element
+        cnInfoMap["cooNum"] = '1'
+        cnInfoMap["cooPortBase"] = str(cnInst.port)
+        cnInfoMap["cooDir1"] = cnInst.datadir
+        # save CN instance element
+        for key in ["cooNum", "cooPortBase", "cooDir1"]:
+            devInfo.appendChild(
+                self.saveOneClusterConfigItem(key, cnInfoMap[key]))
+        # If listenIp is the same as backIp, no listenIp is generated
+        if dbNode.backIps != cnInst.listenIps:
+            self.saveIPsItem(devInfo, "cooListenIp", cnInst.listenIps)
+
+    def saveEtcdInfo(self, devInfo, dbNode):
+        """
+        function : save ETCD instance info.
+        input : NA
+        output : NA
+        """
+        if len(dbNode.etcds) == 0:
+            return
+        # ETCD deployment information
+        # get etcd instance
+        etcdInst = dbNode.etcds[0]
+        etcdInfoMap = {}
+        # get etcd instance element
+        etcdInfoMap["etcdNum"] = '1'
+        etcdInfoMap["etcdListenPort"] = str(etcdInst.port)
+        etcdInfoMap["etcdHaPort"] = str(etcdInst.haPort)
+        etcdInfoMap["etcdDir1"] = etcdInst.datadir
+        # save etcd instance element
+        for key in ["etcdNum", "etcdListenPort", "etcdHaPort", "etcdDir1"]:
+            devInfo.appendChild(
+                self.saveOneClusterConfigItem(key, etcdInfoMap[key]))
+        # If listenIp is the same as backIp, no listenIp is generated
+        if dbNode.backIps != etcdInst.listenIps:
+            self.saveIPsItem(devInfo, "etcdListenIp", etcdInst.listenIps)
+        # If haIp is the same as backIp, no haIp is generated
+        if dbNode.backIps != etcdInst.haIps:
+            self.saveIPsItem(devInfo, "etcdHaIp", etcdInst.haIps)
+
+    def saveDnInfo(self, devInfo, dbNode):
+        """
+        function : save DN instance info.
+ input : NA + output : NA + """ + if len(dbNode.datanodes) == 0: + return + # get DN deployment information + dnInfoMap = {} + dnInfoMap["dataNum"] = str(dbNode.dataNum) + i = 0 + totalListenIps = {} + totalHaIps = {} + flag_j1 = 0 + flag_j2 = 0 + isDnPortBase = True + isDnPortStandby = True + isDnPortDummyStandby = True + for dnInst in dbNode.datanodes: + # get the first standby DN instance port on the current node + if (dnInst.instanceType == STANDBY_INSTANCE and isDnPortStandby): + dnInfoMap["dataPortStandby"] = str(dnInst.port) + isDnPortStandby = False + # get the first dummy standby DN instance port on the current node + if (dnInst.instanceType == DUMMY_STANDBY_INSTANCE and + isDnPortDummyStandby): + dnInfoMap["dataPortDummyStandby"] = str(dnInst.port) + isDnPortDummyStandby = False + + if (dnInst.instanceType == MASTER_INSTANCE): + # get the first base DN instance port on the current node + if (isDnPortBase): + dnInfoMap["dataPortBase"] = str(dnInst.port) + isDnPortBase = False + i += 1 + # get the peer instances of the master DN + instances = self.getPeerInstance(dnInst) + for inst in instances: + if (inst.instanceType == STANDBY_INSTANCE): + standby_inst = inst + elif (inst.instanceType == DUMMY_STANDBY_INSTANCE): + dummy_inst = inst + dnInfoMap["dataNode%d" % i] = "%s,%s,%s,%s,%s" \ + % (dnInst.datadir, + standby_inst.hostname, + standby_inst.datadir, + dummy_inst.hostname, + dummy_inst.datadir) + standby_node = self.getDbNodeByName(standby_inst.hostname) + dummy_node = self.getDbNodeByName(dummy_inst.hostname) + # Get DN listen IP and ha IP + for j1 in range(len(dnInst.listenIps)): + # listen IP is not generated based on the default only + # need backUp + if dnInst.listenIps[j1] != dbNode.backIps[0] or \ + standby_inst.listenIps[j1] != \ + standby_node.backIps[0] or \ + dummy_inst.listenIps[j1] != dummy_node.backIps[0]: + # single DN configure multiple listene IP + if flag_j1 == 0: + totalListenIps[j1] = ("%s,%s,%s" % ( + dnInst.listenIps[j1], + standby_inst.listenIps[j1], + dummy_inst.listenIps[j1])) + flag_j1 += 1 + else: + totalListenIps[j1] += (",%s,%s,%s" % ( + dnInst.listenIps[j1], + standby_inst.listenIps[j1], + dummy_inst.listenIps[j1])) + for j2 in range(len(dnInst.haIps)): + if dnInst.haIps[j2] != dbNode.backIps[0] or \ + standby_inst.haIps[j2] != standby_node.backIps[0] \ + or dummy_inst.haIps[j2] != dummy_node.backIps[0]: + if flag_j2 == 0: + totalHaIps[j2] = ("%s,%s,%s" % ( + dnInst.haIps[j2], standby_inst.haIps[j2], + dummy_inst.haIps[j2])) + flag_j2 += 1 + else: + totalHaIps[j2] += ("%s,%s,%s" % ( + dnInst.haIps[j2], standby_inst.haIps[j2], + dummy_inst.haIps[j2])) + for key in ["dataNum", "dataPortBase", "dataPortStandby", + "dataPortDummyStandby"]: + devInfo.appendChild( + self.saveOneClusterConfigItem(key, dnInfoMap[key])) + self.saveIPsItem(devInfo, "dataListenIp", + list(totalListenIps.values())) + self.saveIPsItem(devInfo, "dataHaIp", list(totalHaIps.values())) + for key in list(dnInfoMap.keys()): + if key not in ["dataNum", "dataPortBase", "dataPortStandby", + "dataPortDummyStandby"]: + devInfo.appendChild( + self.saveOneClusterConfigItem(key, dnInfoMap[key])) + + def getCmAndGtmInfo(self): + """ + function : get gtm and cm instance info. 
+ input : NA + output :(MapData, MapData) + """ + cmInfoMap = {} + gtmInfoMap = {} + for dbNode in self.dbNodes: + + if len(dbNode.cmservers) > 0: + cmsInst = dbNode.cmservers[0] + if cmsInst.instanceType == MASTER_INSTANCE: + instances = self.getPeerInstance(cmsInst) + cmPeerInst = instances[0] + cmInfoMap["cmsNum"] = '1' + cmInfoMap["cmDir"] = dbNode.cmDataDir + cmInfoMap["cmServerPortBase"] = str(cmsInst.port) + cmInfoMap["cmServerPortStandby"] = str(cmPeerInst.port) + cmInfoMap["cmServerRelation"] = "%s,%s" % ( + cmsInst.hostname, cmPeerInst.hostname) + cmInfoMap["cmServerlevel"] = str(cmsInst.level) + cmInfoMap["cmServerListenIp"] = self.getIpList( + cmsInst.listenIps, cmPeerInst.listenIps, + dbNode.backIps[0]) + cmInfoMap["cmServerHaIp"] = self.getIpList( + cmsInst.haIps, cmPeerInst.haIps, dbNode.backIps[0]) + if len(dbNode.gtms) > 0: + gtmInst = dbNode.gtms[0] + if gtmInst.instanceType == MASTER_INSTANCE: + gtmPeerInst = self.getPeerInstance(gtmInst)[0] + gtmInfoMap["gtmNum"] = '1' + gtmInfoMap["gtmDir1"] = "%s,%s,%s" % ( + gtmInst.datadir, gtmPeerInst.hostname, + gtmPeerInst.datadir) + gtmInfoMap["gtmPortBase"] = str(gtmInst.port) + gtmInfoMap["gtmPortStandby"] = str(gtmPeerInst.port) + gtmInfoMap["gtmRelation"] = "%s,%s" % ( + gtmInst.hostname, gtmPeerInst.hostname) + gtmInfoMap["gtmListenIp"] = self.getIpList( + gtmInst.listenIps, gtmPeerInst.listenIps, + dbNode.backIps[0]) + gtmInfoMap["gtmHaIp"] = self.getIpList(gtmInst.haIps, + gtmPeerInst.haIps, + dbNode.backIps[0]) + + return (cmInfoMap, gtmInfoMap) + + def getIpList(self, masterInstIps, standbyInstIps, nodeBackIp): + """ + function : get ip data from master, standby instance of gtm and cm. + input : ips + output : ipList + """ + ipList = [] + for i in range(len(masterInstIps)): + if masterInstIps[i] != nodeBackIp: + ipList.append("%s,%s" % (masterInstIps[i], standbyInstIps[i])) + return ipList + + def saveIPsItem(self, devInfo, ipType, ips): + """ + function : save IP type data to XML parameter + input : ips + output : NA + """ + for i in range(len(ips)): + devInfo.appendChild( + self.saveOneClusterConfigItem("%s%d" % (ipType, i + 1), + ips[i])) + + def saveOneClusterConfigItem(self, paramName, paramValue): + """ + function : save param info and return it + input : paraName, paraValue + output : Element object + """ + paramInfo = g_dom.createElement("PARAM") + paramInfo.setAttribute("name", paramName) + paramInfo.setAttribute("value", paramValue) + return paramInfo + + def listToCSV(self, obj): + """ + convert a list (like IPs) to comma-sep string for XML + """ + return ','.join(map(str, obj)) + + def __writeWithIndent(self, fp, line, indent): + """ + write the XML content with indentation + """ + fp.write('%s%s\n' % (' ' * indent * 2, line)) + + def generateXMLFromStaticConfigFile(self, user, static_config_file, + xmlFilePath, version=201, + newNodeNames=None): + """ + function : Generate cluster installation XML from static + configuration file + input : String,String,String + output : Cluster installation XML file + """ + fp = None + indent = 0 + if newNodeNames is None: + newNodeNames = [] + + # Write XML header + ## file permission added to make it with 600 + fp = os.fdopen(os.open(xmlFilePath, os.O_WRONLY | os.O_CREAT, + KEY_FILE_PERMISSION), "w") + self.__writeWithIndent(fp, '', + indent) + + # Get cluster info from ClusterStatic + if (static_config_file is not None): + # get cluster version + cluster_version = self.getClusterVersion(static_config_file) + self.initFromStaticConfig(user, static_config_file) + else: + 
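+            # No static configuration file was given; fall back to the
+            # caller-supplied version (default 201) and rely on the cluster
+            # information already populated on this object.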
cluster_version = version + # Cluster header + indent += 1 + self.__writeWithIndent(fp, '', indent) + self.__writeWithIndent(fp, '', indent) + indent += 1 + self.__writeWithIndent(fp, + '' % + self.name, + indent) + + nodeList = self.getClusterNodeNames() + nodeNames = '' + for item in nodeList: + nodeNames += str(item) + "," + nodeNames = nodeNames[:-1] + backIps = ",".join([node.backIps[0] for node in self.dbNodes]) + self.__writeWithIndent(fp, + '' % + nodeNames, + indent) + self.__writeWithIndent(fp, + '' + % self.appPath, + indent) + self.__writeWithIndent(fp, + '' + % self.logPath, + indent) + self.__writeWithIndent(fp, + '' % + self.tmpPath, + indent) + self.__writeWithIndent(fp, + '' + % self.toolPath, + indent) + self.__writeWithIndent(fp, + '' % + backIps, + indent) + if newNodeNames: + self.__writeWithIndent(fp, + '' % ','.join( + newNodeNames), indent) + if self.isMiniaturizedDeployment(cluster_version): + self.__writeWithIndent(fp, '', indent) + elif self.isSinglePrimaryMultiStandbyDeployment(cluster_version): + self.__writeWithIndent(fp, + '', + indent) + indent -= 1 + self.__writeWithIndent(fp, '', indent) + self.__writeWithIndent(fp, '', indent) + + # + ctr = 1000001 + # For each node + for local_dbn in self.dbNodes: + # Device beginning + self.__writeWithIndent(fp, '' % (str(ctr)), indent) + + indent += 1 + self.__writeWithIndent(fp, '' % ( + local_dbn.name), indent) + if self.isSinglePrimaryMultiStandbyDeployment(cluster_version): + self.__writeWithIndent(fp, + '' % ( + local_dbn.azName), indent) + self.__writeWithIndent(fp, + '' % ( + local_dbn.azPriority), indent) + self.__writeWithIndent(fp, + '' % ( + self.listToCSV(local_dbn.backIps)), + indent) + self.__writeWithIndent(fp, '' % ( + self.listToCSV(local_dbn.sshIps)), indent) + self.__writeWithIndent(fp, '' % ( + local_dbn.cmDataDir), indent) + if not self.isMiniaturizedDeployment( + cluster_version) and local_dbn.virtualIp: + self.__writeWithIndent(fp, + '' + % (self.listToCSV(local_dbn.virtualIp)), + indent) + + if not self.isMiniaturizedDeployment(cluster_version): + # ETCD beginning + if (local_dbn.etcdNum > 0): + # Common part + self.__writeWithIndent(fp, '', indent) + self.__writeWithIndent(fp, + '' % ( + local_dbn.etcdNum), indent) + self.__writeWithIndent(fp, + '' + % (local_dbn.etcds[0].port), indent) + self.__writeWithIndent(fp, + '' + % (local_dbn.etcds[0].haPort), + indent) + + # Repeated part + i = 1 + for etcdInst in local_dbn.etcds: + self.__writeWithIndent(fp, '' + % (i, etcdInst.datadir), + indent) + self.__writeWithIndent(fp, + '' % ( + i, self.listToCSV( + etcdInst.listenIps)), + indent) + self.__writeWithIndent(fp, + '' % + (i, + self.listToCSV( + etcdInst.haIps)), + indent) + i += 1 + # ETCD ending + + # CM beginning + if len(local_dbn.cmservers) > 0 and \ + local_dbn.cmservers[0].instanceType == MASTER_INSTANCE: + try: + cmsInst = local_dbn.cmservers[0] + self.__writeWithIndent(fp, + '', + indent) + self.__writeWithIndent(fp, + '' + % (local_dbn.cmsNum), indent) + self.__writeWithIndent(fp, + '' + % (cmsInst.port), indent) + self.__writeWithIndent(fp, + '' + % (cmsInst.level), indent) + self.__writeWithIndent(fp, + '' % (local_dbn.cmDataDir), + indent) + if not self.isMiniaturizedDeployment(cluster_version): + peerInst_listenIps = '' + peerInst_haIps = '' + peerInst_hostname = '' + peerInst_port = 0 + masterInst = None + for peerInst in self.getPeerInstance(cmsInst): + peerInst_listenIps = peerInst_listenIps + \ + peerInst.listenIps[0] + ',' + peerInst_haIps = peerInst_haIps \ + + 
peerInst.haIps[0] + ',' + peerInst_port = peerInst.port + peerInst_hostname = peerInst_hostname + \ + peerInst.hostname + ',' + if peerInst.instanceType == MASTER_INSTANCE: + masterInst = peerInst + + if cmsInst.instanceType == STANDBY_INSTANCE: + peerInst_listenIps = '' + peerInst_haIps = '' + for secPeerInst in self.getPeerInstance( + masterInst): + peerInst_listenIps = peerInst_listenIps + \ + secPeerInst.listenIps[0] \ + + ',' + peerInst_haIps = peerInst_haIps + \ + secPeerInst.haIps[0] + ',' + else: + masterInst = cmsInst + + self.__writeWithIndent( + fp, '' + % (masterInst.listenIps[0], + peerInst_listenIps[:-1]), indent) + self.__writeWithIndent( + fp, '' + % (cmsInst.hostname, + peerInst_hostname[:-1]), indent) + else: + self.__writeWithIndent( + fp, '' + % (cmsInst.listenIps[0]), indent) + except IndexError: + # No CM in this instance - make blank entry... + self.__writeWithIndent( + fp, '', indent) + self.__writeWithIndent(fp, + '', + indent) + self.__writeWithIndent(fp, + '' + % (MASTER_BASEPORT_CMS), indent) + self.__writeWithIndent(fp, + '', indent) + self.__writeWithIndent(fp, + '', indent) + self.__writeWithIndent(fp, + '' + % (local_dbn.cmDataDir), indent) + if not self.isMiniaturizedDeployment(cluster_version): + self.__writeWithIndent( + fp, '' % (STANDBY_BASEPORT_CMS), + indent) + self.__writeWithIndent( + fp, '' % (local_dbn.name, + local_dbn.name), indent) + # CM ending + + # gtm beginning + if len(local_dbn.gtms) > 0 and local_dbn.gtms[0].instanceType == \ + MASTER_INSTANCE: + try: + gtmInst = local_dbn.gtms[0] + self.__writeWithIndent(fp, '', indent) + self.__writeWithIndent(fp, '' + % (local_dbn.gtmNum), indent) + self.__writeWithIndent(fp, '' + % (gtmInst.port), indent) + # No GTM in this instance - make blank entry... + if not self.isMiniaturizedDeployment(cluster_version): + peerInst_listenIps = '' + peerInst_haIps = '' + peerInst_hostname = '' + peerInst_hostname_datadir = '' + for peerInst in self.getPeerInstance(gtmInst): + peerInst_listenIps = peerInst_listenIps + \ + peerInst.listenIps[0] + ',' + peerInst_haIps = peerInst_haIps \ + + peerInst.haIps[0] + ',' + peerInst_port = peerInst.port + peerInst_hostname = peerInst_hostname + \ + peerInst.hostname + ',' + peerInst_hostname_datadir = \ + peerInst_hostname_datadir + peerInst.hostname \ + + ',' + peerInst.datadir + ',' + if not self.isSinglePrimaryMultiStandbyDeployment( + cluster_version): + self.__writeWithIndent( + fp, '' % (peerInst_port), indent) + self.__writeWithIndent(fp, + '' + % (gtmInst.haPort), indent) + self.__writeWithIndent(fp, + '' + % (gtmInst.listenIps[0], + peerInst_listenIps[:-1]), + indent) + self.__writeWithIndent(fp, + '' + % (gtmInst.haIps[0], + peerInst_haIps[:-1]), indent) + self.__writeWithIndent(fp, + '' + % (gtmInst.datadir, + peerInst_hostname_datadir[:-1]), + indent) + self.__writeWithIndent(fp, + '' + % (gtmInst.hostname, + peerInst_hostname[:-1]), + indent) + else: + self.__writeWithIndent(fp, + '' + % (gtmInst.listenIps[0]), + indent) + self.__writeWithIndent(fp, + '' + % (gtmInst.datadir), indent) + except IndexError: + self.__writeWithIndent(fp, + '', indent) + self.__writeWithIndent(fp, + '', + indent) + self.__writeWithIndent(fp, + '' + % (MASTER_BASEPORT_GTM), indent) + self.__writeWithIndent(fp, + '', indent) + self.__writeWithIndent(fp, + '', + indent) + if not self.isMiniaturizedDeployment(cluster_version): + if not self.isSinglePrimaryMultiStandbyDeployment( + cluster_version): + self.__writeWithIndent( + fp, '' % (STANDBY_BASEPORT_GTM), indent) + 
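+                    # Continue writing placeholder GTM parameters so the
+                    # generated XML stays structurally complete for nodes
+                    # that host no GTM instance.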
self.__writeWithIndent(fp, + '', indent) + self.__writeWithIndent(fp, + '', indent) + self.__writeWithIndent(fp, + '' + % (local_dbn.name, + local_dbn.name), indent) + # gtm ending + + # cn beginning + if (local_dbn.cooNum > 0): + for cooInst in local_dbn.coordinators: + self.__writeWithIndent(fp, '', + indent) + self.__writeWithIndent(fp, + '' + % (local_dbn.cooNum), indent) + self.__writeWithIndent(fp, '' + % (cooInst.port), indent) + self.__writeWithIndent(fp, + '' + % (self.listToCSV( + cooInst.listenIps)), indent) + self.__writeWithIndent(fp, + '' % (cooInst.datadir), + indent) + if not self.isMiniaturizedDeployment(cluster_version): + self.__writeWithIndent(fp, '', indent) + # cn ending + + # dn beginning + if (local_dbn.dataNum > 0 and local_dbn.datanodes[ + 0].instanceType == MASTER_INSTANCE): + # Find master DN + dnList = [dn for dn in local_dbn.datanodes if + dn.instanceRole == INSTANCE_ROLE_DATANODE and + dn.instanceType == MASTER_INSTANCE] + if len(dnList) == 0: + # No master DN found in this node, so skip... + indent -= 1 + self.__writeWithIndent(fp, '', indent) + ctr += 1 + continue + # Find min MasterDN port value + dnPort = dnList[0].port + for dn in dnList: + if dnPort > dn.port: + dnPort = dn.port + + if not self.isMiniaturizedDeployment(cluster_version) and not \ + self.isSinglePrimaryMultiStandbyDeployment( + cluster_version): + # Find min StandbyDN port and IP value - need to optimize + snList = [sn for sn in local_dbn.datanodes if + sn.instanceRole == INSTANCE_ROLE_DATANODE and + sn.instanceType == STANDBY_INSTANCE] + snPort = snList[0].port + for sn in snList: + if snPort > sn.port: + snPort = sn.port + + # Find min MasterDN port value - need to optimize + dsnList = [dsn for dsn in local_dbn.datanodes if + dsn.instanceRole == INSTANCE_ROLE_DATANODE and + dsn.instanceType == DUMMY_STANDBY_INSTANCE] + dsnPort = dsnList[0].port + for dsn in dsnList: + if dsnPort > dsn.port: + dsnPort = dsn.port + + if self.isSinglePrimaryMultiStandbyDeployment(cluster_version): + # Find min StandbyDN port and IP value - need to optimize + snList = [sn for sn in local_dbn.datanodes if + sn.instanceRole == INSTANCE_ROLE_DATANODE and + sn.instanceType == STANDBY_INSTANCE] + if snList: + snPort = snList[0].port + for sn in snList: + if snPort > sn.port: + snPort = sn.port + # DN common part (1/3) + self.__writeWithIndent(fp, '', indent) + self.__writeWithIndent(fp, + '' + % (local_dbn.dataNum), indent) + self.__writeWithIndent(fp, + '' % (dnPort), indent) + if not self.isMiniaturizedDeployment(cluster_version) and \ + not self.isSinglePrimaryMultiStandbyDeployment( + cluster_version): + self.__writeWithIndent(fp, + '' % (snPort), indent) + self.__writeWithIndent(fp, + '' % (dsnPort), + indent) + + i = 1 + dnInst = None + for dnInst in dnList: + if not self.isMiniaturizedDeployment(cluster_version): + # Find SNs + instances = self.getPeerInstance(dnInst) + snList = [sn for sn in instances if + sn.instanceRole == INSTANCE_ROLE_DATANODE and + sn.instanceType == STANDBY_INSTANCE] + snListenIP = '' + snHaIP = '' + snHostNm = '' + snDir = '' + sn_HostNm_Dir = '' + sn_Xlog_Dir = '' + if len(snList) == 0: + # Will it ever come here - can be removed??? 
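+                        # Kept as a defensive diagnostic; a master DN with
+                        # no standby peers is not expected in these
+                        # deployment modes.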
+ print("<> No SN found for DN(%s)" % ( + dnInst.name)) + else: + for sn in snList: + snListenIP = snListenIP + sn.listenIps[0] + ',' + snHostNm = snHostNm + sn.hostname + ',' + snDir = snDir + sn.datadir + ',' + sn_HostNm_Dir = sn_HostNm_Dir + sn.hostname \ + + ',' + sn.datadir + ',' + sn_Xlog_Dir = sn_Xlog_Dir + sn.xlogdir + ',' + snHaIP = snHaIP + sn.haIps[0] + ',' + + # Once only per Host, the ListenIP entry needs to + # be written. Part (2/3) + if i == 1: + if self.isMiniaturizedDeployment(cluster_version): + self.__writeWithIndent(fp, + '' % ( + dnInst.listenIps[0], + dnInst.listenIps[0]), + indent) + elif self.isSinglePrimaryMultiStandbyDeployment( + cluster_version): + self.__writeWithIndent( + fp, '' % (dnInst.listenIps[0], + snListenIP[:-1]), indent) + self.__writeWithIndent(fp, + '' + % (dnInst.listenIps[0], + snHaIP[:-1]), indent) + else: + self.__writeWithIndent(fp, + '' + % (dnInst.listenIps[0], + snListenIP[:-1]), indent) + # Find DSNs + if not self.isMiniaturizedDeployment(cluster_version) and \ + not self.isSinglePrimaryMultiStandbyDeployment( + cluster_version): + instances = self.getPeerInstance(dnInst) + dsnList = [dsn for dsn in instances if + dsn.instanceRole == INSTANCE_ROLE_DATANODE + and dsn.instanceType == + DUMMY_STANDBY_INSTANCE] + if len(dsnList) == 0: + # Will it ever come here - can be removed??? + print("<> No DSN found for DN(%s)" % ( + dnInst.name)) + dsnHostNm = '' + dsnDir = '' + else: + dsnHostNm = dsnList[0].hostname + dsnDir = dsnList[0].datadir + # DN repeated part (3/3) + if self.isMiniaturizedDeployment(cluster_version): + self.__writeWithIndent(fp, + '' + % (i, dnInst.datadir), indent) + elif self.isSinglePrimaryMultiStandbyDeployment( + cluster_version): + self.__writeWithIndent(fp, + '' + % (i, dnInst.datadir, + sn_HostNm_Dir[:-1]), indent) + if dnInst.xlogdir != '': + self.__writeWithIndent(fp, + '' + % (i, dnInst.xlogdir, + sn_Xlog_Dir[:-1]), + indent) + else: + self.__writeWithIndent(fp, + '' + % (i, dnInst.datadir, + snHostNm[:-1], snDir[:-1], + dsnHostNm, dsnDir), indent) + if dnInst.xlogdir != '': + self.__writeWithIndent(fp, + '' + % (i, dnInst.xlogdir, + sn_Xlog_Dir[:-1]), + indent) + self.__writeWithIndent(fp, + '' + % (i), indent) + i += 1 + if not self.isMiniaturizedDeployment(cluster_version): + self.__writeWithIndent(fp, + '' + % (dnInst.listenIps[0]), indent) + # dn ending + + # Device ending + indent -= 1 + self.__writeWithIndent(fp, '', indent) + ctr += 1 + self.__writeWithIndent(fp, '', indent) + self.__writeWithIndent(fp, '', indent) + fp.close() + + def __getInstsInNode(self, nodeName): + """ + function: get instance in specified node + input: node name + output: instances list + """ + for node in self.dbNodes: + if node.name == nodeName: + insts = node.etcds + node.cmservers + node.datanodes \ + + node.coordinators + node.gtses + return insts + return [] + + def __getAllInsts(self): + """ + function: get all instances + input: NA + output: all instances list + """ + insts = [] + for node in self.dbNodes: + insts += node.etcds + node.cmservers + node.datanodes \ + + node.coordinators + node.gtses + return insts + + def getInstances(self, nodeName=""): + """ + function: get instances in the cluster, if nodeName is specified, + return the instances in the ndoe + input: node name + output: all instances + """ + if nodeName: + insts = self.__getInstsInNode(nodeName) + else: + insts = self.__getAllInsts() + return insts + + def isSingleInstCluster(self): + return (self.clusterType == CLUSTER_TYPE_SINGLE_INST) + + def 
getEtcdAddress(self):
+        """
+        function : get the etcd client address list
+        input : NA
+        output : etcd address string
+        """
+        etcds = []
+        etcdAddress = ""
+        for node in self.dbNodes:
+            etcds += node.etcds
+        for etcd in etcds:
+            etcdAddress += "https://%s:%s," % (
+                etcd.listenIps[0], etcd.clientPort)
+        return etcdAddress.strip(",")
+
+    def mergeClusterInfo(self, oldClusterInfo, newClusterInfo):
+        """
+        function : merge the nodes of the new cluster into the old
+                   cluster info (used for expansion)
+        input : oldClusterInfo, newClusterInfo
+        output : NA
+        """
+        # should not modify newClusterInfo, so deepcopy
+        tmpClusterInfo = copy.deepcopy(newClusterInfo)
+
+        # name/clusterName are different between old and new cluster.
+        # clusterType/appPath/logPath/toolPath/tmpPath are same between old
+        # and new cluster.
+        self.name = tmpClusterInfo.name
+        self.clusterName = tmpClusterInfo.clusterName
+        self.clusterType = tmpClusterInfo.clusterType
+        self.appPath = tmpClusterInfo.appPath
+        self.logPath = tmpClusterInfo.logPath
+        self.toolPath = tmpClusterInfo.toolPath
+        self.tmpPath = tmpClusterInfo.tmpPath
+
+        # get max nodeId of old cluster.
+        maxNodeId = max(
+            [int(oldNode.id) for oldNode in oldClusterInfo.dbNodes])
+        maxNodeId += 1
+
+        for dbNode in tmpClusterInfo.dbNodes:
+            # CMS/GTM/ETCD will be dropped in the merged cluster.
+            dbNode.cmservers = []
+            dbNode.gtms = []
+            dbNode.etcds = []
+
+            # nodeId will be appended to the old cluster.
+            dbNode.id = maxNodeId
+            maxNodeId += 1
+
+        self.dbNodes = oldClusterInfo.dbNodes + tmpClusterInfo.dbNodes
+        self.newNodes = tmpClusterInfo.dbNodes
+
+    def isSingleNode(self):
+        return (self.__getDnInstanceNum() <= 1)
+
+    def doRefreshConf(self, user, localHostName, sshtool):
+        self.__createDynamicConfig(user, localHostName, sshtool)
+        self.__create_simple_datanode_config(user, localHostName, sshtool)
+        self.__reset_replconninfo(user, sshtool)
+
+    def __createDynamicConfig(self, user, localHostName, sshtool):
+        """
+        function : Save cluster info into the dynamic config
+        input : String, String, object
+        output : NA
+        """
+        # only one dn, no need to write primary or standby node info
+        dynamicConfigFile = self.__getDynamicConfig(user)
+        if os.path.exists(dynamicConfigFile):
+            cmd = "rm -f %s" % dynamicConfigFile
+            (status, output) = subprocess.getstatusoutput(cmd)
+            if status != 0:
+                raise Exception(ErrorCode.GAUSS_504["GAUSS_50407"]
+                                + " Error: \n%s."
% str(output) + + "The cmd is %s" % cmd) + fp = None + try: + g_file.createFileInSafeMode(dynamicConfigFile) + fp = open(dynamicConfigFile, "wb") + # len + info = struct.pack("I", 24) + # version + info += struct.pack("I", BIN_CONFIG_VERSION_SINGLE_INST) + # time + info += struct.pack("q", int(time.time())) + # node count + info += struct.pack("I", len(self.dbNodes)) + crc = binascii.crc32(info) + info = struct.pack("I", crc) + info + fp.write(info) + primaryDnNum = 0 + for dbNode in self.dbNodes: + offset = (fp.tell() // PAGE_SIZE + 1) * PAGE_SIZE + fp.seek(offset) + (primaryNodeNum, info) = self.__packDynamicNodeInfo( + dbNode, localHostName, sshtool) + primaryDnNum += primaryNodeNum + fp.write(info) + if primaryDnNum != 1: + raise Exception(ErrorCode.GAUSS_512["GAUSS_51230"] % + ("master dn", "equal to 1")) + endBytes = PAGE_SIZE - fp.tell() % PAGE_SIZE + if endBytes != PAGE_SIZE: + info = struct.pack("%dx" % endBytes) + fp.write(info) + fp.flush() + fp.close() + os.chmod(dynamicConfigFile, KEY_FILE_PERMISSION) + except Exception as e: + if fp: + fp.close() + cmd = "rm -f %s" % dynamicConfigFile + subprocess.getstatusoutput(cmd) + raise Exception(ErrorCode.GAUSS_502["GAUSS_50205"] % \ + "dynamic configuration file" + + " Error: \n%s" % str(e)) + try: + self.__sendDynamicCfgToAllNodes(localHostName, + dynamicConfigFile, + dynamicConfigFile) + except Exception as e: + cmd = "rm -f %s" % dynamicConfigFile + sshtool.getSshStatusOutput(cmd, self.getClusterNodeNames()) + raise Exception(ErrorCode.GAUSS_502["GAUSS_50205"] % \ + "dynamic configuration file" + + " Error: \n%s" % str(e)) + + def __create_simple_datanode_config(self, user, localhostname, sshtool): + simpleDNConfig = self.__getDynamicSimpleDNConfig(user) + if os.path.exists(simpleDNConfig): + cmd = "rm -f %s" % simpleDNConfig + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_504["GAUSS_50407"] + + " Error: \n%s." 
% str(output) + + "The cmd is %s" % cmd) + tempstatus = self.__getStatusByOM(user).split("|") + statusdic = {'Primary': 0, 'Standby': 1, 'Cascade': 3, 'Unknown': 9} + try: + with open(simpleDNConfig, "w") as fp: + for dninfo in tempstatus: + dnstatus = dninfo.split()[6] + dnname = dninfo.split()[1] + if dnstatus not in statusdic: + fp.write("%s=%d\n" % + (dnname, statusdic['Unknown'])) + else: + fp.write("%s=%d\n" % + (dnname, statusdic[dnstatus])) + except Exception as e: + cmd = "rm -f %s" % simpleDNConfig + subprocess.getstatusoutput(cmd) + raise Exception(ErrorCode.GAUSS_502["GAUSS_50205"] % + "dynamic configuration file" + + " Error: \n%s" % str(e)) + try: + self.__sendDynamicCfgToAllNodes(localhostname, + simpleDNConfig, + simpleDNConfig) + except Exception as e: + cmd = "rm -f %s" % simpleDNConfig + sshtool.getSshStatusOutput(cmd, self.getClusterNodeNames()) + raise Exception(ErrorCode.GAUSS_502["GAUSS_50205"] % + "dynamic configuration file" + + " Error: \n%s" % str(e)) + + def __reset_replconninfo(self, user, sshtool): + # add for cascade + local_script = os.path.dirname(os.path.realpath(__file__)) \ + + '/../../local/Resetreplconninfo.py' + cmd = "python3 %s -U %s -t reset" % (local_script, user) + (status, output) = \ + sshtool.getSshStatusOutput(cmd, self.getClusterNodeNames()) + for node in self.getClusterNodeNames(): + if status[node] != 'Success': + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] + % cmd + "Error:\n%s" % output) + + def __packDynamicNodeInfo(self, dbNode, localHostName, sshtool): + # node id + info = struct.pack("I", dbNode.id) + # node name + info += struct.pack("64s", dbNode.name.encode("utf-8")) + info += struct.pack("I", len(dbNode.datanodes)) + primaryNum = 0 + for dnInst in dbNode.datanodes: + self.__getDnState(dnInst, dbNode, localHostName, sshtool) + instanceType = 0 + if dnInst.localRole == "Primary": + instanceType = MASTER_INSTANCE + primaryNum += 1 + elif dnInst.localRole == "Cascade Standby": + instanceType = CASCADE_STANDBY + else: + instanceType = STANDBY_INSTANCE + info += struct.pack("I", dnInst.instanceId) + # datanode id + info += struct.pack("I", dnInst.mirrorId) + # instanceType such as master, standby, dumpstandby + info += struct.pack("I", instanceType) + # datadir + info += struct.pack("1024s", dnInst.datadir.encode("utf-8")) + info += struct.pack("I", 0) + info += struct.pack("I", 0) + crc = binascii.crc32(info) + return (primaryNum, struct.pack("I", crc) + info) + + def __getClusterSwitchTime(self, dynamicConfigFile): + """ + function : get cluster version information + from static configuration file + input : String + output : version + """ + fp = None + try: + fp = open(dynamicConfigFile, "rb") + info = fp.read(24) + (crc, lenth, version, switchTime, nodeNum) = \ + struct.unpack("=IIIqi", info) + fp.close() + except Exception as e: + if fp: + fp.close() + raise Exception(ErrorCode.GAUSS_512["GAUSS_51236"] + + " Error: \n%s." 
% str(e)) + return switchTime + + def __getDynamicConfig(self, user): + gaussHome = self.__getEnvironmentParameterValue("GAUSSHOME", user) + if gaussHome == "": + raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % \ + ("installation path of designated user [%s]" + % user)) + # if under upgrade, and use chose strategy, we may get a wrong path, + # so we will use the realpath of gausshome + gaussHome = os.path.realpath(gaussHome) + dynamicConfigFile = "%s/bin/cluster_dynamic_config" % gaussHome + return dynamicConfigFile + def __getDynamicSimpleDNConfig(self, user): + gaussHome = self.__getEnvironmentParameterValue("GAUSSHOME", user) + if gaussHome == "": + raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % \ + ("installation path of designated user [%s]" + % user)) + # if under upgrade, and use chose strategy, we may get a wrong path, + # so we will use the realpath of gausshome + gaussHome = os.path.realpath(gaussHome) + dynamicSimpleDNConfigFile = "%s/bin/cluster_dnrole_config" % gaussHome + return dynamicSimpleDNConfigFile + + def dynamicConfigExists(self, user): + dynamicConfigFile = self.__getDynamicConfig(user) + return os.path.exists(dynamicConfigFile) + + def checkClusterDynamicConfig(self, user, localHostName): + """ + function : make all the node dynamic config file is newest. + input : String + output : none + """ + if self.__getDnInstanceNum() <= 1: + return + gaussHome = self.__getEnvironmentParameterValue("GAUSSHOME", user) + if gaussHome == "": + raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % \ + ("installation path of designated user [%s]" + % user)) + # if under upgrade, and use chose strategy, we may get a wrong path, + # so we will use the realpath of gausshome + gaussHome = os.path.realpath(gaussHome) + dynamicConfigFile = "%s/bin/cluster_dynamic_config" % gaussHome + lastSwitchTime = 0 + lastDynamicConfigFile = "" + fileConsistent = False + fileExist = False + if os.path.exists(dynamicConfigFile): + lastSwitchTime = self.__getClusterSwitchTime(dynamicConfigFile) + lastDynamicConfigFile = dynamicConfigFile + fileExist = True + fileConsistent = True + for dbNode in self.dbNodes: + remoteDynamicConfigFile = "%s/bin/cluster_dynamic_config_%s" \ + % (gaussHome, dbNode.name) + if dbNode.name != localHostName: + cmd = "scp %s:%s %s" % ( + dbNode.name, dynamicConfigFile, remoteDynamicConfigFile) + status, output = subprocess.getstatusoutput(cmd) + if status: + if output.find("No such file or directory") >= 0: + fileConsistent = False + continue + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + " Error:\n" + output) + if os.path.exists(remoteDynamicConfigFile): + fileExist = True + switchTime = self.__getClusterSwitchTime( + remoteDynamicConfigFile) + if switchTime > lastSwitchTime: + lastSwitchTime = switchTime + lastDynamicConfigFile = remoteDynamicConfigFile + fileConsistent = False + elif switchTime < lastSwitchTime: + fileConsistent = False + # if dynamic config file exist, but file time is not same, + # send the valid file to all nodes + if fileExist: + if not fileConsistent: + self.__sendDynamicCfgToAllNodes(localHostName, + lastDynamicConfigFile, + dynamicConfigFile) + cleanCmd = "rm -f %s/bin/cluster_dynamic_config_*" % gaussHome + subprocess.getstatusoutput(cleanCmd) + + def __sendDynamicCfgToAllNodes(self, + localHostName, + sourceFile, + targetFile): + status = 0 + output = "" + for dbNode in self.dbNodes: + if dbNode.name == localHostName: + if sourceFile != targetFile: + cmd = "cp -f %s %s" % (sourceFile, targetFile) + status, output = 
subprocess.getstatusoutput(cmd) + else: + cmd = "scp %s %s:%s" % (sourceFile, dbNode.name, targetFile) + status, output = subprocess.getstatusoutput(cmd) + if status: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + " Error:\n" + output) + + def readDynamicConfig(self, user): + """ + function : read cluster information from dynamic configuration file + only used for start cluster after switchover + input : String + output : NA + """ + fp = None + try: + self.name = self.__getEnvironmentParameterValue("GS_CLUSTER_NAME", + user) + self.appPath = self.__getEnvironmentParameterValue("GAUSSHOME", + user) + logPathWithUser = self.__getEnvironmentParameterValue("GAUSSLOG", + user) + splitMark = "/%s" % user + # set log path without user + # find the path from right to left + self.logPath = \ + logPathWithUser[0:(logPathWithUser.rfind(splitMark))] + dynamicConfigFile = self.__getDynamicConfig(user) + # read dynamic_config_file + dynamicConfigFilePath = os.path.split(dynamicConfigFile)[0] + versionFile = os.path.join( + dynamicConfigFilePath, "upgrade_version") + version, number, commitid = VersionInfo.get_version_info( + versionFile) + fp = open(dynamicConfigFile, "rb") + if float(number) <= 92.200: + info = fp.read(28) + (crc, lenth, version, currenttime, nodeNum) = \ + struct.unpack("=qIIqi", info) + else: + info = fp.read(24) + (crc, lenth, version, currenttime, nodeNum) = \ + struct.unpack("=IIIqi", info) + totalMaterDnNum = 0 + for i in range(nodeNum): + offset = (fp.tell() // PAGE_SIZE + 1) * PAGE_SIZE + fp.seek(offset) + (dbNode, materDnNum) = self.__unpackDynamicNodeInfo(fp, number) + totalMaterDnNum += materDnNum + self.dbNodes.append(dbNode) + if totalMaterDnNum != 1: + raise Exception(ErrorCode.GAUSS_512["GAUSS_51230"] % + ("master dn", "1")) + fp.close() + except Exception as e: + if fp: + fp.close() + raise Exception(ErrorCode.GAUSS_502["GAUSS_50204"] % + dynamicConfigFile + " Error:\n" + str(e)) + + def __unpackDynamicNodeInfo(self, fp, number): + if float(number) <= 92.200: + info = fp.read(76) + (crc, nodeId, nodeName) = struct.unpack("=qI64s", info) + else: + info = fp.read(72) + (crc, nodeId, nodeName) = struct.unpack("=II64s", info) + nodeName = nodeName.decode().strip('\x00') + dbNode = dbNodeInfo(nodeId, nodeName) + info = fp.read(4) + (dataNodeNums,) = struct.unpack("=I", info) + dbNode.datanodes = [] + materDnNum = 0 + for i in range(dataNodeNums): + dnInst = instanceInfo() + dnInst.hostname = nodeName + info = fp.read(12) + (dnInst.instanceId, dnInst.mirrorId, dnInst.instanceType) = \ + struct.unpack("=III", info) + if dnInst.instanceType == MASTER_INSTANCE: + materDnNum += 1 + elif dnInst.instanceType not in [STANDBY_INSTANCE, + DUMMY_STANDBY_INSTANCE, CASCADE_STANDBY]: + raise Exception(ErrorCode.GAUSS_512["GAUSS_51204"] % + ("DN", dnInst.instanceType)) + info = fp.read(1024) + (datadir,) = struct.unpack("=1024s", info) + dnInst.datadir = datadir.decode().strip('\x00') + dbNode.datanodes.append(dnInst) + return (dbNode, materDnNum) diff --git a/script/gspylib/common/DbClusterStatus.py b/script/gspylib/common/DbClusterStatus.py new file mode 100644 index 0000000..ed122d2 --- /dev/null +++ b/script/gspylib/common/DbClusterStatus.py @@ -0,0 +1,1009 @@ +# -*- coding:utf-8 -*- +############################################################################# +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. 
+# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +# Description : DbClusterStatus.py is a utility to get cluster status +# information. +############################################################################# +import os +import sys +import time + +sys.path.append(sys.path[0] + "/../../") +from gspylib.common.Common import DefaultValue, ClusterInstanceConfig +from gspylib.common.DbClusterInfo import dbClusterInfo +from gspylib.common.ErrorCode import ErrorCode + +########################### +# instance type. only for CN/DN +########################### +INSTANCE_TYPE_UNDEFINED = -1 +# master +MASTER_INSTANCE = 0 +# standby +STANDBY_INSTANCE = 1 +# dummy standby +DUMMY_STANDBY_INSTANCE = 2 + +# Global parameter +g_clusterInfo = None +g_instanceInfo = None +g_clusterInfoInitialized = False +g_deletedCNId = [] + + +class StatusReport(): + """ + classdocs + """ + + def __init__(self): + """ + Constructor + """ + self.nodeCount = 0 + self.cooNormal = 0 + self.cooAbnormal = 0 + self.gtmPrimary = 0 + self.gtmStandby = 0 + self.gtmAbnormal = 0 + self.gtmDown = 0 + self.dnPrimary = 0 + self.dnStandby = 0 + self.dnDummy = 0 + self.dnBuild = 0 + self.dnAbnormal = 0 + self.dnDown = 0 + self.fencedUDFNormal = 0 + self.fencedUDFAbnormal = 0 + + +class DbInstanceStatus(): + """ + classdocs + """ + + def __init__(self, nodeId, instId=0): + """ + Constructor + """ + self.nodeId = nodeId + self.nodeIp = "" + self.instanceId = instId + self.datadir = "" + self.type = "" + self.status = "" + self.detail_status = "" + self.haStatus = "" + self.detail_ha = "" + self.connStatus = "" + self.detail_conn = "" + self.syncStatus = "" + self.reason = "" + + def __str__(self): + """ + """ + retStr = "nodeId=%s,instanceId=%s,datadir=%s,type=%s,status=%s," \ + "haStatus=%s,connStatus=%s,syncStatus=%s,reason=%s" % \ + (self.nodeId, self.instanceId, self.datadir, self.type, + self.status, self.haStatus, self.connStatus, + self.syncStatus, self.reason) + + return retStr + + def isInstanceHealthy(self): + """ + function : Check if instance is healthy + input : NA + output : boolean + """ + # check DB instance + if self.type == DbClusterStatus.INSTANCE_TYPE_DATANODE: + if self.status == DbClusterStatus.INSTANCE_STATUS_PRIMARY: + return True + elif self.status == DbClusterStatus.INSTANCE_STATUS_DUMMY: + return True + elif self.status == DbClusterStatus.INSTANCE_STATUS_STANDBY: + if self.haStatus != DbClusterStatus.HA_STATUS_NORMAL: + return False + else: + return False + + return True + + def isCNDeleted(self): + """ + function : Check if CN instance state is Deleted + input : NA + output : boolean + """ + # check CN instance + if (self.type == DbClusterStatus.INSTANCE_TYPE_COORDINATOR and + self.instanceId in g_deletedCNId): + return True + return False + + +class DbNodeStatus(): + """ + classdocs + """ + + def __init__(self, nodeId): + """ + Constructor + """ + self.id = nodeId + self.name = "" + self.version = "" + self.coordinators = [] + self.gtms = [] + self.datanodes = [] + + self.cmservers = [] + self.primaryDNs = [] + self.standbyDNs = [] + self.dummies = [] + self.fencedUDFs = [] + self.etcds = [] + + def __str__(self): + """ + """ + retStr 
= "NodeId=%s,HostName=%s" % (self.id, self.name) + + for cmsInst in self.cmservers: + retStr += "\n%s" % str(cmsInst) + for gtmInst in self.gtms: + retStr += "\n%s" % str(gtmInst) + for cooInst in self.coordinators: + retStr += "\n%s" % str(cooInst) + for dataInst in self.datanodes: + retStr += "\n%s" % str(dataInst) + for dataInst in self.etcds: + retStr += "\n%s" % str(dataInst) + for udfInst in self.fencedUDFs: + retStr += "\n%s" % str(udfInst) + + return retStr + + def isNodeHealthy(self): + """ + function : Check if node is healthy + input : NA + output : boolean + """ + # get CN, DB and gtm instance + instances = self.datanodes + # check if node is healthy + for inst in instances: + if (not inst.isInstanceHealthy()): + return False + + return True + + def getNodeStatusReport(self): + """ + function : Get the status report of node + input : NA + output : report + """ + # init class StatusReport + report = StatusReport() + for inst in self.coordinators: + if (inst.status == DbClusterStatus.INSTANCE_STATUS_NORMAL): + report.cooNormal += 1 + else: + report.cooAbnormal += 1 + + for inst in self.gtms: + if (inst.status == DbClusterStatus.INSTANCE_STATUS_PRIMARY): + report.gtmPrimary += 1 + elif (inst.status == DbClusterStatus.INSTANCE_STATUS_STANDBY): + if (inst.connStatus == DbClusterStatus.CONN_STATUS_NORMAL): + report.gtmStandby += 1 + else: + report.gtmAbnormal += 1 + elif (inst.status == DbClusterStatus.INSTANCE_STATUS_DOWN): + report.gtmDown += 1 + else: + report.gtmAbnormal += 1 + + for inst in self.datanodes: + if (inst.status == DbClusterStatus.INSTANCE_STATUS_PRIMARY): + report.dnPrimary += 1 + elif (inst.status == DbClusterStatus.INSTANCE_STATUS_STANDBY): + if (inst.haStatus == DbClusterStatus.HA_STATUS_NORMAL): + report.dnStandby += 1 + elif (inst.haStatus == DbClusterStatus.HA_STATUS_BUILD): + report.dnBuild += 1 + else: + report.dnAbnormal += 1 + elif (inst.status == DbClusterStatus.INSTANCE_STATUS_DOWN): + report.dnDown += 1 + elif (inst.status == DbClusterStatus.INSTANCE_STATUS_DUMMY): + report.dnDummy += 1 + else: + report.dnAbnormal += 1 + + # check fenced UDF instance + for inst in self.fencedUDFs: + if (inst.status == DbClusterStatus.INSTANCE_STATUS_NORMAL): + report.fencedUDFNormal += 1 + else: + report.fencedUDFAbnormal += 1 + + return report + + def outputNodeStatus(self, stdout, user, showDetail=False): + """ + function : output the status of node + input : stdout, user + output : NA + """ + global g_clusterInfo + global g_instanceInfo + global g_clusterInfoInitialized + if not g_clusterInfoInitialized: + DefaultValue.checkUser(user) + g_clusterInfo = dbClusterInfo() + g_clusterInfo.initFromStaticConfig(user) + g_clusterInfoInitialized = True + dbNode = g_clusterInfo.getDbNodeByName(self.name) + instName = "" + # print node information + print("%-20s: %s" % ("node", str(self.id)), file=stdout) + print("%-20s: %s" % ("node_name", self.name), file=stdout) + if (self.isNodeHealthy()): + print("%-20s: %s\n" % ("node_state", + DbClusterStatus.OM_NODE_STATUS_NORMAL), + file=stdout) + else: + print("%-20s: %s\n" % ("node_state", + DbClusterStatus.OM_NODE_STATUS_ABNORMAL), + file=stdout) + + if (not showDetail): + return + + # coordinator status + for inst in self.coordinators: + # get the instance info + g_instanceInfo = None + for instInfo in dbNode.coordinators: + if instInfo.instanceId == inst.instanceId: + g_instanceInfo = instInfo + break + if not g_instanceInfo: + raise Exception(ErrorCode.GAUSS_516["GAUSS_51620"] % "CN") + # construct the instance name + instName = 
"cn_%s" % g_instanceInfo.instanceId + # print CN instance information + print("Coordinator", file=stdout) + print("%-20s: %d" % (" node", inst.nodeId), file=stdout) + print("%-20s: %s" % (" instance_name", instName), file=stdout) + print("%-20s: %s" % (" listen_IP", g_instanceInfo.listenIps), + file=stdout) + print("%-20s: %d" % (" port", g_instanceInfo.port), file=stdout) + print("%-20s: %s" % (" data_path", inst.datadir), file=stdout) + print("%-20s: %s" % (" instance_state", inst.status), + file=stdout) + print("", file=stdout) + + for inst in self.gtms: + # get the instance info + g_instanceInfo = None + for instInfo in dbNode.gtms: + if instInfo.instanceId == inst.instanceId: + g_instanceInfo = instInfo + break + if not g_instanceInfo: + raise Exception(ErrorCode.GAUSS_516["GAUSS_51620"] % "GTM") + # construct the instance name + instName = "gtm_%s" % g_instanceInfo.instanceId + # print gtm instance information + print("GTM", file=stdout) + print("%-20s: %d" % (" node", inst.nodeId), file=stdout) + print("%-20s: %s" % (" instance_name", instName), file=stdout) + print("%-20s: %s" % (" listen_IP", g_instanceInfo.listenIps), + file=stdout) + print("%-20s: %d" % (" port", g_instanceInfo.port), file=stdout) + print("%-20s: %s" % (" data_path", inst.datadir), + file=stdout) + print("%-20s: %s" % (" instance_state", inst.status), + file=stdout) + print("%-20s: %s" % (" conn_state", inst.connStatus), + file=stdout) + print("%-20s: %s" % (" reason", inst.reason), file=stdout) + print("", file=stdout) + + i = 1 + for inst in self.datanodes: + # get the instance info + g_instanceInfo = None + for instInfo in dbNode.datanodes: + if instInfo.instanceId == inst.instanceId: + g_instanceInfo = instInfo + break + if not g_instanceInfo: + raise Exception(ErrorCode.GAUSS_516["GAUSS_51620"] % "DN") + # construct the instance name + peerInsts = g_clusterInfo.getPeerInstance(g_instanceInfo) + instName = \ + ClusterInstanceConfig. 
\ + setReplConninfoForSinglePrimaryMultiStandbyCluster( + g_instanceInfo, peerInsts, g_clusterInfo)[1] + + # print DB instance information + print("Datanode%d" % i, file=stdout) + print("%-20s: %d" % (" node", inst.nodeId), file=stdout) + print("%-20s: %s" % (" instance_name", instName), file=stdout) + print("%-20s: %s" % (" listen_IP", g_instanceInfo.listenIps), + file=stdout) + print("%-20s: %s" % (" HA_IP", g_instanceInfo.haIps), + file=stdout) + print("%-20s: %d" % (" port", g_instanceInfo.port), file=stdout) + print("%-20s: %s" % (" data_path", inst.datadir), file=stdout) + print("%-20s: %s" % (" instance_state", inst.status), + file=stdout) + print("%-20s: %s" % (" HA_state", inst.haStatus), file=stdout) + print("%-20s: %s" % (" reason", inst.reason), file=stdout) + print("", file=stdout) + + i += 1 + # print fenced UDF status + for inst in self.fencedUDFs: + print("Fenced UDF", file=stdout) + print("%-20s: %d" % (" node", inst.nodeId), file=stdout) + print("%-20s: %s" % (" listen_IP", dbNode.backIps[0]), + file=stdout) + print("%-20s: %s" % (" instance_state", inst.status), + file=stdout) + + def getDnPeerInstance(self, user): + """ + function : Get the Peer instance of DN + input : user + output : Idlist + """ + global g_clusterInfo + global g_instanceInfo + global g_clusterInfoInitialized + if not g_clusterInfoInitialized: + DefaultValue.checkUser(user) + g_clusterInfo = dbClusterInfo() + g_clusterInfo.initFromStaticConfig(user) + g_clusterInfoInitialized = True + dbNode = g_clusterInfo.getDbNodeByName(self.name) + Idlist = {} + for dnInst in self.datanodes: + # get the instance info + g_instanceInfo = None + for instInfo in dbNode.datanodes: + if instInfo.instanceId == dnInst.instanceId: + g_instanceInfo = instInfo + break + if not g_instanceInfo: + raise Exception(ErrorCode.GAUSS_516["GAUSS_51620"] % "DN") + + # construct the instance name + peerInsts = g_clusterInfo.getPeerInstance(g_instanceInfo) + if (len(peerInsts) != 2 and + len(peerInsts) != 1): + raise Exception(ErrorCode.GAUSS_516["GAUSS_51603"] % + g_instanceInfo.datadir) + + dnMasterInst = None + dnStandbyInst = None + if (g_instanceInfo.instanceType == MASTER_INSTANCE): + dnMasterInst = g_instanceInfo + for instIndex in range(len(peerInsts)): + if (peerInsts[instIndex].instanceType == STANDBY_INSTANCE): + dnStandbyInst = peerInsts[instIndex] + Idlist[dnMasterInst.instanceId] = \ + dnStandbyInst.instanceId + return Idlist + + def getPrimaryStandby(self): + for instance in self.datanodes: + if (instance.status == DbClusterStatus.INSTANCE_STATUS_PRIMARY): + self.primaryDNs.append(instance) + elif (instance.status == DbClusterStatus.INSTANCE_STATUS_STANDBY): + self.standbyDNs.append(instance) + elif (instance.status == DbClusterStatus.INSTANCE_STATUS_DUMMY): + self.dummies.append(instance) + + +class DbClusterStatus(): + """ + classdocs + """ + + OM_STATUS_FILE = "gs_om_status.dat" + OM_STATUS_KEEPTIME = 1800 + ################################################################### + # OM status + ################################################################### + OM_STATUS_NORMAL = "Normal" + OM_STATUS_ABNORMAL = "Abnormal" + OM_STATUS_STARTING = "Starting" + OM_STATUS_UPGRADE = "Upgrade" + OM_STATUS_DILATATION = "Dilatation" + OM_STATUS_REPLACE = "Replace" + OM_STATUS_REDISTIRBUTE = "Redistributing" + + ################################################################### + # node status + ################################################################### + OM_NODE_STATUS_NORMAL = "Normal" + OM_NODE_STATUS_ABNORMAL = 
"Abnormal" + + ################################################################### + # cluster status + ################################################################### + CLUSTER_STATUS_NORMAL = "Normal" + CLUSTER_STATUS_STARTING = "Starting" + CLUSTER_STATUS_ABNORMAL = "Abnormal" + CLUSTER_STATUS_PENDING = "Pending" + CLUSTER_STATUS_DEGRADED = "Degraded" + CLUSTER_STATUS_MAP = { + "Normal": "Normal", + "Redistributing": "Redistributing", + "Repair": "Abnormal", + "Starting": "Starting", + "Degraded": "Degraded", + "Unknown": "Abnormal" + } + + ################################################################### + # instance role + ################################################################### + INSTANCE_TYPE_GTM = "GTM" + INSTANCE_TYPE_DATANODE = "Datanode" + INSTANCE_TYPE_COORDINATOR = "Coordinator" + INSTANCE_TYPE_CMSERVER = "CMServer" + INSTANCE_TYPE_FENCED_UDF = "Fenced UDF" + INSTANCE_TYPE_ETCD = "ETCD" + + ################################################################### + # instance status + ################################################################### + INSTANCE_STATUS_NORMAL = "Normal" + INSTANCE_STATUS_UNNORMAL = "Unnormal" + INSTANCE_STATUS_PRIMARY = "Primary" + INSTANCE_STATUS_STANDBY = "Standby" + INSTANCE_STATUS_ABNORMAL = "Abnormal" + INSTANCE_STATUS_DOWN = "Down" + INSTANCE_STATUS_DUMMY = "Secondary" + INSTANCE_STATUS_DELETED = "Deleted" + INSTANCE_STATUS_STATElEADER = "StateLeader" + INSTANCE_STATUS_STATEfOLLOWER = "StateFollower" + INSTANCE_STATUS_MAP = { + # When instance run stand-alone,it's 'Normal' + "Normal": "Primary", + "Unnormal": "Abnormal", + "Primary": "Primary", + "Standby": "Standby", + "Secondary": "Secondary", + "Pending": "Abnormal", + "Down": "Down", + "Unknown": "Abnormal" + } + + ################################################################### + # ha status + ################################################################### + HA_STATUS_NORMAL = "Normal" + HA_STATUS_BUILD = "Building" + HA_STATUS_ABNORMAL = "Abnormal" + HA_STATUS_MAP = { + "Normal": "Normal", + "Building": "Building", + "Need repair": "Abnormal", + "Starting": "Starting", + "Demoting": "Demoting", + "Promoting": "Promoting", + "Waiting": "Abnormal", + "Unknown": "Abnormal", + "Catchup": "Normal" + } + + ################################################################### + # connection status + ################################################################### + CONN_STATUS_NORMAL = "Normal" + CONN_STATUS_ABNORMAL = "Abnormal" + CONN_STATUS_MAP = { + "Connection ok": "Normal", + "Connection bad": "Abnormal", + "Connection started": "Abnormal", + "Connection made": "Abnormal", + "Connection awaiting response": "Abnormal", + "Connection authentication ok": "Abnormal", + "Connection prepare SSL": "Abnormal", + "Connection needed": "Abnormal", + "Unknown": "Abnormal" + } + + ################################################################### + # data status + ################################################################### + DATA_STATUS_SYNC = "Sync" + DATA_STATUS_ASYNC = "Async" + DATA_STATUS_Unknown = "Unknown" + DATA_STATUS_MAP = { + "Async": "Async", + "Sync": "Sync", + "Most available": "Standby Down", + "Potential": "Potential", + "Unknown": "Unknown" + } + + def __init__(self): + """ + Constructor + """ + self.dbNodes = [] + self.clusterStatus = "" + self.redistributing = "" + self.clusterStatusDetail = "" + self.__curNode = None + self.__curInstance = None + self.balanced = "" + + def __str__(self): + """ + """ + retStr = 
"clusterStatus=%s,redistributing=%s,clusterStatusDetail=%s," \ + "balanced=%s" % \ + (self.clusterStatus, self.redistributing, + self.clusterStatusDetail, self.balanced) + + for dbNode in self.dbNodes: + retStr += "\n%s" % str(dbNode) + + return retStr + + @staticmethod + def saveOmStatus(status, sshTool, user): + """ + function : Save om status to a file + input : sshTool, user + output : NA + """ + if (sshTool is None): + raise Exception(ErrorCode.GAUSS_511["GAUSS_51107"] + + " Can't save status to all nodes.") + + try: + statFile = os.path.join(DefaultValue.getTmpDirFromEnv(), + DbClusterStatus.OM_STATUS_FILE) + cmd = "echo \"%s\" > %s" % (status, statFile) + sshTool.executeCommand(cmd, "record OM status information") + except Exception as e: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50205"] % + "OM status information" + " Error: \n%s" % str(e)) + + @staticmethod + def getOmStatus(user): + """ + function : Get om status from file + input : String + output : NA + """ + # check status file + statFile = os.path.join(DefaultValue.getTmpDirFromEnv(), + DbClusterStatus.OM_STATUS_FILE) + if (not os.path.isfile(statFile)): + return DbClusterStatus.OM_STATUS_NORMAL + # get om status from file + status = DbClusterStatus.OM_STATUS_NORMAL + return status + + def getDbNodeStatusById(self, nodeId): + """ + function : Get node status by node id + input : nodeId + output : dbNode + """ + for dbNode in self.dbNodes: + if (dbNode.id == nodeId): + return dbNode + + return None + + def getInstanceStatusById(self, instId): + """ + function : Get instance by its id + input : instId + output : dbInst + """ + for dbNode in self.dbNodes: + # get DB instance + instances = dbNode.coordinators + dbNode.gtms + dbNode.datanodes + # get instance by its id + for dbInst in instances: + if (dbInst.instanceId == instId): + return dbInst + + return None + + def isAllHealthy(self, cluster_normal_status=None): + """ + function : Check if cluster is healthy + input : cluster_normal_status + output : boolean + """ + if (cluster_normal_status is None): + cluster_normal_status = [DbClusterStatus.CLUSTER_STATUS_NORMAL] + + if (self.clusterStatus not in cluster_normal_status): + return False + + for dbNode in self.dbNodes: + if (not dbNode.isNodeHealthy()): + return False + + return True + + def getClusterStatusReport(self): + """ + function : Get the health report of cluster + input : NA + output : clusterRep + """ + clusterRep = StatusReport() + for dbNode in self.dbNodes: + nodeRep = dbNode.getNodeStatusReport() + clusterRep.nodeCount += 1 + clusterRep.cooNormal += nodeRep.cooNormal + clusterRep.cooAbnormal += nodeRep.cooAbnormal + clusterRep.gtmPrimary += nodeRep.gtmPrimary + clusterRep.gtmStandby += nodeRep.gtmStandby + clusterRep.gtmAbnormal += nodeRep.gtmAbnormal + clusterRep.gtmDown += nodeRep.gtmDown + clusterRep.dnPrimary += nodeRep.dnPrimary + clusterRep.dnStandby += nodeRep.dnStandby + clusterRep.dnDummy += nodeRep.dnDummy + clusterRep.dnBuild += nodeRep.dnBuild + clusterRep.dnAbnormal += nodeRep.dnAbnormal + clusterRep.dnDown += nodeRep.dnDown + clusterRep.fencedUDFNormal += nodeRep.fencedUDFNormal + clusterRep.fencedUDFAbnormal += nodeRep.fencedUDFAbnormal + + return clusterRep + + def outputClusterStauts(self, stdout, user, showDetail=False): + """ + function : output the status of cluster + input : stdout, user + output : NA + """ + clusterStat = DbClusterStatus.getOmStatus(user) + redistributing_state = self.redistributing + balanced_state = self.balanced + if (clusterStat == DbClusterStatus.OM_STATUS_NORMAL): 
+ clusterStat = self.clusterStatus + print("%-20s: %s" % ("cluster_state", clusterStat), file=stdout) + print("%-20s: %s" % ("redistributing", redistributing_state), + file=stdout) + print("%-20s: %s" % ("balanced", balanced_state), file=stdout) + print("", file=stdout) + + for dbNode in self.dbNodes: + dbNode.outputNodeStatus(stdout, user, showDetail) + + def getClusterStauts(self, user): + """ + function : Get the status of cluster for Healthcheck + input : user + output : statusInfo + """ + clusterStat = DbClusterStatus.getOmStatus(user) + redistributing_state = self.redistributing + balanced_state = self.balanced + if (clusterStat == DbClusterStatus.OM_STATUS_NORMAL): + clusterStat = self.clusterStatus + statusInfo = " %s: %s\n" % ("cluster_state".ljust(22), + clusterStat) + statusInfo += " %s: %s\n" % ("redistributing".ljust(22), + redistributing_state) + statusInfo += " %s: %s\n" % ("balanced".ljust(22), + balanced_state) + for dbNode in self.dbNodes: + if (dbNode.isNodeHealthy()): + statusInfo += " %s: %s \n" % ( + dbNode.name.ljust(22), + DbClusterStatus.OM_NODE_STATUS_NORMAL) + else: + statusInfo += " %s: %s \n" % ( + dbNode.name.ljust(22), + DbClusterStatus.OM_NODE_STATUS_ABNORMAL) + return statusInfo + + def getReportInstanceStatus(self, instance): + """ + Get the instance status information required for reporting. + """ + repInstSts = InstanceStatus() + repInstSts.nodeId = instance.nodeId + repInstSts.ip = instance.nodeIp + if instance.type == DbClusterStatus.INSTANCE_TYPE_GTM: + repInstSts.detail = instance.detail_conn + elif instance.type == DbClusterStatus.INSTANCE_TYPE_DATANODE: + repInstSts.detail = instance.detail_ha + if instance.detail_status: + repInstSts.status = instance.detail_status + else: + repInstSts.status = instance.status + repInstSts.instanceId = instance.instanceId + return repInstSts.__dict__ + + def getDnInstanceStatus(self, dnInst): + """ + Get datanode instance by instance id. + """ + for dbNode in self.dbNodes: + for dn in dbNode.datanodes: + if (dn.instanceId == dnInst.instanceId): + return dn + return "" + + def getClusterStatusMap(self, user): + """ + Get the cluster status information required for reporting. 
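+
+        As a rough, illustrative sketch (example values only), the dict
+        returned below via ReportClusterStatus.__dict__ looks like:
+            {"status": "Normal", "balanced": "Yes",
+             "redistributing": "No", "gtms": {...}, "etcds": {...},
+             "cms": {...}, "cns": [...], "dns": [...]}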
+ """ + repClusterSts = ReportClusterStatus() + repClusterSts.status = self.clusterStatusDetail + repClusterSts.balanced = self.balanced + repClusterSts.redistributing = self.redistributing + clusterInfo = dbClusterInfo() + clusterInfo.initFromStaticConfig(user) + masterInstList = [] + for dbNodeInfo in clusterInfo.dbNodes: + insts = dbNodeInfo.etcds + dbNodeInfo.cmservers + \ + dbNodeInfo.datanodes + dbNodeInfo.gtms + for inst in insts: + if inst.instanceType == 0: + masterInstList.append(inst.instanceId) + dnMirrorDict = {} + etcdStatus = EtcdGroupStatus() + gtmStatus = NodeGroupStatus() + cmsStatus = NodeGroupStatus() + for dbNode in self.dbNodes: + # get cn instance status info + for inst in dbNode.coordinators: + cnRepSts = self.getReportInstanceStatus(inst) + repClusterSts.cns.append(cnRepSts) + # get etcds instance status info + for inst in dbNode.etcds: + if (inst.instanceId in masterInstList): + etcdStatus.leader = self.getReportInstanceStatus(inst) + else: + etcdInstStatus = self.getReportInstanceStatus(inst) + etcdStatus.follower.append(etcdInstStatus) + # get gtm instance status info + for inst in dbNode.gtms: + if (inst.instanceId in masterInstList): + gtmStatus.primary = self.getReportInstanceStatus(inst) + else: + gtmInstStatus = self.getReportInstanceStatus(inst) + gtmStatus.standby.append(gtmInstStatus) + # get cms instance status info + for inst in dbNode.cmservers: + if (inst.instanceId in masterInstList): + cmsStatus.primary = self.getReportInstanceStatus(inst) + else: + cmsInstStatus = self.getReportInstanceStatus(inst) + + cmsStatus.standby.append(cmsInstStatus) + + for dnInst in dbNode.datanodes: + dnNode = clusterInfo.getDbNodeByID(dnInst.nodeId) + clusterDnInst = None + for dnInstInfo in dnNode.datanodes: + if (dnInst.instanceId == dnInstInfo.instanceId): + clusterDnInst = dnInstInfo + if clusterDnInst.mirrorId not in dnMirrorDict.keys(): + dnMirrorDict[clusterDnInst.mirrorId] = [dnInst] + else: + dnMirrorDict[clusterDnInst.mirrorId].append(dnInst) + # get datanodes instance status info + for mirrorDNs in dnMirrorDict.keys(): + dnStatus = NodeGroupStatus() + for dnInst in dnMirrorDict[mirrorDNs]: + if dnInst.instanceId in masterInstList: + primaryDnSts = self.getDnInstanceStatus(dnInst) + dnStatus.primary = \ + self.getReportInstanceStatus(primaryDnSts) + else: + peerInstSts = self.getDnInstanceStatus(dnInst) + peerInstStatus = self.getReportInstanceStatus(peerInstSts) + dnStatus.standby.append(peerInstStatus) + repClusterSts.dns.append(dnStatus.__dict__) + + repClusterSts.etcds = etcdStatus.__dict__ + repClusterSts.gtms = gtmStatus.__dict__ + repClusterSts.cms = cmsStatus.__dict__ + + return repClusterSts.__dict__ + + def initFromFile(self, filePath, isExpandScene=False): + """ + function : Init from status file + input : filePath + output : NA + """ + # check file path + if (not os.path.exists(filePath)): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % + "status file" + " Path: %s." % filePath) + if (not os.path.isfile(filePath)): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50210"] % + "status file" + " Path: %s." 
% filePath) + + try: + with open(filePath, "r") as fp: + for line in fp.readlines(): + line = line.strip() + if line == "": + continue + + strList = line.split(":") + if len(strList) != 2: + continue + + self.__fillField(strList[0].strip(), strList[1].strip(), + isExpandScene) + except Exception as e: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50204"] % + "status file" + " Error: \n%s" % str(e)) + + def __fillField(self, field, value, isExpandScene): + """ + function : Fill field + input : field, value + output : NA + """ + if field == "cluster_state": + status = DbClusterStatus.CLUSTER_STATUS_MAP.get(value) + self.clusterStatus = DbClusterStatus.CLUSTER_STATUS_ABNORMAL \ + if status is None else status + self.clusterStatusDetail = value + elif field == "redistributing": + self.redistributing = value + elif field == "balanced": + self.balanced = value + elif field == "node": + if not value.isdigit(): + raise Exception(ErrorCode.GAUSS_516["GAUSS_51633"] % "node id") + newId = int(value) + if self.__curNode is None or self.__curNode.id != newId: + self.__curNode = DbNodeStatus(newId) + self.dbNodes.append(self.__curNode) + elif field == "node_name": + self.__curNode.name = value + elif field == "instance_id": + if not value.isdigit(): + raise Exception(ErrorCode.GAUSS_516["GAUSS_51633"] % + "instance id") + self.__curInstance = DbInstanceStatus(self.__curNode.id, + int(value)) + elif field == "data_path": + self.__curInstance.datadir = value + elif field == "type": + if value == DbClusterStatus.INSTANCE_TYPE_FENCED_UDF: + self.__curInstance = DbInstanceStatus(self.__curNode.id) + self.__curNode.fencedUDFs.append(self.__curInstance) + self.__curInstance.type = value + if value == DbClusterStatus.INSTANCE_TYPE_GTM: + self.__curNode.gtms.append(self.__curInstance) + elif value == DbClusterStatus.INSTANCE_TYPE_DATANODE: + self.__curNode.datanodes.append(self.__curInstance) + elif value == DbClusterStatus.INSTANCE_TYPE_COORDINATOR: + self.__curNode.coordinators.append(self.__curInstance) + elif value == DbClusterStatus.INSTANCE_TYPE_CMSERVER: + self.__curNode.cmservers.append(self.__curInstance) + elif value == DbClusterStatus.INSTANCE_TYPE_ETCD: + self.__curNode.etcds.append(self.__curInstance) + elif field == "instance_state": + status = DbClusterStatus.INSTANCE_STATUS_MAP.get(value) + self.__curInstance.status = \ + DbClusterStatus.INSTANCE_STATUS_ABNORMAL \ + if status is None else status + elif field == "node_ip": + self.__curInstance.nodeIp = value + elif field == "state": + if (value == DbClusterStatus.INSTANCE_STATUS_NORMAL or + value == DbClusterStatus.INSTANCE_STATUS_STATElEADER or + value == DbClusterStatus.INSTANCE_STATUS_STATEfOLLOWER): + self.__curInstance.status = value + elif (value == DbClusterStatus.INSTANCE_STATUS_DELETED): + global g_deletedCNId + self.__curInstance.status = \ + DbClusterStatus.INSTANCE_STATUS_ABNORMAL + self.__curInstance.detail_status = value + g_deletedCNId.append(self.__curInstance.instanceId) + else: + if (isExpandScene and self.__curInstance.type == + DbClusterStatus.INSTANCE_TYPE_COORDINATOR): + self.clusterStatus = \ + DbClusterStatus.CLUSTER_STATUS_ABNORMAL + self.__curInstance.status = \ + DbClusterStatus.INSTANCE_STATUS_ABNORMAL + self.__curInstance.detail_status = value + elif field == "HA_state": + haStatus = DbClusterStatus.HA_STATUS_MAP.get(value) + detail_ha = value + self.__curInstance.haStatus = DbClusterStatus.HA_STATUS_ABNORMAL \ + if haStatus is None else haStatus + self.__curInstance.detail_ha = DbClusterStatus.HA_STATUS_ABNORMAL \ + if 
detail_ha is None else detail_ha + elif field == "con_state": + connStatus = DbClusterStatus.CONN_STATUS_MAP.get(value) + detail_conn = value + self.__curInstance.connStatus = \ + DbClusterStatus.CONN_STATUS_ABNORMAL \ + if connStatus is None else connStatus + self.__curInstance.detail_conn = detail_conn if detail_conn else \ + DbClusterStatus.CONN_STATUS_ABNORMAL + elif field == "static_connections": + connStatus = DbClusterStatus.CONN_STATUS_MAP.get(value) + self.__curInstance.connStatus = \ + DbClusterStatus.CONN_STATUS_ABNORMAL \ + if connStatus is None else connStatus + elif field == "sync_state": + dataStatus = DbClusterStatus.DATA_STATUS_MAP.get(value) + self.__curInstance.syncStatus = \ + DbClusterStatus.DATA_STATUS_Unknown \ + if dataStatus is None else dataStatus + elif field == "reason": + self.__curInstance.reason = value + + +class ReportClusterStatus(): + + def __init__(self): + self.status = "" + self.balanced = "" + self.redistributing = "" + self.gtms = "" + self.etcds = "" + self.cms = "" + self.cns = [] + self.dns = [] + + +class NodeGroupStatus(): + def __init__(self): + self.primary = "" + self.standby = [] + + +class EtcdGroupStatus(): + def __init__(self): + self.leader = "" + self.follower = [] + + +class InstanceStatus(): + def __init__(self): + self.nodeId = "" + self.ip = "" + self.status = "" + self.instanceId = "" + self.detail = "" diff --git a/script/gspylib/common/ErrorCode.py b/script/gspylib/common/ErrorCode.py new file mode 100644 index 0000000..f96a6c9 --- /dev/null +++ b/script/gspylib/common/ErrorCode.py @@ -0,0 +1,1288 @@ +# -*- coding:utf-8 -*- +############################################################################# +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +# Description : ErrorCode.py is utility to register the error message +############################################################################# +import re +import sys + + +class ErrorCode(): + """ + Class to define output about the error message + """ + + def __init__(self): + pass + + @staticmethod + def getErrorCodeAsInt(ex, default_error_code): + """ + Resolve the exit code from the exception instance or error message. + + In linux, the range of return values is between 0 and 255. + So we can only use each type of error code as exit code.Such as: + ErrorCode.GAUSS_500 : 10 + ErrorCode.GAUSS_501 : 11 + + :param ex: Exception instance or error message + :param default_error_code: If the exception instance does not contain + the exit code, use this parameter. + + :type ex: Exception | str + :type default_error_code: int + + :return: Return the error code. + 9 represents undefined exit code. + other number between 0 and 255 represent the specific gauss error. 
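+
+        For example (illustrative): for the message
+        "[GAUSS-51400] : Failed to execute the command: cp", the regular
+        expression below captures 51400, so the exit code is
+        51400 // 100 - 500 + 10 = 24.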
+        :type: int
+        """
+        error_message = str(ex)
+        pattern = r"^[\S\s]*\[GAUSS-(\d+)\][\S\s]+$"
+        match = re.match(pattern, error_message)
+
+        if match is not None and len(match.groups()) == 1:
+            error_code = int(match.groups()[0])
+        else:
+            error_code = default_error_code
+
+        if 50000 < error_code < 60000:
+            return error_code // 100 - 500 + 10
+        else:
+            return 9
+
+    ###########################################################################
+    # parameter
+    ###########################################################################
+    GAUSS_500 = {
+        'GAUSS_50000': "[GAUSS-50000] : Unrecognized parameter: %s.",
+        'GAUSS_50001': "[GAUSS-50001] : Incorrect parameter. Parameter "
+                       "'-%s' is required",
+        'GAUSS_50002': "[GAUSS-50002] : Incorrect parameter. Parameter "
+                       "'-%s' is not required",
+        'GAUSS_50003': "[GAUSS-50003] : The parameter '-%s' type should be "
+                       "%s.",
+        'GAUSS_50004': "[GAUSS-50004] : The parameter '-%s' value is "
+                       "incorrect.",
+        'GAUSS_50005': "[GAUSS-50005] : The parameter '-%s' and '-%s' "
+                       "can not be used together.",
+        'GAUSS_50006': "[GAUSS-50006] : Too many command-line arguments "
+                       "(first is \"%s\").",
+        'GAUSS_50007': "[GAUSS-50007] : Failed to set %s parameter.",
+        'GAUSS_50008': "[GAUSS-50008] : Failed to reload parameter.",
+        'GAUSS_50009': "[GAUSS-50009] : Parameter format error.",
+        'GAUSS_50010': "[GAUSS-50010] : Failed to check %s parameter.",
+        'GAUSS_50011': "[GAUSS-50011] : The parameter[%s] value[%s] "
+                       "is invalid.",
+        'GAUSS_50012': "[GAUSS-50012] : The parameter '%s' value can't "
+                       "be empty.",
+        'GAUSS_50013': "[GAUSS-50013] : The parameter '%s' has not "
+                       "been initialized.",
+        'GAUSS_50014': "[GAUSS-50014] : Parameters of '%s' can not be empty.",
+        'GAUSS_50015': "[GAUSS-50015] : The command line parser error: %s.",
+        'GAUSS_50016': "[GAUSS-50016] : The re-entrant parameter '-%s' "
+                       "is not the same as in the previous command.",
+        'GAUSS_50017': "[GAUSS-50017] : Incorrect value '%s' specified "
+                       "by the parameter '-%s'.",
+        'GAUSS_50018': "[GAUSS-50018] : The parameter value of %s is Null.",
+        'GAUSS_50019': "[GAUSS-50019] : The value of %s is incorrect.",
+        'GAUSS_50020': "[GAUSS-50020] : The value of %s must be a digit.",
+        'GAUSS_50021': "[GAUSS-50021] : Failed to query %s parameter."
+
+    }
+
+    ###########################################################################
+    # permission
+    ###########################################################################
+    GAUSS_501 = {
+        'GAUSS_50100': "[GAUSS-50100] : The %s is not readable for %s.",
+        'GAUSS_50101': "[GAUSS-50101] : The %s is not executable for %s.",
+        'GAUSS_50102': "[GAUSS-50102] : The %s is not writable for %s.",
+        'GAUSS_50103': "[GAUSS-50103] : The %s has unexpected rights.",
+        'GAUSS_50104': "[GAUSS-50104] : Only a user with the root permission "
+                       "can run this script.",
+        'GAUSS_50105': "[GAUSS-50105] : Cannot run this script as a user "
+                       "with the root permission.",
+        'GAUSS_50106': "[GAUSS-50106] : Failed to change the owner of %s.",
+        'GAUSS_50107': "[GAUSS-50107] : Failed to change the "
+                       "permission of %s.",
+        'GAUSS_50108': "[GAUSS-50108] : Failed to change the owner and "
+                       "permission of %s.",
+        'GAUSS_50109': "[GAUSS-50109] : Only a user with the root permission "
+                       "can check SSD information.",
+        'GAUSS_50110': "[GAUSS-50110] : Cannot execute this script on %s."
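+        # Note: per getErrorCodeAsInt() above, every GAUSS-501xx message in
+        # this family maps to process exit code 501 - 500 + 10 = 11.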
+ } + + ########################################################################### + # file or directory + ########################################################################### + GAUSS_502 = { + 'GAUSS_50200': "[GAUSS-50200] : The %s already exists.", + 'GAUSS_50201': "[GAUSS-50201] : The %s does not exist.", + 'GAUSS_50202': "[GAUSS-50202] : The %s must be empty.", + 'GAUSS_50203': "[GAUSS-50203] : The %s cannot be empty.", + 'GAUSS_50204': "[GAUSS-50204] : Failed to read %s.", + 'GAUSS_50205': "[GAUSS-50205] : Failed to write %s.", + 'GAUSS_50206': "[GAUSS-50206] : Failed to create %s.", + 'GAUSS_50207': "[GAUSS-50207] : Failed to delete %s.", + 'GAUSS_50208': "[GAUSS-50208] : Failed to create the %s directory.", + 'GAUSS_50209': "[GAUSS-50209] : Failed to delete the %s directory.", + 'GAUSS_50210': "[GAUSS-50210] : The %s must be a file.", + 'GAUSS_50211': "[GAUSS-50211] : The %s must be a directory.", + 'GAUSS_50212': "[GAUSS-50212] : The suffix of the file [%s] " + "should be '%s'.", + 'GAUSS_50213': "[GAUSS-50213] : The %s path must be an absolute path.", + 'GAUSS_50214': "[GAUSS-50214] : Failed to copy %s.", + 'GAUSS_50215': "[GAUSS-50215] : Failed to back up %s.", + 'GAUSS_50216': "[GAUSS-50216] : Failed to remote copy %s.", + 'GAUSS_50217': "[GAUSS-50217] : Failed to decompress %s.", + 'GAUSS_50218': "[GAUSS-50218] : Failed to rename %s.", + 'GAUSS_50219': "[GAUSS-50219] : Failed to obtain %s.", + 'GAUSS_50220': "[GAUSS-50220] : Failed to restore %s.", + 'GAUSS_50221': "[GAUSS-50221] : Failed to obtain file type.", + 'GAUSS_50222': "[GAUSS-50222] : The content of file %s is not " + "correct.", + 'GAUSS_50223': "[GAUSS-50223] : Failed to update %s files.", + 'GAUSS_50224': "[GAUSS-50224] : The file name is incorrect.", + 'GAUSS_50225': "[GAUSS-50225] : Failed to back up remotely.", + 'GAUSS_50226': "[GAUSS-50226] : Failed to restore remotely.", + 'GAUSS_50227': "[GAUSS-50227] : Failed to compress %s.", + 'GAUSS_50228': "[GAUSS-50228] : The %s does not exist or is empty.", + 'GAUSS_50229': "[GAUSS-50229] : Cannot specify the file [%s] to " + "the cluster path %s.", + 'GAUSS_50230': "[GAUSS-50230] : Failed to read/write %s.", + 'GAUSS_50231': "[GAUSS-50231] : Failed to generate %s file.", + 'GAUSS_50232': "[GAUSS-50232] : The instance directory [%s] " + "cannot set in app directory [%s].Please check " + "the xml.", + 'GAUSS_50233': "[GAUSS-50233] : The directory name %s and %s " + "cannot be same.", + 'GAUSS_50234': "[GAUSS-50234] : Cannot execute the script in " + "the relevant path of the database.", + 'GAUSS_50235': "[GAUSS-50235] : The log file name [%s] can not contain" + " more than one '.'.", + 'GAUSS_50236': "[GAUSS-50236] : %s should be set in scene config " + "file.", + 'GAUSS_50237': "[GAUSS-50237] : Send result file failed nodes: %s;" + " outputMap: %s", + 'GAUSS_50238': "[GAUSS-50238] : Check integrality of bin " + "file %s failed." 
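+        # Typical call pattern elsewhere in this patch (see
+        # readDynamicConfig in DbClusterInfo.py), for example:
+        #     raise Exception(ErrorCode.GAUSS_502["GAUSS_50204"] %
+        #                     dynamicConfigFile + " Error:\n" + str(e))
+        # which renders "[GAUSS-50204] : Failed to read <file>. Error: ...".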
+ + } + + ########################################################################### + # user and group + ########################################################################### + GAUSS_503 = { + 'GAUSS_50300': "[GAUSS-50300] : User %s does not exist.", + 'GAUSS_50301': "[GAUSS-50301] : The cluster user/group cannot " + "be a root user/group.", + 'GAUSS_50302': "[GAUSS-50302] : The cluster user cannot be a user " + "with the root permission.", + 'GAUSS_50303': "[GAUSS-50303] : Cannot install the program as a " + "user with the root permission.", + 'GAUSS_50304': "[GAUSS-50304] : The new user [%s] is not the same " + "as the old user [%s].", + 'GAUSS_50305': "[GAUSS-50305] : The user is not matched with the " + "user group.", + 'GAUSS_50306': "[GAUSS-50306] : The password of %s is incorrect.", + 'GAUSS_50307': "[GAUSS-50307] : User password has expired.", + 'GAUSS_50308': "[GAUSS-50308] : Failed to obtain user information.", + 'GAUSS_50309': "[GAUSS-50309] : Failed to obtain password " + "change times of data base super user", + 'GAUSS_50310': "[GAUSS-50310] : Failed to obtain password " + "expiring days.", + 'GAUSS_50311': "[GAUSS-50311] : Failed to change password for %s.", + 'GAUSS_50312': "[GAUSS-50312] : There are other users in the group %s " + "on %s, skip to delete group.", + 'GAUSS_50313': "[GAUSS-50313] : Failed to delete %s group.", + 'GAUSS_50314': "[GAUSS-50314] : Failed to delete %s user.", + 'GAUSS_50315': "[GAUSS-50315] : The user %s is not matched with the " + "owner of %s.", + 'GAUSS_50316': "[GAUSS-50316] : Group [%s] does not exist.", + 'GAUSS_50317': "[GAUSS-50317] : Failed to check user and password.", + 'GAUSS_50318': "[GAUSS-50318] : Failed to add %s user.", + 'GAUSS_50319': "[GAUSS-50319] : Failed to add %s group.", + 'GAUSS_50320': "[GAUSS-50320] : Failed to set '%s' to '%s' in " + "/etc/ssh/sshd_config.", + 'GAUSS_50321': "[GAUSS-50321] : Failed to get configuration of '%s' " + "from /etc/ssh/sshd_config.", + 'GAUSS_50322': "[GAUSS-50322] : Failed to encrypt the password for %s", + 'GAUSS_50323': "[GAUSS-50323] : The user %s is not the cluster " + "installation user " + } + + ########################################################################### + # disk + ########################################################################### + GAUSS_504 = { + 'GAUSS_50400': "[GAUSS-50400] : The remaining space of device [%s] " + "cannot be less than %s.", + 'GAUSS_50401': "[GAUSS-50401] : The usage of the device [%s] space " + "cannot be greater than %s.", + 'GAUSS_50402': "[GAUSS-50402] : The usage of INODE cannot be greater " + "than %s.", + 'GAUSS_50403': "[GAUSS-50403] : The IO scheduling policy is " + "incorrect.", + 'GAUSS_50404': "[GAUSS-50404] : The XFS mount type must be %s.", + 'GAUSS_50405': "[GAUSS-50405] : The pre-read block size must " + "be 16384.", + 'GAUSS_50406': "[GAUSS-50406] : Failed to obtain disk read and " + "write rates.", + 'GAUSS_50407': "[GAUSS-50407] : Failed to clean shared semaphore.", + 'GAUSS_50408': "[GAUSS-50408] : Failed to obtain disk read-ahead " + "memory block.", + 'GAUSS_50409': "[GAUSS-50409] : The remaining space of dns cannot " + "support shrink.", + 'GAUSS_50410': "[GAUSS-50410] : Failed to check if remaining space " + "of dns support shrink.", + 'GAUSS_50411': "[GAUSS-50411] : The remaining space cannot be less " + "than %s.", + 'GAUSS_50412': "[GAUSS-50412] : Failed to get disk space of database " + "node %s.", + 'GAUSS_50413': "[GAUSS-50413] : Failed to analysis" + " the disk information." 
+
+    }
+
+    ###########################################################################
+    # memory
+    ###########################################################################
+    GAUSS_505 = {
+        'GAUSS_50500': "[GAUSS-50500] : The SWAP partition is smaller than "
+                       "the actual memory.",
+        'GAUSS_50501': "[GAUSS-50501] : Shared_buffers must be less than "
+                       "shmmax. Please check it.",
+        'GAUSS_50502': "[GAUSS-50502] : Failed to obtain %s information."
+    }
+
+    ###########################################################################
+    # network
+    ###########################################################################
+    GAUSS_506 = {
+        'GAUSS_50600': "[GAUSS-50600] : The IP address cannot be pinged, "
+                       "which is caused by network faults.",
+        'GAUSS_50601': "[GAUSS-50601] : The port [%s] is occupied or the IP "
+                       "address is incorrectly configured.",
+        'GAUSS_50602': "[GAUSS-50602] : Failed to bind network adapters.",
+        'GAUSS_50603': "[GAUSS-50603] : The IP address is invalid.",
+        'GAUSS_50604': "[GAUSS-50604] : Failed to obtain network interface "
+                       "card of backIp(%s).",
+        'GAUSS_50605': "[GAUSS-50605] : Failed to obtain back IP subnet mask.",
+        'GAUSS_50606': "[GAUSS-50606] : Back IP(s) do not have the same "
+                       "subnet mask.",
+        'GAUSS_50607': "[GAUSS-50607] : Failed to obtain the line position "
+                       "for configuring the virtual IP in the network "
+                       "startup file.",
+        'GAUSS_50608': "[GAUSS-50608] : Failed to write virtual IP setting "
+                       "commands into the init file.",
+        'GAUSS_50609': "[GAUSS-50609] : Failed to check port: %s.",
+        'GAUSS_50610': "[GAUSS-50610] : Failed to get the range of "
+                       "random port.",
+        'GAUSS_50611': "[GAUSS-50611] : Failed to obtain network card "
+                       "bonding information.",
+        'GAUSS_50612': "[GAUSS-50612] : Failed to obtain network card %s "
+                       "value.",
+        'GAUSS_50613': "[GAUSS-50613] : Failed to set network card %s value.",
+        'GAUSS_50614': "[GAUSS-50614] : Failed to check network information.",
+        'GAUSS_50615': "[GAUSS-50615] : IP %s and IP %s are not in the "
+                       "same network segment.",
+        'GAUSS_50616': "[GAUSS-50616] : Failed to get network interface.",
+        'GAUSS_50617': "[GAUSS-50617] : The nodes of the XML configuration "
+                       "file have the same virtual IP.",
+        'GAUSS_50618': "[GAUSS-50618] : %s. The startup file for SUSE OS"
+                       " is /etc/init.d/boot.local. The startup file for "
+                       "Redhat OS is /etc/rc.d/rc.local.",
+        'GAUSS_50619': "[GAUSS-50619] : Failed to obtain network"
+                       " card information.",
+        'GAUSS_50620': "[GAUSS-50620] : Failed to check network"
+                       " RX drop percentage.",
+        'GAUSS_50621': "[GAUSS-50621] : Failed to check network card speed.\n",
+        'GAUSS_50622': "[GAUSS-50622] : Failed to obtain network card "
+                       "interrupt count numbers. Commands for getting "
+                       "interrupt count numbers: %s."
+
+    }
+
+    ###########################################################################
+    # firewall
+    ###########################################################################
+    GAUSS_507 = {
+        'GAUSS_50700': "[GAUSS-50700] : The firewall should be disabled.",
+        'GAUSS_50701': "[GAUSS-50701] : The firewall should be opened."
+    }
+
+    ###########################################################################
+    # crontab
+    ###########################################################################
+    GAUSS_508 = {
+        'GAUSS_50800': "[GAUSS-50800] : Regular tasks are not started.",
+        'GAUSS_50801': "[GAUSS-50801] : Failed to set up tasks.",
+        'GAUSS_50802': "[GAUSS-50802] : Failed to %s service.",
+        'GAUSS_50803': "[GAUSS-50803] : Failed to check user cron."
+ } + + ########################################################################### + # Clock service + ########################################################################### + GAUSS_509 = { + 'GAUSS_50900': "[GAUSS-50900] : The NTPD service is not installed.", + 'GAUSS_50901': "[GAUSS-50901] : The NTPD service is not started.", + 'GAUSS_50902': "[GAUSS-50902] : The system time is different." + } + + ########################################################################### + # THP + ########################################################################### + GAUSS_510 = { + 'GAUSS_51000': "[GAUSS-51000] : THP services must be shut down.", + 'GAUSS_51001': "[GAUSS-51001] : Failed to obtain THP service.", + 'GAUSS_51002': "[GAUSS-51002] : Failed to close THP service.", + 'GAUSS_51003': "[GAUSS-51003] : Failed to set session process." + } + + ########################################################################### + # SSH trust + ########################################################################### + GAUSS_511 = { + 'GAUSS_51100': "[GAUSS-51100] : Failed to verify SSH trust on " + "these nodes: %s.", + 'GAUSS_51101': "[GAUSS-51101] : SSH exception: \n%s", + 'GAUSS_51102': "[GAUSS-51102] : Failed to exchange SSH keys " + "for user [%s] performing the %s operation.", + 'GAUSS_51103': "[GAUSS-51103] : Failed to execute the PSSH " + "command [%s].", + 'GAUSS_51104': "[GAUSS-51104] : Failed to obtain SSH status.", + 'GAUSS_51105': "[GAUSS-51105] : Failed to parse SSH output: %s.", + 'GAUSS_51106': "[GAUSS-51106] : The SSH tool does not exist.", + 'GAUSS_51107': "[GAUSS-51107] : Ssh Paramiko failed.", + 'GAUSS_51108': "[GAUSS-51108] : Ssh-keygen failed.", + 'GAUSS_51109': "[GAUSS-51109] : Failed to check authentication.", + 'GAUSS_51110': "[GAUSS-51110] : Failed to obtain RSA host key " + "for local host.", + 'GAUSS_51111': "[GAUSS-51111] : Failed to append local ID to " + "authorized_keys on remote node.", + 'GAUSS_51112': "[GAUSS-51112] : Failed to exchange SSH keys " + "for user[%s] using hostname." 
+    }
+
+    ###########################################################################
+    # cluster/XML configuration
+    ###########################################################################
+    GAUSS_512 = {
+        'GAUSS_51200': "[GAUSS-51200] : The parameter [%s] in the XML "
+                       "file does not exist.",
+        'GAUSS_51201': "[GAUSS-51201] : Node names must be configured.",
+        'GAUSS_51202': "[GAUSS-51202] : Failed to add the %s instance.",
+        'GAUSS_51203': "[GAUSS-51203] : Failed to obtain the %s "
+                       "information from static configuration files.",
+        'GAUSS_51204': "[GAUSS-51204] : Invalid %s instance type: %d.",
+        'GAUSS_51205': "[GAUSS-51205] : Failed to refresh the %s instance ID.",
+        'GAUSS_51206': "[GAUSS-51206] : The MPPRC file path must "
+                       "be an absolute path: %s.",
+        'GAUSS_51207': "[GAUSS-51207] : Failed to obtain backIp "
+                       "from node [%s].",
+        'GAUSS_51208': "[GAUSS-51208] : Invalid %s number [%d].",
+        'GAUSS_51209': "[GAUSS-51209] : Failed to obtain %s "
+                       "configuration on the host [%s].",
+        'GAUSS_51210': "[GAUSS-51210] : The obtained number does "
+                       "not match the instance number.",
+        'GAUSS_51211': "[GAUSS-51211] : Failed to save a static "
+                       "configuration file.",
+        'GAUSS_51212': "[GAUSS-51212] : There is no information about %s.",
+        'GAUSS_51213': "[GAUSS-51213] : The port number of XML [%s] "
+                       "conflicted.",
+        'GAUSS_51214': "[GAUSS-51214] : The number of capacity expansion "
+                       "database nodes cannot be less than three.",
+        'GAUSS_51215': "[GAUSS-51215] : The capacity expansion node [%s] "
+                       "cannot contain GTM/CM/ETCD.",
+        'GAUSS_51216': "[GAUSS-51216] : The capacity expansion node [%s] "
+                       "must contain CN or DN.",
+        'GAUSS_51217': "[GAUSS-51217] : The cluster's static configuration "
+                       "does not match the new configuration file.",
+        'GAUSS_51218': "[GAUSS-51218] : Failed to obtain initialized "
+                       "configuration parameter: %s.",
+        'GAUSS_51219': "[GAUSS-51219] : There is no CN in the cluster.",
+        'GAUSS_51220': "[GAUSS-51220] : The IP address %s is incorrect.",
+        'GAUSS_51221': "[GAUSS-51221] : Failed to configure hosts "
+                       "mapping information.",
+        'GAUSS_51222': "[GAUSS-51222] : Failed to check hostname mapping.",
+        'GAUSS_51223': "[GAUSS-51223] : Failed to obtain network "
+                       "inet addr on the node(%s).",
+        'GAUSS_51224': "[GAUSS-51224] : The IP(%s) has been used "
+                       "on other nodes.",
+        'GAUSS_51225': "[GAUSS-51225] : Failed to set virtual IP.",
+        'GAUSS_51226': "[GAUSS-51226] : Virtual IP(s) and Back IP(s) "
+                       "do not have the same network segment.",
+        'GAUSS_51227': "[GAUSS-51227] : The number of %s on all nodes "
+                       "is different.",
+        'GAUSS_51228': "[GAUSS-51228] : The number %s does not "
+                       "match %s number.",
+        'GAUSS_51229': "[GAUSS-51229] : The database node listenIp(%s) is not "
+                       "in the virtualIp or backIp on the node(%s).",
+        'GAUSS_51230': "[GAUSS-51230] : The number of %s must %s.",
+        'GAUSS_51231': "[GAUSS-51231] : The number of old nodes is "
+                       "less than 2.",
+        'GAUSS_51232': "[GAUSS-51232] : XML configuration and static "
+                       "configuration are the same.",
+        'GAUSS_51233': "[GAUSS-51233] : The Port(%s) is invalid "
+                       "on the node(%s).",
+        'GAUSS_51234': "[GAUSS-51234] : The configuration file [%s] "
+                       "contains parsing errors.",
+        'GAUSS_51235': "[GAUSS-51235] : Invalid directory [%s].",
+        'GAUSS_51236': "[GAUSS-51236] : Failed to parse the xml.",
+        'GAUSS_51239': "[GAUSS-51239] : Failed to parse json. The "
+                       "gs_collect configuration file (%s) is invalid; "
+                       "check the keys in the json file.",
+        'GAUSS_51240': "[GAUSS-51240] : gs_collect configuration file "
+                       "is invalid, TypeName or content must be in the "
+                       "config file.",
+        'GAUSS_51241': "[GAUSS-51241] : The parameter %s(%s) format "
+                       "is wrong, or the value is less than 0.",
+        'GAUSS_51242': "[GAUSS-51242] : gs_collect configuration file "
+                       "is invalid: %s, the key: (%s) is invalid.",
+        'GAUSS_51243': "[GAUSS-51243] : content(%s) does not match the "
+                       "typename(%s) in gs_collect configuration file(%s).",
+        'GAUSS_51244': "[GAUSS-51244] : (%s) is not supported yet.",
+        'GAUSS_51245': "[GAUSS-51245] : There are duplicate keys(%s).",
+        'GAUSS_51246': "[GAUSS-51246] : %s info only supports "
+                       "one-time collection.",
+        'GAUSS_51247': "[GAUSS-51247] : These virtual IP(%s) are not "
+                       "accessible after configuring.",
+        'GAUSS_51248': "[GAUSS-51248] : The hostname(%s) may not be the "
+                       "same as the hostname(/etc/hostname).",
+        'GAUSS_51249': "[GAUSS-51249] : There is no database node instance "
+                       "in the current node.",
+        'GAUSS_51250': "[GAUSS-51250] : Error: the '%s' is illegal.\nThe "
+                       "path name or file name should contain only "
+                       "letters, numbers, or -_:."
+
+    }
+
+    ###########################################################################
+    # SQL exception
+    ###########################################################################
+    GAUSS_513 = {
+        'GAUSS_51300': "[GAUSS-51300] : Failed to execute SQL: %s.",
+        'GAUSS_51301': "[GAUSS-51301] : Execute SQL timed out. \nSql: %s.",
+        'GAUSS_51302': "[GAUSS-51302] : The table '%s.%s' does not exist "
+                       "or is a private table!",
+        'GAUSS_51303': "[GAUSS-51303] : Query '%s' has no record!",
+        'GAUSS_51304': "[GAUSS-51304] : Query '%s' result '%s' is incorrect!",
+        'GAUSS_51305': "[GAUSS-51305] : The table '%s.%s' exists!",
+        'GAUSS_51306': "[GAUSS-51306] : %s: Abnormal reason: %s",
+        'GAUSS_51307': "[GAUSS-51307] : Error: cannot get the SQL "
+                       "execution status.",
+        'GAUSS_51308': "[GAUSS-51308] : Error: cannot load result data.",
+        'GAUSS_51309': "[GAUSS-51309] : Cannot get a correct result"
+                       " by executing sql: %s",
+        'GAUSS_51310': "[GAUSS-51310] : Failed to get connection"
+                       " with database %s."
+
+    }
+
+    ###########################################################################
+    # Shell exception
+    ###########################################################################
+    GAUSS_514 = {
+        'GAUSS_51400': "[GAUSS-51400] : Failed to execute the command: %s.",
+        'GAUSS_51401': "[GAUSS-51401] : Failed to do %s.sh.",
+        'GAUSS_51402': "[GAUSS-51402] : Failed to generate certs.",
+        'GAUSS_51403': "[GAUSS-51403] : Command execution failure: "
+                       "check %s failed!",
+        'GAUSS_51404': "[GAUSS-51404] : Unsupported command %s.",
+        'GAUSS_51405': "[GAUSS-51405] : You need to install the "
+                       "software: %s\n"
+
+    }
+
+    ###########################################################################
+    # interface calls exception
+    ###########################################################################
+    GAUSS_515 = {
+        'GAUSS_51500': "[GAUSS-51500] : Failed to call the interface %s. "
+                       "Exception: %s."
+ } + + ########################################################################### + # cluster/instance status + ########################################################################### + GAUSS_516 = { + 'GAUSS_51600': "[GAUSS-51600] : Failed to obtain the cluster status.", + 'GAUSS_51601': "[GAUSS-51601] : Failed to check %s status.", + 'GAUSS_51602': "[GAUSS-51602] : The cluster status is Abnormal.", + 'GAUSS_51603': "[GAUSS-51603] : Failed to obtain peer %s instance.", + 'GAUSS_51604': "[GAUSS-51604] : There is no HA status for %s.", + 'GAUSS_51605': "[GAUSS-51605] : Failed to check whether " + "the %s process exists.", + 'GAUSS_51606': "[GAUSS-51606] : Failed to kill the %s process.", + 'GAUSS_51607': "[GAUSS-51607] : Failed to start %s.", + 'GAUSS_51608': "[GAUSS-51608] : Failed to lock cluster", + 'GAUSS_51609': "[GAUSS-51609] : Failed to unlock cluster", + 'GAUSS_51610': "[GAUSS-51610] : Failed to stop %s.", + 'GAUSS_51611': "[GAUSS-51611] : Failed to create %s instance.", + 'GAUSS_51612': "[GAUSS-51612] : The node id [%u] are not found " + "in the cluster.", + 'GAUSS_51613': "[GAUSS-51613] : There is no instance in %s to " + "be built.", + 'GAUSS_51614': "[GAUSS-51614] : Received signal[%d].", + 'GAUSS_51615': "[GAUSS-51615] : Failed to initialize instance.", + 'GAUSS_51616': "[GAUSS-51616] : Failed to dump %s schema.", + 'GAUSS_51617': "[GAUSS-51617] : Failed to rebuild %s.", + 'GAUSS_51618': "[GAUSS-51618] : Failed to get all hostname.", + 'GAUSS_51619': "[GAUSS-51619] : The host name [%s] is not " + "in the cluster.", + 'GAUSS_51620': "[GAUSS-51620] : Failed to obtain %s " + "instance information.", + 'GAUSS_51621': "[GAUSS-51621] : HA IP is empty.", + 'GAUSS_51622': "[GAUSS-51622] : There is no %s on %s node.", + 'GAUSS_51623': "[GAUSS-51623] : Failed to obtain version.", + 'GAUSS_51624': "[GAUSS-51624] : Failed to get DN connections.", + 'GAUSS_51625': "[GAUSS-51625] : Cluster is running.", + 'GAUSS_51626': "[GAUSS-51626] : Failed to rollback.", + 'GAUSS_51627': "[GAUSS-51627] : Configuration failed.", + 'GAUSS_51628': "[GAUSS-51628] : The version number of new cluster " + "is [%s]. It should be float.", + 'GAUSS_51629': "[GAUSS-51629] : The version number of new cluster " + "is [%s]. 
It should be greater than or equal to " + "the old version.", + 'GAUSS_51630': "[GAUSS-51630] : No node named %s.", + 'GAUSS_51631': "[GAUSS-51631] : Failed to delete the %s instance.", + 'GAUSS_51632': "[GAUSS-51632] : Failed to do %s.", + 'GAUSS_51633': "[GAUSS-51633] : The step of upgrade " + "number %s is incorrect.", + 'GAUSS_51634': "[GAUSS-51634] : Waiting node synchronizing timeout " + "lead to failure.", + 'GAUSS_51635': "[GAUSS-51635] : Failed to check SHA256.", + 'GAUSS_51636': "[GAUSS-51636] : Failed to obtain %s node information.", + 'GAUSS_51637': "[GAUSS-51637] : The %s does not match with %s.", + 'GAUSS_51638': "[GAUSS-51638] : Failed to append instance on " + "host [%s].", + 'GAUSS_51639': "[GAUSS-51639] : Failed to obtain %s status of " + "local node.", + 'GAUSS_51640': "[GAUSS-51640] : Can't connect to cm_server, cluster " + "is not running possibly.", + 'GAUSS_51641': "[GAUSS-51641] : Cluster redistributing status is not " + "accord with expectation.", + 'GAUSS_51642': "[GAUSS-51642] : Failed to promote peer instances.", + 'GAUSS_51643': "[GAUSS-51643] : Cluster is in read-only mode.", + 'GAUSS_51644': "[GAUSS-51644] : Failed to set resource control " + "for the cluster.", + 'GAUSS_51645': "[GAUSS-51645] : Failed to restart %s.", + 'GAUSS_51646': "[GAUSS-51646] : The other OM operation is currently " + "being performed in the cluster node:" + " '%s'.", + 'GAUSS_51647': "[GAUSS-51647] : The operation step of OM components " + "in current cluster nodes do not match" + " with each other: %s.", + 'GAUSS_51648': "[GAUSS-51648] : Waiting for redistribution process " + "to end timeout.", + 'GAUSS_51649': "[GAUSS-51649] : Capture exceptions '%s' : %s.", + 'GAUSS_51650': "[GAUSS-51650] : Unclassified exceptions: %s.", + 'GAUSS_51651': "[GAUSS-51651] : The node '%s' status is Abnormal.", + 'GAUSS_51652': "[GAUSS-51652] : Failed to get cluster node " + "info.exception is: %s.", + 'GAUSS_51653': "[GAUSS-51653] : No database objects " + "were found in the cluster!", + 'GAUSS_51654': "[GAUSS-51654] : Cannot query instance process" + " version from function." + + } + + ########################################################################### + # Check system table + ########################################################################### + GAUSS_517 = { + 'GAUSS_51700': "[GAUSS-51700] : There must be only one record in the " + "pgxc_group table.", + 'GAUSS_51701': "[GAUSS-51701] : The current node group is incorrect.", + 'GAUSS_51702': "[GAUSS-51702] : Failed to obtain node group " + "information.", + 'GAUSS_51703': "[GAUSS-51703] : Failed to drop record from " + "PGXC_NODE.", + 'GAUSS_51704': "[GAUSS-51704] : Failed to set Cgroup.", + 'GAUSS_51705': "[GAUSS-51705] : Failed to update PGXC_NODE.", + 'GAUSS_51706': "[GAUSS-51706] : Failed to check Cgroup.", + 'GAUSS_51707': "[GAUSS-51707] : Failed to install Cgroup.", + 'GAUSS_51708': "[GAUSS-51708] : Failed to uninstall Cgroup.", + 'GAUSS_51709': "[GAUSS-51709] : Failed to clean Cgroup " + "configuration file." + } + + ########################################################################### + # environmental variable + ########################################################################### + GAUSS_518 = { + 'GAUSS_51800': "[GAUSS-51800] : The environmental variable %s is " + "empty. 
+        'GAUSS_51801': "[GAUSS-51801] : The environment variable %s exists.",
+        'GAUSS_51802': "[GAUSS-51802] : Failed to obtain the environment "
+                       "variable %s.",
+        'GAUSS_51803': "[GAUSS-51803] : Failed to delete the environment "
+                       "variable %s.",
+        'GAUSS_51804': "[GAUSS-51804] : Failed to set the environment "
+                       "variable %s.",
+        'GAUSS_51805': "[GAUSS-51805] : The value of the environment "
+                       "variable [%s] is invalid.",
+        'GAUSS_51806': "[GAUSS-51806] : The cluster has been installed.",
+        'GAUSS_51807': "[GAUSS-51807] : The user's $GAUSSHOME is not equal "
+                       "to the installation path.",
+        'GAUSS_51808': "[GAUSS-51808] : The env file contains errmsg: %s."
+    }
+
+    ###########################################################################
+    # OS version
+    ###########################################################################
+    GAUSS_519 = {
+        'GAUSS_51900': "[GAUSS-51900] : The current OS is not supported.",
+        'GAUSS_51901': "[GAUSS-51901] : The OS versions are different "
+                       "among cluster nodes."
+    }
+
+    ###########################################################################
+    # OS time zone
+    ###########################################################################
+    GAUSS_520 = {
+        'GAUSS_52000': "[GAUSS-52000] : Failed to obtain time zone "
+                       "information about the cluster node.",
+        'GAUSS_52001': "[GAUSS-52001] : Time zone information is "
+                       "different among cluster nodes."
+    }
+
+    ###########################################################################
+    # OS character set
+    ###########################################################################
+    GAUSS_521 = {
+        'GAUSS_52100': "[GAUSS-52100] : Failed to obtain cluster node "
+                       "character sets.",
+        'GAUSS_52101': "[GAUSS-52101] : Character sets are different "
+                       "among cluster nodes.",
+        'GAUSS_52102': "[GAUSS-52102] : The parameter [%s] value is not equal "
+                       "to the expected value.",
+        'GAUSS_52103': "[GAUSS-52103] : Failed to forcibly make the character "
+                       "sets take effect."
+    }
+
+    ###########################################################################
+    # python version and module import
+    ###########################################################################
+    GAUSS_522 = {
+        'GAUSS_52200': "[GAUSS-52200] : Unable to import module: %s.",
+        'GAUSS_52201': "[GAUSS-52201] : The current python version %s "
+                       "is not supported."
+    }
+
+    ###########################################################################
+    # Operating system parameters
+    ###########################################################################
+    GAUSS_523 = {
+        'GAUSS_52300': "[GAUSS-52300] : Failed to set OS parameters.",
+        'GAUSS_52301': "[GAUSS-52301] : Failed to check OS parameters."
+
+    }
+
+    ###########################################################################
+    # preinstall and install
+    ###########################################################################
+    GAUSS_524 = {
+        'GAUSS_52400': "[GAUSS-52400] : The installation environment does "
+                       "not meet the desired result.",
+        'GAUSS_52401': "[GAUSS-52401] : On a systemwide basis, the maximum "
+                       "number of %s is not correct. The current %s "
+                       "value is:",
+        'GAUSS_52402': "[GAUSS-52402] : IP [%s] does not match "
+                       "hostname [%s]. \n",
+        'GAUSS_52403': "[GAUSS-52403] : Command \"%s\" does not exist or the "
+                       "user has no execute permission on %s."
+    }
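+
+    # A minimal usage sketch (illustrative only; "backIp" and "hostName" are
+    # invented names). The message templates above are plain "%"-format
+    # strings, so callers fill the placeholders before raising:
+    #     raise Exception(ErrorCode.GAUSS_524["GAUSS_52402"]
+    #                     % (backIp, hostName))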
+
+    ###########################################################################
+    # uninstall and postuninstall
+    ###########################################################################
+    GAUSS_525 = {
+        'GAUSS_52500': "[GAUSS-52500] : Failed to delete regular tasks.",
+        'GAUSS_52501': "[GAUSS-52501] : Run the %s script before "
+                       "executing this script.",
+        'GAUSS_52502': "[GAUSS-52502] : Another OM process is being executed. "
+                       "To avoid conflicts, this process ends in advance."
+    }
+
+    ###########################################################################
+    # expand and shrink
+    ###########################################################################
+    GAUSS_526 = {
+        'GAUSS_52600': "[GAUSS-52600] : Cannot obtain any cluster ring.",
+        'GAUSS_52601': "[GAUSS-52601] : Redistribution failed due to "
+                       "user request.",
+        'GAUSS_52602': "[GAUSS-52602] : There is no CN in the old nodes.",
+        'GAUSS_52603': "[GAUSS-52603] : There is no CN on the nodes "
+                       "remaining after contraction.",
+        'GAUSS_52604': "[GAUSS-52604] : Parameter '-r'[%s] cannot be "
+                       "more than the number of cluster rings[%s].",
+        'GAUSS_52605': "[GAUSS-52605] : Cannot contract the local node(%s).",
+        'GAUSS_52606': "[GAUSS-52606] : Too many nodes to contract. "
+                       "At least three nodes should be left to "
+                       "form a cluster.",
+        'GAUSS_52607': "[GAUSS-52607] : [%s] is not at the "
+                       "end of the instance list.",
+        'GAUSS_52608': "[GAUSS-52608] : [%s] contains %s instance.",
+        'GAUSS_52609': "[GAUSS-52609] : None of the contracted nodes "
+                       "contains a database node instance.",
+        'GAUSS_52610': "[GAUSS-52610] : The current node group is already "
+                       "the node group after contraction.",
+        'GAUSS_52611': "[GAUSS-52611] : There must be only one record "
+                       "in the current node group.",
+        'GAUSS_52612': "[GAUSS-52612] : None of the dilatation nodes "
+                       "contains a database node instance.",
+        'GAUSS_52613': "[GAUSS-52613] : Static configuration is not matched "
+                       "on some nodes. Please handle it first.",
+        'GAUSS_52614': "[GAUSS-52614] : Timeout. The current "
+                       "cluster status is %s.",
+        'GAUSS_52615': "[GAUSS-52615] : Cluster lock unlocked due to timeout.",
+        'GAUSS_52616': "[GAUSS-52616] : Cannot find a similar "
+                       "instance for [%s %s].",
+        'GAUSS_52617': "[GAUSS-52617] : Invalid check type.",
+        'GAUSS_52618': "[GAUSS-52618] : Failed to delete etcd from node.",
+        'GAUSS_52619': "[GAUSS-52619] : Failed to uninstall application.",
+        'GAUSS_52620': "[GAUSS-52620] : Not all nodes found. The following "
+                       "is what we found: %s.",
+        'GAUSS_52621': "[GAUSS-52621] : No DNs specified for the target "
+                       "new node group.",
+        'GAUSS_52622': "[GAUSS-52622] : No group name specified for the "
+                       "target new node group.",
+        'GAUSS_52623': "[GAUSS-52623] : Failed to check node group "
+                       "numbers: the number of node groups is [%d].",
+        'GAUSS_52624': "[GAUSS-52624] : Failed to check %s node "
+                       "group members: invalid group name or nodes.",
+        'GAUSS_52625': "[GAUSS-52625] : The local instance and the peer "
+                       "instance are not both in the contracted nodes.",
+        'GAUSS_52626': "[GAUSS-52626] : The CN connections on the old "
+                       "nodes are abnormal.",
+        'GAUSS_52627': "[GAUSS-52627] : The current cluster is locked.",
+        'GAUSS_52628': "[GAUSS-52628] : Static configuration has already "
+                       "been updated on all nodes; the expansion may "
+                       "have been completed.",
+        'GAUSS_52629': "[GAUSS-52629] : Cluster ring(%s) cannot contain "
+                       "fewer than three nodes.",
+        'GAUSS_52630': "[GAUSS-52630] : Failed to set the read-only mode "
+                       "parameter for all database node instances.",
+        'GAUSS_52631': "[GAUSS-52631] : Invalid value for GUC parameter "
+                       "comm_max_datanode: %s.",
+        'GAUSS_52632': "[GAUSS-52632] : The cluster broke down or operated "
+                       "abnormally during online expansion; the lock "
+                       "process for expansion is lost.",
+        'GAUSS_52633': "[GAUSS-52633] : Cannot execute redistribution "
+                       "because the shrink execution failed."
+
+
+    }
+
+    ###########################################################################
+    # replace
+    ###########################################################################
+    GAUSS_527 = {
+        'GAUSS_52700': "[GAUSS-52700] : Failed to update ETCD.",
+        'GAUSS_52701': "[GAUSS-52701] : All the CMAgent instances are "
+                       "abnormal. Cannot fix the cluster.",
+        'GAUSS_52702': "[GAUSS-52702] : The cluster status is Normal. "
+                       "There is no instance to fix.",
+        'GAUSS_52703': "[GAUSS-52703] : The number of normal ETCD must "
+                       "be greater than half.",
+        'GAUSS_52704': "[GAUSS-52704] : Failed to check the %s condition.",
+        'GAUSS_52705': "[GAUSS-52705] : Failed to obtain ETCD key.",
+        'GAUSS_52706': "[GAUSS-52706] : Failed to clean ETCD and touch "
+                       "the flag file on %s.",
+        'GAUSS_52707': "[GAUSS-52707] : Failed to install on %s.",
+        'GAUSS_52708': "[GAUSS-52708] : Failed to configure on %s.",
+        'GAUSS_52709': "[GAUSS-52709] : Failed to check the cluster "
+                       "configuration differences:",
+        'GAUSS_52710': "[GAUSS-52710] : Replacement failed.",
+        'GAUSS_52711': "[GAUSS-52711] : Failed to set CMAgent start mode."
+    }
+
+    ###########################################################################
+    # manageCN and changeIP
+    ###########################################################################
+    GAUSS_528 = {
+        'GAUSS_52800': "[GAUSS-52800] : Cluster is %s(%s) now.",
+        'GAUSS_52801': "[GAUSS-52801] : Only allowed to %s one CN. The %s "
+                       "does not match.",
+        'GAUSS_52802': "[GAUSS-52802] : Only allowed to add one CN "
+                       "at the end.",
+        'GAUSS_52803': "[GAUSS-52803] : There must be at least one Normal "
+                       "CN after deleting a CN.",
+        'GAUSS_52804': "[GAUSS-52804] : Failed to add the Abnormal CN.",
+        'GAUSS_52805': "[GAUSS-52805] : Failed to find another instance as "
+                       "a model for instance(%s).",
+        'GAUSS_52806': "[GAUSS-52806] : Invalid rollback step: %s.",
+        'GAUSS_52807': "[GAUSS-52807] : There is no IP changed.",
+        'GAUSS_52808': "[GAUSS-52808] : Detected CN %s, but the action is %s.",
+        'GAUSS_52809': "[GAUSS-52809] : Only allowed to add or delete "
+                       "one CN.",
+        'GAUSS_52810': "[GAUSS-52810] : There are Abnormal coordinator(s) "
+                       "in the cluster; please delete them first."
+    }
+
+    ###########################################################################
+    # upgrade
+    ###########################################################################
+    GAUSS_529 = {
+        'GAUSS_52900': "[GAUSS-52900] : Failed to upgrade strategy: %s.",
+        'GAUSS_52901': "[GAUSS-52901] : The new cluster commitid cannot be "
+                       "the same as the old cluster commitid.",
+        'GAUSS_52902': "[GAUSS-52902] : Upgrade from %s to %s is "
+                       "not supported.",
+        'GAUSS_52903': "[GAUSS-52903] : The new cluster version number[%s] "
+                       "should be bigger than that of the old cluster[%s].",
+        'GAUSS_52904': "[GAUSS-52904] : Please choose the right upgrade "
+                       "strategy.",
+        'GAUSS_52905': "[GAUSS-52905] : The number of upgrade nodes cannot "
+                       "be more than %d.",
+        'GAUSS_52906': "[GAUSS-52906] : The number of grey upgrade nodes "
+                       "cannot be more than the number of cluster nodes.",
+        'GAUSS_52907': "[GAUSS-52907] : Failed to cancel the cluster "
+                       "read-only mode.",
+        'GAUSS_52908': "[GAUSS-52908] : Failed to set cluster read-only mode.",
+        'GAUSS_52909': "[GAUSS-52909] : Only the specified upgrade nodes "
+                       "with the same step can do the upgrade task.",
+        'GAUSS_52910': "[GAUSS-52910] : These nodes %s have been successfully "
+                       "upgraded to the new version; no need to upgrade "
+                       "them again.",
+        'GAUSS_52911': "[GAUSS-52911] : The nodes %s that failed to upgrade "
+                       "last time are not the same as the current "
+                       "upgrade nodes.",
+        'GAUSS_52912': "[GAUSS-52912] : All nodes have been upgraded, so "
+                       "--continue cannot be used.",
+        'GAUSS_52913': "[GAUSS-52913] : All nodes have been upgraded. "
+                       "No need to use --continue.",
+        'GAUSS_52914': "[GAUSS-52914] : The recorded commitid is not the "
+                       "same as the current commitid.",
+        'GAUSS_52915': "[GAUSS-52915] : $GAUSSHOME is not a symbolic link.",
+        'GAUSS_52916': "[GAUSS-52916] : The current upgrade status is "
+                       "not pre-commit.",
+        'GAUSS_52917': "[GAUSS-52917] : Failed to drop the old pmk schema.",
+        'GAUSS_52918': "[GAUSS-52918] : Failed to record the node upgrade "
+                       "step in table %s.%s.",
+        'GAUSS_52919': "[GAUSS-52919] : The upgrade has already been "
+                       "committed, but the commit has not finished.",
+        'GAUSS_52920': "[GAUSS-52920] : Cannot use the grey upgrade option "
+                       "--continue before upgrading grey nodes.",
+        'GAUSS_52921': "[GAUSS-52921] : Failed to query disk usage "
+                       "with the gs_check tool.",
+        'GAUSS_52922': "[GAUSS-52922] : Disk usage exceeds %s; "
+                       "please clean up before upgrading.",
+        'GAUSS_52923': "[GAUSS-52923] : .",
+        'GAUSS_52924': "[GAUSS-52924] : .",
+        'GAUSS_52925': "[GAUSS-52925] : The input upgrade type [%s] is not "
+                       "the same as the recorded upgrade type [%s].",
+        'GAUSS_52926': "[GAUSS-52926] : The upgrade step should be a digit.",
+        'GAUSS_52927': "[GAUSS-52927] : ",
+        'GAUSS_52928': "[GAUSS-52928] : .",
+        'GAUSS_52929': "[GAUSS-52929] : Failed to check the application "
+                       "version. Output: \n%s.",
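+
+    # Callers in this patch often append extra context to a formatted
+    # template before raising; a sketch (the "output" name is invented):
+    #     raise Exception(ErrorCode.GAUSS_529["GAUSS_52907"]
+    #                     + " Error:\n%s" % output)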
" + "Output: \n%s.", + 'GAUSS_52930': "[GAUSS-52930] : .", + 'GAUSS_52931': "[GAUSS-52931] : .", + 'GAUSS_52932': "[GAUSS-52932] : There is no CN in the remaining " + "old nodes.", + 'GAUSS_52933': "[GAUSS-52933] : There is not a majority of %s on the " + "remaining old nodes.", + 'GAUSS_52934': "[GAUSS-52934] : .", + 'GAUSS_52935': "[GAUSS-52935] : Current upgrade version is not same " + "with unfinished upgrade version record.", + 'GAUSS_52936': "[GAUSS-52936] : Upgrade is not finished, " + "cannot do another task.", + 'GAUSS_52937': "[GAUSS-52937] : Clean install directory option is " + "invalid, can only be 'new' or 'old'!", + 'GAUSS_52938': "[GAUSS-52938] : Can not find %s.", + 'GAUSS_52939': "[GAUSS-52939] : Can not get %s.", + 'GAUSS_52940': "[GAUSS-52940] : Invalid node type:%s.", + 'GAUSS_52941': "[GAUSS-52941] : Invalid node role:%s.", + 'GAUSS_52942': "[GAUSS-52942] : No such key to check guc value.", + 'GAUSS_52943': "[GAUSS-52943] : Invalid instance type:%s." + + } + + ########################################################################### + # check + ########################################################################### + GAUSS_530 = { + 'GAUSS_53000': "[GAUSS-53000] : The database user [%s] is not " + "match with the old user [%s].", + 'GAUSS_53001': "[GAUSS-53001] : The result of query table " + "is incorrect: %s.", + 'GAUSS_53002': "[GAUSS-53002] : Failed to obtain SSD device.", + 'GAUSS_53003': "[GAUSS-53003] : The checked item does not meet " + "the standards.", + 'GAUSS_53004': "[GAUSS-53004] : Failed to collect statistics " + "on all nodes.", + 'GAUSS_53005': "[GAUSS-53005] : Unable to obtain SSD disk " + "on current node.", + 'GAUSS_53006': "[GAUSS-53006] : No database node instance uses data " + "directory %s on %s.", + 'GAUSS_53007': "[GAUSS-53007] : Failed to switch %s.", + 'GAUSS_53008': "[GAUSS-53008] : The current node do not install SSD. " + "Can not check SSD performance.", + 'GAUSS_53009': "[GAUSS-53009] : Failed to format cu of directory: %s.", + 'GAUSS_53010': "[GAUSS-53010] : The function name of %s is not exist " + "in the %s.", + 'GAUSS_53011': "[GAUSS-53011] : Failed to check %s.", + 'GAUSS_53012': "[GAUSS-53012] : Failed to insert pmk data to " + "database.", + 'GAUSS_53013': "[GAUSS-53013] : %s can not be empty.", + 'GAUSS_53014': "[GAUSS-53014] : %s must be a nonnegative integer.", + 'GAUSS_53015': "[GAUSS-53015] : The threshold Threshold_NG[%d] " + "must be greater than Threshold_Warning[%d].", + 'GAUSS_53016': "[GAUSS-53016] : The threshold Threshold_NG[%d] and " + "Threshold_Warning[%d] must be integer from 1 to 99.", + 'GAUSS_53017': "[GAUSS-53017] : Unsupported operating system %s.", + 'GAUSS_53018': "[GAUSS-53018] : Failed to get file handler " + "of process %s by use cmd %s.", + 'GAUSS_53019': "[GAUSS-53019] : Failed to delete variable '%s %s'" + " from /etc/sysctl.conf.", + 'GAUSS_53020': "[GAUSS-53020] : Failed to set %s.", + 'GAUSS_53021': "[GAUSS-53021] : %s only can be supported" + " on %s Platform.", + 'GAUSS_53022': "[GAUSS-53022] : Platform %s%s is not supported.", + 'GAUSS_53023': "[GAUSS-53023] : Failed to get CPUcores and MemSize." + " Error: %s", + 'GAUSS_53024': "[GAUSS-53024] : Failed to get ip string for" + " config pg_hba.conf.", + 'GAUSS_53025': "[GAUSS-53025] : content's type must be bytes.", + 'GAUSS_53026': "[GAUSS-53026] : bytes's type must be in (bytes, str).", + 'GAUSS_53027': "[GAUSS-53027] : content's len must >= (iv_len + 16).", + 'GAUSS_53028': "[GAUSS-53028] : Test PMK schema failed. 
" + "Output: \n%s", + 'GAUSS_53029': "[GAUSS-53029] : Failed to install pmk schema," + "Error: \n%s", + 'GAUSS_53030': "[GAUSS-53030] : The class must have %s", + 'GAUSS_53031': "[GAUSS-53031] : The cluster is None.", + 'GAUSS_53032': "[GAUSS-53032] : The speed limit must " + "be a nonnegative integer.", + 'GAUSS_53033': "[GAUSS-53033] : Invalid User : %s." + + } + + ########################################################################### + # check interface + ########################################################################### + GAUSS_531 = { + 'GAUSS_53100': "[GAUSS-53100] : [%s] is not supported in single " + "cluster.", + 'GAUSS_53101': "[GAUSS-53101] : This interface is not supported " + "in %s cluster.", + 'GAUSS_53102': "[GAUSS-53102] : [%s] is not supported in " + "express cluster.", + 'GAUSS_53103': "[GAUSS-53103] : The single primary multi standby " + "cluster does not support the product" + " version '%s'.", + 'GAUSS_53104': "[GAUSS-53104] : [%s] is not supported in " + "single instance cluster." + } + ########################################################################### + # Single Primary MultiStandby cluster + ########################################################################### + GAUSS_532 = { + 'GAUSS_53200': "[GAUSS-53200] : The number of standbys for each " + "database node instance must be the same. " + "Please set it.", + 'GAUSS_53201': "[GAUSS-53201] : The number of database node standbys " + "and the AZ settings are incorrect. Please set it.", + 'GAUSS_53202': "[GAUSS-53202] : The AZ information is incorrect. " + "Please set it.", + 'GAUSS_53203': "[GAUSS-53203] : The number of ETCD in %s. " + "Please set it.", + 'GAUSS_53204': "[GAUSS-53204] : [%s] is not supported in single " + "primary multistandby cluster.", + 'GAUSS_53205': "[GAUSS-53205] : The priority of %s must be higher " + "than %s. Please set it.", + 'GAUSS_53206': "[GAUSS-53206] : The value of %s must be greater " + "than 0 and less than 11. Please set it." + } + ########################################################################### + # License + ########################################################################### + GAUSS_533 = { + 'GAUSS_53300': "[GAUSS-53300] : The current product version '%s' " + "does not support the license " + "register/unregister operation.", + 'GAUSS_53301': "[GAUSS-53301] : The license control files are not " + "consistent on the cluster.", + 'GAUSS_53302': "[GAUSS-53302] : The current cluster does not apply " + "the license control, please upgrade it" + " first.", + 'GAUSS_53303': "[GAUSS-53303] : The DWS cluster does not support the " + "license register/unregister operation.", + 'GAUSS_53304': "[GAUSS-53304] : Can not register the enabled " + "features.", + 'GAUSS_53305': "[GAUSS-53304] : Can not un-register " + "the disabled features.", + 'GAUSS_53306': "[GAUSS-53306] : Can not register the unsupported " + "features of the product version '%s'.", + 'GAUSS_53307': "[GAUSS-53307] : No need to un-register the " + "unsupported " + "features of the product version '%s'." 
+    ###########################################################################
+    # ROACH
+    # [GAUSS-53400] : Roach ETCD operation failed
+    ###########################################################################
+    GAUSS_534 = {
+        'GAUSS_53400': "[GAUSS-53400] : Roach ETCD term operation failed.",
+        'GAUSS_53401': "[GAUSS-53401] : Roach delete/clean operation "
+                       "failed. Failed to clean %s.",
+        'GAUSS_53402': "[GAUSS-53402] : Failed to get %s cluster "
+                       "information/env: %s.",
+        'GAUSS_53403': "[GAUSS-53403] : Cluster balance check failed.",
+        'GAUSS_53404': "[GAUSS-53404] : Backup key %s does not exist."
+    }
+    ##########################################################################
+    # gs_collector
+    # [GAUSS-53500] : gs_collector failed
+    ##########################################################################
+    GAUSS_535 = {
+        'GAUSS_53500': "[GAUSS-53500] : Relation %s does not exist.",
+        'GAUSS_53501': "[GAUSS-53501] : Failed to connect to the server: "
+                       "connection refused.",
+        'GAUSS_53502': "[GAUSS-53502] : Please check the database status.",
+        'GAUSS_53503': "[GAUSS-53503] : There is no coordinator instance "
+                       "on this host.",
+        'GAUSS_53504': "[GAUSS-53504] : There are no %s files: "
+                       "please check start-time and end-time.",
+        'GAUSS_53505': "[GAUSS-53505] : There are no log files: "
+                       "please check the cluster info.",
+        'GAUSS_53506': "[GAUSS-53506] : Failed to mkdir.",
+        'GAUSS_53507': "[GAUSS-53507] : Failed to execute the %s command.",
+        'GAUSS_53508': "[GAUSS-53508] : Core pattern is not core-e-p-t.",
+        'GAUSS_53509': "[GAUSS-53509] : There are no core files: "
+                       "please check the core file name pattern.",
+        'GAUSS_53510': "[GAUSS-53510] : Please check the db status or "
+                       "the database name.",
+        'GAUSS_53511': "[GAUSS-53511] : There is no %s process.",
+        'GAUSS_53512': "[GAUSS-53512] : Gstack command not found.",
+        'GAUSS_53513': "[GAUSS-53513] : Schema '%s' is not in the "
+                       "white list.",
+        'GAUSS_53514': "[GAUSS-53514] : Relation '%s' does not belong "
+                       "to the '%s' schema.",
+        'GAUSS_53515': "[GAUSS-53515] : Database content '%s' is invalid; "
+                       "only 'schema.relation' is supported.",
+        'GAUSS_53516': "[GAUSS-53516] : There is no information to be "
+                       "collected; gs_collector is finished."
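+
+    ##########################################################################
+    # start and stop database instances
+    ##########################################################################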
" + } + + GAUSS_536 = { + 'GAUSS_53600': "[GAUSS-53600]: Can not start the database, " + "the cmd is %s, Error:\n%s.", + 'GAUSS_53601': "[GAUSS-53601]: Can not start the primary database, " + "Error:\n%s.", + 'GAUSS_53602': "[GAUSS-53602]: Can not start the standby database, " + "Error:\n%s.", + 'GAUSS_53603': "[GAUSS-53603]: The dataDir can not be empty.", + 'GAUSS_53604': "[GAUSS_53604]: The hostName %s has not %s process.", + 'GAUSS_53605': "[GAUSS_53605]: The %s in hostName %s is running.", + 'GAUSS_53606': "[GAUSS-53606]: Can not stop the database, " + "the cmd is %s, Error:\n%s.", + 'GAUSS_53607': "[GAUSS-53607]: Fail to remove the file %s, " + "Error:\n%s.", + 'GAUSS_53608': "[GAUSS-53608]: Can not start the database, " + "Error:\n%s.", + 'GAUSS_53609': "[GAUSS-53609]: Can not stop the database, " + "Error:\n%s.", + 'GAUSS_53610': "[GAUSS-53610]: The input dataDir(%s) " + "may be incorrect.", + 'GAUSS_53611': "[GAUSS-53611]: Error information is :\n%s", + 'GAUSS_53612': "[GAUSS-53612]: Can not find any catalog in database %s" + } + + ########################################################################## + # gs_expansion + # [GAUSS-537] : gs_expansion failed + ########################################################################## + GAUSS_357 = { + "GAUSS_35700": "[GAUSS-35700] Expansion standby node failed.", + "GAUSS_35701": "[GAUSS-35701] Empty parameter. The %s parameter is" + "missing in the command.", + "GAUSS_35702": "[GAUSS-35702] Unrecognized parameter, standby host " + "backip %s is not in the " + "XML configuration file", + "GAUSS_35703": "[GAUSS-35703] Check standby database Failed. The " + "database on node is abnormal. \n" + "node [%s], user [%s], dataNode [%s]. \n" + "You can use command \"gs_ctl query -D %s\" for more " + "detail.", + "GAUSS_35704": "[GAUSS-35704] %s [%s] does not exist on node [%s].", + "GAUSS_35705": "[GAUSS-35705] Error, the database version is " + "inconsistent in %s: %s", + "GAUSS_35706": "[GAUSS-35706] Fail to %s on all new hosts.", + "GAUSS_35707": "[GAUSS-35707] Fail to check %s version on:\n%s", + "GAUSS_35708": "[GAUSS-35708] Inconsistent %s version with primary on \n%s", + "GAUSS_35709": "[GAUSS-35709] The %s of %s is not %s.", + "GAUSS_35710": "[GAUSS-35710] Generate static file [%s] not found.", + "GAUSS_35711": "[GAUSS-35711] %s in xml is not consistent with that in cluster.", + "GAUSS_35712": "[GAUSS-35712] User [%s] is not in the group [%s]." + } + + ########################################################################## + # gs_dropnode + # [GAUSS-358] : gs_dropnode failed + ########################################################################## + GAUSS_358 = { + "GAUSS_35800": "[GAUSS-35800] Expansion standby node failed.", + "GAUSS_35801": "[GAUSS-35801] Empty parameter. The %s parameter is " + "missing in the command.", + "GAUSS_35802": "[GAUSS-35802] The IP list of target node: %s" + "is not in the current cluster. Please check!", + "GAUSS_35803": "[GAUSS-35803] The IP of local host %s is in the " + "target node list. \n" + "Can not drop local host!\n", + "GAUSS_35804": "[GAUSS-35804] The dropnode operation can only be executed" + " at the primary node. \n ", + "GAUSS_35805": "[GAUSS-35805] Input %s. Operation aborted. ", + "GAUSS_35806": "[GAUSS-35806] Current status of cluster is %s .\n" + "It doesn't meet the requirement! 
", + "GAUSS_35807": "[GAUSS-35807] The host %s which still exist in the " + "cluster can't be connected.\n" + "It doesn't meet the requirement!\nPlease add it to the " + "list of hosts to be dropped if it is a target host.", + "GAUSS_35808": "[GAUSS-35808] The %s is running switchover/failover!\n" + "The dropnode operation can only be executed when there is" + " no such operation!", + "GAUSS_35809": "[GAUSS-35809] Some important steps failed to execute. " + "Please refer to log for detail!", + "GAUSS_35810": "[GAUSS-35810] A same process is already running! " + + } + + +class OmError(BaseException): + """ + Used to record OM exception information and support ErrorCode + keywords as message information. + """ + + def __init__(self, _message, *args, **kwargs): + """ + Initialize the OmError instance. + + :param _message: The input error message, it can be the error + message string, or the ErrorCode keywords, + or the Exception instance. + :param args: The additional unnamed parameters that use + to format the error message. + :param kwargs: The additional named parameters that use to format + the error message or extend to other + functions. + + :type _message: str | BaseException + :type args: str | int + :type kwargs: str | int + """ + # If we catch an unhandled exception. + if isinstance(_message, Exception): + # Store the error code. + self._errorCode = "" + # Store the error message. + self._message = self.__getErrorMessage(str(_message), args, kwargs) + # If can not parse the error code. + if not self._errorCode: + # Store the error code. + self._errorCode = "GAUSS_51649" + # Store the error message. + self._message = ErrorCode.GAUSS_516[self._errorCode] % ( + type(_message).__name__, repr(_message)) + else: + # Store the error code. + self._errorCode = "" + # Store the error message. + self._message = self.__getErrorMessage(_message, args, kwargs) + + # Store the stack information. + self._stackInfo = sys.exc_info()[2] + + @property + def message(self): + """ + Getter, get the error message. + + :return: Return the error message. + :rtype: str + """ + return self._message + + @property + def errorCode(self): + """ + Getter, get the error code. + + :return: Return the error code. + :rtype: str + """ + return self._errorCode + + def __getErrorMessage(self, _errorCode, args, kwargs): + """ + Get error information through error code. + + :param _errorCode: Error code. + :param args: Additional parameters. + :param kwargs: Additional parameters. + + :type _errorCode: str + :type args: tuple + :type kwargs: dict | None + + :return: Return the error message. + :rtype: str + """ + # Get base error information through error code. + pattern = r"^[\S\s]*\[(GAUSS-\d+)\][\S\s]+$" + match = re.match(pattern, str(_errorCode)) + if match and len(match.groups()) == 1: + self._errorCode = match.groups()[0] + message = _errorCode + else: + self._errorCode = "GAUSS_51650" + message = ErrorCode.GAUSS_516[self._errorCode] % _errorCode + + # Format parameter which type is "%(param)s". + if kwargs: + for key, value in kwargs.items(): + if value is not None: + message = message.replace("%(" + key + ")s", str(value)) + else: + message = message.replace("%(" + key + ")s", "'None'") + + # Format standard type parameters. + if args: + # Convert tuple to list. + args = list(args) + # Travel the list. + for i, arg in enumerate(args): + if arg is None: + args[i] = "'None'" + else: + args[i] = str(arg) + + # Format the message. 
+            message %= tuple(args)
+
+        return message
+
+    def __str__(self):
+        """
+        Show this instance as a string.
+
+        :return: Return this instance as a string.
+        :rtype: str
+        """
+        return self.message
+
+    def __repr__(self):
+        """
+        Show this instance as a string.
+
+        :return: Return this instance as a string.
+        :rtype: str
+        """
+        return self.__str__()
diff --git a/script/gspylib/common/GaussLog.py b/script/gspylib/common/GaussLog.py
new file mode 100644
index 0000000..bdfecf1
--- /dev/null
+++ b/script/gspylib/common/GaussLog.py
@@ -0,0 +1,1851 @@
+# -*- coding:utf-8 -*-
+#############################################################################
+# Portions Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+# Portions Copyright (c) 2007 Agendaless Consulting and Contributors.
+#
+# openGauss is licensed under Mulan PSL v2.
+# You can use this software according to the terms
+# and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+#
+#          http://license.coscl.org.cn/MulanPSL2
+#
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+# ----------------------------------------------------------------------------
+# Description : GaussLog.py is a utility to handle the log
+#############################################################################
+import os
+import sys
+import time
+import datetime
+import subprocess
+import _thread as thread
+import re
+import logging
+import logging.handlers as _handlers
+import io
+import traceback
+import codecs
+
+sys.path.append(sys.path[0] + "/../../")
+
+from gspylib.os.gsfile import g_file
+from gspylib.common.Common import DefaultValue
+from gspylib.common.ErrorCode import ErrorCode
+from gspylib.common.ErrorCode import OmError as _OmError
+
+# Import typing for comments.
+try:
+    from typing import Dict
+    from typing import List
+except ImportError:
+    Dict = dict
+    List = list
+
+# max log file size
+# 16M
+MAXLOGFILESIZE = 16 * 1024 * 1024
+# The list of local actions in preinstall
+PREINSTALL_ACTION = ["prepare_path", "check_os_Version", "create_os_user",
+                     "check_os_user", "create_cluster_paths",
+                     "set_os_parameter", "set_finish_flag", "set_warning_env",
+                     "prepare_user_cron_service", "prepare_user_sshd_service",
+                     "set_library", "set_sctp", "set_virtualIp",
+                     "clean_virtualIp", "check_hostname_mapping",
+                     "init_gausslog", "check_envfile", "check_dir_owner",
+                     "set_user_env", "set_tool_env", "gs_preinstall"]
+
+LOG_DEBUG = 1
+LOG_INFO = 2
+LOG_WARNING = 2.1
+LOG_ERROR = 3
+LOG_FATAL = 4
+
+#
+# _srcfile is used when walking the stack to check when we've got the first
+# caller stack frame.
+# +if hasattr(sys, 'frozen'): # support for py2exe + _srcfile = "logging%s__init__%s" % (os.sep, __file__[-4:]) +elif __file__[-4:].lower() in ['.pyc', '.pyo']: + _srcfile = __file__[:-4] + '.py' +else: + _srcfile = __file__ +_srcfile = os.path.normcase(_srcfile) + + +class GaussLog: + """ + Class to handle log file + """ + + def __init__(self, logFile, module="", expectLevel=LOG_DEBUG): + """ + function: Constructor + input : NA + output: NA + """ + self.logFile = "" + self.expectLevel = expectLevel + self.moduleName = module + self.fp = None + self.size = 0 + self.suffix = "" + self.prefix = "" + self.dir = "" + self.pid = os.getpid() + self.step = 0 + self.lock = thread.allocate_lock() + self.tmpFile = None + self.ignoreErr = False + + logFileList = "" + try: + dirName = os.path.dirname(logFile) + # check log path + if (not os.path.exists(dirName)): + try: + topDirPath = DefaultValue.getTopPathNotExist(dirName) + self.tmpFile = '%s/topDirPath.dat' % dirName + if (not os.path.isdir(dirName)): + os.makedirs(dirName, + DefaultValue.KEY_DIRECTORY_PERMISSION) + except Exception as e: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50208"] % + dirName + " Error:\n%s" % str(e)) + cmd = "echo %s > '%s/topDirPath.dat' 2>/dev/null && chmod " \ + "600 '%s/topDirPath.dat'" % ( + topDirPath, dirName, dirName) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50206"] + % "the top path" + " Error:\n%s." % output + + "The cmd is %s" % cmd) + self.dir = dirName + originalFileName = os.path.basename(logFile) + resList = originalFileName.split(".") + if (len(resList) > 2): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50224"] + + " Error: The file name [%s] can not contain " + "more than one '.'." % logFile) + # check suffix + (self.prefix, self.suffix) = os.path.splitext(originalFileName) + if (self.suffix != ".log"): + raise Exception( + ErrorCode.GAUSS_502["GAUSS_50212"] % (logFile, ".log")) + + # get log file list + logFileList = "%s/logFileList_%s.dat" % (self.dir, self.pid) + cmd = "ls %s | grep '^%s-' | grep '%s$' > %s" % ( + self.dir, self.prefix, self.suffix, logFileList) + (status, output) = subprocess.getstatusoutput(cmd) + if status == 0: + with open(logFileList, "r") as fp: + filenameList = [] + while True: + # get real file name + filename = (fp.readline()).strip() + if not filename: + break + existedResList = filename.split(".") + if len(existedResList) > 2: + continue + (existedPrefix, existedSuffix) = \ + os.path.splitext(filename) + if existedSuffix != ".log": + continue + if len(originalFileName) + 18 != len(filename): + continue + timeStamp = filename[-21:-4] + # check log file name + if self.is_valid_date(timeStamp): + pass + else: + continue + filenameList.append(filename) + + if len(filenameList): + fileName = max(filenameList) + self.logFile = self.dir + "/" + fileName.strip() + g_file.createFileInSafeMode(self.logFile) + self.fp = open(self.logFile, "a") + DefaultValue.cleanTmpFile(logFileList) + return + + DefaultValue.cleanTmpFile(logFileList) + # create new log file + self.__openLogFile() + except Exception as ex: + DefaultValue.cleanTmpFile(logFileList) + print(str(ex)) + sys.exit(1) + + def __del__(self): + """ + function: Delete tmp file + input : NA + output: NA + """ + if (self.tmpFile is not None and os.path.isfile(self.tmpFile)): + if self.moduleName not in PREINSTALL_ACTION: + # If the moduleName is local action in preinstall, + # we do not delete the file, preinstall will use it to + # change path owner 
later. + if os.access(self.tmpFile, os.R_OK | os.W_OK): + os.remove(self.tmpFile) + + def __openLogFile(self): + """ + function: open log file + input : NA + output: NA + """ + try: + # get current time + currentTime = time.strftime("%Y-%m-%d_%H%M%S") + # init log file + self.logFile = self.dir + "/" + self.prefix + "-" + currentTime \ + + self.suffix + # Re-create the log file to add a retry 3 times mechanism, + # in order to call concurrently between multiple processes + retryTimes = 3 + count = 0 + while (True): + (status, output) = self.__createLogFile() + if status == 0: + break + count = count + 1 + time.sleep(1) + if (count > retryTimes): + raise Exception(output) + # open log file + self.fp = open(self.logFile, "a") + except Exception as e: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50206"] + % self.logFile + " Error:\n%s" % str(e)) + + def __createLogFile(self): + """ + function: create log file + input : NA + output: (status, output) + """ + try: + if (not os.path.exists(self.logFile)): + os.mknod(self.logFile, DefaultValue.KEY_FILE_PERMISSION) + return (0, "") + except Exception as e: + return (1, str(e)) + + def is_valid_date(self, datastr): + """ + function: Judge if date valid + input : datastr + output: bool + """ + try: + time.strptime(datastr, "%Y-%m-%d_%H%M%S") + return True + except Exception as ex: + return False + + def closeLog(self): + """ + function: Function to close log file + input : NA + output: NA + """ + try: + if (self.fp): + self.fp.flush() + self.fp.close() + self.fp = None + except Exception as ex: + raise Exception(str(ex)) + + # print the flow message to console window and log file + # AddInfo: constant represent step constant, addStep represent step + # plus, None represent no step + def log(self, msg, stepFlag=""): + """ + function:print the flow message to console window and log file + input: msg,stepFlag + control: when stepFlag="", the OM background log does not display + step information. + when stepFlag="addStep", the OM background log step will + add 1. + when stepFlag="constant", the OM background log step + defaults to the current step. + output: NA + """ + if (LOG_INFO >= self.expectLevel): + print(msg) + self.__writeLog("LOG", msg, stepFlag) + + # print the flow message to log file only + def debug(self, msg, stepFlag=""): + """ + function:print the flow message to log file only + input: msg,stepFlag + control: when stepFlag="", the OM background log does not display + step information. + when stepFlag="addStep", the OM background log step will + add 1. + when stepFlag="constant", the OM background log step + defaults to the current step. + output: NA + """ + if (LOG_DEBUG >= self.expectLevel): + self.__writeLog("DEBUG", msg, stepFlag) + + def warn(self, msg, stepFlag=""): + """ + function:print the flow message to log file only + input: msg,stepFlag + control: when stepFlag="", the OM background log does not display + step information. + when stepFlag="addStep", the OM background log step will + add 1. + when stepFlag="constant", the OM background log step + defaults to the current step. 
+ output: NA + """ + if (LOG_WARNING >= self.expectLevel): + print(msg) + self.__writeLog("WARNING", msg, stepFlag) + + # print the error message to console window and log file + def error(self, msg): + """ + function: print the error message to console window and log file + input : msg + output: NA + """ + if (LOG_ERROR >= self.expectLevel): + print(msg) + self.__writeLog("ERROR", msg) + + # print the error message to console window and log file,then exit + def logExit(self, msg): + """ + function: print the error message to console window and log file, + then exit + input : msg + output: NA + """ + if (LOG_FATAL >= self.expectLevel): + print(msg) + try: + self.__writeLog("ERROR", msg) + except Exception as ex: + print(str(ex)) + self.closeLog() + sys.exit(1) + + def Step(self, stepFlag): + """ + function: return Step number info + input: add + output: step number + """ + if (stepFlag == "constant"): + return self.step + else: + self.step = self.step + 1 + return self.step + + def __writeLog(self, level, msg, stepFlag=""): + """ + function: Write log to file + input: level, msg, stepFlag + output: NA + """ + if (self.fp is None): + return + + try: + self.lock.acquire() + # if the log file does not exits, create it + if (not os.path.exists(self.logFile)): + self.__openLogFile() + else: + LogPer = oct(os.stat(self.logFile).st_mode)[-3:] + if (not LogPer == "600"): + os.chmod(self.logFile, DefaultValue.KEY_FILE_PERMISSION) + # check if need switch to an new log file + self.size = os.path.getsize(self.logFile) + if (self.size >= MAXLOGFILESIZE and os.getuid() != 0): + self.closeLog() + self.__openLogFile() + + replace_reg = re.compile(r'-W[ ]*[^ ]*[ ]*') + msg = replace_reg.sub('-W *** ', str(msg)) + + if (msg.find("gs_redis") >= 0): + replace_reg = re.compile(r'-A[ ]*[^ ]*[ ]*') + msg = replace_reg.sub('-A *** ', str(msg)) + + strTime = datetime.datetime.now() + if (stepFlag == ""): + print("[%s][%d][%s][%s]:%s" % ( + strTime, self.pid, self.moduleName, level, msg), + file=self.fp) + else: + stepnum = self.Step(stepFlag) + print("[%s][%d][%s][%s][Step%d]:%s" % ( + strTime, self.pid, self.moduleName, level, stepnum, msg), + file=self.fp) + self.fp.flush() + self.lock.release() + except Exception as ex: + self.lock.release() + if self.ignoreErr: + return + raise Exception(ErrorCode.GAUSS_502["GAUSS_50205"] + % (("log file %s") % self.logFile) + + " Error:\n%s" % str(ex)) + + @staticmethod + def exitWithError(msg, status=1): + """ + function: Exit with error message + input: msg, status=1 + output: NA + """ + sys.stderr.write("%s\n" % msg) + sys.exit(status) + + @staticmethod + def printMessage(msg): + """ + function: Print the String message + input: msg + output: NA + """ + sys.stdout.write("%s\n" % msg) + + +class FormatColor(object): + """ + Formatting string for displaying colors on the screen. + """ + + def __init__(self): + """ + Initialize the format color class. + """ + pass + + @staticmethod + def withColor(_string, _foregroundColorID, _backgroundColorID=49): + """ + Given foreground/background ANSI color codes, return a string that, + when printed, will format the supplied + string using the supplied colors. + + :param _string: The input string. + :param _foregroundColorID: The foreground color identify number. + :param _backgroundColorID: The background color identify number. + + :type _string: str + :type _foregroundColorID: int + :type _backgroundColorID: int + + :return: Return the string with color. 
+ :rtype: str + """ + return "\x1b[%dm\x1b[%dm%s\x1b[39m\x1b[49m" % ( + _foregroundColorID, _backgroundColorID, _string) + + @staticmethod + def bold(_string): + """ + Returns a string that, when printed, will display the supplied + string in ANSI bold. + + :param _string: The input string. + :type _string: str + + :return: Return the bold string. + :rtype: str + """ + return "\x1b[1m%s\x1b[22m" % _string + + @staticmethod + def default(_string): + """ + Get the string with default color. + + :param _string: The input string. + :type _string: str + + :return: Return the string with color. + :rtype: str + """ + return FormatColor.withColor(_string, 30) + + @staticmethod + def defaultWithBold(_string): + """ + Get the string with default color. + + :param _string: The input string. + :type _string: str + + :return: Return the string with color. + :rtype: str + """ + return FormatColor.default(FormatColor.bold(_string)) + + @staticmethod + def red(_string): + """ + Get the string with red color. + + :param _string: The input string. + :type _string: str + + :return: Return the string with color. + :rtype: str + """ + return FormatColor.withColor(_string, 31) + + @staticmethod + def redWithBold(_string): + """ + Get the string with red color. + + :param _string: The input string. + :type _string: str + + :return: Return the string with color. + :rtype: str + """ + return FormatColor.red(FormatColor.bold(_string)) + + @staticmethod + def green(_string): + """ + Get the string with green color. + + :param _string: The input string. + :type _string: str + + :return: Return the string with color. + :rtype: str + """ + return FormatColor.withColor(_string, 32) + + @staticmethod + def greenWithBold(_string): + """ + Get the string with green color. + + :param _string: The input string. + :type _string: str + + :return: Return the string with color. + :rtype: str + """ + return FormatColor.green(FormatColor.bold(_string)) + + @staticmethod + def yellow(_string): + """ + Get the string with yellow color. + + :param _string: The input string. + :type _string: str + + :return: Return the string with color. + :rtype: str + """ + return FormatColor.withColor(_string, 33) + + @staticmethod + def yellowWithBold(_string): + """ + Get the string with yellow color. + + :param _string: The input string. + :type _string: str + + :return: Return the string with color. + :rtype: str + """ + return FormatColor.yellow(FormatColor.bold(_string)) + + @staticmethod + def blue(_string): + """ + Get the string with blue color. + + :param _string: The input string. + :type _string: str + + :return: Return the string with color. + :rtype: str + """ + return FormatColor.withColor(_string, 34) + + @staticmethod + def blueWithBold(_string): + """ + Get the string with blue color. + + :param _string: The input string. + :type _string: str + + :return: Return the string with color. + :rtype: str + """ + return FormatColor.blue(FormatColor.bold(_string)) + + @staticmethod + def magenta(_string): + """ + Get the string with magenta color. + + :param _string: The input string. + :type _string: str + + :return: Return the string with color. + :rtype: str + """ + return FormatColor.withColor(_string, 35) + + @staticmethod + def magentaWithBold(_string): + """ + Get the string with magenta color. + + :param _string: The input string. + :type _string: str + + :return: Return the string with color. 
+ :rtype: str + """ + return FormatColor.magenta(FormatColor.bold(_string)) + + @staticmethod + def cyan(_string): + """ + Get the string with cyan color. + + :param _string: The input string. + :type _string: str + + :return: Return the string with color. + :rtype: str + """ + return FormatColor.withColor(_string, 36) + + @staticmethod + def cyanWithBold(_string): + """ + Get the string with cyan color. + + :param _string: The input string. + :type _string: str + + :return: Return the string with color. + :rtype: str + """ + return FormatColor.cyan(FormatColor.bold(_string)) + + @staticmethod + def white(_string): + """ + Get the string with white color. + + :param _string: The input string. + :type _string: str + + :return: Return the string with color. + :rtype: str + """ + return FormatColor.withColor(_string, 37) + + @staticmethod + def whiteWithBold(_string): + """ + Get the string with white color. + + :param _string: The input string. + :type _string: str + + :return: Return the string with color. + :rtype: str + """ + return FormatColor.white(FormatColor.bold(_string)) + + @staticmethod + def hasColors(_stream): + """ + Returns boolean indicating whether or not the supplied stream + supports ANSI color. + + :param _stream: The stream instance. + :type _stream: file | io.TextIOWrapper + + :return: Return whether the supplied stream supports ANSI color. + :rtype: bool + """ + if not hasattr(_stream, "isatty") or not _stream.isatty(): + return False + + try: + # noinspection PyUnresolvedReferences + import curses as _curses + except ImportError: + pass + else: + try: + # Set the current terminal type to the value of the + # environment variable "TERM". + _curses.setupterm(None, _stream.fileno()) + + return _curses.tigetnum("colors") > 2 + except _curses.error: + pass + + retCode, output = subprocess.getstatusoutput("tput colors") + if retCode == 0: + try: + return int(output.strip()) > 2 + except (NameError, TypeError, EOFError, SyntaxError): + return False + else: + return False + + @staticmethod + def screenWidth(_steam): + """ + Get the command line interface width. + + :param: The steam instance, such as sys.stdout. + :type: file + + :return: Return the command line interface width. + :rtype: int + """ + try: + # noinspection PyUnresolvedReferences + import curses as _curses + except ImportError: + pass + else: + try: + _curses.setupterm(None, _steam.fileno()) + + return _curses.tigetnum("cols") + except _curses.error: + pass + + # Ignore the standard error, sometimes it will cause the tput + # command to return a wrong value. + retCode, output = subprocess.getstatusoutput("tput cols") + if retCode == 0: + try: + return int(output.strip()) + except (NameError, TypeError, EOFError, SyntaxError): + return -1 + else: + return -1 + + +# +# Progress logger config. +# +# Progress Handler Status. +_PROGRESS_STATUS_START = "STARTING" +_PROGRESS_STATUS_STOP = "STOPPING" +_PROGRESS_STATUS_CHECK = "CHECKING" +_PROGRESS_STATUS_PENDING = "PENDING" +_PROGRESS_STATUS_SUCCESS = "SUCCESS" +_PROGRESS_STATUS_FAILURE = "FAILURE" +_PROGRESS_STATUS_PASSED = "PASSED" +_PROGRESS_STATUS_CANCELED = "CANCELED" +# +# Progress Handler Color. +# +# The color of the status information. +PROGRESS_COLOR_RED = "red" +PROGRESS_COLOR_GREEN = "green" +PROGRESS_COLOR_YELLOW = "yellow" +PROGRESS_COLOR_BLUE = "blue" +PROGRESS_COLOR_MAGENTA = "magenta" +PROGRESS_COLOR_CYAN = "cyan" +PROGRESS_COLOR_WHITE = "white" +PROGRESS_COLOR_DEFAULT = "default" +# The color with bold of the status information. 
+PROGRESS_COLOR_RED_BOLD = "redWithBold" +PROGRESS_COLOR_GREEN_BOLD = "greenWithBold" +PROGRESS_COLOR_YELLOW_BOLD = "yellowWithBold" +PROGRESS_COLOR_BLUE_BOLD = "blueWithBold" +PROGRESS_COLOR_MAGENTA_BOLD = "magentaWithBold" +PROGRESS_COLOR_CYAN_BOLD = "cyanWithBold" +PROGRESS_COLOR_WHITE_BOLD = "whiteWithBold" +PROGRESS_COLOR_DEFAULT_BOLD = "defaultWithBold" +PROGRESS_COLOR_MAP = { + PROGRESS_COLOR_RED: FormatColor.red, + PROGRESS_COLOR_GREEN: FormatColor.green, + PROGRESS_COLOR_YELLOW: FormatColor.yellow, + PROGRESS_COLOR_BLUE: FormatColor.blue, + PROGRESS_COLOR_MAGENTA: FormatColor.magenta, + PROGRESS_COLOR_CYAN: FormatColor.cyan, + PROGRESS_COLOR_WHITE: FormatColor.white, + PROGRESS_COLOR_DEFAULT: FormatColor.default, + PROGRESS_COLOR_RED_BOLD: FormatColor.redWithBold, + PROGRESS_COLOR_GREEN_BOLD: FormatColor.greenWithBold, + PROGRESS_COLOR_YELLOW_BOLD: FormatColor.yellowWithBold, + PROGRESS_COLOR_BLUE_BOLD: FormatColor.blueWithBold, + PROGRESS_COLOR_MAGENTA_BOLD: FormatColor.magentaWithBold, + PROGRESS_COLOR_CYAN_BOLD: FormatColor.cyanWithBold, + PROGRESS_COLOR_WHITE_BOLD: FormatColor.whiteWithBold, + PROGRESS_COLOR_DEFAULT_BOLD: FormatColor.defaultWithBold +} +# Default std format. +_LOGGER_DEFAULT_STD_FORMAT = "%(message)s" + + +class LogColorFormatter(logging.Formatter, object): + """ + Print colour log information in terminal interface. + + It will only be used in the "StreamHandler" of the logger. + """ + + def __init__(self, *args, **kwargs): + """ + Initialize the formatter class. + + :param args: The additional for default Formatter class. + :param kwargs: The additional for default Formatter class. + + :type _isColorful: bool + :type args: str | None + :type kwargs: str | None + """ + logging.Formatter.__init__(self, *args, **kwargs) + + def format(self, _record): + """ + Format the specified record as text. + + :param _record: The log record instance. + :type _record: _logging.LogRecord + + :return: Return the formatted string. + :rtype: str + """ + try: + _record.message = _record.getMessage() + except Exception as e: + raise Exception(ErrorCode.GAUSS_500["GAUSS_50009"] + + " Exception is %r, format string is %r", + e, _record.__dict__) + + if FormatColor.hasColors(sys.stderr): + if _record.levelname == "DEBUG" or _record.levelno == \ + logging.DEBUG: + _record.levelname = FormatColor.blue(_record.levelname) + elif _record.levelname == "INFO" or _record.levelno == \ + logging.INFO: + _record.levelname = FormatColor.green(_record.levelname) + elif _record.levelname == "WARNING" or _record.levelno == \ + logging.WARN: + _record.levelname = FormatColor.yellow(_record.levelname) + elif _record.levelname == "ERROR" or _record.levelno == \ + logging.ERROR: + _record.levelname = FormatColor.red(_record.levelname) + elif _record.levelname == "CRITICAL" or _record.levelno == \ + logging.CRITICAL: + _record.levelname = FormatColor.redWithBold(_record.levelname) + + return logging.Formatter.format(self, _record) + + +class ProgressHandler(logging.StreamHandler, object): + """ + Print progress handler. + + This class was used to print progress information on the screen. + Status flag can be displayed colorful. + + For example: + In a process, the first step is to display it on the screen. + [STARTING] Starting the cluster. + Next, the transaction in the process is processed. + When processing succeed, refresh information on the same line. + [SUCCESS ] Starting the cluster. + When processing failure, refresh information on the same line. + [FAILURE ] Starting the cluster. 
+ """ + + def __init__(self): + """ + This class was used to print progress information on the screen. + """ + logging.StreamHandler.__init__(self, sys.stdout) + + # The registered status. + self.__status_list = {} # type: Dict[str, str] + # Width of middle brackets. + self.__wide = 0 + # The start flag. + self._isStart = False + # The storage flag. + self._storeMessage = [] + + def registerStatus(self, status, color): + """ + Register the status information and the color of the status + information. + + Repeated addition of status strings does not refresh status string + color information + + :param status: The status information. + :param color: the color of the status information. + + :type status: str + :type color: str + """ + if color in PROGRESS_COLOR_MAP.keys(): + handler = PROGRESS_COLOR_MAP.get(color) + self.__status_list.setdefault(status, handler(status)) + if len(status) > self.__wide: + self.__wide = len(status) + + # noinspection PyBroadException + def emit(self, record): + """ + Emit a record. + + The following progress information can be printed: + Progress information of start/end type. + Start the progress: Logger.start(self, message, status = + None, *args, **kwargs) + End the progress: Logger.stop(self, message, status = + None, *args, **kwargs) + Progress information of current-step/total-step type. + Progress information of percentage type. + + :param record: log record instance. + :type record: logging.LogRecord + """ + try: + # If the current step text does not exists, save it, otherwise + # delete the saved copy. + fs = self.format(record) + # Get the screen width, and clip the string to adapt the screen. + screenWidth = FormatColor.screenWidth(self.stream) + + if hasattr(record, "progress_handler_status_start"): + status = record.progress_handler_status_start + length = self.__wide - len(status) + if status in self.__status_list: + status = self.__status_list[status] + fs = "[%s] %s" % (status.center(len(status) + length), fs) + if 0 < screenWidth <= len(fs): + fs = fs[:screenWidth - 4] + "..." + + self.stream.write(fs) + self._isStart = True + elif hasattr(record, "progress_handler_status_stop"): + status = record.progress_handler_status_stop + length = self.__wide - len(status) + if status in self.__status_list: + status = self.__status_list[status] + fs = "\r[%s] %s\n" % (status.center(len(status) + length), fs) + if 0 < screenWidth <= len(fs): + fs = fs[:screenWidth - 4] + "...\n" + + self.stream.write(fs) + self._isStart = False + elif self._isStart: + # Use the other formatter + formatter = LogColorFormatter(_LOGGER_DEFAULT_STD_FORMAT) + fs = formatter.format(record) + "\n" + # Store the message. + self._storeMessage.append(fs) + else: + # Use the other formatter + formatter = LogColorFormatter(_LOGGER_DEFAULT_STD_FORMAT) + fs = formatter.format(record) + "\n" + + self.stream.write(fs) + + if not self._isStart: + # Flush the stop message. + self.flush() + # Print the warning or error message. + for message in self._storeMessage: + self.stream.write(message) + # Clean the storage message. + del self._storeMessage[:] + + self.flush() + except (KeyboardInterrupt, SystemExit): + raise + except BaseException: + self.handleError(record) + + +class RotatingFileHandler(_handlers.RotatingFileHandler, object): + """ + Handler for logging to a set of files, which switches from one file to + the next when the current file reaches + a certain size. 
+ """ + + def __init__(self, filename, mode='a', maxBytes=0, backupCount=0, + encoding=None, delay=False): + """ + Open the specified file and use it as the stream for logging. + + By default, the file grows indefinitely. You can specify particular + values of maxBytes and backupCount to allow the file to rollover at + a predetermined size. + + Rollover occurs whenever the current log file is nearly maxBytes in + length. If backupCount is >= 1, the system will successively create + new files with the same pathname as the base file, but with extensions + ".1", ".2" etc. appended to it. For example, with a backupCount of 5 + and a base file name of "app.log", you would get "app.log", + "app.log.1", "app.log.2", ... through to "app.log.5". The file being + written to is always "app.log" - when it gets filled up, it is closed + and renamed to "app.log.1", and if files "app.log.1", "app.log.2" etc. + exist, then they are renamed to "app.log.2", "app.log.3" etc. + respectively. + + If maxBytes is zero, rollover never occurs. + + :param filename: The base file name for the logger. + :param mode: The file open mode, default is "a". + :param maxBytes: The max size of the log file. + :param backupCount: The back up count of the log file. + :param encoding: The encoding type of the log file. + :param delay: Whether to delay to open the file. + + :type filename: str + :type mode: str + :type maxBytes: int + :type backupCount: int + :type encoding: str | None + :type delay: bool + """ + # Store the base file name before the parent initialization. + self.baseFilename = filename.strip() if str else filename + # The real log file name is equal with the base log file name. + self._currentFileName = self.baseFilename + + # Check the log file list, get the last log file. + self.__getNewestFile() + + _handlers.RotatingFileHandler.__init__(self, filename, mode, maxBytes, + backupCount, encoding, delay) + + def doRollover(self): + """ + Do a rollover, as described in __init__(). + """ + # Close the output stream. + if self.stream: + self.stream.close() + self.stream = None + + # Change the log file. + self.__getNewestFile() + + # Open an new output stream. + if sys.version_info <= (2, 6) or not self.delay: + self.stream = self._open() + + def _open(self): + """ + Open the file stream. + + :return: Return the file descriptor. + :rtype: file + """ + # Get the log dir. + logDir = os.path.dirname(self.baseFilename) + # Check whether the log file directory exist. + try: + if not os.path.exists(logDir): + # Create the log dir. + os.makedirs(logDir, DefaultValue.KEY_DIRECTORY_PERMISSION) + except OSError: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50208"], logDir) + + # Open the file. + if self.encoding is None: + stream = open(self._currentFileName, self.mode) + else: + stream = codecs.open(self._currentFileName, self.mode, + self.encoding) + + return stream + + def __getNewestFile(self): + """ + Get the newest log file. + + :rtype: None + """ + + def getNewFileName(_filePath): + """ + Get the real file name from the base file name. + + :param _filePath: The input base file path. + :type _filePath: str + + :return: Return the real file name from the base file name. + :rtype: str + """ + # Get tht log directory. + _dirName = os.path.dirname(_filePath) + _fileName = os.path.basename(_filePath) + + # Get the prefix and the suffix. + _prefix, _suffix = os.path.splitext(os.path.basename(_fileName)) + + # Get the new file name. 
+ _newFileName = "%(_prefix)s_%(timeStamp)s%(suffix)s" \ + % {"_prefix": _prefix.lower(), + "timeStamp": time.strftime("%Y-%m-%d_%H%M%S"), + "suffix": _suffix} + return os.path.join(_dirName, _newFileName) + + # Initialize the file log handler. + if self._currentFileName != self.baseFilename: + self._currentFileName = getNewFileName(self.baseFilename) + return + + dirName = os.path.dirname(self.baseFilename) + # If the log file path does not exist, create it, and generate the + # real log file name. + try: + # Check whether the log file directory exist. + if not os.path.exists(dirName): + # Create the log dir. + os.makedirs(dirName, DefaultValue.KEY_DIRECTORY_PERMISSION) + # Save the real log file name. + self._currentFileName = getNewFileName(self.baseFilename) + return + except OSError: + raise _OmError(ErrorCode.GAUSS_502["GAUSS_50208"], dirName) + + # Get the prefix and the suffix. + prefix, suffix = os.path.splitext(os.path.basename(self.baseFilename)) + # The file name list file. + fileList = "%(dirName)s/logFileList_%(pid)s.dat" % {"dirName": dirName, + "pid": os.getpid()} + try: + with open(fileList, "w") as fp: + try: + for filename in os.listdir(dirName): + if re.match(prefix + "-", filename) and \ + re.search(suffix + "$", filename): + fp.write(filename + '\n') + except OSError: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50208"], + dirName) + except Exception as e: + raise Exception("open %s in 'w' mode err." % fileList) + # Get the file list. + with open(fileList, "r") as fp: + lines = [line.strip() for line in fp.readlines() if + line and line.strip()] + + # Remove the file list file. + if os.path.exists(fileList): + os.remove(fileList) + + fileNameList = [] + for fileName in lines: + # Get the matched file. + pattern = r"%(prefix)s-(%(timeStamp)s)%(suffix)s" \ + % {"prefix": prefix, + "timeStamp": "\\d{4}-\\d{2}-\\d{2}_\\d{6}", + "suffix": suffix} + match = re.match(pattern, fileName) + if match: + timeStamp = match.groups()[0] + + # Check whether the time stamp is valid. + # noinspection PyBroadException + try: + time.strptime(timeStamp, "%Y-%m-%d_%H%M%S") + except Exception: + print("Time stamp %s type error." % timeStamp) + continue + else: + continue + + # Add to file list. + fileNameList.append(fileName) + + # If the log directory does not contain the specified log file. + if not fileNameList: + self._currentFileName = getNewFileName(self.baseFilename) + return + + # Get the newest log file. + fileName = os.path.join(dirName, max(fileNameList)) + if os.path.getsize(fileName) >= MAXLOGFILESIZE: + self._currentFileName = getNewFileName(self.baseFilename) + else: + self._currentFileName = fileName + + +class _Logger(logging.Logger, object): + """ + The logger class, expected to replace GaussLog class. + + Inheriting the "object" class for adapting Python 2.6. + """ + + def __init__(self, name, level=logging.NOTSET): + """ + Init the logger class. + + :param name: The logger name. + :param level: The default logger level. + + :type name: str + :type level: int + """ + logging.Logger.__init__(self, name, level) + + def start(self, message, status=None, *args, **kwargs): + """ + Start a new progress. + + Logger does not check whether progress is over, It requires the user + to control the beginning and end of the + progress. + + :param message: Progress messages that need to be recorded. + :param status: Progress status information. + :param args: A list of parameters for formatting into progress + messages. + :param kwargs: Include two parameters: "exc_info" and "extra". 
+ exc_info: Exception information. + extra: Additional parameters will be used to initialize log + record instance. + + :type message: str + :type status: str + :type args: * + :type kwargs: * + """ + kwargs.setdefault("extra", {}) + kwargs["extra"].setdefault("progress_handler", True) + if status is not None and self.hasHandler(ProgressHandler): + kwargs["extra"].setdefault("progress_handler_status_start", status) + + self.info(message, *args, **kwargs) + + def stop(self, message, status=None, *args, **kwargs): + """ + End a progress. + + Logger does not check whether progress is over, It requires the user + to control the beginning and end of the + progress. + + :param message: Progress messages that need to be recorded. + :param status: Progress status information. + :param args: A list of parameters for formatting into progress + messages. + :param kwargs: Include two parameters: "exc_info" and "extra". + exc_info: Exception information. + extra: Additional parameters will be used to initialize log + record instance. + + :type message: str + :type status: str + :type args: * + :type kwargs: * + """ + kwargs.setdefault("extra", {}) + kwargs["extra"].setdefault("progress_handler", False) + if status is not None and self.hasHandler(ProgressHandler): + kwargs["extra"].setdefault("progress_handler_status_stop", status) + + self.info(message, *args, **kwargs) + + def hasHandler(self, handler_type): + """ + Check whether the list contains a handler instance of the specified + type. + + :param handler_type: The type of handler. + :type handler_type: type + + :return: If the list contains a handler instance of specified type. + """ + for handler in self.handlers: + if handler.__class__ == handler_type: + return True + + return False + + def addHandler(self, hdlr): + """ + Add a new log handler. + + StreamHandler and ProgressHandler are conflict. + StreamHandler will be overwritten by ProgressHandler, but not the + reverse. + The instance of progressHandler is unique in a logger instance. + + :param hdlr: log handler instance + :type hdlr: ProgressHandler | _logging.Handler + """ + for i in range(len(self.handlers) - 1, -1, -1): + handler = self.handlers[i] + + # StreamHandler will be overwritten by ProgressHandler. + if handler.__class__ == logging.StreamHandler and hdlr.__class__ \ + == ProgressHandler: + self.removeHandler(handler) + # If ProgressHandler instance is exist, StreamHandler instance + # will not be inserted into the list. + elif handler.__class__ == ProgressHandler and hdlr.__class__ == \ + logging.StreamHandler: + return + + # call the parent function. + logging.Logger.addHandler(self, hdlr) + + def findCaller(self, stack_info=False): + """ + Find the stack frame of the caller so that we can note the source + file name, line number and function name. + + :rtype: (str, int, str) | (str, int, str, str) + """ + f = logging.currentframe() + + # On some versions of IronPython, currentframe() returns None if + # IronPython isn't run with -X:Frames. 
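+        # Outline of the walk below: step outward from the current frame,
+        # skipping every frame whose source file belongs to the logging
+        # module (or to this module), so the record points at the user's
+        # call site rather than at logger internals.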
+ if f is not None: + f = f.f_back + + rv = "(unknown file)", 0, "(unknown function)" + while hasattr(f, "f_code"): + co = f.f_code + filename = os.path.normcase(co.co_filename) + # noinspection PyProtectedMember + if filename in [logging._srcfile, _srcfile]: + f = f.f_back + continue + + if sys.version_info[:2] >= (3, 0): + sInfo = None + if stack_info: + sio = io.StringIO() + sio.write('Stack (most recent call last):\n') + traceback.print_stack(f, file=sio) + sInfo = sio.getvalue() + if sInfo[-1] == '\n': + sInfo = sInfo[:-1] + + rv = (co.co_filename, f.f_lineno, co.co_name, sInfo) + else: + rv = (co.co_filename, f.f_lineno, co.co_name) + break + + return rv + + def callHandlers(self, record): + """ + Call the log processing routine to process log information. + + :param record: Log record instance. + :type record: logging.LogRecord + """ + c = self + found = 0 + while c: + for hdlr in c.handlers: + found += 1 + if record.levelno >= hdlr.level and isinstance( + hdlr, ProgressHandler): + hdlr.handle(record) + elif record.levelno >= hdlr.level and not \ + isinstance(hdlr, ProgressHandler) and \ + (not hasattr(record, "progress_handler") or + getattr(record, "progress_handler") is True): + hdlr.handle(record) + if not c.propagate: + c = None + else: + c = c.parent + + if found == 0: + raise Exception(ErrorCode.GAUSS_500["GAUSS_50015"] + % "You have to register at lease one " + "handler for the logger") + + +class Logger(object): + """ + Logger management class. + + The main functions are as follows: + Initialize a unique logger. + addStreamHandler Add a command line interface recording routine. + addProgressHandler Add a progress information printed routine. + addFileHandler Add a file recording routine. + addFileErrorHandler Add a file error-recording routine. + addSyslogHandler Add a syslog recording routine. + getLogger Get the unique logger instance. + setLevel Set global log level. + """ + # Logger management instance. + __instance = None + + # Logger level. + # Debug level. + DEBUG = logging.DEBUG + # Info level. + INFO = logging.INFO + # Warning level. + WARN = logging.WARN + WARNING = logging.WARNING + # Error level. + ERROR = logging.ERROR + # Fatal level. + FATAL = logging.FATAL + CRITICAL = logging.CRITICAL + # log level not set. + NOTSET = logging.NOTSET + # log level list. + # noinspection PyProtectedMember + LOG_LEVEL_LIST = [level for level in logging._nameToLevel.keys() if + isinstance(level, str)] + + # The default logger format. 
+ # %(name)s Name of the logger (logging channel) + # %(levelno)s Numeric logging level for the message (DEBUG, + # INFO, WARNING, ERROR, CRITICAL) + # %(levelname)s Text logging level for the message ("DEBUG", + # "INFO", "WARNING", "ERROR", "CRITICAL") + # %(pathname)s Full pathname of the source file where the + # logging call was issued (if available) + # %(filename)s Filename portion of pathname + # %(module)s Module (name portion of filename) + # %(lineno)d Source line number where the logging call was + # issued (if available) + # %(funcName)s Function name + # %(created)f Time when the LogRecord was created (time.time() + # return value) + # %(asctime)s Textual time when the LogRecord was created + # %(msecs)d Millisecond portion of the creation time + # %(relativeCreated)d Time in milliseconds when the LogRecord was + # created, relative to the time the logging module + # was loaded (typically at application startup time) + # %(thread)d Thread ID (if available) + # %(threadName)s Thread name (if available) + # %(process)d Process ID (if available) + # %(message)s The result of record.getMessage(), computed just + # as the record is emitted + LOGGER_DEFAULT_STD_FORMAT = _LOGGER_DEFAULT_STD_FORMAT + LOGGER_DEFAULT_FORMAT = "%(asctime)s %(module)s [%(levelname)s] %(" \ + "message)s" + LOGGER_DEBUG_FORMAT = "%(asctime)s %(module)s - %(funcName)s - line %(" \ + "lineno)d [%(levelname)s]" \ + " %(message)s" + + def __init__(self): + """ + Create logger management class. + + Initialization is performed only when the logger management instance + is first initialized. + """ + if Logger.__instance is None: + # set the unique logger manager instance. + Logger.__instance = self + + # Set default logger class to support ProgressHandler. + logging.setLoggerClass(_Logger) + + # set the unique logger instance. + self._logger = logging.getLogger(os.path.basename(sys.argv[0])) + + # set the default loglevel to info. + Logger.setLevel(Logger.INFO) + # add a default stream handler. + Logger.addStreamHandler() + + def __new__(cls): + """ + Used to create a single instance class. + + :return: Returns an uninitialized logger manager instance. + If the logger manager instance is exist, return it, then we will + not re-init it. + If the logger manager instance is not exist, return a new object + instance, then we will init it to a logger + manager instance. + :rtype: Logger + """ + if cls.__instance is None: + return object.__new__(cls) + else: + return cls.__instance + + @staticmethod + def addStreamHandler(loglevel=INFO, fmt=None, date_fmt=None): + """ + Add a command line interface recording routine. + + :param loglevel: Log level. + :param fmt: Log formatting type. + :param date_fmt: Date formatting type. + + :type loglevel: int + :type fmt: basestring + :type date_fmt: basestring + """ + if fmt is None: + fmt = Logger.LOGGER_DEFAULT_STD_FORMAT + + # create formatter. + formatter = LogColorFormatter(fmt, date_fmt) + + # create stream handler + stream = logging.StreamHandler() + stream.setLevel(loglevel) + stream.setFormatter(formatter) + + # add stream handler to logger instance. + logger = Logger.getLogger() + if not logger.hasHandler(logging.StreamHandler): + logger.addHandler(stream) + + @staticmethod + def addProgressHandler(): + """ + Add a progress information printed routine. + """ + # create formatter. + formatter = logging.Formatter("%(message)s") + + # create progress handler. Default log level is info. 
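+        # Illustrative effect, with example messages: once this handler is
+        # added and the statuses are registered, logger.start("Checking",
+        # "START") prints "[ START ] Checking" without a newline, and the
+        # matching logger.stop("Checking", "SUCCESS") rewrites that line as
+        # "[SUCCESS] Checking" (see ProgressHandler.emit).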
+ progress = ProgressHandler() + progress.setFormatter(formatter) + progress.setLevel(Logger.INFO) + + # add progress handler to logger instance. + logger = Logger.getLogger() + # ProgressHandler instance is unique. + if not logger.hasHandler(ProgressHandler): + logger.addHandler(progress) + + @staticmethod + def addFileHandler(filename, logLevel=DEBUG, fmt=LOGGER_DEFAULT_FORMAT, + date_fmt=None, mode='a', + maxBytes=MAXLOGFILESIZE, encoding=None, delay=False): + """ + Add a file recording routine. + + :param filename: The log file path. + :param logLevel: Log level. + :param fmt: Log formatting type. + :param date_fmt: Date formatting type. + :param mode: Open mode of file. + :param maxBytes: The max size of the log file. + :param encoding: Encoding format of file. + :param delay: Whether to delay to open the file. + + :type filename: str + :type logLevel: int | str + :type fmt: str | None + :type date_fmt: str | None + :type mode: str + :type maxBytes: int + :type encoding: str | None + :type delay: bool + """ + if fmt is None: + fmt = Logger.LOGGER_DEFAULT_FORMAT + + # create a formatter. + formatter = logging.Formatter(fmt, date_fmt) + + # create a file handler + file_handler = RotatingFileHandler(filename, mode, maxBytes, + encoding=encoding, delay=delay) + file_handler.setLevel(logLevel) + file_handler.setFormatter(formatter) + + # add file handler to the logger instance. + logger = Logger.getLogger() + logger.addHandler(file_handler) + + @staticmethod + def addFileErrorHandler(filename, fmt=None, date_fmt=None, mode='a', + encoding=None, delay=False): + """ + + Add a file error-recording routine. + + :param filename: File path. + :param fmt: Log formatting type. + :param date_fmt: Date formatting type. + :param mode: Open mode of file. + :param encoding: Encoding format of file. + :param delay: Whether to delay to open the file. + + :type filename: str + :type fmt: str | None + :type date_fmt: str | None + :type mode: str + :type encoding: str | None + :type delay: bool + """ + Logger.addFileHandler(filename, logLevel=Logger.ERROR, fmt=fmt, + date_fmt=date_fmt, mode=mode, + encoding=encoding, delay=delay) + + @staticmethod + def addSyslogHandler(loglevel=DEBUG, address="/dev/log", + facility=_handlers.SysLogHandler.LOG_USER, + sockType=None, fmt=None, date_fmt=None): + """ + Add a syslog recording routine. + + :param loglevel: Log level. + :param address: The target address sent by syslog. + If address is specified as a string, a UNIX socket is used. To + log to a local syslogd, + "SysLogHandler(address="/dev/log")" can be used. + If address is specified as a tuple, a "sockType" socket is used. + To log to target host, + "SysLogHandler(address = (IP or hostname, port))" can be use. + :param facility: Syslog facility names + :param sockType: Socket type. + If address is specified as a string, this parameter is not used. + If address is specified as a tuple, this parameter is default + to "socket.SOCK_DGRAM". + :param fmt: Log formatting type. + :param date_fmt: Date formatting type. + + :type loglevel: int + :type address: str | tuple + :type facility: str + :type sockType: int + :type fmt: str + :type date_fmt: str + """ + if fmt is None: + fmt = Logger.LOGGER_DEFAULT_FORMAT + + # create a formatter. + formatter = logging.Formatter(fmt, date_fmt) + + # create a syslog handler. 
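+        # Illustrative calls (the addresses are examples): a local syslogd
+        # is reached with addSyslogHandler(address="/dev/log"), a remote
+        # collector with addSyslogHandler(address=("192.0.2.10", 514)), in
+        # line with the parameter description in the docstring above.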
+ if sys.version_info >= (2, 7): + # noinspection PyArgumentList + syslog = _handlers.SysLogHandler(address, facility, sockType) + else: + syslog = _handlers.SysLogHandler(address, facility) + syslog.setLevel(loglevel) + syslog.setFormatter(formatter) + + # add syslog handler to logger instance. + logger = Logger.getLogger() + logger.addHandler(syslog) + + @staticmethod + def getLogger(): + """ + Get the unique logger instance. + + :return: return the logger instance. + :rtype: _Logger + """ + return Logger()._logger + + @staticmethod + def setLevel(loglevel): + """ + Set global log level. + + :param loglevel: Log level. + :type loglevel: int | str + + :return: Return whether the log level is legal. + :rtype: bool + """ + logger = Logger.getLogger() + + # noinspection PyProtectedMember + if isinstance(loglevel, + str) and loglevel.upper() in logging._levelToName: + logger.setLevel(loglevel) + return True + elif isinstance(loglevel, int) and loglevel in logging._levelToName: + logger.setLevel(loglevel) + return True + else: + return False + + @staticmethod + def setFormat(_fmt, _date_fmt=None): + """ + Setting the log format and the date format. + + :param _fmt: Log formatting type. + :param _date_fmt: Date formatting type. + + :type _fmt: str + :type _date_fmt: str | None + """ + logger = Logger.getLogger() + for handler in logger.handlers: + # Process handler can not accept the other type of the print + # format. + # create a formatter. + formatter = logging.Formatter(_fmt, _date_fmt) + # Set the formatter. + handler.setFormatter(formatter) + + @staticmethod + def registerProgressStatus(status, color): + """ + Register the status information and the color of the status + information, which is used to progress handler. + + Repeated addition of status strings does not refresh status string + color information. + + :param status: The status information. + :param color: the color of the status information. + + :type status: str + :type color: str + """ + logger = Logger.getLogger() + + for handler in logger.handlers: + if isinstance(handler, ProgressHandler): + handler.registerStatus(status, color) + + +class ProgressLogger(object): + """ + Progress log printing decorator class. + + Used to simplify progress log printing code. + + This class is callable. + """ + PROGRESS_STATUS_START = _PROGRESS_STATUS_START + PROGRESS_STATUS_STOP = _PROGRESS_STATUS_STOP + PROGRESS_STATUS_CHECK = _PROGRESS_STATUS_CHECK + PROGRESS_STATUS_PENDING = _PROGRESS_STATUS_PENDING + PROGRESS_STATUS_SUCCESS = _PROGRESS_STATUS_SUCCESS + PROGRESS_STATUS_FAILURE = _PROGRESS_STATUS_FAILURE + PROGRESS_STATUS_PASSED = _PROGRESS_STATUS_PASSED + PROGRESS_STATUS_CANCELED = _PROGRESS_STATUS_CANCELED + + def __init__(self, message, status=None): + """ + Init the progress log printing decorator class. + + :param message: The log message. + :param status: The progress status message. + + :type message: str + :type status: List[str] | None + """ + # Store the progress status message. + if status is None or len(status) <= 1: + self._status = [_PROGRESS_STATUS_START, _PROGRESS_STATUS_SUCCESS] + else: + self._status = status + # Store the log message. + self._message = message + + def __call__(self, func): + """ + Call the original function. + + :param func: The original function. + :type func: function + + :return: Return the wrapper function. + :rtype: function + """ + + def wrapper(*args, **kwargs): + """ + Decorator function for wrapping log progress information. + + :param args: The additional parameters of the original function. 
+ :param kwargs: The additional parameters of the original function. + + :type args: * + :type kwargs: * + + :return: Return the result of the original function. + :rtype: * + """ + # Get the logger. + logger = Logger.getLogger() + + # Start the progress. + logger.start(self._message, self._status[0]) + + try: + # Call the function. + ret = func(*args, **kwargs) + except (SystemExit, KeyboardInterrupt): + logger.stop(self._message, _PROGRESS_STATUS_CANCELED) + raise + except BaseException: + logger.stop(self._message, _PROGRESS_STATUS_FAILURE) + raise + + # End the progress. + logger.stop(self._message, self._status[1]) + return ret + + return wrapper + + @staticmethod + def initProgressHandler(): + """ + Create a log progress routine for the logger and register + log progress status type. + """ + # Add a new progress handler. + Logger.addProgressHandler() + + # Register progress status that will be used in this program. + Logger.registerProgressStatus(_PROGRESS_STATUS_START, + PROGRESS_COLOR_BLUE) + Logger.registerProgressStatus(_PROGRESS_STATUS_SUCCESS, + PROGRESS_COLOR_GREEN) + Logger.registerProgressStatus(_PROGRESS_STATUS_FAILURE, + PROGRESS_COLOR_RED) + Logger.registerProgressStatus(_PROGRESS_STATUS_CHECK, + PROGRESS_COLOR_BLUE) + Logger.registerProgressStatus(_PROGRESS_STATUS_PASSED, + PROGRESS_COLOR_GREEN) + Logger.registerProgressStatus(_PROGRESS_STATUS_CANCELED, + PROGRESS_COLOR_YELLOW) diff --git a/script/gspylib/common/GaussStat.py b/script/gspylib/common/GaussStat.py new file mode 100644 index 0000000..7fea3fe --- /dev/null +++ b/script/gspylib/common/GaussStat.py @@ -0,0 +1,3045 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +############################################################################# +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. 
+# ----------------------------------------------------------------------------
+# Description : GaussStat.py is a utility for collecting performance
+#               statistics
+#############################################################################
+import subprocess
+import os
+import sys
+import socket
+import glob
+import pwd
+import datetime
+from random import sample
+from multiprocessing.dummy import Pool as ThreadPool
+
+sys.path.append(sys.path[0] + "/../../")
+from gspylib.os.gsfile import g_file
+from gspylib.common.DbClusterInfo import dbClusterInfo
+from gspylib.common.Common import DefaultValue, ClusterCommand
+from gspylib.common.ErrorCode import ErrorCode
+from gspylib.threads.SshTool import SshTool
+import gspylib.common.Sql as Sql
+
+########################################################################
+# Global variable definitions
+########################################################################
+INSTANCE_TYPE_UNDEFINED = -1
+MASTER_INSTANCE = 0
+STANDBY_INSTANCE = 1
+DUMMY_STANDBY_INSTANCE = 2
+# Limit multithreading to a maximum of 4
+DEFAULT_PARALLEL_NUM = 4
+
+ACTION_INSTALL_PMK = "install_pmk"
+ACTION_COLLECT_STAT = "collect_stat"
+ACTION_DISPLAY_STAT = "display_stat"
+ACTION_ASYN_COLLECT = "asyn_collect"
+ACTION_COLLECT_SINGLE_DN_INFO = "single_dn_info"
+
+SQL_FILE_PATH = os.path.realpath(os.path.join(os.path.dirname(__file__),
+                                              "../etc/sql"))
+
+g_recordList = {}
+g_sessionCpuList = []
+g_sessionMemList = []
+g_sessionIOList = []
+g_clusterInfo = None
+g_DWS_mode = False
+
+
+def isNumber(num):
+    '''
+    function: Check whether the variable is a number
+    input : num
+    output: bool
+    '''
+    try:
+        ### Try to convert num to float; if that raises an error,
+        ### num is not a number.
+        float(num)
+    except (TypeError, ValueError):
+        return False
+    return True
+
+
+def isIp(ip):
+    '''
+    function: Check whether the variable is an IPv4 address
+    input : ip
+    output: bool
+    '''
+    try:
+        ### Only IPv4 is supported.
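+        # socket.inet_aton() parses dotted-quad IPv4 strings only; any
+        # other input raises OSError (socket.error), which the handler
+        # below turns into a False return.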
+        socket.inet_aton(ip)
+    except OSError:
+        return False
+    return True
+
+
+class statItem():
+    '''
+    Class representing a single statistics item
+    '''
+
+    def __init__(self, item_value, unit=None):
+        '''
+        function: initialize the parameters
+        input : item_value, unit
+        output: NA
+        '''
+        # Remove leading and trailing whitespace.
+        item_value = item_value.strip()
+        # Keep the value only when it is numeric.
+        if (isNumber(item_value)):
+            self.value = item_value
+        else:
+            self.value = None
+        self.unit = unit
+
+    def __str__(self):
+        '''
+        function: format the item as a string
+        input : NA
+        output: value
+        '''
+        if not self.value:
+            return ""
+        elif self.unit:
+            return "%-10s %s" % (self.value, self.unit)
+        else:
+            return "%s" % self.value
+
+
+class clusterStatistics():
+    '''
+    Class holding cluster-level statistics
+    '''
+
+    def __init__(self):
+        '''
+        function: Constructor
+        input : NA
+        output: NA
+        '''
+        self.cluster_stat_generate_time = None
+
+        ### Host cpu time
+        self.cluster_host_total_cpu_time = None
+        self.cluster_host_cpu_busy_time = None
+        self.cluster_host_cpu_iowait_time = None
+        self.cluster_host_cpu_busy_time_perc = None
+        self.cluster_host_cpu_iowait_time_perc = None
+
+        ### MPP cpu time
+        self.cluster_mppdb_cpu_time_in_busy_time = None
+        self.cluster_mppdb_cpu_time_in_total_time = None
+
+        ### Shared buffer
+        self.cluster_share_buffer_read = None
+        self.cluster_share_buffer_hit = None
+        self.cluster_share_buffer_hit_ratio = None
+
+        ### In memory sort ratio
+        self.cluster_in_memory_sort_count = None
+        self.cluster_disk_sort_count = None
+        self.cluster_in_memory_sort_ratio = None
+
+        ### IO statistics
+        self.cluster_io_stat_number_of_files = None
+        self.cluster_io_stat_physical_reads = None
+        self.cluster_io_stat_physical_writes = None
+        self.cluster_io_stat_read_time = None
+        self.cluster_io_stat_write_time = None
+
+        ### Disk usage
+        self.cluster_disk_usage_db_size = None
+        self.cluster_disk_usage_tot_physical_writes = None
+        self.cluster_disk_usage_avg_physical_write = None
+        self.cluster_disk_usage_max_physical_write = None
+
+        ### Activity statistics
+        self.cluster_activity_active_sql_count = None
+        self.cluster_activity_session_count = None
+
+
+class nodeStatistics():
+    '''
+    Class holding node-level statistics
+    '''
+
+    def __init__(self, nodename):
+        '''
+        function: Constructor
+        input : nodename
+        output: NA
+        '''
+        self.nodename = nodename
+        self.node_mppdb_cpu_busy_time = None
+        self.node_host_cpu_busy_time = None
+        self.node_host_cpu_total_time = None
+        self.node_mppdb_cpu_time_in_busy_time = None
+        self.node_mppdb_cpu_time_in_total_time = None
+        self.node_physical_memory = None
+        self.node_db_memory_usage = None
+        self.node_shared_buffer_size = None
+        self.node_shared_buffer_hit_ratio = None
+        self.node_in_memory_sorts = None
+        self.node_in_disk_sorts = None
+        self.node_in_memory_sort_ratio = None
+        self.node_number_of_files = None
+        self.node_physical_reads = None
+        self.node_physical_writes = None
+        self.node_read_time = None
+        self.node_write_time = None
+
+
+class sessionStatistics():
+    '''
+    Class holding session-level statistics
+    '''
+
+    def __init__(self, nodename, dbname, username):
+        '''
+        function: Constructor
+        input : nodename, dbname, username
+        output: NA
+        '''
+        self.nodename = nodename
+        self.dbname = dbname
+        self.username = username
+        self.session_cpu_time = None
+        self.session_db_cpu_time = None
+        self.session_cpu_percent = None
+
+        self.session_buffer_reads = None
+        self.session_buffer_hit_ratio = None
+        self.session_in_memory_sorts = None
+        self.session_in_disk_sorts = None
+        self.session_in_memory_sorts_ratio = None
+
self.session_total_memory_size = None + self.session_used_memory_size = None + + self.session_physical_reads = None + self.session_read_time = None + + +class GaussStat(): + ''' + Class for stating Gauss message + ''' + + def __init__(self, install_path="", user_name="", local_port="", + curr_time="", last_time="", snapshot_id="", + flag_num=0, master_host="", logger_fp=None, show_detail=False, + database_name="postgres"): + ''' + function: Constructor + input : install_path, user_name, local_port, curr_time, last_time, + snapshot_id, flag_num, master_host, logger_fp, show_detail, + database_name + output: NA + ''' + ### gsql paramter, must be set + if not install_path or not user_name or not local_port or not \ + logger_fp: + raise Exception(ErrorCode.GAUSS_500["GAUSS_50001"] % + "p or -c or -u or -d " + ".") + else: + self.installPath = install_path + self.user = user_name + self.localport = local_port + self.logger = logger_fp + + if (curr_time == ""): + self.currTime = "NULL" + else: + self.currTime = "'%s'" % curr_time + + if (last_time == ""): + self.lastTime = "NULL" + else: + self.lastTime = "'%s'" % last_time + + if (snapshot_id == ""): + self.snapshotId = "NULL" + else: + self.snapshotId = snapshot_id + + self.flagNum = flag_num + self.masterHost = master_host + + ### show detail or not + self.showDetail = show_detail + + ### which database we should connect. + self.database = database_name + + ###initialize statistics + self.cluster_stat = clusterStatistics() + self.node_stat = [] + self.session_cpu_stat = [] + self.session_mem_stat = [] + self.session_io_stat = [] + + # internal parameter + self.__baselineFlag = "gauss_stat_output_time" + # default baseline check flag. + self.__TopNSessions = 10 + + def writeOutput(self, outstr): + ''' + function: write output message + input : outstr + output: NA + ''' + sys.stderr.write(outstr + "\n") + sys.stderr.flush() + + def loadSingleNodeSessionCpuStat(self, connInfo): + ''' + function: load single node(cn or dn) session cpu stat + input : connInfo + output: NA + ''' + self.logger.debug("Loading single node session cpu stat on " + "the node [%s]." % connInfo[0]) + global g_sessionCpuList + try: + nodeName = connInfo[0] + nodePort = connInfo[1] + # when I query from pgxc_node, if I query from a cn node, + # it will return all the logical nodes of the cluster. + # this node is DN + querySql = "SELECT node_name FROM DBE_PERF.node_name;" + + if (g_DWS_mode): + (status, result, err_output) = \ + ClusterCommand.excuteSqlOnLocalhost(nodePort, querySql) + self.logger.debug("Get pgxc_node info from the cluster. " + "\ncommand: %s \nresult: %s." % (querySql, + result)) + + if (status != 2): + raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % + querySql + " Error:\n%s" % err_output) + + if (len(result) == 0): + raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % + querySql + " Return record is null") + + pgxcNodeName = result[0][0] + + querySql = "SELECT o_node_name,o_db_name,o_user_name," \ + "o_session_cpu_time,o_mppdb_cpu_time," \ + "o_mppdb_cpu_time_perc \ + FROM pmk.get_session_cpu_stat('%s', 10)" % \ + pgxcNodeName + + (status, result, err_output) = \ + ClusterCommand.excuteSqlOnLocalhost(nodePort, querySql) + self.logger.debug("Load single node session cpu stat. " + "\ncommand: %s \nresult: %s." 
% (querySql, + result)) + + if (status != 2): + raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % + querySql + " Error:\n%s" % result) + if (len(result) != 0): + lines = [] + for i in iter(result): + line = "|".join(i) + lines.append(line) + g_sessionCpuList.extend(lines) + else: + (status, output) = ClusterCommand.execSQLCommand(querySql, + self.user, '', + nodePort, + "postgres") + self.logger.debug("Get pgxc_node info from the cluster. " + "\ncommand: %s \nresult: %s." % (querySql, + output)) + + if (status != 0): + raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % + querySql + " Error:\n%s" % output) + + if (output == ""): + raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % + querySql + " Return record is null") + + pgxcNodeName = output.strip() + + querySql = "SELECT o_node_name,o_db_name,o_user_name," \ + "o_session_cpu_time,o_mppdb_cpu_time," \ + "o_mppdb_cpu_time_perc \ + FROM pmk.get_session_cpu_stat('%s', 10)" % \ + pgxcNodeName + + (status, output) = ClusterCommand.execSQLCommand( + querySql, self.user, '', nodePort, "postgres") + self.logger.debug("Load single node session cpu stat. " + "\ncommand: %s \nresult: %s." % (querySql, + output)) + + if (status != 0): + raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % + querySql + " Error:\n%s" % output) + if (output != ""): + lines = output.split("\n") + g_sessionCpuList.extend(lines) + except Exception as e: + raise Exception(str(e)) + self.logger.debug("Successfully loaded single node session cpu stat " + "on the node [%s]." % connInfo[0]) + + def loadSingleNodeSessionMemoryStat(self, connInfo): + ''' + function: load single node(cn or dn) session memory stat + input : connInfo + output: NA + ''' + self.logger.debug("Loading single node session memory stat on the " + "node [%s]." % connInfo[0]) + global g_sessionMemList + try: + nodeName = connInfo[0] + nodePort = connInfo[1] + # when I query from pgxc_node, if I query from a cn node, + # it will return all the logical nodes of the cluster. + # this node is DN + querySql = "SELECT node_name FROM DBE_PERF.node_name;" + + if (g_DWS_mode): + (status, result, + err_output) = ClusterCommand.excuteSqlOnLocalhost(nodePort, + querySql) + self.logger.debug( + "Get pgxc_node info " + "from the cluster. \ncommand: %s \nresult: %s." % ( + querySql, result)) + + if (g_DWS_mode): + (status, result, err_output) = \ + ClusterCommand.excuteSqlOnLocalhost(nodePort, querySql) + self.logger.debug("Get pgxc_node info from the cluster. " + "\ncommand: %s \nresult: %s." % (querySql, + result)) + + if (status != 2): + raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % + querySql + " Error:\n%s" % err_output) + + if (len(result) == 0): + raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % + querySql + " Return record is null") + + pgxcNodeName = result[0][0] + + querySql = "SELECT o_node_name ,o_db_name,o_user_name," \ + "o_session_total_memory_size," \ + "o_session_used_memory_size,\ + o_buffer_hits,o_session_buffer_hit_ratio," \ + "o_sorts_in_memory,o_sorts_in_disk," \ + "o_session_memory_sort_ratio \ + FROM pmk.get_session_memory_stat('%s', 10)" % \ + pgxcNodeName + + (status, result, err_output) = \ + ClusterCommand.excuteSqlOnLocalhost(nodePort, querySql) + self.logger.debug("Load single node session memory stat. " + "\ncommand: %s \nresult: %s." 
% (querySql, + result)) + + if (status != 2): + raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % + querySql + " Error:\n%s" % err_output) + if (len(result) != 0): + lines = [] + for i in iter(result): + line = "|".join(i) + lines.append(line) + g_sessionMemList.extend(lines) + else: + (status, output) = ClusterCommand.execSQLCommand(querySql, + self.user, '', + nodePort, + "postgres") + self.logger.debug("Get pgxc_node info from the cluster. " + "\ncommand: %s \nresult: %s." % (querySql, + output)) + + if (status != 0): + raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % + querySql + " Error:\n%s" % output) + + if (output == ""): + raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % + querySql + " Return record is null") + + pgxcNodeName = output.strip() + + querySql = "SELECT o_node_name ,o_db_name,o_user_name," \ + "o_session_total_memory_size," \ + "o_session_used_memory_size,\ + o_buffer_hits,o_session_buffer_hit_ratio," \ + "o_sorts_in_memory,o_sorts_in_disk," \ + "o_session_memory_sort_ratio \ + FROM pmk.get_session_memory_stat('%s', 10)" % \ + pgxcNodeName + + (status, output) = ClusterCommand.execSQLCommand( + querySql, self.user, '', nodePort, "postgres") + self.logger.debug("Load single node session memory stat. " + "\ncommand: %s \nresult: %s." % (querySql, + output)) + + if (status != 0): + raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % + querySql + " Error:\n%s" % output) + if (output != ""): + lines = output.split("\n") + g_sessionMemList.extend(lines) + except Exception as e: + raise Exception(str(e)) + self.logger.debug("Successfully loaded single node session " + "memory stat on the node [%s]." % connInfo[0]) + + def loadSingleNodeSessionIOStat(self, connInfo): + ''' + function: load single node(cn or dn) session IO stat + input : connInfo + output: NA + ''' + self.logger.debug("Loading single node session IO stat " + "on the node [%s]." % connInfo[0]) + global g_sessionIOList + try: + nodeName = connInfo[0] + nodePort = connInfo[1] + # when I query from pgxc_node, if I query from a cn node, + # it will return all the logical nodes of the cluster. + # this node is DN + querySql = "SELECT node_name FROM DBE_PERF.node_name;" + if g_DWS_mode: + (status, result, err_output) = \ + ClusterCommand.excuteSqlOnLocalhost(nodePort, querySql) + self.logger.debug("Get pgxc_node info from the cluster. " + "\ncommand: %s \nresult: %s." % (querySql, + result)) + + if status != 2: + raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % + querySql + " Error:\n%s" % err_output) + + if len(result) == 0: + raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % + querySql + " Return record is null") + + pgxcNodeName = result[0][0] + + querySql = "SELECT o_node_name, o_db_name, " \ + "o_user_name, o_disk_reads, o_read_time " \ + "FROM pmk.get_session_io_stat('%s', 10)" \ + % pgxcNodeName + + (status, result, + err_output) = ClusterCommand.excuteSqlOnLocalhost(nodePort, + querySql) + self.logger.debug( + "Load single node session io stat." + " \ncommand: %s \nresult: %s." % ( + querySql, result)) + + (status, result, err_output) = \ + ClusterCommand.excuteSqlOnLocalhost(nodePort, querySql) + self.logger.debug("Load single node session io stat. " + "\ncommand: %s \nresult: %s." 
% (querySql, + result)) + + if status != 2: + raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % + querySql + " Error:\n%s" % err_output) + if len(result) != 0: + lines = [] + for i in iter(result): + line = "|".join(i) + lines.append(line) + g_sessionIOList.extend(lines) + else: + (status, output) = ClusterCommand.execSQLCommand( + querySql, self.user, '', nodePort, "postgres") + self.logger.debug("Get pgxc_node info from the cluster. " + "\ncommand: %s \nresult: %s." % (querySql, + output)) + + if status != 0: + raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % + querySql + " Error:\n%s" % output) + + if output == "": + raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % + querySql + " Return record is null") + + pgxcNodeName = output.strip() + + querySql = "SELECT o_node_name, o_db_name, " \ + "o_user_name, o_disk_reads, o_read_time " \ + "FROM pmk.get_session_io_stat('%s', 10)" \ + % pgxcNodeName + + (status, output) = ClusterCommand.execSQLCommand( + querySql, self.user, '', nodePort, "postgres") + self.logger.debug("Load single node session io stat. " + "\ncommand: %s \nresult: %s." % (querySql, + output)) + + if status != 0: + raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % + querySql + " Error:\n%s" % output) + if output != "": + lines = output.split("\n") + g_sessionIOList.extend(lines) + except Exception as e: + raise Exception(str(e)) + self.logger.debug("Successfully loaded single node session IO" + " stat on the node [%s]." % connInfo[0]) + + def loadSingleNodeStat(self, connInfo): + ''' + function: load single node(cn or dn) stat + input : NA + output: NA + ''' + self.logger.debug("Loading single node stat on the node [%s]." % + connInfo[0]) + global g_recordList + try: + nodeName = connInfo[0] + nodePort = connInfo[1] + # when I query from pgxc_node, if I query from a cn node, + # it will return all the logical nodes of the cluster. + # this node is DN + querySql = "SELECT node_name FROM DBE_PERF.node_name;" + if (g_DWS_mode): + (status, result, + err_output) = ClusterCommand.excuteSqlOnLocalhost(nodePort, + querySql) + self.logger.debug( + "Get pgxc_node info from cluster." + " \ncommand: %s \nresult: %s." % ( + querySql, result)) + + if (status != 2): + raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % + querySql + " Error:\n%s" % err_output) + + if (len(result) == 0): + raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % + querySql + " Return record is null") + + recordList = result[0] + if (len(recordList) != 1): + raise Exception(ErrorCode.GAUSS_517[ + "GAUSS_51700"] + + "The record in recordList is:%s." + % recordList) + if (recordList[0] != ''): + recordList[0] = (recordList[0]).strip() + nodeType = 'D' + + # when I query from pgxc_node on a DB node, the node type is + # 'C'. it's wrong, so I modify here. + # when it is DB node, I specify the node type as 'D' selfly. + # load single node stat + if (dwsFlag): + skipSupperRoles = 'TRUE' + else: + skipSupperRoles = 'FALSE' + + instType = 'D' + + querySql = "SELECT * " \ + "FROM pmk.load_node_stat(%s, %s, %s, '%s'" \ + ", '%s', %s)" % \ + (self.currTime, self.lastTime, self.snapshotId, + recordList[0], instType, skipSupperRoles) + (status, result, + err_output) = ClusterCommand.excuteSqlOnLocalhost(nodePort, + querySql) + self.logger.debug( + "Load single node stat. \ncommand: %s \nresult: %s." 
% ( + querySql, result)) + + querySql = "SELECT * FROM pmk.load_node_stat(%s, %s, %s, " \ + "'%s', '%s', %s)" % \ + (self.currTime, self.lastTime, self.snapshotId, + recordList[0], instType, skipSupperRoles) + (status, result, + err_output) = ClusterCommand.excuteSqlOnLocalhost(nodePort, + querySql) + self.logger.debug( + "Load single node stat. \ncommand: %s \nresult: %s." % ( + querySql, result)) + + if (status != 2): + raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % + querySql + " Error:\n%s" % err_output) + + if (len(result) == 0): + raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % + querySql + " Return record is null") + + output = "" + if (len(result) != 0): + for i in iter(result): + line = "|".join(i) + output += line + "\n" + g_recordList[recordList[0]] = output + else: + (status, output) = ClusterCommand.execSQLCommand(querySql, + self.user, '', + nodePort, + "postgres") + self.logger.debug( + "Get node info from cluster." + " \ncommand: %s \nresult: %s." % ( + querySql, output)) + if (status != 0): + raise Exception(ErrorCode.GAUSS_513[ + "GAUSS_51300"] % querySql + + " Error:\n%s" % output) + if (output == ""): + raise Exception(ErrorCode.GAUSS_513[ + "GAUSS_51300"] % querySql + + " Return record is null") + recordList = output.split('|') + if (len(recordList) != 1): + raise Exception(ErrorCode.GAUSS_517[ + "GAUSS_51700"] + + "The record in recordList is:%s." + % recordList) + if (recordList[0] != ''): + recordList[0] = (recordList[0]).strip() + nodeType = 'D' + + # when I query from pgxc_node on a DB node, the node type is + # 'C'. it's wrong, so I modify here. + # when it is DB node, I specify the node type as 'D' selfly. + # load single node stat + if (dwsFlag): + skipSupperRoles = 'TRUE' + else: + skipSupperRoles = 'FALSE' + + instType = 'D' + + querySql = \ + "SELECT * FROM pmk.load_node_stat(%s, " \ + "%s, %s, '%s', '%s', %s)" % \ + (self.currTime, self.lastTime, self.snapshotId, + recordList[0], instType, skipSupperRoles) + (status, output) = ClusterCommand.execSQLCommand(querySql, + self.user, '', + nodePort, + "postgres") + self.logger.debug( + "Load single node stat. \ncommand: %s \nresult: %s." % ( + querySql, output)) + + querySql = "SELECT * FROM pmk.load_node_stat(%s, %s, %s, " \ + "'%s', '%s', %s)" % \ + (self.currTime, self.lastTime, self.snapshotId, + recordList[0], instType, skipSupperRoles) + (status, output) = ClusterCommand.execSQLCommand(querySql, + self.user, '', + nodePort, + "postgres") + self.logger.debug( + "Load single node stat. \ncommand: %s \nresult: %s." % ( + querySql, output)) + + if (status != 0): + raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % + querySql + " Error:\n%s" % output) + + if (output == ""): + raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % + querySql + " Return record is null") + + g_recordList[recordList[0]] = output + except Exception as e: + raise Exception(str(e)) + self.logger.debug("Successfully loaded single node stat on the " + "node [%s]." 
+                          % connInfo[0])
+
+    def checkRoleOfDnInst(self, dnDataDir):
+        '''
+        function: check whether the database node instance is the primary
+        input : dnDataDir
+        output: bool
+        '''
+        self.logger.debug("Checking role of database node instance.")
+        try:
+            if (not os.path.exists(os.path.join(dnDataDir, "postmaster.pid"))):
+                return False
+
+            checkCmd = "gs_ctl query -D %s | grep 'HA state' -A 1 | grep " \
+                       "'local_role'" % dnDataDir
+            (status, output) = DefaultValue.retryGetstatusoutput(checkCmd)
+            if (status != 0):
+                cmd = "gs_ctl query -D %s" % dnDataDir
+                (status, output) = subprocess.getstatusoutput(cmd)
+                if (status != 0 and output.find("could not connect to "
+                                                "the local server") > 0):
+                    return False
+                raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] %
+                                checkCmd + " Error:\n%s." % output +
+                                "The cmd is %s" % cmd)
+
+            roleStatus = ((output.split(':'))[1]).strip()
+            if (roleStatus == "Primary"):
+                return True
+            else:
+                return False
+        except Exception as e:
+            raise Exception(str(e))
+
+    def getPGXCNode(self):
+        '''
+        function: get the pgxc node list of the cluster (the primary
+                  data node instances on the local host)
+        input : NA
+        output: pgxcNodeList
+        '''
+        self.logger.debug("Getting pgxc node of the cluster.")
+        pgxcNodeList = []
+        nodeItem = []
+        nodeName = ""
+        nodePort = 0
+        try:
+            # get node info
+            nodeInfo = g_clusterInfo.getDbNodeByName(
+                DefaultValue.GetHostIpOrName())
+            for dnInst in nodeInfo.datanodes:
+                if (dnInst.instanceType != DUMMY_STANDBY_INSTANCE):
+                    if self.checkRoleOfDnInst(dnInst.datadir) or len(
+                            nodeInfo.datanodes) == 1:
+                        nodeName = ""
+                        nodePort = "%s" % dnInst.port
+                        nodeItem = []
+                        nodeItem.append(nodeName)
+                        nodeItem.append(nodePort)
+                        pgxcNodeList.append(nodeItem)
+            return pgxcNodeList
+        except Exception as e:
+            raise Exception(str(e))
+
+    def getPerfCheckPsqlCommand(self, dbname):
+        """
+        function: build the gsql command used for performance-check queries
+        input : dbname
+        output: str
+        """
+        cmd = ClusterCommand.getSQLCommand(self.localport, dbname,
+                                           os.path.join(self.installPath,
+                                                        "bin/gsql"))
+        return cmd
+
+    ### NOTICE: itemCounts must be more than two.
so that we can distinguish + # records and (%d rows) + def execQueryCommand(self, sql, itemCounts, collectNum, baselineflag=""): + ''' + function: execute the query command + input : sql, itemCounts, collectNum, baselineflag + output: NA + ''' + if (baselineflag == ""): + baselineflag = self.__baselineFlag + + # save sql statement to file to reduce quot nesting + sqlFile = os.path.join(DefaultValue.getTmpDirFromEnv(), + "checkperf_query_%s_%s.sql" % (os.getpid(), + collectNum)) + if (dwsFlag): + sql = "set cgroup_name='Rush';" + sql + + cmd = "echo \"%s\" > '%s' && chown %s '%s'" % (sql, sqlFile, self.user, + sqlFile) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50205"] % + "SQL statement to file" + "\nCommand:\n " + "%s\nError:\n %s" % ( + cmd, output)) + + try: + sql_cmd = self.getPerfCheckPsqlCommand(self.database) + if (os.getuid() == 0): + cmd = "su - %s -c \'%s -f %s -X " \ + "--variable=ON_ERROR_STOP=on\' " \ + "2>/dev/null" % (self.user, sql_cmd, sqlFile) + else: + cmd = "%s -f %s -X --variable=ON_ERROR_STOP=on 2>/dev/null" % \ + (sql_cmd, sqlFile) + self.logger.debug("Execute command: %s" % (cmd)) + + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0 or ClusterCommand.findErrorInSqlFile(sqlFile, + output): + raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % cmd + + " Error: \n%s" % output) + + DefaultValue.cleanTmpFile(sqlFile) + + baseline = self.checkExpectedOutput(output, baselineflag, False) + + if (baseline == -1): + raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % + ("Cannot fetch query baseline. Error: \n%s" % + (output))) + + if (self.checkExpectedOutput(output, "(0 rows)", True, + baseline) != -1): + ### can not support now + return None + + lines = output.split("\n") + linesCount = len(lines) + + ### result must more than 4 lines + if (linesCount <= baseline + 4): + raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % + "Unexpected lines" + " Error: \n%s" % ( + output)) + + records = [] + for ino in range(baseline + 2, linesCount): + line = lines[ino] + record = line.split("|") + if (len(record) != itemCounts): + break + records.append(record) + + self.logger.debug("Query command succeeded.") + self.logger.debug("Query results: \n%s." % str(records)) + return records + except Exception as e: + ### execute query command failed. log and raise + self.logger.debug("Failed to execute the command of query [%s] " + "on local host." % sql) + DefaultValue.cleanTmpFile(sqlFile) + raise Exception(str(e)) + + ## check if the expected line existed in output. + def checkExpectedOutput(self, output, expect, strict=True, starter=0): + ''' + function: check expected output + input : output, expect, strict, starter + output: NA + ''' + lines = output.split("\n") + expect = expect.strip() + if (starter < len(lines)): + for i in range(starter, len(lines)): + line = lines[i] + if (strict): + if (expect == line.strip()): + return i - starter + else: + if (line.strip().find(expect) != -1): + return i - starter + return -1 + + def CheckInstanceMode(self): + ''' + function: test coordinator and datanode mode + input : NA + output: NA + ''' + try: + self.logger.debug("Checking the coordinator and datanode mode .") + + ### test coordinator and coordinator mode... 
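+            # Outline of the check below: the ps/awk pipeline selects
+            # processes whose command is exactly <installPath>/bin/gaussdb,
+            # then the "--restoremode" / "--coordinator" flags on their
+            # command lines decide whether a CN is running normally, in
+            # restore mode, or not at all.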
+ cmd = "ps ux|awk '{if($11 == \"%s\")print $0}'" % os.path.join( + self.installPath, "bin/gaussdb") + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0 or output.strip() == ""): + self.logger.debug("Failed to test the CN and CN's mode " + "with the user name. Error: \n%s." % (output) + + "The cmd is %s" % cmd) + raise Exception(ErrorCode.GAUSS_516["GAUSS_51605"] % "CN" + + " Please check the cluster status.") + + self.logger.debug("Test CN output:\n%s" % output) + if (self.checkExpectedOutput(output, "--restoremode", + False) != -1): + self.logger.debug("CN is running in restore mode.") + raise Exception(ErrorCode.GAUSS_512["GAUSS_51212"] % + "running CN instance in normal mode") + elif (self.checkExpectedOutput(output, "--coordinator", + False) != -1): + self.logger.debug("CN is running in normal mode.") + else: + self.logger.debug("There is no running CN instance " + "on this node.") + raise Exception(ErrorCode.GAUSS_512["GAUSS_51212"] % + "running CN instance on this node") + + self.logger.debug("Successfully checked the coordinator and " + "datanode mode .") + except Exception as e: + ### execute query command failed. log and raise + self.logger.debug("Failed to check coordinator and datanode mode.") + raise Exception(str(e)) + + def installPMKInDWSMode(self): + ''' + function: install PMK shcema in DWS mode + input : NA + output: NA + ''' + try: + ### test pmk schema exist or not. + pmk_cmd = "select oid from pg_namespace where nspname='pmk'" + class_cmd = "select count(1) from pg_class where " \ + "relnamespace=(select oid from pg_namespace where " \ + "nspname='pmk')" + proc_cmd = "select count(1) from pg_proc where " \ + "pronamespace=(select oid from pg_namespace where " \ + "nspname='pmk')" + (pmk_status, pmkResult, pmk_error) = \ + ClusterCommand.excuteSqlOnLocalhost(self.localport, pmk_cmd) + (class_status, classResult, class_error) = \ + ClusterCommand.excuteSqlOnLocalhost(self.localport, class_cmd) + (proc_status, procResult, proc_error) = \ + ClusterCommand.excuteSqlOnLocalhost(self.localport, proc_cmd) + self.logger.debug("Test PMK schema. Output: \n%s %s %s." % + (pmkResult, classResult, procResult)) + + tablespace = DefaultValue.getEnv("ELK_SYSTEM_TABLESPACE") + if (pmk_status != 2): + raise Exception(ErrorCode.GAUSS_530["GAUSS_53028"] + % pmk_error) + if (class_status != 2): + raise Exception(ErrorCode.GAUSS_530["GAUSS_53028"] + % class_error) + if proc_status != 2: + raise Exception(ErrorCode.GAUSS_530["GAUSS_53028"] + % proc_error) + + if (len(pmkResult) == 0): + ### schema not exist, so we create it. + self.logger.debug("PMK schema does not exist. " + "Install it for the first time.") + elif ((str(classResult[0][0]).strip() == "13" and + str(procResult[0][0]).strip() == "29")): + ### schema already created. + self.logger.debug("PMK schema is already exist.") + return + elif ((str(classResult[0][0]).strip() == "5" and + str(procResult[0][0]).strip() == "29")): + ### schema already created. + self.logger.debug("PMK schema is already exist.") + return + else: + ### maybe class count or proc count not the same. + self.logger.debug("PMK schema is incomplete. 
Try to " + "execute \"drop schema pmk cascade;\".") + drop_cmd = "drop schema pmk cascade" + (drop_status, drop_result, err_output) = \ + ClusterCommand.excuteSqlOnLocalhost(self.localport, + drop_cmd) + if err_output != "": + raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % + drop_cmd + " Error: \n%s" % err_output) + else: + self.logger.debug("Successfully dropped schema PMK.") + + ### add pmk schema to database. + err_output = "" + + if tablespace is not None and tablespace != "": + for i in iter(Sql.PMK_NEW): + (status, result, err_output) = \ + ClusterCommand.excuteSqlOnLocalhost(self.localport, i) + if err_output != "": + self.logger.debug("Failed to install pmk schema," + "Error: \n%s" % err_output) + break + else: + for i in iter(Sql.PMK_ORIGINAL): + (status, result, err_output) = \ + ClusterCommand.excuteSqlOnLocalhost(self.localport, i) + if err_output != "": + self.logger.debug("Failed to install pmk schema," + "Error: \n%s" % err_output) + break + + # Determine the execution result of the pmk installation + if err_output != "": + dropSchemaCmd = "drop schema if exists pmk cascade" + (status, result, err1_output) = \ + ClusterCommand.excuteSqlOnLocalhost(self.localport, + dropSchemaCmd) + if err1_output != "": + self.logger.debug("Failed to drop schema PMK. " + "Error: \n%s" % err1_output) + raise Exception(ErrorCode.GAUSS_530["GAUSS_53029"] + % err_output) + except Exception as e: + raise Exception(str(e)) + + def installPMKInNonDWSMode(self): + ''' + function: install PMK shcema in non DWS mode + input : NA + output: NA + ''' + try: + test_data_node_file = os.path.join(SQL_FILE_PATH, + "test_data_node.sql") + test_pmk_file = os.path.join(SQL_FILE_PATH, "test_pmk.sql") + gsql_path = os.path.join(self.installPath, "bin/gsql") + tablespace = DefaultValue.getEnv("ELK_SYSTEM_TABLESPACE") + pmk_schema_num1 = "pmk schema exist. class count is 13, " \ + "proc count is 29" + pmk_schema_num2 = "pmk schema exist. class count is 5, " \ + "proc count is 29" + + if (not g_clusterInfo.isSingleInstCluster()): + if (os.getuid() == 0): + cmd = "su - %s -c \'%s -U %s -p %s -d %s -X " \ + "--variable=ON_ERROR_STOP=on -f " % \ + (self.user, gsql_path, self.user, + str(self.localport), self.database) + cmd += "%s" % test_data_node_file + cmd += "\'" + else: + cmd = "%s -U %s -p %s -d %s -X " \ + "--variable=ON_ERROR_STOP=on -f " % \ + (gsql_path, self.user, str(self.localport), + self.database) + cmd += "%s" % test_data_node_file + + (status, output) = subprocess.getstatusoutput(cmd) + self.logger.debug("Command for testing node: %s" % cmd) + self.logger.debug("Output for testing node: %s" % output) + if (status != 0 or ClusterCommand.findErrorInSqlFile( + test_data_node_file, output)): + self.logger.debug( + "Failed to query dataNode. Error: \n%s" % output) + raise Exception(ErrorCode.GAUSS_513[ + "GAUSS_51300"] % cmd + + " Error: \n%s" % output) + + lines = output.split("\n") + if (len(lines) < 4 or self.checkExpectedOutput( + output, "(0 rows)") >= 2): + self.logger.debug("No database node is " + "configured in cluster.") + raise Exception(ErrorCode.GAUSS_512["GAUSS_51212"] % + "configured database node in cluster") + + ### test pmk schema exist or not. 
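+            # Sketch of what test_pmk.sql is expected to report, inferred
+            # from the inline SQL of the DWS branch above: the pg_class and
+            # pg_proc object counts under the "pmk" namespace, matched
+            # against strings such as "pmk schema exist. class count is
+            # 13, proc count is 29".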
+ if (os.getuid() == 0): + cmd = "su - %s -c \'%s -U %s -p %s -d %s -X -f " % \ + (self.user, gsql_path, self.user, str(self.localport), + self.database) + cmd += "%s" % (test_pmk_file) + cmd += "\'" + else: + cmd = "%s -U %s -p %s -d %s -X -f " % \ + (gsql_path, self.user, str(self.localport), + self.database) + cmd += "%s" % (test_pmk_file) + + (status, output) = subprocess.getstatusoutput(cmd) + self.logger.debug("Test PMK schema. Output: \n%s." % (output)) + if (status != 0): + raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % cmd + + " Error: \n%s" % output) + + if (self.checkExpectedOutput(output, "ERROR: query returned no " + "rows", False) != -1): + ### schema not exist, so we create it. + self.logger.debug("PMK schema does not exist. Install it for" + " the first time.") + + elif (self.checkExpectedOutput(output, pmk_schema_num1, False) != + -1 and not tablespace): + ### schema already created. + self.logger.debug("PMK schema is already exist.") + return + elif (self.checkExpectedOutput(output, pmk_schema_num2, False) != + -1 and tablespace): + ### schema already created. + self.logger.debug("PMK schema is already exist.") + return + else: + ### maybe class count or proc count not the same. + self.logger.debug("PMK schema is incomplete. Try to " + "execute \"drop schema pmk cascade;\".") + if (os.getuid() == 0): + cmd = "su - %s -c 'gsql -d %s -p %s -X -c \"drop " \ + "schema pmk cascade;\"'" % \ + (self.user, self.database, str(self.localport)) + else: + cmd = "gsql -d %s -p %s -X -c \"drop schema " \ + "pmk cascade;\"" % \ + (self.database, str(self.localport)) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0 or ClusterCommand.findErrorInSql( + output) == True): + self.logger.debug("Failed to drop schema PMK. " + "Error: \n%s" % output) + raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % + cmd + " Error: \n%s" % output) + else: + self.logger.debug("Successfully dropped schema PMK: %s." 
% + output) + + if (g_clusterInfo.isSingleInstCluster()): + pmkSqlFile = (os.path.join( + self.installPath, + "share/postgresql/pmk_schema_single_inst.sql")) + else: + pmkSqlFile = (os.path.join( + self.installPath, "share/postgresql/pmk_schema.sql")) + pmkSqlFile_back = (os.path.join( + self.installPath, "share/postgresql/pmk_schema_bak.sql")) + cmd = "cp '%s' '%s'" % (pmkSqlFile, pmkSqlFile_back) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + " Error: \n%s" % output) + + if (tablespace is not None and tablespace != ""): + cmd = "sed -i \"s/START TRANSACTION;//g\" %s && " % \ + pmkSqlFile_back + cmd += "sed -i \"s/COMMIT;//g\" %s && " % pmkSqlFile_back + cmd += "sed -i \"/PRIMARY KEY/d\" %s && " % pmkSqlFile_back + cmd += "sed -i \"/CREATE INDEX/d\" %s && " % pmkSqlFile_back + cmd += "sed -i \"1i\\SET default_tablespace = %s;\" %s" % \ + (tablespace, pmkSqlFile_back) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + DefaultValue.cleanTmpFile(pmkSqlFile_back) + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + " Error: \n%s" % output) + else: + if (not g_clusterInfo.isSingleInstCluster()): + self.logger.debug("Set installation group to " + "default_storage_nodegroup in this " + "session.") + sql_nodegroup = "SELECT group_name FROM " \ + "pg_catalog.pgxc_group WHERE " \ + "is_installation='t';" + if (os.getuid() == 0): + cmd = "su - %s -c 'gsql -d %s -p %s -x -A -c " \ + "\"%s\"'" % \ + (self.user, self.database, str(self.localport), + sql_nodegroup) + else: + cmd = "gsql -d %s -p %s -x -A -c \"%s\"" % \ + (self.database, str(self.localport), + sql_nodegroup) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0 or ClusterCommand.findErrorInSql(output)): + self.logger.debug("Failed to get installation group. " + "Error: \n%s" % output) + raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % + cmd + " Error: \n%s" % output) + + installation_group = output.split('|')[-1] + cmd = "sed -i \"1i\\SET default_storage_nodegroup = " \ + "%s;\" %s" % \ + (installation_group, pmkSqlFile_back) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + DefaultValue.cleanTmpFile(pmkSqlFile_back) + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % + cmd + " Error: \n%s" % output) + self.logger.debug("Successfully set " + "default_storage_nodegroup to %s in " + "this session." % \ + installation_group) + + ### add pmk schema to database.
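+ # The rewritten copy (pmk_schema_bak.sql) is now fed to gsql with + # ON_ERROR_STOP=on, so the first failing statement aborts the install; + # on failure, the partially created schema is rolled back below with + # "drop schema if exists pmk cascade".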
+ if (os.getuid() == 0): + cmd = "su - %s -c \'%s -U %s -p %s -d %s -X " \ + "--variable=ON_ERROR_STOP=on -f \"" % \ + (self.user, gsql_path, self.user, str(self.localport), + self.database) + cmd += "%s" % pmkSqlFile_back + cmd += "\"\'" + else: + cmd = "%s -U %s -p %s -d %s -X " \ + "--variable=ON_ERROR_STOP=on -f \"" % \ + (gsql_path, self.user, str(self.localport), + self.database) + cmd += "%s" % pmkSqlFile_back + cmd += "\"" + + (status, output) = subprocess.getstatusoutput(cmd) + self.logger.debug("Create pmk output:%s" % output) + # Determine the execution result of the pmk installation + if (status != 0 or ClusterCommand.findErrorInSqlFile( + pmkSqlFile_back, output)): + # Determine whether the current user is the root user + if (os.getuid() == 0): + # Build the drop command to run as the cluster user via su + dropSchemaCmd = "su - %s -c '%s -d %s -p %s -X -c " \ + "\"drop schema if exists pmk " \ + "cascade;\"' " % \ + (self.user, gsql_path, self.database, + str(self.localport)) + else: + # Build the drop command to run as the cluster user directly + dropSchemaCmd = "%s -d %s -p %s -X -c \"drop schema if " \ + "exists pmk cascade;\"" % \ + (gsql_path, self.database, + str(self.localport)) + (status, output1) = subprocess.getstatusoutput(dropSchemaCmd) + # Check the result of rolling back the pmk installation + if (status != 0 or ClusterCommand.findErrorInSql(output1)): + self.logger.debug("Failed to drop schema PMK. Error: " + "\n%s" % output1) + raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % cmd + + " Error: \n%s" % output) + except Exception as e: + ### executing the query command failed. log and raise + self.logger.debug("Failed to check coordinator and datanode mode.") + raise Exception(str(e)) + + def installPMKSchema(self): + ''' + function: install PMK schema + input : NA + output: NA + ''' + try: + self.logger.debug("Installing PMK schema.") + + if (not g_clusterInfo.isSingleInstCluster()): + # test DB mode + self.CheckInstanceMode() + + if (g_DWS_mode): + self.installPMKInDWSMode() + else: + self.installPMKInNonDWSMode() + ### schema created successfully. + self.logger.debug("Successfully installed PMK schema.") + except Exception as e: + raise Exception(str(e)) + + def collectStat(self, act): + ''' + function: collect performance statistics + input : act + output: NA + ''' + try: + self.logger.debug("Collecting performance statistics [%s]." % + act) + if (act == "ClusterHostCpuStat"): + # collect cluster host CPU statistics + self.collectClusterHostCpuStat() + elif (act == "ClusterMPPDBCpuStat"): + # collect MPPDB CPU statistics + self.collectClusterMPPDBCpuStat() + elif (act == "ShareBufferStat"): + # collect share buffer statistics + self.collectShareBufferStat() + elif (act == "ClusterSortStat"): + # collect sort statistics + self.collectClusterSortStat() + elif (act == "ClusterIOStat"): + # collect IO statistics + self.collectClusterIOStat() + elif (act == "ClusterDiskStat"): + # collect disk usage statistics + self.collectClusterDiskStat() + elif (act == "ClusterActiveSqlCount"): + # collect active SQL statistics + self.collectClusterActiveSqlCount() + elif (act == "ClusterSessionCount"): + # collect session count statistics + self.collectClusterSessionCount() + elif (act == "NodeCpuStat"): + # collect node CPU statistics + self.collectNodeCpuStat() + elif (act == "NodeMemoryStat"): + # collect node memory statistics + self.collectNodeMemoryStat() + elif (act == "NodeIOStat"): + # collect node IO statistics + self.collectNodeIOStat() + elif (act == "SessionCpuStat"): + # collect session CPU statistics + self.collectSessionCpuStat() + elif (act == "SessionMemoryStat"): + # collect session memory statistics + self.collectSessionMemoryStat() + elif (act == "SessionIOStat"): + # collect session IO statistics + self.collectSessionIOStat() + self.logger.debug("Successfully collected performance " + "statistics [%s]." % act) + except Exception as e: + raise Exception(str(e)) + + def collectPGXCNodeStat(self, pgxcNodeList): + ''' + function: collect PGXC node performance statistics + input : pgxcNodeList + output: NA + ''' + recordTempFile = "" + try: + self.logger.debug("Collecting PGXC node performance statistics.") + if (len(pgxcNodeList) != 0): + # load pgxc node statistics parallel + pool = ThreadPool(DefaultValue.getCpuSet()) + pool.map(self.loadSingleNodeStat, pgxcNodeList) + pool.close() + pool.join() + for record in g_recordList.keys(): + self.logger.debug("%s: %s\n" % (record, + g_recordList[record])) + else: + return + + # create a temp file for records write + strCmd = "" + recordTempFile = os.path.join(DefaultValue.getTmpDirFromEnv(), + "recordTempFile_%d_%s" % \ + (self.flagNum, + DefaultValue.GetHostIpOrName())) + + # clean the temp file first + DefaultValue.cleanTmpFile(recordTempFile) + # write records into the temp file + for record in g_recordList.keys(): + strCmd += "%s::::%s\n" % (record, g_recordList[record]) + + g_file.createFileInSafeMode(recordTempFile) + with open(recordTempFile, 'w') as fp: + fp.writelines(strCmd) + + if self.masterHost != DefaultValue.GetHostIpOrName(): + # scp record Temp File to tmpDir + scpCmd = "pscp -H %s '%s' '%s/'" % ( + self.masterHost, recordTempFile, + DefaultValue.getTmpDirFromEnv()) + + self.logger.debug("Execute command: %s" % scpCmd) + (status, output) = subprocess.getstatusoutput(scpCmd) + if (status != 0): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50205"] % + "record temp file" + " Error: \n%s" % + output) + self.logger.debug("Successfully collected PGXC node " + "performance statistics.") + except Exception as e: + # close and remove temporary file + DefaultValue.cleanTmpFile(recordTempFile) + raise Exception(str(e)) + + def collectSessionCpuStatNew(self, pgxcNodeList): + ''' + function: collect session cpu performance statistics + input : pgxcNodeList + output: NA + ''' + sessionCpuTempFile = "" + try: + self.logger.debug("Collecting session cpu performance
statistics.") + if (len(pgxcNodeList) != 0): + # load session cpu statistics parallel + pool = ThreadPool(DefaultValue.getCpuSet()) + results = pool.map(self.loadSingleNodeSessionCpuStat, + pgxcNodeList) + pool.close() + pool.join() + for record in g_sessionCpuList: + self.logger.debug("g_sessionCpuList: %s\n" % record) + else: + return + + # create a temp file for records write + strCmd = "" + sessionCpuTempFile = os.path.join(DefaultValue.getTmpDirFromEnv(), + "sessionCpuTempFile_%d_%s" % \ + (self.flagNum, + DefaultValue.GetHostIpOrName())) + + # clean the temp file first + DefaultValue.cleanTmpFile(sessionCpuTempFile) + # write records into the temp file + for record in g_sessionCpuList: + strCmd += "%s\n" % record + + g_file.createFileInSafeMode(sessionCpuTempFile) + with open(sessionCpuTempFile, 'w') as fp: + fp.writelines(strCmd) + + if self.masterHost != DefaultValue.GetHostIpOrName(): + # scp session Cpu Temp File to tmpDir + scpCmd = "pscp -H %s '%s' '%s'/" % ( + self.masterHost, sessionCpuTempFile, + DefaultValue.getTmpDirFromEnv()) + + self.logger.debug("Execute command: %s" % scpCmd) + (status, output) = subprocess.getstatusoutput(scpCmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50205"] % + "record temp file" + " Error: \n%s" % + output) + + # close and remove temporary file + DefaultValue.cleanTmpFile(sessionCpuTempFile) + + self.logger.debug("Successfully collected session " + "cpu performance statistics.") + except Exception as e: + # close and remove temporary file + DefaultValue.cleanTmpFile(sessionCpuTempFile) + raise Exception(str(e)) + + def collectSessionMemStatNew(self, pgxcNodeList): + ''' + function: collect session memory performance statistics + input : pgxcNodeList + output: NA + ''' + sessionMemTempFile = "" + try: + self.logger.debug("Collecting session memory " + "performance statistics.") + if (len(pgxcNodeList) != 0): + # load session memory statistics parallel + pool = ThreadPool(DefaultValue.getCpuSet()) + results = pool.map(self.loadSingleNodeSessionMemoryStat, + pgxcNodeList) + pool.close() + pool.join() + for record in g_sessionMemList: + self.logger.debug("g_sessionMemList: %s\n" % record) + else: + return + + # create a temp file for records write + strCmd = "" + sessionMemTempFile = os.path.join( + DefaultValue.getTmpDirFromEnv(), + "sessionMemTempFile_%d_%s" % ( + self.flagNum, DefaultValue.GetHostIpOrName())) + + # clean the temp file first + DefaultValue.cleanTmpFile(sessionMemTempFile) + # write records into the temp file + for record in g_sessionMemList: + strCmd += "%s\n" % record + + g_file.createFileInSafeMode(sessionMemTempFile) + with open(sessionMemTempFile, 'w') as fp: + fp.writelines(strCmd) + + if self.masterHost != DefaultValue.GetHostIpOrName(): + # scp session Mem TempFile to tmpDir + scpCmd = "pscp -H %s '%s' '%s'/" % ( + self.masterHost, sessionMemTempFile, + DefaultValue.getTmpDirFromEnv()) + + self.logger.debug("Execute command: %s" % scpCmd) + (status, output) = subprocess.getstatusoutput(scpCmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50205"] % + "record temp file" + " Error: \n%s" % + output) + + # close and remove temporary file + DefaultValue.cleanTmpFile(sessionMemTempFile) + + self.logger.debug("Successfully collected session memory " + "performance statistics.") + except Exception as e: + # close and remove temporary file + DefaultValue.cleanTmpFile(sessionMemTempFile) + raise Exception(str(e)) + + def collectSessionIOStatNew(self, pgxcNodeList): + ''' + function: collect 
session io performance statistics + input : pgxcNodeList + output: NA + ''' + sessionIOTempFile = "" + try: + self.logger.debug("Collecting session IO performance statistics.") + if (len(pgxcNodeList) != 0): + # load session IO statistics parallel + pool = ThreadPool(DefaultValue.getCpuSet()) + results = pool.map(self.loadSingleNodeSessionIOStat, + pgxcNodeList) + pool.close() + pool.join() + for record in g_sessionIOList: + self.logger.debug("g_sessionIOList: %s\n" % record) + else: + return + + # create a temp file for records write + strCmd = "" + sessionIOTempFile = os.path.join(DefaultValue.getTmpDirFromEnv(), + "sessionIOTempFile_%d_%s" % + (self.flagNum, + DefaultValue.GetHostIpOrName())) + + # clean the temp file first + DefaultValue.cleanTmpFile(sessionIOTempFile) + # write records into the temp file + for record in g_sessionIOList: + strCmd += "%s\n" % record + + g_file.createFileInSafeMode(sessionIOTempFile) + with open(sessionIOTempFile, 'w') as fp: + fp.writelines(strCmd) + + if self.masterHost != DefaultValue.GetHostIpOrName(): + # scp session IO Temp File to tmpDir + scpCmd = "pscp -H %s '%s' '%s'/" % ( + self.masterHost, sessionIOTempFile, + DefaultValue.getTmpDirFromEnv()) + + self.logger.debug("Execute command: %s" % scpCmd) + (status, output) = subprocess.getstatusoutput(scpCmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50205"] % + "record temp file" + " Error: \n%s" % + output) + + # close and remove temporary file + DefaultValue.cleanTmpFile(sessionIOTempFile) + + self.logger.debug("Successfully collected session IO " + "performance statistics.") + except Exception as e: + # close and remove temporary file + DefaultValue.cleanTmpFile(sessionIOTempFile) + raise Exception(str(e)) + + def cleanTempFiles(self): + """ + """ + # clean all the temp files before start collect the performance data + recordTempFilePattern = os.path.join(DefaultValue.getTmpDirFromEnv(), + 'recordTempFile_*_*') + recordTempFileList = glob.iglob(r'%s' % recordTempFilePattern) + for tempFile in recordTempFileList: + os.remove(tempFile) + + sessionCpuTempFilePattern = os.path.join( + DefaultValue.getTmpDirFromEnv(), 'sessionCpuTempFile_*_*') + sessionCpuTempFileList = glob.iglob(r'%s' % sessionCpuTempFilePattern) + for tempFile in sessionCpuTempFileList: + os.remove(tempFile) + + sessionMemTempFilePattern = os.path.join( + DefaultValue.getTmpDirFromEnv(), 'sessionMemTempFile_*_*') + sessionMemTempFileList = glob.iglob(r'%s' % sessionMemTempFilePattern) + for tempFile in sessionMemTempFileList: + os.remove(tempFile) + + sessionIOTempFilePattern = os.path.join( + DefaultValue.getTmpDirFromEnv(), 'sessionIOTempFile_*_*') + sessionIOTempFileList = glob.iglob(r'%s' % sessionIOTempFilePattern) + for tempFile in sessionIOTempFileList: + os.remove(tempFile) + + sessionCpuTempResultPattern = os.path.join( + DefaultValue.getTmpDirFromEnv(), 'sessionCpuTempResult_*_*') + sessionCpuTempResultList = glob.iglob(r'%s' % + sessionCpuTempResultPattern) + for tempFile in sessionCpuTempResultList: + os.remove(tempFile) + + sessionMemTempResultPattern = os.path.join( + DefaultValue.getTmpDirFromEnv(), 'sessionMemTempResult_*_*') + sessionMemTempResultList = glob.iglob(r'%s' % + sessionMemTempResultPattern) + for tempFile in sessionMemTempResultList: + os.remove(tempFile) + + sessionIOTempResultPattern = os.path.join( + DefaultValue.getTmpDirFromEnv(), 'sessionIOTempResult_*_*') + sessionIOTempResultList = glob.iglob(r'%s' % + sessionIOTempResultPattern) + for tempFile in sessionIOTempResultList: + 
os.remove(tempFile) + + def collect(self): + ''' + function: collect performance statistics + input : NA + output: NA + ''' + try: + self.logger.debug("Collecting performance statistics.") + self.cleanTempFiles() + + # get pgxc nodes of the cluster + pgxcNodeList = self.getPGXCNode() + + self.logger.debug("pgxcNodeList: %s" % pgxcNodeList) + + # collect PGXC node performance statistics + self.collectPGXCNodeStat(pgxcNodeList) + + if (not dwsFlag): + # collect session cpu performance statistics + self.collectSessionCpuStatNew(pgxcNodeList) + # collect session memory performance statistics + self.collectSessionMemStatNew(pgxcNodeList) + # collect session io performance statistics + self.collectSessionIOStatNew(pgxcNodeList) + + self.logger.debug("Successfully collected performance statistics.") + + except Exception as e: + raise Exception(str(e)) + + def display(self): + ''' + function: display performance statistics + input : NA + output: NA + ''' + try: + self.logger.debug("Displaying performance statistics.") + + # clean all the temp files before displaying the performance data + queryTempFilePattern = os.path.join( + DefaultValue.getTmpDirFromEnv(), + 'checkperf_query_*_*') + queryTempFileList = glob.iglob(r'%s' % queryTempFilePattern) + for tempFile in queryTempFileList: + os.remove(tempFile) + + actionList = ["ClusterHostCpuStat", + "ClusterMPPDBCpuStat", + "ShareBufferStat", + "ClusterSortStat", + "ClusterIOStat", + "ClusterDiskStat", + "ClusterActiveSqlCount", + "ClusterSessionCount", + "NodeCpuStat", + "NodeMemoryStat", + "NodeIOStat"] + + sessionList = ["SessionCpuStat", + "SessionMemoryStat", + "SessionIOStat"] + + if (not dwsFlag): + actionList.extend(sessionList) + # Concurrently execute the collectStat function + pool = ThreadPool(DEFAULT_PARALLEL_NUM) + results = pool.map(self.collectStat, actionList) + pool.close() + pool.join() + self.outPut() + self.logger.debug("Successfully displayed performance statistics.") + except Exception as e: + raise Exception(str(e)) + + def asynCollectDatabaseSize(self, nodePort): + """ + function: asynchronously collect database size + input : NA + output: NA + """ + self.logger.debug("Asynchronously collecting database size on " + "the current node.") + + try: + querySql = "SELECT 'total_database_size:' || " \ + "SUM(pg_database_size(oid))::bigint FROM pg_database;" + if (g_DWS_mode): + (status, result, err_output) = \ + ClusterCommand.excuteSqlOnLocalhost(nodePort, querySql) + self.logger.debug("Asynchronously collecting database size. " + "\ncommand: %s \nresult: %s." % + (querySql, result)) + + if (status != 2 or err_output.strip() != ""): + raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % + querySql + " Error:\n%s" % err_output) + + binPath = os.path.join(g_clusterInfo.appPath, "bin") + databaseSizeFile = os.path.join(binPath, + DefaultValue.DB_SIZE_FILE) + output = result[0][0] + + g_file.createFileInSafeMode(databaseSizeFile) + with open(databaseSizeFile, 'w') as f: + f.writelines(output) + tmp_sshTool = SshTool(g_clusterInfo.getClusterNodeNames(), + self.logger.logFile) + tmp_sshTool.scpFiles(databaseSizeFile, binPath) + else: + (status, output) = ClusterCommand.execSQLCommand(querySql, + self.user, + '', nodePort, + "postgres") + self.logger.debug("Asynchronously collecting database size. " + "\ncommand: %s \nresult: %s." % (querySql, output)) + + if (status != 0 or output.strip() == ""): + raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % + querySql + " Error:\n%s" % output) + + binPath = os.path.join(g_clusterInfo.appPath, "bin") + databaseSizeFile = os.path.join(binPath, + DefaultValue.DB_SIZE_FILE) + g_file.createFileInSafeMode(databaseSizeFile) + with open(databaseSizeFile, 'w') as f: + f.writelines(output) + tmp_sshTool = SshTool(g_clusterInfo.getClusterNodeNames(), + self.logger.logFile) + tmp_sshTool.scpFiles(databaseSizeFile, binPath) + except Exception as e: + raise Exception(str(e)) + + self.logger.debug("Successfully collected database size " + "asynchronously on the current node.") + + def outPut(self): + ''' + function: output statistics + input : NA + output: NA + ''' + try: + # check whether the '--detail' parameter was specified + if self.showDetail: + # detail display result + self.detailDisplay() + else: + # summary display result + self.summaryDisplay() + except Exception as e: + raise Exception(str(e)) + + def collectClusterHostCpuStat(self): + ''' + function: collect cluster host CPU statistics + input : NA + output: NA + ''' + sql = "SELECT o_avg_cpu_total_time, o_avg_cpu_busy_time, " \ + "o_avg_cpu_iowait_time, o_cpu_busy_perc, o_cpu_io_wait_perc " + sql += "FROM pmk.get_cluster_host_cpu_stat(null, null);" + + self.logger.debug("Collecting cluster host CPU statistics.") + if (g_DWS_mode): + try: + # execute the sql command + (status, result, err_output) = \ + ClusterCommand.excuteSqlOnLocalhost(self.localport, sql) + except Exception as e: + raise Exception(str(e)) + # failed to execute the sql command + if (len(result) != 1): + raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % sql + + " Error number: %d." % len(result)) + items = result[0] + if (items is not None): + self.cluster_stat.cluster_host_total_cpu_time = \ + statItem(items[0], "Jiffies") + self.cluster_stat.cluster_host_cpu_busy_time = \ + statItem(items[1], "Jiffies") + self.cluster_stat.cluster_host_cpu_iowait_time = \ + statItem(items[2], "Jiffies") + self.cluster_stat.cluster_host_cpu_busy_time_perc = \ + statItem(items[3], "%") + self.cluster_stat.cluster_host_cpu_iowait_time_perc = \ + statItem(items[4], "%") + else: + sql = "SELECT o_stat_collect_time as %s, " % (self.__baselineFlag) + sql += "o_avg_cpu_total_time, o_avg_cpu_busy_time, " \ + "o_avg_cpu_iowait_time, o_cpu_busy_perc, " \ + "o_cpu_io_wait_perc " + sql += "FROM pmk.get_cluster_host_cpu_stat(null, null);" + try: + # execute the sql command + records = self.execQueryCommand(sql, 6, 1) + except Exception as e: + raise Exception(str(e)) + # failed to execute the sql command + if (len(records) != 1): + raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % sql + + " Error number: %d."
% len(records)) + items = records[0] + if (items is not None): + self.cluster_stat.cluster_host_total_cpu_time = \ + statItem(items[1], "Jiffies") + self.cluster_stat.cluster_host_cpu_busy_time = \ + statItem(items[2], "Jiffies") + self.cluster_stat.cluster_host_cpu_iowait_time = \ + statItem(items[3], "Jiffies") + self.cluster_stat.cluster_host_cpu_busy_time_perc = \ + statItem(items[4], "%") + self.cluster_stat.cluster_host_cpu_iowait_time_perc = \ + statItem(items[5], "%") + + self.logger.debug("Successfully collected cluster host CPU state.") + + def collectClusterMPPDBCpuStat(self): + ''' + function: collect MPPDB CPU statistics + input : NA + output: NA + ''' + sql = "SELECT o_mppdb_cpu_time_perc_wrt_busy_time, " \ + "o_mppdb_cpu_time_perc_wrt_total_time FROM " + sql += "pmk.get_cluster_mppdb_cpu_stat(null, null);" + + self.logger.debug("Collecting MPPDB CPU statistics.") + if (g_DWS_mode): + try: + (status, result, err_output) = \ + ClusterCommand.excuteSqlOnLocalhost(self.localport, sql) + except Exception as e: + raise Exception(str(e)) + # failed to execute the sql command + if (len(result) != 1): + raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % sql + + " Error number: %d." % len(result)) + items = result[0] + if (items is not None): + self.cluster_stat.cluster_mppdb_cpu_time_in_busy_time = \ + statItem(items[0], "%") + self.cluster_stat.cluster_mppdb_cpu_time_in_total_time = \ + statItem(items[1], "%") + else: + sql = "SELECT o_stat_collect_time as %s, " \ + "o_mppdb_cpu_time_perc_wrt_busy_time, " \ + "o_mppdb_cpu_time_perc_wrt_total_time FROM " % \ + (self.__baselineFlag) + sql += "pmk.get_cluster_mppdb_cpu_stat(null, null);" + try: + # execute the sql command + records = self.execQueryCommand(sql, 3, 2) + except Exception as e: + raise Exception(str(e)) + # failed to execute the sql command + if (len(records) != 1): + raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % sql + + " Error number: %d." % len(records)) + items = records[0] + if (items is not None): + self.cluster_stat.cluster_mppdb_cpu_time_in_busy_time = \ + statItem(items[1], "%") + self.cluster_stat.cluster_mppdb_cpu_time_in_total_time = \ + statItem(items[2], "%") + self.logger.debug( + "Successfully collected cluster MPPDB CPU statistics.") + + def collectShareBufferStat(self): + ''' + function: collect share buffer statistics + input : NA + output: NA + ''' + sql = "SELECT o_total_blocks_read, o_total_blocks_hit, " \ + "o_shared_buffer_hit_ratio " + sql += "FROM pmk.get_cluster_shared_buffer_stat(null, null);" + + self.logger.debug("Collecting shared buffer statistics.") + if (g_DWS_mode): + try: + (status, result, err_output) = \ + ClusterCommand.excuteSqlOnLocalhost(self.localport, sql) + except Exception as e: + raise Exception(ErrorCode.GAUSS_536["GAUSS_53611"] % str(e)) + # failed to execute the sql command + if (len(result) != 1): + raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % sql + + " Error number: %d." 
% len(result)) + items = result[0] + if (items is not None): + self.cluster_stat.cluster_share_buffer_read = \ + statItem(items[0]) + self.cluster_stat.cluster_share_buffer_hit = statItem(items[1]) + self.cluster_stat.cluster_share_buffer_hit_ratio = \ + statItem(items[2], "%") + else: + sql = "SELECT o_stat_collect_time as %s, o_total_blocks_read, " \ + "o_total_blocks_hit, o_shared_buffer_hit_ratio " % \ + (self.__baselineFlag) + sql += "FROM pmk.get_cluster_shared_buffer_stat(null, null);" + try: + # execute the sql command + records = self.execQueryCommand(sql, 4, 3) + except Exception as e: + raise Exception(str(e)) + # failed to execute the sql command + if (len(records) != 1): + raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % sql + + " Error number: %d." % len(records)) + items = records[0] + if (items is not None): + self.cluster_stat.cluster_share_buffer_read = \ + statItem(items[1]) + self.cluster_stat.cluster_share_buffer_hit = statItem(items[2]) + self.cluster_stat.cluster_share_buffer_hit_ratio = \ + statItem(items[3], "%") + self.logger.debug("Successfully collected shared buffer statistics.") + + def collectClusterSortStat(self): + ''' + function: collect sort statistics + input : NA + output: NA + ''' + sql = "SELECT o_total_memory_sorts, o_total_disk_sorts, " \ + "o_memory_sort_ratio " + sql += "FROM pmk.get_cluster_memory_sort_stat(null, null);" + + self.logger.debug("Collecting sort statistics.") + if (g_DWS_mode): + try: + # execute the sql command + (status, result, err_output) = \ + ClusterCommand.excuteSqlOnLocalhost(self.localport, sql) + except Exception as e: + raise Exception(str(e)) + # failed to execute the sql command + if (len(result) != 1): + raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % sql + + " Error number: %d." % len(result)) + items = result[0] + if (items is not None): + self.cluster_stat.cluster_in_memory_sort_count = \ + statItem(items[0]) + self.cluster_stat.cluster_disk_sort_count = statItem(items[1]) + self.cluster_stat.cluster_in_memory_sort_ratio = \ + statItem(items[2], "%") + else: + sql = "SELECT o_stat_collect_time as %s, o_total_memory_sorts, " \ + "o_total_disk_sorts, o_memory_sort_ratio " % \ + (self.__baselineFlag) + sql += "FROM pmk.get_cluster_memory_sort_stat(null, null);" + try: + # execute the sql command + records = self.execQueryCommand(sql, 4, 4) + except Exception as e: + raise Exception(str(e)) + # failed to execute the sql command + if (len(records) != 1): + raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % sql + + " Error number: %d." 
% len(records)) + items = records[0] + if (items is not None): + self.cluster_stat.cluster_in_memory_sort_count = \ + statItem(items[1]) + self.cluster_stat.cluster_disk_sort_count = statItem(items[2]) + self.cluster_stat.cluster_in_memory_sort_ratio = \ + statItem(items[3], "%") + self.logger.debug("Successfully collected cluster sort statistics.") + + def collectClusterIOStat(self): + ''' + function: collect IO statistics + input : NA + output: NA + ''' + sql = "SELECT o_number_of_files, o_physical_reads, " \ + "o_physical_writes, " \ + "o_read_time, o_write_time " + sql += "FROM pmk.get_cluster_io_stat(null, null);" + + self.logger.debug("Collecting IO statistics.") + if (g_DWS_mode): + try: + # execute the sql command + (status, result, err_output) = \ + ClusterCommand.excuteSqlOnLocalhost(self.localport, sql) + except Exception as e: + raise Exception(str(e)) + # failed to execute the sql command + if (len(result) != 1): + raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % sql + + " Error number: %d." % len(result)) + items = result[0] + if (items is not None): + self.cluster_stat.cluster_io_stat_number_of_files = \ + statItem(items[0]) + self.cluster_stat.cluster_io_stat_physical_reads = \ + statItem(items[1]) + self.cluster_stat.cluster_io_stat_physical_writes = \ + statItem(items[2]) + self.cluster_stat.cluster_io_stat_read_time = \ + statItem(items[3], "ms") + self.cluster_stat.cluster_io_stat_write_time = \ + statItem(items[4], "ms") + else: + sql = "SELECT o_stat_collect_time as %s, o_number_of_files, " \ + "o_physical_reads, o_physical_writes, o_read_time, " \ + "o_write_time " % (self.__baselineFlag) + sql += "FROM pmk.get_cluster_io_stat(null, null);" + try: + # execute the sql command + records = self.execQueryCommand(sql, 6, 5) + except Exception as e: + raise Exception(str(e)) + # failed to execute the sql command + if (len(records) != 1): + raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % sql + + " Error number: %d." % len(records)) + items = records[0] + if (items is not None): + self.cluster_stat.cluster_io_stat_number_of_files = \ + statItem(items[1]) + self.cluster_stat.cluster_io_stat_physical_reads = \ + statItem(items[2]) + self.cluster_stat.cluster_io_stat_physical_writes = \ + statItem(items[3]) + self.cluster_stat.cluster_io_stat_read_time = \ + statItem(items[4], "ms") + self.cluster_stat.cluster_io_stat_write_time = \ + statItem(items[5], "ms") + self.logger.debug("Successfully collected cluster IO statistics.") + + def collectClusterDiskStat(self): + ''' + function: collect disk usage statistics + input : NA + output: NA + ''' + sql = "SELECT o_tot_datanode_db_size, o_tot_physical_writes, " \ + "o_avg_write_per_sec, o_max_node_physical_writes " + sql += "FROM pmk.get_cluster_disk_usage_stat(null, null, '%s');" % \ + str(database_size) + + self.logger.debug("Collecting disk usage statistics.") + if (g_DWS_mode): + try: + # execute the sql command + (status, result, err_output) = \ + ClusterCommand.excuteSqlOnLocalhost(self.localport, sql) + except Exception as e: + raise Exception(str(e)) + # failed to execute the sql command + if (len(result) != 1): + raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % sql + + " Error number: %d." 
% len(result)) + items = result[0] + if (items is not None): + self.cluster_stat.cluster_disk_usage_db_size = \ + statItem(items[0].split()[0], items[0].split()[1]) + self.cluster_stat.cluster_disk_usage_tot_physical_writes = \ + statItem(items[1]) + self.cluster_stat.cluster_disk_usage_avg_physical_write = \ + statItem(items[2]) + self.cluster_stat.cluster_disk_usage_max_physical_write = \ + statItem(items[3]) + else: + sql = "SELECT o_stat_collect_time as %s, o_tot_datanode_db_size," \ + " o_tot_physical_writes, o_avg_write_per_sec, " \ + "o_max_node_physical_writes " % (self.__baselineFlag) + sql += "FROM pmk.get_cluster_disk_usage_stat(null, null, '%s');" \ + % str(database_size) + try: + # execute the sql command + records = self.execQueryCommand(sql, 5, 6) + except Exception as e: + raise Exception(str(e)) + # failed to execute the sql command + if (len(records) != 1): + raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % sql + + " Error number: %d." % len(records)) + items = records[0] + if (items is not None): + self.cluster_stat.cluster_disk_usage_db_size = \ + statItem(items[1].split()[0], items[1].split()[1]) + self.cluster_stat.cluster_disk_usage_tot_physical_writes = \ + statItem(items[2]) + self.cluster_stat.cluster_disk_usage_avg_physical_write = \ + statItem(items[3]) + self.cluster_stat.cluster_disk_usage_max_physical_write = \ + statItem(items[4]) + self.logger.debug("Successfully collected cluster disk statistics.") + + def collectClusterActiveSqlCount(self): + ''' + function: collect active SQL statistics + input : NA + output: NA + ''' + sql = "SELECT o_tot_active_sql_count " + sql += "FROM pmk.get_cluster_active_sql_count(null, null);" + + self.logger.debug("Collecting active SQL statistics.") + if (g_DWS_mode): + try: + # execute the sql command + (status, result, err_output) = \ + ClusterCommand.excuteSqlOnLocalhost(self.localport, sql) + except Exception as e: + raise Exception(str(e)) + # failed to execute the sql command + if (len(result) != 1): + raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % sql + + " Error number: %d." % len(result)) + items = result[0] + if (items is not None): + self.cluster_stat.cluster_activity_active_sql_count = \ + statItem(items[0]) + else: + sql = "SELECT o_stat_collect_time as %s, o_tot_active_sql_count " \ + "" % \ + (self.__baselineFlag) + sql += "FROM pmk.get_cluster_active_sql_count(null, null);" + try: + # execute the sql command + records = self.execQueryCommand(sql, 2, 7) + except Exception as e: + raise Exception(str(e)) + # failed to execute the sql command + if (len(records) != 1): + raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % sql + + " Error number: %d." % len(records)) + items = records[0] + if (items is not None): + self.cluster_stat.cluster_activity_active_sql_count = \ + statItem(items[1]) + self.logger.debug("Successfully collected cluster active SQL count.") + + def collectClusterSessionCount(self): + ''' + function: collect session count statistics + input : NA + output: NA + ''' + sql = "SELECT o_tot_session_count " + sql += "FROM pmk.get_cluster_session_count(null, null);" + + self.logger.debug("Collecting session count statistics.") + if (g_DWS_mode): + try: + # execute the sql command + (status, result, err_output) = \ + ClusterCommand.excuteSqlOnLocalhost(self.localport, sql) + except Exception as e: + raise Exception(str(e)) + # failed to execute the sql command + if (len(result) != 1): + raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % sql + + " Error number: %d." 
% len(result)) + items = result[0] + if (items is not None): + self.cluster_stat.cluster_activity_session_count = \ + statItem(items[0]) + else: + sql = "SELECT o_stat_collect_time as %s, o_tot_session_count " % \ + (self.__baselineFlag) + sql += "FROM pmk.get_cluster_session_count(null, null);" + try: + # execute the sql command + records = self.execQueryCommand(sql, 2, 8) + except Exception as e: + raise Exception(str(e)) + # failed to execute the sql command + if (len(records) != 1): + raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % sql + + " Error number: %d." % len(records)) + items = records[0] + if (items is not None): + self.cluster_stat.cluster_activity_session_count = \ + statItem(items[1]) + self.logger.debug("Successfully collected cluster session count.") + + def collectNodeCpuStat(self): + ''' + function: collect node CPU statistics + input : NA + output: NA + ''' + sql = "SELECT o_node_name, " + sql += "o_mppdb_cpu_time, o_host_cpu_busy_time, " \ + "o_host_cpu_total_time, " \ + "o_mppdb_cpu_time_perc_wrt_busy_time, " + sql += "o_mppdb_cpu_time_perc_wrt_total_time FROM " \ + "pmk.get_node_cpu_stat('all', null, null);" + + self.logger.debug("Collecting node CPU statistics.") + if (g_DWS_mode): + try: + # execute the sql command + (status, result, err_output) = \ + ClusterCommand.excuteSqlOnLocalhost(self.localport, sql) + except Exception as e: + raise Exception(str(e)) + recordsCount = len(result) + # failed to execute the sql command + if (recordsCount == 0): + raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % sql + + " Error: No records returned.") + + for i in range(0, recordsCount): + record = result[i] + + found = False + for node in self.node_stat: + if (node.nodename == record[0].strip()): + found = True + break + + if (not found): + node = nodeStatistics(record[0].strip()) + self.node_stat.append(node) + + node.node_mppdb_cpu_busy_time = statItem(record[1], "Jiffies") + node.node_host_cpu_busy_time = statItem(record[2], "Jiffies") + node.node_host_cpu_total_time = statItem(record[3], "Jiffies") + node.node_mppdb_cpu_time_in_busy_time = statItem(record[4], + "%") + node.node_mppdb_cpu_time_in_total_time = statItem(record[5], + "%") + else: + sql = "SELECT o_stat_collect_time as %s, o_node_name, " % \ + (self.__baselineFlag) + sql += "o_mppdb_cpu_time, o_host_cpu_busy_time, " \ + "o_host_cpu_total_time, " \ + "o_mppdb_cpu_time_perc_wrt_busy_time, " + sql += "o_mppdb_cpu_time_perc_wrt_total_time FROM " \ + "pmk.get_node_cpu_stat('all', null, null);" + try: + # execute the sql command + records = self.execQueryCommand(sql, 7, 9) + except Exception as e: + raise Exception(str(e)) + + recordsCount = len(records) + # failed to execute the sql command + if (recordsCount == 0): + raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % sql + + " Error: No records returned.") + + for i in range(0, recordsCount): + record = records[i] + + found = False + for node in self.node_stat: + if (node.nodename == record[1].strip()): + found = True + break + + if (not found): + node = nodeStatistics(record[1].strip()) + self.node_stat.append(node) + + node.node_mppdb_cpu_busy_time = statItem(record[2], "Jiffies") + node.node_host_cpu_busy_time = statItem(record[3], "Jiffies") + node.node_host_cpu_total_time = statItem(record[4], "Jiffies") + node.node_mppdb_cpu_time_in_busy_time = \ + statItem(record[5], "%") + node.node_mppdb_cpu_time_in_total_time = \ + statItem(record[6], "%") + self.logger.debug("Successfully collected node CPU statistics.") + + def collectNodeMemoryStat(self): + 
''' + function: collect node memory statistics + input : NA + output: NA + ''' + sql = "SELECT o_node_name, " + sql += "o_physical_memory, o_shared_buffer_size, " \ + "o_shared_buffer_hit_ratio, o_sorts_in_memory, " + sql += "o_sorts_in_disk, o_in_memory_sort_ratio, o_db_memory_usage " \ + "FROM pmk.get_node_memory_stat('all', null, null);" + + self.logger.debug("Collecting node memory statistics.") + if (g_DWS_mode): + try: + # execute the sql command + (status, result, err_output) = \ + ClusterCommand.excuteSqlOnLocalhost(self.localport, sql) + except Exception as e: + raise Exception(str(e)) + recordsCount = len(result) + # failed to execute the sql command + if (recordsCount == 0): + raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % sql + + " Error: No records returned.") + + for i in range(0, recordsCount): + record = result[i] + + found = False + for node in self.node_stat: + if (node.nodename == record[0].strip()): + found = True + break + + if (not found): + node = nodeStatistics(record[0].strip()) + self.node_stat.append(node) + + node.node_physical_memory = statItem(record[1], "Bytes") + node.node_db_memory_usage = statItem(record[7], "Bytes") + node.node_shared_buffer_size = statItem(record[2], "Bytes") + node.node_shared_buffer_hit_ratio = statItem(record[3], "%") + node.node_in_memory_sorts = statItem(record[4], ) + node.node_in_disk_sorts = statItem(record[5], ) + node.node_in_memory_sort_ratio = statItem(record[6], "%") + else: + sql = "SELECT o_stat_collect_time as %s, o_node_name, " % \ + (self.__baselineFlag) + sql += "o_physical_memory, o_shared_buffer_size, " \ + "o_shared_buffer_hit_ratio, o_sorts_in_memory, " + sql += "o_sorts_in_disk, o_in_memory_sort_ratio, " \ + "o_db_memory_usage " \ + "FROM pmk.get_node_memory_stat('all', null, null);" + try: + # execute the sql command + records = self.execQueryCommand(sql, 9, 10) + except Exception as e: + raise Exception(str(e)) + + recordsCount = len(records) + # failed to execute the sql command + if (recordsCount == 0): + raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % sql + + " Error: No records returned.") + + for i in range(0, recordsCount): + record = records[i] + + found = False + for node in self.node_stat: + if (node.nodename == record[1].strip()): + found = True + break + + if (not found): + node = nodeStatistics(record[1].strip()) + self.node_stat.append(node) + + node.node_physical_memory = statItem(record[2], "Bytes") + node.node_db_memory_usage = statItem(record[8], "Bytes") + node.node_shared_buffer_size = statItem(record[3], "Bytes") + node.node_shared_buffer_hit_ratio = statItem(record[4], "%") + node.node_in_memory_sorts = statItem(record[5], ) + node.node_in_disk_sorts = statItem(record[6], ) + node.node_in_memory_sort_ratio = statItem(record[7], "%") + self.logger.debug("Successfully collected node memory statistics.") + + def collectNodeIOStat(self): + ''' + function: collect node IO statistics + input : NA + output: NA + ''' + sql = "SELECT o_node_name, " + sql += "o_number_of_files, o_physical_reads, o_physical_writes, " \ + "o_read_time, " + sql += "o_write_time FROM pmk.get_node_io_stat('all', null, null);" + + self.logger.debug("Collecting node IO statistics.") + if (g_DWS_mode): + try: + # execute the sql command + (status, result, err_output) = \ + ClusterCommand.excuteSqlOnLocalhost(self.localport, sql) + except Exception as e: + raise Exception(str(e)) + + recordsCount = len(result) + # failed to execute the sql command + if (recordsCount == 0): + raise 
Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % sql + + " Error: No records returned.") + + for i in range(0, recordsCount): + record = result[i] + + found = False + for node in self.node_stat: + if (node.nodename == record[0].strip()): + found = True + break + + if (not found): + node = nodeStatistics(record[0].strip()) + self.node_stat.append(node) + + node.node_number_of_files = statItem(record[1]) + node.node_physical_reads = statItem(record[2]) + node.node_physical_writes = statItem(record[3]) + node.node_read_time = statItem(record[4]) + node.node_write_time = statItem(record[5]) + else: + sql = "SELECT o_stat_collect_time as %s, o_node_name, " % \ + (self.__baselineFlag) + sql += "o_number_of_files, o_physical_reads, o_physical_writes, " \ + "o_read_time, " + sql += "o_write_time FROM pmk.get_node_io_stat('all', null, null);" + try: + # execute the sql command + records = self.execQueryCommand(sql, 7, 11) + except Exception as e: + raise Exception(str(e)) + + recordsCount = len(records) + # failed to execute the sql command + if (recordsCount == 0): + raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % sql + + " Error: No records returned.") + + for i in range(0, recordsCount): + record = records[i] + + found = False + for node in self.node_stat: + if (node.nodename == record[1].strip()): + found = True + break + + if (not found): + node = nodeStatistics(record[1].strip()) + self.node_stat.append(node) + + node.node_number_of_files = statItem(record[2]) + node.node_physical_reads = statItem(record[3]) + node.node_physical_writes = statItem(record[4]) + node.node_read_time = statItem(record[5]) + node.node_write_time = statItem(record[6]) + self.logger.debug("Successfully collected node IO statistics.") + + def collectSessionCpuStat(self): + ''' + function: collect session CPU statistics + input : NA + output: NA + ''' + self.logger.debug("Collecting session CPU statistics.") + sessionCpuTempResult = "" + try: + # get session Cpu Temp Result + sessionCpuTempResult = os.path.join( + DefaultValue.getTmpDirFromEnv(), + "sessionCpuTempResult_%d_%s" % \ + (self.flagNum, self.masterHost)) + # read session Cpu Temp Result + with open(sessionCpuTempResult, 'r') as fp: + # parse session Cpu Temp Result + for line in fp.readlines(): + line = line.strip() + if line != "": + tempList = line.split('|') + sess = sessionStatistics((tempList[0]).strip(), + (tempList[1]).strip(), + (tempList[2]).strip()) + sess.session_cpu_time = statItem((tempList[3]).strip()) + sess.session_db_cpu_time = \ + statItem((tempList[4]).strip()) + sess.session_cpu_percent = \ + statItem((tempList[5]).strip(), "%") + self.session_cpu_stat.append(sess) + # close and remove session Cpu Temp Result + DefaultValue.cleanTmpFile(sessionCpuTempResult) + except Exception as e: + # close and remove session Cpu Temp Result + DefaultValue.cleanTmpFile(sessionCpuTempResult) + raise Exception(str(e)) + self.logger.debug("Successfully collected session CPU statistics.") + + def collectSessionMemoryStat(self): + ''' + function: collect session memory statistics + input : NA + output: NA + ''' + self.logger.debug("Collecting session memory statistics.") + sessionMemTempResult = "" + try: + # get session Memory Temp Result + sessionMemTempResult = os.path.join( + DefaultValue.getTmpDirFromEnv(), + "sessionMemTempResult_%d_%s" % \ + (self.flagNum, self.masterHost)) + # read session Memory Temp Result + with open(sessionMemTempResult, 'r') as fp: + # parse session Memory Temp Result + for line in fp.readlines(): + line = line.strip() + if 
line != "": + tempList = line.split('|') + sess = sessionStatistics((tempList[0]).strip(), + (tempList[1]).strip(), + (tempList[2]).strip()) + sess.session_buffer_reads = \ + statItem((tempList[5]).strip()) + sess.session_buffer_hit_ratio = \ + statItem((tempList[6]).strip()) + sess.session_in_memory_sorts = \ + statItem((tempList[7]).strip()) + sess.session_in_disk_sorts = statItem( + (tempList[8]).strip()) + sess.session_in_memory_sorts_ratio = \ + statItem((tempList[9]).strip()) + sess.session_total_memory_size = \ + statItem((tempList[3]).strip()) + sess.session_used_memory_size = \ + statItem((tempList[4]).strip()) + self.session_mem_stat.append(sess) + # close and remove session Memory Temp Result + DefaultValue.cleanTmpFile(sessionMemTempResult) + except Exception as e: + # close and remove session Memory Temp Result + DefaultValue.cleanTmpFile(sessionMemTempResult) + raise Exception(str(e)) + self.logger.debug("Successfully collected session memory statistics.") + + def collectSessionIOStat(self): + ''' + function: collect session IO statistics + input : NA + output: NA + ''' + self.logger.debug("Collecting session IO statistics.") + sessionIOTempResult = "" + try: + # get session IO Temp Result + sessionIOTempResult = os.path.join(DefaultValue.getTmpDirFromEnv(), + "sessionIOTempResult_%d_%s" % \ + (self.flagNum, self.masterHost)) + # read session IO Temp Result + with open(sessionIOTempResult, 'r') as fp: + # parse session IO Temp Result + for line in fp.readlines(): + line = line.strip() + if line != "": + tempList = line.split('|') + sess = sessionStatistics((tempList[0]).strip(), + (tempList[1]).strip(), + (tempList[2]).strip()) + sess.session_physical_reads = \ + statItem((tempList[3]).strip()) + sess.session_read_time = \ + statItem((tempList[4]).strip()) + self.session_io_stat.append(sess) + # close and remove session IO Temp Result + DefaultValue.cleanTmpFile(sessionIOTempResult) + except Exception as e: + # close and remove session IO Temp Result + DefaultValue.cleanTmpFile(sessionIOTempResult) + raise Exception(str(e)) + self.logger.debug("Successfully collected session IO statistics.") + + def displayOneStatItem(self, desc, disvalue): + ''' + function: display one statistic item + input : desc, disvalue + output: NA + ''' + # judge if disvalue is none + if (str(disvalue) != ""): + self.writeOutput(" %-45s: %s" % (desc, str(disvalue))) + else: + self.writeOutput(" %-45s: 0" % (desc)) + + def summaryDisplay(self): + ''' + function: summary display + input : NA + output: NA + ''' + # show cluster statistics summary information + self.writeOutput("Cluster statistics information:") + # show host CPU busy time ratio + self.displayOneStatItem( + "Host CPU busy time ratio", + self.cluster_stat.cluster_host_cpu_busy_time_perc) + # show MPPDB CPU time + self.displayOneStatItem( + "MPPDB CPU time % in busy time", + self.cluster_stat.cluster_mppdb_cpu_time_in_busy_time) + # show shared buffer hit ratio + self.displayOneStatItem( + "Shared Buffer Hit ratio", + self.cluster_stat.cluster_share_buffer_hit_ratio) + # show In-memory sort ratio + self.displayOneStatItem( + "In-memory sort ratio", + self.cluster_stat.cluster_in_memory_sort_ratio) + # show physical reads + self.displayOneStatItem( + "Physical Reads", + self.cluster_stat.cluster_io_stat_physical_reads) + # show physical writes + self.displayOneStatItem( + "Physical Writes", + self.cluster_stat.cluster_io_stat_physical_writes) + # show DB size + self.displayOneStatItem( + "DB size", + 
self.cluster_stat.cluster_disk_usage_db_size) + # show Total Physical writes + self.displayOneStatItem( + "Total Physical writes", + self.cluster_stat.cluster_disk_usage_tot_physical_writes) + # show Active SQL count + self.displayOneStatItem( + "Active SQL count", + self.cluster_stat.cluster_activity_active_sql_count) + # show Session count + self.displayOneStatItem( + "Session count", + self.cluster_stat.cluster_activity_session_count) + + def detailDisplay(self): + ''' + function: detail display + input : NA + output: NA + ''' + # show host CPU ratio in detail + self.writeOutput("Cluster statistics information:") + self.writeOutput("Host CPU usage rate:") + self.displayOneStatItem( + "Host total CPU time", + self.cluster_stat.cluster_host_total_cpu_time) + self.displayOneStatItem( + "Host CPU busy time", + self.cluster_stat.cluster_host_cpu_busy_time) + self.displayOneStatItem( + "Host CPU iowait time", + self.cluster_stat.cluster_host_cpu_iowait_time) + self.displayOneStatItem( + "Host CPU busy time ratio", + self.cluster_stat.cluster_host_cpu_busy_time_perc) + self.displayOneStatItem( + "Host CPU iowait time ratio", + self.cluster_stat.cluster_host_cpu_iowait_time_perc) + + # show MPPDB CPU time in detail + self.writeOutput("MPPDB CPU usage rate:") + self.displayOneStatItem( + "MPPDB CPU time % in busy time", + self.cluster_stat.cluster_mppdb_cpu_time_in_busy_time) + self.displayOneStatItem( + "MPPDB CPU time % in total time", + self.cluster_stat.cluster_mppdb_cpu_time_in_total_time) + + # show Shared Buffer Hit ratio in detail + self.writeOutput("Shared buffer hit rate:") + self.displayOneStatItem("Shared Buffer Reads", + self.cluster_stat.cluster_share_buffer_read) + self.displayOneStatItem("Shared Buffer Hits", + self.cluster_stat.cluster_share_buffer_hit) + self.displayOneStatItem( + "Shared Buffer Hit ratio", + self.cluster_stat.cluster_share_buffer_hit_ratio) + + # show In-memory sort ratio in detail + self.writeOutput("In memory sort rate:") + self.displayOneStatItem("In-memory sort count", + self.cluster_stat.cluster_in_memory_sort_count) + self.displayOneStatItem("In-disk sort count", + self.cluster_stat.cluster_disk_sort_count) + self.displayOneStatItem("In-memory sort ratio", + self.cluster_stat.cluster_in_memory_sort_ratio) + + # show I/O usage in detail + self.writeOutput("I/O usage:") + self.displayOneStatItem( + "Number of files", + self.cluster_stat.cluster_io_stat_number_of_files) + self.displayOneStatItem( + "Physical Reads", + self.cluster_stat.cluster_io_stat_physical_reads) + self.displayOneStatItem( + "Physical Writes", + self.cluster_stat.cluster_io_stat_physical_writes) + self.displayOneStatItem("Read Time", + self.cluster_stat.cluster_io_stat_read_time) + self.displayOneStatItem("Write Time", + self.cluster_stat.cluster_io_stat_write_time) + + # show Disk usage in detail + self.writeOutput("Disk usage:") + self.displayOneStatItem("DB size", + self.cluster_stat.cluster_disk_usage_db_size) + self.displayOneStatItem( + "Total Physical writes", + self.cluster_stat.cluster_disk_usage_tot_physical_writes) + self.displayOneStatItem( + "Average Physical write", + self.cluster_stat.cluster_disk_usage_avg_physical_write) + self.displayOneStatItem( + "Maximum Physical write", + self.cluster_stat.cluster_disk_usage_max_physical_write) + + # show SQL count and session count in detail + self.writeOutput("Activity statistics:") + self.displayOneStatItem( + "Active SQL count", + self.cluster_stat.cluster_activity_active_sql_count) + self.displayOneStatItem( + "Session count", 
+ self.cluster_stat.cluster_activity_session_count) + + # show node statistics information + self.writeOutput("Node statistics information:") + for node in self.node_stat: + # show node name + self.writeOutput("%s:" % node.nodename) + # show cpu usage in host + self.displayOneStatItem("MPPDB CPU Time", + node.node_mppdb_cpu_busy_time) + self.displayOneStatItem("Host CPU Busy Time", + node.node_host_cpu_busy_time) + self.displayOneStatItem("Host CPU Total Time", + node.node_host_cpu_total_time) + self.displayOneStatItem("MPPDB CPU Time % in Busy Time", + node.node_mppdb_cpu_time_in_busy_time) + self.displayOneStatItem("MPPDB CPU Time % in Total Time", + node.node_mppdb_cpu_time_in_total_time) + + # show memory usage in host + self.displayOneStatItem("Physical memory", + node.node_physical_memory) + self.displayOneStatItem("DB Memory usage", + node.node_db_memory_usage) + self.displayOneStatItem("Shared buffer size", + node.node_shared_buffer_size) + self.displayOneStatItem("Shared buffer hit ratio", + node.node_shared_buffer_hit_ratio) + self.displayOneStatItem("Sorts in memory", + node.node_in_memory_sorts) + self.displayOneStatItem("Sorts in disk", + node.node_in_disk_sorts) + self.displayOneStatItem("In-memory sort ratio", + node.node_in_memory_sort_ratio) + + # show IO usage in host + self.displayOneStatItem("Number of files", + node.node_number_of_files) + self.displayOneStatItem("Physical Reads", + node.node_physical_reads) + self.displayOneStatItem("Physical Writes", + node.node_physical_writes) + self.displayOneStatItem("Read Time", + node.node_read_time) + self.displayOneStatItem("Write Time", + node.node_write_time) + + # show session statistics information + self.writeOutput("Session statistics information(Top %d):" % + self.__TopNSessions) + # show session cpu usage statistics + self.writeOutput("Session CPU statistics:") + for i in range(0, len(self.session_cpu_stat)): + sess = self.session_cpu_stat[i] + self.writeOutput("%d %s-%s-%s:" % + ( + i + 1, sess.nodename, sess.dbname, + sess.username)) + self.displayOneStatItem("Session CPU time", + sess.session_cpu_time) + self.displayOneStatItem("Database CPU time", + sess.session_db_cpu_time) + self.displayOneStatItem("Session CPU time %", + sess.session_cpu_percent) + + # show session Memory statistics + self.writeOutput("\nSession Memory statistics:") + for i in range(0, len(self.session_mem_stat)): + sess = self.session_mem_stat[i] + self.writeOutput("%d %s-%s-%s:" % + ( + i + 1, sess.nodename, sess.dbname, + sess.username)) + self.displayOneStatItem("Buffer Reads", sess.session_buffer_reads) + self.displayOneStatItem("Shared Buffer Hit ratio", + sess.session_buffer_hit_ratio) + self.displayOneStatItem("In Memory sorts", + sess.session_in_memory_sorts) + self.displayOneStatItem("In Disk sorts", + sess.session_in_disk_sorts) + self.displayOneStatItem("In Memory sorts ratio", + sess.session_in_memory_sorts_ratio) + self.displayOneStatItem("Total Memory Size", + sess.session_total_memory_size) + self.displayOneStatItem("Used Memory Size", + sess.session_used_memory_size) + + # show session IO statistics + self.writeOutput("\nSession IO statistics:") + for i in range(0, len(self.session_io_stat)): + sess = self.session_io_stat[i] + self.writeOutput("%d %s-%s-%s:" % + ( + i + 1, sess.nodename, sess.dbname, + sess.username)) + self.displayOneStatItem("Physical Reads", + sess.session_physical_reads) + self.displayOneStatItem("Read Time", sess.session_read_time) + + +if __name__ == '__main__': + + import getopt + + 
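+ # Command-line entry point: dispatches on the -t action to install the + # pmk schema, collect statistics, display them, or asynchronously collect + # the database size; the remaining options carry cluster context, + # presumably supplied by the calling OM tool.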
sys.path.append(sys.path[0] + "/../../") + from gspylib.common.GaussLog import GaussLog + + + def usage(): + """ +Usage: + python3 GaussStat.py -t action -p installpath -u user -c ip:port [-d] + [-l log] + +options: + -t Action type to perform. + -p Install path. + -u Database user name. + -c Host information. + -d --detail Show the detail info about + performance check. + -l --logpath=logfile The log file of operation. + -h --help Show this help, then exit. + """ + print(usage.__doc__) + + + # parse parameters from command line + try: + (opts, args) = getopt.getopt(sys.argv[1:], "t:p:u:c:l:dh", + ["logpath=", "detail", "help", "dws-mode", + "curr-time=", "last-time=", + "snapshot-id=", "flag-num=", + "master-host=", "database-size=", + "abnormal-CN="]) + except Exception as e: + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50000"] % str(e)) + + if (len(args) > 0): + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50000"] % + str(args[0])) + + # state variables + installPath = "" + user = "" + logFile = "" + localPort = [] + detail = False + currTime = "" + lastTime = "" + snapshotId = "" + flagNum = 0 + masterHost = "" + action = "" + dwsFlag = False + database_size = 0 + abnormalCN = [] + + # get parameter value + for (key, value) in opts: + if (key == "-h" or key == "--help"): + usage() + sys.exit(0) + elif (key == "-t"): + action = value.strip() + elif (key == "-p"): + installPath = value.strip() + elif (key == "-u"): + user = value.strip() + elif (key == "-c"): + localPort = value.strip() + elif (key == "-l" or key == "--logpath"): + logFile = value.strip() + elif (key == "-d" or key == "--detail"): + detail = True + elif (key == "--curr-time"): + currTime = value.strip() + elif (key == "--last-time"): + lastTime = value.strip() + elif (key == "--snapshot-id"): + snapshotId = value.strip() + elif (key == "--flag-num"): + flagNum = value + elif (key == "--master-host"): + masterHost = value.strip() + elif (key == "--dws-mode"): + dwsFlag = True + elif (key == "--database-size"): + database_size = int(value) + elif (key == "--abnormal-CN"): + abnormalCN = str(value).split(",") + + # check that the install path exists and the user and local port are set + if (not os.path.exists(installPath) or user == "" or localPort == ""): + usage() + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50001"] % + "p or -u or -c" + ".") + + # get log file + if (logFile == ""): + logFile = "%s/om/%s" % (DefaultValue.getUserLogDirWithUser(user), + DefaultValue.LOCAL_LOG_FILE) + + # initialize log + logger = GaussLog(logFile, "GaussStat") + + try: + g_clusterInfo = dbClusterInfo() + # Init cluster from static configuration file + g_clusterInfo.initFromStaticConfig(user) + localNodeInfo = g_clusterInfo.getDbNodeByName( + DefaultValue.GetHostIpOrName()) + security_mode_value = DefaultValue.getSecurityMode() + if (security_mode_value == "on"): + g_DWS_mode = True + stat = GaussStat(installPath, user, localPort, currTime, lastTime, + snapshotId, int(flagNum), masterHost, logger, + detail) + if (action == ACTION_INSTALL_PMK): + # install PMK schema + stat.installPMKSchema() + elif (action == ACTION_COLLECT_STAT): + # collect performance statistics + stat.collect() + elif (action == ACTION_DISPLAY_STAT): + # display performance statistics + stat.display() + elif (action == ACTION_ASYN_COLLECT): + # asynchronously collect database size + stat.asynCollectDatabaseSize(localPort) + except Exception as e: + logger.logExit(str(e)) + # close log file + logger.closeLog() diff --git a/script/gspylib/common/LocalBaseOM.py b/script/gspylib/common/LocalBaseOM.py new
file mode 100644 index 0000000..7804633 --- /dev/null +++ b/script/gspylib/common/LocalBaseOM.py @@ -0,0 +1,166 @@ +# -*- coding:utf-8 -*- +############################################################################# +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +############################################################################# +import sys +import os + +sys.path.append(sys.path[0] + "/../../") +from gspylib.common.GaussLog import GaussLog +from gspylib.common.DbClusterInfo import dbClusterInfo +from gspylib.common.Common import DefaultValue +from gspylib.common.VersionInfo import VersionInfo +from gspylib.os.gsOSlib import g_OSlib +from gspylib.common.ErrorCode import ErrorCode +from gspylib.component.Kernel.DN_OLAP.DN_OLAP import DN_OLAP + + +class LocalBaseOM(object): + """ + Base class for local command + """ + + def __init__(self, logFile=None, user=None, clusterConf=None, + dwsMode=False, initParas=None, gtmInitParas=None): + ''' + Constructor + ''' + if (logFile is not None): + self.logger = GaussLog(logFile, self.__class__.__name__) + else: + self.logger = None + self.clusterInfo = None + self.dbNodeInfo = None + self.clusterConfig = clusterConf + self.user = user + self.group = "" + self.dws_mode = dwsMode + if initParas is None: + initParas = [] + self.initParas = initParas + if gtmInitParas is None: + gtmInitParas = [] + self.gtmInitParas = gtmInitParas + self.etcdCons = [] + self.cmCons = [] + self.gtmCons = [] + self.cnCons = [] + self.dnCons = [] + self.gtsCons = [] + + def initComponent(self): + """ + function: Init component + input : NA + output: NA + """ + self.initKernelComponent() + + def initComponentAttributes(self, component): + """ + function: Init component attributes on current node + input : Object component + output: NA + """ + component.logger = self.logger + component.binPath = "%s/bin" % self.clusterInfo.appPath + component.dwsMode = self.dws_mode + + def initKernelComponent(self): + """ + function: Init kernel component on current node + input : Object nodeInfo + output: NA + """ + for inst in self.dbNodeInfo.datanodes: + component = DN_OLAP() + # init component cluster type + component.clusterType = self.clusterInfo.clusterType + component.instInfo = inst + component.instInfo.peerInstanceInfos = \ + self.clusterInfo.getPeerInstance(component.instInfo) + self.initComponentAttributes(component) + component.initParas = self.initParas + self.dnCons.append(component) + + def readConfigInfo(self): + """ + function: Read config from static config file + input : NA + output: NA + """ + try: + self.clusterInfo = dbClusterInfo() + hostName = DefaultValue.GetHostIpOrName() + dynamicFileExist = False + if self.__class__.__name__ == "Start": + dynamicFileExist = \ + self.clusterInfo.dynamicConfigExists(self.user) + if dynamicFileExist: + self.clusterInfo.readDynamicConfig(self.user) + self.dbNodeInfo = self.clusterInfo.getDbNodeByName(hostName) + else: + self.clusterInfo.initFromStaticConfig(self.user) + self.dbNodeInfo = self.clusterInfo.getDbNodeByName(hostName) + if 
self.dbNodeInfo is None: + self.logger.logExit(ErrorCode.GAUSS_516["GAUSS_51619"] % + hostName) + except Exception as e: + self.logger.logExit(str(e)) + + self.logger.debug("Instance information on local node:\n%s" % + str(self.dbNodeInfo)) + + def readConfigInfoByXML(self): + """ + function: Read config from xml config file + input : NA + output: NA + """ + try: + if (self.clusterConfig is None): + self.logger.logExit(ErrorCode.GAUSS_502["GAUSS_50201"] % + "XML configuration file") + static_config_file = "%s/bin/cluster_static_config" % \ + DefaultValue.getInstallDir(self.user) + self.clusterInfo = dbClusterInfo() + self.clusterInfo.initFromXml(self.clusterConfig, + static_config_file) + hostName = DefaultValue.GetHostIpOrName() + self.dbNodeInfo = self.clusterInfo.getDbNodeByName(hostName) + if (self.dbNodeInfo is None): + self.logger.logExit(ErrorCode.GAUSS_516["GAUSS_51619"] % + hostName) + except Exception as e: + self.logger.logExit(str(e)) + self.logger.debug("Instance information on local node:\n%s" % + str(self.dbNodeInfo)) + + def getUserInfo(self): + """ + Get user and group + """ + if os.path.islink(self.clusterInfo.appPath): + appPath = os.path.realpath(self.clusterInfo.appPath) + elif os.path.exists(self.clusterInfo.appPath): + appPath = self.clusterInfo.appPath + else: + commitid = VersionInfo.getCommitid() + appPath = self.clusterInfo.appPath + "_" + commitid + self.logger.debug("Get the install path %s user info." % appPath) + (self.user, self.group) = g_OSlib.getPathOwner(appPath) + if (self.user == "" or self.group == ""): + self.logger.logExit(ErrorCode.GAUSS_503["GAUSS_50308"]) diff --git a/script/gspylib/common/OMCommand.py b/script/gspylib/common/OMCommand.py new file mode 100644 index 0000000..c0610b7 --- /dev/null +++ b/script/gspylib/common/OMCommand.py @@ -0,0 +1,376 @@ +# -*- coding:utf-8 -*- +############################################################################# +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. 
+# ---------------------------------------------------------------------------- +# Description : OMCommand.py is a utility to execute OM commands +############################################################################# +import os +import sys +import time +import re +import subprocess +from multiprocessing.dummy import Pool as ThreadPool + +sys.path.append(sys.path[0] + "/../../") +from gspylib.common.DbClusterInfo import dbClusterInfo +from gspylib.common.Common import DefaultValue, ClusterCommand, \ + TempfileManagement +from gspylib.common.DbClusterStatus import DbClusterStatus +from gspylib.common.ErrorCode import ErrorCode +from gspylib.os.gsplatform import g_Platform + + +class OMCommand(): + """ + Utility class for executing OM commands + """ + + def __init__(self): + ''' + Constructor + ''' + + @staticmethod + def getLocalScript(script): + """ + function: get local script by GPHOME + input : script, path + output: path + """ + Current_Path = os.path.dirname(os.path.realpath(__file__)) + + if os.getgid() != 0: + gp_home = DefaultValue.getEnv("GPHOME") + Current_Path = os.path.join(gp_home, "script/gspylib/common") + + LocalScript = { + "Local_Backup": os.path.normpath( + Current_Path + "/../../local/Backup.py"), + "Local_Check_Config": os.path.normpath( + Current_Path + "/../../local/CheckConfig.py"), + "Local_Check_Install": os.path.normpath( + Current_Path + "/../../local/CheckInstall.py"), + "Local_Check_Uninstall": os.path.normpath( + Current_Path + "/../../local/CheckUninstall.py"), + "Local_Clean_Instance": os.path.normpath( + Current_Path + "/../../local/CleanInstance.py"), + "Local_Clean_OsUser": os.path.normpath( + Current_Path + "/../../local/CleanOsUser.py"), + "Local_Config_Hba": os.path.normpath( + Current_Path + "/../../local/ConfigHba.py"), + "Local_Config_Instance": os.path.normpath( + Current_Path + "/../../local/ConfigInstance.py"), + "Local_Init_Instance": os.path.normpath( + Current_Path + "/../../local/InitInstance.py"), + "Local_Install": os.path.normpath( + Current_Path + "/../../local/Install.py"), + "Local_Restore": os.path.normpath( + Current_Path + "/../../local/Restore.py"), + "Local_Uninstall": os.path.normpath( + Current_Path + "/../../local/Uninstall.py"), + "Local_PreInstall": os.path.normpath( + Current_Path + "/../../local/PreInstallUtility.py"), + "Local_Check_PreInstall": os.path.normpath( + Current_Path + "/../../local/CheckPreInstall.py"), + "Local_UnPreInstall": os.path.normpath( + Current_Path + "/../../local/UnPreInstallUtility.py"), + "Local_Roach": os.path.normpath( + Current_Path + "/../../local/LocalRoach.py"), + "Gauss_UnInstall": os.path.normpath( + Current_Path + "/../../gs_uninstall"), + "Gauss_Backup": os.path.normpath( + Current_Path + "/../../gs_backup"), + "Local_CheckOS": os.path.normpath( + Current_Path + "/../../local/LocalCheckOS.py"), + "Local_Check": os.path.normpath( + Current_Path + "/../../local/LocalCheck.py"), + "LOCAL_PERFORMANCE_CHECK": os.path.normpath( + Current_Path + "/../../local/LocalPerformanceCheck.py"), + "Gauss_CheckOS": os.path.normpath( + Current_Path + "/../../gs_checkos"), + "Gauss_PreInstall": os.path.normpath( + Current_Path + "/../../gs_preinstall"), + "Gauss_Replace": os.path.normpath( + Current_Path + "/../../gs_replace"), + "Gauss_Om": os.path.normpath(Current_Path + "/../../gs_om"), + "UTIL_GAUSS_STAT": os.path.normpath( + Current_Path + "/../../gspylib/common/GaussStat.py"), + "Gauss_Check": os.path.normpath(Current_Path + "/../../gs_check"), + "Local_Collect": os.path.normpath( + Current_Path +
"/../../local/LocalCollect.py"), + "Local_Kerberos": os.path.normpath( + Current_Path + "/../../local/KerberosUtility.py"), + "Local_Execute_Sql": os.path.normpath( + Current_Path + "/../../local/ExecuteSql.py"), + "Local_StartInstance": os.path.normpath( + Current_Path + "/../../local/StartInstance.py"), + "Local_StopInstance": os.path.normpath( + Current_Path + "/../../local/StopInstance.py"), + "Local_Check_Upgrade": os.path.normpath( + Current_Path + "/../../local/CheckUpgrade.py"), + "Local_Upgrade_Utility": os.path.normpath( + Current_Path + "/../../local/UpgradeUtility.py") + } + + return "python3 '%s'" % LocalScript[script] + + @staticmethod + def getSetCronCmd(user, appPath): + """ + function: Set the crontab + input : user, appPath + output: cmd + """ + log_path = DefaultValue.getOMLogPath(DefaultValue.OM_MONITOR_DIR_FILE, + "", appPath) + cronFile = "%s/gauss_cron_%d" % ( + DefaultValue.getTmpDirFromEnv(), os.getpid()) + cmd = "crontab -l > %s;" % cronFile + cmd += "sed -i '/\\/bin\\/om_monitor/d' %s; " % cronFile + cmd += "echo \"*/1 * * * * source /etc/profile;(if [ -f ~/.profile " \ + "];then source ~/.profile;fi);source ~/.bashrc;nohup " \ + "%s/bin/om_monitor -L %s >>/dev/null 2>&1 &\" >> %s;" % ( + appPath, log_path, cronFile) + cmd += "crontab -u %s %s;service cron restart;" % (user, cronFile) + cmd += "rm -f %s" % cronFile + + return cmd + + @staticmethod + def getRemoveCronCmd(user): + """ + function: get remove crontab command + input : user + output: cmd + """ + cmd = "crontab -u %s -r;service cron restart" % user + + return cmd + + @staticmethod + def adaptArchiveCommand(localInstDataDir, similarInstDataDir): + """ + function: Adapt guc parameter 'archive_command' for each new instance. + It will be invoked by GaussReplace.py and GaussDilatation.py + input : localInstDataDir, similarInstDataDir + output: NA + """ + GUC_PARAM_PATTERN = "^\\s*archive_command.*=.*$" + pattern = re.compile(GUC_PARAM_PATTERN) + archiveParaLine = "" + archiveDir = "%s/pg_xlog/archive" % localInstDataDir + archiveCommand = "" + try: + configFile = os.path.join(localInstDataDir, "postgresql.conf") + + with open(configFile, 'r') as fp: + resList = fp.readlines() + lineNum = 0 + for oneLine in resList: + lineNum += 1 + # skip blank line + if (oneLine.strip() == ""): + continue + # skip comment line + if ((oneLine.strip()).startswith('#')): + continue + # search valid line + result = pattern.match(oneLine) + if (result is not None): + # have adapt archive_command parameter + archiveParaLine = oneLine.replace(similarInstDataDir, + localInstDataDir) + archiveList = archiveParaLine.split('#') + if (len(archiveList) > 0): + archiveCommand = archiveList[0] + break + + if (archiveParaLine != ""): + if (archiveParaLine.find("%f") < 0): + raise Exception(ErrorCode.GAUSS_500["GAUSS_50009"] + + " The parameter archive command should " + "be set with %%f : %s." % archiveCommand) + + if (archiveParaLine.find("%p") < 0): + raise Exception(ErrorCode.GAUSS_500["GAUSS_50009"] + + " The parameter archive command should" + " be set with %%p: %s." 
% archiveCommand) + + setCmd = "sed -i \"%dc%s\" %s" % (lineNum, archiveParaLine, + configFile) + (status, output) = subprocess.getstatusoutput(setCmd) + if (status != 0): + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] + % setCmd + " Error: \n%s" % output) + + if (os.path.exists(archiveDir) and os.path.isdir(archiveDir)): + return + + mkDirCmd = "mkdir -p '%s' -m %s" % ( + archiveDir, DefaultValue.KEY_DIRECTORY_MODE) + (status, output) = subprocess.getstatusoutput(mkDirCmd) + if (status != 0): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50208"] + % archiveDir + " Error: \n%s." % output + + "The cmd is %s" % mkDirCmd) + except Exception as e: + raise Exception(str(e)) + + @staticmethod + def doCheckStaus(user, nodeId, cluster_normal_status=None, + expected_redistributing=""): + """ + function: Check cluster status + input : user, nodeId, cluster_normal_status, expected_redistributing + output: status, output + """ + try: + statusFile = "/home/%s/gauss_check_status_%d.dat" % ( + user, os.getpid()) + TempfileManagement.removeTempFile(statusFile) + cmd = ClusterCommand.getQueryStatusCmd(user, "", statusFile) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + TempfileManagement.removeTempFile(statusFile) + return (status, output) + + clusterStatus = DbClusterStatus() + clusterStatus.initFromFile(statusFile) + TempfileManagement.removeTempFile(statusFile) + except Exception as e: + DefaultValue.cleanTmpFile(statusFile) + raise Exception( + ErrorCode.GAUSS_516["GAUSS_51600"] + "Error: %s." % str(e)) + status = 0 + output = "" + statusRep = None + if nodeId > 0: + nodeStatus = clusterStatus.getDbNodeStatusById(nodeId) + if nodeStatus is None: + raise Exception(ErrorCode.GAUSS_516["GAUSS_51619"] % nodeId) + + status = 0 if nodeStatus.isNodeHealthy() else 1 + statusRep = nodeStatus.getNodeStatusReport() + else: + status = 0 if clusterStatus.isAllHealthy(cluster_normal_status) \ + and (clusterStatus.redistributing == + expected_redistributing or + expected_redistributing == "") else 1 + statusRep = clusterStatus.getClusterStatusReport() + output += "cluster_state : %s\n" % clusterStatus.clusterStatus + output += "redistributing : %s\n" % clusterStatus.redistributing + output += "node_count : %d\n" % statusRep.nodeCount + output += "Datanode State\n" + output += " primary : %d\n" % statusRep.dnPrimary + output += " standby : %d\n" % statusRep.dnStandby + output += " secondary : %d\n" % statusRep.dnDummy + output += " building : %d\n" % statusRep.dnBuild + output += " abnormal : %d\n" % statusRep.dnAbnormal + output += " down : %d\n" % statusRep.dnDown + + return (status, output) + + @staticmethod + def getClusterStatus(user, isExpandScene=False): + """ + function: get cluster status + input : user + output: clusterStatus + """ + userAbsolutePath = g_Platform.getUserHomePath() + statusFile = "%s/gauss_check_status_%d.dat" % ( + userAbsolutePath, os.getpid()) + TempfileManagement.removeTempFile(statusFile) + cmd = ClusterCommand.getQueryStatusCmd(user, "", statusFile) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + TempfileManagement.removeTempFile(statusFile) + return None + clusterStatus = DbClusterStatus() + clusterStatus.initFromFile(statusFile, isExpandScene) + TempfileManagement.removeTempFile(statusFile) + return clusterStatus + + @staticmethod + def getClusterDbNodeInfo(clusterUser, xmlFile=""): + """ + function: get cluster and database node info from static config file + input : clusterUser, xmlFile + output: NA + """ + try: + clusterInfo 
= dbClusterInfo() + if (os.getuid() == 0): + clusterInfo.initFromXml(xmlFile) + else: + clusterInfo.initFromStaticConfig(clusterUser) + hostName = DefaultValue.GetHostIpOrName() + dbNodeInfo = clusterInfo.getDbNodeByName(hostName) + if (dbNodeInfo is None): + raise Exception(ErrorCode.GAUSS_516["GAUSS_51619"] % hostName) + return clusterInfo, dbNodeInfo + except Exception as e: + raise Exception(str(e)) + + @staticmethod + def checkHostname(nodename): + """ + function: check host name + input : NA + output: NA + """ + try: + retry = 1 + cmd = "pssh -s -H %s hostname" % (nodename) + while True: + (status, output) = subprocess.getstatusoutput(cmd) + if status == 0 and output.find("%s" % nodename) >= 0: + break + if retry >= 3: + raise Exception(ErrorCode.GAUSS_512["GAUSS_51222"] + + " Command: \"%s\". Error: \n%s" + % (cmd, output)) + retry += 1 + time.sleep(1) + + hostnameCmd = "pssh -s -H %s 'cat /etc/hostname'" % (nodename) + (status, output) = subprocess.getstatusoutput(hostnameCmd) + if status == 0 and output.strip() == nodename: + pass + else: + raise Exception(ErrorCode.GAUSS_512["GAUSS_51248"] % nodename + + " Command: \"%s\". Error: \n%s" + % (hostnameCmd, output)) + + except Exception as e: + raise Exception(str(e)) + + @staticmethod + def checkHostnameMapping(clusterInfo, logFile): + """ + function: check host name mapping + input: NA + output: NA + """ + nodes = clusterInfo.getClusterNodeNames() + if (len(nodes) > 0): + try: + pool = ThreadPool(DefaultValue.getCpuSet()) + results = pool.map(OMCommand.checkHostname, nodes) + pool.close() + pool.join() + except Exception as e: + raise Exception(str(e)) diff --git a/script/gspylib/common/ParallelBaseOM.py b/script/gspylib/common/ParallelBaseOM.py new file mode 100644 index 0000000..a4c3d64 --- /dev/null +++ b/script/gspylib/common/ParallelBaseOM.py @@ -0,0 +1,945 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. 
+# ---------------------------------------------------------------------------- +import os +import sys +import time +import signal +import copy +import subprocess +import re +import getpass +from datetime import datetime, timedelta +from multiprocessing.dummy import Pool as ThreadPool + +sys.path.append(sys.path[0] + "/../../") +from gspylib.common.GaussLog import GaussLog +from gspylib.common.DbClusterInfo import dbClusterInfo +from gspylib.common.Common import DefaultValue, ClusterCommand, \ + TempfileManagement +from gspylib.common.DbClusterStatus import DbClusterStatus +from gspylib.common.OMCommand import OMCommand +from gspylib.os.gsfile import g_file +from gspylib.os.gsplatform import g_Platform +from gspylib.threads.SshTool import SshTool +from gspylib.common.ErrorCode import ErrorCode +from gspylib.component.Kernel.DN_OLAP.DN_OLAP import DN_OLAP + +SPACE_USAGE_DBUSER = 80 + + +class ParallelBaseOM(object): + """ + Base class for parallel OM commands + """ + ACTION_INSTALL = "install" + ACTION_CONFIG = "config" + ACTION_START = "start" + ACTION_REDISTRIBUTE = "redistribute" + ACTION_HEALTHCHECK = "healthcheck" + + HEALTH_CHECK_BEFORE = "before" + HEALTH_CHECK_AFTER = "after" + + def __init__(self): + ''' + Constructor + ''' + self.logger = None + self.clusterInfo = None + self.oldClusterInfo = None + self.sshTool = None + self.action = "" + + # Cluster config file. + self.xmlFile = "" + self.oldXmlFile = "" + + self.logType = DefaultValue.LOCAL_LOG_FILE + self.logFile = "" + self.localLog = "" + self.user = "" + self.group = "" + self.mpprcFile = "" + # Temporary directory for install + self.operateStepDir = TempfileManagement.getTempDir( + "%s_step" % self.__class__.__name__.lower()) + # Temporary files for install step + self.operateStepFile = "%s/%s_step.dat" % ( + self.operateStepDir, self.__class__.__name__.lower()) + self.initStep = "" + self.dws_mode = False + self.rollbackCommands = [] + self.etcdCons = [] + self.cmCons = [] + self.gtmCons = [] + self.cnCons = [] + self.dnCons = [] + # localMode is the same as isSingle in all OM scripts, except for + # gs_preinstall. + # in gs_preinstall, localMode means local mode for master-standby + # cluster. + # in gs_preinstall, localMode also means local mode for single + # cluster(will not create os user). + # in gs_preinstall, isSingle means single cluster, it will create + # os user. + # not isSingle and not localMode : master-standby cluster global + # mode(will create os user). + # not isSingle and localMode : master-standby cluster local + # mode(will not create os user). + # isSingle and not localMode : single cluster(will create os user). + # isSingle and localMode : single cluster(will not create os user). + self.localMode = False + self.isSingle = False + # Indicates whether there is a logical cluster. + # If elastic_group exists, the current cluster is a logical cluster. + # Otherwise, it is a large physical cluster.
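+ # A minimal sketch of how this flag is derived (the values shown are + # illustrative; the query is the one used by checkIsElasticGroupExist() + # further below): + # SELECT count(*) FROM pg_catalog.pgxc_group + # WHERE group_name = 'elastic_group' AND group_kind = 'e'; + # output '1' -> isElasticGroup = True (logical cluster) + # output '0' -> isElasticGroup = False (large physical cluster)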
+ self.isElasticGroup = False + self.isAddElasticGroup = False + self.lcGroup_name = "" + # Lock the cluster mode, there are two modes: exclusive lock and + # wait lock mode, + # the default exclusive lock + self.lockMode = "exclusiveLock" + + # SinglePrimaryMultiStandby support binary upgrade, inplace upgrade + self.isSinglePrimaryMultiStandby = False + + # Adapt to 200 and 300 + self.productVersion = None + + def initComponent(self): + """ + function: Init component + input : NA + output: NA + """ + for nodeInfo in self.clusterInfo.dbNodes: + self.initKernelComponent(nodeInfo) + + def initComponentAttributes(self, component): + """ + function: Init component attributes on current node + input : Object component + output: NA + """ + component.logger = self.logger + component.binPath = "%s/bin" % self.clusterInfo.appPath + component.dwsMode = self.dws_mode + + def initKernelComponent(self, nodeInfo): + """ + function: Init kernel component + input : Object nodeInfo + output: NA + """ + for inst in nodeInfo.datanodes: + component = DN_OLAP() + # init component cluster type + component.clusterType = self.clusterInfo.clusterType + component.instInfo = inst + self.initComponentAttributes(component) + self.dnCons.append(component) + + def initLogger(self, module=""): + """ + function: Init logger + input : module + output: NA + """ + # log level + LOG_DEBUG = 1 + self.logger = GaussLog(self.logFile, module, LOG_DEBUG) + + dirName = os.path.dirname(self.logFile) + self.localLog = os.path.join(dirName, DefaultValue.LOCAL_LOG_FILE) + + def initClusterInfo(self, refreshCN=True): + """ + function: Init cluster info + input : NA + output: NA + """ + try: + self.clusterInfo = dbClusterInfo() + if (refreshCN): + static_config_file = "%s/bin/cluster_static_config" % \ + DefaultValue.getInstallDir(self.user) + self.clusterInfo.initFromXml(self.xmlFile, static_config_file) + else: + self.clusterInfo.initFromXml(self.xmlFile) + except Exception as e: + raise Exception(str(e)) + self.logger.debug("Instance information of cluster:\n%s." % + str(self.clusterInfo)) + + def initClusterInfoFromStaticFile(self, user, flag=True): + """ + function: Function to init clusterInfo from static file + input : user + output: NA + """ + try: + self.clusterInfo = dbClusterInfo() + self.clusterInfo.initFromStaticConfig(user) + except Exception as e: + raise Exception(str(e)) + if flag: + self.logger.debug("Instance information of cluster:\n%s." % + str(self.clusterInfo)) + + def initSshTool(self, nodeNames, timeout=0): + """ + function: Init ssh tool + input : nodeNames, timeout + output: NA + """ + self.sshTool = SshTool(nodeNames, self.logger.logFile, timeout) + + def check_cluster_version_consistency(self, clusterNodes, newNodes=None): + """ + """ + self.logger.log("Check cluster version consistency.") + if newNodes is None: + newNodes = [] + dic_version_info = {} + # check version.cfg on every node. 
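+ # Illustrative example (the file contents are hypothetical): if + # 'cat $GAUSSHOME/bin/upgrade_version' prints three fields such as + # gaussdb 2.0.0 abc0123 + # then output.strip().split()[1:] is ['2.0.0', 'abc0123'] and the node + # is recorded as '2.0.0,abc0123'; if any node yields a different + # string, the set of values checked below has more than one member and + # an exception is raised.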
+ gp_home = DefaultValue.getEnv("GPHOME") + gauss_home = DefaultValue.getEnv("GAUSSHOME") + if not (os.path.exists(gp_home) and os.path.exists(gauss_home)): + GaussLog.exitWithError(ErrorCode.GAUSS_502["GAUSS_50201"] % + ("%s or %s" % (gp_home, gauss_home))) + for ip in clusterNodes: + if ip in newNodes: + cmd = "pssh -s -H %s 'cat %s/version.cfg'" % \ + (ip, DefaultValue.getEnv("GPHOME")) + else: + cmd = "pssh -s -H %s 'cat %s/bin/upgrade_version'" % \ + (ip, DefaultValue.getEnv("GAUSSHOME")) + status, output = subprocess.getstatusoutput(cmd) + if (status != 0): + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + " Error:\n%s" % str(output)) + if len(output.strip().split()) < 3: + raise Exception(ErrorCode.GAUSS_516["GAUSS_51623"]) + dic_version_info[ip] = ",".join(output.strip().split()[1:]) + + self.logger.debug("The cluster version on every node.") + for check_ip, version_info in dic_version_info.items(): + self.logger.debug("%s : %s" % (check_ip, version_info)) + if len(set(dic_version_info.values())) != 1: + L_inconsistent = list(set(dic_version_info.values())) + self.logger.debug("The package version on some nodes is " + "inconsistent\n%s" % str(L_inconsistent)) + raise Exception("The package version on some nodes is " + "inconsistent,%s" % str(L_inconsistent)) + self.logger.log("Successfully checked cluster version.") + + def checkBaseFile(self, checkXml=True): + """ + function: Check xml file and log file + input : checkXml + output: NA + """ + if (checkXml): + if (self.xmlFile == ""): + raise Exception(ErrorCode.GAUSS_500["GAUSS_50001"] % 'X' + ".") + + if (not os.path.exists(self.xmlFile)): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % + ("configuration file [%s]" % self.xmlFile)) + + if (not os.path.isabs(self.xmlFile)): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50213"] % + ("configuration file [%s]" % self.xmlFile)) + else: + self.xmlFile = "" + + if (self.logFile == ""): + self.logFile = DefaultValue.getOMLogPath(self.logType, + self.user, "", + self.xmlFile) + + if (not os.path.isabs(self.logFile)): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50213"] % "log") + + def initSignalHandler(self): + """ + function: Function to init signal handler + input : NA + output: NA + """ + signal.signal(signal.SIGINT, signal.SIG_IGN) + signal.signal(signal.SIGQUIT, signal.SIG_IGN) + signal.signal(signal.SIGTERM, signal.SIG_IGN) + signal.signal(signal.SIGALRM, signal.SIG_IGN) + signal.signal(signal.SIGHUP, signal.SIG_IGN) + signal.signal(signal.SIGUSR1, signal.SIG_IGN) + signal.signal(signal.SIGUSR2, signal.SIG_IGN) + + def print_signal_stack(self, frame): + """ + function: Function to print signal stack + input : frame + output: NA + """ + if (self.logger is None): + return + try: + import inspect + stacks = inspect.getouterframes(frame) + for curr in range(len(stacks)): + stack = stacks[curr] + self.logger.debug("Stack level: %d. File: %s. Function: " + "%s. LineNo: %d." % ( + curr, stack[1], stack[3], + stack[2])) + self.logger.debug("Code: %s." % ( + stack[4][0].strip().strip("\n"))) + except Exception as e: + self.logger.debug("Failed to print signal stack. Error: \n%s" % + str(e)) + + def raise_handler(self, signal_num, frame): + """ + function: Function to raise handler + input : signal_num, frame + output: NA + """ + if (self.logger is not None): + self.logger.debug("Received signal[%d]."
% (signal_num)) + self.print_signal_stack(frame) + raise Exception(ErrorCode.GAUSS_516["GAUSS_51614"] % (signal_num)) + + def setupTimeoutHandler(self): + """ + function: Function to set up time out handler + input : NA + output: NA + """ + signal.signal(signal.SIGALRM, self.timeout_handler) + + def setTimer(self, timeout): + """ + function: Function to set timer + input : timeout + output: NA + """ + self.logger.debug("Set timer. The timeout: %d." % timeout) + signal.signal(signal.SIGALRM, self.timeout_handler) + signal.alarm(timeout) + + def resetTimer(self): + """ + function: Reset timer + input : NA + output: NA + """ + signal.signal(signal.SIGALRM, signal.SIG_IGN) + self.logger.debug("Reset timer. Left time: %d." % signal.alarm(0)) + + def timeout_handler(self, signal_num, frame): + """ + function: Received the timeout signal + input : signal_num, frame + output: NA + """ + if (self.logger is not None): + self.logger.debug("Received the timeout signal: [%d]." % + (signal_num)) + self.print_signal_stack(frame) + raise Timeout("Time out.") + + def waitProcessStop(self, processKeywords, hostname): + """ + function: Wait for the process to stop + input : processKeywords, hostname + output: NA + """ + count = 0 + while (True): + psCmd = "ps ux|grep -v grep |awk '{print \$11}'|grep '%s' " % \ + processKeywords.strip() + (status, output) = self.sshTool.getSshStatusOutput( + psCmd, [hostname]) + # Determine whether the process can be found. + if (status[hostname] != DefaultValue.SUCCESS): + self.logger.debug("The %s process stopped." % processKeywords) + break + + count += 1 + if (count % 20 == 0): + self.logger.debug("The %s process exists." % processKeywords) + time.sleep(3) + + def managerOperateStepDir(self, action='create', nodes=None): + """ + function: manage the operate step directory + input : action, nodes + output: NA + """ + if nodes is None: + nodes = [] + try: + # Creating the backup directory + if (action == "create"): + cmd = "(if [ ! -d '%s' ];then mkdir -p '%s' -m %s;fi)" % ( + self.operateStepDir, self.operateStepDir, + DefaultValue.KEY_DIRECTORY_MODE) + else: + cmd = "(if [ -d '%s' ];then rm -rf '%s';fi)" % ( + self.operateStepDir, self.operateStepDir) + DefaultValue.execCommandWithMode(cmd, + "%s temporary directory" % action, + self.sshTool, + self.localMode or self.isSingle, + "", + nodes) + except Exception as e: + raise Exception(str(e)) + + def readOperateStep(self): + """ + function: read operate step signal + input : NA + output: currentStep + """ + currentStep = self.initStep + + if not os.path.exists(self.operateStepFile): + self.logger.debug("The %s does not exist." % self.operateStepFile) + return currentStep + + if not os.path.isfile(self.operateStepFile): + self.logger.debug("The %s must be a file."
% self.operateStepFile) + return currentStep + + with open(self.operateStepFile, "r") as fp: + line = fp.readline().strip() + if line is not None and line != "": + currentStep = line + + return currentStep + + def writeOperateStep(self, stepName, nodes=None): + """ + function: write operate step signal + input : step + output: NA + """ + if nodes is None: + nodes = [] + try: + # write the step into INSTALL_STEP + # open the INSTALL_STEP + with open(self.operateStepFile, "w") as g_DB: + # write the INSTALL_STEP + g_DB.write(stepName) + g_DB.write(os.linesep) + g_DB.flush() + # change the INSTALL_STEP permissions + g_file.changeMode(DefaultValue.KEY_FILE_MODE, self.operateStepFile) + + # distribute file to all nodes + cmd = "mkdir -p -m %s '%s'" % (DefaultValue.KEY_DIRECTORY_MODE, + self.operateStepDir) + DefaultValue.execCommandWithMode(cmd, + "create backup directory " + "on all nodes", + self.sshTool, + self.localMode or self.isSingle, + "", + nodes) + + if not self.localMode and not self.isSingle: + self.sshTool.scpFiles(self.operateStepFile, + self.operateStepDir, nodes) + except Exception as e: + # failed to write the step into INSTALL_STEP + raise Exception(str(e)) + + def distributeFiles(self): + """ + function: distribute package to every host + input : NA + output: NA + """ + self.logger.debug("Distributing files.") + try: + # get the all nodes + hosts = self.clusterInfo.getClusterNodeNames() + if DefaultValue.GetHostIpOrName() not in hosts: + raise Exception(ErrorCode.GAUSS_516["GAUSS_51619"] % + DefaultValue.GetHostIpOrName()) + hosts.remove(DefaultValue.GetHostIpOrName()) + # Send xml file to every host + DefaultValue.distributeXmlConfFile(self.sshTool, self.xmlFile, + hosts, self.mpprcFile) + # Successfully distributed files + self.logger.debug("Successfully distributed files.") + except Exception as e: + # failed to distribute package to every host + raise Exception(str(e)) + + def checkPreInstall(self, user, flag, nodes=None): + """ + function: check if have done preinstall on given nodes + input : user, nodes + output: NA + """ + if nodes is None: + nodes = [] + try: + cmd = "%s -U %s -t %s" % ( + OMCommand.getLocalScript("Local_Check_PreInstall"), user, flag) + DefaultValue.execCommandWithMode( + cmd, "check preinstall", self.sshTool, + self.localMode or self.isSingle, "", nodes) + except Exception as e: + raise Exception(str(e)) + + def checkNodeInstall(self, nodes=None, checkParams=None, + strictUserCheck=True): + """ + function: Check node install + input : nodes, checkParams, strictUserCheck + output: NA + """ + if nodes is None: + nodes = [] + if checkParams is None: + checkParams = [] + validParam = ["shared_buffers", "max_connections"] + cooGucParam = "" + for param in checkParams: + entry = param.split("=") + if (len(entry) != 2): + raise Exception(ErrorCode.GAUSS_500["GAUSS_50009"]) + if (entry[0].strip() in validParam): + cooGucParam += " -C \\\"%s\\\"" % param + self.logger.log("Checking installation environment on all nodes.") + cmd = "%s -U %s:%s -R %s %s -l %s -X '%s'" % ( + OMCommand.getLocalScript("Local_Check_Install"), self.user, + self.group, self.clusterInfo.appPath, cooGucParam, self.localLog, + self.xmlFile) + if (not strictUserCheck): + cmd += " -O" + self.logger.debug("Checking the install command: %s." 
% cmd) + DefaultValue.execCommandWithMode(cmd, + "check installation environment", + self.sshTool, + self.localMode or self.isSingle, + "", + nodes) + + def cleanNodeConfig(self, nodes=None, datadirs=None): + """ + function: Clean instance + input : nodes, datadirs + output: NA + """ + self.logger.log("Deleting instances from all nodes.") + if nodes is None: + nodes = [] + if datadirs is None: + datadirs = [] + cmdParam = "" + for datadir in datadirs: + cmdParam += " -D %s " % datadir + cmd = "%s -U %s %s -l %s" % ( + OMCommand.getLocalScript("Local_Clean_Instance"), + self.user, cmdParam, self.localLog) + DefaultValue.execCommandWithMode( + cmd, "clean instance", self.sshTool, + self.localMode or self.isSingle, "", nodes) + self.logger.log("Successfully deleted instances from all nodes.") + + @staticmethod + def getPrepareKeysCmd(key_file, user, confFile, destPath, logfile, + userProfile="", localMode=False): + """ + function: get etcd communication keys command + input : key_file, user, confFile, destPath, logfile, userProfile, + localMode (if true, do not scp keys) + output: NA + """ + if (not os.path.exists(key_file)): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % key_file) + if (not userProfile): + userProfile = DefaultValue.getMpprcFile() + # create the directory on all nodes + cmd = "source %s; %s -U %s -X %s --src-file=%s --dest-path=%s -l %s" \ + % (userProfile, OMCommand.getLocalScript("Local_PrepareKeys"), + user, confFile, key_file, destPath, logfile) + # if local mode, only prepare keys, do not scp keys to cluster nodes + if (localMode): + cmd += " -L" + return cmd + + def getClusterRings(self, clusterInfo): + """ + function: get clusterRings from cluster info + input: DbclusterInfo() instance + output: list + """ + hostPerNodeList = self.getDNHostnamesPerNode(clusterInfo) + # Loop over the per-node hostname lists that hold the master and + # standby DB instances. + for i in range(len(hostPerNodeList)): + # Loop the lists after the i-th list + for perNodelist in hostPerNodeList[i + 1:len(hostPerNodeList)]: + # Define a flag + flag = 0 + # Loop the elements of each perNodelist + for hostNameElement in perNodelist: + # If an element also appears in hostPerNodeList[i], merge + # every element of perNodelist into hostPerNodeList[i] + if hostNameElement in hostPerNodeList[i]: + flag = 1 + for element in perNodelist: + if element not in hostPerNodeList[i]: + hostPerNodeList[i].append(element) + if (flag == 1): + hostPerNodeList.remove(perNodelist) + + return hostPerNodeList + + def getDNHostnamesPerNode(self, clusterInfo): + """ + function: get DB hostnames per node + input: DbclusterInfo() instance + output: list + """ + hostPerNodeList = [] + for dbNode in clusterInfo.dbNodes: + nodeDnlist = [] + # loop per node + for dnInst in dbNode.datanodes: + if (dnInst.instanceType == DefaultValue.MASTER_INSTANCE): + if dnInst.hostname not in nodeDnlist: + nodeDnlist.append(dnInst.hostname) + # get other standby and dummy hostname + instances = clusterInfo.getPeerInstance(dnInst) + for inst in instances: + if inst.hostname not in nodeDnlist: + nodeDnlist.append(inst.hostname) + if nodeDnlist != []: + hostPerNodeList.append(nodeDnlist) + return hostPerNodeList + + # for olap function + def checkIsElasticGroupExist(self, dbNodes): + """ + function: Check if elastic_group exists.
+ input : dbNodes + output: NA + """ + self.logger.debug("Checking if elastic group exists.") + + self.isElasticGroup = False + coorNode = [] + # traverse old nodes + for dbNode in dbNodes: + if (len(dbNode.coordinators) >= 1): + coorNode.append(dbNode.coordinators[0]) + break + + # check elastic group + CHECK_GROUP_SQL = "SELECT count(*) FROM pg_catalog.pgxc_group " \ + "WHERE group_name='elastic_group' " \ + "and group_kind='e'; " + (checkstatus, checkoutput) = ClusterCommand.remoteSQLCommand( + CHECK_GROUP_SQL, self.user, coorNode[0].hostname, coorNode[0].port) + if (checkstatus != 0 or not checkoutput.isdigit()): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50219"] % + "node group" + " Error:\n%s" % str(checkoutput)) + elif (checkoutput.strip() == '1'): + self.isElasticGroup = True + elif (checkoutput.strip() == '0'): + self.isElasticGroup = False + else: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50219"] % + "the number of node group") + + self.logger.debug("Successfully checked if elastic group exists.") + + def checkHostnameIsLoop(self, nodenameList): + """ + function: check whether the specified hostnames form complete rings + input : nodenameList + output: (clusterRings, nodenameRings) + """ + isRing = True + # 1.get ring information in the cluster + clusterRings = self.getClusterRings(self.clusterInfo) + nodenameRings = [] + # 2.Check if the node is in the ring + for num in iter(clusterRings): + ringNodeList = [] + for nodename in nodenameList: + if (nodename in num): + ringNodeList.append(nodename) + if (len(ringNodeList) != 0 and len(ringNodeList) == + len(num)): + nodenameRings.append(ringNodeList) + if (len(ringNodeList) != 0 and len(ringNodeList) != + len(num)): + isRing = False + break + else: + continue + if not isRing: + raise Exception(ErrorCode.GAUSS_500["GAUSS_50004"] % "h" + + " The hostname (%s) specified by the -h parameter " + "must be looped." % ", ".join(nodenameList)) + return (clusterRings, nodenameRings) + + def getDNinstanceByNodeName(self, hostname, isMaster=True): + """ + function: Get the DB instance of the node based on the node name. + input : hostname + isMaster: get master DB instance + output: NA + """ + masterdnInsts = [] + standbydnInsts = [] + for dbNode in self.clusterInfo.dbNodes: + if (dbNode.name == hostname): + for dbInst in dbNode.datanodes: + # get master DB instance + if (dbInst.instanceType == DefaultValue.MASTER_INSTANCE): + masterdnInsts.append(dbInst) + # get standby or dummy DB instance + else: + standbydnInsts.append(dbInst) + + if (isMaster): + return masterdnInsts + else: + return standbydnInsts + + def getSQLResultList(self, sql, user, hostname, port, + database="postgres"): + """ + function: execute a SQL statement remotely and return the output + split into lines + input : sql, user, hostname, port, database + output: resultList + """ + (status, output) = ClusterCommand.remoteSQLCommand(sql, user, + hostname, port, + False, database) + if status != 0 or ClusterCommand.findErrorInSql(output): + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % sql + + " Error:\n%s" % str(output)) + # split the output string with '\n' + resultList = output.split("\n") + return resultList + + def getCooInst(self): + """ + function: get CN instance + input : NA + output: CN instance + """ + coorInst = [] + # get CN on nodes + for dbNode in self.clusterInfo.dbNodes: + if (len(dbNode.coordinators) >= 1): + coorInst.append(dbNode.coordinators[0]) + # check if contain CN on nodes + if (len(coorInst) == 0): + raise Exception(ErrorCode.GAUSS_526["GAUSS_52602"]) + else: + return coorInst + + def getGroupName(self, fieldName, fieldVaule): + """ + function: Get nodegroup name by field name and field value.
+ input : field name and field value + output: node group name + """ + # 1.get CN instance info from cluster + cooInst = self.getCooInst() + + # 2.obtain the node group + OBTAIN_SQL = "select group_name from pgxc_group where %s = %s; " % \ + (fieldName, fieldVaule) + # execute the sql command + (status, output) = ClusterCommand.remoteSQLCommand(OBTAIN_SQL, + self.user, + cooInst[0].hostname, + cooInst[0].port, + ignoreError=False) + if (status != 0): + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % + OBTAIN_SQL + " Error:\n%s" % str(output)) + + return output.strip() + + def killKernalSnapshotThread(self, dnInst): + """ + function: kill the snapshot thread in the kernel + (to avoid deadlock with redistribution) + input : dnInst + output: NA + """ + self.logger.debug("Stopping snapshot thread in database node Kernel.") + killSnapshotSQL = "select * from kill_snapshot();" + + (status, output) = ClusterCommand.remoteSQLCommand( + killSnapshotSQL, self.user, dnInst.hostname, dnInst.port, + False, DefaultValue.DEFAULT_DB_NAME) + if (status != 0): + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % + killSnapshotSQL + " Error:\n%s" % str(output)) + self.logger.debug("Successfully stopped snapshot " + "thread in database node Kernel.") + + def createServerCa(self, hostList=None): + """ + function: create server ca file + input : hostList + output: NA + """ + self.logger.debug("Generating CA files.") + if hostList is None: + hostList = [] + appPath = DefaultValue.getInstallDir(self.user) + caPath = os.path.join(appPath, "share/sslcert/om") + self.logger.debug("The ca file dir is: %s." % caPath) + if (len(hostList) == 0): + for dbNode in self.clusterInfo.dbNodes: + hostList.append(dbNode.name) + # Create CA dir and prepare files for using. + self.logger.debug("Create CA file directory.") + try: + DefaultValue.createCADir(self.sshTool, caPath, hostList) + self.logger.debug("Add hostname to config file.") + DefaultValue.createServerCA(DefaultValue.SERVER_CA, caPath, + self.logger) + # Clean useless files, and change permission of ca file to 600. + DefaultValue.cleanServerCaDir(caPath) + self.logger.debug("Scp CA files to all nodes.") + except Exception as e: + certFile = caPath + "/demoCA/cacert.pem" + if os.path.exists(certFile): + g_file.removeFile(certFile) + DefaultValue.cleanServerCaDir(caPath) + raise Exception(str(e)) + if not self.isSingle: + # localhost no need scp files + for certFile in DefaultValue.SERVER_CERT_LIST: + scpFile = os.path.join(caPath, "%s" % certFile) + self.sshTool.scpFiles(scpFile, caPath, hostList) + self.logger.debug("Successfully generated server CA files.") + + def createGrpcCa(self, hostList=None): + """ + function: create grpc ca file + input : hostList + output: NA + """ + self.logger.debug("Generating grpc CA files.") + if hostList is None: + hostList = [] + appPath = DefaultValue.getInstallDir(self.user) + caPath = os.path.join(appPath, "share/sslcert/grpc") + self.logger.debug("The ca file dir is: %s." % caPath) + if (len(hostList) == 0): + for dbNode in self.clusterInfo.dbNodes: + hostList.append(dbNode.name) + # Create CA dir and prepare files for using. + self.logger.debug("Create CA file directory.") + try: + DefaultValue.createCADir(self.sshTool, caPath, hostList) + self.logger.debug("Add hostname to config file.") + configPath = os.path.join(appPath, + "share/sslcert/grpc/openssl.cnf") + self.logger.debug("The ca file dir is: %s." % caPath) + # Add hostname to openssl.cnf file.
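+ # changeOpenSslConf presumably writes each node name from hostList + # into the alt_names section of openssl.cnf so that the signed + # certificate is valid on every node; e.g. (hypothetical contents): + # hostList = ['node1', 'node2'] -> DNS.1 = node1 + # DNS.2 = node2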
+ DefaultValue.changeOpenSslConf(configPath, hostList) + self.logger.debug("Generate CA files.") + DefaultValue.createCA(DefaultValue.GRPC_CA, caPath) + # Clean useless files, and change permission of ca file to 600. + DefaultValue.cleanCaDir(caPath) + self.logger.debug("Scp CA files to all nodes.") + except Exception as e: + certFile = caPath + "/demoCA/cacertnew.pem" + if os.path.exists(certFile): + g_file.removeFile(certFile) + DefaultValue.cleanCaDir(caPath) + raise Exception(str(e)) + for certFile in DefaultValue.GRPC_CERT_LIST: + scpFile = os.path.join(caPath, "%s" % certFile) + self.sshTool.scpFiles(scpFile, caPath, hostList) + self.logger.debug("Successfully generated grpc CA files.") + + def genCipherAndRandFile(self, hostList=None, initPwd=None): + self.logger.debug("Encrypting cipher and rand files.") + if hostList is None: + hostList = [] + appPath = DefaultValue.getInstallDir(self.user) + binPath = os.path.join(appPath, "bin") + retry = 0 + while True: + if not initPwd: + sshpwd = getpass.getpass("Please enter password for database:") + sshpwd_check = getpass.getpass("Please repeat for database:") + else: + sshpwd = sshpwd_check = initPwd + if sshpwd_check != sshpwd: + sshpwd = "" + sshpwd_check = "" + self.logger.error( + ErrorCode.GAUSS_503["GAUSS_50306"] % "database" + + "The two passwords are different, " + "please enter password again.") + else: + cmd = "%s/gs_guc encrypt -M server -K %s -D %s " % (binPath, + sshpwd, + binPath) + (status, output) = subprocess.getstatusoutput(cmd) + sshpwd = "" + sshpwd_check = "" + initPwd = "" + if status != 0: + self.logger.error( + ErrorCode.GAUSS_503["GAUSS_50322"] % "database" + + "Error:\n %s" % output) + else: + break + if retry >= 2: + raise Exception( + ErrorCode.GAUSS_503["GAUSS_50322"] % "database") + retry += 1 + g_file.changeMode(DefaultValue.KEY_FILE_MODE, + "'%s'/server.key.cipher" % binPath) + g_file.changeMode(DefaultValue.KEY_FILE_MODE, + "'%s'/server.key.rand" % binPath) + if len(hostList) == 0: + for dbNode in self.clusterInfo.dbNodes: + hostList.append(dbNode.name) + if not self.isSingle: + # localhost no need scp files + for certFile in DefaultValue.BIN_CERT_LIST: + scpFile = os.path.join(binPath, "%s" % certFile) + self.sshTool.scpFiles(scpFile, binPath, hostList) + self.logger.debug("Successfully encrypted cipher and rand files.") + + +class Timeout(Exception): + pass diff --git a/script/gspylib/common/ParameterParsecheck.py b/script/gspylib/common/ParameterParsecheck.py new file mode 100644 index 0000000..294ecf7 --- /dev/null +++ b/script/gspylib/common/ParameterParsecheck.py @@ -0,0 +1,680 @@ +# -*- coding:utf-8 -*- +############################################################################# +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +# Description : ParameterParsecheck.py is a utility to get Parameter +# information and check it. 
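+# A minimal usage sketch (the module name must be one of the keys of +# ParameterDict defined below; the returned dict keys are assumptions based +# on the parsing code in this file): +# parser = Parameter() +# values = parser.ParameterCommandLine("install") +# logFile = values.get("logFile", "")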
+############################################################################# +import os +import getopt +import sys + +sys.path.append(sys.path[0] + "/../../") +from gspylib.common.ErrorCode import ErrorCode +from gspylib.common.Common import DefaultValue +from gspylib.common.GaussLog import GaussLog +from gspylib.os.gsfile import g_file +from gspylib.common.VersionInfo import VersionInfo + +PARAMETER_VALUEDICT = {} +PARAMETER_KEYLIST = [] +ParameterDict = {} +Itemstr = [] +skipItems = [] +user_passwd = [] +EnvParams = [] +DbInitParam = [] +DataGucParam = [] +NODE_NAME = [] + +# Add parameter: the logical cluster name +PARA_CHECK_LIST = ["-t", "-h", "-m", "--mode", + "-i", "-j", "-U", "-u", "-G", "-g", "--alarm-type", + "-n", "-g", + "-N", "--time-out", "--alarm-component", + "--parallel-jobs", '--redis-mode', "--ring-num", + "--virtual-ip", + "--nodeName", "--name", "--failure-limit"] +PATH_CHEKC_LIST = ["-M", "-o", "-f", "-X", "-P", "-s", "-R", "-Q", + "--position", "-B", + "--backupdir", "--sep-env-file", "-l", "--logpath", + "--backup-dir", + "--priority-tables", "--exclude-tables"] +VALUE_CHECK_LIST = ["|", ";", "&", "$", "<", ">", "`", "\\", "'", "\"", "{", + "}", "(", ")", + "[", "]", "~", "*", "?", "!", "\n"] + +# append ':' after a short option if it requires a parameter +# append '=' after a long option if it requires a parameter +# no child branch +gs_preinstall = ["-?", "--help", "-V", "--version", "-U:", "-G:", "-L", + "--skip-os-set", "-X:", + "--env-var=", "--sep-env-file=", "--skip-hostname-set", + "-l:", "--non-interactive"] +gs_install = ["-?", "--help", "-V", "--version", "-X:", "-l:", + "--gsinit-parameter=", "--dn-guc=", + "--time-out=", "--alarm-component="] +gs_uninstall = ["-?", "--help", "-V", "--version", "-l:", "-L", + "--delete-data"] +gs_postuninstall = ["-?", "--help", "-V", "--version", "--delete-user", + "--delete-group", "--clean-gphome", + "-U:", "-X:", "-l:", "-L"] +gs_check = ["-?", "--help", "-V", "--version", "-e:", "-i:", + "-U:", "-o:", "-l:", "-L", "--hosts=", + "--format=", "--cid=", "--disk-threshold=", + "--time-out=", "--routing=", "--skip-items=", + "--ShrinkNodes=", "--nodegroup-name=", + "--skip-root-items", "--set"] +gs_sshexkey = ["-?", "--help", "-V", "--version", + "-f:", "--skip-hostname-set", "-l:", "-h:", "-W:", "--no-deduplicate"] +gs_backup = ["-?", "--help", "-V", "--version", "--backup-dir=", + "--parameter", "--force", + "--binary", "--all", "-l:", "-h:", "-t:", "-X:"] +gs_collector = ["-?", "--help", "-V", "--version", "--begin-time=", + "--end-time=", + "--keyword=", "--speed-limit=", "-h:", "-f:", "-o:", + "-l:", "-C:"] +gs_checkperf = ["-?", "--help", "-V", "--version", "--detail", "-o:", + "-i:", "-l:", "-U:"] +gs_ssh = ["-?", "--help", "-V", "--version", "-c:"] +gs_checkos = ["-?", "--help", "-V", "--version", "-h:", "-f:", "-o:", + "-i:", "--detail", + "-l:", "-X:"] +gs_expansion = ["-?", "--help", "-V", "--version", "-U:", "-G:", "-L", + "-X:", "-h:", "--sep-env-file="] +gs_dropnode = ["-?", "--help", "-V", "--version", "-U:", "-G:", + "-h:", "--sep-env-file="] + +# gs_om child branch +gs_om_start = ["-t:", "-?", "--help", "-V", "--version", "-h:", "-I:", + "--time-out=", "--az=", "-l:", "--nodeId=", "-D:", + "--security-mode="] +gs_om_stop = ["-t:", "-?", "--help", "-V", "--version", "-h:", "-I:", "-m:", + "--az=", "-l:", "--mode=", "--nodeId=", "--time-out=", "-D:"] +gs_om_restart = ["-t:", "-?", "--help", "-V", "--version", "-h:", "-I:", + "--time-out=", "--az=", "-l:", "--nodeId=", "-D:", + "--security-mode="] +gs_om_view =
["-t:", "-?", "--help", "-V", "--version", "-o:", "-l:"] +gs_om_query = ["-t:", "-?", "--help", "-V", "--version", "-o:", "-l:"] +gs_om_status = ["-t:", "-?", "--help", "-V", "--version", "-h:", "-o:", + "--detail", "--all", "-l:"] +gs_om_generateconf = ["-t:", "-?", "--help", "-V", "--version", "-X:", + "--distribute", "-l:"] +gs_om_cert = ["-t:", "-?", "--help", "-V", "--version", "-L", "-l:", + "--cert-file=", "--rollback"] +gs_om_kerberos = ["-t:", "-?", "--help", "-V", "--version", "-m:", "-U:", + "-X:", "-l:", "--krb-server", "--krb-client"] +gs_sql_list = ["-t:", "-?", "--help", "-V", "--version", "-c:", + "--dbname=", "--dbuser=", "-W:"] +gs_start = ["-n:", "-?", "--help", "-V", "--version", "-t:", + "-D:"] +gs_stop = ["-n:", "-?", "--help", "-V", "--version", "-t:", + "-D:", "-m:"] +gs_om_refreshconf = ["-t:", "-?", "--help", "-V", "--version", "-l:"] +# gs_upgradectl child branch +# AP and TP are same +gs_upgradectl_chose_strategy = ["-t:", "-?", "--help", "-V", "--version", + "-l:"] +# auto-upgrade parameter lists +gs_upgradectl_auto_upgrade = ["-t:", "-?", "--help", "-V", "--version", "-l:", + "-X:", "--grey"] +# auto-rollback parameter lists +gs_upgradectl_auto_rollback = ["-t:", "-?", "--help", "-V", "--version", + "-l:", "-X:", "--force"] +# commit-upgrade parameter lists +gs_upgradectl_commit = ["-t:", "-?", "--help", "-V", "--version", "-l:", "-X:"] + +ParameterDict = {"preinstall": gs_preinstall, + "install": gs_install, + "uninstall": gs_uninstall, + "checkos": gs_checkos, + "checkperf": gs_checkperf, + "check": gs_check, + "auto_upgrade": gs_upgradectl_auto_upgrade, + "chose_strategy": gs_upgradectl_chose_strategy, + "commit_upgrade": gs_upgradectl_commit, + "auto_rollback": gs_upgradectl_auto_rollback, + "start": gs_om_start, + "stop": gs_om_stop, + "restart": gs_om_restart, + "status": gs_om_status, + "generateconf": gs_om_generateconf, + "cert": gs_om_cert, + "kerberos": gs_om_kerberos, + "sshexkey": gs_sshexkey, + "backup": gs_backup, + "collector": gs_collector, + "ssh": gs_ssh, + "postuninstall": gs_postuninstall, + "view": gs_om_view, + "query": gs_om_query, + "refreshconf": gs_om_refreshconf, + "expansion": gs_expansion, + "dropnode": gs_dropnode + } + +# List of scripts with the -t parameter +special_list = ["gs_om", "backup", "upgradectl"] + +# The -t parameter list +action_om = ["start", "stop", "status", "restart", "generateconf", "kerberos", + "cert", "view", "query", "refreshconf"] +action_upgradectl = ["chose-strategy", "auto-upgrade", "auto-rollback", + "commit-upgrade"] + + +class Parameter(): + ''' + get Parameter information and check it. + ''' + + def __init__(self): + ''' + ''' + self.action = "" + self.mode = "" + self.helpflag = False + self.versionflag = False + + def ParseParameterValue(self, module): + """ + function: parse the parameter value + input : parameter_list + output: options + """ + # get the parameter list + (shortParameter, longParameter) = self.getParseParameter(module) + + try: + paraList = sys.argv[1:] + for paraInfo in paraList: + if (paraInfo.startswith('--')): + isFlag = False + for longPara in longParameter: + if (paraInfo[2:].startswith(longPara.strip("="))): + isFlag = True + if (not isFlag): + GaussLog.exitWithError( + ErrorCode.GAUSS_500["GAUSS_50000"] % paraInfo) + # check delete parameter -h and -f, if specified lcname, + # not required -h or -f. 
+ check_delete_name = False + for check_i in sys.argv[1:]: + if ("--name" in check_i): + check_delete_name = True + break + (opts, args) = getopt.getopt(sys.argv[1:], shortParameter, + longParameter) + except Exception as e: + s1 = str(e).split(" ") + option = s1[1] + if ("requires argument" in str(e)): + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50004"] % \ + option[1:] + " Error:\n%s" % str(e)) + elif ("not recognized" in str(e)): + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50000"] + % option) + elif ("not a unique prefix" in str(e)): + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50006"] + % option) + else: + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50000"] + % str(e)) + + if (len(args) > 0): + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50000"] + % str(args[0])) + + return opts + + def moveCrypto(self, module): + """ + function: do the paramiko crypto configuration for modules that + need to read remote passwords + input : module + output: NA + """ + if (module in ("preinstall", "sshexkey", "expansion")): + DefaultValue.doConfigForParamiko() + + def printVersionInfo(self): + """ + function: print version information, then exit + input : NA + output: NA + """ + if (self.versionflag): + print("%s %s" % (sys.argv[0].split("/")[-1], + VersionInfo.COMMON_VERSION)) + sys.exit(0) + + def ParameterCommandLine(self, module): + """ + function: Parse the parameter + input : module + output: PARAMETER_VALUEDICT + """ + # copy crypto + self.moveCrypto(module) + + # Determines whether help and version information is output + self.helpflag, self.versionflag = self.getHelpAndVersionStatus() + if (self.helpflag): + PARAMETER_VALUEDICT['helpFlag'] = self.helpflag + return PARAMETER_VALUEDICT + + # print version information + self.printVersionInfo() + + # Special handling of the -t parameter + self.getActionParameterValue(module) + + # get the parameter list + opts = self.ParseParameterValue(module) + + parameterNeedValue = {"-t": "action", + "-c": "cmd", + "-m": "Mode", + "--mode": "Mode", + # hotpatch name + "-n": "patch_name", + "-d": "destPath", + "-s": "sourcePath", + "-j": "jobs", + "-U": "user", + "-G": "group", + "-I": "instance_name", + "-e": "scenes", + "-C": "configFile", + "--format": "format", + "--cid": "cid", + "--routing": "routing", + "--ShrinkNodes": "shrinkNodes", + "--az": "az_name", + "--root-passwd": "rootPasswd", + "--alarm-type": "warningType", + "--alarm-server-addr": "warningserverip", + "--time-out": "time_out", + "--alarm-component": "alarm_component", + "--SSD-fault-time": "SSDFaultTime", + "--begin-time": "begintime", + "--end-time": "endtime", + "--keyword": "keyword", + "--redis-mode": "redismode", + "--failure-limit": "failure_limit", + "--virtual-ip": "virtual-ip", + "--master": "master", + "--standby": "standby", + "--disk-threshold": "disk-threshold", + "--target": "target", + "--name": "name", + "-N": "DSN", + "--type": "type", + "--remote-host": "remote_host", + "--remote-env-file": "remote_mpprcfile", + "--dilatation-mode": "dilatation_mode", + "--nodegroup-name": "nodegroup_name", + "--speed-limit": "speedLimit", + # add "resourcectl" for resource control + # in data redistribution + "--resource-level": "resource_level", + "-p": "port", + "--dn-port": "dn-port", + "--dn-ip": "dn-ip", + "--interval": "interval", + "--threshold": "threshold", + "--check-count": "check_count", + "--wait-count": "wait_count", + "--option": "option", + "--dbname": "dbname", + "--dbuser": "dbuser", + "--nodeId": "nodeId", + "--security-mode": "security_mode" + } + parameterNeedValue_keys = parameterNeedValue.keys() + + parameterIsBool = {"-L":
"localMode", + "--set": "set", + "--skip-root-items": "skipRootItems", + "--non-interactive": "preMode", + "--skip-os-set": "skipOSSet", + "--skip-hostname-set": "skipHostnameSet", + "--no-deduplicate": "noDeduplicate", + "--reset": "reset", + "--parameter": "isParameter", + "--binary": "isBinary", + "--delete-data": "cleanInstance", + "--delete-user": "delete-user", + "--delete-group": "delete-group", + "--dws-mode": "dws-mode", + "--detail": "show_detail", + "--detail-all": "show_detail_all", + "--rollback": "rollback", + "--vacuum-full": "enable_vacuum", + "--fast-redis": "enable_fast", + "--distribute": "distribute", + "--build-redistb": "buildTable", + "--key-files": "key-files", + "--all": "all", + "--upgrade": "upgrade", + "--lcname-only": "lcname-only", + "--high-perform": "high-perform", + "--elastic-group": "elastic-group", + "--addto-elastic-group": "isAddElasticGroup", + "--express": "express", + "--checkdisk": "checkdisk", + "--inplace": "inplace", + "--continue": "continue", + "--force": "force", + "--grey": "grey", + "--agent-mode": "agentMode", + "--krb-server": "krb-server", + "--krb-client": "krb-client", + } + parameterIsBool_keys = parameterIsBool.keys() + + # Parameter assignment and return + for (key, value) in opts: + if (key in parameterNeedValue_keys): + PARAMETER_VALUEDICT[parameterNeedValue[key]] = value + elif (key in parameterIsBool_keys): + PARAMETER_VALUEDICT[parameterIsBool[key]] = True + elif (key == "-h"): + # Only obtain the last value of hostname + del NODE_NAME[:] + for node in value.strip().split(","): + if (node is not None and node != "" and ( + node not in NODE_NAME)): + NODE_NAME.append(node.strip()) + elif (key == "-W" or key == "--password"): + user_passwd.append(value) + elif (key == "-D"): + PARAMETER_VALUEDICT['dataDir'] = os.path.normpath(value) + elif (key == "-M"): + PARAMETER_VALUEDICT['cgroupMountDir'] = \ + os.path.realpath(value.strip()) + elif (key == "-o"): + PARAMETER_VALUEDICT['outFile'] = os.path.realpath(value) + if (module not in ["collector", "check"]): + self.createOutputDir(os.path.realpath(value)) + elif (key == "-i"): + for item in value.strip().split(","): + if item is not None and item != "" \ + and (item not in Itemstr): + Itemstr.append(item) + elif (key == "--skip-items"): + for item in value.strip().split(","): + if (item is not None and item != "" and ( + item not in skipItems)): + skipItems.append(item) + elif self.action != "license" and ( + key == "-f" or key == "--hosts"): + hostFile = self.checkPath(key, value) + PARAMETER_VALUEDICT['hostfile'] = os.path.realpath(hostFile) + elif (key == "-X"): + if (module != "uninstall"): + xmlFile = self.checkPath(key, value) + PARAMETER_VALUEDICT['confFile'] = os.path.realpath(xmlFile) + else: + xmlFile = str(value) + PARAMETER_VALUEDICT['confFile'] = os.path.realpath(xmlFile) + elif (key == "--env-var"): + EnvParams.append(value) + elif (key == "--sep-env-file"): + PARAMETER_VALUEDICT['mpprcFile'] = os.path.realpath(value) + elif (key == "--gsinit-parameter"): + DbInitParam.append(value) + elif (key == "--dn-guc"): + DataGucParam.append(value) + elif (key == "-l"): + PARAMETER_VALUEDICT['logFile'] = os.path.realpath(value) + elif (key == "--backup-dir"): + PARAMETER_VALUEDICT['backupDir'] = \ + os.path.realpath(value.strip()) + elif (key == "--all"): + PARAMETER_VALUEDICT['isParameter'] = True + PARAMETER_VALUEDICT['isBinary'] = True + elif (key == "--parallel-jobs"): + paralleljobs = self.checkParamternum(key, value) + PARAMETER_VALUEDICT['paralleljobs'] = paralleljobs + 
+            elif (key == "-g"):
+                nodesNum = self.checkParamternum(key, value)
+                PARAMETER_VALUEDICT['nodesNum'] = nodesNum
+            elif (key == "--ring-num"):
+                ringNumbers = self.checkParamternum(key, value)
+                PARAMETER_VALUEDICT['ringNumbers'] = ringNumbers
+            elif (key == "--cert-file"):
+                PARAMETER_VALUEDICT['cert-file'] = \
+                    os.path.realpath(value.strip())
+            elif (key == "--priority-tables"):
+                PARAMETER_VALUEDICT['priority-tables'] = \
+                    os.path.realpath(value.strip())
+            elif key == "--role":
+                PARAMETER_VALUEDICT['role'] = value.strip()
+            elif (key == "--exclude-tables"):
+                PARAMETER_VALUEDICT['exclude-tables'] = \
+                    os.path.realpath(value.strip())
+
+            # Only check the '/' symbol for gs_lcct.
+            if key in ("--name", "--nodegroup-name"):
+                self.checkLcGroupName(key, value)
+            Parameter.checkParaVaild(key, value)
+
+        parameterIsList = {"passwords": user_passwd,
+                           "envparams": EnvParams,
+                           "dbInitParams": DbInitParam,
+                           "dataGucParams": DataGucParam,
+                           "itemstr": Itemstr,
+                           "skipItems": skipItems,
+                           "nodename": NODE_NAME
+                           }
+        parameterlenkeys = parameterIsList.keys()
+        for key in parameterlenkeys:
+            if (len(parameterIsList[key]) > 0):
+                PARAMETER_VALUEDICT[key] = parameterIsList[key]
+        return PARAMETER_VALUEDICT
+
+    @staticmethod
+    def checkParaVaild(para, value):
+        """
+        function: check whether the parameter value is valid
+        input : NA
+        output: NA
+        """
+        for role in VALUE_CHECK_LIST:
+            if PARA_CHECK_LIST.__contains__(para):
+                if value.find(role) >= 0:
+                    GaussLog.exitWithError(ErrorCode.GAUSS_500[
+                                               "GAUSS_50011"] % \
+                                           (para,
+                                            value) + " Invalid value: %s." %
+                                           role)
+            if PATH_CHEKC_LIST.__contains__(para):
+                if os.path.realpath(value).find(role) >= 0:
+                    GaussLog.exitWithError(ErrorCode.GAUSS_500[
+                                               "GAUSS_50011"] % \
+                                           (para, value) +
+                                           " Invalid value: %s." % role)
+
+    def checkLcGroupName(self, lcPara, lcGroupName):
+        """
+        function: Check if the logical cluster name is legal.
+        input : lcGroupName
+        output: NA
+        """
+        import re
+        PATTERN = "^[a-zA-Z0-9_]{1,63}$"
+        pattern = re.compile(PATTERN)
+        result = pattern.match(lcGroupName)
+        if (result is None):
+            raise Exception(ErrorCode.GAUSS_500["GAUSS_50004"] % lcPara[1:]
+                            + " The name of the logical cluster must not "
+                              "exceed 63 characters and can only contain "
+                              "letters, numbers, and underscores.")
+        if (lcGroupName in ["group_version1", "group_version2",
+                            "group_version3",
+                            "elastic_group"]):
+            raise Exception(ErrorCode.GAUSS_500["GAUSS_50004"] % lcPara[1:]
+                            + " The name of the logical cluster cannot be "
+                              "'group_version1', 'group_version2', "
+                              "'group_version3' or 'elastic_group'.")
+
+    def getHelpAndVersionStatus(self):
+        """
+        function: get help and version information status
+        input : NA
+        output: helpflag, versionflag
+        """
+        helpflag = False
+        versionflag = False
+        for parameter in sys.argv[1:]:
+            if (parameter == "-?" or parameter == "--help"):
+                helpflag = True
+            if (parameter == "-V" or parameter == "--version"):
+                versionflag = True
+        return helpflag, versionflag
+
+    def getActionParameterValue(self, module):
+        """
+        function: get the action value
+        input : parameter_list
+        output: NA
+        """
+        actions = []
+        if (module in special_list):
+            if (sys.argv[1:] == []):
+                GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50014"]
+                                       % module)
+            if (sys.argv[1:][-1] == "-t"):
+                GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50004"] % \
+                                       "t" + " option -t requires argument.")
+
+            for n, value in enumerate(sys.argv[1:]):
+                if (sys.argv[1:][n - 1] == "-t"):
+                    actions.append(value)
+                    if (len(actions) != 1):
+                        GaussLog.exitWithError(
+                            ErrorCode.GAUSS_500["GAUSS_50006"] % actions[0])
+                    self.action = value
+
+            if self.action == "":
+                GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50001"]
+                                       % "t" + ".")
+
+            # the module name must match special_list ("gs_om", not "gsom")
+            if ((module == "gs_om" and not self.action in action_om)
+                    or (module == "upgradectl"
+                        and not self.action in action_upgradectl)):
+                GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50004"]
+                                       % "t")
+
+    def createOutputDir(self, path):
+        """
+        function: create output directory
+        input : path
+        output: NA
+        """
+        try:
+            DefaultValue.checkOutputFile(path)
+        except Exception as e:
+            GaussLog.exitWithError(str(e))
+        dirName = os.path.dirname(os.path.realpath(path))
+        if (not os.path.isdir(dirName)):
+            try:
+                os.makedirs(dirName, DefaultValue.DIRECTORY_PERMISSION)
+            except Exception as e:
+                GaussLog.exitWithError(ErrorCode.GAUSS_502["GAUSS_50206"] % \
+                                       ("outputfile[%s]" % path) +
+                                       "Error:\n%s" % str(e))
+
+    def checkParamternum(self, key, value):
+        """
+        function: Check that a numeric parameter is a positive integer
+        input : key, value
+        output: numvalue
+        """
+        try:
+            numvalue = int(value)
+            if (numvalue <= 0):
+                GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50004"] % \
+                                       key[1:] +
+                                       " Parameter '%s' must be greater"
+                                       " than or equal to 1."
% key) + except Exception as e: + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50003"] % \ + (key[1:], "integer") + + " Error:\n%s" % str(e)) + + return numvalue + + def checkPath(self, key, value): + """ + function: Check some path parameters + input : key, value + output: path + """ + # Check that the path parameter is a file + try: + if (not value): + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50004"] + % key[1:] + + "Error:\noption %s requires argument" + % key) + path = str(value) + g_file.checkFilePermission(path, True) + return path + except Exception as e: + GaussLog.exitWithError(str(e)) + + def getParseParameter(self, module): + """ + function: get parse parameters + input : parameter_list + output: shortPara,longPara + """ + + shortPara = "" + longPara = [] + var = "--" + + ParameterList = "" + if (module == "upgradectl"): + if (self.action == "chose-strategy"): + ParameterList = ParameterDict.get("chose_strategy") + elif (self.action == "auto-rollback"): + ParameterList = ParameterDict.get("auto_rollback") + elif (self.action == "auto-upgrade"): + ParameterList = ParameterDict.get("auto_upgrade") + elif (self.action == "commit-upgrade"): + ParameterList = ParameterDict.get("commit_upgrade") + else: + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50004"] + % "t") + + elif (module == "gs_om"): + if (self.action in action_om): + ParameterList = ParameterDict.get(self.action) + else: + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50004"] + % "t") + else: + ParameterList = ParameterDict.get(module) + + for para in ParameterList: + if var in para: + varlong = para.strip("--") + longPara.append(varlong) + else: + varshort = para.strip("-") + shortPara += varshort + + return shortPara, longPara diff --git a/script/gspylib/common/SSDFaultInfo.py b/script/gspylib/common/SSDFaultInfo.py new file mode 100644 index 0000000..e84101f --- /dev/null +++ b/script/gspylib/common/SSDFaultInfo.py @@ -0,0 +1,204 @@ +# -*- coding:utf-8 -*- +############################################################################# +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. 
+# ----------------------------------------------------------------------------
+# Description : SSDFaultInfo.py is a utility for SSD fault info
+#############################################################################
+import sys
+
+
+class SSDFaultInfo():
+    def __init__(self):
+        pass
+
+    Failurelibs = {
+        "0x11": ["hioerr", "Data recovery failure",
+                 "GC and CM read, RAID recovery failed",
+                 "data error and failed to recover",
+                 "contact support for repairs"],
+        "0x2b": ["hioerr", "Read back failure after programming failure",
+                 "write failure read back failure",
+                 "4KB data error and failed to recover",
+                 "contact support for repairs"],
+        "0x2e": ["hioerr", "No available reserved blocks",
+                 "No available blocks", "result in timeout",
+                 "contact support for repairs"],
+        "0x300": ["hioerr", "CMD timeout", "CMD timeout",
+                  "CMD executive timeout", "contact support for repairs"],
+        "0x307": ["hioerr", "I/O error", "IO error",
+                  "Read/write error, return by read and write return value",
+                  "contact support for repairs"],
+        "0x30a": ["hioerr", "DDR init failed", "DDR initialization failure",
+                  "Drive loading failed", "contact support for repairs"],
+        "0x30c": ["hioerr", "Controller reset sync hioerr",
+                  "Controller reset status out of sync ",
+                  "Drive loading failed", "contact support for repairs"],
+        "0x30d": ["hioerr", "Clock fault", "abnormals found in clock testing",
+                  "If happened during the process of loading, it may cause "
+                  "drive loading failed, during operation may cause SSD work "
+                  "abnormally",
+                  "contact support for repairs"],
+        "0x312": ["hiowarn", "CAP: voltage fault",
+                  "Capacitance voltage alarming",
+                  "cause power-fail protection failure, without affecting IO "
+                  "functions",
+                  "contact support for repairs"],
+        "0x313": ["hiowarn", "CAP: learn fault",
+                  "Capacitance self learning error.",
+                  "Unable to get accurate capacitance, may cause power fail "
+                  "protection failure; without affecting IO functions",
+                  "contact support for repairs"],
+        "0x314": ["hionote", "CAP status",
+                  "Capacitance is in self learning status", "None", "Ignore"],
+        "0x31a": ["hiowarn", "CAP: short circuit",
+                  "Capacitance voltage is zero, possibly short circuit",
+                  "may cause power-fail protection failure; without "
+                  "affecting IO functions",
+                  "contact support for repairs"],
+        "0x31b": ["hiowarn", "Sensor fault", "Sensor access error",
+                  "If occurred during the process of loading, it may cause "
+                  "drive loading failed; capacitor voltage could not be "
+                  "monitored during operation",
+                  "contact support for repairs"],
+        "0x39": ["hioerr", "Init: PBMT scan failure",
+                 "initialization scanning PBMT read error",
+                 "part of data mapping relationship missing, 64MB at most "
+                 "can not find",
+                 "contact support for repairs"],
+        "0x3b": ["hioerr", "Init: first page scan failure",
+                 "initialization scan home page read error",
+                 "part of data mapping relationship missing, 64MB at most "
+                 "can not find (occur during initialization)",
+                 "contact support for repairs"],
+        "0x3c": ["hioerr", "Init: scan unclosed block failure",
+                 "Init: reset pointer, data page read error",
+                 "4KB data mapping relationship missing, the 4KB data can "
+                 "not be found",
+                 "contact support for repairs"],
+        "0x40": ["hioerr", "Init: PMT recovery: data page read failure",
+                 "Init: PMT recovery, data page read error",
+                 "4KB data mapping relationship missing, the 4KB data can "
+                 "not be found",
+                 "contact support for repairs"],
+        "0x43": ["hioerr", "too many unclosed blocks",
+                 "scan to the third unfulfilled block ",
+                 "Split
from original 0x3c scenario. Part of data mapping " + "relationship missing, 64MB at most can not find (occur " + "when initialization)", + "contact support for repairs"], + "0x45": ["hioerr", "Init: more than one PDW block found", + "PDW Initialization abnormal: found two and more than two " + "PWD", + "abnormal, may cause data missing", + "contact support for repairs"], + "0x47": ["hionote", "Init: PDW block not found", + "initialization abnormal: PDW is not found when " + "initialization", + "data may be incomplete", "contact support for repairs"], + "0x50": ["hioerr", "Cache: hit error data", "Cache hit data error", + "4KB data error and failed to recover", + "contact support for repairs"], + "0x51": ["hioerr", "Cache: read back failure", + "Cache completion and reading back error", + "4KB data error and failed to recover", + "contact support for repairs"], + "0x53": ["hioerr", "GC/WL read back failure", + "GC and WL read, data error", + "4KB data error and failed to recover", + "contact support for repairs"], + "0x7": ["hioerr", "No available blocks", + "no available block, free list is empty", + "data failed to write normally", + "contact support for repairs"], + "0x7e": ["hionote", "Read blank page", "read blank page", + "IO return successfully, but read wrong data", + "contact support for repairs"], + "0x7f": ["hiowarn", "Access flash timeout", "access flash timeout", + "without affecting data correctness, but access Flash " + "timeout", + "Ignore"], + "0x8a": ["hiowarn", "Warning: Bad Block close to limit", + "bad block level 1 alarming (exceed 11%)", + "bad block level 1 alarming (exceed 11%)", "Ignore"], + "0x8b": ["hioerr", "Error: Bad Block over limit", + "bad block level 2 alarming (exceed 14%)", + "bad block level 2 alarming (exceed 14%)", + "contact support for repairs"], + "0x8c": ["hiowarn", "Warning: P/E cycles close to limit", + "P/E cycles Level 1 alarming", "P/E cycles Level 1 alarming", + "Ignore"], + "0x8d": ["hioerr", "Error: P/E cycles over limit", + "P/E cycles Level 2 alarming", "P/E cycles Level 2 alarming", + "Scrapped"], + "0x90": ["hionote", "Over temperature", + "temperature value exceed limitation: current defined 90 " + "centi degrees", + "High temperature may cause SSD abnormal, if found this " + "alarm should test server fan speed etc. 
then drive will " + "run protection mechanism, limit IO speed (shut down this " + "function by API)", + "Suggest to check radiator"], + "0x91": ["hionote", "Temperature is OK", + "Temperature goes back to normal", "None", "Ignore"], + "0x92": ["hiowarn", "Battery fault", "Super-capacitor status alarming", + "Super-capacitor working status is abnormal", + "contact support for repairs"], + "0x93": ["hioerr", "SEU fault", "logical found SEU fault", + "May cause logical working abnormally", + "Power up and down on the SSD"], + "0x94": ["hioerr", "DDR error", + "data error found in controller plug-in DDR", + "May cause controller work abnormally (data may have been " + "in disorder status)", + "contact support for repairs"], + "0x95": ["hioerr", "Controller serdes error", + "Controller serdes test transmission error", + "May cause controller work abnormally(data may have been in " + "disorder status)", + "contact support for repairs"], + "0x96": ["hioerr", "Bridge serdes 1 error", + "Bridge controller serdes 1 test transmission error", + "May cause controller work abnormally(data may have been in " + "disorder status)", + "contact support for repairs"], + "0x97": ["hioerr", "Bridge serdes 2 error", + "Bridge controller serdes 2 test transmission error", + "May cause controller work abnormally(data may have been in " + "disorder status)", + "contact support for repairs"], + "0x98": ["hioerr", "SEU fault (corrected)", + "SEU fault (correctable error)", + "Split from original 0x3c scenario. May cause logical " + "working abnormally (10 seconds time-delay from error to " + "correct process)", + "Reset SSD"], + "0x9a": ["hionote", "Over temperature", + "temperature value exceed limitation: current defined 90 " + "centi degrees", + "High temperature may cause SSD abnormal, if found this " + "alarm should test server fan speed etc. then drive will " + "run protection mechanism, limit IO speed (shut down this " + "function by API)", + "Suggest to check radiator"], + "0xf1": ["hioerr", "Read failure without recovery", + "IOR read can not recover", + "4KB data error and failed to recover", + "contact support for repairs"], + "0xf7": ["hioerr", "Init: RAID not complete", + "Init: RAID not complete", + "RAID line data not complete before power failed", + "contact support for repairs"] + } diff --git a/script/gspylib/common/Signal.py b/script/gspylib/common/Signal.py new file mode 100644 index 0000000..577ab9e --- /dev/null +++ b/script/gspylib/common/Signal.py @@ -0,0 +1,133 @@ +# -*- coding:utf-8 -*- +############################################################################# +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. 
+# ---------------------------------------------------------------------------- +# Description : Signal.py is utility to process signal +############################################################################# +import sys +import signal + +sys.path.append(sys.path[0] + "/../../") +from gspylib.common.ErrorCode import ErrorCode + + +class Signal(object): + + ########################################################################## + # init signal handler + ########################################################################## + + def __init__(self, logger): + """ + function: initialize signal handler + input : object logger + output: NA + """ + self.logger = logger + signal.signal(signal.SIGINT, signal.SIG_IGN) + signal.signal(signal.SIGQUIT, signal.SIG_IGN) + signal.signal(signal.SIGTERM, signal.SIG_IGN) + signal.signal(signal.SIGALRM, signal.SIG_IGN) + signal.signal(signal.SIGHUP, signal.SIG_IGN) + signal.signal(signal.SIGUSR1, signal.SIG_IGN) + signal.signal(signal.SIGUSR2, signal.SIG_IGN) + + def setSignalEvent(self, functionName=None): + """ + function: initialize signal handler + input : function + output: NA + """ + if (functionName is not None): + signal.signal(signal.SIGINT, functionName) + else: + signal.signal(signal.SIGINT, signal.SIG_IGN) + + def print_signal_stack(self, frame): + """ + function: Function to print signal stack + input : frame + output: NA + """ + if (self.logger is None): + return + try: + import inspect + stacks = inspect.getouterframes(frame) + for curr in range(len(stacks)): + stack = stacks[curr] + self.logger.debug("Stack level: %d. File: %s. Function: %s. " + "LineNo: %d." % (curr, stack[1], stack[3], + stack[2])) + self.logger.debug("Code: %s." % + (stack[4][0].strip().strip("\n"))) + except Exception as e: + self.logger.debug("Failed to print signal stack. Error: \n%s" + % str(e)) + + def raise_handler(self, signal_num, frame): + """ + function: Function to raise handler + input : signal_num, frame + output: NA + """ + if (self.logger is not None): + self.logger.debug("Received signal[%d]." % (signal_num)) + self.print_signal_stack(frame) + raise Exception(ErrorCode.GAUSS_516["GAUSS_51614"] % (signal_num)) + + def setupTimeoutHandler(self): + """ + function: Function to set up time out handler + input : NA + output: NA + """ + signal.signal(signal.SIGALRM, self.timeout_handler) + + def setTimer(self, timeout): + """ + function: Function to set timer + input : timeout + output: NA + """ + self.logger.debug("Set timer. The timeout: %d." % timeout) + signal.signal(signal.SIGALRM, self.timeout_handler) + signal.alarm(timeout) + + def resetTimer(self): + """ + function: Reset timer + input : NA + output: NA + """ + signal.signal(signal.SIGALRM, signal.SIG_IGN) + self.logger.debug("Reset timer. Left time: %d." % signal.alarm(0)) + + def timeout_handler(self, signal_num, frame): + """ + function: Received the timeout signal + input : signal_num, frame + output: NA + """ + if (self.logger is not None): + self.logger.debug("Received the timeout signal: [%d]." + % (signal_num)) + self.print_signal_stack(frame) + raise Timeout("Time out.") + + +class Timeout(Exception): + pass diff --git a/script/gspylib/common/Sql.py b/script/gspylib/common/Sql.py new file mode 100644 index 0000000..d0f9e09 --- /dev/null +++ b/script/gspylib/common/Sql.py @@ -0,0 +1,5653 @@ +# -*- coding:utf-8 -*- +############################################################################# +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. 
+# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +# Description : Sql.py is used to store the sql used in OLAP +############################################################################# + +PMK_ORIGINAL = ["""START TRANSACTION;""", """CREATE SCHEMA pmk;""", + """CREATE TABLE pmk.pmk_configuration +( + config_param_name varchar(64) NOT NULL +, config_value text NOT NULL +, PRIMARY KEY (config_param_name) +);""", """CREATE TABLE pmk.pmk_snapshot +( + snapshot_id int + -- Snapshot ID (Running number) +, current_snapshot_time timestamp with time zone + -- Time at the beginning of the snapshot +, last_snapshot_time timestamp with time zone + -- Time at the end of the snapshot; the actual time the snapshot was taken +, creation_time timestamp with time zone + -- Time the snapshot was created +, PRIMARY KEY (snapshot_id) +);""", + """CREATE INDEX ix_pmk_snapshot_time ON pmk.pmk_snapshot + (current_snapshot_time DESC);""", + """CREATE TABLE pmk.pmk_snapshot_datanode_stat +( + snapshot_id int + -- Snapshot Id +, node_name text + -- node name from pgxc_node +, node_host text + -- node host from pgxc_node +, last_startup_time timestamp with time zone + -- last restart time of the node before snapshot starts +, number_of_files int +, physical_reads bigint +, physical_reads_delta bigint +, physical_writes bigint +, physical_writes_delta bigint +, read_time bigint +, read_time_delta bigint +, write_time bigint +, write_time_delta bigint +, db_size bigint +, active_sql_count int +, wait_sql_count int +, session_count int +, xact_commit bigint +, xact_commit_delta bigint +, xact_rollback bigint +, xact_rollback_delta bigint +, checkpoints_timed bigint +, checkpoints_timed_delta bigint +, checkpoints_req bigint +, checkpoints_req_delta bigint +, checkpoint_write_time double precision +, checkpoint_write_time_delta double precision +, physical_memory bigint +, db_memory_usage bigint +, shared_buffer_size bigint +, session_memory_total_size bigint +, session_memory_used_size bigint +, blocks_read bigint +, blocks_read_delta bigint +, blocks_hit bigint +, blocks_hit_delta bigint +, work_memory_size bigint +, sorts_in_memory bigint +, sorts_in_memory_delta bigint +, sorts_in_disk bigint +, sorts_in_disk_delta bigint +, busy_time numeric +, busy_time_delta numeric +, idle_time numeric +, idle_time_delta numeric +, iowait_time numeric +, iowait_time_delta numeric +, db_cpu_time numeric +, db_cpu_time_delta numeric +, PRIMARY KEY (snapshot_id) +);""", + """CREATE INDEX ix_pmk_snapshot_dnode_stat_node_name ON +pmk.pmk_snapshot_datanode_stat (UPPER(node_name), snapshot_id);""", + """CREATE TABLE pmk.pmk_meta_data +( + pmk_version varchar(128) +, last_snapshot_id int +, last_snapshot_collect_time timestamp with time zone +, PRIMARY KEY (pmk_version) +);""", """CREATE OR REPLACE FUNCTION pmk.put_line + (IN message text + ) +RETURNS boolean +AS +$$ +DECLARE l_error_message TEXT; +BEGIN + l_error_message := TRIM(message); + + IF l_error_message IS NOT NULL + THEN + raise notice '%',l_error_message; + RETURN 
TRUE; + END IF; + + RETURN FALSE; +END; +$$ +LANGUAGE plpgsql;""", """CREATE OR REPLACE FUNCTION pmk.check_node_type +RETURNS TEXT +AS +$$ +DECLARE l_node_type CHAR(1); +BEGIN + + l_node_type := 'D'; + RETURN NULL; + +END; +$$ +LANGUAGE plpgsql;""", """CREATE OR REPLACE FUNCTION pmk.check_pmk_enabled +RETURNS TEXT +AS +$$ +DECLARE l_pmk_enabled_i TEXT; +BEGIN + + SELECT UPPER(config_value) + INTO l_pmk_enabled_i + FROM pmk.pmk_configuration + WHERE config_param_name = 'Enable PMK'; + + IF l_pmk_enabled_i = 'FALSE' + THEN + RETURN 'ERROR:: PMK should be enabled to use the PMK features.'; + ELSE + RETURN NULL; + END IF; + +END; +$$ +LANGUAGE plpgsql;""", """CREATE OR REPLACE FUNCTION pmk.pmk_version ( ) +RETURNS varchar(128) +AS +$$ +DECLARE l_pmk_version varchar(128); + l_error_message TEXT; +BEGIN + + l_error_message := pmk.check_node_type(); + + IF l_error_message IS NOT NULL + THEN + pmk.put_line(l_error_message); + RETURN 'f'; + END IF; + + l_error_message := pmk.check_pmk_enabled(); + + IF l_error_message IS NOT NULL + THEN + pmk.put_line(l_error_message); + RETURN 'f'; + END IF; + + SELECT pmk_version + INTO l_pmk_version + FROM pmk.pmk_meta_data; + + RETURN l_pmk_version; + +END; +$$ +LANGUAGE plpgsql;""", """CREATE OR REPLACE FUNCTION pmk.configure_parameter + ( IN i_config_param_name varchar(64) + , IN i_config_value text + ) +RETURNS boolean +AS +$$ +DECLARE l_collect_count_value INT; + l_config_value TEXT; + l_upper_config_param TEXT; + l_error_message TEXT; +BEGIN + + l_error_message := pmk.check_node_type(); + + IF l_error_message IS NOT NULL + THEN + pmk.put_line(l_error_message); + RETURN FALSE; + END IF; + + l_upper_config_param := UPPER(TRIM(BOTH ' ' FROM i_config_param_name)); + + l_error_message := pmk.check_pmk_enabled(); + + IF l_error_message IS NOT NULL + AND l_upper_config_param <> 'ENABLE PMK' + THEN + pmk.put_line(l_error_message); + RETURN FALSE; + END IF; + + IF i_config_param_name IS NULL + THEN + l_error_message := 'ERROR:: Null should not be provided for + configuration parameter name.'; + pmk.put_line(l_error_message); + RETURN FALSE; + END IF; + + IF i_config_value IS NULL + THEN + l_error_message := 'ERROR:: Null should not be provided for + configuration value.'; + pmk.put_line(l_error_message); + RETURN FALSE; + END IF; + + IF l_upper_config_param = 'COLLECTION COUNT' + THEN + l_collect_count_value := i_config_value::int; + + IF l_collect_count_value < -1 + THEN + l_error_message := 'ERROR:: Configuration value "' + || i_config_value || '" should not be less than -1.'; + pmk.put_line(l_error_message); + RETURN FALSE; + + ELSIF l_collect_count_value = 0 + THEN + l_error_message := 'ERROR:: 0 should not be provided since + atleast one collection should be retained.'; + pmk.put_line(l_error_message); + RETURN FALSE; + + ELSE + l_config_value := l_collect_count_value; + END IF; + + ELSIF l_upper_config_param = 'ENABLE PMK' + THEN + l_config_value := UPPER(TRIM(BOTH ' ' FROM i_config_value)); + + IF l_config_value NOT IN ('TRUE', 'FALSE') + THEN + l_error_message := 'ERROR:: Allowed values are TRUE or FALSE for + the configuration parameter "Enable PMK".'; + pmk.put_line(l_error_message); + RETURN FALSE; + + END IF; + END IF; + + SET allow_concurrent_tuple_update = ON; + + UPDATE pmk.pmk_configuration + SET config_value = l_config_value + WHERE UPPER(config_param_name) = l_upper_config_param; + + IF NOT FOUND THEN + l_error_message := 'ERROR:: Invalid configuration parameter "' + || i_config_param_name || + '" provided for configuring PMK parameter ...'; + 
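
(pmk.configure_parameter validates one name/value pair and stores it in pmk.pmk_configuration; the IF NOT FOUND branch here just reports an unknown parameter and bails out. From the OM side such a function would be driven through gsql; a hedged sketch of a caller, where the helper name, database, and connection details are assumptions rather than anything in this patch:

    import subprocess

    def set_pmk_option(port, name, value):
        """Call pmk.configure_parameter via gsql; True means it succeeded."""
        sql = "SELECT pmk.configure_parameter('%s', '%s');" % (name, value)
        cmd = ["gsql", "-d", "postgres", "-p", str(port), "-t", "-c", sql]
        out = subprocess.run(cmd, capture_output=True, text=True, check=True)
        # the function returns a boolean, printed as 't' or 'f'
        return out.stdout.strip() == "t"

    # e.g. retain only the ten most recent PMK snapshots:
    # set_pmk_option(5432, 'Collection Count', '10')

The function body resumes below.)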
pmk.put_line(l_error_message); + RETURN FALSE; + END IF; + + RETURN TRUE; + +END; +$$ +LANGUAGE plpgsql;""", + """CREATE OR REPLACE FUNCTION pmk.get_configuration_parameter + ( IN i_config_param_name TEXT ) +RETURNS TABLE +( + config_param_name varchar(64) +, config_value text +) +AS +$$ +DECLARE l_upper_config_param TEXT; + l_error_message TEXT; +BEGIN + + l_error_message := pmk.check_node_type(); + + IF l_error_message IS NOT NULL + THEN + pmk.put_line(l_error_message); + RETURN; + END IF; + + l_error_message := pmk.check_pmk_enabled(); + + IF l_error_message IS NOT NULL + THEN + pmk.put_line(l_error_message); + RETURN; + END IF; + + l_upper_config_param := UPPER(TRIM(BOTH ' ' FROM i_config_param_name)); + + IF l_upper_config_param = 'ALL' + THEN + + RETURN QUERY + SELECT config_param_name + , config_value + FROM pmk.pmk_configuration + ORDER BY config_param_name; + + ELSE + + RETURN QUERY + SELECT config_param_name + , config_value + FROM pmk.pmk_configuration + WHERE UPPER(config_param_name) = l_upper_config_param; + + END IF; + +END; +$$ +LANGUAGE plpgsql;""", """CREATE OR REPLACE FUNCTION pmk.find_perf_stat + ( IN i_skip_supper_role boolean + , OUT o_number_of_files int + , OUT o_physical_reads bigint + , OUT o_physical_writes bigint + , OUT o_read_time bigint + , OUT o_write_time bigint + , OUT o_physical_memory bigint + , OUT o_shared_buffer_size bigint + , OUT o_session_memory_total_size bigint + , OUT o_session_memory_used_size bigint + , OUT o_blocks_read bigint + , OUT o_blocks_hit bigint + , OUT o_db_size bigint + , OUT o_work_memory_size bigint + , OUT o_sorts_in_memory bigint + , OUT o_sorts_in_disk bigint + , OUT o_active_sql_count int + , OUT o_wait_sql_count int + , OUT o_session_count int + , OUT o_busy_time numeric + , OUT o_idle_time numeric + , OUT o_iowait_time numeric + , OUT o_db_cpu_time numeric + , OUT o_db_memory_usage bigint + , OUT o_node_startup_time timestamp with time zone + , OUT o_node_host_name text + , OUT o_xact_commit bigint + , OUT o_xact_rollback bigint + , OUT o_checkpoints_timed bigint + , OUT o_checkpoints_req bigint + , OUT o_checkpoint_write_time double precision + ) +AS +$$ +DECLARE + l_block_size int; + l_record_chk int; +BEGIN + + o_node_startup_time := pg_postmaster_start_time(); + o_node_host_name := get_hostname(); + + SELECT COUNT(*) AS number_of_files + , SUM(phyrds) AS physical_reads + , SUM(phywrts) AS physical_writes + , SUM(readtim) AS read_time + , SUM(writetim) AS write_time + INTO o_number_of_files + , o_physical_reads + , o_physical_writes + , o_read_time + , o_write_time + FROM gs_file_stat; + + IF o_number_of_files = 0 + THEN + o_physical_reads := 0; + o_physical_writes := 0; + o_read_time := 0; + o_write_time := 0; + END IF; + + WITH os_stat AS + ( + SELECT os.name AS statname + , os.value AS statvalue + FROM gs_os_run_info os + WHERE os.name IN ( 'PHYSICAL_MEMORY_BYTES', 'BUSY_TIME', + 'IDLE_TIME', 'IOWAIT_TIME' ) + ) + SELECT (SELECT statvalue FROM os_stat WHERE statname = + 'PHYSICAL_MEMORY_BYTES') + , (SELECT statvalue FROM os_stat WHERE statname = 'BUSY_TIME') + , (SELECT statvalue FROM os_stat WHERE statname = 'IDLE_TIME') + , (SELECT statvalue FROM os_stat WHERE statname = 'IOWAIT_TIME') + INTO o_physical_memory + , o_busy_time + , o_idle_time + , o_iowait_time + FROM DUAL; + + -- gs_db_time is not available; temporarily PMK extension is used. 
+ o_db_cpu_time := total_cpu(); + o_db_memory_usage := total_memory()*1024; + + WITH config_value AS + ( SELECT name + , setting::bigint AS config_value + FROM pg_settings + WHERE name IN ( 'block_size', 'shared_buffers', 'work_mem' ) + ) + , config_value1 AS + ( SELECT (SELECT config_value FROM config_value WHERE name = + 'block_size') AS block_size + , (SELECT config_value FROM config_value WHERE name = + 'shared_buffers') AS shared_buffers + , (SELECT config_value FROM config_value WHERE name = + 'work_mem') AS work_mem + FROM DUAL + ) + SELECT block_size + , (shared_buffers * block_size)::bigint + , (work_mem * 1024)::bigint + INTO l_block_size + , o_shared_buffer_size + , o_work_memory_size + FROM config_value1; + + /* Commented since these statistics are not used for node and + * cluster reports + */ + o_session_memory_total_size := 0; + o_session_memory_used_size := 0; + + SELECT SUM(blks_read)::bigint + , SUM(blks_hit)::bigint + , SUM(xact_commit)::bigint + , SUM(xact_rollback)::bigint + INTO o_blocks_read + , o_blocks_hit + , o_xact_commit + , o_xact_rollback + FROM pg_stat_database; + + o_db_size := 0; + IF i_skip_supper_role = 'TRUE' + THEN + WITH session_state AS + ( SELECT state, waiting , usename + FROM pg_stat_activity a, pg_roles r + WHERE r.rolsuper = 'f' AND a.usename = r.rolname + ) + , active_session AS + ( SELECT state, waiting , usename + FROM session_state s, pg_roles r + WHERE s.state IN ('active', 'fastpath function call', 'retrying') + AND r.rolsuper = 'f' AND s.usename = r.rolname + ) + SELECT ( SELECT COUNT(*) FROM active_session ) + , ( SELECT COUNT(*) FROM active_session WHERE waiting = TRUE ) + , ( SELECT COUNT(*) FROM session_state ) + INTO o_active_sql_count, o_wait_sql_count , o_session_count + FROM DUAL; + ELSE + WITH session_state AS + ( SELECT state, waiting + FROM pg_stat_activity + ) + , active_session AS + ( SELECT state, waiting + FROM session_state + WHERE state IN ('active', 'fastpath function call', 'retrying') + ) + SELECT ( SELECT COUNT(*) FROM active_session ) + , ( SELECT COUNT(*) FROM active_session WHERE waiting = TRUE ) + , ( SELECT COUNT(*) FROM session_state ) + INTO o_active_sql_count, o_wait_sql_count, o_session_count + FROM DUAL; + END IF; + + -- Currently, the below statistics are calculated from gs_session_stat + -- (which is not accurate) since gs_db_stat is not available + WITH sort_state AS + ( SELECT statname + , SUM(value)::bigint AS sorts_cnt + FROM gs_session_stat + WHERE statname IN ('n_sort_in_memory', 'n_sort_in_disk') + GROUP BY statname + ) + SELECT (SELECT sorts_cnt FROM sort_state WHERE statname = + 'n_sort_in_memory') + , (SELECT sorts_cnt FROM sort_state WHERE statname = + 'n_sort_in_disk') + INTO o_sorts_in_memory + , o_sorts_in_disk + FROM DUAL; + + SELECT SUM(checkpoints_timed)::bigint + , SUM(checkpoints_req)::bigint + , SUM(checkpoint_write_time)::bigint + INTO o_checkpoints_timed + , o_checkpoints_req + , o_checkpoint_write_time + FROM pg_stat_bgwriter; + +END; +$$ +LANGUAGE plpgsql;""", """CREATE OR REPLACE FUNCTION pmk.find_node_stat + (IN i_skip_supper_role boolean + , OUT o_number_of_files_1 int + , OUT o_physical_reads_1 bigint + , OUT o_physical_writes_1 bigint + , OUT o_read_time_1 bigint + , OUT o_write_time_1 bigint + , OUT o_physical_memory_1 bigint + , OUT o_shared_buffer_size_1 bigint + , OUT o_session_memory_total_size_1 bigint + , OUT o_session_memory_used_size_1 bigint + , OUT o_blocks_read_1 bigint + , OUT o_blocks_hit_1 bigint + , OUT o_db_size_1 bigint + , OUT o_work_memory_size_1 bigint + , 
OUT o_sorts_in_memory_1 bigint + , OUT o_sorts_in_disk_1 bigint + , OUT o_active_sql_count_1 int + , OUT o_wait_sql_count_1 int + , OUT o_session_count_1 int + , OUT o_busy_time_1 numeric + , OUT o_idle_time_1 numeric + , OUT o_iowait_time_1 numeric + , OUT o_db_cpu_time_1 numeric + , OUT o_db_memory_usage_1 bigint + , OUT o_node_startup_time_1 timestamp with time zone + , OUT o_node_host_name_1 text + , OUT o_xact_commit_1 bigint + , OUT o_xact_rollback_1 bigint + , OUT o_checkpoints_timed_1 bigint + , OUT o_checkpoints_req_1 bigint + , OUT o_checkpoint_write_time_1 double precision + ) +AS +$$ +BEGIN + + SELECT o_number_of_files + , o_physical_reads + , o_physical_writes + , o_read_time + , o_write_time + , o_physical_memory + , o_shared_buffer_size + , o_session_memory_total_size + , o_session_memory_used_size + , o_blocks_read + , o_blocks_hit + , o_db_size + , o_work_memory_size + , o_sorts_in_memory + , o_sorts_in_disk + , o_active_sql_count + , o_wait_sql_count + , o_session_count + , o_busy_time + , o_idle_time + , o_iowait_time + , o_db_cpu_time + , o_db_memory_usage + , o_node_startup_time + , o_node_host_name + , o_xact_commit + , o_xact_rollback + , o_checkpoints_timed + , o_checkpoints_req + , o_checkpoint_write_time + INTO o_number_of_files_1 + , o_physical_reads_1 + , o_physical_writes_1 + , o_read_time_1 + , o_write_time_1 + , o_physical_memory_1 + , o_shared_buffer_size_1 + , o_session_memory_total_size_1 + , o_session_memory_used_size_1 + , o_blocks_read_1 + , o_blocks_hit_1 + , o_db_size_1 + , o_work_memory_size_1 + , o_sorts_in_memory_1 + , o_sorts_in_disk_1 + , o_active_sql_count_1 + , o_wait_sql_count_1 + , o_session_count_1 + , o_busy_time_1 + , o_idle_time_1 + , o_iowait_time_1 + , o_db_cpu_time_1 + , o_db_memory_usage_1 + , o_node_startup_time_1 + , o_node_host_name_1 + , o_xact_commit_1 + , o_xact_rollback_1 + , o_checkpoints_timed_1 + , o_checkpoints_req_1 + , o_checkpoint_write_time_1 + + FROM pmk.find_perf_stat(i_skip_supper_role); + +END; +$$ +LANGUAGE plpgsql;""", """CREATE OR REPLACE FUNCTION pmk.load_datanode_stat + ( IN i_snapshot_id int + , IN i_last_snapshot_id int + , IN i_pmk_last_collect_start_time timestamp with time zone + , IN i_node_name text + , IN i_number_of_files int + , IN i_physical_reads bigint + , IN i_physical_writes bigint + , IN i_read_time bigint + , IN i_write_time bigint + , IN i_physical_memory bigint + , IN i_shared_buffer_size bigint + , IN i_session_memory_total_size bigint + , IN i_session_memory_used_size bigint + , IN i_blocks_read bigint + , IN i_blocks_hit bigint + , IN i_db_size bigint + , IN i_work_memory_size bigint + , IN i_sorts_in_memory bigint + , IN i_sorts_in_disk bigint + , IN i_active_sql_count int + , IN i_wait_sql_count int + , IN i_session_count int + , IN i_busy_time numeric + , IN i_idle_time numeric + , IN i_iowait_time numeric + , IN i_db_cpu_time numeric + , IN i_db_memory_usage bigint + , IN i_node_startup_time timestamp with time zone + , IN i_node_host_name text + , IN i_xact_commit bigint + , IN i_xact_rollback bigint + , IN i_checkpoints_timed bigint + , IN i_checkpoints_req bigint + , IN i_checkpoint_write_time double precision + , IN i_skip_supper_role boolean + , OUT o_dn_snapshot_id int + , OUT o_dn_node_name text + -- node name from pgxc_node + , OUT o_dn_node_host text + -- node host from pgxc_node + , OUT o_dn_last_startup_time timestamp with time zone + -- last restart time of the node before snapshot starts + , OUT o_dn_number_of_files int + , OUT o_dn_physical_reads bigint + , OUT 
o_dn_physical_reads_delta bigint + , OUT o_dn_physical_writes bigint + , OUT o_dn_physical_writes_delta bigint + , OUT o_dn_read_time bigint + , OUT o_dn_read_time_delta bigint + , OUT o_dn_write_time bigint + , OUT o_dn_write_time_delta bigint + , OUT o_dn_db_size bigint + , OUT o_dn_active_sql_count int + , OUT o_dn_wait_sql_count int + , OUT o_dn_session_count int + , OUT o_dn_xact_commit bigint + , OUT o_dn_xact_commit_delta bigint + , OUT o_dn_xact_rollback bigint + , OUT o_dn_xact_rollback_delta bigint + , OUT o_dn_checkpoints_timed bigint + , OUT o_dn_checkpoints_timed_delta bigint + , OUT o_dn_checkpoints_req bigint + , OUT o_dn_checkpoints_req_delta bigint + , OUT o_dn_checkpoint_write_time double precision + , OUT o_dn_checkpoint_write_time_delta double precision + , OUT o_dn_physical_memory bigint + , OUT o_dn_db_memory_usage bigint + , OUT o_dn_shared_buffer_size bigint + , OUT o_dn_session_memory_total_size bigint + , OUT o_dn_session_memory_used_size bigint + , OUT o_dn_blocks_read bigint + , OUT o_dn_blocks_read_delta bigint + , OUT o_dn_blocks_hit bigint + , OUT o_dn_blocks_hit_delta bigint + , OUT o_dn_work_memory_size bigint + , OUT o_dn_sorts_in_memory bigint + , OUT o_dn_sorts_in_memory_delta bigint + , OUT o_dn_sorts_in_disk bigint + , OUT o_dn_sorts_in_disk_delta bigint + , OUT o_dn_busy_time numeric + , OUT o_dn_busy_time_delta numeric + , OUT o_dn_idle_time numeric + , OUT o_dn_idle_time_delta numeric + , OUT o_dn_iowait_time numeric + , OUT o_dn_iowait_time_delta numeric + , OUT o_dn_db_cpu_time numeric + , OUT o_dn_db_cpu_time_delta numeric + ) +AS +$$ +DECLARE l_physical_reads_delta bigint; + l_physical_writes_delta bigint; + l_read_time_delta bigint; + l_write_time_delta bigint; + l_blocks_read_delta bigint; + l_blocks_hit_delta bigint; + l_sorts_in_memory_delta bigint; + l_sorts_in_disk_delta bigint; + l_busy_time_delta numeric; + l_idle_time_delta numeric; + l_iowait_time_delta numeric; + l_db_cpu_time_delta numeric; + l_xact_commit_delta bigint; + l_xact_rollback_delta bigint; + l_checkpoints_timed_delta bigint; + l_checkpoints_req_delta bigint; + l_checkpoint_write_time_delta double precision; + i_skip_supper_role_delta boolean; +BEGIN + + l_physical_reads_delta := i_physical_reads; + l_physical_writes_delta := i_physical_writes; + l_read_time_delta := i_read_time; + l_write_time_delta := i_write_time; + l_xact_commit_delta := i_xact_commit; + l_xact_rollback_delta := i_xact_rollback; + l_checkpoints_timed_delta := i_checkpoints_timed; + l_checkpoints_req_delta := i_checkpoints_req; + l_checkpoint_write_time_delta := i_checkpoint_write_time; + i_skip_supper_role_delta := i_skip_supper_role; + l_blocks_read_delta := i_blocks_read; + l_blocks_hit_delta := i_blocks_hit; + + l_busy_time_delta := i_busy_time; + l_idle_time_delta := i_idle_time; + l_iowait_time_delta := i_iowait_time; + l_db_cpu_time_delta := i_db_cpu_time; + + -- Currently, the below statistics are calculated from gs_session_stat + -- (which is not accurate) since gs_db_stat is not available + -- These statistics are cumulative from instance startup. 
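
(Note that the assignments that follow seed every *_delta output with the current cumulative counter: the subtraction against the previous snapshot appears to have been dropped in this variant, since i_last_snapshot_id and i_pmk_last_collect_start_time are accepted but never read. A consumer can still derive per-interval figures by differencing successive snapshots itself; a minimal sketch, with an illustrative helper that is not part of the patch:

    def interval_delta(prev, curr):
        """Difference two cumulative counters; a drop means the node
        restarted and the counter began again, so return curr as-is."""
        if prev is None or curr < prev:
            return curr
        return curr - prev

    # interval_delta(1000, 1500) -> 500
    # interval_delta(1500, 200)  -> 200  (counter reset detected)

The delta seeding continues below.)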
+ l_sorts_in_memory_delta := i_sorts_in_memory; + l_sorts_in_disk_delta := i_sorts_in_disk; + + o_dn_snapshot_id := i_snapshot_id; + o_dn_node_name := i_node_name; + o_dn_node_host := i_node_host_name; + o_dn_last_startup_time := i_node_startup_time; + o_dn_number_of_files := i_number_of_files; + o_dn_physical_reads := i_physical_reads; + o_dn_physical_reads_delta := l_physical_reads_delta; + o_dn_physical_writes := i_physical_writes; + o_dn_physical_writes_delta := l_physical_writes_delta; + o_dn_read_time := i_read_time; + o_dn_read_time_delta := l_read_time_delta; + o_dn_write_time := i_write_time; + o_dn_write_time_delta := l_write_time_delta; + o_dn_db_size := i_db_size; + o_dn_active_sql_count := i_active_sql_count; + o_dn_wait_sql_count := i_wait_sql_count; + o_dn_session_count := i_session_count; + o_dn_xact_commit := i_xact_commit; + o_dn_xact_commit_delta := l_xact_commit_delta; + o_dn_xact_rollback := i_xact_rollback; + o_dn_xact_rollback_delta := l_xact_rollback_delta; + o_dn_checkpoints_timed := i_checkpoints_timed; + o_dn_checkpoints_timed_delta := l_checkpoints_timed_delta; + o_dn_checkpoints_req := i_checkpoints_req; + o_dn_checkpoints_req_delta := l_checkpoints_req_delta; + o_dn_checkpoint_write_time := i_checkpoint_write_time; + o_dn_checkpoint_write_time_delta := l_checkpoint_write_time_delta; + o_dn_physical_memory := i_physical_memory; + o_dn_db_memory_usage := i_db_memory_usage; + o_dn_shared_buffer_size := i_shared_buffer_size; + o_dn_session_memory_total_size := i_session_memory_total_size; + o_dn_session_memory_used_size := i_session_memory_used_size; + o_dn_blocks_read := i_blocks_read; + o_dn_blocks_read_delta := l_blocks_read_delta; + o_dn_blocks_hit := i_blocks_hit; + o_dn_blocks_hit_delta := l_blocks_hit_delta; + o_dn_work_memory_size := i_work_memory_size; + o_dn_sorts_in_memory := i_sorts_in_memory; + o_dn_sorts_in_memory_delta := l_sorts_in_memory_delta; + o_dn_sorts_in_disk := i_sorts_in_disk; + o_dn_sorts_in_disk_delta := l_sorts_in_disk_delta; + o_dn_busy_time := i_busy_time; + o_dn_busy_time_delta := l_busy_time_delta; + o_dn_idle_time := i_idle_time; + o_dn_idle_time_delta := l_idle_time_delta; + o_dn_iowait_time := i_iowait_time; + o_dn_iowait_time_delta := l_iowait_time_delta; + o_dn_db_cpu_time := i_db_cpu_time; + o_dn_db_cpu_time_delta := l_db_cpu_time_delta; + +END; +$$ +LANGUAGE plpgsql;""", + """CREATE OR REPLACE FUNCTION pmk.load_node_stat + ( IN i_pmk_curr_collect_start_time TIMESTAMP WITH TIME ZONE + , IN i_pmk_last_collect_start_time TIMESTAMP WITH TIME ZONE + , IN i_last_snapshot_id INT + , IN i_node_name TEXT + , IN i_node_type char(1) + , IN i_skip_supper_role boolean + ) +RETURNS TABLE +( + snapshot_id int +, node_name text +, node_host text +, last_startup_time timestamp with time zone +, number_of_files int +, physical_reads bigint +, physical_reads_delta bigint +, physical_writes bigint +, physical_writes_delta bigint +, read_time bigint +, read_time_delta bigint +, write_time bigint +, write_time_delta bigint +, db_size bigint +, active_sql_count int +, wait_sql_count int +, session_count int +, xact_commit bigint +, xact_commit_delta bigint +, xact_rollback bigint +, xact_rollback_delta bigint +, checkpoints_timed bigint +, checkpoints_timed_delta bigint +, checkpoints_req bigint +, checkpoints_req_delta bigint +, checkpoint_write_time double precision +, checkpoint_write_time_delta double precision +, physical_memory bigint +, db_memory_usage bigint +, shared_buffer_size bigint +, session_memory_total_size bigint +, 
session_memory_used_size bigint +, blocks_read bigint +, blocks_read_delta bigint +, blocks_hit bigint +, blocks_hit_delta bigint +, work_memory_size bigint +, sorts_in_memory bigint +, sorts_in_memory_delta bigint +, sorts_in_disk bigint +, sorts_in_disk_delta bigint +, busy_time numeric +, busy_time_delta numeric +, idle_time numeric +, idle_time_delta numeric +, iowait_time numeric +, iowait_time_delta numeric +, db_cpu_time numeric +, db_cpu_time_delta numeric +) +AS +$$ +DECLARE l_snapshot_id INT; + l_query_str TEXT; + l_node_stat_cur RECORD; +BEGIN + + IF i_last_snapshot_id IS NULL + OR i_last_snapshot_id = 2147483647 + THEN + l_snapshot_id := 1; + ELSE + l_snapshot_id := i_last_snapshot_id + 1; + END IF; + + FOR l_node_stat_cur IN SELECT * FROM pmk.find_node_stat(i_skip_supper_role) + LOOP + RETURN QUERY + (SELECT * FROM pmk.load_datanode_stat ( l_snapshot_id + , i_last_snapshot_id + , i_pmk_last_collect_start_time + , i_node_name + , l_node_stat_cur.o_number_of_files_1 + , l_node_stat_cur.o_physical_reads_1 + , l_node_stat_cur.o_physical_writes_1 + , l_node_stat_cur.o_read_time_1 + , l_node_stat_cur.o_write_time_1 + , l_node_stat_cur.o_physical_memory_1 + , l_node_stat_cur.o_shared_buffer_size_1 + , l_node_stat_cur.o_session_memory_total_size_1 + , l_node_stat_cur.o_session_memory_used_size_1 + , l_node_stat_cur.o_blocks_read_1 + , l_node_stat_cur.o_blocks_hit_1 + , l_node_stat_cur.o_db_size_1 + , l_node_stat_cur.o_work_memory_size_1 + , l_node_stat_cur.o_sorts_in_memory_1 + , l_node_stat_cur.o_sorts_in_disk_1 + , l_node_stat_cur.o_active_sql_count_1 + , l_node_stat_cur.o_wait_sql_count_1 + , l_node_stat_cur.o_session_count_1 + , l_node_stat_cur.o_busy_time_1 + , l_node_stat_cur.o_idle_time_1 + , l_node_stat_cur.o_iowait_time_1 + , l_node_stat_cur.o_db_cpu_time_1 + , l_node_stat_cur.o_db_memory_usage_1 + , l_node_stat_cur.o_node_startup_time_1 + , l_node_stat_cur.o_node_host_name_1 + , l_node_stat_cur.o_xact_commit_1 + , l_node_stat_cur.o_xact_rollback_1 + , l_node_stat_cur.o_checkpoints_timed_1 + , l_node_stat_cur.o_checkpoints_req_1 + , l_node_stat_cur.o_checkpoint_write_time_1 + , i_skip_supper_role + )); + END LOOP; + +END; +$$ +LANGUAGE plpgsql;""", + """CREATE OR REPLACE FUNCTION pmk.delete_expired_snapshots ( ) +RETURNS void +AS +$$ +DECLARE l_collection_count INT; + l_retention_snapshot_id INT; +BEGIN + + -- Deleting node statistics based on "collection count" config param + SELECT config_value + INTO l_collection_count + FROM pmk.pmk_configuration + WHERE config_param_name = 'Collection Count'; + + IF l_collection_count > -1 + THEN + IF l_collection_count = 0 + THEN + l_collection_count := 1; + END IF; + + SELECT MIN(snapshot_id) + INTO l_retention_snapshot_id + FROM ( SELECT snapshot_id + FROM pmk.pmk_snapshot + ORDER BY snapshot_id DESC + LIMIT l_collection_count ); + + DELETE FROM pmk.pmk_snapshot_datanode_stat + WHERE snapshot_id < l_retention_snapshot_id; + + + DELETE FROM pmk.pmk_snapshot + WHERE snapshot_id < l_retention_snapshot_id; + + END IF; + +END; +$$ +LANGUAGE plpgsql;""", + """CREATE OR REPLACE FUNCTION pmk.get_meta_data + ( OUT l_pmk_curr_collect_start_time timestamp with time zone + , OUT l_pmk_last_collect_start_time timestamp with time zone + , OUT l_last_snapshot_id int + ) +AS +$$ +DECLARE l_error_message TEXT; +BEGIN + l_error_message := pmk.check_node_type(); + + IF l_error_message IS NOT NULL + THEN + pmk.put_line(l_error_message); + RETURN; + END IF; + + l_error_message := pmk.check_pmk_enabled(); + + IF l_error_message IS NOT NULL + THEN + 
pmk.put_line(l_error_message); + RETURN; + END IF; + + SELECT last_snapshot_id, last_snapshot_collect_time + INTO l_last_snapshot_id, l_pmk_last_collect_start_time + FROM pmk.pmk_meta_data; + + l_pmk_curr_collect_start_time := date_trunc('second', current_timestamp); + + IF l_pmk_curr_collect_start_time < l_pmk_last_collect_start_time + THEN + l_error_message := 'ERROR:: There is a change in system time of Gauss + MPPDB host. PMK does not support the scenarios related to system time + change.'; + pmk.put_line(l_error_message); + RETURN; + ELSIF l_pmk_curr_collect_start_time = l_pmk_last_collect_start_time + THEN + l_error_message := 'ERROR:: Multiple statistics-collections can not + be done within a second.'; + pmk.put_line(l_error_message); + RETURN; + END IF; +END; +$$ +LANGUAGE plpgsql;""", + """CREATE OR REPLACE FUNCTION pmk.get_pgxc_node + ( OUT o_node_name TEXT + , OUT o_node_type CHAR(1) + ) +RETURNS SETOF RECORD +AS +$$ +DECLARE l_error_message TEXT; + v_rec RECORD; +BEGIN + l_error_message := pmk.check_node_type(); + + IF l_error_message IS NOT NULL + THEN + pmk.put_line(l_error_message); + RETURN; + END IF; + + l_error_message := pmk.check_pmk_enabled(); + + IF l_error_message IS NOT NULL + THEN + pmk.put_line(l_error_message); + RETURN; + END IF; + + FOR v_rec IN (SELECT node_name FROM DBE_PERF.node_name) LOOP + o_node_name := v_rec.node_name; + o_node_type := 'D'; + RETURN NEXT; + END LOOP; + +END; +$$ +LANGUAGE plpgsql;""", + """CREATE OR REPLACE FUNCTION pmk.check_start_end_dates + ( INOUT io_start_pmk_time timestamp with time zone + , INOUT io_end_pmk_time timestamp with time zone + , OUT o_error_message text + ) +AS +$$ +DECLARE l_last_collect_time timestamp with time zone; +BEGIN + + SELECT last_snapshot_collect_time + INTO l_last_collect_time + FROM pmk.pmk_meta_data; + + IF io_start_pmk_time > l_last_collect_time + THEN + o_error_message := 'ERROR:: The from-time provided is greater than the + last statistics-collection time(' || l_last_collect_time || '). 
+ Invalid value(s) provided for the input time-range'; + RETURN; + END IF; + + IF io_end_pmk_time IS NULL + THEN + io_end_pmk_time := l_last_collect_time; + + IF io_start_pmk_time IS NULL + THEN + io_start_pmk_time := io_end_pmk_time; + END IF; + ELSE + IF (io_start_pmk_time IS NULL) OR + (io_start_pmk_time > io_end_pmk_time) + THEN + o_error_message := 'ERROR:: Invalid value(s) provided for + the input time-range'; + RETURN; + END IF; + END IF; + +END; +$$ +LANGUAGE plpgsql;""", + """CREATE OR REPLACE FUNCTION pmk.get_cluster_host_cpu_stat + ( IN i_start_pmk_time timestamp with time zone + , IN i_end_pmk_time timestamp with time zone + , OUT o_stat_collect_time timestamp + , OUT o_avg_cpu_total_time numeric(21, 3) + , OUT o_avg_cpu_busy_time numeric(21, 3) + , OUT o_avg_cpu_iowait_time numeric(21, 3) + , OUT o_cpu_busy_perc numeric(5, 2) + , OUT o_cpu_io_wait_perc numeric(5, 2) + , OUT o_min_cpu_busy_perc numeric(5, 2) + , OUT o_max_cpu_busy_perc numeric(5, 2) + , OUT o_min_cpu_iowait_perc numeric(5, 2) + , OUT o_max_cpu_iowait_perc numeric(5, 2) + ) +RETURNS SETOF record +AS +$$ +DECLARE l_error_message text; +BEGIN + + l_error_message := pmk.check_node_type(); + + IF l_error_message IS NOT NULL + THEN + pmk.put_line(l_error_message); + RETURN; + END IF; + + l_error_message := pmk.check_pmk_enabled(); + + IF l_error_message IS NOT NULL + THEN + pmk.put_line(l_error_message); + RETURN; + END IF; + + -- Verifying the input start and end times + pmk.check_start_end_dates(i_start_pmk_time, i_end_pmk_time, + l_error_message); + + IF l_error_message IS NOT NULL + THEN + l_error_message := l_error_message || + ' during generation of cluster host CPU statistics ...'; + pmk.put_line(l_error_message); + RETURN; + END IF; + + RETURN QUERY + WITH snap AS + ( SELECT snapshot_id + , current_snapshot_time AS pmk_curr_collect_start_time + FROM pmk.pmk_snapshot + WHERE current_snapshot_time + BETWEEN i_start_pmk_time AND i_end_pmk_time + ) + , os_cpu_stat AS + ( SELECT s.pmk_curr_collect_start_time + , node_host + , node_name + , (busy_time_delta * 10) AS cpu_busy_time + , (idle_time_delta * 10) AS cpu_idle_time + , (iowait_time_delta * 10) AS cpu_iowait_time + FROM pmk.pmk_snapshot_datanode_stat dns, snap s + WHERE dns.snapshot_id = s.snapshot_id + ) + , os_cpu_stat1 AS + ( SELECT pmk_curr_collect_start_time::timestamp AS stat_collect_time + , node_host + , cpu_busy_time + , cpu_idle_time + , cpu_iowait_time + , (cpu_busy_time+cpu_idle_time+cpu_iowait_time)::numeric AS + cpu_total_time + FROM ( SELECT pmk_curr_collect_start_time + , node_host + , cpu_busy_time + , cpu_idle_time + , cpu_iowait_time + , rank() OVER (PARTITION BY pmk_curr_collect_start_time, + node_host ORDER BY cpu_busy_time DESC, node_name) + AS node_cpu_busy_order + FROM os_cpu_stat + ) + WHERE node_cpu_busy_order = 1 + ) + SELECT hcs.stat_collect_time + , AVG(hcs.cpu_total_time)::numeric(21, 3) AS avg_cpu_total_time + , AVG(hcs.cpu_busy_time)::numeric(21, 3) AS avg_cpu_busy_time + , AVG(hcs.cpu_iowait_time)::numeric(21, 3) AS avg_cpu_iowait_time + , ( (SUM(cpu_busy_time) * 100.0) / NULLIF(SUM(cpu_total_time), 0) ) + ::numeric(5, 2) AS cpu_busy_perc + , ( (SUM(cpu_iowait_time) * 100.0) / NULLIF(SUM(cpu_total_time), 0) ) + ::numeric(5, 2) AS cpu_io_wait_perc + , MIN(hcs.cpu_busy_time_perc)::numeric(5, 2) AS min_cpu_busy_perc + , MAX(hcs.cpu_busy_time_perc)::numeric(5, 2) AS max_cpu_busy_perc + , MIN(hcs.cpu_iowait_time_perc)::numeric(5, 2) AS min_cpu_iowait_perc + , MAX(hcs.cpu_iowait_time_perc)::numeric(5, 2) AS max_cpu_iowait_perc + 
FROM ( SELECT node_host + , stat_collect_time + , cpu_total_time + , cpu_busy_time + , cpu_iowait_time + , ( (cpu_busy_time * 100.0) / NULLIF(cpu_total_time, 0) ) + ::numeric(5, 2) AS cpu_busy_time_perc + , ( (cpu_iowait_time * 100.0) / NULLIF(cpu_total_time, 0) ) + ::numeric(5, 2) AS cpu_iowait_time_perc + FROM os_cpu_stat1 ) hcs + GROUP BY hcs.stat_collect_time + ORDER BY hcs.stat_collect_time; + +END; +$$ +LANGUAGE plpgsql;""", + """CREATE OR REPLACE FUNCTION pmk.get_cluster_mppdb_cpu_stat + ( IN i_start_pmk_time timestamp with time zone + , IN i_end_pmk_time timestamp with time zone + , OUT o_stat_collect_time timestamp + , OUT o_avg_mppdb_cpu_time numeric(21, 3) + , OUT o_avg_host_cpu_busy_time numeric(21, 3) + , OUT o_avg_host_cpu_total_time numeric(21, 3) + , OUT o_mppdb_cpu_time_perc_wrt_busy_time numeric(5, 2) + , OUT o_mppdb_cpu_time_perc_wrt_total_time numeric(5, 2) + , OUT o_min_mppdb_cpu_time_perc_wrt_busy_time numeric(5, 2) + , OUT o_max_mppdb_cpu_time_perc_wrt_busy_time numeric(5, 2) + , OUT o_min_mppdb_cpu_time_perc_wrt_total_time numeric(5, 2) + , OUT o_max_mppdb_cpu_time_perc_wrt_total_time numeric(5, 2) + ) +RETURNS SETOF record +AS +$$ +DECLARE l_error_message text; +BEGIN + + l_error_message := pmk.check_node_type(); + + IF l_error_message IS NOT NULL + THEN + pmk.put_line(l_error_message); + RETURN; + END IF; + + l_error_message := pmk.check_pmk_enabled(); + + IF l_error_message IS NOT NULL + THEN + pmk.put_line(l_error_message); + RETURN; + END IF; + + -- Verifying the input start and end times +pmk.check_start_end_dates(i_start_pmk_time, i_end_pmk_time, l_error_message); + + IF l_error_message IS NOT NULL + THEN + l_error_message := l_error_message || + ' during generation of cluster MPPDB CPU statistics ...'; + pmk.put_line(l_error_message); + RETURN; + END IF; + + RETURN QUERY + WITH snap AS + ( SELECT snapshot_id + , current_snapshot_time AS pmk_curr_collect_start_time + FROM pmk.pmk_snapshot + WHERE current_snapshot_time + BETWEEN i_start_pmk_time AND i_end_pmk_time + ) + , cpu_stat AS + ( SELECT s.snapshot_id + , s.pmk_curr_collect_start_time + , dns.node_name + , dns.node_host + , (dns.busy_time_delta * 10) AS host_cpu_busy_time + , (dns.idle_time_delta * 10) AS host_cpu_idle_time + , (dns.iowait_time_delta * 10) AS host_cpu_iowait_time + , (dns.db_cpu_time_delta * 10) AS mppdb_cpu_time + FROM pmk.pmk_snapshot_datanode_stat dns, snap s + WHERE dns.snapshot_id = s.snapshot_id + ) + , host_cpu_stat AS + ( SELECT pmk_curr_collect_start_time::timestamp AS stat_collect_time + , node_host + , host_cpu_busy_time + , host_cpu_idle_time + , host_cpu_iowait_time + , (host_cpu_busy_time+host_cpu_idle_time+host_cpu_iowait_time) + ::numeric AS host_cpu_total_time + FROM ( SELECT pmk_curr_collect_start_time + , node_host + , host_cpu_busy_time + , host_cpu_idle_time + , host_cpu_iowait_time + , rank() OVER (PARTITION BY snapshot_id, node_host + ORDER BY host_cpu_busy_time DESC, + node_name) AS node_cpu_busy_order + FROM cpu_stat + ) + WHERE node_cpu_busy_order = 1 + ) + , host_cpu_stat_summary AS + ( SELECT stat_collect_time + , AVG(host_cpu_busy_time)::numeric(21, 3) + AS avg_host_cpu_busy_time + , AVG(host_cpu_total_time)::numeric(21, 3) + AS avg_host_cpu_total_time + , SUM(host_cpu_busy_time)::numeric(21, 3) + AS tot_host_cpu_busy_time + , SUM(host_cpu_total_time)::numeric(21, 3) + AS tot_host_cpu_total_time + FROM host_cpu_stat + GROUP BY stat_collect_time + ) + , mppdb_cpu_stat0 AS + ( SELECT pmk_curr_collect_start_time::timestamp AS stat_collect_time + , node_name + , 
mppdb_cpu_time + , host_cpu_busy_time + , (host_cpu_busy_time+host_cpu_idle_time+host_cpu_iowait_time) + ::numeric AS host_cpu_total_time + FROM cpu_stat + ) + , mppdb_cpu_stat AS + ( SELECT stat_collect_time + , node_name + , mppdb_cpu_time + , ( (mppdb_cpu_time * 100.0) / NULLIF(host_cpu_busy_time, 0) ) + ::numeric(5, 2) AS mppdb_cpu_time_perc_wrt_busy_time + , ( (mppdb_cpu_time * 100.0) / NULLIF(host_cpu_total_time, 0) ) + ::numeric(5, 2) AS mppdb_cpu_time_perc_wrt_total_time + FROM mppdb_cpu_stat0 + ) + , mppdb_cpu_stat_summary AS + ( SELECT stat_collect_time + , AVG(mppdb_cpu_time)::numeric(21, 3) AS avg_mppdb_cpu_time + , SUM(mppdb_cpu_time)::numeric(21, 3) AS tot_mppdb_cpu_time + , MIN(mppdb_cpu_time_perc_wrt_busy_time)::numeric(5, 2) + AS min_mppdb_cpu_time_perc_wrt_busy_time + , MAX(mppdb_cpu_time_perc_wrt_busy_time)::numeric(5, 2) + AS max_mppdb_cpu_time_perc_wrt_busy_time + , MIN(mppdb_cpu_time_perc_wrt_total_time)::numeric(5, 2) + AS min_mppdb_cpu_time_perc_wrt_total_time + , MAX(mppdb_cpu_time_perc_wrt_total_time)::numeric(5, 2) + AS max_mppdb_cpu_time_perc_wrt_total_time + FROM mppdb_cpu_stat + GROUP BY stat_collect_time + ) + SELECT mcs.stat_collect_time + , mcs.avg_mppdb_cpu_time + , hcs.avg_host_cpu_busy_time + , hcs.avg_host_cpu_total_time + , CASE WHEN mcs.tot_mppdb_cpu_time < hcs.tot_host_cpu_busy_time + THEN ( (mcs.tot_mppdb_cpu_time * 100.0) + / NULLIF(hcs.tot_host_cpu_busy_time, 0) )::numeric(5, 2) + ELSE 100.00 + END AS mppdb_cpu_time_perc_wrt_busy_time + , CASE WHEN mcs.tot_mppdb_cpu_time < hcs.tot_host_cpu_total_time + THEN ( (mcs.tot_mppdb_cpu_time * 100.0) + / NULLIF(hcs.tot_host_cpu_total_time, 0) )::numeric(5, 2) + ELSE 100.00 + END AS mppdb_cpu_time_perc_wrt_total_time + , mcs.min_mppdb_cpu_time_perc_wrt_busy_time + , mcs.max_mppdb_cpu_time_perc_wrt_busy_time + , mcs.min_mppdb_cpu_time_perc_wrt_total_time + , mcs.max_mppdb_cpu_time_perc_wrt_total_time + FROM mppdb_cpu_stat_summary mcs + , host_cpu_stat_summary hcs + WHERE mcs.stat_collect_time = hcs.stat_collect_time + ORDER BY mcs.stat_collect_time; + +END; +$$ +LANGUAGE plpgsql;""", + """CREATE OR REPLACE FUNCTION pmk.get_cluster_shared_buffer_stat + ( IN i_start_pmk_time timestamp with time zone + , IN i_end_pmk_time timestamp with time zone + , OUT o_stat_collect_time timestamp + , OUT o_shared_buffer_hit_ratio numeric(5, 2) + , OUT o_min_shared_buffer_hit_ratio numeric(5, 2) + , OUT o_max_shared_buffer_hit_ratio numeric(5, 2) + , OUT o_total_blocks_read bigint + , OUT o_total_blocks_hit bigint + ) +RETURNS SETOF record +AS +$$ +DECLARE l_error_message text; +BEGIN + + l_error_message := pmk.check_node_type(); + + IF l_error_message IS NOT NULL + THEN + pmk.put_line(l_error_message); + RETURN; + END IF; + + l_error_message := pmk.check_pmk_enabled(); + + IF l_error_message IS NOT NULL + THEN + pmk.put_line(l_error_message); + RETURN; + END IF; + + -- Verifying the input start and end times + pmk.check_start_end_dates(i_start_pmk_time, i_end_pmk_time, l_error_message); + + IF l_error_message IS NOT NULL + THEN + l_error_message := l_error_message || + ' during generation of cluster shared buffer statistics ...'; + pmk.put_line(l_error_message); + RETURN; + END IF; + + RETURN QUERY + WITH snap AS + ( SELECT snapshot_id + , current_snapshot_time AS pmk_curr_collect_start_time + FROM pmk.pmk_snapshot + WHERE current_snapshot_time + BETWEEN i_start_pmk_time AND i_end_pmk_time + ) + SELECT pmk_curr_collect_start_time::timestamp AS stat_collect_time + , ( (total_blocks_hit * 100.0) + / 
NULLIF(total_blocks_read+total_blocks_hit, 0) )::numeric(5, 2) + AS shared_buffer_hit_ratio + , min_shared_buffer_hit_ratio + , max_shared_buffer_hit_ratio + , total_blocks_read + , total_blocks_hit + FROM ( SELECT pmk_curr_collect_start_time + , SUM(blocks_read)::bigint AS total_blocks_read + , SUM(blocks_hit)::bigint AS total_blocks_hit + , MIN(shared_buffer_hit_ratio)::numeric(5, 2) + AS min_shared_buffer_hit_ratio + , MAX(shared_buffer_hit_ratio)::numeric(5, 2) + AS max_shared_buffer_hit_ratio + FROM ( SELECT s.pmk_curr_collect_start_time + , node_name + , blocks_read_delta AS blocks_read + , blocks_hit_delta AS blocks_hit + , ( (blocks_hit_delta * 100.0) + / NULLIF((blocks_read_delta + blocks_hit_delta), 0) ) + ::numeric(5, 2) AS shared_buffer_hit_ratio + FROM pmk.pmk_snapshot_datanode_stat dns, snap s + WHERE dns.snapshot_id = s.snapshot_id + ) + GROUP BY pmk_curr_collect_start_time + ) + ORDER BY stat_collect_time; + +END; +$$ +LANGUAGE plpgsql;""", + """CREATE OR REPLACE FUNCTION pmk.get_cluster_memory_sort_stat + ( IN i_start_pmk_time timestamp with time zone + , IN i_end_pmk_time timestamp with time zone + , OUT o_stat_collect_time timestamp + , OUT o_memory_sort_ratio numeric(5, 2) + , OUT o_min_memory_sort_ratio numeric(5, 2) + , OUT o_max_memory_sort_ratio numeric(5, 2) + , OUT o_total_memory_sorts bigint + , OUT o_total_disk_sorts bigint + ) +RETURNS SETOF record +AS +$$ +DECLARE l_error_message text; +BEGIN + + l_error_message := pmk.check_node_type(); + + IF l_error_message IS NOT NULL + THEN + pmk.put_line(l_error_message); + RETURN; + END IF; + + l_error_message := pmk.check_pmk_enabled(); + + IF l_error_message IS NOT NULL + THEN + pmk.put_line(l_error_message); + RETURN; + END IF; + + -- Verifying the input start and end times + pmk.check_start_end_dates(i_start_pmk_time, i_end_pmk_time, l_error_message); + + IF l_error_message IS NOT NULL + THEN + l_error_message := l_error_message || + ' during generation of cluster memory sort statistics ...'; + pmk.put_line(l_error_message); + RETURN; + END IF; + + RETURN QUERY + WITH snap AS + ( SELECT snapshot_id + , current_snapshot_time AS pmk_curr_collect_start_time + FROM pmk.pmk_snapshot + WHERE current_snapshot_time + BETWEEN i_start_pmk_time AND i_end_pmk_time + ) + SELECT pmk_curr_collect_start_time::timestamp AS stat_collect_time + , ( (total_memory_sorts * 100.0) + / NULLIF(total_disk_sorts+total_memory_sorts, 0) ) + ::numeric(5, 2) AS memory_sort_ratio + , min_memory_sort_ratio + , max_memory_sort_ratio + , total_memory_sorts + , total_disk_sorts + FROM ( SELECT pmk_curr_collect_start_time + , SUM(memory_sorts)::bigint AS total_memory_sorts + , SUM(disk_sorts)::bigint AS total_disk_sorts + , MIN(memory_sort_ratio)::numeric(5, 2) AS min_memory_sort_ratio + , MAX(memory_sort_ratio)::numeric(5, 2) AS max_memory_sort_ratio + FROM ( SELECT s.pmk_curr_collect_start_time + , node_name + , sorts_in_memory_delta AS memory_sorts + , sorts_in_disk_delta AS disk_sorts + , ( (sorts_in_memory_delta * 100.0) + / NULLIF((sorts_in_disk_delta + sorts_in_memory_delta) + , 0) )::numeric(5, 2) AS memory_sort_ratio + FROM pmk.pmk_snapshot_datanode_stat dns, snap s + WHERE dns.snapshot_id = s.snapshot_id + ) + GROUP BY pmk_curr_collect_start_time + ) + ORDER BY stat_collect_time; + +END; +$$ +LANGUAGE plpgsql;""", + """CREATE OR REPLACE FUNCTION pmk.get_cluster_io_stat + ( IN i_start_pmk_time timestamp with time zone + , IN i_end_pmk_time timestamp with time zone + , OUT o_stat_collect_time timestamp + , OUT o_number_of_files int + , OUT 
o_physical_reads bigint + , OUT o_physical_writes bigint + , OUT o_read_time bigint + , OUT o_write_time bigint + , OUT o_avg_read_per_sec numeric(20,2) + , OUT o_avg_read_time numeric(20,3) + , OUT o_avg_write_per_sec numeric(20,2) + , OUT o_avg_write_time numeric(20,3) + , OUT o_min_node_read_per_sec numeric(20,2) + , OUT o_max_node_read_per_sec numeric(20,2) + , OUT o_min_node_read_time numeric(20,3) + , OUT o_max_node_read_time numeric(20,3) + , OUT o_min_node_write_per_sec numeric(20,2) + , OUT o_max_node_write_per_sec numeric(20,2) + , OUT o_min_node_write_time numeric(20,3) + , OUT o_max_node_write_time numeric(20,3) + ) +RETURNS SETOF record +AS +$$ +DECLARE l_error_message text; +BEGIN + + l_error_message := pmk.check_node_type(); + + IF l_error_message IS NOT NULL + THEN + pmk.put_line(l_error_message); + RETURN; + END IF; + + l_error_message := pmk.check_pmk_enabled(); + + IF l_error_message IS NOT NULL + THEN + pmk.put_line(l_error_message); + RETURN; + END IF; + + -- Verifying the input start and end times + pmk.check_start_end_dates(i_start_pmk_time, i_end_pmk_time, l_error_message); + + IF l_error_message IS NOT NULL + THEN + l_error_message := l_error_message || + ' during generation of cluster I/O statistics ...'; + pmk.put_line(l_error_message); + RETURN; + END IF; + + RETURN QUERY + WITH snap AS + ( SELECT snapshot_id + , current_snapshot_time AS pmk_curr_collect_start_time + FROM pmk.pmk_snapshot + WHERE current_snapshot_time + BETWEEN i_start_pmk_time AND i_end_pmk_time + ) + SELECT pmk_curr_collect_start_time::timestamp AS stat_collect_time + , number_of_files + , physical_reads + , physical_writes + , read_time + , write_time + , ( physical_reads * 1000000.0 / NULLIF(read_time, 0) ) + ::numeric(20,2) AS avg_read_per_sec + , ( read_time * 1.0 / NULLIF(physical_reads, 0) ) + ::numeric(20,3) AS avg_read_time + , ( physical_writes * 1000000.0 / NULLIF(write_time, 0) ) + ::numeric(20,2) AS avg_write_per_sec + , ( write_time * 1.0 / NULLIF(physical_writes, 0) ) + ::numeric(20,3) AS avg_write_time + , min_node_read_per_sec + , max_node_read_per_sec + , min_node_read_time + , max_node_read_time + , min_node_write_per_sec + , max_node_write_per_sec + , min_node_write_time + , max_node_write_time + FROM ( SELECT pmk_curr_collect_start_time + , SUM(number_of_files)::int AS number_of_files + , SUM(physical_reads_delta)::bigint AS physical_reads + , SUM(physical_writes_delta)::bigint AS physical_writes + , SUM(read_time_delta)::bigint AS read_time + , SUM(write_time_delta)::bigint AS write_time + , MIN(node_read_per_sec) AS min_node_read_per_sec + , MAX(node_read_per_sec) AS max_node_read_per_sec + , MIN(node_read_time) AS min_node_read_time + , MAX(node_read_time) AS max_node_read_time + , MIN(node_write_per_sec) AS min_node_write_per_sec + , MAX(node_write_per_sec) AS max_node_write_per_sec + , MIN(node_write_time) AS min_node_write_time + , MAX(node_write_time) AS max_node_write_time + FROM ( SELECT s.pmk_curr_collect_start_time + , node_name + , number_of_files + , physical_reads_delta + , physical_writes_delta + , read_time_delta + , write_time_delta + , ( physical_reads_delta * 1000000.0 + / NULLIF(read_time_delta, 0) )::numeric(20,2) + AS node_read_per_sec + , ( read_time_delta * 1.0 + / NULLIF(physical_reads_delta, 0) )::numeric(20,3) + AS node_read_time + , ( physical_writes_delta * 1000000.0 + / NULLIF(write_time_delta, 0) )::numeric(20,2) + AS node_write_per_sec + , ( write_time_delta * 1.0 + / NULLIF(physical_writes_delta, 0) )::numeric(20,3) + AS node_write_time + 
FROM pmk.pmk_snapshot_datanode_stat dns, snap s + WHERE dns.snapshot_id = s.snapshot_id + ) + GROUP BY pmk_curr_collect_start_time + ) + ORDER BY stat_collect_time; + +END; +$$ +LANGUAGE plpgsql;""", + """CREATE OR REPLACE FUNCTION pmk.get_cluster_disk_usage_stat + ( IN i_start_pmk_time timestamp with time zone + , IN i_end_pmk_time timestamp with time zone + , IN i_db_size text + , OUT o_stat_collect_time timestamp + , OUT o_tot_datanode_db_size text + , OUT o_max_datanode_db_size text + , OUT o_tot_physical_writes bigint + , OUT o_max_node_physical_writes bigint + , OUT o_max_node_write_per_sec numeric(20,2) + , OUT o_avg_write_per_sec numeric(20,2) + ) +RETURNS SETOF record +AS +$$ +DECLARE l_error_message text; +DECLARE l_db_size bigint; +BEGIN + + l_error_message := pmk.check_node_type(); + + IF l_error_message IS NOT NULL + THEN + pmk.put_line(l_error_message); + RETURN; + END IF; + + l_error_message := pmk.check_pmk_enabled(); + + IF l_error_message IS NOT NULL + THEN + pmk.put_line(l_error_message); + RETURN; + END IF; + + -- Verifying the input start and end times + pmk.check_start_end_dates(i_start_pmk_time, i_end_pmk_time, l_error_message); + + IF l_error_message IS NOT NULL + THEN + l_error_message := l_error_message || + ' during generation of cluster disk usage statistics ...'; + pmk.put_line(l_error_message); + RETURN; + END IF; + + IF i_db_size = '0' + THEN + SELECT SUM(pg_database_size(oid))::bigint + INTO l_db_size + FROM pg_database; + ELSE + SELECT SUM(i_db_size)::bigint + INTO l_db_size; + END IF; + RETURN QUERY + WITH snap AS + ( SELECT snapshot_id + , current_snapshot_time AS pmk_curr_collect_start_time + FROM pmk.pmk_snapshot + WHERE current_snapshot_time + BETWEEN i_start_pmk_time AND i_end_pmk_time + ) + , disk_stat AS + ( + SELECT s.pmk_curr_collect_start_time + , db_size + , physical_writes_delta + , write_time_delta + , ( physical_writes_delta * 1000000.0 / NULLIF(write_time_delta, 0) ) + ::numeric(20,2) AS node_write_per_sec + FROM pmk.pmk_snapshot_datanode_stat dns, snap s + WHERE dns.snapshot_id = s.snapshot_id + ) + SELECT pmk_curr_collect_start_time::timestamp AS stat_collect_time + , pg_size_pretty(tot_datanode_db_size) AS tot_datanode_db_size + , pg_size_pretty(max_datanode_db_size) AS max_datanode_db_size + , tot_physical_writes + , max_node_physical_writes + , max_node_write_per_sec + , ( tot_physical_writes * 1000000.0 / NULLIF(tot_write_time, 0) ) + ::numeric(20,2) AS avg_write_per_sec + FROM ( SELECT pmk_curr_collect_start_time + , l_db_size::bigint AS tot_datanode_db_size + , MAX(db_size)::bigint AS max_datanode_db_size + , SUM(physical_writes_delta)::bigint AS tot_physical_writes + , SUM(write_time_delta)::bigint AS tot_write_time + , MAX(physical_writes_delta)::bigint AS max_node_physical_writes + , MAX(node_write_per_sec) AS max_node_write_per_sec + FROM disk_stat + GROUP BY pmk_curr_collect_start_time + ) + ORDER BY pmk_curr_collect_start_time; + +END; +$$ +LANGUAGE plpgsql;""", + """CREATE OR REPLACE FUNCTION pmk.get_cluster_active_sql_count + ( IN i_start_pmk_time timestamp with time zone + , IN i_end_pmk_time timestamp with time zone + , OUT o_stat_collect_time timestamp + , OUT o_tot_active_sql_count int + , OUT o_avg_active_sql_count numeric(9, 2) + , OUT o_min_active_sql_count int + , OUT o_max_active_sql_count int + ) +RETURNS SETOF record +AS +$$ +DECLARE l_error_message text; +BEGIN + + l_error_message := pmk.check_node_type(); + + IF l_error_message IS NOT NULL + THEN + pmk.put_line(l_error_message); + RETURN; + END IF; + + 
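+    -- Every pmk.get_cluster_* report function repeats the same two guards:
+    -- pmk.check_node_type() above and pmk.check_pmk_enabled() below, each
+    -- bailing out early through pmk.put_line() when it fails.
+    -- Illustrative call of this function (the timestamps are placeholders):
+    --   SELECT * FROM pmk.get_cluster_active_sql_count(now() - interval '1 day', now());
+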
l_error_message := pmk.check_pmk_enabled(); + + IF l_error_message IS NOT NULL + THEN + pmk.put_line(l_error_message); + RETURN; + END IF; + + -- Verifying the input start and end times + pmk.check_start_end_dates(i_start_pmk_time, i_end_pmk_time, l_error_message); + + IF l_error_message IS NOT NULL + THEN + l_error_message := l_error_message || + ' during generation of active SQL count statistics ...'; + pmk.put_line(l_error_message); + RETURN; + END IF; + + RETURN QUERY + WITH snap AS + ( SELECT snapshot_id + , current_snapshot_time AS pmk_curr_collect_start_time + FROM pmk.pmk_snapshot + WHERE current_snapshot_time + BETWEEN i_start_pmk_time AND i_end_pmk_time + ) + SELECT pmk_curr_collect_start_time::timestamp AS stat_collect_time + , tot_active_sql_count + , avg_active_sql_count + , min_active_sql_count + , max_active_sql_count + FROM ( SELECT s.pmk_curr_collect_start_time + , SUM(active_sql_count)::int AS tot_active_sql_count + , ROUND(AVG(active_sql_count), 2)::numeric(9, 2) + AS avg_active_sql_count + , MIN(active_sql_count)::int AS min_active_sql_count + , MAX(active_sql_count)::int AS max_active_sql_count + FROM pmk.pmk_snapshot_datanode_stat dns, snap s + WHERE dns.snapshot_id = s.snapshot_id + GROUP BY s.pmk_curr_collect_start_time + ) + ORDER BY stat_collect_time; + +END; +$$ +LANGUAGE plpgsql;""", + """CREATE OR REPLACE FUNCTION pmk.get_cluster_session_count + ( IN i_start_pmk_time timestamp with time zone + , IN i_end_pmk_time timestamp with time zone + , OUT o_stat_collect_time timestamp + , OUT o_tot_session_count int + , OUT o_avg_session_count numeric(9, 2) + , OUT o_min_session_count int + , OUT o_max_session_count int + ) +RETURNS SETOF record +AS +$$ +DECLARE l_error_message text; +BEGIN + + l_error_message := pmk.check_node_type(); + + IF l_error_message IS NOT NULL + THEN + pmk.put_line(l_error_message); + RETURN; + END IF; + + l_error_message := pmk.check_pmk_enabled(); + + IF l_error_message IS NOT NULL + THEN + pmk.put_line(l_error_message); + RETURN; + END IF; + + -- Verifying the input start and end times + pmk.check_start_end_dates(i_start_pmk_time, i_end_pmk_time, l_error_message); + + IF l_error_message IS NOT NULL + THEN + l_error_message := l_error_message || + ' during generation of session count statistics ...'; + pmk.put_line(l_error_message); + RETURN; + END IF; + + RETURN QUERY + WITH snap AS + ( SELECT snapshot_id + , current_snapshot_time AS pmk_curr_collect_start_time + FROM pmk.pmk_snapshot + WHERE current_snapshot_time + BETWEEN i_start_pmk_time AND i_end_pmk_time + ) + SELECT pmk_curr_collect_start_time::timestamp AS stat_collect_time + , tot_session_count + , avg_session_count + , min_session_count + , max_session_count + FROM ( SELECT s.pmk_curr_collect_start_time + , SUM(session_count)::int AS tot_session_count + , ROUND(AVG(session_count), 2)::numeric(9, 2) + AS avg_session_count + , MIN(session_count)::int AS min_session_count + , MAX(session_count)::int AS max_session_count + FROM pmk.pmk_snapshot_datanode_stat dns, snap s + WHERE dns.snapshot_id = s.snapshot_id + GROUP BY s.pmk_curr_collect_start_time + ) + ORDER BY stat_collect_time; + +END; +$$ +LANGUAGE plpgsql;""", + """CREATE OR REPLACE FUNCTION pmk.get_node_cpu_stat + ( IN i_node_name text + , IN i_start_pmk_time timestamp with time zone + , IN i_end_pmk_time timestamp with time zone + , OUT o_node_type char(1) + , OUT o_node_name text + , OUT o_node_host text + , OUT o_stat_collect_time timestamp + , OUT o_mppdb_cpu_time bigint + , OUT o_host_cpu_busy_time bigint + , OUT 
o_host_cpu_total_time bigint + , OUT o_mppdb_cpu_time_perc_wrt_busy_time numeric(5, 2) + , OUT o_mppdb_cpu_time_perc_wrt_total_time numeric(5, 2) + ) +RETURNS SETOF record +AS +$$ +DECLARE l_node_type char(1); + l_node_name text; + l_error_message text; +BEGIN + + l_error_message := pmk.check_node_type(); + + IF l_error_message IS NOT NULL + THEN + pmk.put_line(l_error_message); + RETURN; + END IF; + + l_error_message := pmk.check_pmk_enabled(); + + IF l_error_message IS NOT NULL + THEN + pmk.put_line(l_error_message); + RETURN; + END IF; + + l_node_name := UPPER(i_node_name); + + IF l_node_name <> 'ALL' + THEN + l_node_type := 'D'; + + IF l_node_type IS NULL + THEN + l_error_message := 'ERROR:: Invalid node name ("' || i_node_name + || '") provided during generation of node (MPPDB instance) + CPU statistics ...'; + + pmk.put_line(l_error_message); + RETURN; + END IF; + END IF; + + -- Verifying the input start and end times + pmk.check_start_end_dates(i_start_pmk_time, i_end_pmk_time, l_error_message); + + IF l_error_message IS NOT NULL + THEN + l_error_message := l_error_message || ' during generation of node + (MPPDB instance) CPU statistics ...'; + pmk.put_line(l_error_message); + RETURN; + END IF; + + IF l_node_name = 'ALL' + THEN + RETURN QUERY + WITH snap AS + ( SELECT snapshot_id + , current_snapshot_time AS pmk_curr_collect_start_time + FROM pmk.pmk_snapshot + WHERE current_snapshot_time + BETWEEN i_start_pmk_time AND i_end_pmk_time + ) + SELECT node_type + , node_name + , node_host + , pmk_curr_collect_start_time::timestamp AS stat_collect_time + , mppdb_cpu_time + , host_cpu_busy_time + , (host_cpu_busy_time+host_cpu_idle_time+host_cpu_iowait_time) + ::bigint AS host_cpu_total_time + , ( (LEAST(mppdb_cpu_time,host_cpu_busy_time) * 100.0) + / NULLIF(host_cpu_busy_time, 0) )::numeric(5, 2) + AS mppdb_cpu_time_perc_wrt_busy_time + , ( (LEAST(mppdb_cpu_time,host_total_cpu_time) * 100.0) + / NULLIF((host_cpu_busy_time + host_cpu_idle_time + + host_cpu_iowait_time), 0) )::numeric(5, 2) + AS mppdb_cpu_time_perc_wrt_total_time + FROM ( SELECT 'D'::char(1) AS node_type + , node_name + , node_host + , s.pmk_curr_collect_start_time + , (busy_time_delta * 10)::bigint AS host_cpu_busy_time + , (idle_time_delta * 10)::bigint AS host_cpu_idle_time + , (iowait_time_delta * 10)::bigint AS host_cpu_iowait_time + , ((busy_time_delta+idle_time_delta+iowait_time_delta)*10) + ::bigint AS host_total_cpu_time + , (db_cpu_time_delta * 10)::bigint AS mppdb_cpu_time + FROM pmk.pmk_snapshot_datanode_stat dns, snap s + WHERE dns.snapshot_id = s.snapshot_id + ) + ORDER BY node_type, node_name, stat_collect_time; + + ELSE + RETURN QUERY + WITH snap AS + ( SELECT snapshot_id + , current_snapshot_time AS pmk_curr_collect_start_time + FROM pmk.pmk_snapshot + WHERE current_snapshot_time BETWEEN i_start_pmk_time AND i_end_pmk_time + ) + SELECT 'D'::char(1) AS node_type + , node_name + , node_host + , pmk_curr_collect_start_time::timestamp AS stat_collect_time + , mppdb_cpu_time + , host_cpu_busy_time + , (host_cpu_busy_time+host_cpu_idle_time+host_cpu_iowait_time) + ::bigint AS host_cpu_total_time + , ( (mppdb_cpu_time * 100.0) / NULLIF(host_cpu_busy_time, 0) ) + ::numeric(5, 2) AS mppdb_cpu_time_perc_wrt_busy_time + , ( (mppdb_cpu_time * 100.0) / NULLIF((host_cpu_busy_time + + host_cpu_idle_time+host_cpu_iowait_time), 0) )::numeric(5, 2) + AS mppdb_cpu_time_perc_wrt_total_time + FROM ( SELECT node_name + , node_host + , s.pmk_curr_collect_start_time + , (busy_time_delta * 10)::bigint AS host_cpu_busy_time + , 
(idle_time_delta * 10)::bigint AS host_cpu_idle_time + , (iowait_time_delta * 10)::bigint AS host_cpu_iowait_time + , (db_cpu_time_delta * 10)::bigint AS mppdb_cpu_time + FROM pmk.pmk_snapshot_datanode_stat dns, snap s + WHERE dns.snapshot_id = s.snapshot_id + AND UPPER(node_name) = l_node_name + ) + ORDER BY node_name, stat_collect_time; + END IF; -- end of l_node_name = 'ALL' + +END; +$$ +LANGUAGE plpgsql;""", + """CREATE OR REPLACE FUNCTION pmk.get_node_memory_stat + ( IN i_node_name text + , IN i_start_pmk_time timestamp with time zone + , IN i_end_pmk_time timestamp with time zone + , OUT o_node_type char(1) + , OUT o_node_name text + , OUT o_node_host text + , OUT o_stat_collect_time timestamp + , OUT o_physical_memory bigint + , OUT o_db_memory_usage bigint + , OUT o_shared_buffer_size bigint + , OUT o_blocks_read bigint + , OUT o_blocks_hit bigint + , OUT o_shared_buffer_hit_ratio numeric(5, 2) + , OUT o_work_memory_size bigint + , OUT o_sorts_in_memory bigint + , OUT o_sorts_in_disk bigint + , OUT o_in_memory_sort_ratio numeric(5, 2) + ) +RETURNS SETOF record +AS +$$ +DECLARE l_node_type char(1); + l_node_name text; + l_error_message text; +BEGIN + + l_error_message := pmk.check_node_type(); + + IF l_error_message IS NOT NULL + THEN + pmk.put_line(l_error_message); + RETURN; + END IF; + + l_error_message := pmk.check_pmk_enabled(); + + IF l_error_message IS NOT NULL + THEN + pmk.put_line(l_error_message); + RETURN; + END IF; + + l_node_name := UPPER(i_node_name); + + IF l_node_name <> 'ALL' + THEN + l_node_type := 'D'; + + IF l_node_type IS NULL + THEN + l_error_message := 'ERROR:: Invalid node name ("' || i_node_name + || '") provided during generation of node (MPPDB instance) memory + statistics ...'; + pmk.put_line(l_error_message); + RETURN; + END IF; + END IF; + + -- Verifying the input start and end times + pmk.check_start_end_dates(i_start_pmk_time, i_end_pmk_time, l_error_message); + + IF l_error_message IS NOT NULL + THEN + l_error_message := l_error_message || ' during generation of node + (MPPDB instance) memory statistics ...'; + pmk.put_line(l_error_message); + RETURN; + END IF; + + IF l_node_name = 'ALL' + THEN + + RETURN QUERY + WITH snap AS + ( SELECT snapshot_id + , current_snapshot_time AS pmk_curr_collect_start_time + FROM pmk.pmk_snapshot + WHERE current_snapshot_time + BETWEEN i_start_pmk_time AND i_end_pmk_time + ) + SELECT node_type + , node_name + , node_host + , pmk_curr_collect_start_time::timestamp AS stat_collect_time + , physical_memory + , db_memory_usage + , shared_buffer_size + , blocks_read + , blocks_hit + , ( (blocks_hit * 100.0) / NULLIF((blocks_read + blocks_hit), 0) ) + ::numeric(5, 2) AS shared_buffer_hit_ratio + , work_memory_size + , sorts_in_memory + , sorts_in_disk + , ( (sorts_in_memory * 100.0) / NULLIF((sorts_in_disk + sorts_in_memory) + , 0) )::numeric(5, 2) AS in_memory_sort_ratio + FROM ( SELECT 'D'::char(1) AS node_type + , node_name + , node_host + , s.pmk_curr_collect_start_time + , physical_memory + , db_memory_usage + , shared_buffer_size + , blocks_read_delta AS blocks_read + , blocks_hit_delta AS blocks_hit + , work_memory_size + , sorts_in_memory_delta AS sorts_in_memory + , sorts_in_disk_delta AS sorts_in_disk + FROM pmk.pmk_snapshot_datanode_stat dns, snap s + WHERE dns.snapshot_id = s.snapshot_id + ) + ORDER BY node_type, node_name, stat_collect_time; + ELSE + RETURN QUERY + WITH snap AS + ( SELECT snapshot_id + , current_snapshot_time AS pmk_curr_collect_start_time + FROM pmk.pmk_snapshot + WHERE current_snapshot_time + 
BETWEEN i_start_pmk_time AND i_end_pmk_time + ) + SELECT 'D'::char(1) AS node_type + , node_name + , node_host + , pmk_curr_collect_start_time::timestamp AS stat_collect_time + , physical_memory + , db_memory_usage + , shared_buffer_size + , blocks_read + , blocks_hit + , ( (blocks_hit * 100.0) / NULLIF((blocks_read + blocks_hit), 0) ) + ::numeric(5, 2) AS shared_buffer_hit_ratio + , work_memory_size + , sorts_in_memory + , sorts_in_disk + , ( (sorts_in_memory * 100.0) / NULLIF((sorts_in_disk + + sorts_in_memory), 0) )::numeric(5, 2) AS in_memory_sort_ratio + FROM ( SELECT node_name + , node_host + , s.pmk_curr_collect_start_time + , physical_memory + , db_memory_usage + , shared_buffer_size + , blocks_read_delta AS blocks_read + , blocks_hit_delta AS blocks_hit + , work_memory_size + , sorts_in_memory_delta AS sorts_in_memory + , sorts_in_disk_delta AS sorts_in_disk + FROM pmk.pmk_snapshot_datanode_stat dns, snap s + WHERE dns.snapshot_id = s.snapshot_id + AND UPPER(node_name) = l_node_name + ) + ORDER BY node_name, stat_collect_time; + END IF; -- end of l_node_name = 'ALL' + +END; +$$ +LANGUAGE plpgsql;""", + """CREATE OR REPLACE FUNCTION pmk.get_node_io_stat + ( IN i_node_name text + , IN i_start_pmk_time timestamp with time zone + , IN i_end_pmk_time timestamp with time zone + , OUT o_node_type char(1) + , OUT o_node_name text + , OUT o_node_host text + , OUT o_stat_collect_time timestamp + , OUT o_number_of_files int + , OUT o_physical_reads bigint + , OUT o_physical_writes bigint + , OUT o_read_time bigint + , OUT o_write_time bigint + , OUT o_avg_read_per_sec numeric(20,2) + , OUT o_avg_read_time numeric(20,3) + , OUT o_avg_write_per_sec numeric(20,2) + , OUT o_avg_write_time numeric(20,3) + ) +RETURNS SETOF record +AS +$$ +DECLARE l_node_type char(1); + l_node_name text; + l_error_message text; +BEGIN + + l_error_message := pmk.check_node_type(); + + IF l_error_message IS NOT NULL + THEN + pmk.put_line(l_error_message); + RETURN; + END IF; + + l_error_message := pmk.check_pmk_enabled(); + + IF l_error_message IS NOT NULL + THEN + pmk.put_line(l_error_message); + RETURN; + END IF; + + l_node_name := UPPER(i_node_name); + + IF l_node_name <> 'ALL' + THEN + l_node_type := 'D'; + + IF l_node_type IS NULL + THEN + l_error_message := 'ERROR:: Invalid node name ("' || i_node_name + || '") provided during generation of node (MPPDB instance) + I/O statistics ...'; + pmk.put_line(l_error_message); + RETURN; + END IF; + END IF; + + -- Verifying the input start and end times + pmk.check_start_end_dates(i_start_pmk_time, i_end_pmk_time, l_error_message); + + IF l_error_message IS NOT NULL + THEN + l_error_message := l_error_message || ' during generation of node + (MPPDB instance) I/O statistics ...'; + pmk.put_line(l_error_message); + RETURN; + END IF; + + IF l_node_name = 'ALL' + THEN + + RETURN QUERY + WITH snap AS + ( SELECT snapshot_id + , current_snapshot_time AS pmk_curr_collect_start_time + FROM pmk.pmk_snapshot + WHERE current_snapshot_time + BETWEEN i_start_pmk_time AND i_end_pmk_time + ) + SELECT node_type + , node_name + , node_host + , pmk_curr_collect_start_time::timestamp AS stat_collect_time + , number_of_files + , physical_reads + , physical_writes + , read_time + , write_time + , ( physical_reads * 1000000.0 / NULLIF(read_time, 0) ) + ::numeric(20,2) AS avg_read_per_sec + , ( read_time * 1.0 / NULLIF(physical_reads, 0) ) + ::numeric(20,3) AS avg_read_time + , ( physical_writes * 1000000.0 / NULLIF(write_time, 0) ) + ::numeric(20,2) AS avg_write_per_sec + , ( write_time * 1.0 / 
NULLIF(physical_writes, 0) ) + ::numeric(20,3) AS avg_write_time + FROM ( SELECT 'D'::char(1) AS node_type + , node_name + , node_host + , s.pmk_curr_collect_start_time + , number_of_files + , physical_reads_delta AS physical_reads + , physical_writes_delta AS physical_writes + , read_time_delta AS read_time + , write_time_delta AS write_time + FROM pmk.pmk_snapshot_datanode_stat dns, snap s + WHERE dns.snapshot_id = s.snapshot_id + ) + ORDER BY node_type, node_name, stat_collect_time; + + ELSE + RETURN QUERY + WITH snap AS + ( SELECT snapshot_id + , current_snapshot_time AS pmk_curr_collect_start_time + FROM pmk.pmk_snapshot + WHERE current_snapshot_time + BETWEEN i_start_pmk_time AND i_end_pmk_time + ) + SELECT 'D'::char(1) AS node_type + , node_name + , node_host + , pmk_curr_collect_start_time::timestamp AS stat_collect_time + , number_of_files + , physical_reads + , physical_writes + , read_time + , write_time + , ( physical_reads * 1000000.0 / NULLIF(read_time, 0) ) + ::numeric(20,2) AS avg_read_per_sec + , ( read_time * 1.0 / NULLIF(physical_reads, 0) ) + ::numeric(20,3) AS avg_read_time + , ( physical_writes * 1000000.0 / NULLIF(write_time, 0) ) + ::numeric(20,2) AS avg_write_per_sec + , ( write_time * 1.0 / NULLIF(physical_writes, 0) ) + ::numeric(20,3) AS avg_write_time + FROM ( SELECT node_name + , node_host + , pmk_curr_collect_start_time + , number_of_files + , physical_reads_delta AS physical_reads + , physical_writes_delta AS physical_writes + , read_time_delta AS read_time + , write_time_delta AS write_time + FROM pmk.pmk_snapshot_datanode_stat dns, snap s + WHERE dns.snapshot_id = s.snapshot_id + AND UPPER(node_name) = l_node_name + ) + ORDER BY node_name, stat_collect_time; + END IF; -- end of l_node_name = 'ALL' + +END; +$$ +LANGUAGE plpgsql;""", + """CREATE OR REPLACE FUNCTION pmk.get_session_cpu_stat + ( IN i_node_name text + , IN i_top_n_sessions smallint + , OUT o_node_name name + , OUT o_db_name name + , OUT o_user_name name + , OUT o_client_hostname text + , OUT o_session_start_time timestamp + , OUT o_xact_start_time timestamp + , OUT o_waiting boolean + , OUT o_state text + , OUT o_query text + , OUT o_session_cpu_time bigint + , OUT o_mppdb_cpu_time bigint + , OUT o_mppdb_cpu_time_perc numeric(5, 2) + , OUT o_avg_sql_exec_time numeric(15, 3) + ) +RETURNS SETOF RECORD +AS +$$ +DECLARE l_node_query text; + l_execute_query text; +BEGIN + + FOR i IN ( SELECT node_name + FROM DBE_PERF.node_name nl + WHERE UPPER(nl.node_name) = + COALESCE(NULLIF(UPPER(i_node_name), 'ALL'), UPPER(nl.node_name)) + ) + LOOP + + l_node_query := 'WITH sess_time_stat0 AS + ( SELECT sessid, stat_name + , (value/1000.0)::numeric AS stat_value + -- converting to millisecond + FROM gs_session_time + WHERE stat_name IN ( ''CPU_TIME'', + ''EXECUTION_TIME'') + ) + , sess_time_stat AS + ( SELECT DISTINCT stso.sessid + , (SELECT stsi.stat_value FROM + sess_time_stat0 stsi WHERE stsi.sessid = + stso.sessid AND stsi.stat_name = + ''CPU_TIME'') AS session_cpu_time + , (SELECT stsi.stat_value FROM + sess_time_stat0 stsi WHERE stsi.sessid = + stso.sessid AND stsi.stat_name = + ''EXECUTION_TIME'') AS session_sql_time + FROM sess_time_stat0 stso + ) + , mppdb_cpu_time AS + ( SELECT (total_cpu()*10.0)::bigint AS mppdb_cpu_time + -- converting to millisecond + FROM DUAL + ) + , sess_cpu_stat AS + ( SELECT ''' || i.node_name || ''' + ::name AS node_name + , a.datname::name AS db_name + , a.usename::name AS user_name + , a.client_hostname + , date_trunc(''second'', a.backend_start):: + timestamp AS 
session_start_time + , date_trunc(''second'', a.xact_start):: + timestamp AS xact_start_time + , a.waiting + , a.state, a.query + , ROUND(st.session_cpu_time)::bigint + AS session_cpu_time + , m.mppdb_cpu_time + , ( (st.session_cpu_time * 100.0) + / NULLIF(m.mppdb_cpu_time, 0) ) + ::numeric(5, 2) AS mppdb_cpu_time_perc + , st.sessid + , st.session_sql_time + FROM pg_stat_activity a + , sess_time_stat st + , mppdb_cpu_time m + WHERE a.state IN (''active'', + ''fastpath function call'', ''retrying'') + AND a.pid = sessionid2pid(st.sessid::cstring) + ORDER BY st.session_cpu_time DESC + , mppdb_cpu_time_perc DESC + LIMIT ' || i_top_n_sessions || ' + ) + SELECT scs.node_name + , scs.db_name + , scs.user_name + , scs.client_hostname + , scs.session_start_time + , scs.xact_start_time + , scs.waiting + , scs.state + , scs.query + , scs.session_cpu_time + , scs.mppdb_cpu_time + , scs.mppdb_cpu_time_perc + , ( scs.session_sql_time + / NULLIF(ss.value, 0) )::numeric(15, 3) + AS avg_sql_exec_time + FROM sess_cpu_stat scs + , gs_session_stat ss + WHERE ss.sessid = scs.sessid + AND ss.statname = ''n_sql'''; + + + RETURN QUERY + EXECUTE l_node_query; + + END LOOP; + +END; +$$ +LANGUAGE plpgsql;""", + """CREATE OR REPLACE FUNCTION pmk.get_session_memory_stat + ( IN i_node_name text + , IN i_top_n_sessions smallint + , OUT o_node_name name + , OUT o_db_name name + , OUT o_user_name name + , OUT o_client_hostname text + , OUT o_session_start_time timestamp + , OUT o_xact_start_time timestamp + , OUT o_waiting boolean + , OUT o_state text + , OUT o_query text + , OUT o_session_total_memory_size bigint + , OUT o_session_used_memory_size bigint + , OUT o_buffer_hits bigint + , OUT o_disk_reads bigint + , OUT o_session_buffer_hit_ratio numeric(5, 2) + , OUT o_sorts_in_memory bigint + , OUT o_sorts_in_disk bigint + , OUT o_session_memory_sort_ratio numeric(5, 2) + , OUT o_avg_sql_exec_time numeric(15, 3) + ) +RETURNS SETOF record +AS +$$ +DECLARE l_node_query text; + l_execute_query text; +BEGIN + + FOR i IN ( SELECT node_name + FROM DBE_PERF.node_name nl + WHERE UPPER(nl.node_name) = + COALESCE(NULLIF(UPPER(i_node_name), 'ALL'), + UPPER(nl.node_name)) + ) + LOOP + + l_node_query := 'WITH sess_memory_usage AS + ( SELECT sessid + , SUM(totalsize)::bigint AS totalsize + , SUM(usedsize)::bigint AS usedsize + FROM gs_session_memory_detail + GROUP BY sessid + ) + , sess_stat0 AS + ( SELECT ss.sessid + , ss.statname AS statname + , ss.value AS statvalue + FROM sess_memory_usage st, gs_session_stat ss + WHERE ss.sessid = st.sessid + AND ss.statname IN ( ''n_blocks_fetched'' + , ''n_shared_blocks_read'' + , ''n_local_blocks_read'' + , ''n_sort_in_disk'' + , ''n_sort_in_memory'' + , ''n_sql'' ) + ) + , sess_stat1 AS + ( SELECT oss.sessid + , oss.totalsize + , oss.usedsize + , (SELECT statvalue FROM sess_stat0 iss WHERE + iss.sessid = oss.sessid AND iss.statname = + ''n_blocks_fetched'') AS total_reads + , (SELECT statvalue FROM sess_stat0 iss WHERE + iss.sessid = oss.sessid AND iss.statname = + ''n_shared_blocks_read'') + AS disk_to_shared_buffer + , (SELECT statvalue FROM sess_stat0 iss WHERE + iss.sessid = oss.sessid AND iss.statname = + ''n_local_blocks_read'') + AS disk_to_local_buffer + , (SELECT statvalue FROM sess_stat0 iss WHERE + iss.sessid = oss.sessid AND iss.statname = + ''n_sort_in_disk'') AS sorts_in_disk + , (SELECT statvalue FROM sess_stat0 iss WHERE + iss.sessid = oss.sessid AND iss.statname = + ''n_sort_in_memory'') AS sorts_in_memory + , (SELECT statvalue FROM sess_stat0 iss WHERE + iss.sessid = 
oss.sessid AND iss.statname = + ''n_sql'') AS sql_count + FROM sess_memory_usage oss + ) + , sess_stat AS + ( SELECT ss.sessid + , ss.totalsize + , ss.usedsize + , ss.total_reads + , (ss.disk_to_shared_buffer + + ss.disk_to_local_buffer) AS disk_reads + , (ss.total_reads-(ss.disk_to_shared_buffer + + ss.disk_to_local_buffer)) AS buffer_hits + , sorts_in_disk + , sorts_in_memory + , sql_count + FROM sess_stat1 ss + ) + , sess_memory_stat AS + ( SELECT ''' || i.node_name || ''' + ::name AS node_name + , a.datname::name AS db_name + , a.usename::name AS user_name + , a.client_hostname + , date_trunc(''second'', a.backend_start) + ::timestamp AS session_start_time + , date_trunc(''second'', a.xact_start) + ::timestamp AS xact_start_time + , a.waiting + , a.state, a.query + , st.totalsize AS session_total_memory_size + , st.usedsize AS session_used_memory_size + , st.buffer_hits, st.disk_reads + , ( (st.buffer_hits * 100.0) + / NULLIF(st.total_reads, 0) )::numeric(5, 2) + AS session_buffer_hit_ratio + , st.sorts_in_memory, st.sorts_in_disk + , ( (st.sorts_in_memory * 100.0) + / NULLIF(st.sorts_in_memory + + st.sorts_in_disk, 0) )::numeric(5, 2) + AS session_memory_sort_ratio + , st.sessid + , st.sql_count + FROM pg_stat_activity a + , sess_stat st + WHERE a.state IN (''active'', + ''fastpath function call'', ''retrying'') + AND a.pid = sessionid2pid(st.sessid::cstring) + ORDER BY st.totalsize DESC + , st.usedsize DESC + LIMIT ' || i_top_n_sessions || ' + ) + SELECT sms.node_name + , sms.db_name + , sms.user_name + , sms.client_hostname + , sms.session_start_time + , sms.xact_start_time + , sms.waiting + , sms.state + , sms.query + , sms.session_total_memory_size + , sms.session_used_memory_size + , sms.buffer_hits + , sms.disk_reads + , sms.session_buffer_hit_ratio + , sms.sorts_in_memory + , sms.sorts_in_disk + , sms.session_memory_sort_ratio + , ( ss.value / (NULLIF(sms.sql_count, 0) + * 1000.0) )::numeric(15, 3) + AS avg_sql_exec_time + FROM sess_memory_stat sms + , gs_session_time ss + WHERE ss.sessid = sms.sessid + AND ss.stat_name = ''EXECUTION_TIME'''; + + + RETURN QUERY + EXECUTE l_node_query; + + END LOOP; + +END; +$$ +LANGUAGE plpgsql;""", + """CREATE OR REPLACE FUNCTION pmk.get_session_io_stat + ( IN i_node_name text + , IN i_top_n_sessions smallint + , OUT o_node_name name + , OUT o_db_name name + , OUT o_user_name name + , OUT o_client_hostname text + , OUT o_session_start_time timestamp + , OUT o_xact_start_time timestamp + , OUT o_waiting boolean + , OUT o_state text + , OUT o_query text + , OUT o_disk_reads bigint + , OUT o_read_time bigint + , OUT o_avg_read_per_sec numeric(20, 2) + , OUT o_avg_read_time numeric(20, 3) + , OUT o_avg_sql_exec_time numeric(15, 3) + ) +RETURNS SETOF record +AS +$$ +DECLARE l_node_query text; + l_execute_query text; +BEGIN + + FOR i IN ( SELECT node_name + FROM DBE_PERF.node_name nl + WHERE UPPER(nl.node_name) = + COALESCE(NULLIF(UPPER(i_node_name), 'ALL'), + UPPER(nl.node_name)) + ) + LOOP + + l_node_query := 'WITH sess_stat0 AS + ( SELECT ss.sessid + , ss.statname AS statname + , ss.value AS statvalue + FROM gs_session_stat ss + WHERE ss.statname IN ( + ''n_shared_blocks_read'' + , ''n_local_blocks_read'' + , ''n_blocks_read_time'', ''n_sql'' ) + ) + , sess_stat1 AS + ( SELECT DISTINCT ss.sessid + , (SELECT statvalue FROM sess_stat0 iss + WHERE iss.sessid = ss.sessid AND + iss.statname = ''n_shared_blocks_read'') + AS disk_to_shared_buffer + , (SELECT statvalue FROM sess_stat0 iss + WHERE iss.sessid = ss.sessid + AND iss.statname = 
''n_local_blocks_read'') + AS disk_to_local_buffer + , (SELECT statvalue FROM sess_stat0 iss + WHERE iss.sessid = ss.sessid AND + iss.statname = ''n_blocks_read_time'') + AS read_time + , (SELECT statvalue FROM sess_stat0 iss + WHERE iss.sessid = ss.sessid + AND iss.statname = ''n_sql'') AS sql_count + FROM sess_stat0 ss + ) + , sess_stat AS + ( SELECT ss.sessid + , (ss.disk_to_shared_buffer + + ss.disk_to_local_buffer) AS disk_reads + , ss.read_time + , ss.sql_count + FROM sess_stat1 ss + ) + , sess_io_stat AS + ( SELECT ''' || i.node_name || ''' + ::name AS node_name + , a.datname::name AS db_name + , a.usename::name AS user_name + , a.client_hostname + , date_trunc(''second'', a.backend_start) + ::timestamp AS session_start_time + , date_trunc(''second'', a.xact_start) + ::timestamp AS xact_start_time + , a.waiting + , a.state, a.query + , st.disk_reads + , st.read_time + , ( st.disk_reads * 1000000.0 + / NULLIF(st.read_time, 0) )::numeric(20,2) + AS avg_read_per_sec + , ( st.read_time * 1.0 + / NULLIF(st.disk_reads, 0) ) + ::numeric(20,3) AS avg_read_time + , st.sessid + , st.sql_count + FROM pg_stat_activity a + , sess_stat st + WHERE a.state IN (''active'', + ''fastpath function call'', ''retrying'') + AND a.pid = sessionid2pid(st.sessid::cstring) + ORDER BY st.disk_reads DESC + , st.read_time DESC + LIMIT ' || i_top_n_sessions || ' + ) + SELECT sios.node_name + , sios.db_name + , sios.user_name + , sios.client_hostname + , sios.session_start_time + , sios.xact_start_time + , sios.waiting + , sios.state + , sios.query + , sios.disk_reads + , sios.read_time + , sios.avg_read_per_sec + , sios.avg_read_time + , ( ss.value / (NULLIF(sios.sql_count, 0) + * 1000.0) )::numeric(15, 3) + AS avg_sql_exec_time + FROM sess_io_stat sios + , gs_session_time ss + WHERE ss.sessid = sios.sessid + AND ss.stat_name = ''EXECUTION_TIME'''; + + + RETURN QUERY + EXECUTE l_node_query; + + END LOOP; + +END; +$$ +LANGUAGE plpgsql;""", + """CREATE OR REPLACE FUNCTION pmk.insertBaseValue() +RETURNS TEXT +AS +$$ +DECLARE l_configuration_count_value INT; + l_meta_data_count_value INT; + l_version_string varchar(128); + l_result varchar(128); +BEGIN + SELECT count(config_param_name) + INTO l_configuration_count_value + FROM pmk.pmk_configuration + WHERE config_param_name IN ('Collection Count', 'Enable PMK'); + + IF l_configuration_count_value != 2 + THEN + DELETE FROM pmk.pmk_configuration; + INSERT INTO pmk.pmk_configuration(config_param_name, config_value) + VALUES ('Collection Count', '9'), ('Enable PMK', 'TRUE'); + END IF; + + SELECT count(pmk_version) + INTO l_meta_data_count_value + FROM pmk.pmk_meta_data; + + SELECT substring(version() from '[a-zA-Z0-9 ]* [0-9]+\.[0-9]+\.[0-9]+') + INTO l_version_string; + l_result := l_version_string; + + IF l_meta_data_count_value < 1 + THEN + INSERT INTO pmk.pmk_meta_data (pmk_version, last_snapshot_id, + last_snapshot_collect_time) VALUES (l_result, NULL, NULL); + END IF; + + RETURN NULL; +END; +$$ +LANGUAGE plpgsql;""", """ +SELECT pmk.insertBaseValue();""", """COMMIT;""", """ +analyze pmk.pmk_configuration;""", """ +analyze pmk.pmk_snapshot;""", """ +analyze pmk.pmk_snapshot_datanode_stat;""", """ +analyze pmk.pmk_meta_data;"""] + +PMK_NEW = ["""SET default_tablespace = defaulttablespace;""", + """CREATE SCHEMA pmk;""", + """CREATE TABLE pmk.pmk_configuration +( + config_param_name varchar(64) NOT NULL +, config_value text NOT NULL +);""", """CREATE TABLE pmk.pmk_snapshot +( + snapshot_id int + -- Snapshot ID (Running number) +, current_snapshot_time timestamp with 
time zone + -- Time at the beginning of the snapshot +, last_snapshot_time timestamp with time zone + -- Time at the end of the snapshot; the actual time the snapshot was taken +, creation_time timestamp with time zone + -- Time the snapshot was created +);""", """CREATE TABLE pmk.pmk_snapshot_datanode_stat +( + snapshot_id int + -- Snapshot Id +, node_name text + -- node name from pgxc_node +, node_host text + -- node host from pgxc_node +, last_startup_time timestamp with time zone + -- last restart time of the node before snapshot starts +, number_of_files int +, physical_reads bigint +, physical_reads_delta bigint +, physical_writes bigint +, physical_writes_delta bigint +, read_time bigint +, read_time_delta bigint +, write_time bigint +, write_time_delta bigint +, db_size bigint +, active_sql_count int +, wait_sql_count int +, session_count int +, xact_commit bigint +, xact_commit_delta bigint +, xact_rollback bigint +, xact_rollback_delta bigint +, checkpoints_timed bigint +, checkpoints_timed_delta bigint +, checkpoints_req bigint +, checkpoints_req_delta bigint +, checkpoint_write_time double precision +, checkpoint_write_time_delta double precision +, physical_memory bigint +, db_memory_usage bigint +, shared_buffer_size bigint +, session_memory_total_size bigint +, session_memory_used_size bigint +, blocks_read bigint +, blocks_read_delta bigint +, blocks_hit bigint +, blocks_hit_delta bigint +, work_memory_size bigint +, sorts_in_memory bigint +, sorts_in_memory_delta bigint +, sorts_in_disk bigint +, sorts_in_disk_delta bigint +, busy_time numeric +, busy_time_delta numeric +, idle_time numeric +, idle_time_delta numeric +, iowait_time numeric +, iowait_time_delta numeric +, db_cpu_time numeric +, db_cpu_time_delta numeric +);""", """CREATE TABLE pmk.pmk_meta_data +( + pmk_version varchar(128) +, last_snapshot_id int +, last_snapshot_collect_time timestamp with time zone +);""", """CREATE OR REPLACE FUNCTION pmk.put_line + (IN message text + ) +RETURNS boolean +AS +$$ +DECLARE l_error_message TEXT; +BEGIN + l_error_message := TRIM(message); + + IF l_error_message IS NOT NULL + THEN + raise notice '%',l_error_message; + RETURN TRUE; + END IF; + + RETURN FALSE; +END; +$$ +LANGUAGE plpgsql;""", """CREATE OR REPLACE FUNCTION pmk.check_node_type +RETURNS TEXT +AS +$$ +DECLARE l_node_type CHAR(1); +BEGIN + + l_node_type := 'D'; + RETURN NULL; + +END; +$$ +LANGUAGE plpgsql;""", """CREATE OR REPLACE FUNCTION pmk.check_pmk_enabled +RETURNS TEXT +AS +$$ +DECLARE l_pmk_enabled_i TEXT; +BEGIN + + SELECT UPPER(config_value) + INTO l_pmk_enabled_i + FROM pmk.pmk_configuration + WHERE config_param_name = 'Enable PMK'; + + IF l_pmk_enabled_i = 'FALSE' + THEN + RETURN 'ERROR:: PMK should be enabled to use the PMK features.'; + ELSE + RETURN NULL; + END IF; + +END; +$$ +LANGUAGE plpgsql;""", """CREATE OR REPLACE FUNCTION pmk.pmk_version ( ) +RETURNS varchar(128) +AS +$$ +DECLARE l_pmk_version varchar(128); + l_error_message TEXT; +BEGIN + + l_error_message := pmk.check_node_type(); + + IF l_error_message IS NOT NULL + THEN + pmk.put_line(l_error_message); + RETURN 'f'; + END IF; + + l_error_message := pmk.check_pmk_enabled(); + + IF l_error_message IS NOT NULL + THEN + pmk.put_line(l_error_message); + RETURN 'f'; + END IF; + + SELECT pmk_version + INTO l_pmk_version + FROM pmk.pmk_meta_data; + + RETURN l_pmk_version; + +END; +$$ +LANGUAGE plpgsql;""", """CREATE OR REPLACE FUNCTION pmk.configure_parameter + ( IN i_config_param_name varchar(64) + , IN i_config_value text + ) +RETURNS boolean +AS 
+$$
+DECLARE l_collect_count_value INT;
+        l_config_value TEXT;
+        l_upper_config_param TEXT;
+        l_error_message TEXT;
+BEGIN
+
+    l_error_message := pmk.check_node_type();
+
+    IF l_error_message IS NOT NULL
+    THEN
+        pmk.put_line(l_error_message);
+        RETURN FALSE;
+    END IF;
+
+    l_upper_config_param := UPPER(TRIM(BOTH ' ' FROM i_config_param_name));
+
+    l_error_message := pmk.check_pmk_enabled();
+
+    IF l_error_message IS NOT NULL
+       AND l_upper_config_param <> 'ENABLE PMK'
+    THEN
+        pmk.put_line(l_error_message);
+        RETURN FALSE;
+    END IF;
+
+    IF i_config_param_name IS NULL
+    THEN
+        l_error_message := 'ERROR:: Null should not be provided for
+                            configuration parameter name.';
+        pmk.put_line(l_error_message);
+        RETURN FALSE;
+    END IF;
+
+    IF i_config_value IS NULL
+    THEN
+        l_error_message := 'ERROR:: Null should not be provided for configuration value.';
+        pmk.put_line(l_error_message);
+        RETURN FALSE;
+    END IF;
+
+    IF l_upper_config_param = 'COLLECTION COUNT'
+    THEN
+        l_collect_count_value := i_config_value::int;
+
+        IF l_collect_count_value < -1
+        THEN
+            l_error_message := 'ERROR:: Configuration value "' ||
+                               i_config_value || '" should not be less than -1.';
+            pmk.put_line(l_error_message);
+            RETURN FALSE;
+
+        ELSIF l_collect_count_value = 0
+        THEN
+            l_error_message := 'ERROR:: 0 should not be provided since
+                                at least one collection should be retained.';
+            pmk.put_line(l_error_message);
+            RETURN FALSE;
+
+        ELSE
+            l_config_value := l_collect_count_value;
+        END IF;
+
+    ELSIF l_upper_config_param = 'ENABLE PMK'
+    THEN
+        l_config_value := UPPER(TRIM(BOTH ' ' FROM i_config_value));
+
+        IF l_config_value NOT IN ('TRUE', 'FALSE')
+        THEN
+            l_error_message := 'ERROR:: Allowed values are TRUE or FALSE for
+                                the configuration parameter "Enable PMK".';
+            pmk.put_line(l_error_message);
+            RETURN FALSE;
+
+        END IF;
+    END IF;
+
+    SET allow_concurrent_tuple_update = ON;
+
+    UPDATE pmk.pmk_configuration
+       SET config_value = l_config_value
+     WHERE UPPER(config_param_name) = l_upper_config_param;
+
+    IF NOT FOUND THEN
+        l_error_message := 'ERROR:: Invalid configuration parameter "'
+            || i_config_param_name || '" provided for configuring PMK parameter ...';
+        pmk.put_line(l_error_message);
+        RETURN FALSE;
+    END IF;
+
+    RETURN TRUE;
+
+END;
+$$
+LANGUAGE plpgsql;""",
+    """CREATE OR REPLACE FUNCTION pmk.get_configuration_parameter
+                ( IN i_config_param_name TEXT )
+RETURNS TABLE
+(
+  config_param_name varchar(64)
+, config_value text
+)
+AS
+$$
+DECLARE l_upper_config_param TEXT;
+        l_error_message TEXT;
+BEGIN
+
+    l_error_message := pmk.check_node_type();
+
+    IF l_error_message IS NOT NULL
+    THEN
+        pmk.put_line(l_error_message);
+        RETURN;
+    END IF;
+
+    l_error_message := pmk.check_pmk_enabled();
+
+    IF l_error_message IS NOT NULL
+    THEN
+        pmk.put_line(l_error_message);
+        RETURN;
+    END IF;
+
+    l_upper_config_param := UPPER(TRIM(BOTH ' ' FROM i_config_param_name));
+
+    IF l_upper_config_param = 'ALL'
+    THEN
+
+        RETURN QUERY
+        SELECT config_param_name
+             , config_value
+          FROM pmk.pmk_configuration
+         ORDER BY config_param_name;
+
+    ELSE
+
+        RETURN QUERY
+        SELECT config_param_name
+             , config_value
+          FROM pmk.pmk_configuration
+         WHERE UPPER(config_param_name) = l_upper_config_param;
+
+    END IF;
+
+END;
+$$
+LANGUAGE plpgsql;""", """CREATE OR REPLACE FUNCTION pmk.find_perf_stat
+    ( IN i_skip_supper_role boolean
+    , OUT o_number_of_files int
+    , OUT o_physical_reads bigint
+    , OUT o_physical_writes bigint
+    , OUT o_read_time bigint
+    , OUT o_write_time bigint
+    , OUT o_physical_memory bigint
+    , OUT o_shared_buffer_size bigint
+ , OUT o_session_memory_total_size bigint + , OUT o_session_memory_used_size bigint + , OUT o_blocks_read bigint + , OUT o_blocks_hit bigint + , OUT o_db_size bigint + , OUT o_work_memory_size bigint + , OUT o_sorts_in_memory bigint + , OUT o_sorts_in_disk bigint + , OUT o_active_sql_count int + , OUT o_wait_sql_count int + , OUT o_session_count int + , OUT o_busy_time numeric + , OUT o_idle_time numeric + , OUT o_iowait_time numeric + , OUT o_db_cpu_time numeric + , OUT o_db_memory_usage bigint + , OUT o_node_startup_time timestamp with time zone + , OUT o_node_host_name text + , OUT o_xact_commit bigint + , OUT o_xact_rollback bigint + , OUT o_checkpoints_timed bigint + , OUT o_checkpoints_req bigint + , OUT o_checkpoint_write_time double precision + ) +AS +$$ +DECLARE + l_block_size int; + l_record_chk int; +BEGIN + + o_node_startup_time := pg_postmaster_start_time(); + o_node_host_name := get_hostname(); + + SELECT COUNT(*) AS number_of_files + , SUM(phyrds) AS physical_reads + , SUM(phywrts) AS physical_writes + , SUM(readtim) AS read_time + , SUM(writetim) AS write_time + INTO o_number_of_files + , o_physical_reads + , o_physical_writes + , o_read_time + , o_write_time + FROM gs_file_stat; + + IF o_number_of_files = 0 + THEN + o_physical_reads := 0; + o_physical_writes := 0; + o_read_time := 0; + o_write_time := 0; + END IF; + + WITH os_stat AS + ( + SELECT os.name AS statname + , os.value AS statvalue + FROM gs_os_run_info os + WHERE os.name IN ( 'PHYSICAL_MEMORY_BYTES', 'BUSY_TIME', + 'IDLE_TIME', 'IOWAIT_TIME' ) + ) + SELECT (SELECT statvalue FROM os_stat WHERE statname = + 'PHYSICAL_MEMORY_BYTES') + , (SELECT statvalue FROM os_stat WHERE statname = 'BUSY_TIME') + , (SELECT statvalue FROM os_stat WHERE statname = 'IDLE_TIME') + , (SELECT statvalue FROM os_stat WHERE statname = 'IOWAIT_TIME') + INTO o_physical_memory + , o_busy_time + , o_idle_time + , o_iowait_time + FROM DUAL; + + -- gs_db_time is not available; temporarily PMK extension is used. 
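+    -- Judging from the conversions used elsewhere in this file
+    -- (total_cpu()*10.0 is annotated "converting to millisecond" in
+    -- pmk.get_session_cpu_stat, and the db_cpu_time deltas are likewise
+    -- scaled by 10), total_cpu() appears to return 10 ms ticks and
+    -- total_memory() kilobytes, hence the *1024 below to obtain bytes.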
+    o_db_cpu_time := total_cpu();
+    o_db_memory_usage := total_memory()*1024;
+
+    WITH config_value AS
+    ( SELECT name
+           , setting::bigint AS config_value
+        FROM pg_settings
+       WHERE name IN ( 'block_size', 'shared_buffers', 'work_mem' )
+    )
+    , config_value1 AS
+    ( SELECT (SELECT config_value FROM config_value WHERE name =
+              'block_size') AS block_size
+           , (SELECT config_value FROM config_value WHERE name =
+              'shared_buffers') AS shared_buffers
+           , (SELECT config_value FROM config_value WHERE name =
+              'work_mem') AS work_mem
+        FROM DUAL
+    )
+    SELECT block_size
+         , (shared_buffers * block_size)::bigint
+         , (work_mem * 1024)::bigint
+      INTO l_block_size
+         , o_shared_buffer_size
+         , o_work_memory_size
+      FROM config_value1;
+
+    /* Commented since these statistics are not used for node and
+     * cluster reports
+     */
+    o_session_memory_total_size := 0;
+    o_session_memory_used_size := 0;
+
+    SELECT SUM(blks_read)::bigint
+         , SUM(blks_hit)::bigint
+         , SUM(xact_commit)::bigint
+         , SUM(xact_rollback)::bigint
+      INTO o_blocks_read
+         , o_blocks_hit
+         , o_xact_commit
+         , o_xact_rollback
+      FROM pg_stat_database;
+
+    o_db_size := 0;
+    IF i_skip_supper_role = 'TRUE'
+    THEN
+        WITH session_state AS
+        ( SELECT state, waiting, usename
+            FROM pg_stat_activity a, pg_roles r
+           WHERE r.rolsuper = 'f' AND a.usename = r.rolname
+        )
+        , active_session AS
+        ( SELECT state, waiting, usename
+            FROM session_state s, pg_roles r
+           WHERE s.state IN ('active', 'fastpath function call', 'retrying')
+             AND r.rolsuper = 'f' AND s.usename = r.rolname
+        )
+        SELECT ( SELECT COUNT(*) FROM active_session )
+             , ( SELECT COUNT(*) FROM active_session WHERE waiting = TRUE )
+             , ( SELECT COUNT(*) FROM session_state )
+          INTO o_active_sql_count, o_wait_sql_count, o_session_count
+          FROM DUAL;
+    ELSE
+        WITH session_state AS
+        ( SELECT state, waiting
+            FROM pg_stat_activity
+        )
+        , active_session AS
+        ( SELECT state, waiting
+            FROM session_state
+           WHERE state IN ('active', 'fastpath function call', 'retrying')
+        )
+        SELECT ( SELECT COUNT(*) FROM active_session )
+             , ( SELECT COUNT(*) FROM active_session WHERE waiting = TRUE )
+             , ( SELECT COUNT(*) FROM session_state )
+          INTO o_active_sql_count, o_wait_sql_count, o_session_count
+          FROM DUAL;
+    END IF;
+    -- Currently, the below statistics are calculated from gs_session_stat
+    -- (which is not accurate) since gs_db_stat is not available
+    WITH sort_state AS
+    ( SELECT statname
+           , SUM(value)::bigint AS sorts_cnt
+        FROM gs_session_stat
+       WHERE statname IN ('n_sort_in_memory', 'n_sort_in_disk')
+       GROUP BY statname
+    )
+    SELECT (SELECT sorts_cnt FROM sort_state WHERE statname =
+            'n_sort_in_memory')
+         , (SELECT sorts_cnt FROM sort_state WHERE statname =
+            'n_sort_in_disk')
+      INTO o_sorts_in_memory
+         , o_sorts_in_disk
+      FROM DUAL;
+
+    SELECT SUM(checkpoints_timed)::bigint
+         , SUM(checkpoints_req)::bigint
+         , SUM(checkpoint_write_time)::bigint
+      INTO o_checkpoints_timed
+         , o_checkpoints_req
+         , o_checkpoint_write_time
+      FROM pg_stat_bgwriter;
+
+END;
+$$
+LANGUAGE plpgsql;""", """CREATE OR REPLACE FUNCTION pmk.find_node_stat
+    (IN i_skip_supper_role boolean
+    , OUT o_number_of_files_1 int
+    , OUT o_physical_reads_1 bigint
+    , OUT o_physical_writes_1 bigint
+    , OUT o_read_time_1 bigint
+    , OUT o_write_time_1 bigint
+    , OUT o_physical_memory_1 bigint
+    , OUT o_shared_buffer_size_1 bigint
+    , OUT o_session_memory_total_size_1 bigint
+    , OUT o_session_memory_used_size_1 bigint
+    , OUT o_blocks_read_1 bigint
+    , OUT o_blocks_hit_1 bigint
+    , OUT o_db_size_1 bigint
+    , OUT o_work_memory_size_1 bigint
+    , OUT 
o_sorts_in_memory_1 bigint + , OUT o_sorts_in_disk_1 bigint + , OUT o_active_sql_count_1 int + , OUT o_wait_sql_count_1 int + , OUT o_session_count_1 int + , OUT o_busy_time_1 numeric + , OUT o_idle_time_1 numeric + , OUT o_iowait_time_1 numeric + , OUT o_db_cpu_time_1 numeric + , OUT o_db_memory_usage_1 bigint + , OUT o_node_startup_time_1 timestamp with time zone + , OUT o_node_host_name_1 text + , OUT o_xact_commit_1 bigint + , OUT o_xact_rollback_1 bigint + , OUT o_checkpoints_timed_1 bigint + , OUT o_checkpoints_req_1 bigint + , OUT o_checkpoint_write_time_1 double precision + ) +AS +$$ +BEGIN + + SELECT o_number_of_files + , o_physical_reads + , o_physical_writes + , o_read_time + , o_write_time + , o_physical_memory + , o_shared_buffer_size + , o_session_memory_total_size + , o_session_memory_used_size + , o_blocks_read + , o_blocks_hit + , o_db_size + , o_work_memory_size + , o_sorts_in_memory + , o_sorts_in_disk + , o_active_sql_count + , o_wait_sql_count + , o_session_count + , o_busy_time + , o_idle_time + , o_iowait_time + , o_db_cpu_time + , o_db_memory_usage + , o_node_startup_time + , o_node_host_name + , o_xact_commit + , o_xact_rollback + , o_checkpoints_timed + , o_checkpoints_req + , o_checkpoint_write_time + INTO o_number_of_files_1 + , o_physical_reads_1 + , o_physical_writes_1 + , o_read_time_1 + , o_write_time_1 + , o_physical_memory_1 + , o_shared_buffer_size_1 + , o_session_memory_total_size_1 + , o_session_memory_used_size_1 + , o_blocks_read_1 + , o_blocks_hit_1 + , o_db_size_1 + , o_work_memory_size_1 + , o_sorts_in_memory_1 + , o_sorts_in_disk_1 + , o_active_sql_count_1 + , o_wait_sql_count_1 + , o_session_count_1 + , o_busy_time_1 + , o_idle_time_1 + , o_iowait_time_1 + , o_db_cpu_time_1 + , o_db_memory_usage_1 + , o_node_startup_time_1 + , o_node_host_name_1 + , o_xact_commit_1 + , o_xact_rollback_1 + , o_checkpoints_timed_1 + , o_checkpoints_req_1 + , o_checkpoint_write_time_1 + + FROM pmk.find_perf_stat(i_skip_supper_role); + +END; +$$ +LANGUAGE plpgsql;""", """CREATE OR REPLACE FUNCTION pmk.load_datanode_stat + ( IN i_snapshot_id int + , IN i_last_snapshot_id int + , IN i_pmk_last_collect_start_time timestamp with time zone + , IN i_node_name text + , IN i_number_of_files int + , IN i_physical_reads bigint + , IN i_physical_writes bigint + , IN i_read_time bigint + , IN i_write_time bigint + , IN i_physical_memory bigint + , IN i_shared_buffer_size bigint + , IN i_session_memory_total_size bigint + , IN i_session_memory_used_size bigint + , IN i_blocks_read bigint + , IN i_blocks_hit bigint + , IN i_db_size bigint + , IN i_work_memory_size bigint + , IN i_sorts_in_memory bigint + , IN i_sorts_in_disk bigint + , IN i_active_sql_count int + , IN i_wait_sql_count int + , IN i_session_count int + , IN i_busy_time numeric + , IN i_idle_time numeric + , IN i_iowait_time numeric + , IN i_db_cpu_time numeric + , IN i_db_memory_usage bigint + , IN i_node_startup_time timestamp with time zone + , IN i_node_host_name text + , IN i_xact_commit bigint + , IN i_xact_rollback bigint + , IN i_checkpoints_timed bigint + , IN i_checkpoints_req bigint + , IN i_checkpoint_write_time double precision + , IN i_skip_supper_role boolean + , OUT o_dn_snapshot_id int + , OUT o_dn_node_name text + -- node name from pgxc_node + , OUT o_dn_node_host text + -- node host from pgxc_node + , OUT o_dn_last_startup_time timestamp with time zone + -- last restart time of the node before snapshot starts + , OUT o_dn_number_of_files int + , OUT o_dn_physical_reads bigint + , OUT 
o_dn_physical_reads_delta bigint + , OUT o_dn_physical_writes bigint + , OUT o_dn_physical_writes_delta bigint + , OUT o_dn_read_time bigint + , OUT o_dn_read_time_delta bigint + , OUT o_dn_write_time bigint + , OUT o_dn_write_time_delta bigint + , OUT o_dn_db_size bigint + , OUT o_dn_active_sql_count int + , OUT o_dn_wait_sql_count int + , OUT o_dn_session_count int + , OUT o_dn_xact_commit bigint + , OUT o_dn_xact_commit_delta bigint + , OUT o_dn_xact_rollback bigint + , OUT o_dn_xact_rollback_delta bigint + , OUT o_dn_checkpoints_timed bigint + , OUT o_dn_checkpoints_timed_delta bigint + , OUT o_dn_checkpoints_req bigint + , OUT o_dn_checkpoints_req_delta bigint + , OUT o_dn_checkpoint_write_time double precision + , OUT o_dn_checkpoint_write_time_delta double precision + , OUT o_dn_physical_memory bigint + , OUT o_dn_db_memory_usage bigint + , OUT o_dn_shared_buffer_size bigint + , OUT o_dn_session_memory_total_size bigint + , OUT o_dn_session_memory_used_size bigint + , OUT o_dn_blocks_read bigint + , OUT o_dn_blocks_read_delta bigint + , OUT o_dn_blocks_hit bigint + , OUT o_dn_blocks_hit_delta bigint + , OUT o_dn_work_memory_size bigint + , OUT o_dn_sorts_in_memory bigint + , OUT o_dn_sorts_in_memory_delta bigint + , OUT o_dn_sorts_in_disk bigint + , OUT o_dn_sorts_in_disk_delta bigint + , OUT o_dn_busy_time numeric + , OUT o_dn_busy_time_delta numeric + , OUT o_dn_idle_time numeric + , OUT o_dn_idle_time_delta numeric + , OUT o_dn_iowait_time numeric + , OUT o_dn_iowait_time_delta numeric + , OUT o_dn_db_cpu_time numeric + , OUT o_dn_db_cpu_time_delta numeric + ) +AS +$$ +DECLARE l_physical_reads_delta bigint; + l_physical_writes_delta bigint; + l_read_time_delta bigint; + l_write_time_delta bigint; + l_blocks_read_delta bigint; + l_blocks_hit_delta bigint; + l_sorts_in_memory_delta bigint; + l_sorts_in_disk_delta bigint; + l_busy_time_delta numeric; + l_idle_time_delta numeric; + l_iowait_time_delta numeric; + l_db_cpu_time_delta numeric; + l_xact_commit_delta bigint; + l_xact_rollback_delta bigint; + l_checkpoints_timed_delta bigint; + l_checkpoints_req_delta bigint; + l_checkpoint_write_time_delta double precision; + i_skip_supper_role_delta boolean; +BEGIN + + l_physical_reads_delta := i_physical_reads; + l_physical_writes_delta := i_physical_writes; + l_read_time_delta := i_read_time; + l_write_time_delta := i_write_time; + l_xact_commit_delta := i_xact_commit; + l_xact_rollback_delta := i_xact_rollback; + l_checkpoints_timed_delta := i_checkpoints_timed; + i_skip_supper_role_delta := i_skip_supper_role; + l_checkpoints_req_delta := i_checkpoints_req; + l_checkpoint_write_time_delta := i_checkpoint_write_time; + + l_blocks_read_delta := i_blocks_read; + l_blocks_hit_delta := i_blocks_hit; + + l_busy_time_delta := i_busy_time; + l_idle_time_delta := i_idle_time; + l_iowait_time_delta := i_iowait_time; + l_db_cpu_time_delta := i_db_cpu_time; + + -- Currently, the below statistics are calculated from gs_session_stat + -- (which is not accurate) since gs_db_stat is not available + -- These statistics are cumulative from instance startup. 
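+    -- All *_delta locals in this block are seeded directly with the
+    -- cumulative inputs; no subtraction against the previous snapshot
+    -- (i_last_snapshot_id, i_pmk_last_collect_start_time) is performed
+    -- in this function, so the delta outputs carry since-startup totals.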
+ l_sorts_in_memory_delta := i_sorts_in_memory; + l_sorts_in_disk_delta := i_sorts_in_disk; + + o_dn_snapshot_id := i_snapshot_id; + o_dn_node_name := i_node_name; + o_dn_node_host := i_node_host_name; + o_dn_last_startup_time := i_node_startup_time; + o_dn_number_of_files := i_number_of_files; + o_dn_physical_reads := i_physical_reads; + o_dn_physical_reads_delta := l_physical_reads_delta; + o_dn_physical_writes := i_physical_writes; + o_dn_physical_writes_delta := l_physical_writes_delta; + o_dn_read_time := i_read_time; + o_dn_read_time_delta := l_read_time_delta; + o_dn_write_time := i_write_time; + o_dn_write_time_delta := l_write_time_delta; + o_dn_db_size := i_db_size; + o_dn_active_sql_count := i_active_sql_count; + o_dn_wait_sql_count := i_wait_sql_count; + o_dn_session_count := i_session_count; + o_dn_xact_commit := i_xact_commit; + o_dn_xact_commit_delta := l_xact_commit_delta; + o_dn_xact_rollback := i_xact_rollback; + o_dn_xact_rollback_delta := l_xact_rollback_delta; + o_dn_checkpoints_timed := i_checkpoints_timed; + o_dn_checkpoints_timed_delta := l_checkpoints_timed_delta; + o_dn_checkpoints_req := i_checkpoints_req; + o_dn_checkpoints_req_delta := l_checkpoints_req_delta; + o_dn_checkpoint_write_time := i_checkpoint_write_time; + o_dn_checkpoint_write_time_delta := l_checkpoint_write_time_delta; + o_dn_physical_memory := i_physical_memory; + o_dn_db_memory_usage := i_db_memory_usage; + o_dn_shared_buffer_size := i_shared_buffer_size; + o_dn_session_memory_total_size := i_session_memory_total_size; + o_dn_session_memory_used_size := i_session_memory_used_size; + o_dn_blocks_read := i_blocks_read; + o_dn_blocks_read_delta := l_blocks_read_delta; + o_dn_blocks_hit := i_blocks_hit; + o_dn_blocks_hit_delta := l_blocks_hit_delta; + o_dn_work_memory_size := i_work_memory_size; + o_dn_sorts_in_memory := i_sorts_in_memory; + o_dn_sorts_in_memory_delta := l_sorts_in_memory_delta; + o_dn_sorts_in_disk := i_sorts_in_disk; + o_dn_sorts_in_disk_delta := l_sorts_in_disk_delta; + o_dn_busy_time := i_busy_time; + o_dn_busy_time_delta := l_busy_time_delta; + o_dn_idle_time := i_idle_time; + o_dn_idle_time_delta := l_idle_time_delta; + o_dn_iowait_time := i_iowait_time; + o_dn_iowait_time_delta := l_iowait_time_delta; + o_dn_db_cpu_time := i_db_cpu_time; + o_dn_db_cpu_time_delta := l_db_cpu_time_delta; + +END; +$$ +LANGUAGE plpgsql;""", """CREATE OR REPLACE FUNCTION pmk.load_node_stat + ( IN i_pmk_curr_collect_start_time TIMESTAMP WITH TIME ZONE + , IN i_pmk_last_collect_start_time TIMESTAMP WITH TIME ZONE + , IN i_last_snapshot_id INT + , IN i_node_name TEXT + , IN i_node_type char(1) + , IN i_skip_supper_role boolean + ) +RETURNS TABLE +( + snapshot_id int +, node_name text +, node_host text +, last_startup_time timestamp with time zone +, number_of_files int +, physical_reads bigint +, physical_reads_delta bigint +, physical_writes bigint +, physical_writes_delta bigint +, read_time bigint +, read_time_delta bigint +, write_time bigint +, write_time_delta bigint +, db_size bigint +, active_sql_count int +, wait_sql_count int +, session_count int +, xact_commit bigint +, xact_commit_delta bigint +, xact_rollback bigint +, xact_rollback_delta bigint +, checkpoints_timed bigint +, checkpoints_timed_delta bigint +, checkpoints_req bigint +, checkpoints_req_delta bigint +, checkpoint_write_time double precision +, checkpoint_write_time_delta double precision +, physical_memory bigint +, db_memory_usage bigint +, shared_buffer_size bigint +, session_memory_total_size bigint +, 
session_memory_used_size bigint +, blocks_read bigint +, blocks_read_delta bigint +, blocks_hit bigint +, blocks_hit_delta bigint +, work_memory_size bigint +, sorts_in_memory bigint +, sorts_in_memory_delta bigint +, sorts_in_disk bigint +, sorts_in_disk_delta bigint +, busy_time numeric +, busy_time_delta numeric +, idle_time numeric +, idle_time_delta numeric +, iowait_time numeric +, iowait_time_delta numeric +, db_cpu_time numeric +, db_cpu_time_delta numeric +) +AS +$$ +DECLARE l_snapshot_id INT; + l_query_str TEXT; + l_node_stat_cur RECORD; +BEGIN + + IF i_last_snapshot_id IS NULL + OR i_last_snapshot_id = 2147483647 + THEN + l_snapshot_id := 1; + ELSE + l_snapshot_id := i_last_snapshot_id + 1; + END IF; + + FOR l_node_stat_cur IN SELECT * FROM pmk.find_node_stat(i_skip_supper_role) + LOOP + RETURN QUERY + (SELECT * FROM pmk.load_datanode_stat ( l_snapshot_id + , i_last_snapshot_id + , i_pmk_last_collect_start_time + , i_node_name + , l_node_stat_cur.o_number_of_files_1 + , l_node_stat_cur.o_physical_reads_1 + , l_node_stat_cur.o_physical_writes_1 + , l_node_stat_cur.o_read_time_1 + , l_node_stat_cur.o_write_time_1 + , l_node_stat_cur.o_physical_memory_1 + , l_node_stat_cur.o_shared_buffer_size_1 + , l_node_stat_cur.o_session_memory_total_size_1 + , l_node_stat_cur.o_session_memory_used_size_1 + , l_node_stat_cur.o_blocks_read_1 + , l_node_stat_cur.o_blocks_hit_1 + , l_node_stat_cur.o_db_size_1 + , l_node_stat_cur.o_work_memory_size_1 + , l_node_stat_cur.o_sorts_in_memory_1 + , l_node_stat_cur.o_sorts_in_disk_1 + , l_node_stat_cur.o_active_sql_count_1 + , l_node_stat_cur.o_wait_sql_count_1 + , l_node_stat_cur.o_session_count_1 + , l_node_stat_cur.o_busy_time_1 + , l_node_stat_cur.o_idle_time_1 + , l_node_stat_cur.o_iowait_time_1 + , l_node_stat_cur.o_db_cpu_time_1 + , l_node_stat_cur.o_db_memory_usage_1 + , l_node_stat_cur.o_node_startup_time_1 + , l_node_stat_cur.o_node_host_name_1 + , l_node_stat_cur.o_xact_commit_1 + , l_node_stat_cur.o_xact_rollback_1 + , l_node_stat_cur.o_checkpoints_timed_1 + , l_node_stat_cur.o_checkpoints_req_1 + , l_node_stat_cur.o_checkpoint_write_time_1 + , i_skip_supper_role + )); + END LOOP; + +END; +$$ +LANGUAGE plpgsql;""", + """CREATE OR REPLACE FUNCTION pmk.delete_expired_snapshots ( ) +RETURNS void +AS +$$ +DECLARE l_collection_count INT; + l_retention_snapshot_id INT; +BEGIN + + -- Deleting node statistics based on "collection count" config param + SELECT config_value + INTO l_collection_count + FROM pmk.pmk_configuration + WHERE config_param_name = 'Collection Count'; + + IF l_collection_count > -1 + THEN + IF l_collection_count = 0 + THEN + l_collection_count := 1; + END IF; + + SELECT MIN(snapshot_id) + INTO l_retention_snapshot_id + FROM ( SELECT snapshot_id + FROM pmk.pmk_snapshot + ORDER BY snapshot_id DESC + LIMIT l_collection_count ); + + DELETE FROM pmk.pmk_snapshot_datanode_stat + WHERE snapshot_id < l_retention_snapshot_id; + + DELETE FROM pmk.pmk_snapshot + WHERE snapshot_id < l_retention_snapshot_id; + + END IF; + +END; +$$ +LANGUAGE plpgsql;""", """CREATE OR REPLACE FUNCTION pmk.get_meta_data + ( OUT l_pmk_curr_collect_start_time timestamp with time zone + , OUT l_pmk_last_collect_start_time timestamp with time zone + , OUT l_last_snapshot_id int + ) +AS +$$ +DECLARE l_error_message TEXT; +BEGIN + l_error_message := pmk.check_node_type(); + + IF l_error_message IS NOT NULL + THEN + pmk.put_line(l_error_message); + RETURN; + END IF; + + l_error_message := pmk.check_pmk_enabled(); + + IF l_error_message IS NOT NULL + THEN + 
pmk.put_line(l_error_message); + RETURN; + END IF; + + SELECT last_snapshot_id, last_snapshot_collect_time + INTO l_last_snapshot_id, l_pmk_last_collect_start_time + FROM pmk.pmk_meta_data; + + l_pmk_curr_collect_start_time := date_trunc('second', current_timestamp); + + IF l_pmk_curr_collect_start_time < l_pmk_last_collect_start_time + THEN + l_error_message := 'ERROR:: There is a change in system time of Gauss + MPPDB host. PMK does not support the scenarios related to + system time change.'; + pmk.put_line(l_error_message); + RETURN; + ELSIF l_pmk_curr_collect_start_time = l_pmk_last_collect_start_time + THEN + l_error_message := 'ERROR:: Multiple statistics-collections cannot + be done within a second.'; + pmk.put_line(l_error_message); + RETURN; + END IF; +END; +$$ +LANGUAGE plpgsql;""", """CREATE OR REPLACE FUNCTION pmk.get_pgxc_node + ( OUT o_node_name TEXT + , OUT o_node_type CHAR(1) + ) +RETURNS SETOF RECORD +AS +$$ +DECLARE l_error_message TEXT; + v_rec RECORD; +BEGIN + l_error_message := pmk.check_node_type(); + + IF l_error_message IS NOT NULL + THEN + pmk.put_line(l_error_message); + RETURN; + END IF; + + l_error_message := pmk.check_pmk_enabled(); + + IF l_error_message IS NOT NULL + THEN + pmk.put_line(l_error_message); + RETURN; + END IF; + + FOR v_rec IN (SELECT node_name from DBE_PERF.node_name) LOOP + o_node_name := v_rec.node_name; + o_node_type := 'D'; + RETURN NEXT; + END LOOP; + +END; +$$ +LANGUAGE plpgsql;""", """CREATE OR REPLACE FUNCTION pmk.check_start_end_dates + ( INOUT io_start_pmk_time timestamp with time zone + , INOUT io_end_pmk_time timestamp with time zone + , OUT o_error_message text + ) +AS +$$ +DECLARE l_last_collect_time timestamp with time zone; +BEGIN + + SELECT last_snapshot_collect_time + INTO l_last_collect_time + FROM pmk.pmk_meta_data; + + IF io_start_pmk_time > l_last_collect_time + THEN + o_error_message := 'ERROR:: The from-time provided is greater than + the last statistics-collection time(' || l_last_collect_time || '). 
+ Invalid value(s) provided for the input time-range'; + RETURN; + END IF; + + IF io_end_pmk_time IS NULL + THEN + io_end_pmk_time := l_last_collect_time; + + IF io_start_pmk_time IS NULL + THEN + io_start_pmk_time := io_end_pmk_time; + END IF; + ELSE + IF (io_start_pmk_time IS NULL) OR + (io_start_pmk_time > io_end_pmk_time) + THEN + o_error_message := 'ERROR:: Invalid value(s) provided for + the input time-range'; + RETURN; + END IF; + END IF; + +END; +$$ +LANGUAGE plpgsql;""", + """CREATE OR REPLACE FUNCTION pmk.get_cluster_host_cpu_stat + ( IN i_start_pmk_time timestamp with time zone + , IN i_end_pmk_time timestamp with time zone + , OUT o_stat_collect_time timestamp + , OUT o_avg_cpu_total_time numeric(21, 3) + , OUT o_avg_cpu_busy_time numeric(21, 3) + , OUT o_avg_cpu_iowait_time numeric(21, 3) + , OUT o_cpu_busy_perc numeric(5, 2) + , OUT o_cpu_io_wait_perc numeric(5, 2) + , OUT o_min_cpu_busy_perc numeric(5, 2) + , OUT o_max_cpu_busy_perc numeric(5, 2) + , OUT o_min_cpu_iowait_perc numeric(5, 2) + , OUT o_max_cpu_iowait_perc numeric(5, 2) + ) +RETURNS SETOF record +AS +$$ +DECLARE l_error_message text; +BEGIN + + l_error_message := pmk.check_node_type(); + + IF l_error_message IS NOT NULL + THEN + pmk.put_line(l_error_message); + RETURN; + END IF; + + l_error_message := pmk.check_pmk_enabled(); + + IF l_error_message IS NOT NULL + THEN + pmk.put_line(l_error_message); + RETURN; + END IF; + + -- Verifying the input start and end times + pmk.check_start_end_dates(i_start_pmk_time, i_end_pmk_time, l_error_message); + + IF l_error_message IS NOT NULL + THEN + l_error_message := l_error_message || ' during generation of cluster + host CPU statistics ...'; + pmk.put_line(l_error_message); + RETURN; + END IF; + + RETURN QUERY + WITH snap AS + ( SELECT snapshot_id + , current_snapshot_time AS pmk_curr_collect_start_time + FROM pmk.pmk_snapshot + WHERE current_snapshot_time + BETWEEN i_start_pmk_time AND i_end_pmk_time + ) + , os_cpu_stat AS + ( SELECT s.pmk_curr_collect_start_time + , node_host + , node_name + , (busy_time_delta * 10) AS cpu_busy_time + , (idle_time_delta * 10) AS cpu_idle_time + , (iowait_time_delta * 10) AS cpu_iowait_time + FROM pmk.pmk_snapshot_datanode_stat dns, snap s + WHERE dns.snapshot_id = s.snapshot_id + ) + , os_cpu_stat1 AS + ( SELECT pmk_curr_collect_start_time::timestamp AS stat_collect_time + , node_host + , cpu_busy_time + , cpu_idle_time + , cpu_iowait_time + , (cpu_busy_time+cpu_idle_time+cpu_iowait_time) + ::numeric AS cpu_total_time + FROM ( SELECT pmk_curr_collect_start_time + , node_host + , cpu_busy_time + , cpu_idle_time + , cpu_iowait_time + , rank() OVER (PARTITION BY pmk_curr_collect_start_time + , node_host ORDER BY cpu_busy_time DESC, node_name + ) AS node_cpu_busy_order + FROM os_cpu_stat + ) + WHERE node_cpu_busy_order = 1 + ) + SELECT hcs.stat_collect_time + , AVG(hcs.cpu_total_time)::numeric(21, 3) AS avg_cpu_total_time + , AVG(hcs.cpu_busy_time)::numeric(21, 3) AS avg_cpu_busy_time + , AVG(hcs.cpu_iowait_time)::numeric(21, 3) AS avg_cpu_iowait_time + , ((SUM(cpu_busy_time) * 100.0) + / NULLIF(SUM(cpu_total_time), 0))::numeric(5, 2) + AS cpu_busy_perc + , ( (SUM(cpu_iowait_time) * 100.0) + / NULLIF(SUM(cpu_total_time), 0) )::numeric(5, 2) + AS cpu_io_wait_perc + , MIN(hcs.cpu_busy_time_perc)::numeric(5, 2) + AS min_cpu_busy_perc + , MAX(hcs.cpu_busy_time_perc)::numeric(5, 2) + AS max_cpu_busy_perc + , MIN(hcs.cpu_iowait_time_perc)::numeric(5, 2) + AS min_cpu_iowait_perc + , MAX(hcs.cpu_iowait_time_perc)::numeric(5, 2) + AS 
max_cpu_iowait_perc + FROM ( SELECT node_host + , stat_collect_time + , cpu_total_time + , cpu_busy_time + , cpu_iowait_time + , ( (cpu_busy_time * 100.0) + / NULLIF(cpu_total_time, 0) )::numeric(5, 2) + AS cpu_busy_time_perc + , ( (cpu_iowait_time * 100.0) + / NULLIF(cpu_total_time, 0) )::numeric(5, 2) + AS cpu_iowait_time_perc + FROM os_cpu_stat1 ) hcs + GROUP BY hcs.stat_collect_time + ORDER BY hcs.stat_collect_time; + +END; +$$ +LANGUAGE plpgsql;""", + """CREATE OR REPLACE FUNCTION pmk.get_cluster_mppdb_cpu_stat + ( IN i_start_pmk_time timestamp with time zone + , IN i_end_pmk_time timestamp with time zone + , OUT o_stat_collect_time timestamp + , OUT o_avg_mppdb_cpu_time numeric(21, 3) + , OUT o_avg_host_cpu_busy_time numeric(21, 3) + , OUT o_avg_host_cpu_total_time numeric(21, 3) + , OUT o_mppdb_cpu_time_perc_wrt_busy_time numeric(5, 2) + , OUT o_mppdb_cpu_time_perc_wrt_total_time numeric(5, 2) + , OUT o_min_mppdb_cpu_time_perc_wrt_busy_time numeric(5, 2) + , OUT o_max_mppdb_cpu_time_perc_wrt_busy_time numeric(5, 2) + , OUT o_min_mppdb_cpu_time_perc_wrt_total_time numeric(5, 2) + , OUT o_max_mppdb_cpu_time_perc_wrt_total_time numeric(5, 2) + ) +RETURNS SETOF record +AS +$$ +DECLARE l_error_message text; +BEGIN + + l_error_message := pmk.check_node_type(); + + IF l_error_message IS NOT NULL + THEN + pmk.put_line(l_error_message); + RETURN; + END IF; + + l_error_message := pmk.check_pmk_enabled(); + + IF l_error_message IS NOT NULL + THEN + pmk.put_line(l_error_message); + RETURN; + END IF; + + -- Verifying the input start and end times + pmk.check_start_end_dates(i_start_pmk_time, i_end_pmk_time, l_error_message); + + IF l_error_message IS NOT NULL + THEN + l_error_message := l_error_message || ' during generation of + cluster MPPDB CPU statistics ...'; + pmk.put_line(l_error_message); + RETURN; + END IF; + + RETURN QUERY + WITH snap AS + ( SELECT snapshot_id + , current_snapshot_time AS pmk_curr_collect_start_time + FROM pmk.pmk_snapshot + WHERE current_snapshot_time + BETWEEN i_start_pmk_time AND i_end_pmk_time + ) + , cpu_stat AS + ( SELECT s.snapshot_id + , s.pmk_curr_collect_start_time + , dns.node_name + , dns.node_host + , (dns.busy_time_delta * 10) AS host_cpu_busy_time + , (dns.idle_time_delta * 10) AS host_cpu_idle_time + , (dns.iowait_time_delta * 10) AS host_cpu_iowait_time + , (dns.db_cpu_time_delta * 10) AS mppdb_cpu_time + FROM pmk.pmk_snapshot_datanode_stat dns, snap s + WHERE dns.snapshot_id = s.snapshot_id + ) + , host_cpu_stat AS + ( SELECT pmk_curr_collect_start_time::timestamp AS stat_collect_time + , node_host + , host_cpu_busy_time + , host_cpu_idle_time + , host_cpu_iowait_time + , (host_cpu_busy_time+host_cpu_idle_time+host_cpu_iowait_time) + ::numeric AS host_cpu_total_time + FROM ( SELECT pmk_curr_collect_start_time + , node_host + , host_cpu_busy_time + , host_cpu_idle_time + , host_cpu_iowait_time + , rank() OVER (PARTITION BY snapshot_id, node_host + ORDER BY host_cpu_busy_time DESC, node_name) + AS node_cpu_busy_order + FROM cpu_stat + ) + WHERE node_cpu_busy_order = 1 + ) + , host_cpu_stat_summary AS + ( SELECT stat_collect_time + , AVG(host_cpu_busy_time)::numeric(21, 3) + AS avg_host_cpu_busy_time + , AVG(host_cpu_total_time)::numeric(21, 3) + AS avg_host_cpu_total_time + , SUM(host_cpu_busy_time)::numeric(21, 3) + AS tot_host_cpu_busy_time + , SUM(host_cpu_total_time)::numeric(21, 3) + AS tot_host_cpu_total_time + FROM host_cpu_stat + GROUP BY stat_collect_time + ) + , mppdb_cpu_stat0 AS + ( SELECT pmk_curr_collect_start_time::timestamp AS 
stat_collect_time + , node_name + , mppdb_cpu_time + , host_cpu_busy_time + , (host_cpu_busy_time+host_cpu_idle_time+host_cpu_iowait_time) + ::numeric AS host_cpu_total_time + FROM cpu_stat + ) + , mppdb_cpu_stat AS + ( SELECT stat_collect_time + , node_name + , mppdb_cpu_time + , ( (mppdb_cpu_time * 100.0) / NULLIF(host_cpu_busy_time, 0) ) + ::numeric(5, 2) AS mppdb_cpu_time_perc_wrt_busy_time + , ( (mppdb_cpu_time * 100.0) / NULLIF(host_cpu_total_time, 0) ) + ::numeric(5, 2) AS mppdb_cpu_time_perc_wrt_total_time + FROM mppdb_cpu_stat0 + ) + , mppdb_cpu_stat_summary AS + ( SELECT stat_collect_time + , AVG(mppdb_cpu_time)::numeric(21, 3) AS avg_mppdb_cpu_time + , SUM(mppdb_cpu_time)::numeric(21, 3) AS tot_mppdb_cpu_time + , MIN(mppdb_cpu_time_perc_wrt_busy_time)::numeric(5, 2) + AS min_mppdb_cpu_time_perc_wrt_busy_time + , MAX(mppdb_cpu_time_perc_wrt_busy_time)::numeric(5, 2) + AS max_mppdb_cpu_time_perc_wrt_busy_time + , MIN(mppdb_cpu_time_perc_wrt_total_time)::numeric(5, 2) + AS min_mppdb_cpu_time_perc_wrt_total_time + , MAX(mppdb_cpu_time_perc_wrt_total_time)::numeric(5, 2) + AS max_mppdb_cpu_time_perc_wrt_total_time + FROM mppdb_cpu_stat + GROUP BY stat_collect_time + ) + SELECT mcs.stat_collect_time + , mcs.avg_mppdb_cpu_time + , hcs.avg_host_cpu_busy_time + , hcs.avg_host_cpu_total_time + , CASE WHEN mcs.tot_mppdb_cpu_time < hcs.tot_host_cpu_busy_time + THEN ( (mcs.tot_mppdb_cpu_time * 100.0) + / NULLIF(hcs.tot_host_cpu_busy_time, 0) )::numeric(5, 2) + ELSE 100.00 + END AS mppdb_cpu_time_perc_wrt_busy_time + , CASE WHEN mcs.tot_mppdb_cpu_time < hcs.tot_host_cpu_total_time + THEN ( (mcs.tot_mppdb_cpu_time * 100.0) + / NULLIF(hcs.tot_host_cpu_total_time, 0) )::numeric(5, 2) + ELSE 100.00 + END AS mppdb_cpu_time_perc_wrt_total_time + , mcs.min_mppdb_cpu_time_perc_wrt_busy_time + , mcs.max_mppdb_cpu_time_perc_wrt_busy_time + , mcs.min_mppdb_cpu_time_perc_wrt_total_time + , mcs.max_mppdb_cpu_time_perc_wrt_total_time + FROM mppdb_cpu_stat_summary mcs + , host_cpu_stat_summary hcs + WHERE mcs.stat_collect_time = hcs.stat_collect_time + ORDER BY mcs.stat_collect_time; + +END; +$$ +LANGUAGE plpgsql;""", """CREATE OR REPLACE FUNCTION pmk.get_cluster_shared_buffer_stat + ( IN i_start_pmk_time timestamp with time zone + , IN i_end_pmk_time timestamp with time zone + , OUT o_stat_collect_time timestamp + , OUT o_shared_buffer_hit_ratio numeric(5, 2) + , OUT o_min_shared_buffer_hit_ratio numeric(5, 2) + , OUT o_max_shared_buffer_hit_ratio numeric(5, 2) + , OUT o_total_blocks_read bigint + , OUT o_total_blocks_hit bigint + ) +RETURNS SETOF record +AS +$$ +DECLARE l_error_message text; +BEGIN + + l_error_message := pmk.check_node_type(); + + IF l_error_message IS NOT NULL + THEN + pmk.put_line(l_error_message); + RETURN; + END IF; + + l_error_message := pmk.check_pmk_enabled(); + + IF l_error_message IS NOT NULL + THEN + pmk.put_line(l_error_message); + RETURN; + END IF; + + -- Verifying the input start and end times + pmk.check_start_end_dates(i_start_pmk_time, i_end_pmk_time, l_error_message); + + IF l_error_message IS NOT NULL + THEN + l_error_message := l_error_message || ' during generation of + cluster shared buffer statistics ...'; + pmk.put_line(l_error_message); + RETURN; + END IF; + + RETURN QUERY + WITH snap AS + ( SELECT snapshot_id + , current_snapshot_time AS pmk_curr_collect_start_time + FROM pmk.pmk_snapshot + WHERE current_snapshot_time + BETWEEN i_start_pmk_time AND i_end_pmk_time + ) + SELECT pmk_curr_collect_start_time::timestamp AS stat_collect_time + , ( (total_blocks_hit * 
100.0) + / NULLIF(total_blocks_read+total_blocks_hit, 0) )::numeric(5, 2) + AS shared_buffer_hit_ratio + , min_shared_buffer_hit_ratio + , max_shared_buffer_hit_ratio + , total_blocks_read + , total_blocks_hit + FROM ( SELECT pmk_curr_collect_start_time + , SUM(blocks_read)::bigint AS total_blocks_read + , SUM(blocks_hit)::bigint AS total_blocks_hit + , MIN(shared_buffer_hit_ratio)::numeric(5, 2) + AS min_shared_buffer_hit_ratio + , MAX(shared_buffer_hit_ratio)::numeric(5, 2) + AS max_shared_buffer_hit_ratio + FROM ( SELECT s.pmk_curr_collect_start_time + , node_name + , blocks_read_delta AS blocks_read + , blocks_hit_delta AS blocks_hit + , ( (blocks_hit_delta * 100.0) + / NULLIF((blocks_read_delta + blocks_hit_delta), 0) ) + ::numeric(5, 2) AS shared_buffer_hit_ratio + FROM pmk.pmk_snapshot_datanode_stat dns, snap s + WHERE dns.snapshot_id = s.snapshot_id + ) + GROUP BY pmk_curr_collect_start_time + ) + ORDER BY stat_collect_time; + +END; +$$ +LANGUAGE plpgsql;""", + """CREATE OR REPLACE FUNCTION pmk.get_cluster_memory_sort_stat + ( IN i_start_pmk_time timestamp with time zone + , IN i_end_pmk_time timestamp with time zone + , OUT o_stat_collect_time timestamp + , OUT o_memory_sort_ratio numeric(5, 2) + , OUT o_min_memory_sort_ratio numeric(5, 2) + , OUT o_max_memory_sort_ratio numeric(5, 2) + , OUT o_total_memory_sorts bigint + , OUT o_total_disk_sorts bigint + ) +RETURNS SETOF record +AS +$$ +DECLARE l_error_message text; +BEGIN + + l_error_message := pmk.check_node_type(); + + IF l_error_message IS NOT NULL + THEN + pmk.put_line(l_error_message); + RETURN; + END IF; + + l_error_message := pmk.check_pmk_enabled(); + + IF l_error_message IS NOT NULL + THEN + pmk.put_line(l_error_message); + RETURN; + END IF; + + -- Verifying the input start and end times + pmk.check_start_end_dates(i_start_pmk_time, i_end_pmk_time, l_error_message); + + IF l_error_message IS NOT NULL + THEN + l_error_message := l_error_message || ' during generation of + cluster memory sort statistics ...'; + pmk.put_line(l_error_message); + RETURN; + END IF; + + RETURN QUERY + WITH snap AS + ( SELECT snapshot_id + , current_snapshot_time AS pmk_curr_collect_start_time + FROM pmk.pmk_snapshot + WHERE current_snapshot_time + BETWEEN i_start_pmk_time AND i_end_pmk_time + ) + SELECT pmk_curr_collect_start_time::timestamp AS stat_collect_time + , ( (total_memory_sorts * 100.0) + / NULLIF(total_disk_sorts+total_memory_sorts, 0) )::numeric(5, 2) + AS memory_sort_ratio + , min_memory_sort_ratio + , max_memory_sort_ratio + , total_memory_sorts + , total_disk_sorts + FROM ( SELECT pmk_curr_collect_start_time + , SUM(memory_sorts)::bigint AS total_memory_sorts + , SUM(disk_sorts)::bigint AS total_disk_sorts + , MIN(memory_sort_ratio)::numeric(5, 2) AS min_memory_sort_ratio + , MAX(memory_sort_ratio)::numeric(5, 2) AS max_memory_sort_ratio + FROM ( SELECT s.pmk_curr_collect_start_time + , node_name + , sorts_in_memory_delta AS memory_sorts + , sorts_in_disk_delta AS disk_sorts + , ( (sorts_in_memory_delta * 100.0) + / NULLIF((sorts_in_disk_delta + sorts_in_memory_delta), 0) ) + ::numeric(5, 2) AS memory_sort_ratio + FROM pmk.pmk_snapshot_datanode_stat dns, snap s + WHERE dns.snapshot_id = s.snapshot_id + ) + GROUP BY pmk_curr_collect_start_time + ) + ORDER BY stat_collect_time; + +END; +$$ +LANGUAGE plpgsql;""", + """CREATE OR REPLACE FUNCTION pmk.get_cluster_io_stat + ( IN i_start_pmk_time timestamp with time zone + , IN i_end_pmk_time timestamp with time zone + , OUT o_stat_collect_time timestamp + , OUT o_number_of_files int + 
, OUT o_physical_reads bigint + , OUT o_physical_writes bigint + , OUT o_read_time bigint + , OUT o_write_time bigint + , OUT o_avg_read_per_sec numeric(20,2) + , OUT o_avg_read_time numeric(20,3) + , OUT o_avg_write_per_sec numeric(20,2) + , OUT o_avg_write_time numeric(20,3) + , OUT o_min_node_read_per_sec numeric(20,2) + , OUT o_max_node_read_per_sec numeric(20,2) + , OUT o_min_node_read_time numeric(20,3) + , OUT o_max_node_read_time numeric(20,3) + , OUT o_min_node_write_per_sec numeric(20,2) + , OUT o_max_node_write_per_sec numeric(20,2) + , OUT o_min_node_write_time numeric(20,3) + , OUT o_max_node_write_time numeric(20,3) + ) +RETURNS SETOF record +AS +$$ +DECLARE l_error_message text; +BEGIN + + l_error_message := pmk.check_node_type(); + + IF l_error_message IS NOT NULL + THEN + pmk.put_line(l_error_message); + RETURN; + END IF; + + l_error_message := pmk.check_pmk_enabled(); + + IF l_error_message IS NOT NULL + THEN + pmk.put_line(l_error_message); + RETURN; + END IF; + + -- Verifying the input start and end times + pmk.check_start_end_dates(i_start_pmk_time, i_end_pmk_time, l_error_message); + + IF l_error_message IS NOT NULL + THEN + l_error_message := l_error_message || ' during generation of + cluster I/O statistics ...'; + pmk.put_line(l_error_message); + RETURN; + END IF; + + RETURN QUERY + WITH snap AS + ( SELECT snapshot_id + , current_snapshot_time AS pmk_curr_collect_start_time + FROM pmk.pmk_snapshot + WHERE current_snapshot_time + BETWEEN i_start_pmk_time AND i_end_pmk_time + ) + SELECT pmk_curr_collect_start_time::timestamp AS stat_collect_time + , number_of_files + , physical_reads + , physical_writes + , read_time + , write_time + , ( physical_reads * 1000000.0 / NULLIF(read_time, 0) )::numeric(20,2) + AS avg_read_per_sec + , ( read_time * 1.0 / NULLIF(physical_reads, 0) )::numeric(20,3) + AS avg_read_time + , ( physical_writes * 1000000.0 + / NULLIF(write_time, 0) )::numeric(20,2) AS avg_write_per_sec + , ( write_time * 1.0 / NULLIF(physical_writes, 0) )::numeric(20,3) + AS avg_write_time + , min_node_read_per_sec + , max_node_read_per_sec + , min_node_read_time + , max_node_read_time + , min_node_write_per_sec + , max_node_write_per_sec + , min_node_write_time + , max_node_write_time + FROM ( SELECT pmk_curr_collect_start_time + , SUM(number_of_files)::int AS number_of_files + , SUM(physical_reads_delta)::bigint AS physical_reads + , SUM(physical_writes_delta)::bigint AS physical_writes + , SUM(read_time_delta)::bigint AS read_time + , SUM(write_time_delta)::bigint AS write_time + , MIN(node_read_per_sec) AS min_node_read_per_sec + , MAX(node_read_per_sec) AS max_node_read_per_sec + , MIN(node_read_time) AS min_node_read_time + , MAX(node_read_time) AS max_node_read_time + , MIN(node_write_per_sec) AS min_node_write_per_sec + , MAX(node_write_per_sec) AS max_node_write_per_sec + , MIN(node_write_time) AS min_node_write_time + , MAX(node_write_time) AS max_node_write_time + FROM ( SELECT s.pmk_curr_collect_start_time + , node_name + , number_of_files + , physical_reads_delta + , physical_writes_delta + , read_time_delta + , write_time_delta + , ( physical_reads_delta * 1000000.0 + / NULLIF(read_time_delta, 0) )::numeric(20,2) + AS node_read_per_sec + , ( read_time_delta * 1.0 + / NULLIF(physical_reads_delta, 0) )::numeric(20,3) + AS node_read_time + , ( physical_writes_delta * 1000000.0 + / NULLIF(write_time_delta, 0) )::numeric(20,2) + AS node_write_per_sec + , ( write_time_delta * 1.0 + / NULLIF(physical_writes_delta, 0) )::numeric(20,3) + AS node_write_time + 
FROM pmk.pmk_snapshot_datanode_stat dns, snap s + WHERE dns.snapshot_id = s.snapshot_id + ) + GROUP BY pmk_curr_collect_start_time + ) + ORDER BY stat_collect_time; + +END; +$$ +LANGUAGE plpgsql;""", + """CREATE OR REPLACE FUNCTION pmk.get_cluster_disk_usage_stat + ( IN i_start_pmk_time timestamp with time zone + , IN i_end_pmk_time timestamp with time zone + , IN i_db_size text + , OUT o_stat_collect_time timestamp + , OUT o_tot_datanode_db_size text + , OUT o_max_datanode_db_size text + , OUT o_tot_physical_writes bigint + , OUT o_max_node_physical_writes bigint + , OUT o_max_node_write_per_sec numeric(20,2) + , OUT o_avg_write_per_sec numeric(20,2) + ) +RETURNS SETOF record +AS +$$ +DECLARE l_error_message text; +DECLARE l_db_size bigint; +BEGIN + + l_error_message := pmk.check_node_type(); + + IF l_error_message IS NOT NULL + THEN + pmk.put_line(l_error_message); + RETURN; + END IF; + + l_error_message := pmk.check_pmk_enabled(); + + IF l_error_message IS NOT NULL + THEN + pmk.put_line(l_error_message); + RETURN; + END IF; + + -- Verifying the input start and end times + pmk.check_start_end_dates(i_start_pmk_time, i_end_pmk_time, l_error_message); + + IF l_error_message IS NOT NULL + THEN + l_error_message := l_error_message || ' during generation of cluster + disk usage statistics ...'; + pmk.put_line(l_error_message); + RETURN; + END IF; + + IF i_db_size = '0' + THEN + SELECT SUM(pg_database_size(oid))::bigint + INTO l_db_size + FROM pg_database; + ELSE + SELECT SUM(i_db_size)::bigint + INTO l_db_size; + END IF; + RETURN QUERY + WITH snap AS + ( SELECT snapshot_id + , current_snapshot_time AS pmk_curr_collect_start_time + FROM pmk.pmk_snapshot + WHERE current_snapshot_time + BETWEEN i_start_pmk_time AND i_end_pmk_time + ) + , disk_stat AS + ( + SELECT s.pmk_curr_collect_start_time + , db_size + , physical_writes_delta + , write_time_delta + , ( physical_writes_delta * 1000000.0 + / NULLIF(write_time_delta, 0) )::numeric(20,2) + AS node_write_per_sec + FROM pmk.pmk_snapshot_datanode_stat dns, snap s + WHERE dns.snapshot_id = s.snapshot_id + ) + SELECT pmk_curr_collect_start_time::timestamp AS stat_collect_time + , pg_size_pretty(tot_datanode_db_size) AS tot_datanode_db_size + , pg_size_pretty(max_datanode_db_size) AS max_datanode_db_size + , tot_physical_writes + , max_node_physical_writes + , max_node_write_per_sec + , ( tot_physical_writes * 1000000.0 + / NULLIF(tot_write_time, 0) )::numeric(20,2) AS avg_write_per_sec + FROM ( SELECT pmk_curr_collect_start_time + , l_db_size::bigint AS tot_datanode_db_size + , MAX(db_size)::bigint AS max_datanode_db_size + , SUM(physical_writes_delta)::bigint AS tot_physical_writes + , SUM(write_time_delta)::bigint AS tot_write_time + , MAX(physical_writes_delta)::bigint + AS max_node_physical_writes + , MAX(node_write_per_sec) AS max_node_write_per_sec + FROM disk_stat + GROUP BY pmk_curr_collect_start_time + ) + ORDER BY pmk_curr_collect_start_time; + +END; +$$ +LANGUAGE plpgsql;""", + """CREATE OR REPLACE FUNCTION pmk.get_cluster_active_sql_count + ( IN i_start_pmk_time timestamp with time zone + , IN i_end_pmk_time timestamp with time zone + , OUT o_stat_collect_time timestamp + , OUT o_tot_active_sql_count int + , OUT o_avg_active_sql_count numeric(9, 2) + , OUT o_min_active_sql_count int + , OUT o_max_active_sql_count int + ) +RETURNS SETOF record +AS +$$ +DECLARE l_error_message text; +BEGIN + + l_error_message := pmk.check_node_type(); + + IF l_error_message IS NOT NULL + THEN + pmk.put_line(l_error_message); + RETURN; + END IF; + + 
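+    -- check_pmk_enabled() presumably reads the 'Enable PMK' flag that
+    -- pmk.insertBaseValue() seeds into pmk.pmk_configuration; a non-NULL
+    -- message here means PMK collection is switched off and the report
+    -- is skipped.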
l_error_message := pmk.check_pmk_enabled(); + + IF l_error_message IS NOT NULL + THEN + pmk.put_line(l_error_message); + RETURN; + END IF; + + -- Verifying the input start and end times + pmk.check_start_end_dates(i_start_pmk_time, i_end_pmk_time, l_error_message); + + IF l_error_message IS NOT NULL + THEN + l_error_message := l_error_message || ' during generation of + active SQL count statistics ...'; + pmk.put_line(l_error_message); + RETURN; + END IF; + + RETURN QUERY + WITH snap AS + ( SELECT snapshot_id + , current_snapshot_time AS pmk_curr_collect_start_time + FROM pmk.pmk_snapshot + WHERE current_snapshot_time + BETWEEN i_start_pmk_time AND i_end_pmk_time + ) + SELECT pmk_curr_collect_start_time::timestamp AS stat_collect_time + , tot_active_sql_count + , avg_active_sql_count + , min_active_sql_count + , max_active_sql_count + FROM ( SELECT s.pmk_curr_collect_start_time + , SUM(active_sql_count)::int AS tot_active_sql_count + , ROUND(AVG(active_sql_count), 2)::numeric(9, 2) + AS avg_active_sql_count + , MIN(active_sql_count)::int AS min_active_sql_count + , MAX(active_sql_count)::int AS max_active_sql_count + FROM pmk.pmk_snapshot_datanode_stat dns, snap s + WHERE dns.snapshot_id = s.snapshot_id + GROUP BY s.pmk_curr_collect_start_time + ) + ORDER BY stat_collect_time; + +END; +$$ +LANGUAGE plpgsql;""", + """CREATE OR REPLACE FUNCTION pmk.get_cluster_session_count + ( IN i_start_pmk_time timestamp with time zone + , IN i_end_pmk_time timestamp with time zone + , OUT o_stat_collect_time timestamp + , OUT o_tot_session_count int + , OUT o_avg_session_count numeric(9, 2) + , OUT o_min_session_count int + , OUT o_max_session_count int + ) +RETURNS SETOF record +AS +$$ +DECLARE l_error_message text; +BEGIN + + l_error_message := pmk.check_node_type(); + + IF l_error_message IS NOT NULL + THEN + pmk.put_line(l_error_message); + RETURN; + END IF; + + l_error_message := pmk.check_pmk_enabled(); + + IF l_error_message IS NOT NULL + THEN + pmk.put_line(l_error_message); + RETURN; + END IF; + + -- Verifying the input start and end times + pmk.check_start_end_dates(i_start_pmk_time, i_end_pmk_time, l_error_message); + + IF l_error_message IS NOT NULL + THEN + l_error_message := l_error_message || ' during generation of + session count statistics ...'; + pmk.put_line(l_error_message); + RETURN; + END IF; + + RETURN QUERY + WITH snap AS + ( SELECT snapshot_id + , current_snapshot_time AS pmk_curr_collect_start_time + FROM pmk.pmk_snapshot + WHERE current_snapshot_time + BETWEEN i_start_pmk_time AND i_end_pmk_time + ) + SELECT pmk_curr_collect_start_time::timestamp AS stat_collect_time + , tot_session_count + , avg_session_count + , min_session_count + , max_session_count + FROM ( SELECT s.pmk_curr_collect_start_time + , SUM(session_count)::int AS tot_session_count + , ROUND(AVG(session_count), 2)::numeric(9, 2) + AS avg_session_count + , MIN(session_count)::int AS min_session_count + , MAX(session_count)::int AS max_session_count + FROM pmk.pmk_snapshot_datanode_stat dns, snap s + WHERE dns.snapshot_id = s.snapshot_id + GROUP BY s.pmk_curr_collect_start_time + ) + ORDER BY stat_collect_time; + +END; +$$ +LANGUAGE plpgsql;""", + """CREATE OR REPLACE FUNCTION pmk.get_node_cpu_stat + ( IN i_node_name text + , IN i_start_pmk_time timestamp with time zone + , IN i_end_pmk_time timestamp with time zone + , OUT o_node_type char(1) + , OUT o_node_name text + , OUT o_node_host text + , OUT o_stat_collect_time timestamp + , OUT o_mppdb_cpu_time bigint + , OUT o_host_cpu_busy_time bigint + , OUT 
o_host_cpu_total_time bigint + , OUT o_mppdb_cpu_time_perc_wrt_busy_time numeric(5, 2) + , OUT o_mppdb_cpu_time_perc_wrt_total_time numeric(5, 2) + ) +RETURNS SETOF record +AS +$$ +DECLARE l_node_type char(1); + l_node_name text; + l_error_message text; +BEGIN + + l_error_message := pmk.check_node_type(); + + IF l_error_message IS NOT NULL + THEN + pmk.put_line(l_error_message); + RETURN; + END IF; + + l_error_message := pmk.check_pmk_enabled(); + + IF l_error_message IS NOT NULL + THEN + pmk.put_line(l_error_message); + RETURN; + END IF; + + l_node_name := UPPER(i_node_name); + + IF l_node_name <> 'ALL' + THEN + l_node_type := 'D'; + + IF l_node_type IS NULL + THEN + l_error_message := 'ERROR:: Invalid node name ("' || + i_node_name || '") provided during generation of node + (MPPDB instance) CPU statistics ...'; + + pmk.put_line(l_error_message); + RETURN; + END IF; + END IF; + + -- Verifying the input start and end times + pmk.check_start_end_dates(i_start_pmk_time, i_end_pmk_time, l_error_message); + + IF l_error_message IS NOT NULL + THEN + l_error_message := l_error_message || ' during generation of node + (MPPDB instance) CPU statistics ...'; + pmk.put_line(l_error_message); + RETURN; + END IF; + + IF l_node_name = 'ALL' + THEN + RETURN QUERY + WITH snap AS + ( SELECT snapshot_id + , current_snapshot_time AS pmk_curr_collect_start_time + FROM pmk.pmk_snapshot + WHERE current_snapshot_time + BETWEEN i_start_pmk_time AND i_end_pmk_time + ) + SELECT node_type + , node_name + , node_host + , pmk_curr_collect_start_time::timestamp AS stat_collect_time + , mppdb_cpu_time + , host_cpu_busy_time + , (host_cpu_busy_time+host_cpu_idle_time+host_cpu_iowait_time) + ::bigint AS host_cpu_total_time + , ( (mppdb_cpu_time * 100.0) + / NULLIF(host_cpu_busy_time, 0) )::numeric(5, 2) + AS mppdb_cpu_time_perc_wrt_busy_time + , ( (mppdb_cpu_time * 100.0) + / NULLIF((host_cpu_busy_time+host_cpu_idle_time + +host_cpu_iowait_time), 0) )::numeric(5, 2) + AS mppdb_cpu_time_perc_wrt_total_time + FROM ( SELECT 'D'::char(1) AS node_type + , node_name + , node_host + , s.pmk_curr_collect_start_time + , (busy_time_delta * 10)::bigint AS host_cpu_busy_time + , (idle_time_delta * 10)::bigint AS host_cpu_idle_time + , (iowait_time_delta * 10)::bigint AS host_cpu_iowait_time + , (db_cpu_time_delta * 10)::bigint AS mppdb_cpu_time + FROM pmk.pmk_snapshot_datanode_stat dns, snap s + WHERE dns.snapshot_id = s.snapshot_id + ) + ORDER BY node_type, node_name, stat_collect_time; + ELSE + RETURN QUERY + WITH snap AS + ( SELECT snapshot_id + , current_snapshot_time AS pmk_curr_collect_start_time + FROM pmk.pmk_snapshot + WHERE current_snapshot_time + BETWEEN i_start_pmk_time AND i_end_pmk_time + ) + SELECT 'D'::char(1) AS node_type + , node_name + , node_host + , pmk_curr_collect_start_time::timestamp AS stat_collect_time + , mppdb_cpu_time + , host_cpu_busy_time + , (host_cpu_busy_time+host_cpu_idle_time+host_cpu_iowait_time) + ::bigint AS host_cpu_total_time + , ( (mppdb_cpu_time * 100.0) / NULLIF(host_cpu_busy_time, 0) ) + ::numeric(5, 2) AS mppdb_cpu_time_perc_wrt_busy_time + , ( (mppdb_cpu_time * 100.0) / NULLIF((host_cpu_busy_time+ + host_cpu_idle_time+host_cpu_iowait_time), 0) )::numeric(5, 2) + AS mppdb_cpu_time_perc_wrt_total_time + FROM ( SELECT node_name + , node_host + , s.pmk_curr_collect_start_time + , (busy_time_delta * 10)::bigint AS host_cpu_busy_time + , (idle_time_delta * 10)::bigint AS host_cpu_idle_time + , (iowait_time_delta * 10)::bigint AS host_cpu_iowait_time + , (db_cpu_time_delta * 10)::bigint AS 
mppdb_cpu_time + FROM pmk.pmk_snapshot_datanode_stat dns, snap s + WHERE dns.snapshot_id = s.snapshot_id + AND UPPER(node_name) = l_node_name + ) + ORDER BY node_name, stat_collect_time; + END IF; -- end of l_node_name = 'ALL' + +END; +$$ +LANGUAGE plpgsql;""", + """CREATE OR REPLACE FUNCTION pmk.get_node_memory_stat + ( IN i_node_name text + , IN i_start_pmk_time timestamp with time zone + , IN i_end_pmk_time timestamp with time zone + , OUT o_node_type char(1) + , OUT o_node_name text + , OUT o_node_host text + , OUT o_stat_collect_time timestamp + , OUT o_physical_memory bigint + , OUT o_db_memory_usage bigint + , OUT o_shared_buffer_size bigint + , OUT o_blocks_read bigint + , OUT o_blocks_hit bigint + , OUT o_shared_buffer_hit_ratio numeric(5, 2) + , OUT o_work_memory_size bigint + , OUT o_sorts_in_memory bigint + , OUT o_sorts_in_disk bigint + , OUT o_in_memory_sort_ratio numeric(5, 2) + ) +RETURNS SETOF record +AS +$$ +DECLARE l_node_type char(1); + l_node_name text; + l_error_message text; +BEGIN + + l_error_message := pmk.check_node_type(); + + IF l_error_message IS NOT NULL + THEN + pmk.put_line(l_error_message); + RETURN; + END IF; + + l_error_message := pmk.check_pmk_enabled(); + + IF l_error_message IS NOT NULL + THEN + pmk.put_line(l_error_message); + RETURN; + END IF; + + l_node_name := UPPER(i_node_name); + + IF l_node_name <> 'ALL' + THEN + l_node_type := 'D'; + + IF l_node_type IS NULL + THEN + l_error_message := 'ERROR:: Invalid node name ("' + || i_node_name || '") provided during generation of node + (MPPDB instance) memory statistics ...'; + pmk.put_line(l_error_message); + RETURN; + END IF; + END IF; + + -- Verifying the input start and end times + pmk.check_start_end_dates(i_start_pmk_time, i_end_pmk_time, l_error_message); + + IF l_error_message IS NOT NULL + THEN + l_error_message := l_error_message || ' during generation of node + (MPPDB instance) memory statistics ...'; + pmk.put_line(l_error_message); + RETURN; + END IF; + + IF l_node_name = 'ALL' + THEN + + RETURN QUERY + WITH snap AS + ( SELECT snapshot_id + , current_snapshot_time AS pmk_curr_collect_start_time + FROM pmk.pmk_snapshot + WHERE current_snapshot_time + BETWEEN i_start_pmk_time AND i_end_pmk_time + ) + SELECT node_type + , node_name + , node_host + , pmk_curr_collect_start_time::timestamp AS stat_collect_time + , physical_memory + , db_memory_usage + , shared_buffer_size + , blocks_read + , blocks_hit + , ( (blocks_hit * 100.0) / NULLIF((blocks_read + blocks_hit), 0) ) + ::numeric(5, 2) AS shared_buffer_hit_ratio + , work_memory_size + , sorts_in_memory + , sorts_in_disk + , ( (sorts_in_memory * 100.0) + / NULLIF((sorts_in_disk + sorts_in_memory), 0) )::numeric(5, 2) + AS in_memory_sort_ratio + FROM ( SELECT 'D'::char(1) AS node_type + , node_name + , node_host + , s.pmk_curr_collect_start_time + , physical_memory + , db_memory_usage + , shared_buffer_size + , blocks_read_delta AS blocks_read + , blocks_hit_delta AS blocks_hit + , work_memory_size + , sorts_in_memory_delta AS sorts_in_memory + , sorts_in_disk_delta AS sorts_in_disk + FROM pmk.pmk_snapshot_datanode_stat dns, snap s + WHERE dns.snapshot_id = s.snapshot_id + ) + ORDER BY node_type, node_name, stat_collect_time; + ELSE + RETURN QUERY + WITH snap AS + ( SELECT snapshot_id + , current_snapshot_time AS pmk_curr_collect_start_time + FROM pmk.pmk_snapshot + WHERE current_snapshot_time + BETWEEN i_start_pmk_time AND i_end_pmk_time + ) + SELECT 'D'::char(1) AS node_type + , node_name + , node_host + , pmk_curr_collect_start_time::timestamp 
AS stat_collect_time + , physical_memory + , db_memory_usage + , shared_buffer_size + , blocks_read + , blocks_hit + , ( (blocks_hit * 100.0) / NULLIF((blocks_read + blocks_hit), 0) ) + ::numeric(5, 2) AS shared_buffer_hit_ratio + , work_memory_size + , sorts_in_memory + , sorts_in_disk + , ( (sorts_in_memory * 100.0) / NULLIF((sorts_in_disk + + sorts_in_memory), 0) )::numeric(5, 2) AS in_memory_sort_ratio + FROM ( SELECT node_name + , node_host + , s.pmk_curr_collect_start_time + , physical_memory + , db_memory_usage + , shared_buffer_size + , blocks_read_delta AS blocks_read + , blocks_hit_delta AS blocks_hit + , work_memory_size + , sorts_in_memory_delta AS sorts_in_memory + , sorts_in_disk_delta AS sorts_in_disk + FROM pmk.pmk_snapshot_datanode_stat dns, snap s + WHERE dns.snapshot_id = s.snapshot_id + AND UPPER(node_name) = l_node_name + ) + ORDER BY node_name, stat_collect_time; + END IF; -- end of l_node_name = 'ALL' + +END; +$$ +LANGUAGE plpgsql;""", + """CREATE OR REPLACE FUNCTION pmk.get_node_io_stat + ( IN i_node_name text + , IN i_start_pmk_time timestamp with time zone + , IN i_end_pmk_time timestamp with time zone + , OUT o_node_type char(1) + , OUT o_node_name text + , OUT o_node_host text + , OUT o_stat_collect_time timestamp + , OUT o_number_of_files int + , OUT o_physical_reads bigint + , OUT o_physical_writes bigint + , OUT o_read_time bigint + , OUT o_write_time bigint + , OUT o_avg_read_per_sec numeric(20,2) + , OUT o_avg_read_time numeric(20,3) + , OUT o_avg_write_per_sec numeric(20,2) + , OUT o_avg_write_time numeric(20,3) + ) +RETURNS SETOF record +AS +$$ +DECLARE l_node_type char(1); + l_node_name text; + l_error_message text; +BEGIN + + l_error_message := pmk.check_node_type(); + + IF l_error_message IS NOT NULL + THEN + pmk.put_line(l_error_message); + RETURN; + END IF; + + l_error_message := pmk.check_pmk_enabled(); + + IF l_error_message IS NOT NULL + THEN + pmk.put_line(l_error_message); + RETURN; + END IF; + + l_node_name := UPPER(i_node_name); + + IF l_node_name <> 'ALL' + THEN + l_node_type := 'D'; + + IF l_node_type IS NULL + THEN + l_error_message := 'ERROR:: Invalid node name ("' || i_node_name + || '") provided during generation of node (MPPDB instance) + I/O statistics ...'; + pmk.put_line(l_error_message); + RETURN; + END IF; + END IF; + + -- Verifying the input start and end times + pmk.check_start_end_dates(i_start_pmk_time, i_end_pmk_time, l_error_message); + + IF l_error_message IS NOT NULL + THEN + l_error_message := l_error_message || ' during generation of node + (MPPDB instance) I/O statistics ...'; + pmk.put_line(l_error_message); + RETURN; + END IF; + + IF l_node_name = 'ALL' + THEN + + RETURN QUERY + WITH snap AS + ( SELECT snapshot_id + , current_snapshot_time AS pmk_curr_collect_start_time + FROM pmk.pmk_snapshot + WHERE current_snapshot_time + BETWEEN i_start_pmk_time AND i_end_pmk_time + ) + SELECT node_type + , node_name + , node_host + , pmk_curr_collect_start_time::timestamp AS stat_collect_time + , number_of_files + , physical_reads + , physical_writes + , read_time + , write_time + , ( physical_reads * 1000000.0 + / NULLIF(read_time, 0) )::numeric(20,2) AS avg_read_per_sec + , ( read_time * 1.0 + / NULLIF(physical_reads, 0) )::numeric(20,3) AS avg_read_time + , ( physical_writes * 1000000.0 + / NULLIF(write_time, 0) )::numeric(20,2) AS avg_write_per_sec + , ( write_time * 1.0 + / NULLIF(physical_writes, 0) )::numeric(20,3) AS avg_write_time + FROM ( SELECT 'D'::char(1) AS node_type + , node_name + , node_host + , 
s.pmk_curr_collect_start_time + , number_of_files + , physical_reads_delta AS physical_reads + , physical_writes_delta AS physical_writes + , read_time_delta AS read_time + , write_time_delta AS write_time + FROM pmk.pmk_snapshot_datanode_stat dns, snap s + WHERE dns.snapshot_id = s.snapshot_id + ) + ORDER BY node_type, node_name, stat_collect_time; + + ELSE + RETURN QUERY + WITH snap AS + ( SELECT snapshot_id + , current_snapshot_time AS pmk_curr_collect_start_time + FROM pmk.pmk_snapshot + WHERE current_snapshot_time + BETWEEN i_start_pmk_time AND i_end_pmk_time + ) + SELECT 'D'::char(1) AS node_type + , node_name + , node_host + , pmk_curr_collect_start_time::timestamp AS stat_collect_time + , number_of_files + , physical_reads + , physical_writes + , read_time + , write_time + , ( physical_reads * 1000000.0 + / NULLIF(read_time, 0) )::numeric(20,2) AS avg_read_per_sec + , ( read_time * 1.0 + / NULLIF(physical_reads, 0) )::numeric(20,3) AS avg_read_time + , ( physical_writes * 1000000.0 + / NULLIF(write_time, 0) )::numeric(20,2) AS avg_write_per_sec + , ( write_time * 1.0 + / NULLIF(physical_writes, 0) )::numeric(20,3) AS avg_write_time + FROM ( SELECT node_name + , node_host + , pmk_curr_collect_start_time + , number_of_files + , physical_reads_delta AS physical_reads + , physical_writes_delta AS physical_writes + , read_time_delta AS read_time + , write_time_delta AS write_time + FROM pmk.pmk_snapshot_datanode_stat dns, snap s + WHERE dns.snapshot_id = s.snapshot_id + AND UPPER(node_name) = l_node_name + ) + ORDER BY node_name, stat_collect_time; + END IF; -- end of l_node_name = 'ALL' + +END; +$$ +LANGUAGE plpgsql;""", + """CREATE OR REPLACE FUNCTION pmk.get_session_cpu_stat + ( IN i_node_name text + , IN i_top_n_sessions smallint + , OUT o_node_name name + , OUT o_db_name name + , OUT o_user_name name + , OUT o_client_hostname text + , OUT o_session_start_time timestamp + , OUT o_xact_start_time timestamp + , OUT o_waiting boolean + , OUT o_state text + , OUT o_query text + , OUT o_session_cpu_time bigint + , OUT o_mppdb_cpu_time bigint + , OUT o_mppdb_cpu_time_perc numeric(5, 2) + , OUT o_avg_sql_exec_time numeric(15, 3) + ) +RETURNS SETOF RECORD +AS +$$ +DECLARE l_node_query text; + l_execute_query text; +BEGIN + + FOR i IN ( SELECT node_name + FROM DBE_PERF.node_name nl + WHERE UPPER(nl.node_name) = + COALESCE(NULLIF(UPPER(i_node_name), 'ALL'), UPPER(nl.node_name)) + ) + LOOP + + l_node_query := 'WITH sess_time_stat0 AS + ( SELECT sessid, stat_name + , (value/1000.0)::numeric AS stat_value + -- converting to millisecond + FROM gs_session_time + WHERE stat_name + IN ( ''CPU_TIME'', ''EXECUTION_TIME'') + ) + , sess_time_stat AS + ( SELECT DISTINCT stso.sessid + , (SELECT stsi.stat_value FROM sess_time_stat0 + stsi WHERE stsi.sessid = stso.sessid + AND stsi.stat_name = ''CPU_TIME'') + AS session_cpu_time + , (SELECT stsi.stat_value FROM sess_time_stat0 + stsi WHERE stsi.sessid = stso.sessid + AND stsi.stat_name = ''EXECUTION_TIME'') + AS session_sql_time + FROM sess_time_stat0 stso + ) + , mppdb_cpu_time AS + ( SELECT (total_cpu()*10.0)::bigint + AS mppdb_cpu_time -- converting to millisecond + FROM DUAL + ) + , sess_cpu_stat AS + ( SELECT ''' || i.node_name || '''::name AS node_name + , a.datname::name AS db_name + , a.usename::name AS user_name + , a.client_hostname + , date_trunc(''second'', a.backend_start) + ::timestamp AS session_start_time + , date_trunc(''second'', a.xact_start) + ::timestamp AS xact_start_time + , a.waiting + , a.state, a.query + , 
ROUND(st.session_cpu_time)::bigint + AS session_cpu_time + , m.mppdb_cpu_time + , ( (st.session_cpu_time * 100.0) + / NULLIF(m.mppdb_cpu_time, 0) )::numeric(5, 2) + AS mppdb_cpu_time_perc + , st.sessid + , st.session_sql_time + FROM pg_stat_activity a + , sess_time_stat st + , mppdb_cpu_time m + WHERE a.state IN (''active'', + ''fastpath function call'', ''retrying'') + AND a.pid = sessionid2pid(st.sessid::cstring) + ORDER BY st.session_cpu_time DESC + , mppdb_cpu_time_perc DESC + LIMIT ' || i_top_n_sessions || ' + ) + SELECT scs.node_name + , scs.db_name + , scs.user_name + , scs.client_hostname + , scs.session_start_time + , scs.xact_start_time + , scs.waiting + , scs.state + , scs.query + , scs.session_cpu_time + , scs.mppdb_cpu_time + , scs.mppdb_cpu_time_perc + , ( scs.session_sql_time + / NULLIF(ss.value, 0) )::numeric(15, 3) + AS avg_sql_exec_time + FROM sess_cpu_stat scs + , gs_session_stat ss + WHERE ss.sessid = scs.sessid + AND ss.statname = ''n_sql'''; + + + RETURN QUERY + EXECUTE l_node_query; + + END LOOP; + +END; +$$ +LANGUAGE plpgsql;""", + """CREATE OR REPLACE FUNCTION pmk.get_session_memory_stat + ( IN i_node_name text + , IN i_top_n_sessions smallint + , OUT o_node_name name + , OUT o_db_name name + , OUT o_user_name name + , OUT o_client_hostname text + , OUT o_session_start_time timestamp + , OUT o_xact_start_time timestamp + , OUT o_waiting boolean + , OUT o_state text + , OUT o_query text + , OUT o_session_total_memory_size bigint + , OUT o_session_used_memory_size bigint + , OUT o_buffer_hits bigint + , OUT o_disk_reads bigint + , OUT o_session_buffer_hit_ratio numeric(5, 2) + , OUT o_sorts_in_memory bigint + , OUT o_sorts_in_disk bigint + , OUT o_session_memory_sort_ratio numeric(5, 2) + , OUT o_avg_sql_exec_time numeric(15, 3) + ) +RETURNS SETOF record +AS +$$ +DECLARE l_node_query text; + l_execute_query text; +BEGIN + + FOR i IN ( SELECT node_name + FROM DBE_PERF.node_name nl + WHERE UPPER(nl.node_name) = + COALESCE(NULLIF(UPPER(i_node_name), 'ALL'), + UPPER(nl.node_name)) + ) + LOOP + + l_node_query := 'WITH sess_memory_usage AS + ( SELECT sessid + , SUM(totalsize)::bigint AS totalsize + , SUM(usedsize)::bigint AS usedsize + FROM gs_session_memory_detail + GROUP BY sessid + ) + , sess_stat0 AS + ( SELECT ss.sessid + , ss.statname AS statname + , ss.value AS statvalue + FROM sess_memory_usage st, gs_session_stat ss + WHERE ss.sessid = st.sessid + AND ss.statname IN ( ''n_blocks_fetched'' + , ''n_shared_blocks_read'' + , ''n_local_blocks_read'' + , ''n_sort_in_disk'' + , ''n_sort_in_memory'' + , ''n_sql'' ) + ) + , sess_stat1 AS + ( SELECT oss.sessid + , oss.totalsize + , oss.usedsize + , (SELECT statvalue FROM sess_stat0 iss + WHERE iss.sessid = oss.sessid AND iss.statname = + ''n_blocks_fetched'') AS total_reads + , (SELECT statvalue FROM sess_stat0 iss + WHERE iss.sessid = oss.sessid AND iss.statname = + ''n_shared_blocks_read'') AS disk_to_shared_buffer + , (SELECT statvalue FROM sess_stat0 iss WHERE + iss.sessid = oss.sessid AND iss.statname = + ''n_local_blocks_read'') AS disk_to_local_buffer + , (SELECT statvalue FROM sess_stat0 iss WHERE + iss.sessid = oss.sessid AND iss.statname = + ''n_sort_in_disk'') AS sorts_in_disk + , (SELECT statvalue FROM sess_stat0 iss WHERE + iss.sessid = oss.sessid AND iss.statname = + ''n_sort_in_memory'') AS sorts_in_memory + , (SELECT statvalue FROM sess_stat0 iss WHERE + iss.sessid = oss.sessid + AND iss.statname = ''n_sql'') AS sql_count + FROM sess_memory_usage oss + ) + , sess_stat AS + ( SELECT ss.sessid + , ss.totalsize 
+ , ss.usedsize + , ss.total_reads + , (ss.disk_to_shared_buffer + + ss.disk_to_local_buffer) AS disk_reads + , (ss.total_reads - (ss.disk_to_shared_buffer + + ss.disk_to_local_buffer)) AS buffer_hits + , sorts_in_disk + , sorts_in_memory + , sql_count + FROM sess_stat1 ss + ) + , sess_memory_stat AS + ( SELECT ''' || i.node_name || '''::name AS node_name + , a.datname::name AS db_name + , a.usename::name AS user_name + , a.client_hostname + , date_trunc(''second'', a.backend_start) + ::timestamp AS session_start_time + , date_trunc(''second'', a.xact_start) + ::timestamp AS xact_start_time + , a.waiting + , a.state, a.query + , st.totalsize AS session_total_memory_size + , st.usedsize AS session_used_memory_size + , st.buffer_hits, st.disk_reads + , ( (st.buffer_hits * 100.0) + / NULLIF(st.total_reads, 0) )::numeric(5, 2) + AS session_buffer_hit_ratio + , st.sorts_in_memory, st.sorts_in_disk + , ( (st.sorts_in_memory * 100.0) + / NULLIF(st.sorts_in_memory + st.sorts_in_disk, + 0) )::numeric(5, 2) AS session_memory_sort_ratio + , st.sessid + , st.sql_count + FROM pg_stat_activity a + , sess_stat st + WHERE a.state IN (''active'', ''fastpath + function call'', ''retrying'') + AND a.pid = sessionid2pid(st.sessid::cstring) + ORDER BY st.totalsize DESC + , st.usedsize DESC + LIMIT ' || i_top_n_sessions || ' + ) + SELECT sms.node_name + , sms.db_name + , sms.user_name + , sms.client_hostname + , sms.session_start_time + , sms.xact_start_time + , sms.waiting + , sms.state + , sms.query + , sms.session_total_memory_size + , sms.session_used_memory_size + , sms.buffer_hits + , sms.disk_reads + , sms.session_buffer_hit_ratio + , sms.sorts_in_memory + , sms.sorts_in_disk + , sms.session_memory_sort_ratio + , ( ss.value / (NULLIF(sms.sql_count, 0) + * 1000.0) )::numeric(15, 3) + AS avg_sql_exec_time + FROM sess_memory_stat sms + , gs_session_time ss + WHERE ss.sessid = sms.sessid + AND ss.stat_name = ''EXECUTION_TIME'''; + + + RETURN QUERY + EXECUTE l_node_query; + + END LOOP; + +END; +$$ +LANGUAGE plpgsql;""", + """CREATE OR REPLACE FUNCTION pmk.get_session_io_stat + ( IN i_node_name text + , IN i_top_n_sessions smallint + , OUT o_node_name name + , OUT o_db_name name + , OUT o_user_name name + , OUT o_client_hostname text + , OUT o_session_start_time timestamp + , OUT o_xact_start_time timestamp + , OUT o_waiting boolean + , OUT o_state text + , OUT o_query text + , OUT o_disk_reads bigint + , OUT o_read_time bigint + , OUT o_avg_read_per_sec numeric(20, 2) + , OUT o_avg_read_time numeric(20, 3) + , OUT o_avg_sql_exec_time numeric(15, 3) + ) +RETURNS SETOF record +AS +$$ +DECLARE l_node_query text; + l_execute_query text; +BEGIN + + FOR i IN ( SELECT node_name + FROM DBE_PERF.node_name nl + WHERE UPPER(nl.node_name) = + COALESCE(NULLIF(UPPER(i_node_name), 'ALL'), + UPPER(nl.node_name)) + ) + LOOP + + l_node_query := 'WITH sess_stat0 AS + ( SELECT ss.sessid + , ss.statname AS statname + , ss.value AS statvalue + FROM gs_session_stat ss + WHERE ss.statname IN ( ''n_shared_blocks_read'' + , ''n_local_blocks_read'' + , ''n_blocks_read_time'' + , ''n_sql'' ) + ) + , sess_stat1 AS + ( SELECT DISTINCT ss.sessid + , (SELECT statvalue FROM sess_stat0 iss WHERE + iss.sessid = ss.sessid AND iss.statname = + ''n_shared_blocks_read'') AS disk_to_shared_buffer + , (SELECT statvalue FROM sess_stat0 iss WHERE + iss.sessid = ss.sessid AND iss.statname = + ''n_local_blocks_read'') AS disk_to_local_buffer + , (SELECT statvalue FROM sess_stat0 iss WHERE + iss.sessid = ss.sessid AND iss.statname = + 
''n_blocks_read_time'') AS read_time + , (SELECT statvalue FROM sess_stat0 iss WHERE + iss.sessid = ss.sessid AND iss.statname = + ''n_sql'') AS sql_count + FROM sess_stat0 ss + ) + , sess_stat AS + ( SELECT ss.sessid + , (ss.disk_to_shared_buffer + + ss.disk_to_local_buffer) AS disk_reads + , ss.read_time + , ss.sql_count + FROM sess_stat1 ss + ) + , sess_io_stat AS + ( SELECT ''' || i.node_name || + '''::name AS node_name + , a.datname::name AS db_name + , a.usename::name AS user_name + , a.client_hostname + , date_trunc(''second'', + a.backend_start)::timestamp AS session_start_time + , date_trunc(''second'', + a.xact_start)::timestamp AS xact_start_time + , a.waiting + , a.state, a.query + , st.disk_reads + , st.read_time + , ( st.disk_reads * 1000000.0 + / NULLIF(st.read_time, 0) )::numeric(20,2) + AS avg_read_per_sec + , ( st.read_time * 1.0 + / NULLIF(st.disk_reads, 0) )::numeric(20,3) + AS avg_read_time + , st.sessid + , st.sql_count + FROM pg_stat_activity a + , sess_stat st + WHERE a.state IN (''active'', + ''fastpath function call'', ''retrying'') + AND a.pid = sessionid2pid(st.sessid::cstring) + ORDER BY st.disk_reads DESC + , st.read_time DESC + LIMIT ' || i_top_n_sessions || ' + ) + SELECT sios.node_name + , sios.db_name + , sios.user_name + , sios.client_hostname + , sios.session_start_time + , sios.xact_start_time + , sios.waiting + , sios.state + , sios.query + , sios.disk_reads + , sios.read_time + , sios.avg_read_per_sec + , sios.avg_read_time + , ( ss.value / (NULLIF(sios.sql_count, 0) + * 1000.0) )::numeric(15, 3) + AS avg_sql_exec_time + FROM sess_io_stat sios + , gs_session_time ss + WHERE ss.sessid = sios.sessid + AND ss.stat_name = ''EXECUTION_TIME'''; + + + RETURN QUERY + EXECUTE l_node_query; + + END LOOP; + +END; +$$ +LANGUAGE plpgsql;""", + """CREATE OR REPLACE FUNCTION pmk.insertBaseValue() +RETURNS TEXT +AS +$$ +DECLARE l_configuration_count_value INT; + l_meta_data_count_value INT; + l_version_string varchar(128); + l_result varchar(128); +BEGIN + SELECT count(config_param_name) + INTO l_configuration_count_value + FROM pmk.pmk_configuration + WHERE config_param_name IN ('Collection Count', 'Enable PMK'); + + IF l_configuration_count_value != 2 + THEN + DELETE FROM pmk.pmk_configuration; + INSERT INTO pmk.pmk_configuration(config_param_name, config_value) + VALUES ('Collection Count', '9'), ('Enable PMK', 'TRUE'); + END IF; + + SELECT count(pmk_version) + INTO l_meta_data_count_value + FROM pmk.pmk_meta_data; + + SELECT substring(version() from '[a-zA-Z0-9 ]* [0-9]+\.[0-9]+\.[0-9]+') + INTO l_version_string; + l_result := l_version_string; + + IF l_meta_data_count_value < 1 + THEN + INSERT INTO pmk.pmk_meta_data (pmk_version, last_snapshot_id, + last_snapshot_collect_time) VALUES (l_result, NULL, NULL); + END IF; + + RETURN NULL; +END; +$$ +LANGUAGE plpgsql;""", """ +SELECT pmk.insertBaseValue();""", """ +analyze pmk.pmk_configuration;""", """ +analyze pmk.pmk_snapshot;""", """ +analyze pmk.pmk_snapshot_datanode_stat;""", """ +analyze pmk.pmk_meta_data;"""] +TEST_PMK = """ +DECLARE + pmk_oid oid; + class_count int; + proc_count int; +BEGIN + --if pmk schema not exist, it will raise an error. + select oid from pg_namespace where nspname='pmk' into pmk_oid; + select count(*) from pg_class where relnamespace=pmk_oid into class_count; + select count(*) from pg_proc where pronamespace=pmk_oid into proc_count; + raise exception 'pmk schema exist. 
class count is %, proc count is %.', + class_count , proc_count; +END; +""" +dnquerySql = "" +dnquerySql += "SELECT node_name, COALESCE(pns.physical_reads, 0), " \ + "COALESCE(pns.physical_writes, 0), " +dnquerySql += "COALESCE(pns.read_time, 0), COALESCE(pns.write_time, 0), " \ + "COALESCE(pns.xact_commit, 0), " +dnquerySql += "COALESCE(pns.xact_rollback, 0), " \ + "COALESCE(pns.checkpoints_timed, 0)," \ + " COALESCE(pns.checkpoints_req, 0), " +dnquerySql += "COALESCE(pns.checkpoint_write_time, 0), " \ + "COALESCE(pns.blocks_read, 0), " \ + "COALESCE(pns.blocks_hit, 0), " +dnquerySql += "COALESCE(pns.busy_time, 0), " \ + "COALESCE(pns.idle_time, 0), " \ + "COALESCE(pns.iowait_time, 0), " +dnquerySql += "COALESCE(pns.db_cpu_time, 0) FROM " \ + "pmk.pmk_snapshot_datanode_stat pns " +dnquerySql += "WHERE pns.snapshot_id = %s" + +totalSql = { + "getMetaData": "SELECT l_pmk_curr_collect_start_time, " + "l_pmk_last_collect_start_time, l_last_snapshot_id " + "FROM pmk.get_meta_data();", + "deleteSnapShots": "SELECT * FROM pmk.delete_expired_snapshots();", + "getRedistributeRate": "set statement_timeout = 20000;select name,value " + "from public.redis_progress where name in " + "('Bytes Done', 'Bytes Left');", + "getNodeStatDN": dnquerySql, + "checkTransactionReadonly": "show default_transaction_read_only;" +} diff --git a/script/gspylib/common/SqlResult.py b/script/gspylib/common/SqlResult.py new file mode 100644 index 0000000..b914d57 --- /dev/null +++ b/script/gspylib/common/SqlResult.py @@ -0,0 +1,70 @@ +# -*- coding:utf-8 -*- +############################################################################# +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. 
+# ---------------------------------------------------------------------------- +# Description : Result.py is a utility to store search result from database +############################################################################# +import os +import sys +from ctypes import * + +sys.path.append(sys.path[0] + "/../../") +from gspylib.common.Common import DefaultValue + + +class sqlResult(): + """ + Class for storing search result from database + """ + + def __init__(self, result): + """ + Constructor + """ + self.resCount = 0 + self.resSet = [] + self.result = result + + def parseResult(self): + """ + function : get resCount and resSet from result + input:NA + output:NA + """ + try: + libpath = os.path.join(DefaultValue.getEnv("GAUSSHOME"), "lib") + sys.path.append(libpath) + libc = cdll.LoadLibrary("libpq.so.5.5") + libc.PQntuples.argtypes = [c_void_p] + libc.PQntuples.restype = c_int + libc.PQnfields.argtypes = [c_void_p] + libc.PQnfields.restype = c_int + libc.PQgetvalue.restype = c_char_p + ntups = libc.PQntuples(self.result) + nfields = libc.PQnfields(self.result) + libc.PQgetvalue.argtypes = [c_void_p, c_int, c_int] + self.resCount = ntups + for i in range(ntups): + tmpString = [] + for j in range(nfields): + paramValue = libc.PQgetvalue(self.result, i, j) + if (paramValue is not None): + tmpString.append(string_at(paramValue).decode()) + else: + tmpString.append("") + self.resSet.append(tmpString) + except Exception as e: + raise Exception("%s" % str(e)) diff --git a/script/gspylib/common/VersionInfo.py b/script/gspylib/common/VersionInfo.py new file mode 100644 index 0000000..f39d725 --- /dev/null +++ b/script/gspylib/common/VersionInfo.py @@ -0,0 +1,126 @@ +# -*- coding:utf-8 -*- +############################################################################# +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +############################################################################# +""" +This file is for Gauss version things. 
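+
+Typical usage (a minimal sketch; the printed values are only examples):
+
+    from gspylib.common.VersionInfo import VersionInfo
+
+    print(VersionInfo.getPackageVersion())  # e.g. 1.0.0
+    print(VersionInfo.getCommitid())        # e.g. ae45cfgt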
+""" + +import os +import sys +import re + +sys.path.append(sys.path[0] + "/../../") +from gspylib.common.ErrorCode import ErrorCode + + +class VersionInfo(): + """ + Info about current version + """ + + def __init__(self): + pass + + # package version + __PACKAGE_VERSION = "" + # OM version string + COMMON_VERSION = "Gauss200 OM VERSION" + # It will be replaced with the product version, such as "Gauss200", + # while being packaged by mpp_package.sh + PRODUCT_NAME = "__GAUSS_PRODUCT_STRING__" + PRODUCT_NAME_PACKAGE = "-".join(PRODUCT_NAME.split()) + __COMPATIBLE_VERSION = [] + COMMITID = "" + + @staticmethod + def getPackageVersion(): + """ + function: Get the current version from version.cfg + input : NA + output: String + """ + if (VersionInfo.__PACKAGE_VERSION != ""): + return VersionInfo.__PACKAGE_VERSION + # obtain version file + versionFile = VersionInfo.get_version_file() + version, number, commitid = VersionInfo.get_version_info(versionFile) + # the 2 value is package version + VersionInfo.__PACKAGE_VERSION = version + return VersionInfo.__PACKAGE_VERSION + + @staticmethod + def getCommitid(): + if VersionInfo.COMMITID != "": + return VersionInfo.COMMITID + versionFile = VersionInfo.get_version_file() + version, number, commitid = VersionInfo.get_version_info(versionFile) + # the 2 value is package version + VersionInfo.COMMITID = commitid + return VersionInfo.COMMITID + + @staticmethod + def get_version_file(): + """ + function: Get version.cfg file + input : NA + output: String + """ + # obtain version file + dirName = os.path.dirname(os.path.realpath(__file__)) + versionFile = os.path.join(dirName, "./../../../", "version.cfg") + versionFile = os.path.realpath(versionFile) + if (not os.path.exists(versionFile)): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % versionFile) + if (not os.path.isfile(versionFile)): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50210"] % versionFile) + return versionFile + + @staticmethod + def get_version_info(versionFile): + + # the infomation of versionFile like this: + # openGauss-1.0 + # XX.0 + # ae45cfgt + if not os.path.exists(versionFile): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % versionFile) + if not os.path.isfile(versionFile): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50210"] % versionFile) + with open(versionFile, 'r') as fp: + retLines = fp.readlines() + if len(retLines) < 3: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50222"] % versionFile) + + version = re.compile(r'[0-9]+\.[0-9]+\.[0-9]+').search( + retLines[0].strip()).group() + number = retLines[1].strip() + commitId = retLines[2].strip() + + if version is None: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50222"] % + "version.cfg" + "Does not have version " + "such as openGauss-1.0") + try: + float(number) + except Exception as e: + raise Exception(str(e) + ErrorCode.GAUSS_516["GAUSS_51628"] + % number) + + if not (commitId.isalnum() and len(commitId) == 8): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50222"] % versionFile + + " Commit id is wrong.") + return version, number, commitId diff --git a/script/gspylib/common/__init__.py b/script/gspylib/common/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/script/gspylib/component/BaseComponent.py b/script/gspylib/component/BaseComponent.py new file mode 100644 index 0000000..f03ab7f --- /dev/null +++ b/script/gspylib/component/BaseComponent.py @@ -0,0 +1,201 @@ +# -*- coding:utf-8 -*- +############################################################################# +# Copyright (c) 2020 Huawei 
Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +############################################################################# +import sys +import os +import socket +import time + +sys.path.append(sys.path[0] + "/../../") +from gspylib.common.ErrorCode import ErrorCode +from gspylib.common.Common import DefaultValue +from gspylib.os.gsfile import g_file +from gspylib.os.gsOSlib import g_OSlib +from gspylib.os.gsnetwork import g_network + +TIME_OUT = 2 +RETRY_TIMES = 100 + + +class BaseComponent(object): + ''' + The class is used to define base component. + ''' + + def __init__(self): + ''' + function: initialize the parameters + input : NA + output: NA + ''' + self.logger = None + self.instInfo = None + self.version = "" + self.pkgName = "" + self.initParas = {} + self.binPath = "" + self.dwsMode = False + self.level = 1 + self.clusterType = DefaultValue.CLUSTER_TYPE_SINGLE_INST + + def install(self): + pass + + def setGucConfig(self, setMode='set', paraDict=None): + pass + + def getGucConfig(self, paraList): + pass + + def setPghbaConfig(self): + pass + + def start(self): + pass + + def stop(self): + pass + + def uninstall(self): + pass + + def killProcess(self): + """ + function: kill process + input: process flag + output: NA + """ + pass + + def fixPermission(self): + pass + + def upgrade(self): + pass + + def createPath(self): + pass + + def perCheck(self): + """ + function: 1.Check instance port + 2.Check instance IP + input : NA + output: NA + """ + ipList = self.instInfo.listenIps + ipList.extend(self.instInfo.haIps) + portList = [] + portList.append(self.instInfo.port) + portList.append(self.instInfo.haPort) + + ipList = DefaultValue.Deduplication(ipList) + portList = DefaultValue.Deduplication(portList) + # check port + for port in portList: + self.__checkport(port, ipList) + # check ip + failIps = g_network.checkIpAddressList(ipList) + if (len(failIps) > 0): + raise Exception(ErrorCode.GAUSS_506["GAUSS_50600"] + + " The IP is %s." % ",".join(failIps)) + + def __checkport(self, port, ipList): + """ + function: check Port + input : NA + output: NA + """ + tmpDir = DefaultValue.getTmpDirFromEnv() + if (not os.path.exists(tmpDir)): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % + tmpDir + " Please create it first.") + pgsqlFiles = os.listdir(tmpDir) + + self.__checkRandomPortRange(port) + + pgsql = ".s.PGSQL.%d" % port + pgsql_lock = ".s.PGSQL.%d.lock" % port + if (pgsql in pgsqlFiles): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50200"] % + "socket file" + " Port:%s." % port) + + if (pgsql_lock in pgsqlFiles): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50200"] % + "socket lock file" + " Port:%s." 
% port)
+
+        # Verify that the port is not occupied by trying to bind to it
+        for ip in ipList:
+            sk = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+            sk.settimeout(TIME_OUT)
+
+            # retry binding up to RETRY_TIMES times
+            retryFlag = True
+            retryTime = 0
+            while (retryFlag):
+                try:
+                    sk.bind((ip, port))
+                    sk.close()
+                    break
+                except socket.error as e:
+                    retryTime += 1
+                    time.sleep(1)
+                    if (retryTime > RETRY_TIMES):
+                        retryFlag = False
+                        try:
+                            portProcessInfo = g_OSlib.getPortProcessInfo(port)
+                            self.logger.debug("The ip [%s] port [%s] is "
+                                              "occupied. \nBind error "
+                                              "msg:\n%s\nDetail msg:\n%s" % \
+                                              (ip, port, str(e),
+                                               portProcessInfo))
+                        except Exception as e:
+                            self.logger.debug("Failed to get the process "
+                                              "information of the port [%s], "
+                                              "output:%s." % (port, str(e)))
+                        raise Exception(ErrorCode.GAUSS_506["GAUSS_50601"] %
+                                        port)
+
+    def __checkRandomPortRange(self, port):
+        """
+        function: Check if port is in the range of random port
+        input : port
+        output: NA
+        """
+        res = []
+        try:
+            rangeFile = "/proc/sys/net/ipv4/ip_local_port_range"
+            output = g_file.readFile(rangeFile)
+            res = output[0].split()
+        except Exception as e:
+            self.logger.debug(
+                "Warning: Failed to get the range of random port."
+                " Detail: \n%s" % str(e))
+            return
+        if (len(res) != 2):
+            self.logger.debug("Warning: The range of random port is invalid. "
+                              "Detail: \n%s" % str(output))
+            return
+        minPort = int(res[0])
+        maxPort = int(res[1])
+        if (port >= minPort and port <= maxPort):
+            self.logger.debug("Warning: Current instance port is in the "
+                              "range of random port(%d - %d)." % (minPort,
+                                                                  maxPort))
+
+    def postCheck(self):
+        pass
diff --git a/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py b/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py
new file mode 100644
index 0000000..ddd8d35
--- /dev/null
+++ b/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py
@@ -0,0 +1,448 @@
+# -*- coding:utf-8 -*-
+#############################################################################
+# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+#
+# openGauss is licensed under Mulan PSL v2.
+# You can use this software according to the terms
+# and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+#
+#          http://license.coscl.org.cn/MulanPSL2
+#
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+#############################################################################
+import sys
+import os
+
+sys.path.append(sys.path[0] + "/../../../../")
+from gspylib.common.ErrorCode import ErrorCode
+from gspylib.common.Common import DefaultValue, ClusterInstanceConfig
+from gspylib.component.Kernel.Kernel import Kernel
+from gspylib.common.DbClusterInfo import dbClusterInfo
+from gspylib.os.gsfile import g_file
+from gspylib.os.gsOSlib import g_OSlib
+
+METHOD_TRUST = "trust"
+METHOD_SHA = "sha256"
+MAX_PARA_NUMBER = 1000
+INSTANCE_TYPE_UNDEFINED = -1
+MASTER_INSTANCE = 0
+STANDBY_INSTANCE = 1
+DUMMY_STANDBY_INSTANCE = 2
+CASCADE_STANDBY_INSTANCE = 3
+
+
+class DN_OLAP(Kernel):
+    '''
+    The class is used to define the OLAP data node component.
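+    It wraps data node initialization (gs_initdb), SSL certificate
+    deployment, GUC configuration and replication connection setup.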
+ ''' + + def __init__(self): + ''' + Constructor + ''' + super(DN_OLAP, self).__init__() + + def getDnGUCDict(self): + """ + function : get init DB install guc parameter + input : String,String,String,int + output : String + """ + tmpDict = {} + tmpDict["ssl"] = "on" + tmpDict["ssl_cert_file"] = "'server.crt'" + tmpDict["ssl_key_file"] = "'server.key'" + tmpDict["ssl_ca_file"] = "'cacert.pem'" + return tmpDict + + def copyAndModCertFiles(self): + """ + function : copy and chage permission cert files + input : NA + output : NA + """ + user = g_OSlib.getUserInfo()["name"] + appPath = DefaultValue.getInstallDir(user) + caPath = os.path.join(appPath, "share/sslcert/om") + # cp cert files + g_file.cpFile("%s/server.crt" % caPath, "%s/" % + self.instInfo.datadir) + g_file.cpFile("%s/server.key" % caPath, "%s/" % + self.instInfo.datadir) + g_file.cpFile("%s/cacert.pem" % caPath, "%s/" % + self.instInfo.datadir) + g_file.cpFile("%s/server.key.cipher" % caPath, "%s/" % + self.instInfo.datadir) + g_file.cpFile("%s/server.key.rand" % caPath, "%s/" % + self.instInfo.datadir) + # change mode + g_file.changeMode(DefaultValue.KEY_FILE_MODE, "%s/server.crt" % + self.instInfo.datadir) + g_file.changeMode(DefaultValue.KEY_FILE_MODE, "%s/server.key" % + self.instInfo.datadir) + g_file.changeMode(DefaultValue.KEY_FILE_MODE, "%s/cacert.pem" % + self.instInfo.datadir) + g_file.changeMode(DefaultValue.KEY_FILE_MODE, "%s/server.key.cipher" % + self.instInfo.datadir) + g_file.changeMode(DefaultValue.KEY_FILE_MODE, "%s/server.key.rand" % + self.instInfo.datadir) + + def initInstance(self): + """ + function: + init DB instance + input:string:NA + output: + """ + if (not os.path.exists(self.instInfo.datadir)): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % + ("data directory [%s]" % self.instInfo.datadir)) + + nodename = self.getInstanceNodeName() + # if nodename too long, obtains the first 22 digits + nodename = nodename[:22] + if (self.dwsMode): + image_path = DefaultValue.DWS_IMAGE_PATH + # decompress package to files + packageName = "%s/datanode.tar.gz" % image_path + g_file.decompressFiles(packageName, self.instInfo.datadir) + # set GUC parameter + tmpDict = {} + tmpDict["pgxc_node_name"] = "'%s'" % nodename + self.setGucConfig(tmpDict) + else: + # If xlogdir is set in xmlfile, an independent xlog + # path will be created. + if (self.instInfo.xlogdir != ''): + cmd = "%s/gs_initdb --locale=C -D %s -X %s " \ + "--nodename=%s %s -C %s" % ( + self.binPath, self.instInfo.datadir, + self.instInfo.xlogdir, nodename, + " ".join(self.initParas), self.binPath) + else: + cmd = "%s/gs_initdb --locale=C -D %s --nodename=%s %s -C %s" \ + % \ + (self.binPath, self.instInfo.datadir, nodename, + " ".join(self.initParas), self.binPath) + self.logger.debug("Command for initializing database " + "node instance: %s" % cmd) + (status, output) = DefaultValue.retryGetstatusoutput(cmd) + if (status != 0): + raise Exception(ErrorCode.GAUSS_516["GAUSS_51615"] + + " Command:%s. Error:\n%s" % (cmd, output)) + # set ssl to DB nodes. 
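+        # The ssl, ssl_cert_file, ssl_key_file and ssl_ca_file GUCs come
+        # from getDnGUCDict(); copyAndModCertFiles() then copies the
+        # matching certificate files into the data directory.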
+ dnGucParas = self.getDnGUCDict() + self.setGucConfig(dnGucParas) + self.copyAndModCertFiles() + + def getInstanceNodeName(self): + """ + function: Get Instance Node Name + input : NA + output: instance node name + """ + user = g_OSlib.getUserInfo()["name"] + clusterInfo = dbClusterInfo() + clusterInfo.initFromStaticConfig(user) + peerInsts = clusterInfo.getPeerInstance(self.instInfo) + nodename = "dn_%d" % self.instInfo.instanceId + if len(peerInsts) == 0: + return nodename + nodename = ClusterInstanceConfig. \ + setReplConninfoForSinglePrimaryMultiStandbyCluster( + self.instInfo, peerInsts, clusterInfo)[1] + return nodename + + + def getDNDict(self, user, configItemType=None, peerInsts=None, + azNames=None, syncNum=-1): + """ + function: Get database node configuration + input : user, configItemType=None, peerInsts, + azNames=None, syncNum + output: NA + """ + if peerInsts is None: + peerInsts = [] + if azNames is None: + azNames = [] + tmpDNDict = {} + tmpDNDict["listen_addresses"] = "'%s'" % ",".join( + self.instInfo.listenIps) + tmpDNDict["local_bind_address"] = "'%s'" % self.instInfo.listenIps[0] + tmpDNDict["port"] = self.instInfo.port + + if (configItemType == "ConfigInstance"): + tmpDNDict["cstore_buffers"] = "1GB" + tmpDNDict["max_connections"] = "3000" + tmpDNDict["shared_buffers"] = "1GB" + tmpDNDict["work_mem"] = "64MB" + tmpDNDict["maintenance_work_mem"] = "128MB" + tmpDNDict["data_replicate_buffer_size"] = "128MB" + if (self.clusterType == + DefaultValue.CLUSTER_TYPE_SINGLE_PRIMARY_MULTI_STANDBY or + self.clusterType == DefaultValue.CLUSTER_TYPE_SINGLE_INST): + tmpDNDict["enable_data_replicate"] = "off" + tmpDNDict["replication_type"] = "1" + tmpDNDict["max_wal_senders"] = "16" + totalnum = len(peerInsts) + for inst in peerInsts: + if inst.instanceType == CASCADE_STANDBY_INSTANCE: + totalnum = totalnum - 1 + tmpDNDict["application_name"] = "'dn_%s'" % \ + self.instInfo.instanceId + if len(azNames) == 1 and totalnum > 0: + if syncNum == -1 and totalnum > 1: + num = totalnum - 1 + tmpDNDict["synchronous_standby_names"] = \ + "'ANY %d(%s)'" % (num, azNames[0]) + elif syncNum > 0: + tmpDNDict["synchronous_standby_names"] = \ + "'ANY %d(%s)'" % (syncNum, azNames[0]) + elif syncNum == 0: + tmpDNDict["synchronous_standby_names"] = \ + "'ANY 1(%s)'" % (azNames[0]) + elif len(azNames) == 2 and totalnum in (3, 4): + tmpDNDict["synchronous_standby_names"] = \ + "'ANY 2(%s,%s)'" % (azNames[0], azNames[1]) + elif len(azNames) == 2 and totalnum in (5, 6, 7): + tmpDNDict["synchronous_standby_names"] = \ + "'ANY 3(%s,%s)'" % (azNames[0], azNames[1]) + elif len(azNames) == 3 and totalnum in (3, 4): + tmpDNDict["synchronous_standby_names"] = \ + "'ANY 2(%s,%s,%s)'" % (azNames[0], azNames[1], azNames[2]) + elif len(azNames) == 3 and totalnum in (5, 6, 7): + tmpDNDict["synchronous_standby_names"] = \ + "'ANY 3(%s,%s,%s)'" % (azNames[0], azNames[1], azNames[2]) + + if (self.clusterType == DefaultValue.CLUSTER_TYPE_SINGLE): + tmpDNDict["replication_type"] = "2" + + if (configItemType != "ChangeIPUtility"): + tmpDNDict["log_directory"] = "'%s/pg_log/dn_%d'" % ( + DefaultValue.getUserLogDirWithUser(user), + self.instInfo.instanceId) + tmpDNDict["audit_directory"] = "'%s/pg_audit/dn_%d'" % ( + DefaultValue.getUserLogDirWithUser(user), + self.instInfo.instanceId) + + if (len(self.instInfo.ssdDir) != 0 and configItemType != + "ChangeIPUtility"): + tmpDNDict["ssd_cache_dir"] = "'%s'" % (self.instInfo.ssdDir) + tmpDNDict["enable_adio_function"] = "on" + tmpDNDict["enable_cstore_ssd_cache"] = "on" + 
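        # The assembled dictionary is only returned here; callers such as
+        # configInstance() apply it through setGucConfig()/gs_guc.
+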
return tmpDNDict + + def getPrivateGucParamList(self): + """ + function : Get the private guc parameter list. + input : NA + output + """ + # only used by dummy standby instance + # max_connections value is 100 + # memorypool_enable value is false + # shared_buffers value is 32MB + # bulk_write_ring_size value is 32MB + # max_prepared_transactions value is 10 + # cstore_buffers value is 16MB + # autovacuum_max_workers value is 0 + # max_pool_size value is 50 + # wal_buffers value is -1 + + # add the parameter content to the dictionary list + priavetGucParamDict = {} + priavetGucParamDict["max_connections"] = "100" + priavetGucParamDict["memorypool_enable"] = "false" + priavetGucParamDict["shared_buffers"] = "32MB" + priavetGucParamDict["bulk_write_ring_size"] = "32MB" + priavetGucParamDict["max_prepared_transactions"] = "10" + priavetGucParamDict["cstore_buffers"] = "16MB" + priavetGucParamDict["autovacuum_max_workers"] = "0" + priavetGucParamDict["wal_buffers"] = "-1" + priavetGucParamDict["max_locks_per_transaction"] = "64" + priavetGucParamDict["sysadmin_reserved_connections"] = "3" + priavetGucParamDict["max_wal_senders"] = "4" + return priavetGucParamDict + + def modifyDummpyStandbyConfigItem(self): + """ + function: Modify the parameter at dummyStandby instance. + It only be used by DB instance. + input : Inst, configFile + output: NA + """ + # only modify config item for dummpy standby instance + if (self.instInfo.instanceType != DefaultValue.DUMMY_STANDBY_INSTANCE): + return + tmpDNDict = self.getPrivateGucParamList() + self.setGucConfig(tmpDNDict) + + def setPrimaryStandyConnInfo(self, peerInsts): + """ + function: Modify replconninfo for datanode + input : peerInsts + output: NA + """ + connInfo1 = None + connInfo2 = None + dummyStandbyInst = None + nodename = None + user = g_OSlib.getUserInfo()["name"] + clusterInfo = dbClusterInfo() + clusterInfo.initFromStaticConfig(user) + if (self.clusterType == + DefaultValue.CLUSTER_TYPE_SINGLE_PRIMARY_MULTI_STANDBY or + self.clusterType == DefaultValue.CLUSTER_TYPE_SINGLE_INST): + (connInfo1, nodename) = ClusterInstanceConfig. \ + setReplConninfoForSinglePrimaryMultiStandbyCluster( + self.instInfo, peerInsts, clusterInfo) + for i in range(len(connInfo1)): + connInfo = "replconninfo" + "%d" % (i + 1) + tmpDict1 = {} + tmpDict1[connInfo] = "'%s'" % connInfo1[i] + self.setGucConfig(tmpDict1) + self.setGucConfig({"available_zone": "'%s'" % + self.instInfo.azName}) + else: + (connInfo1, connInfo2, dummyStandbyInst, nodename) = \ + ClusterInstanceConfig.setReplConninfo(self.instInfo, + peerInsts, clusterInfo) + connInfo = "replconninfo1" + tmpDict1 = {} + tmpDict1[connInfo] = "'%s'" % connInfo1 + self.setGucConfig(tmpDict1) + + if (dummyStandbyInst is not None): + tmpDict2 = {} + tmpDict2["replconninfo2"] = "'%s'" % connInfo2 + self.setGucConfig(tmpDict2) + + def configInstance(self, user, dataConfig, peerInsts, + configItemType=None, alarm_component=None, + azNames=None, gucXml=False, clusterInfo=None): + """ + peerInsts : peerInsts is empty means that it is a single cluster. 
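+        The settings are applied in stages: common items first, then the
+        dynamic GUC values read from the tmp_guc file, and finally the
+        replication connection info (replconninfoN) when peers exist.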
+ """ + if azNames is None: + azNames = [] + syncNum = self.instInfo.syncNum + tmpDNDict = self.getDNDict(user, configItemType, peerInsts, + azNames, syncNum) + + commonDict = self.setCommonItems() + self.setGucConfig(commonDict) + + self.logger.debug("Check if tmp_guc file exists.") + tmpGucFile = "" + tmpGucPath = DefaultValue.getTmpDirFromEnv(user) + tmpGucFile = "%s/tmp_guc" % tmpGucPath + if (os.path.exists(tmpGucFile)): + dynamicDict = {} + dynamicDict = DefaultValue.dynamicGuc(user, self.logger, + "dn", tmpGucFile, + gucXml) + if gucXml: + dynamicDict["log_line_prefix"] = "'%s'" % \ + dynamicDict["log_line_prefix"] + dynamicDict["thread_pool_attr"] = "'%s'" % \ + dynamicDict[ + "thread_pool_attr"] + if (len(dynamicDict) != 0): + self.logger.debug("set dynamic guc parameters " + "for database node instances.") + if (self.instInfo.instanceType == + DefaultValue.DUMMY_STANDBY_INSTANCE): + self.logger.debug("remove max_process_memory if " + "current datanode is dummy one.") + dummydynamicDict = dynamicDict + dummydynamicDict.pop("max_process_memory") + tmpDNDict.update(dummydynamicDict) + else: + tmpDNDict.update(dynamicDict) + else: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50219"] % + "guc_list.conf") + tmpDNDict.update(dataConfig) + tmpDNDict["alarm_component"] = "'%s'" % alarm_component + self.setGucConfig(tmpDNDict) + + if (len(peerInsts)): + self.setPrimaryStandyConnInfo(peerInsts) + else: + tmpDict1 = {} + tmpDict1["synchronous_commit"] = "off" + self.setGucConfig(tmpDict1) + + if syncNum == 0 or (syncNum == -1 and len(peerInsts) == 1): + tmpDict1 = {} + tmpDict1["synchronous_commit"] = "off" + self.setGucConfig(tmpDict1) + + + self.modifyDummpyStandbyConfigItem() + + def setPghbaConfig(self, clusterAllIpList, user='all'): + """ + """ + principal = None + if (DefaultValue.checkKerberos(DefaultValue.getMpprcFile())): + + (status, output) = \ + g_OSlib.getGrepValue("-Er", "^default_realm", + os.path.join(os.path.dirname( + DefaultValue.getMpprcFile()), + DefaultValue.FI_KRB_CONF)) + if (status != 0): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50222"] % + "krb5.conf" + "Error:\n%s" % output) + principal = output.split("=")[1].strip() + + # build ip string list + # Every 1000 records merged into one + i = 0 + GUCParasStr = "" + GUCParasStrList = [] + for ipAddress in clusterAllIpList: + i += 1 + # Set the initial user and initial database access permissions + if principal is None: + GUCParasStr += "-h \"host all %s %s/32 %s\" " % \ + (user, ipAddress, METHOD_TRUST) + else: + GUCParasStr += "-h \"host all %s %s/32 gss " \ + "include_realm=1 krb_realm=%s\" "\ + % (user, ipAddress, principal) + if (i % MAX_PARA_NUMBER == 0): + GUCParasStrList.append(GUCParasStr) + i = 0 + GUCParasStr = "" + + if (GUCParasStr != ""): + GUCParasStrList.append(GUCParasStr) + + for parasStr in GUCParasStrList: + self.doGUCConfig("set", parasStr, True) + + """ + Desc: + Under the AP branch, we don't need to the + uninstall/postcheck for every componet. 
+ """ + + def fixPermission(self): + pass + + def upgrade(self): + pass + + def createPath(self): + pass + + def postCheck(self): + pass diff --git a/script/gspylib/component/Kernel/DN_OLAP/__init__.py b/script/gspylib/component/Kernel/DN_OLAP/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/script/gspylib/component/Kernel/Kernel.py b/script/gspylib/component/Kernel/Kernel.py new file mode 100644 index 0000000..0fe15f1 --- /dev/null +++ b/script/gspylib/component/Kernel/Kernel.py @@ -0,0 +1,493 @@ +# -*- coding:utf-8 -*- +############################################################################# +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +############################################################################# +import sys +import os +import subprocess +import grp +import pwd +import base64 +import re +import time + +sys.path.append(sys.path[0] + "/../../../") +from gspylib.common.ErrorCode import ErrorCode +from gspylib.common.GaussLog import GaussLog +from gspylib.component.BaseComponent import BaseComponent +from gspylib.os.gsfile import g_file +from gspylib.common.Common import DefaultValue +from gspylib.threads.parallelTool import parallelTool, CommandThread +from gspylib.os.gsfile import g_file, g_Platform + +RETRY_COUNT = 3 +MAX_PARA_NUMBER = 1000 + + +class Kernel(BaseComponent): + ''' + The class is used to define base component. 
+ ''' + + def __init__(self): + """ + """ + super(Kernel, self).__init__() + # init paramter schemaCoordinatorFile, + # schemaJobFile and schemaDatanodeFile + tmpDir = DefaultValue.getTmpDirFromEnv() + self.schemaCoordinatorFile = "%s/%s" % ( + tmpDir, DefaultValue.SCHEMA_COORDINATOR) + self.coordinatorJobDataFile = "%s/%s" % ( + tmpDir, DefaultValue.COORDINATOR_JOB_DATA) + self.schemaDatanodeFile = "%s/%s" % (tmpDir, + DefaultValue.SCHEMA_DATANODE) + self.dumpTableFile = "%s/%s" % (tmpDir, + DefaultValue.DUMP_TABLES_DATANODE) + self.dumpOutputFile = "%s/%s" % (tmpDir, + DefaultValue.DUMP_Output_DATANODE) + self.coordinatorStatisticsDataFile = "%s/%s" % ( + tmpDir, DefaultValue.COORDINATOR_STAT_DATA) + + """ + Desc: + start/stop/query single instance + """ + + def start(self, time_out=DefaultValue.TIMEOUT_CLUSTER_START, + security_mode="off", cluster_number=None): + """ + """ + if cluster_number: + cmd = "%s/gs_ctl start -o '-u %s' -D %s " % ( + self.binPath, int(float(cluster_number) * 1000), + self.instInfo.datadir) + else: + cmd = "%s/gs_ctl start -D %s " % ( + self.binPath, self.instInfo.datadir) + if self.instInfo.instanceType == DefaultValue.MASTER_INSTANCE: + if len(self.instInfo.peerInstanceInfos) > 0: + cmd += "-M primary" + elif self.instInfo.instanceType == DefaultValue.CASCADE_STANDBY: + cmd += "-M cascade_standby" + elif self.instInfo.instanceType == DefaultValue.STANDBY_INSTANCE: + cmd += "-M standby" + if time_out is not None: + cmd += " -t %s" % time_out + if security_mode == "on": + cmd += " -o \'--securitymode\'" + configFile = "%s/postgresql.conf" % self.instInfo.datadir + output = g_file.readFile(configFile, "logging_collector") + value = None + for line in output: + line = line.split('#')[0].strip() + if line.find('logging_collector') >= 0 and line.find('=') > 0: + value = line.split('=')[1].strip() + break + if value == "off": + cmd += " >/dev/null 2>&1" + self.logger.debug("start cmd = %s" % cmd) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0 or re.search("start failed", output): + raise Exception(ErrorCode.GAUSS_516["GAUSS_51607"] % "instance" + + " Error: Please check the gs_ctl log for " + "failure details." + "\n" + output) + if value == "off": + output = "[BACKEND] WARNING: The parameter logging_collector is " \ + "set to off. The log will not be recorded to file. " \ + "Please check any error manually." + self.logger.log(output) + + def stop(self, stopMode="", time_out=300): + """ + """ + cmd = "%s/gs_ctl stop -D %s " % ( + self.binPath, self.instInfo.datadir) + if not self.isPidFileExist(): + cmd += " -m immediate" + else: + # check stop mode + if stopMode != "": + cmd += " -m %s" % stopMode + cmd += " -t %s" % time_out + self.logger.debug("stop cmd = %s" % cmd) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_516["GAUSS_51610"] % + "instance" + " Error: \n%s." 
% output) + if output.find("No such process") > 0: + cmd = "ps c -eo pid,euid,cmd | grep gaussdb | grep -v grep | " \ + "awk '{if($2 == curuid && $1!=\"-n\") " \ + "print \"/proc/\"$1\"/cwd\"}' curuid=`id -u`|" \ + " xargs ls -l |awk '{if ($NF==\"%s\") print $(NF-2)}' | " \ + "awk -F/ '{print $3 }'" % (self.instInfo.datadir) + (status, rightpid) = subprocess.getstatusoutput(cmd) + if rightpid or status != 0: + GaussLog.exitWithError(output) + + def isPidFileExist(self): + pidFile = "%s/postmaster.pid" % self.instInfo.datadir + return os.path.isfile(pidFile) + + def query(self): + """ + """ + cmd = "%s/gs_ctl query -D %s" % (self.binPath, self.instInfo.datadir) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + " Error: \n%s " % output) + return (status, output) + + def build(self, buidMode="full", standByBuildTimeout=300): + """ + """ + cmd = "%s/gs_ctl build -D %s -M standby -b %s -r %d " % ( + self.binPath, self.instInfo.datadir, buidMode, standByBuildTimeout) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + " Error: \n%s " % output) + + def build_cascade(self, buidMode="full", standByBuildTimeout=300): + """ + """ + cmd = "%s/gs_ctl build -D %s -M cascade_standby -b %s -r %d " % ( + self.binPath, self.instInfo.datadir, buidMode, standByBuildTimeout) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + " Error: \n%s " % output) + + def queryBuild(self): + """ + """ + cmd = "%s/gs_ctl querybuild -D %s" % (self.binPath, + self.instInfo.datadir) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + " Error: \n%s " % output) + + """ + Desc: + Under the AP branch, the installation package of each + component is not distinguished. + After checking, unzip the public installation package and + complete the installation. + """ + + def install(self, nodeName="", dbInitParams=""): + """ + """ + pass + + def getInstanceTblspcDirs(self, nodeName=""): + """ + function: Get instance dirs + input : NA + output: NA + """ + tbsDirList = [] + + if (not os.path.exists("%s/pg_tblspc" % self.instInfo.datadir)): + self.logger.debug("%s/pg_tblspc does not exists." % + self.instInfo.datadir) + return tbsDirList + + fileList = os.listdir("%s/pg_tblspc" % self.instInfo.datadir) + if (len(fileList)): + for filename in fileList: + if (os.path.islink("%s/pg_tblspc/%s" % (self.instInfo.datadir, + filename))): + linkDir = os.readlink("%s/pg_tblspc/%s" % ( + self.instInfo.datadir, filename)) + if (os.path.isdir(linkDir)): + tblspcDir = "%s/%s_%s" % ( + linkDir, DefaultValue.TABLESPACE_VERSION_DIRECTORY, + nodeName) + self.logger.debug("Table space directories is %s." % + tblspcDir) + tbsDirList.append(tblspcDir) + else: + self.logger.debug( + "%s is not link directory." % linkDir) + else: + self.logger.debug("%s is not a link file." % filename) + else: + self.logger.debug("%s/pg_tblspc is empty." 
% self.instInfo.datadir) + + return tbsDirList + + def getLockFiles(self): + """ + function: Get lock files + input : NA + output: NA + """ + fileList = [] + # the static file must be exists + tmpDir = os.path.realpath(DefaultValue.getTmpDirFromEnv()) + + pgsql = ".s.PGSQL.%d" % self.instInfo.port + pgsqlLock = ".s.PGSQL.%d.lock" % self.instInfo.port + fileList.append(os.path.join(tmpDir, pgsql)) + fileList.append(os.path.join(tmpDir, pgsqlLock)) + return fileList + + def removeSocketFile(self, fileName): + """ + """ + g_file.removeFile(fileName, "shell") + + def removeTbsDir(self, tbsDir): + """ + """ + g_file.removeDirectory(tbsDir) + + def cleanDir(self, instDir): + """ + function: Clean the dirs + input : instDir + output: NA + """ + if (not os.path.exists(instDir)): + return + + dataDir = [] + dataDir = os.listdir(instDir) + if (os.getuid() == 0): + pglDir = '%s/pg_location' % instDir + isPglDirEmpty = False + if (os.path.exists(pglDir) and len(os.listdir(pglDir)) == 0): + isPglDirEmpty = True + if (len(dataDir) == 0 or isPglDirEmpty): + g_file.cleanDirectoryContent(instDir) + else: + for info in dataDir: + if (str(info) == "pg_location"): + resultMount = [] + resultFile = [] + resultDir = [] + pglDir = '%s/pg_location' % instDir + + # delete all files in the mount point + cmd = "%s | %s '%s' | %s '{printf $3}'" % \ + (g_Platform.getMountCmd(), g_Platform.getGrepCmd(), + pglDir, g_Platform.getAwkCmd()) + (status, outputMount) = subprocess.getstatusoutput(cmd) + if (status != 0): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50207"] % + instDir + " Error:\n%s." % + str(outputMount) + + "The cmd is %s" % cmd) + else: + if (len(outputMount) > 0): + resultMount = str(outputMount).split() + for infoMount in resultMount: + g_file.cleanDirectoryContent(infoMount) + else: + g_file.cleanDirectoryContent(instDir) + continue + + # delete file in the pg_location directory + if (not os.path.exists(pglDir)): + continue + cmd = "cd '%s'" % pglDir + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % + cmd + " Error: \n%s " % output) + + outputFile = g_file.findFile(".", "f", "type") + if (len(outputFile) > 0): + for infoFile in outputFile: + tmpinfoFile = pglDir + infoFile[1:] + for infoMount in resultMount: + if (tmpinfoFile.find(infoMount) < 0 and + infoMount.find(tmpinfoFile) < 0): + realFile = "'%s/%s'" % (pglDir, infoFile) + g_file.removeFile(realFile, "shell") + + # delete directory in the pg_location directory + cmd = "if [ -d '%s' ]; then cd '%s' && find -type d; fi" \ + % \ + (pglDir, pglDir) + (status, outputDir) = subprocess.getstatusoutput(cmd) + if (status != 0): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50207"] % + instDir + " Error:\n%s." % + str(outputDir) + "The cmd is %s" % cmd) + else: + resultDir = g_file.findFile(".", "d", "type") + resultDir.remove(".") + if (len(resultDir) > 0): + for infoDir in resultDir: + tmpinfoDir = pglDir + infoDir[1:] + for infoMount in resultMount: + if (tmpinfoDir.find(infoMount) < 0 and + infoMount.find(tmpinfoDir) < 0): + realPath = "'%s/%s'" % ( + pglDir, infoDir) + g_file.removeDirectory(realPath) + cmd = "if [ -d '%s' ];then cd '%s' && find . ! -name " \ + "'pg_location' " \ + "! -name '..' ! -name '.' -print0 |xargs -r -0 -n100 rm " \ + "-rf; " \ + "fi" % (instDir, instDir) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50207"] % + instDir + " Error:\n%s." 
% str(output) + + "The cmd is %s" % cmd) + + def uninstall(self, instNodeName): + """ + function: Clean node instances. + 1.get the data dirs, tablespaces, soketfiles + 2.use theard delete the dirs or files + input : instNodeName + output: NA + """ + self.logger.log("Cleaning instance.") + + # tablespace data directory + tbsDirList = self.getInstanceTblspcDirs(instNodeName) + + # sockete file + socketFiles = self.getLockFiles() + + # clean tablespace dir + if (len(tbsDirList) != 0): + try: + self.logger.debug("Deleting instances tablespace directories.") + for tbsDir in tbsDirList: + self.removeTbsDir(tbsDir) + except Exception as e: + raise Exception(str(e)) + self.logger.log("Successfully cleaned instance tablespace.") + + if (len(self.instInfo.datadir) != 0): + try: + self.logger.debug("Deleting instances directories.") + self.cleanDir(self.instInfo.datadir) + except Exception as e: + raise Exception(str(e)) + self.logger.log("Successfully cleaned instances.") + + if (len(self.instInfo.xlogdir) != 0): + try: + self.logger.debug("Deleting instances xlog directories.") + self.cleanDir(self.instInfo.xlogdir) + except Exception as e: + raise Exception(str(e)) + self.logger.log("Successfully cleaned instances.") + + if (len(socketFiles) != 0): + try: + self.logger.debug("Deleting socket files.") + for socketFile in socketFiles: + self.removeSocketFile(socketFile) + except Exception as e: + raise Exception(str(e)) + self.logger.log("Successfully cleaned socket files.") + + def setCommonItems(self): + """ + function: set common items + input : tmpDir + output: tempCommonDict + """ + tempCommonDict = {} + tmpDir = DefaultValue.getTmpDirFromEnv() + tempCommonDict["unix_socket_directory"] = "'%s'" % tmpDir + tempCommonDict["unix_socket_permissions"] = "0700" + tempCommonDict["log_file_mode"] = "0600" + tempCommonDict["enable_nestloop"] = "off" + tempCommonDict["enable_mergejoin"] = "off" + tempCommonDict["explain_perf_mode"] = "pretty" + tempCommonDict["log_line_prefix"] = "'%m %c %d %p %a %x %n %e '" + tempCommonDict["modify_initial_password"] = "true" + + return tempCommonDict + + def doGUCConfig(self, action, GUCParasStr, isHab=False): + """ + """ + # check instance data directory + if (self.instInfo.datadir == "" or not os.path.exists( + self.instInfo.datadir)): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50219"] % + ("data directory of the instance[%s]" % + str(self.instInfo))) + + if (GUCParasStr == ""): + return + + # check conf file + if (isHab == True): + configFile = "%s/pg_hba.conf" % self.instInfo.datadir + else: + configFile = "%s/postgresql.conf" % self.instInfo.datadir + if (not os.path.exists(configFile)): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % configFile) + + cmd = "%s/gs_guc %s -D %s %s " % (self.binPath, action, + self.instInfo.datadir, GUCParasStr) + (status, output) = DefaultValue.retryGetstatusoutput(cmd, 3, 3) + if (status != 0): + raise Exception(ErrorCode.GAUSS_500["GAUSS_50007"] % "GUC" + + " Command: %s. 
Error:\n%s" % (cmd, output)) + + def setGucConfig(self, paraDict=None, setMode='set'): + """ + """ + i = 0 + GUCParasStr = "" + GUCParasStrList = [] + if paraDict is None: + paraDict = {} + for paras in paraDict: + i += 1 + GUCParasStr += " -c \"%s=%s\" " % (paras, paraDict[paras]) + if (i % MAX_PARA_NUMBER == 0): + GUCParasStrList.append(GUCParasStr) + i = 0 + GUCParasStr = "" + if (GUCParasStr != ""): + GUCParasStrList.append(GUCParasStr) + + for parasStr in GUCParasStrList: + self.doGUCConfig(setMode, parasStr, False) + + def removeIpInfoOnPghbaConfig(self, ipAddressList): + """ + """ + i = 0 + GUCParasStr = "" + GUCParasStrList = [] + for ipAddress in ipAddressList: + i += 1 + GUCParasStr += " -h \"host all all %s/32\"" % (ipAddress) + if (i % MAX_PARA_NUMBER == 0): + GUCParasStrList.append(GUCParasStr) + i = 0 + GUCParasStr = "" + if (GUCParasStr != ""): + GUCParasStrList.append(GUCParasStr) + + for parasStr in GUCParasStrList: + self.doGUCConfig("set", parasStr, True) diff --git a/script/gspylib/component/Kernel/__init__.py b/script/gspylib/component/Kernel/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/script/gspylib/component/__init__.py b/script/gspylib/component/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/script/gspylib/etc/conf/check_list.conf b/script/gspylib/etc/conf/check_list.conf new file mode 100644 index 0000000..deba792 --- /dev/null +++ b/script/gspylib/etc/conf/check_list.conf @@ -0,0 +1,67 @@ +#The file(check_list.conf) is the gs_check and gs_checkos configuration file. +#The file is placed in $GPHOME/script/util + +# the system control parameter +[/etc/sysctl.conf] +net.ipv4.tcp_max_tw_buckets = 10000 +net.ipv4.tcp_tw_reuse = 1 +net.ipv4.tcp_tw_recycle = 1 +net.ipv4.tcp_keepalive_time = 30 +net.ipv4.tcp_keepalive_intvl = 30 +net.ipv4.tcp_keepalive_probes = 9 +net.ipv4.tcp_retries2 = 12 +net.sctp.addip_enable = 0 +net.core.wmem_max = 21299200 +net.core.rmem_max = 21299200 +net.core.wmem_default = 21299200 +net.core.rmem_default = 21299200 +net.sctp.sctp_mem = 94500000 915000000 927000000 +net.sctp.sctp_rmem = 8192 250000 16777216 +net.sctp.sctp_wmem = 8192 250000 16777216 +kernel.sem = 250 6400000 1000 25600 +net.ipv4.tcp_rmem = 8192 250000 16777216 +net.ipv4.tcp_wmem = 8192 250000 16777216 +# vm.min_free_kbytes would set to 5% of total system memory real time, total system memory get with cmd: free -k|grep Mem|awk '{print $2}'. 
+vm.min_free_kbytes = total_system_memory*5%
+net.core.netdev_max_backlog = 65535
+net.ipv4.tcp_max_syn_backlog = 65535
+net.core.somaxconn = 65535
+net.ipv4.tcp_syncookies = 1
+vm.overcommit_memory = 0
+kernel.shmall = 1152921504606846720
+kernel.shmmax = 18446744073709551615
+
+# if a parameter value is not equal to the OS's value, print a warning, not an error
+[SUGGEST:/etc/sysctl.conf]
+net.sctp.sndbuf_policy = 0
+net.sctp.rcvbuf_policy = 0
+net.ipv4.ip_local_port_range = 26000 65535
+net.ipv4.tcp_fin_timeout = 60
+net.ipv4.tcp_sack = 1
+net.ipv4.tcp_timestamps = 1
+net.ipv4.tcp_retries1 = 5
+net.ipv4.tcp_syn_retries = 5
+net.ipv4.tcp_synack_retries = 5
+net.sctp.path_max_retrans = 10
+net.sctp.max_init_retransmits = 10
+net.sctp.association_max_retrans = 10
+net.sctp.hb_interval = 30000
+vm.extfrag_threshold = 500
+vm.overcommit_ratio = 90
+SctpChecksumErrors = 0
+
+# open file number, please set it to '1000000'
+[/etc/security/limits.conf]
+open files = 1000000
+stack size = 3072
+virtual memory = unlimited
+max user processes = unlimited
+# network parameter
+# if the speed of the network is greater than or equal to 10000Mb/s, please set RX/TX to 4096;
+# we will check that the MTU is greater than or equal to 1500, but gs_checkos does not set it.
+# else, skip it.
+[/sbin/ifconfig]
+MTU = 8192
+RX = 4096
+TX = 4096
+
diff --git a/script/gspylib/etc/conf/check_list_dws.conf b/script/gspylib/etc/conf/check_list_dws.conf
new file mode 100644
index 0000000..a7f7b7c
--- /dev/null
+++ b/script/gspylib/etc/conf/check_list_dws.conf
@@ -0,0 +1,65 @@
+#The file(check_list_dws.conf) is the gs_check and gs_checkos configuration file.
+#The file is placed in $GPHOME/script/util
+
+# the system control parameter
+[/etc/sysctl.conf]
+net.ipv4.tcp_max_tw_buckets = 10000
+net.ipv4.tcp_tw_reuse = 1
+net.ipv4.tcp_tw_recycle = 1
+net.ipv4.tcp_keepalive_time = 30
+net.ipv4.tcp_keepalive_intvl = 30
+net.ipv4.tcp_keepalive_probes = 9
+net.ipv4.tcp_retries2 = 12
+net.sctp.addip_enable = 0
+net.core.wmem_max = 21299200
+net.core.rmem_max = 21299200
+net.core.wmem_default = 21299200
+net.core.rmem_default = 21299200
+net.sctp.sctp_mem = 94500000 915000000 927000000
+net.sctp.sctp_rmem = 8192 250000 16777216
+net.sctp.sctp_wmem = 8192 250000 16777216
+kernel.sem = 250 6400000 1000 25600
+net.ipv4.tcp_rmem = 8192 250000 16777216
+net.ipv4.tcp_wmem = 8192 250000 16777216
+# vm.min_free_kbytes will be set to 5% of total system memory at runtime; total system memory is obtained with: free -k|grep Mem|awk '{print $2}'.
+vm.min_free_kbytes = total_system_memory*5% +net.core.netdev_max_backlog = 65535 +net.ipv4.tcp_max_syn_backlog = 65535 +net.core.somaxconn = 65535 +net.ipv4.tcp_syncookies = 1 +vm.overcommit_memory = 0 +net.sctp.sndbuf_policy = 0 +net.sctp.rcvbuf_policy = 0 +net.ipv4.tcp_fin_timeout = 60 +kernel.shmall = 1152921504606846720 +kernel.shmmax = 18446744073709551615 +net.ipv4.tcp_sack = 1 +net.ipv4.tcp_timestamps = 1 +net.ipv4.tcp_retries1 = 10 +net.ipv4.tcp_syn_retries = 10 +net.ipv4.tcp_synack_retries = 10 +net.sctp.path_max_retrans = 10 +net.sctp.max_init_retransmits = 10 +net.sctp.association_max_retrans = 10 +net.sctp.hb_interval = 30000 +vm.extfrag_threshold = 500 +vm.overcommit_ratio = 90 + +# if parameter value is not equal to ths OS's value, print the waring, and not error +[SUGGEST:/etc/sysctl.conf] +SctpChecksumErrors = 0 + +# open file number, please set it to set '1000000' +[/etc/security/limits.conf] +open files = 1000000 +stack size = 3072 + +# network parameter +# if the level of network is greater or equal to 10000Mb/s, please set RX/TX to 4096; +# we will check if the MTU is greater or equal to 1500, but gs_checkos dose not set it. +# else, skip it. +[/sbin/ifconfig] +MTU = 8192 +RX = 4096 +TX = 4096 + diff --git a/script/gspylib/etc/conf/cluster_config_template.xml b/script/gspylib/etc/conf/cluster_config_template.xml new file mode 100644 index 0000000..48fd766 --- /dev/null +++ b/script/gspylib/etc/conf/cluster_config_template.xml @@ -0,0 +1,36 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/script/gspylib/etc/conf/gs-OS-set.service b/script/gspylib/etc/conf/gs-OS-set.service new file mode 100644 index 0000000..5277857 --- /dev/null +++ b/script/gspylib/etc/conf/gs-OS-set.service @@ -0,0 +1,12 @@ +[Unit] +Description=OS Optimize Service +After=local-fs.target + +[Service] +Type=idle +ExecStart=/usr/local/gauss/script/gauss-OS-set.sh +Delegate=yes + +[Install] +WantedBy=multi-user.target + diff --git a/script/gspylib/etc/conf/gs_collector.json b/script/gspylib/etc/conf/gs_collector.json new file mode 100644 index 0000000..d0d4a68 --- /dev/null +++ b/script/gspylib/etc/conf/gs_collector.json @@ -0,0 +1,9 @@ +{ + "Collect": + [ + {"TypeName": "System", "Content":"RunTimeInfo, HardWareInfo","Interval":"0", "Count":"1"}, + {"TypeName": "Log", "Content" : "DataNode,ClusterManager", "Interval":"0", "Count":"1"}, + {"TypeName": "Database", "Content": "pg_locks,pg_stat_activity,pg_thread_wait_status","Interval":"0", "Count":"1"}, + {"TypeName": "Config", "Content": "DataNode", "Interval":"0", "Count":"1"} + ] +} diff --git a/script/gspylib/etc/conf/guc_cloud_list.xml b/script/gspylib/etc/conf/guc_cloud_list.xml new file mode 100644 index 0000000..d27c55f --- /dev/null +++ b/script/gspylib/etc/conf/guc_cloud_list.xml @@ -0,0 +1,39 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/script/gspylib/etc/conf/guc_list.xml b/script/gspylib/etc/conf/guc_list.xml new file mode 100644 index 0000000..10cd9e3 --- /dev/null +++ b/script/gspylib/etc/conf/guc_list.xml @@ -0,0 +1,26 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/script/gspylib/etc/controller/gs_install.xml b/script/gspylib/etc/controller/gs_install.xml new file mode 100644 index 0000000..e69de29 diff --git a/script/gspylib/etc/controller/gs_preinstall.xml b/script/gspylib/etc/controller/gs_preinstall.xml new file mode 100644 index 0000000..0c97f5f --- /dev/null +++ 
b/script/gspylib/etc/controller/gs_preinstall.xml @@ -0,0 +1,75 @@ + + + + + + ture/false + ture/false + + + + + + gspylib.common.ParameterParsecheck + ParameterCommandLine + current + 0 + block + None + None + all + 0 + non-block + + + + gspylib.common.ParameterParsecheck + ParameterCommandLine + current + 0 + block + None + None + all + 0 + non-block + + + + + + + + + + + + + + + + + + + + ture/false + ture/false + + + + + + + + + + + + + + + + + + + diff --git a/script/gspylib/etc/sql/pmk_schema.sql b/script/gspylib/etc/sql/pmk_schema.sql new file mode 100644 index 0000000..135204b --- /dev/null +++ b/script/gspylib/etc/sql/pmk_schema.sql @@ -0,0 +1,3215 @@ +-- It starts a transaction during the PMK installation +START TRANSACTION; + +CREATE SCHEMA pmk; + +-- PMK Configuration table +CREATE TABLE pmk.pmk_configuration +( + config_param_name varchar(64) NOT NULL +, config_value text NOT NULL +, PRIMARY KEY (config_param_name) +); + +-- Snapshot (statistics collection) information +CREATE TABLE pmk.pmk_snapshot +( + snapshot_id int -- Snapshot ID (Running number) +, current_snapshot_time timestamp with time zone -- Time at the beginning of the snapshot +, last_snapshot_time timestamp with time zone -- Time at the end of the snapshot; the actual time the snapshot was taken +, creation_time timestamp with time zone -- Time the snapshot was created +, PRIMARY KEY (snapshot_id) +); + +CREATE INDEX ix_pmk_snapshot_time ON pmk.pmk_snapshot (current_snapshot_time DESC); + +-- Statistics for each node +CREATE TABLE pmk.pmk_snapshot_datanode_stat +( + snapshot_id int -- Snapshot Id +, node_id int -- node id from pgxc_node +, node_name text -- node name from pgxc_node +, node_host text -- node host from pgxc_node +, last_startup_time timestamp with time zone -- last restart time of the node before snapshot starts +, number_of_files int +, physical_reads bigint +, physical_reads_delta bigint +, physical_writes bigint +, physical_writes_delta bigint +, read_time bigint +, read_time_delta bigint +, write_time bigint +, write_time_delta bigint +, db_size bigint +, active_sql_count int +, wait_sql_count int +, session_count int +, xact_commit bigint +, xact_commit_delta bigint +, xact_rollback bigint +, xact_rollback_delta bigint +, checkpoints_timed numeric(40,0) +, checkpoints_timed_delta numeric(40,0) +, checkpoints_req numeric(40,0) +, checkpoints_req_delta numeric(40,0) +, checkpoint_write_time double precision +, checkpoint_write_time_delta double precision +, physical_memory bigint +, db_memory_usage bigint +, shared_buffer_size bigint +, session_memory_total_size bigint +, session_memory_used_size bigint +, blocks_read bigint +, blocks_read_delta bigint +, blocks_hit bigint +, blocks_hit_delta bigint +, work_memory_size bigint +, sorts_in_memory bigint +, sorts_in_memory_delta bigint +, sorts_in_disk bigint +, sorts_in_disk_delta bigint +, busy_time numeric +, busy_time_delta numeric +, idle_time numeric +, idle_time_delta numeric +, iowait_time numeric +, iowait_time_delta numeric +, db_cpu_time numeric +, db_cpu_time_delta numeric +, PRIMARY KEY (snapshot_id, node_id) +); + +CREATE INDEX ix_pmk_snapshot_dnode_stat_node_name ON pmk.pmk_snapshot_datanode_stat (UPPER(node_name), snapshot_id); + +CREATE TABLE pmk.pmk_snapshot_coordinator_stat +( + snapshot_id int -- Snapshot Id +, node_id int -- node id from pgxc_node +, node_name text -- node name from pgxc_node +, node_host text -- node host from pgxc_node +, last_startup_time timestamp with time zone -- last restart time of the node before 
+CREATE TABLE pmk.pmk_snapshot_coordinator_stat
+(
+  snapshot_id int -- Snapshot id
+, node_id int -- node id from pgxc_node
+, node_name text -- node name from pgxc_node
+, node_host text -- node host from pgxc_node
+, last_startup_time timestamp with time zone -- last restart time of the node before the snapshot starts
+, number_of_files int
+, physical_reads bigint
+, physical_reads_delta bigint
+, physical_writes bigint
+, physical_writes_delta bigint
+, read_time bigint
+, read_time_delta bigint
+, write_time bigint
+, write_time_delta bigint
+, db_size bigint
+, active_sql_count int
+, wait_sql_count int
+, session_count int
+, xact_commit bigint
+, xact_commit_delta bigint
+, xact_rollback bigint
+, xact_rollback_delta bigint
+, checkpoints_timed numeric(40,0)
+, checkpoints_timed_delta numeric(40,0)
+, checkpoints_req numeric(40,0)
+, checkpoints_req_delta numeric(40,0)
+, checkpoint_write_time double precision
+, checkpoint_write_time_delta double precision
+, physical_memory bigint
+, db_memory_usage bigint
+, shared_buffer_size bigint
+, session_memory_total_size bigint
+, session_memory_used_size bigint
+, blocks_read bigint
+, blocks_read_delta bigint
+, blocks_hit bigint
+, blocks_hit_delta bigint
+, work_memory_size bigint
+, sorts_in_memory bigint
+, sorts_in_memory_delta bigint
+, sorts_in_disk bigint
+, sorts_in_disk_delta bigint
+, busy_time numeric
+, busy_time_delta numeric
+, idle_time numeric
+, idle_time_delta numeric
+, iowait_time numeric
+, iowait_time_delta numeric
+, db_cpu_time numeric
+, db_cpu_time_delta numeric
+, PRIMARY KEY (snapshot_id, node_id)
+);
+
+CREATE INDEX ix_pmk_snapshot_coord_stat_node_name ON pmk.pmk_snapshot_coordinator_stat (UPPER(node_name), snapshot_id);
+
+-- Table to maintain PMK meta data
+CREATE TABLE pmk.pmk_meta_data
+(
+  pmk_version varchar(128)
+, last_snapshot_id int
+, last_snapshot_collect_time timestamp with time zone
+, PRIMARY KEY (pmk_version)
+);
+
+CREATE OR REPLACE FUNCTION pmk.check_node_type ( )
+RETURNS TEXT
+AS
+$$
+DECLARE l_node_type CHAR(1);
+BEGIN
+
+    SELECT n.node_type
+      INTO l_node_type
+      FROM pgxc_node n, pg_settings s
+     WHERE s.name = 'pgxc_node_name'
+       AND n.node_name = s.setting;
+
+    IF l_node_type = 'D'
+    THEN
+        RETURN 'ERROR:: PMK commands cannot be executed from a data node. Please execute them from a coordinator.';
+    ELSE
+        RETURN NULL;
+    END IF;
+
+END;
+$$
+LANGUAGE plpgsql;
+
+CREATE OR REPLACE FUNCTION pmk.check_pmk_enabled ( )
+RETURNS TEXT
+AS
+$$
+DECLARE l_pmk_enabled_i TEXT;
+BEGIN
+
+    SELECT UPPER(config_value)
+      INTO l_pmk_enabled_i
+      FROM pmk.pmk_configuration
+     WHERE config_param_name = 'Enable PMK';
+
+    IF l_pmk_enabled_i = 'FALSE'
+    THEN
+        RETURN 'ERROR:: PMK should be enabled to use the PMK features.';
+    ELSE
+        RETURN NULL;
+    END IF;
+
+END;
+$$
+LANGUAGE plpgsql;
+
+-- This function is used to find the PMK version.
+-- If it is executed from a data node, it throws the appropriate error.
+
+CREATE OR REPLACE FUNCTION pmk.pmk_version ( )
+RETURNS varchar(128)
+AS
+$$
+DECLARE l_pmk_version varchar(128);
+        l_error_message TEXT;
+BEGIN
+
+    l_error_message := pmk.check_node_type();
+
+    IF l_error_message IS NOT NULL
+    THEN
+        raise notice '%',l_error_message;
+        RETURN 'f';
+    END IF;
+
+    l_error_message := pmk.check_pmk_enabled();
+
+    IF l_error_message IS NOT NULL
+    THEN
+        raise notice '%',l_error_message;
+        RETURN 'f';
+    END IF;
+
+    SELECT pmk_version
+      INTO l_pmk_version
+      FROM pmk.pmk_meta_data;
+
+    RETURN l_pmk_version;
+
+END;
+$$
+LANGUAGE plpgsql;
+
+-- This function is used to configure the PMK configuration parameters.
+-- -1 indicates that all statistics collections should be retained.
+-- At least one statistics collection must be retained in the database.
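+--
+-- Editor's sketch of typical calls (commented out so installation behaviour is unchanged):
+--   SELECT pmk.configure_parameter('Collection Count', '10');  -- keep the 10 most recent snapshots
+--   SELECT pmk.configure_parameter('Collection Count', '-1');  -- keep all snapshots
+--   SELECT pmk.configure_parameter('Enable PMK', 'FALSE');     -- disable PMK statistics collection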
+
+CREATE OR REPLACE FUNCTION pmk.configure_parameter
+    ( IN i_config_param_name varchar(64)
+    , IN i_config_value text
+    )
+RETURNS boolean
+AS
+$$
+DECLARE l_collect_count_value INT;
+        l_config_value TEXT;
+        l_upper_config_param TEXT;
+        l_error_message TEXT;
+BEGIN
+
+    l_error_message := pmk.check_node_type();
+
+    IF l_error_message IS NOT NULL
+    THEN
+        raise notice '%',l_error_message;
+        RETURN FALSE;
+    END IF;
+
+    l_upper_config_param := UPPER(TRIM(BOTH ' ' FROM i_config_param_name));
+
+    l_error_message := pmk.check_pmk_enabled();
+
+    IF l_error_message IS NOT NULL
+    AND l_upper_config_param <> 'ENABLE PMK'
+    THEN
+        raise notice '%',l_error_message;
+        RETURN FALSE;
+    END IF;
+
+    IF i_config_param_name IS NULL
+    THEN
+        l_error_message := 'ERROR:: Null should not be provided for the configuration parameter name.';
+        raise notice '%',l_error_message;
+        RETURN FALSE;
+    END IF;
+
+    IF i_config_value IS NULL
+    THEN
+        l_error_message := 'ERROR:: Null should not be provided for the configuration value.';
+        raise notice '%',l_error_message;
+        RETURN FALSE;
+    END IF;
+
+    IF l_upper_config_param = 'COLLECTION COUNT'
+    THEN
+        l_collect_count_value := i_config_value::int;
+
+        IF l_collect_count_value < -1
+        THEN
+            l_error_message := 'ERROR:: Configuration value "' || i_config_value || '" should not be less than -1.';
+            raise notice '%',l_error_message;
+            RETURN FALSE;
+
+        ELSIF l_collect_count_value = 0
+        THEN
+            l_error_message := 'ERROR:: 0 should not be provided since at least one collection should be retained.';
+            raise notice '%',l_error_message;
+            RETURN FALSE;
+
+        ELSE
+            l_config_value := l_collect_count_value;
+        END IF;
+
+    ELSIF l_upper_config_param = 'ENABLE PMK'
+    THEN
+        l_config_value := UPPER(TRIM(BOTH ' ' FROM i_config_value));
+
+        IF l_config_value NOT IN ('TRUE', 'FALSE')
+        THEN
+            l_error_message := 'ERROR:: Allowed values are TRUE or FALSE for the configuration parameter "Enable PMK".';
+            raise notice '%',l_error_message;
+            RETURN FALSE;
+
+        END IF;
+    END IF;
+
+    SET allow_concurrent_tuple_update = ON;
+
+    UPDATE pmk.pmk_configuration
+       SET config_value = l_config_value
+     WHERE UPPER(config_param_name) = l_upper_config_param;
+
+    IF NOT FOUND THEN
+        l_error_message := 'ERROR:: Invalid configuration parameter "' || i_config_param_name || '" provided for configuring a PMK parameter ...';
+        raise notice '%',l_error_message;
+        RETURN FALSE;
+    END IF;
+
+    RETURN TRUE;
+
+END;
+$$
+LANGUAGE plpgsql;
+
+-- If ALL is provided, it returns the details of all the configuration parameters.
+-- If a specific configuration parameter is provided, it returns the details of that parameter.
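+--
+-- Editor's sketch of typical calls (commented out):
+--   SELECT * FROM pmk.get_configuration_parameter('ALL');               -- all parameters
+--   SELECT * FROM pmk.get_configuration_parameter('Collection Count');  -- a single parameter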
+ +CREATE OR REPLACE FUNCTION pmk.get_configuration_parameter + ( IN i_config_param_name TEXT ) +RETURNS TABLE +( + config_param_name varchar(64) +, config_value text +) +AS +$$ +DECLARE l_upper_config_param TEXT; + l_error_message TEXT; +BEGIN + + l_error_message := pmk.check_node_type(); + + IF l_error_message IS NOT NULL + THEN + raise notice '%',l_error_message; + RETURN; + END IF; + + l_error_message := pmk.check_pmk_enabled(); + + IF l_error_message IS NOT NULL + THEN + raise notice '%',l_error_message; + RETURN; + END IF; + + l_upper_config_param := UPPER(TRIM(BOTH ' ' FROM i_config_param_name)); + + IF l_upper_config_param = 'ALL' + THEN + + RETURN QUERY + SELECT config_param_name + , config_value + FROM pmk.pmk_configuration + ORDER BY config_param_name; + + ELSE + + RETURN QUERY + SELECT config_param_name + , config_value + FROM pmk.pmk_configuration + WHERE UPPER(config_param_name) = l_upper_config_param; + + END IF; + +END; +$$ +LANGUAGE plpgsql; + + /* + This function is used to collect statistics for each node (including data node and coordinator) +*/ + +CREATE OR REPLACE FUNCTION pmk.find_perf_stat + ( IN i_skip_supper_role boolean + , OUT o_number_of_files int + , OUT o_physical_reads bigint + , OUT o_physical_writes bigint + , OUT o_read_time bigint + , OUT o_write_time bigint + , OUT o_physical_memory bigint + , OUT o_shared_buffer_size bigint + , OUT o_session_memory_total_size bigint + , OUT o_session_memory_used_size bigint + , OUT o_blocks_read bigint + , OUT o_blocks_hit bigint + , OUT o_db_size bigint + , OUT o_work_memory_size bigint + , OUT o_sorts_in_memory bigint + , OUT o_sorts_in_disk bigint + , OUT o_active_sql_count int + , OUT o_wait_sql_count int + , OUT o_session_count int + , OUT o_busy_time numeric + , OUT o_idle_time numeric + , OUT o_iowait_time numeric + , OUT o_db_cpu_time numeric + , OUT o_db_memory_usage bigint + , OUT o_node_startup_time timestamp with time zone + , OUT o_node_host_name text + , OUT o_xact_commit bigint + , OUT o_xact_rollback bigint + , OUT o_checkpoints_timed numeric(40,0) + , OUT o_checkpoints_req numeric(40,0) + , OUT o_checkpoint_write_time double precision + ) +AS +$$ +DECLARE + l_block_size int; + l_record_chk int; +BEGIN + + o_node_startup_time := pg_postmaster_start_time(); + o_node_host_name := get_hostname(); + + SELECT COUNT(*) AS number_of_files + , SUM(phyrds) AS physical_reads + , SUM(phywrts) AS physical_writes + , SUM(readtim) AS read_time + , SUM(writetim) AS write_time + INTO o_number_of_files + , o_physical_reads + , o_physical_writes + , o_read_time + , o_write_time + FROM pv_file_stat; + + IF o_number_of_files = 0 + THEN + o_physical_reads := 0; + o_physical_writes := 0; + o_read_time := 0; + o_write_time := 0; + END IF; + + WITH os_stat AS + ( + SELECT os.name AS statname + , os.value AS statvalue + FROM pv_os_run_info os + WHERE os.name IN ( 'PHYSICAL_MEMORY_BYTES', 'BUSY_TIME', 'IDLE_TIME', 'IOWAIT_TIME' ) + ) + SELECT (SELECT statvalue FROM os_stat WHERE statname = 'PHYSICAL_MEMORY_BYTES') + , (SELECT statvalue FROM os_stat WHERE statname = 'BUSY_TIME') + , (SELECT statvalue FROM os_stat WHERE statname = 'IDLE_TIME') + , (SELECT statvalue FROM os_stat WHERE statname = 'IOWAIT_TIME') + INTO o_physical_memory + , o_busy_time + , o_idle_time + , o_iowait_time + ; + + -- pv_db_time is not available; temporarily PMK extension is used. 
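+    -- Editor's note: total_cpu() and total_memory() are helpers from the PMK extension
+    -- mentioned above; judging from the *1024 conversion below, total_memory() presumably
+    -- reports kilobytes and is converted to bytes here.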
+ o_db_cpu_time := total_cpu(); + o_db_memory_usage := total_memory()*1024; + + WITH config_value AS + ( SELECT name + , setting::bigint AS config_value + FROM pg_settings + WHERE name IN ( 'block_size', 'shared_buffers', 'work_mem' ) + ) + , config_value1 AS + ( SELECT (SELECT config_value FROM config_value WHERE name = 'block_size') AS block_size + , (SELECT config_value FROM config_value WHERE name = 'shared_buffers') AS shared_buffers + , (SELECT config_value FROM config_value WHERE name = 'work_mem') AS work_mem + ) + SELECT block_size + , (shared_buffers * block_size)::bigint + , (work_mem * 1024)::bigint + INTO l_block_size + , o_shared_buffer_size + , o_work_memory_size + FROM config_value1; + + /* Commented since these statistics are not used for node and cluster reports + */ + o_session_memory_total_size := 0; + o_session_memory_used_size := 0; + + SELECT SUM(blks_read)::bigint + , SUM(blks_hit)::bigint + , SUM(xact_commit)::bigint + , SUM(xact_rollback)::bigint + INTO o_blocks_read + , o_blocks_hit + , o_xact_commit + , o_xact_rollback + FROM pg_stat_database; + + o_db_size := 0; + IF i_skip_supper_role = 'TRUE' + THEN + WITH session_state AS + ( SELECT state, waiting , usename + FROM pg_stat_activity a, pg_roles r + WHERE r.rolsuper = 'f' AND a.usename = r.rolname + ) + , active_session AS + ( SELECT state, waiting , usename + FROM session_state s, pg_roles r + WHERE s.state IN ('active', 'fastpath function call', 'retrying') + AND r.rolsuper = 'f' AND s.usename = r.rolname + ) + SELECT ( SELECT COUNT(*) FROM active_session ) + , ( SELECT COUNT(*) FROM active_session WHERE waiting = TRUE ) + , ( SELECT COUNT(*) FROM session_state ) + INTO o_active_sql_count, o_wait_sql_count , o_session_count + ; + ELSE + WITH session_state AS + ( SELECT state, waiting + FROM pg_stat_activity + ) + , active_session AS + ( SELECT state, waiting + FROM session_state + WHERE state IN ('active', 'fastpath function call', 'retrying') + ) + SELECT ( SELECT COUNT(*) FROM active_session ) + , ( SELECT COUNT(*) FROM active_session WHERE waiting = TRUE ) + , ( SELECT COUNT(*) FROM session_state ) + INTO o_active_sql_count, o_wait_sql_count, o_session_count + ; + END IF; + + -- Currently, the below statistics are calculated from pv_session_stat (which is not accurate) since pv_db_stat is not available + WITH sort_state AS + ( SELECT statname + , SUM(value)::bigint AS sorts_cnt + FROM pv_session_stat + WHERE statname IN ('n_sort_in_memory', 'n_sort_in_disk') + GROUP BY statname + ) + SELECT (SELECT sorts_cnt FROM sort_state WHERE statname = 'n_sort_in_memory') + , (SELECT sorts_cnt FROM sort_state WHERE statname = 'n_sort_in_disk') + INTO o_sorts_in_memory + , o_sorts_in_disk + ; + + SELECT SUM(checkpoints_timed)::numeric(40,0) + , SUM(checkpoints_req)::numeric(40,0) + , SUM(checkpoint_write_time)::bigint + INTO o_checkpoints_timed + , o_checkpoints_req + , o_checkpoint_write_time + FROM pg_stat_bgwriter; + +END; +$$ +LANGUAGE plpgsql; + +/* +pmk.find_node_stat +*/ +CREATE OR REPLACE FUNCTION pmk.find_node_stat + (IN i_skip_supper_role boolean + , OUT o_number_of_files_1 int + , OUT o_physical_reads_1 bigint + , OUT o_physical_writes_1 bigint + , OUT o_read_time_1 bigint + , OUT o_write_time_1 bigint + , OUT o_physical_memory_1 bigint + , OUT o_shared_buffer_size_1 bigint + , OUT o_session_memory_total_size_1 bigint + , OUT o_session_memory_used_size_1 bigint + , OUT o_blocks_read_1 bigint + , OUT o_blocks_hit_1 bigint + , OUT o_db_size_1 bigint + , OUT o_work_memory_size_1 bigint + , OUT 
o_sorts_in_memory_1 bigint + , OUT o_sorts_in_disk_1 bigint + , OUT o_active_sql_count_1 int + , OUT o_wait_sql_count_1 int + , OUT o_session_count_1 int + , OUT o_busy_time_1 numeric + , OUT o_idle_time_1 numeric + , OUT o_iowait_time_1 numeric + , OUT o_db_cpu_time_1 numeric + , OUT o_db_memory_usage_1 bigint + , OUT o_node_startup_time_1 timestamp with time zone + , OUT o_node_host_name_1 text + , OUT o_xact_commit_1 bigint + , OUT o_xact_rollback_1 bigint + , OUT o_checkpoints_timed_1 numeric(40,0) + , OUT o_checkpoints_req_1 numeric(40,0) + , OUT o_checkpoint_write_time_1 double precision + ) +AS +$$ +BEGIN + + SELECT o_number_of_files + , o_physical_reads + , o_physical_writes + , o_read_time + , o_write_time + , o_physical_memory + , o_shared_buffer_size + , o_session_memory_total_size + , o_session_memory_used_size + , o_blocks_read + , o_blocks_hit + , o_db_size + , o_work_memory_size + , o_sorts_in_memory + , o_sorts_in_disk + , o_active_sql_count + , o_wait_sql_count + , o_session_count + , o_busy_time + , o_idle_time + , o_iowait_time + , o_db_cpu_time + , o_db_memory_usage + , o_node_startup_time + , o_node_host_name + , o_xact_commit + , o_xact_rollback + , o_checkpoints_timed + , o_checkpoints_req + , o_checkpoint_write_time + INTO o_number_of_files_1 + , o_physical_reads_1 + , o_physical_writes_1 + , o_read_time_1 + , o_write_time_1 + , o_physical_memory_1 + , o_shared_buffer_size_1 + , o_session_memory_total_size_1 + , o_session_memory_used_size_1 + , o_blocks_read_1 + , o_blocks_hit_1 + , o_db_size_1 + , o_work_memory_size_1 + , o_sorts_in_memory_1 + , o_sorts_in_disk_1 + , o_active_sql_count_1 + , o_wait_sql_count_1 + , o_session_count_1 + , o_busy_time_1 + , o_idle_time_1 + , o_iowait_time_1 + , o_db_cpu_time_1 + , o_db_memory_usage_1 + , o_node_startup_time_1 + , o_node_host_name_1 + , o_xact_commit_1 + , o_xact_rollback_1 + , o_checkpoints_timed_1 + , o_checkpoints_req_1 + , o_checkpoint_write_time_1 + FROM pmk.find_perf_stat(i_skip_supper_role); + +END; +$$ +LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION pmk.load_datanode_stat + ( IN i_snapshot_id int + , IN i_last_snapshot_id int + , IN i_pmk_last_collect_start_time timestamp with time zone + , IN i_node_id int + , IN i_node_name text + , IN i_number_of_files int + , IN i_physical_reads bigint + , IN i_physical_writes bigint + , IN i_read_time bigint + , IN i_write_time bigint + , IN i_physical_memory bigint + , IN i_shared_buffer_size bigint + , IN i_session_memory_total_size bigint + , IN i_session_memory_used_size bigint + , IN i_blocks_read bigint + , IN i_blocks_hit bigint + , IN i_db_size bigint + , IN i_work_memory_size bigint + , IN i_sorts_in_memory bigint + , IN i_sorts_in_disk bigint + , IN i_active_sql_count int + , IN i_wait_sql_count int + , IN i_session_count int + , IN i_busy_time numeric + , IN i_idle_time numeric + , IN i_iowait_time numeric + , IN i_db_cpu_time numeric + , IN i_db_memory_usage bigint + , IN i_node_startup_time timestamp with time zone + , IN i_node_host_name text + , IN i_xact_commit bigint + , IN i_xact_rollback bigint + , IN i_checkpoints_timed numeric(40,0) + , IN i_checkpoints_req numeric(40,0) + , IN i_checkpoint_write_time double precision + , IN i_skip_supper_role boolean + , OUT o_dn_snapshot_id int + , OUT o_dn_node_id int -- node id from pgxc_node + , OUT o_dn_node_name text -- node name from pgxc_node + , OUT o_dn_node_host text -- node host from pgxc_node + , OUT o_dn_last_startup_time timestamp with time zone -- last restart time of the node before snapshot starts + 
, OUT o_dn_number_of_files int + , OUT o_dn_physical_reads bigint + , OUT o_dn_physical_reads_delta bigint + , OUT o_dn_physical_writes bigint + , OUT o_dn_physical_writes_delta bigint + , OUT o_dn_read_time bigint + , OUT o_dn_read_time_delta bigint + , OUT o_dn_write_time bigint + , OUT o_dn_write_time_delta bigint + , OUT o_dn_db_size bigint + , OUT o_dn_active_sql_count int + , OUT o_dn_wait_sql_count int + , OUT o_dn_session_count int + , OUT o_dn_xact_commit bigint + , OUT o_dn_xact_commit_delta bigint + , OUT o_dn_xact_rollback bigint + , OUT o_dn_xact_rollback_delta bigint + , OUT o_dn_checkpoints_timed numeric(40,0) + , OUT o_dn_checkpoints_timed_delta numeric(40,0) + , OUT o_dn_checkpoints_req numeric(40,0) + , OUT o_dn_checkpoints_req_delta numeric(40,0) + , OUT o_dn_checkpoint_write_time double precision + , OUT o_dn_checkpoint_write_time_delta double precision + , OUT o_dn_physical_memory bigint + , OUT o_dn_db_memory_usage bigint + , OUT o_dn_shared_buffer_size bigint + , OUT o_dn_session_memory_total_size bigint + , OUT o_dn_session_memory_used_size bigint + , OUT o_dn_blocks_read bigint + , OUT o_dn_blocks_read_delta bigint + , OUT o_dn_blocks_hit bigint + , OUT o_dn_blocks_hit_delta bigint + , OUT o_dn_work_memory_size bigint + , OUT o_dn_sorts_in_memory bigint + , OUT o_dn_sorts_in_memory_delta bigint + , OUT o_dn_sorts_in_disk bigint + , OUT o_dn_sorts_in_disk_delta bigint + , OUT o_dn_busy_time numeric + , OUT o_dn_busy_time_delta numeric + , OUT o_dn_idle_time numeric + , OUT o_dn_idle_time_delta numeric + , OUT o_dn_iowait_time numeric + , OUT o_dn_iowait_time_delta numeric + , OUT o_dn_db_cpu_time numeric + , OUT o_dn_db_cpu_time_delta numeric + ) +AS +$$ +DECLARE l_physical_reads_delta bigint; + l_physical_writes_delta bigint; + l_read_time_delta bigint; + l_write_time_delta bigint; + l_blocks_read_delta bigint; + l_blocks_hit_delta bigint; + l_sorts_in_memory_delta bigint; + l_sorts_in_disk_delta bigint; + l_busy_time_delta numeric; + l_idle_time_delta numeric; + l_iowait_time_delta numeric; + l_db_cpu_time_delta numeric; + l_xact_commit_delta bigint; + l_xact_rollback_delta bigint; + l_checkpoints_timed_delta numeric(40,0); + l_checkpoints_req_delta numeric(40,0); + l_checkpoint_write_time_delta double precision; + i_skip_supper_role_delta boolean; +BEGIN + + l_physical_reads_delta := i_physical_reads; + l_physical_writes_delta := i_physical_writes; + l_read_time_delta := i_read_time; + l_write_time_delta := i_write_time; + l_xact_commit_delta := i_xact_commit; + l_xact_rollback_delta := i_xact_rollback; + l_checkpoints_timed_delta := i_checkpoints_timed; + l_checkpoints_req_delta := i_checkpoints_req; + l_checkpoint_write_time_delta := i_checkpoint_write_time; + i_skip_supper_role_delta := i_skip_supper_role; + l_blocks_read_delta := i_blocks_read; + l_blocks_hit_delta := i_blocks_hit; + + l_busy_time_delta := i_busy_time; + l_idle_time_delta := i_idle_time; + l_iowait_time_delta := i_iowait_time; + l_db_cpu_time_delta := i_db_cpu_time; + + -- Currently, the below statistics are calculated from pv_session_stat (which is not accurate) since pv_db_stat is not available + -- These statistics are cumulative from instance startup. 
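+    -- Editor's note: in this load path each *_delta variable is simply seeded with the
+    -- corresponding cumulative input value; computing true per-interval deltas against
+    -- the previous snapshot is presumably handled by the calling layer.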
+    l_sorts_in_memory_delta := i_sorts_in_memory;
+    l_sorts_in_disk_delta := i_sorts_in_disk;
+
+    o_dn_snapshot_id := i_snapshot_id;
+    o_dn_node_id := i_node_id;
+    o_dn_node_name := i_node_name;
+    o_dn_node_host := i_node_host_name;
+    o_dn_last_startup_time := i_node_startup_time;
+    o_dn_number_of_files := i_number_of_files;
+    o_dn_physical_reads := i_physical_reads;
+    o_dn_physical_reads_delta := l_physical_reads_delta;
+    o_dn_physical_writes := i_physical_writes;
+    o_dn_physical_writes_delta := l_physical_writes_delta;
+    o_dn_read_time := i_read_time;
+    o_dn_read_time_delta := l_read_time_delta;
+    o_dn_write_time := i_write_time;
+    o_dn_write_time_delta := l_write_time_delta;
+    o_dn_db_size := i_db_size;
+    o_dn_active_sql_count := i_active_sql_count;
+    o_dn_wait_sql_count := i_wait_sql_count;
+    o_dn_session_count := i_session_count;
+    o_dn_xact_commit := i_xact_commit;
+    o_dn_xact_commit_delta := l_xact_commit_delta;
+    o_dn_xact_rollback := i_xact_rollback;
+    o_dn_xact_rollback_delta := l_xact_rollback_delta;
+    o_dn_checkpoints_timed := i_checkpoints_timed;
+    o_dn_checkpoints_timed_delta := l_checkpoints_timed_delta;
+    o_dn_checkpoints_req := i_checkpoints_req;
+    o_dn_checkpoints_req_delta := l_checkpoints_req_delta;
+    o_dn_checkpoint_write_time := i_checkpoint_write_time;
+    o_dn_checkpoint_write_time_delta := l_checkpoint_write_time_delta;
+    o_dn_physical_memory := i_physical_memory;
+    o_dn_db_memory_usage := i_db_memory_usage;
+    o_dn_shared_buffer_size := i_shared_buffer_size;
+    o_dn_session_memory_total_size := i_session_memory_total_size;
+    o_dn_session_memory_used_size := i_session_memory_used_size;
+    o_dn_blocks_read := i_blocks_read;
+    o_dn_blocks_read_delta := l_blocks_read_delta;
+    o_dn_blocks_hit := i_blocks_hit;
+    o_dn_blocks_hit_delta := l_blocks_hit_delta;
+    o_dn_work_memory_size := i_work_memory_size;
+    o_dn_sorts_in_memory := i_sorts_in_memory;
+    o_dn_sorts_in_memory_delta := l_sorts_in_memory_delta;
+    o_dn_sorts_in_disk := i_sorts_in_disk;
+    o_dn_sorts_in_disk_delta := l_sorts_in_disk_delta;
+    o_dn_busy_time := i_busy_time;
+    o_dn_busy_time_delta := l_busy_time_delta;
+    o_dn_idle_time := i_idle_time;
+    o_dn_idle_time_delta := l_idle_time_delta;
+    o_dn_iowait_time := i_iowait_time;
+    o_dn_iowait_time_delta := l_iowait_time_delta;
+    o_dn_db_cpu_time := i_db_cpu_time;
+    o_dn_db_cpu_time_delta := l_db_cpu_time_delta;
+
+END;
+$$
+LANGUAGE plpgsql;
+
+/*
+    This function is used to find the "delta values" of coordinator statistics and
+    to create one or more entries (one per coordinator) in the PMK table (pmk.pmk_snapshot_coordinator_stat).
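+    (Editor's note: pmk.load_node_stat below routes rows here when the node type from
+    pgxc_node is not 'D', and to pmk.load_datanode_stat when it is 'D'.)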
+*/ + + +CREATE OR REPLACE FUNCTION pmk.load_coordinator_stat + ( IN i_snapshot_id int + , IN i_last_snapshot_id int + , IN i_pmk_last_collect_start_time timestamp with time zone + , IN i_node_id int + , IN i_node_name text + , IN i_number_of_files int + , IN i_physical_reads bigint + , IN i_physical_writes bigint + , IN i_read_time bigint + , IN i_write_time bigint + , IN i_physical_memory bigint + , IN i_shared_buffer_size bigint + , IN i_session_memory_total_size bigint + , IN i_session_memory_used_size bigint + , IN i_blocks_read bigint + , IN i_blocks_hit bigint + , IN i_db_size bigint + , IN i_work_memory_size bigint + , IN i_sorts_in_memory bigint + , IN i_sorts_in_disk bigint + , IN i_active_sql_count int + , IN i_wait_sql_count int + , IN i_session_count int + , IN i_busy_time numeric + , IN i_idle_time numeric + , IN i_iowait_time numeric + , IN i_db_cpu_time numeric + , IN i_db_memory_usage bigint + , IN i_node_startup_time timestamp with time zone + , IN i_node_host_name text + , IN i_xact_commit bigint + , IN i_xact_rollback bigint + , IN i_checkpoints_timed numeric(40,0) + , IN i_checkpoints_req numeric(40,0) + , IN i_checkpoint_write_time double precision + , IN i_skip_supper_role boolean + , OUT o_cn_snapshot_id int + , OUT o_cn_node_id int + , OUT o_cn_node_name text + , OUT o_cn_node_host_name text + , OUT o_cn_node_startup_time timestamp with time zone + , OUT o_cn_number_of_files int + , OUT o_cn_physical_reads bigint + , OUT o_cn_physical_reads_delta bigint + , OUT o_cn_physical_writes bigint + , OUT o_cn_physical_writes_delta bigint + , OUT o_cn_read_time bigint + , OUT o_cn_read_time_delta bigint + , OUT o_cn_write_time bigint + , OUT o_cn_write_time_delta bigint + , OUT o_cn_db_size bigint + , OUT o_cn_active_sql_count int + , OUT o_cn_wait_sql_count int + , OUT o_cn_session_count int + , OUT o_cn_xact_commit bigint + , OUT o_cn_xact_commit_delta bigint + , OUT o_cn_xact_rollback bigint + , OUT o_cn_xact_rollback_delta bigint + , OUT o_cn_checkpoints_timed numeric(40,0) + , OUT o_cn_checkpoints_timed_delta numeric(40,0) + , OUT o_cn_checkpoints_req numeric(40,0) + , OUT o_cn_checkpoints_req_delta numeric(40,0) + , OUT o_cn_checkpoint_write_time double precision + , OUT o_cn_checkpoint_write_time_delta double precision + , OUT o_cn_physical_memory bigint + , OUT o_cn_db_memory_usage bigint + , OUT o_cn_shared_buffer_size bigint + , OUT o_cn_session_memory_total_size bigint + , OUT o_cn_session_memory_used_size bigint + , OUT o_cn_blocks_read bigint + , OUT o_cn_blocks_read_delta bigint + , OUT o_cn_blocks_hit bigint + , OUT o_cn_blocks_hit_delta bigint + , OUT o_cn_work_memory_size bigint + , OUT o_cn_sorts_in_memory bigint + , OUT o_cn_sorts_in_memory_delta bigint + , OUT o_cn_sorts_in_disk bigint + , OUT o_cn_sorts_in_disk_delta bigint + , OUT o_cn_busy_time numeric + , OUT o_cn_busy_time_delta numeric + , OUT o_cn_idle_time numeric + , OUT o_cn_idle_time_delta numeric + , OUT o_cn_iowait_time numeric + , OUT o_cn_iowait_time_delta numeric + , OUT o_cn_db_cpu_time numeric + , OUT o_cn_db_cpu_time_delta numeric + ) +AS +$$ +DECLARE l_physical_reads_delta bigint; + l_physical_writes_delta bigint; + l_read_time_delta bigint; + l_write_time_delta bigint; + l_blocks_read_delta bigint; + l_blocks_hit_delta bigint; + l_sorts_in_memory_delta bigint; + l_sorts_in_disk_delta bigint; + l_busy_time_delta numeric; + l_idle_time_delta numeric; + l_iowait_time_delta numeric; + l_db_cpu_time_delta numeric; + l_xact_commit_delta bigint; + l_xact_rollback_delta bigint; + 
l_checkpoints_timed_delta numeric(40,0); + l_checkpoints_req_delta numeric(40,0); + l_checkpoint_write_time_delta double precision; + i_skip_supper_role_delta boolean; +BEGIN + + l_physical_reads_delta := i_physical_reads; + l_physical_writes_delta := i_physical_writes; + l_read_time_delta := i_read_time; + l_write_time_delta := i_write_time; + l_xact_commit_delta := i_xact_commit; + l_xact_rollback_delta := i_xact_rollback; + l_checkpoints_timed_delta := i_checkpoints_timed; + l_checkpoints_req_delta := i_checkpoints_req; + l_checkpoint_write_time_delta := i_checkpoint_write_time; + i_skip_supper_role_delta := i_skip_supper_role; + l_blocks_read_delta := i_blocks_read; + l_blocks_hit_delta := i_blocks_hit; + + l_busy_time_delta := i_busy_time; + l_idle_time_delta := i_idle_time; + l_iowait_time_delta := i_iowait_time; + l_db_cpu_time_delta := i_db_cpu_time; + + -- Currently, the below statistics are calculated from pv_session_stat (which is not accurate) since pv_db_stat is not available + -- These statistics are cumulative from instance startup. + l_sorts_in_memory_delta := i_sorts_in_memory; + l_sorts_in_disk_delta := i_sorts_in_disk; + + o_cn_snapshot_id := i_snapshot_id; + o_cn_node_id := i_node_id; + o_cn_node_name := i_node_name; + o_cn_node_host_name := i_node_host_name; + o_cn_node_startup_time := i_node_startup_time; + o_cn_number_of_files := i_number_of_files; + o_cn_physical_reads := i_physical_reads; + o_cn_physical_reads_delta := l_physical_reads_delta; + o_cn_physical_writes := i_physical_writes; + o_cn_physical_writes_delta := l_physical_writes_delta; + o_cn_read_time := i_read_time; + o_cn_read_time_delta := l_read_time_delta; + o_cn_write_time := i_write_time; + o_cn_write_time_delta := l_write_time_delta; + o_cn_db_size := i_db_size; + o_cn_active_sql_count := i_active_sql_count; + o_cn_wait_sql_count := i_wait_sql_count; + o_cn_session_count := i_session_count; + o_cn_xact_commit := i_xact_commit; + o_cn_xact_commit_delta := l_xact_commit_delta; + o_cn_xact_rollback := i_xact_rollback; + o_cn_xact_rollback_delta := l_xact_rollback_delta; + o_cn_checkpoints_timed := i_checkpoints_timed; + o_cn_checkpoints_timed_delta := l_checkpoints_timed_delta; + o_cn_checkpoints_req := i_checkpoints_req; + o_cn_checkpoints_req_delta := l_checkpoints_req_delta; + o_cn_checkpoint_write_time := i_checkpoint_write_time; + o_cn_checkpoint_write_time_delta := l_checkpoint_write_time_delta; + o_cn_physical_memory := i_physical_memory; + o_cn_db_memory_usage := i_db_memory_usage; + o_cn_shared_buffer_size := i_shared_buffer_size; + o_cn_session_memory_total_size := i_session_memory_total_size; + o_cn_session_memory_used_size := i_session_memory_used_size; + o_cn_blocks_read := i_blocks_read; + o_cn_blocks_read_delta := l_blocks_read_delta; + o_cn_blocks_hit := i_blocks_hit; + o_cn_blocks_hit_delta := l_blocks_hit_delta; + o_cn_work_memory_size := i_work_memory_size; + o_cn_sorts_in_memory := i_sorts_in_memory; + o_cn_sorts_in_memory_delta := l_sorts_in_memory_delta; + o_cn_sorts_in_disk := i_sorts_in_disk; + o_cn_sorts_in_disk_delta := l_sorts_in_disk_delta; + o_cn_busy_time := i_busy_time; + o_cn_busy_time_delta := l_busy_time_delta; + o_cn_idle_time := i_idle_time; + o_cn_idle_time_delta := l_idle_time_delta; + o_cn_iowait_time := i_iowait_time; + o_cn_iowait_time_delta := l_iowait_time_delta; + o_cn_db_cpu_time := i_db_cpu_time; + o_cn_db_cpu_time_delta := l_db_cpu_time_delta; + +END; +$$ +LANGUAGE plpgsql; + +/* + This function is used to find the performance statistics of each single 
node (data node or coordinator).
+    After the performance statistics of a node are collected, they are inserted into the PMK tables
+    (pmk.pmk_snapshot_datanode_stat for data nodes and pmk.pmk_snapshot_coordinator_stat for coordinators).
+*/
+CREATE OR REPLACE FUNCTION pmk.load_node_stat
+    ( IN i_pmk_curr_collect_start_time TIMESTAMP WITH TIME ZONE
+    , IN i_pmk_last_collect_start_time TIMESTAMP WITH TIME ZONE
+    , IN i_last_snapshot_id INT
+    , IN i_node_name TEXT
+    , IN i_node_type char(1)
+    , IN i_node_id INT
+    , IN i_skip_supper_role boolean
+    )
+RETURNS TABLE
+(
+  snapshot_id int
+, node_id int
+, node_name text
+, node_host text
+, last_startup_time timestamp with time zone
+, number_of_files int
+, physical_reads bigint
+, physical_reads_delta bigint
+, physical_writes bigint
+, physical_writes_delta bigint
+, read_time bigint
+, read_time_delta bigint
+, write_time bigint
+, write_time_delta bigint
+, db_size bigint
+, active_sql_count int
+, wait_sql_count int
+, session_count int
+, xact_commit bigint
+, xact_commit_delta bigint
+, xact_rollback bigint
+, xact_rollback_delta bigint
+, checkpoints_timed numeric(40,0)
+, checkpoints_timed_delta numeric(40,0)
+, checkpoints_req numeric(40,0)
+, checkpoints_req_delta numeric(40,0)
+, checkpoint_write_time double precision
+, checkpoint_write_time_delta double precision
+, physical_memory bigint
+, db_memory_usage bigint
+, shared_buffer_size bigint
+, session_memory_total_size bigint
+, session_memory_used_size bigint
+, blocks_read bigint
+, blocks_read_delta bigint
+, blocks_hit bigint
+, blocks_hit_delta bigint
+, work_memory_size bigint
+, sorts_in_memory bigint
+, sorts_in_memory_delta bigint
+, sorts_in_disk bigint
+, sorts_in_disk_delta bigint
+, busy_time numeric
+, busy_time_delta numeric
+, idle_time numeric
+, idle_time_delta numeric
+, iowait_time numeric
+, iowait_time_delta numeric
+, db_cpu_time numeric
+, db_cpu_time_delta numeric
+)
+AS
+$$
+DECLARE l_snapshot_id INT;
+        l_query_str TEXT;
+        l_node_stat_cur RECORD;
+BEGIN
+
+    IF i_last_snapshot_id IS NULL
+    OR i_last_snapshot_id = 2147483647
+    THEN
+        l_snapshot_id := 1;
+    ELSE
+        l_snapshot_id := i_last_snapshot_id + 1;
+    END IF;
+
+    FOR l_node_stat_cur IN SELECT * FROM pmk.find_node_stat(i_skip_supper_role)
+    LOOP
+        IF i_node_type = 'D'
+        THEN
+            RETURN QUERY
+            (SELECT * FROM pmk.load_datanode_stat ( l_snapshot_id
+                        , i_last_snapshot_id
+                        , i_pmk_last_collect_start_time
+                        , i_node_id
+                        , i_node_name
+                        , l_node_stat_cur.o_number_of_files_1
+                        , l_node_stat_cur.o_physical_reads_1
+                        , l_node_stat_cur.o_physical_writes_1
+                        , l_node_stat_cur.o_read_time_1
+                        , l_node_stat_cur.o_write_time_1
+                        , l_node_stat_cur.o_physical_memory_1
+                        , l_node_stat_cur.o_shared_buffer_size_1
+                        , l_node_stat_cur.o_session_memory_total_size_1
+                        , l_node_stat_cur.o_session_memory_used_size_1
+                        , l_node_stat_cur.o_blocks_read_1
+                        , l_node_stat_cur.o_blocks_hit_1
+                        , l_node_stat_cur.o_db_size_1
+                        , l_node_stat_cur.o_work_memory_size_1
+                        , l_node_stat_cur.o_sorts_in_memory_1
+                        , l_node_stat_cur.o_sorts_in_disk_1
+                        , l_node_stat_cur.o_active_sql_count_1
+                        , l_node_stat_cur.o_wait_sql_count_1
+                        , l_node_stat_cur.o_session_count_1
+                        , l_node_stat_cur.o_busy_time_1
+                        , l_node_stat_cur.o_idle_time_1
+                        , l_node_stat_cur.o_iowait_time_1
+                        , l_node_stat_cur.o_db_cpu_time_1
+                        , l_node_stat_cur.o_db_memory_usage_1
+                        , l_node_stat_cur.o_node_startup_time_1
+                        , l_node_stat_cur.o_node_host_name_1
+                        , l_node_stat_cur.o_xact_commit_1
+                        , l_node_stat_cur.o_xact_rollback_1
+                        , l_node_stat_cur.o_checkpoints_timed_1
+                        , 
l_node_stat_cur.o_checkpoints_req_1 + , l_node_stat_cur.o_checkpoint_write_time_1 + , i_skip_supper_role + )); + ELSE + RETURN QUERY + (SELECT * FROM pmk.load_coordinator_stat ( l_snapshot_id + , i_last_snapshot_id + , i_pmk_last_collect_start_time + , i_node_id + , i_node_name + , l_node_stat_cur.o_number_of_files_1 + , l_node_stat_cur.o_physical_reads_1 + , l_node_stat_cur.o_physical_writes_1 + , l_node_stat_cur.o_read_time_1 + , l_node_stat_cur.o_write_time_1 + , l_node_stat_cur.o_physical_memory_1 + , l_node_stat_cur.o_shared_buffer_size_1 + , l_node_stat_cur.o_session_memory_total_size_1 + , l_node_stat_cur.o_session_memory_used_size_1 + , l_node_stat_cur.o_blocks_read_1 + , l_node_stat_cur.o_blocks_hit_1 + , l_node_stat_cur.o_db_size_1 + , l_node_stat_cur.o_work_memory_size_1 + , l_node_stat_cur.o_sorts_in_memory_1 + , l_node_stat_cur.o_sorts_in_disk_1 + , l_node_stat_cur.o_active_sql_count_1 + , l_node_stat_cur.o_wait_sql_count_1 + , l_node_stat_cur.o_session_count_1 + , l_node_stat_cur.o_busy_time_1 + , l_node_stat_cur.o_idle_time_1 + , l_node_stat_cur.o_iowait_time_1 + , l_node_stat_cur.o_db_cpu_time_1 + , l_node_stat_cur.o_db_memory_usage_1 + , l_node_stat_cur.o_node_startup_time_1 + , l_node_stat_cur.o_node_host_name_1 + , l_node_stat_cur.o_xact_commit_1 + , l_node_stat_cur.o_xact_rollback_1 + , l_node_stat_cur.o_checkpoints_timed_1 + , l_node_stat_cur.o_checkpoints_req_1 + , l_node_stat_cur.o_checkpoint_write_time_1 + , i_skip_supper_role + )); + END IF; + END LOOP; + +END; +$$ +LANGUAGE plpgsql; + +-- This function is used to delete the statistics snapshots based on "collection count" config param + +CREATE OR REPLACE FUNCTION pmk.delete_expired_snapshots ( ) +RETURNS void +AS +$$ +DECLARE l_collection_count INT; + l_retention_snapshot_id INT; +BEGIN + + -- Deleting node statistics based on "collection count" config param + SELECT config_value + INTO l_collection_count + FROM pmk.pmk_configuration + WHERE config_param_name = 'Collection Count'; + + IF l_collection_count > -1 + THEN + IF l_collection_count = 0 + THEN + l_collection_count := 1; + END IF; + + SELECT MIN(snapshot_id) + INTO l_retention_snapshot_id + FROM ( SELECT snapshot_id + FROM pmk.pmk_snapshot + ORDER BY snapshot_id DESC + LIMIT l_collection_count ); + + DELETE FROM pmk.pmk_snapshot_datanode_stat + WHERE snapshot_id < l_retention_snapshot_id; + + DELETE FROM pmk.pmk_snapshot_coordinator_stat + WHERE snapshot_id < l_retention_snapshot_id; + + DELETE FROM pmk.pmk_snapshot + WHERE snapshot_id < l_retention_snapshot_id; + + END IF; + +END; +$$ +LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION pmk.get_meta_data + ( OUT l_pmk_curr_collect_start_time timestamp with time zone + , OUT l_pmk_last_collect_start_time timestamp with time zone + , OUT l_last_snapshot_id int + ) +AS +$$ +DECLARE l_error_message TEXT; +BEGIN + l_error_message := pmk.check_node_type(); + + IF l_error_message IS NOT NULL + THEN + raise notice '%',l_error_message; + RETURN; + END IF; + + l_error_message := pmk.check_pmk_enabled(); + + IF l_error_message IS NOT NULL + THEN + raise notice '%',l_error_message; + RETURN; + END IF; + + SELECT last_snapshot_id, last_snapshot_collect_time + INTO l_last_snapshot_id, l_pmk_last_collect_start_time + FROM pmk.pmk_meta_data; + + l_pmk_curr_collect_start_time := date_trunc('second', current_timestamp); + + IF l_pmk_curr_collect_start_time < l_pmk_last_collect_start_time + THEN + l_error_message := 'ERROR:: There is a change in system time of Gauss MPPDB host. 
PMK does not support scenarios related to system time change.';
+        raise notice '%',l_error_message;
+        RETURN;
+    ELSIF l_pmk_curr_collect_start_time = l_pmk_last_collect_start_time
+    THEN
+        l_error_message := 'ERROR:: Multiple statistics collections cannot be done within a second.';
+        raise notice '%',l_error_message;
+        RETURN;
+    END IF;
+END;
+$$
+LANGUAGE plpgsql;
+
+/*
+    Returns the node name, node type and node id of every node registered in pgxc_node.
+*/
+CREATE OR REPLACE FUNCTION pmk.get_pgxc_node
+    ( OUT o_node_name TEXT
+    , OUT o_node_type CHAR(1)
+    , OUT o_node_id INT
+    )
+RETURNS SETOF RECORD
+AS
+$$
+DECLARE l_error_message TEXT;
+        v_rec RECORD;
+BEGIN
+    l_error_message := pmk.check_node_type();
+
+    IF l_error_message IS NOT NULL
+    THEN
+        raise notice '%',l_error_message;
+        RETURN;
+    END IF;
+
+    l_error_message := pmk.check_pmk_enabled();
+
+    IF l_error_message IS NOT NULL
+    THEN
+        raise notice '%',l_error_message;
+        RETURN;
+    END IF;
+
+    FOR v_rec IN (SELECT node_name, node_type, node_id FROM pgxc_node) LOOP
+        o_node_name := v_rec.node_name;
+        o_node_type := v_rec.node_type;
+        o_node_id := v_rec.node_id;
+        RETURN NEXT;
+    END LOOP;
+
+END;
+$$
+LANGUAGE plpgsql;
+
+/*
+    If the start time is greater than the last stat-collect time, it throws an error.
+    If the end time is null, it assigns the last statistics collection time to the end time.
+    If both the start time and the end time are null, it assigns the last statistics collection time to both parameters.
+    If the start time is null and the end time is not null, it throws an error.
+    If the start time is greater than the end time, it throws an error.
+*/
+CREATE OR REPLACE FUNCTION pmk.check_start_end_dates
+    ( INOUT io_start_pmk_time timestamp with time zone
+    , INOUT io_end_pmk_time timestamp with time zone
+    , OUT o_error_message text
+    )
+AS
+$$
+DECLARE l_last_collect_time timestamp with time zone;
+BEGIN
+
+    SELECT last_snapshot_collect_time
+      INTO l_last_collect_time
+      FROM pmk.pmk_meta_data;
+
+    IF io_start_pmk_time > l_last_collect_time
+    THEN
+        o_error_message := 'ERROR:: The from-time provided is greater than the last statistics-collection time (' || l_last_collect_time || '). 
Invalid value(s) provided for the input time-range'; + RETURN; + END IF; + + IF io_end_pmk_time IS NULL + THEN + io_end_pmk_time := l_last_collect_time; + + IF io_start_pmk_time IS NULL + THEN + io_start_pmk_time := io_end_pmk_time; + END IF; + ELSE + IF (io_start_pmk_time IS NULL) OR + (io_start_pmk_time > io_end_pmk_time) + THEN + o_error_message := 'ERROR:: Invalid value(s) provided for the input time-range'; + RETURN; + END IF; + END IF; + +END; +$$ +LANGUAGE plpgsql; + +-- Host CPU statistics at cluster level + +CREATE OR REPLACE FUNCTION pmk.get_cluster_host_cpu_stat + ( IN i_start_pmk_time timestamp with time zone + , IN i_end_pmk_time timestamp with time zone + , OUT o_stat_collect_time timestamp + , OUT o_avg_cpu_total_time numeric(21, 3) + , OUT o_avg_cpu_busy_time numeric(21, 3) + , OUT o_avg_cpu_iowait_time numeric(21, 3) + , OUT o_cpu_busy_perc numeric(5, 2) + , OUT o_cpu_io_wait_perc numeric(5, 2) + , OUT o_min_cpu_busy_perc numeric(5, 2) + , OUT o_max_cpu_busy_perc numeric(5, 2) + , OUT o_min_cpu_iowait_perc numeric(5, 2) + , OUT o_max_cpu_iowait_perc numeric(5, 2) + ) +RETURNS SETOF record +AS +$$ +DECLARE l_error_message text; +BEGIN + + l_error_message := pmk.check_node_type(); + + IF l_error_message IS NOT NULL + THEN + raise notice '%',l_error_message; + RETURN; + END IF; + + l_error_message := pmk.check_pmk_enabled(); + + IF l_error_message IS NOT NULL + THEN + raise notice '%',l_error_message; + RETURN; + END IF; + + -- Verifying the input start and end times + pmk.check_start_end_dates(i_start_pmk_time, i_end_pmk_time, l_error_message); + + IF l_error_message IS NOT NULL + THEN + l_error_message := l_error_message || ' during generation of cluster host CPU statistics ...'; + raise notice '%',l_error_message; + RETURN; + END IF; + + RETURN QUERY + WITH snap AS + ( SELECT snapshot_id + , current_snapshot_time AS pmk_curr_collect_start_time + FROM pmk.pmk_snapshot + WHERE current_snapshot_time BETWEEN i_start_pmk_time AND i_end_pmk_time + ) + , os_cpu_stat AS + ( SELECT s.pmk_curr_collect_start_time + , node_host + , node_name + , (busy_time_delta * 10) AS cpu_busy_time + , (idle_time_delta * 10) AS cpu_idle_time + , (iowait_time_delta * 10) AS cpu_iowait_time + FROM pmk.pmk_snapshot_datanode_stat dns, snap s + WHERE dns.snapshot_id = s.snapshot_id + UNION ALL + SELECT s.pmk_curr_collect_start_time + , node_host + , node_name + , (busy_time_delta * 10) AS cpu_busy_time + , (idle_time_delta * 10) AS cpu_idle_time + , (iowait_time_delta * 10) AS cpu_iowait_time + FROM pmk.pmk_snapshot_coordinator_stat dns, snap s + WHERE dns.snapshot_id = s.snapshot_id + ) + , os_cpu_stat1 AS + ( SELECT pmk_curr_collect_start_time::timestamp AS stat_collect_time + , node_host + , cpu_busy_time + , cpu_idle_time + , cpu_iowait_time + , (cpu_busy_time+cpu_idle_time+cpu_iowait_time)::numeric AS cpu_total_time + FROM ( SELECT pmk_curr_collect_start_time + , node_host + , cpu_busy_time + , cpu_idle_time + , cpu_iowait_time + , rank() OVER (PARTITION BY pmk_curr_collect_start_time, node_host ORDER BY cpu_busy_time DESC, node_name) AS node_cpu_busy_order + FROM os_cpu_stat + ) + WHERE node_cpu_busy_order = 1 + ) + SELECT hcs.stat_collect_time + , AVG(hcs.cpu_total_time)::numeric(21, 3) AS avg_cpu_total_time + , AVG(hcs.cpu_busy_time)::numeric(21, 3) AS avg_cpu_busy_time + , AVG(hcs.cpu_iowait_time)::numeric(21, 3) AS avg_cpu_iowait_time + , ( (SUM(cpu_busy_time) * 100.0) / NULLIF(SUM(cpu_total_time), 0) )::numeric(5, 2) AS cpu_busy_perc + , ( (SUM(cpu_iowait_time) * 100.0) / 
NULLIF(SUM(cpu_total_time), 0) )::numeric(5, 2) AS cpu_io_wait_perc + , MIN(hcs.cpu_busy_time_perc)::numeric(5, 2) AS min_cpu_busy_perc + , MAX(hcs.cpu_busy_time_perc)::numeric(5, 2) AS max_cpu_busy_perc + , MIN(hcs.cpu_iowait_time_perc)::numeric(5, 2) AS min_cpu_iowait_perc + , MAX(hcs.cpu_iowait_time_perc)::numeric(5, 2) AS max_cpu_iowait_perc + FROM ( SELECT node_host + , stat_collect_time + , cpu_total_time + , cpu_busy_time + , cpu_iowait_time + , ( (cpu_busy_time * 100.0) / NULLIF(cpu_total_time, 0) )::numeric(5, 2) AS cpu_busy_time_perc + , ( (cpu_iowait_time * 100.0) / NULLIF(cpu_total_time, 0) )::numeric(5, 2) AS cpu_iowait_time_perc + FROM os_cpu_stat1 ) hcs + GROUP BY hcs.stat_collect_time + ORDER BY hcs.stat_collect_time; + +END; +$$ +LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION pmk.get_cluster_mppdb_cpu_stat + ( IN i_start_pmk_time timestamp with time zone + , IN i_end_pmk_time timestamp with time zone + , OUT o_stat_collect_time timestamp + , OUT o_avg_mppdb_cpu_time numeric(21, 3) + , OUT o_avg_host_cpu_busy_time numeric(21, 3) + , OUT o_avg_host_cpu_total_time numeric(21, 3) + , OUT o_mppdb_cpu_time_perc_wrt_busy_time numeric(5, 2) + , OUT o_mppdb_cpu_time_perc_wrt_total_time numeric(5, 2) + , OUT o_min_mppdb_cpu_time_perc_wrt_busy_time numeric(5, 2) + , OUT o_max_mppdb_cpu_time_perc_wrt_busy_time numeric(5, 2) + , OUT o_min_mppdb_cpu_time_perc_wrt_total_time numeric(5, 2) + , OUT o_max_mppdb_cpu_time_perc_wrt_total_time numeric(5, 2) + ) +RETURNS SETOF record +AS +$$ +DECLARE l_error_message text; +BEGIN + + l_error_message := pmk.check_node_type(); + + IF l_error_message IS NOT NULL + THEN + raise notice '%',l_error_message; + RETURN; + END IF; + + l_error_message := pmk.check_pmk_enabled(); + + IF l_error_message IS NOT NULL + THEN + raise notice '%',l_error_message; + RETURN; + END IF; + + -- Verifying the input start and end times + pmk.check_start_end_dates(i_start_pmk_time, i_end_pmk_time, l_error_message); + + IF l_error_message IS NOT NULL + THEN + l_error_message := l_error_message || ' during generation of cluster MPPDB CPU statistics ...'; + raise notice '%',l_error_message; + RETURN; + END IF; + + RETURN QUERY + WITH snap AS + ( SELECT snapshot_id + , current_snapshot_time AS pmk_curr_collect_start_time + FROM pmk.pmk_snapshot + WHERE current_snapshot_time BETWEEN i_start_pmk_time AND i_end_pmk_time + ) + , cpu_stat AS + ( SELECT s.snapshot_id + , s.pmk_curr_collect_start_time + , dns.node_name + , dns.node_host + , (dns.busy_time_delta * 10) AS host_cpu_busy_time + , (dns.idle_time_delta * 10) AS host_cpu_idle_time + , (dns.iowait_time_delta * 10) AS host_cpu_iowait_time + , (dns.db_cpu_time_delta * 10) AS mppdb_cpu_time + FROM pmk.pmk_snapshot_datanode_stat dns, snap s + WHERE dns.snapshot_id = s.snapshot_id + UNION ALL + SELECT s.snapshot_id + , s.pmk_curr_collect_start_time + , dns.node_name + , dns.node_host + , (dns.busy_time_delta * 10) AS host_cpu_busy_time + , (dns.idle_time_delta * 10) AS host_cpu_idle_time + , (dns.iowait_time_delta * 10) AS host_cpu_iowait_time + , (dns.db_cpu_time_delta * 10) AS mppdb_cpu_time + FROM pmk.pmk_snapshot_coordinator_stat dns, snap s + WHERE dns.snapshot_id = s.snapshot_id + ) + , host_cpu_stat AS + ( SELECT pmk_curr_collect_start_time::timestamp AS stat_collect_time + , node_host + , host_cpu_busy_time + , host_cpu_idle_time + , host_cpu_iowait_time + , (host_cpu_busy_time+host_cpu_idle_time+host_cpu_iowait_time)::numeric AS host_cpu_total_time + FROM ( SELECT pmk_curr_collect_start_time + , node_host + , 
host_cpu_busy_time + , host_cpu_idle_time + , host_cpu_iowait_time + , rank() OVER (PARTITION BY snapshot_id, node_host + ORDER BY host_cpu_busy_time DESC, node_name) AS node_cpu_busy_order + FROM cpu_stat + ) + WHERE node_cpu_busy_order = 1 + ) + , host_cpu_stat_summary AS + ( SELECT stat_collect_time + , AVG(host_cpu_busy_time)::numeric(21, 3) AS avg_host_cpu_busy_time + , AVG(host_cpu_total_time)::numeric(21, 3) AS avg_host_cpu_total_time + , SUM(host_cpu_busy_time)::numeric(21, 3) AS tot_host_cpu_busy_time + , SUM(host_cpu_total_time)::numeric(21, 3) AS tot_host_cpu_total_time + FROM host_cpu_stat + GROUP BY stat_collect_time + ) + , mppdb_cpu_stat0 AS + ( SELECT pmk_curr_collect_start_time::timestamp AS stat_collect_time + , node_name + , mppdb_cpu_time + , host_cpu_busy_time + , (host_cpu_busy_time+host_cpu_idle_time+host_cpu_iowait_time)::numeric AS host_cpu_total_time + FROM cpu_stat + ) + , mppdb_cpu_stat AS + ( SELECT stat_collect_time + , node_name + , mppdb_cpu_time + , ( (mppdb_cpu_time * 100.0) / NULLIF(host_cpu_busy_time, 0) )::numeric(5, 2) AS mppdb_cpu_time_perc_wrt_busy_time + , ( (mppdb_cpu_time * 100.0) / NULLIF(host_cpu_total_time, 0) )::numeric(5, 2) AS mppdb_cpu_time_perc_wrt_total_time + FROM mppdb_cpu_stat0 + ) + , mppdb_cpu_stat_summary AS + ( SELECT stat_collect_time + , AVG(mppdb_cpu_time)::numeric(21, 3) AS avg_mppdb_cpu_time + , SUM(mppdb_cpu_time)::numeric(21, 3) AS tot_mppdb_cpu_time + , MIN(mppdb_cpu_time_perc_wrt_busy_time)::numeric(5, 2) AS min_mppdb_cpu_time_perc_wrt_busy_time + , MAX(mppdb_cpu_time_perc_wrt_busy_time)::numeric(5, 2) AS max_mppdb_cpu_time_perc_wrt_busy_time + , MIN(mppdb_cpu_time_perc_wrt_total_time)::numeric(5, 2) AS min_mppdb_cpu_time_perc_wrt_total_time + , MAX(mppdb_cpu_time_perc_wrt_total_time)::numeric(5, 2) AS max_mppdb_cpu_time_perc_wrt_total_time + FROM mppdb_cpu_stat + GROUP BY stat_collect_time + ) + SELECT mcs.stat_collect_time + , mcs.avg_mppdb_cpu_time + , hcs.avg_host_cpu_busy_time + , hcs.avg_host_cpu_total_time + , CASE WHEN mcs.tot_mppdb_cpu_time < hcs.tot_host_cpu_busy_time + THEN ( (mcs.tot_mppdb_cpu_time * 100.0) / NULLIF(hcs.tot_host_cpu_busy_time, 0) )::numeric(5, 2) + ELSE 100.00 + END AS mppdb_cpu_time_perc_wrt_busy_time + , CASE WHEN mcs.tot_mppdb_cpu_time < hcs.tot_host_cpu_total_time + THEN ( (mcs.tot_mppdb_cpu_time * 100.0) / NULLIF(hcs.tot_host_cpu_total_time, 0) )::numeric(5, 2) + ELSE 100.00 + END AS mppdb_cpu_time_perc_wrt_total_time + , mcs.min_mppdb_cpu_time_perc_wrt_busy_time + , mcs.max_mppdb_cpu_time_perc_wrt_busy_time + , mcs.min_mppdb_cpu_time_perc_wrt_total_time + , mcs.max_mppdb_cpu_time_perc_wrt_total_time + FROM mppdb_cpu_stat_summary mcs + , host_cpu_stat_summary hcs + WHERE mcs.stat_collect_time = hcs.stat_collect_time + ORDER BY mcs.stat_collect_time; + +END; +$$ +LANGUAGE plpgsql; + +-- Shared buffer statistics at cluster level + +CREATE OR REPLACE FUNCTION pmk.get_cluster_shared_buffer_stat + ( IN i_start_pmk_time timestamp with time zone + , IN i_end_pmk_time timestamp with time zone + , OUT o_stat_collect_time timestamp + , OUT o_shared_buffer_hit_ratio numeric(5, 2) + , OUT o_min_shared_buffer_hit_ratio numeric(5, 2) + , OUT o_max_shared_buffer_hit_ratio numeric(5, 2) + , OUT o_total_blocks_read bigint + , OUT o_total_blocks_hit bigint + ) +RETURNS SETOF record +AS +$$ +DECLARE l_error_message text; +BEGIN + + l_error_message := pmk.check_node_type(); + + IF l_error_message IS NOT NULL + THEN + raise notice '%',l_error_message; + RETURN; + END IF; + + l_error_message := 
pmk.check_pmk_enabled(); + + IF l_error_message IS NOT NULL + THEN + raise notice '%',l_error_message; + RETURN; + END IF; + + -- Verifying the input start and end times + pmk.check_start_end_dates(i_start_pmk_time, i_end_pmk_time, l_error_message); + + IF l_error_message IS NOT NULL + THEN + l_error_message := l_error_message || ' during generation of cluster shared buffer statistics ...'; + raise notice '%',l_error_message; + RETURN; + END IF; + + RETURN QUERY + WITH snap AS + ( SELECT snapshot_id + , current_snapshot_time AS pmk_curr_collect_start_time + FROM pmk.pmk_snapshot + WHERE current_snapshot_time BETWEEN i_start_pmk_time AND i_end_pmk_time + ) + SELECT pmk_curr_collect_start_time::timestamp AS stat_collect_time + , ( (total_blocks_hit * 100.0) / NULLIF(total_blocks_read+total_blocks_hit, 0) )::numeric(5, 2) AS shared_buffer_hit_ratio + , min_shared_buffer_hit_ratio + , max_shared_buffer_hit_ratio + , total_blocks_read + , total_blocks_hit + FROM ( SELECT pmk_curr_collect_start_time + , SUM(blocks_read)::bigint AS total_blocks_read + , SUM(blocks_hit)::bigint AS total_blocks_hit + , MIN(shared_buffer_hit_ratio)::numeric(5, 2) AS min_shared_buffer_hit_ratio + , MAX(shared_buffer_hit_ratio)::numeric(5, 2) AS max_shared_buffer_hit_ratio + FROM ( SELECT s.pmk_curr_collect_start_time + , node_name + , blocks_read_delta AS blocks_read + , blocks_hit_delta AS blocks_hit + , ( (blocks_hit_delta * 100.0) / NULLIF((blocks_read_delta + blocks_hit_delta), 0) )::numeric(5, 2) AS shared_buffer_hit_ratio + FROM pmk.pmk_snapshot_datanode_stat dns, snap s + WHERE dns.snapshot_id = s.snapshot_id + UNION ALL + SELECT s.pmk_curr_collect_start_time + , node_name + , blocks_read_delta AS blocks_read + , blocks_hit_delta AS blocks_hit + , ( (blocks_hit_delta * 100.0) / NULLIF((blocks_read_delta + blocks_hit_delta), 0) )::numeric(5, 2) AS shared_buffer_hit_ratio + FROM pmk.pmk_snapshot_coordinator_stat dns, snap s + WHERE dns.snapshot_id = s.snapshot_id + ) + GROUP BY pmk_curr_collect_start_time + ) + ORDER BY stat_collect_time; + +END; +$$ +LANGUAGE plpgsql; + +-- Sort statistics at cluster level + +CREATE OR REPLACE FUNCTION pmk.get_cluster_memory_sort_stat + ( IN i_start_pmk_time timestamp with time zone + , IN i_end_pmk_time timestamp with time zone + , OUT o_stat_collect_time timestamp + , OUT o_memory_sort_ratio numeric(5, 2) + , OUT o_min_memory_sort_ratio numeric(5, 2) + , OUT o_max_memory_sort_ratio numeric(5, 2) + , OUT o_total_memory_sorts bigint + , OUT o_total_disk_sorts bigint + ) +RETURNS SETOF record +AS +$$ +DECLARE l_error_message text; +BEGIN + + l_error_message := pmk.check_node_type(); + + IF l_error_message IS NOT NULL + THEN + raise notice '%',l_error_message; + RETURN; + END IF; + + l_error_message := pmk.check_pmk_enabled(); + + IF l_error_message IS NOT NULL + THEN + raise notice '%',l_error_message; + RETURN; + END IF; + + -- Verifying the input start and end times + pmk.check_start_end_dates(i_start_pmk_time, i_end_pmk_time, l_error_message); + + IF l_error_message IS NOT NULL + THEN + l_error_message := l_error_message || ' during generation of cluster memory sort statistics ...'; + raise notice '%',l_error_message; + RETURN; + END IF; + + RETURN QUERY + WITH snap AS + ( SELECT snapshot_id + , current_snapshot_time AS pmk_curr_collect_start_time + FROM pmk.pmk_snapshot + WHERE current_snapshot_time BETWEEN i_start_pmk_time AND i_end_pmk_time + ) + SELECT pmk_curr_collect_start_time::timestamp AS stat_collect_time + , ( (total_memory_sorts * 100.0) / 
NULLIF(total_disk_sorts+total_memory_sorts, 0) )::numeric(5, 2) AS memory_sort_ratio + , min_memory_sort_ratio + , max_memory_sort_ratio + , total_memory_sorts + , total_disk_sorts + FROM ( SELECT pmk_curr_collect_start_time + , SUM(memory_sorts)::bigint AS total_memory_sorts + , SUM(disk_sorts)::bigint AS total_disk_sorts + , MIN(memory_sort_ratio)::numeric(5, 2) AS min_memory_sort_ratio + , MAX(memory_sort_ratio)::numeric(5, 2) AS max_memory_sort_ratio + FROM ( SELECT s.pmk_curr_collect_start_time + , node_name + , sorts_in_memory_delta AS memory_sorts + , sorts_in_disk_delta AS disk_sorts + , ( (sorts_in_memory_delta * 100.0) / NULLIF((sorts_in_disk_delta + sorts_in_memory_delta), 0) )::numeric(5, 2) AS memory_sort_ratio + FROM pmk.pmk_snapshot_datanode_stat dns, snap s + WHERE dns.snapshot_id = s.snapshot_id + UNION ALL + SELECT s.pmk_curr_collect_start_time + , node_name + , sorts_in_memory_delta AS memory_sorts + , sorts_in_disk_delta AS disk_sorts + , ( (sorts_in_memory_delta * 100.0) / NULLIF((sorts_in_disk_delta + sorts_in_memory_delta), 0) )::numeric(5, 2) AS memory_sort_ratio + FROM pmk.pmk_snapshot_coordinator_stat dns, snap s + WHERE dns.snapshot_id = s.snapshot_id + ) + GROUP BY pmk_curr_collect_start_time + ) + ORDER BY stat_collect_time; + +END; +$$ +LANGUAGE plpgsql; + +-- I/O statistics at cluster level +CREATE OR REPLACE FUNCTION pmk.get_cluster_io_stat + ( IN i_start_pmk_time timestamp with time zone + , IN i_end_pmk_time timestamp with time zone + , OUT o_stat_collect_time timestamp + , OUT o_number_of_files int + , OUT o_physical_reads bigint + , OUT o_physical_writes bigint + , OUT o_read_time bigint + , OUT o_write_time bigint + , OUT o_avg_read_per_sec numeric(20,2) + , OUT o_avg_read_time numeric(20,3) + , OUT o_avg_write_per_sec numeric(20,2) + , OUT o_avg_write_time numeric(20,3) + , OUT o_min_node_read_per_sec numeric(20,2) + , OUT o_max_node_read_per_sec numeric(20,2) + , OUT o_min_node_read_time numeric(20,3) + , OUT o_max_node_read_time numeric(20,3) + , OUT o_min_node_write_per_sec numeric(20,2) + , OUT o_max_node_write_per_sec numeric(20,2) + , OUT o_min_node_write_time numeric(20,3) + , OUT o_max_node_write_time numeric(20,3) + ) +RETURNS SETOF record +AS +$$ +DECLARE l_error_message text; +BEGIN + + l_error_message := pmk.check_node_type(); + + IF l_error_message IS NOT NULL + THEN + raise notice '%',l_error_message; + RETURN; + END IF; + + l_error_message := pmk.check_pmk_enabled(); + + IF l_error_message IS NOT NULL + THEN + raise notice '%',l_error_message; + RETURN; + END IF; + + -- Verifying the input start and end times + pmk.check_start_end_dates(i_start_pmk_time, i_end_pmk_time, l_error_message); + + IF l_error_message IS NOT NULL + THEN + l_error_message := l_error_message || ' during generation of cluster I/O statistics ...'; + raise notice '%',l_error_message; + RETURN; + END IF; + + RETURN QUERY + WITH snap AS + ( SELECT snapshot_id + , current_snapshot_time AS pmk_curr_collect_start_time + FROM pmk.pmk_snapshot + WHERE current_snapshot_time BETWEEN i_start_pmk_time AND i_end_pmk_time + ) + SELECT pmk_curr_collect_start_time::timestamp AS stat_collect_time + , number_of_files + , physical_reads + , physical_writes + , read_time + , write_time + , ( physical_reads * 1000000.0 / NULLIF(read_time, 0) )::numeric(20,2) AS avg_read_per_sec + , ( read_time * 1.0 / NULLIF(physical_reads, 0) )::numeric(20,3) AS avg_read_time + , ( physical_writes * 1000000.0 / NULLIF(write_time, 0) )::numeric(20,2) AS avg_write_per_sec + , ( write_time * 1.0 / 
NULLIF(physical_writes, 0) )::numeric(20,3) AS avg_write_time + , min_node_read_per_sec + , max_node_read_per_sec + , min_node_read_time + , max_node_read_time + , min_node_write_per_sec + , max_node_write_per_sec + , min_node_write_time + , max_node_write_time + FROM ( SELECT pmk_curr_collect_start_time + , SUM(number_of_files)::int AS number_of_files + , SUM(physical_reads_delta)::bigint AS physical_reads + , SUM(physical_writes_delta)::bigint AS physical_writes + , SUM(read_time_delta)::bigint AS read_time + , SUM(write_time_delta)::bigint AS write_time + , MIN(node_read_per_sec) AS min_node_read_per_sec + , MAX(node_read_per_sec) AS max_node_read_per_sec + , MIN(node_read_time) AS min_node_read_time + , MAX(node_read_time) AS max_node_read_time + , MIN(node_write_per_sec) AS min_node_write_per_sec + , MAX(node_write_per_sec) AS max_node_write_per_sec + , MIN(node_write_time) AS min_node_write_time + , MAX(node_write_time) AS max_node_write_time + FROM ( SELECT s.pmk_curr_collect_start_time + , node_name + , number_of_files + , physical_reads_delta + , physical_writes_delta + , read_time_delta + , write_time_delta + , ( physical_reads_delta * 1000000.0 / NULLIF(read_time_delta, 0) )::numeric(20,2) AS node_read_per_sec + , ( read_time_delta * 1.0 / NULLIF(physical_reads_delta, 0) )::numeric(20,3) AS node_read_time + , ( physical_writes_delta * 1000000.0 / NULLIF(write_time_delta, 0) )::numeric(20,2) AS node_write_per_sec + , ( write_time_delta * 1.0 / NULLIF(physical_writes_delta, 0) )::numeric(20,3) AS node_write_time + FROM pmk.pmk_snapshot_datanode_stat dns, snap s + WHERE dns.snapshot_id = s.snapshot_id + UNION ALL + SELECT s.pmk_curr_collect_start_time + , node_name + , number_of_files + , physical_reads_delta + , physical_writes_delta + , read_time_delta + , write_time_delta + , ( physical_reads_delta * 1000000.0 / NULLIF(read_time_delta, 0) )::numeric(20,2) AS node_read_per_sec + , ( read_time_delta * 1.0 / NULLIF(physical_reads_delta, 0) )::numeric(20,3) AS node_read_time + , ( physical_writes_delta * 1000000.0 / NULLIF(write_time_delta, 0) )::numeric(20,2) AS node_write_per_sec + , ( write_time_delta * 1.0 / NULLIF(physical_writes_delta, 0) )::numeric(20,3) AS node_write_time + FROM pmk.pmk_snapshot_coordinator_stat dns, snap s + WHERE dns.snapshot_id = s.snapshot_id + ) + GROUP BY pmk_curr_collect_start_time + ) + ORDER BY stat_collect_time; + +END; +$$ +LANGUAGE plpgsql; + +-- Disk usage statistics at cluster level +CREATE OR REPLACE FUNCTION pmk.get_cluster_disk_usage_stat + ( IN i_start_pmk_time timestamp with time zone + , IN i_end_pmk_time timestamp with time zone + , IN i_db_size text + , OUT o_stat_collect_time timestamp + , OUT o_tot_datanode_db_size text + , OUT o_max_datanode_db_size text + , OUT o_tot_physical_writes bigint + , OUT o_max_node_physical_writes bigint + , OUT o_max_node_write_per_sec numeric(20,2) + , OUT o_avg_write_per_sec numeric(20,2) + ) +RETURNS SETOF record +AS +$$ +DECLARE l_error_message text; +DECLARE l_db_size bigint; +BEGIN + + l_error_message := pmk.check_node_type(); + + IF l_error_message IS NOT NULL + THEN + raise notice '%',l_error_message; + RETURN; + END IF; + + l_error_message := pmk.check_pmk_enabled(); + + IF l_error_message IS NOT NULL + THEN + raise notice '%',l_error_message; + RETURN; + END IF; + + -- Verifying the input start and end times + pmk.check_start_end_dates(i_start_pmk_time, i_end_pmk_time, l_error_message); + + IF l_error_message IS NOT NULL + THEN + l_error_message := l_error_message || ' during generation of 
cluster disk usage statistics ...'; + raise notice '%',l_error_message; + RETURN; + END IF; + + IF i_db_size = '0' + THEN + SELECT SUM(pg_database_size(oid))::bigint + INTO l_db_size + FROM pg_database; + ELSE + SELECT SUM(i_db_size)::bigint + INTO l_db_size; + END IF; + RETURN QUERY + WITH snap AS + ( SELECT snapshot_id + , current_snapshot_time AS pmk_curr_collect_start_time + FROM pmk.pmk_snapshot + WHERE current_snapshot_time BETWEEN i_start_pmk_time AND i_end_pmk_time + ) + , disk_stat AS + ( + SELECT s.pmk_curr_collect_start_time + , db_size + , physical_writes_delta + , write_time_delta + , ( physical_writes_delta * 1000000.0 / NULLIF(write_time_delta, 0) )::numeric(20,2) AS node_write_per_sec + FROM pmk.pmk_snapshot_datanode_stat dns, snap s + WHERE dns.snapshot_id = s.snapshot_id + ) + SELECT pmk_curr_collect_start_time::timestamp AS stat_collect_time + , pg_size_pretty(tot_datanode_db_size) AS tot_datanode_db_size + , pg_size_pretty(max_datanode_db_size) AS max_datanode_db_size + , tot_physical_writes + , max_node_physical_writes + , max_node_write_per_sec + , ( tot_physical_writes * 1000000.0 / NULLIF(tot_write_time, 0) )::numeric(20,2) AS avg_write_per_sec + FROM ( SELECT pmk_curr_collect_start_time + , l_db_size::bigint AS tot_datanode_db_size + , MAX(db_size)::bigint AS max_datanode_db_size + , SUM(physical_writes_delta)::bigint AS tot_physical_writes + , SUM(write_time_delta)::bigint AS tot_write_time + , MAX(physical_writes_delta)::bigint AS max_node_physical_writes + , MAX(node_write_per_sec) AS max_node_write_per_sec + FROM disk_stat + GROUP BY pmk_curr_collect_start_time + ) + ORDER BY pmk_curr_collect_start_time; + +END; +$$ +LANGUAGE plpgsql; + +-- Active SQL count statistics at cluster level +CREATE OR REPLACE FUNCTION pmk.get_cluster_active_sql_count + ( IN i_start_pmk_time timestamp with time zone + , IN i_end_pmk_time timestamp with time zone + , OUT o_stat_collect_time timestamp + , OUT o_tot_active_sql_count int + , OUT o_avg_active_sql_count numeric(9, 2) + , OUT o_min_active_sql_count int + , OUT o_max_active_sql_count int + ) +RETURNS SETOF record +AS +$$ +DECLARE l_error_message text; +BEGIN + + l_error_message := pmk.check_node_type(); + + IF l_error_message IS NOT NULL + THEN + raise notice '%',l_error_message; + RETURN; + END IF; + + l_error_message := pmk.check_pmk_enabled(); + + IF l_error_message IS NOT NULL + THEN + raise notice '%',l_error_message; + RETURN; + END IF; + + -- Verifying the input start and end times + pmk.check_start_end_dates(i_start_pmk_time, i_end_pmk_time, l_error_message); + + IF l_error_message IS NOT NULL + THEN + l_error_message := l_error_message || ' during generation of active SQL count statistics ...'; + raise notice '%',l_error_message; + RETURN; + END IF; + + RETURN QUERY + WITH snap AS + ( SELECT snapshot_id + , current_snapshot_time AS pmk_curr_collect_start_time + FROM pmk.pmk_snapshot + WHERE current_snapshot_time BETWEEN i_start_pmk_time AND i_end_pmk_time + ) + SELECT pmk_curr_collect_start_time::timestamp AS stat_collect_time + , tot_active_sql_count + , avg_active_sql_count + , min_active_sql_count + , max_active_sql_count + FROM ( SELECT s.pmk_curr_collect_start_time + , SUM(active_sql_count)::int AS tot_active_sql_count + , ROUND(AVG(active_sql_count), 2)::numeric(9, 2) AS avg_active_sql_count + , MIN(active_sql_count)::int AS min_active_sql_count + , MAX(active_sql_count)::int AS max_active_sql_count + FROM pmk.pmk_snapshot_coordinator_stat dns, snap s + WHERE dns.snapshot_id = s.snapshot_id + GROUP BY 
s.pmk_curr_collect_start_time + ) + ORDER BY stat_collect_time; + +END; +$$ +LANGUAGE plpgsql; + +-- Connected session count statistics at cluster level +CREATE OR REPLACE FUNCTION pmk.get_cluster_session_count + ( IN i_start_pmk_time timestamp with time zone + , IN i_end_pmk_time timestamp with time zone + , OUT o_stat_collect_time timestamp + , OUT o_tot_session_count int + , OUT o_avg_session_count numeric(9, 2) + , OUT o_min_session_count int + , OUT o_max_session_count int + ) +RETURNS SETOF record +AS +$$ +DECLARE l_error_message text; +BEGIN + + l_error_message := pmk.check_node_type(); + + IF l_error_message IS NOT NULL + THEN + raise notice '%',l_error_message; + RETURN; + END IF; + + l_error_message := pmk.check_pmk_enabled(); + + IF l_error_message IS NOT NULL + THEN + raise notice '%',l_error_message; + RETURN; + END IF; + + -- Verifying the input start and end times + pmk.check_start_end_dates(i_start_pmk_time, i_end_pmk_time, l_error_message); + + IF l_error_message IS NOT NULL + THEN + l_error_message := l_error_message || ' during generation of session count statistics ...'; + raise notice '%',l_error_message; + RETURN; + END IF; + + RETURN QUERY + WITH snap AS + ( SELECT snapshot_id + , current_snapshot_time AS pmk_curr_collect_start_time + FROM pmk.pmk_snapshot + WHERE current_snapshot_time BETWEEN i_start_pmk_time AND i_end_pmk_time + ) + SELECT pmk_curr_collect_start_time::timestamp AS stat_collect_time + , tot_session_count + , avg_session_count + , min_session_count + , max_session_count + FROM ( SELECT s.pmk_curr_collect_start_time + , SUM(session_count)::int AS tot_session_count + , ROUND(AVG(session_count), 2)::numeric(9, 2) AS avg_session_count + , MIN(session_count)::int AS min_session_count + , MAX(session_count)::int AS max_session_count + FROM pmk.pmk_snapshot_coordinator_stat dns, snap s + WHERE dns.snapshot_id = s.snapshot_id + GROUP BY s.pmk_curr_collect_start_time + ) + ORDER BY stat_collect_time; + +END; +$$ +LANGUAGE plpgsql; + +-- CPU statistics at node level +CREATE OR REPLACE FUNCTION pmk.get_node_cpu_stat + ( IN i_node_name text + , IN i_start_pmk_time timestamp with time zone + , IN i_end_pmk_time timestamp with time zone + , OUT o_node_type char(1) + , OUT o_node_name text + , OUT o_node_host text + , OUT o_stat_collect_time timestamp + , OUT o_mppdb_cpu_time bigint + , OUT o_host_cpu_busy_time bigint + , OUT o_host_cpu_total_time bigint + , OUT o_mppdb_cpu_time_perc_wrt_busy_time numeric(5, 2) + , OUT o_mppdb_cpu_time_perc_wrt_total_time numeric(5, 2) + ) +RETURNS SETOF record +AS +$$ +DECLARE l_node_type char(1); + l_node_name text; + l_error_message text; +BEGIN + + l_error_message := pmk.check_node_type(); + + IF l_error_message IS NOT NULL + THEN + raise notice '%',l_error_message; + RETURN; + END IF; + + l_error_message := pmk.check_pmk_enabled(); + + IF l_error_message IS NOT NULL + THEN + raise notice '%',l_error_message; + RETURN; + END IF; + + l_node_name := UPPER(i_node_name); + + IF l_node_name <> 'ALL' + THEN + SELECT MAX(node_type) + INTO l_node_type + FROM pgxc_node + WHERE UPPER(node_name) = l_node_name + LIMIT 1; + + IF l_node_type IS NULL + THEN + l_error_message := 'ERROR:: Invalid node name ("' || i_node_name || '") provided during generation of node (MPPDB instance) CPU statistics ...'; + + raise notice '%',l_error_message; + RETURN; + END IF; + END IF; + + -- Verifying the input start and end times + pmk.check_start_end_dates(i_start_pmk_time, i_end_pmk_time, l_error_message); + + IF l_error_message IS NOT NULL + THEN + 
l_error_message := l_error_message || ' during generation of node (MPPDB instance) CPU statistics ...'; + raise notice '%',l_error_message; + RETURN; + END IF; + + IF l_node_name = 'ALL' + THEN + RETURN QUERY + WITH snap AS + ( SELECT snapshot_id + , current_snapshot_time AS pmk_curr_collect_start_time + FROM pmk.pmk_snapshot + WHERE current_snapshot_time BETWEEN i_start_pmk_time AND i_end_pmk_time + ) + SELECT node_type + , node_name + , node_host + , pmk_curr_collect_start_time::timestamp AS stat_collect_time + , mppdb_cpu_time + , host_cpu_busy_time + , (host_cpu_busy_time+host_cpu_idle_time+host_cpu_iowait_time)::bigint AS host_cpu_total_time + , ( (LEAST(mppdb_cpu_time,host_cpu_busy_time) * 100.0) / NULLIF(host_cpu_busy_time, 0) )::numeric(5, 2) AS mppdb_cpu_time_perc_wrt_busy_time + , ( (LEAST(mppdb_cpu_time,host_total_cpu_time) * 100.0) / NULLIF((host_cpu_busy_time+host_cpu_idle_time+host_cpu_iowait_time), 0) )::numeric(5, 2) AS mppdb_cpu_time_perc_wrt_total_time + FROM ( SELECT 'D'::char(1) AS node_type + , node_name + , node_host + , s.pmk_curr_collect_start_time + , (busy_time_delta * 10)::bigint AS host_cpu_busy_time + , (idle_time_delta * 10)::bigint AS host_cpu_idle_time + , (iowait_time_delta * 10)::bigint AS host_cpu_iowait_time + , ((busy_time_delta+idle_time_delta+iowait_time_delta)*10)::bigint AS host_total_cpu_time + , (db_cpu_time_delta * 10)::bigint AS mppdb_cpu_time + FROM pmk.pmk_snapshot_datanode_stat dns, snap s + WHERE dns.snapshot_id = s.snapshot_id + UNION ALL + SELECT 'C'::char(1) AS node_type + , node_name + , node_host + , s.pmk_curr_collect_start_time + , (busy_time_delta * 10)::bigint AS host_cpu_busy_time + , (idle_time_delta * 10)::bigint AS host_cpu_idle_time + , (iowait_time_delta * 10)::bigint AS host_cpu_iowait_time + , ((busy_time_delta+idle_time_delta+iowait_time_delta)*10)::bigint AS host_total_cpu_time + , (db_cpu_time_delta * 10)::bigint AS mppdb_cpu_time + FROM pmk.pmk_snapshot_coordinator_stat dns, snap s + WHERE dns.snapshot_id = s.snapshot_id + ) + ORDER BY node_type, node_name, stat_collect_time; + + ELSE + + IF l_node_type = 'D' + THEN + RETURN QUERY + WITH snap AS + ( SELECT snapshot_id + , current_snapshot_time AS pmk_curr_collect_start_time + FROM pmk.pmk_snapshot + WHERE current_snapshot_time BETWEEN i_start_pmk_time AND i_end_pmk_time + ) + SELECT 'D'::char(1) AS node_type + , node_name + , node_host + , pmk_curr_collect_start_time::timestamp AS stat_collect_time + , mppdb_cpu_time + , host_cpu_busy_time + , (host_cpu_busy_time+host_cpu_idle_time+host_cpu_iowait_time)::bigint AS host_cpu_total_time + , ( (mppdb_cpu_time * 100.0) / NULLIF(host_cpu_busy_time, 0) )::numeric(5, 2) AS mppdb_cpu_time_perc_wrt_busy_time + , ( (mppdb_cpu_time * 100.0) / NULLIF((host_cpu_busy_time+host_cpu_idle_time+host_cpu_iowait_time), 0) )::numeric(5, 2) AS mppdb_cpu_time_perc_wrt_total_time + FROM ( SELECT node_name + , node_host + , s.pmk_curr_collect_start_time + , (busy_time_delta * 10)::bigint AS host_cpu_busy_time + , (idle_time_delta * 10)::bigint AS host_cpu_idle_time + , (iowait_time_delta * 10)::bigint AS host_cpu_iowait_time + , (db_cpu_time_delta * 10)::bigint AS mppdb_cpu_time + FROM pmk.pmk_snapshot_datanode_stat dns, snap s + WHERE dns.snapshot_id = s.snapshot_id + AND UPPER(node_name) = l_node_name + ) + ORDER BY node_name, stat_collect_time; + + ELSE + RETURN QUERY + WITH snap AS + ( SELECT snapshot_id + , current_snapshot_time AS pmk_curr_collect_start_time + FROM pmk.pmk_snapshot + WHERE current_snapshot_time BETWEEN i_start_pmk_time AND 
i_end_pmk_time + ) + SELECT 'C'::char(1) AS node_type + , node_name + , node_host + , pmk_curr_collect_start_time::timestamp AS stat_collect_time + , mppdb_cpu_time + , host_cpu_busy_time + , (host_cpu_busy_time+host_cpu_idle_time+host_cpu_iowait_time)::bigint AS host_cpu_total_time + , ( (mppdb_cpu_time * 100.0) / NULLIF(host_cpu_busy_time, 0) )::numeric(5, 2) AS mppdb_cpu_time_perc_wrt_busy_time + , ( (mppdb_cpu_time * 100.0) / NULLIF((host_cpu_busy_time+host_cpu_idle_time+host_cpu_iowait_time), 0) )::numeric(5, 2) AS mppdb_cpu_time_perc_wrt_total_time + FROM ( SELECT node_name + , node_host + , s.pmk_curr_collect_start_time + , (busy_time_delta * 10)::bigint AS host_cpu_busy_time + , (idle_time_delta * 10)::bigint AS host_cpu_idle_time + , (iowait_time_delta * 10)::bigint AS host_cpu_iowait_time + , (db_cpu_time_delta * 10)::bigint AS mppdb_cpu_time + FROM pmk.pmk_snapshot_coordinator_stat dns, snap s + WHERE dns.snapshot_id = s.snapshot_id + AND UPPER(node_name) = l_node_name + ) + ORDER BY node_name, stat_collect_time; + + END IF; -- end of l_node_type = 'D' + END IF; -- end of l_node_name = 'ALL' + +END; +$$ +LANGUAGE plpgsql; + +-- Memory statistics at node level +CREATE OR REPLACE FUNCTION pmk.get_node_memory_stat + ( IN i_node_name text + , IN i_start_pmk_time timestamp with time zone + , IN i_end_pmk_time timestamp with time zone + , OUT o_node_type char(1) + , OUT o_node_name text + , OUT o_node_host text + , OUT o_stat_collect_time timestamp + , OUT o_physical_memory bigint + , OUT o_db_memory_usage bigint + , OUT o_shared_buffer_size bigint + , OUT o_blocks_read bigint + , OUT o_blocks_hit bigint + , OUT o_shared_buffer_hit_ratio numeric(5, 2) + , OUT o_work_memory_size bigint + , OUT o_sorts_in_memory bigint + , OUT o_sorts_in_disk bigint + , OUT o_in_memory_sort_ratio numeric(5, 2) + ) +RETURNS SETOF record +AS +$$ +DECLARE l_node_type char(1); + l_node_name text; + l_error_message text; +BEGIN + + l_error_message := pmk.check_node_type(); + + IF l_error_message IS NOT NULL + THEN + raise notice '%',l_error_message; + RETURN; + END IF; + + l_error_message := pmk.check_pmk_enabled(); + + IF l_error_message IS NOT NULL + THEN + raise notice '%',l_error_message; + RETURN; + END IF; + + l_node_name := UPPER(i_node_name); + + IF l_node_name <> 'ALL' + THEN + SELECT MAX(node_type) + INTO l_node_type + FROM pgxc_node + WHERE UPPER(node_name) = l_node_name + LIMIT 1; + + IF l_node_type IS NULL + THEN + l_error_message := 'ERROR:: Invalid node name ("' || i_node_name || '") provided during generation of node (MPPDB instance) memory statistics ...'; + raise notice '%',l_error_message; + RETURN; + END IF; + END IF; + + -- Verifying the input start and end times + pmk.check_start_end_dates(i_start_pmk_time, i_end_pmk_time, l_error_message); + + IF l_error_message IS NOT NULL + THEN + l_error_message := l_error_message || ' during generation of node (MPPDB instance) memory statistics ...'; + raise notice '%',l_error_message; + RETURN; + END IF; + + IF l_node_name = 'ALL' + THEN + + RETURN QUERY + WITH snap AS + ( SELECT snapshot_id + , current_snapshot_time AS pmk_curr_collect_start_time + FROM pmk.pmk_snapshot + WHERE current_snapshot_time BETWEEN i_start_pmk_time AND i_end_pmk_time + ) + SELECT node_type + , node_name + , node_host + , pmk_curr_collect_start_time::timestamp AS stat_collect_time + , physical_memory + , db_memory_usage + , shared_buffer_size + , blocks_read + , blocks_hit + , ( (blocks_hit * 100.0) / NULLIF((blocks_read + blocks_hit), 0) )::numeric(5, 2) AS 
shared_buffer_hit_ratio + , work_memory_size + , sorts_in_memory + , sorts_in_disk + , ( (sorts_in_memory * 100.0) / NULLIF((sorts_in_disk + sorts_in_memory), 0) )::numeric(5, 2) AS in_memory_sort_ratio + FROM ( SELECT 'D'::char(1) AS node_type + , node_name + , node_host + , s.pmk_curr_collect_start_time + , physical_memory + , db_memory_usage + , shared_buffer_size + , blocks_read_delta AS blocks_read + , blocks_hit_delta AS blocks_hit + , work_memory_size + , sorts_in_memory_delta AS sorts_in_memory + , sorts_in_disk_delta AS sorts_in_disk + FROM pmk.pmk_snapshot_datanode_stat dns, snap s + WHERE dns.snapshot_id = s.snapshot_id + UNION ALL + SELECT 'C'::char(1) AS node_type + , node_name + , node_host + , s.pmk_curr_collect_start_time + , physical_memory + , db_memory_usage + , shared_buffer_size + , blocks_read_delta AS blocks_read + , blocks_hit_delta AS blocks_hit + , work_memory_size + , sorts_in_memory_delta AS sorts_in_memory + , sorts_in_disk_delta AS sorts_in_disk + FROM pmk.pmk_snapshot_coordinator_stat dns, snap s + WHERE dns.snapshot_id = s.snapshot_id + ) + ORDER BY node_type, node_name, stat_collect_time; + ELSE + IF l_node_type = 'D' + THEN + RETURN QUERY + WITH snap AS + ( SELECT snapshot_id + , current_snapshot_time AS pmk_curr_collect_start_time + FROM pmk.pmk_snapshot + WHERE current_snapshot_time BETWEEN i_start_pmk_time AND i_end_pmk_time + ) + SELECT 'D'::char(1) AS node_type + , node_name + , node_host + , pmk_curr_collect_start_time::timestamp AS stat_collect_time + , physical_memory + , db_memory_usage + , shared_buffer_size + , blocks_read + , blocks_hit + , ( (blocks_hit * 100.0) / NULLIF((blocks_read + blocks_hit), 0) )::numeric(5, 2) AS shared_buffer_hit_ratio + , work_memory_size + , sorts_in_memory + , sorts_in_disk + , ( (sorts_in_memory * 100.0) / NULLIF((sorts_in_disk + sorts_in_memory), 0) )::numeric(5, 2) AS in_memory_sort_ratio + FROM ( SELECT node_name + , node_host + , s.pmk_curr_collect_start_time + , physical_memory + , db_memory_usage + , shared_buffer_size + , blocks_read_delta AS blocks_read + , blocks_hit_delta AS blocks_hit + , work_memory_size + , sorts_in_memory_delta AS sorts_in_memory + , sorts_in_disk_delta AS sorts_in_disk + FROM pmk.pmk_snapshot_datanode_stat dns, snap s + WHERE dns.snapshot_id = s.snapshot_id + AND UPPER(node_name) = l_node_name + ) + ORDER BY node_name, stat_collect_time; + + ELSE + RETURN QUERY + WITH snap AS + ( SELECT snapshot_id + , current_snapshot_time AS pmk_curr_collect_start_time + FROM pmk.pmk_snapshot + WHERE current_snapshot_time BETWEEN i_start_pmk_time AND i_end_pmk_time + ) + SELECT 'C'::char(1) AS node_type + , node_name + , node_host + , pmk_curr_collect_start_time::timestamp AS stat_collect_time + , physical_memory + , db_memory_usage + , shared_buffer_size + , blocks_read + , blocks_hit + , ( (blocks_hit * 100.0) / NULLIF((blocks_read + blocks_hit), 0) )::numeric(5, 2) AS shared_buffer_hit_ratio + , work_memory_size + , sorts_in_memory + , sorts_in_disk + , ( (sorts_in_memory * 100.0) / NULLIF((sorts_in_disk + sorts_in_memory), 0) )::numeric(5, 2) AS in_memory_sort_ratio + FROM ( SELECT node_name + , node_host + , s.pmk_curr_collect_start_time + , physical_memory + , db_memory_usage + , shared_buffer_size + , blocks_read_delta AS blocks_read + , blocks_hit_delta AS blocks_hit + , work_memory_size + , sorts_in_memory_delta AS sorts_in_memory + , sorts_in_disk_delta AS sorts_in_disk + FROM pmk.pmk_snapshot_coordinator_stat dns, snap s + WHERE dns.snapshot_id = s.snapshot_id + AND UPPER(node_name) = 
l_node_name + ) + ORDER BY node_name, stat_collect_time; + + END IF; -- end of l_node_type = 'D' + END IF; -- end of l_node_name = 'ALL' + +END; +$$ +LANGUAGE plpgsql; + +-- I/O statistics at node level +CREATE OR REPLACE FUNCTION pmk.get_node_io_stat + ( IN i_node_name text + , IN i_start_pmk_time timestamp with time zone + , IN i_end_pmk_time timestamp with time zone + , OUT o_node_type char(1) + , OUT o_node_name text + , OUT o_node_host text + , OUT o_stat_collect_time timestamp + , OUT o_number_of_files int + , OUT o_physical_reads bigint + , OUT o_physical_writes bigint + , OUT o_read_time bigint + , OUT o_write_time bigint + , OUT o_avg_read_per_sec numeric(20,2) + , OUT o_avg_read_time numeric(20,3) + , OUT o_avg_write_per_sec numeric(20,2) + , OUT o_avg_write_time numeric(20,3) + ) +RETURNS SETOF record +AS +$$ +DECLARE l_node_type char(1); + l_node_name text; + l_error_message text; +BEGIN + + l_error_message := pmk.check_node_type(); + + IF l_error_message IS NOT NULL + THEN + raise notice '%',l_error_message; + RETURN; + END IF; + + l_error_message := pmk.check_pmk_enabled(); + + IF l_error_message IS NOT NULL + THEN + raise notice '%',l_error_message; + RETURN; + END IF; + + l_node_name := UPPER(i_node_name); + + IF l_node_name <> 'ALL' + THEN + SELECT MAX(node_type) + INTO l_node_type + FROM pgxc_node + WHERE UPPER(node_name) = l_node_name + LIMIT 1; + + IF l_node_type IS NULL + THEN + l_error_message := 'ERROR:: Invalid node name ("' || i_node_name || '") provided during generation of node (MPPDB instance) I/O statistics ...'; + raise notice '%',l_error_message; + RETURN; + END IF; + END IF; + + -- Verifying the input start and end times + pmk.check_start_end_dates(i_start_pmk_time, i_end_pmk_time, l_error_message); + + IF l_error_message IS NOT NULL + THEN + l_error_message := l_error_message || ' during generation of node (MPPDB instance) I/O statistics ...'; + raise notice '%',l_error_message; + RETURN; + END IF; + + IF l_node_name = 'ALL' + THEN + + RETURN QUERY + WITH snap AS + ( SELECT snapshot_id + , current_snapshot_time AS pmk_curr_collect_start_time + FROM pmk.pmk_snapshot + WHERE current_snapshot_time BETWEEN i_start_pmk_time AND i_end_pmk_time + ) + SELECT node_type + , node_name + , node_host + , pmk_curr_collect_start_time::timestamp AS stat_collect_time + , number_of_files + , physical_reads + , physical_writes + , read_time + , write_time + , ( physical_reads * 1000000.0 / NULLIF(read_time, 0) )::numeric(20,2) AS avg_read_per_sec + , ( read_time * 1.0 / NULLIF(physical_reads, 0) )::numeric(20,3) AS avg_read_time + , ( physical_writes * 1000000.0 / NULLIF(write_time, 0) )::numeric(20,2) AS avg_write_per_sec + , ( write_time * 1.0 / NULLIF(physical_writes, 0) )::numeric(20,3) AS avg_write_time + FROM ( SELECT 'D'::char(1) AS node_type + , node_name + , node_host + , s.pmk_curr_collect_start_time + , number_of_files + , physical_reads_delta AS physical_reads + , physical_writes_delta AS physical_writes + , read_time_delta AS read_time + , write_time_delta AS write_time + FROM pmk.pmk_snapshot_datanode_stat dns, snap s + WHERE dns.snapshot_id = s.snapshot_id + UNION ALL + SELECT 'C'::char(1) AS node_type + , node_name + , node_host + , s.pmk_curr_collect_start_time + , number_of_files + , physical_reads_delta AS physical_reads + , physical_writes_delta AS physical_writes + , read_time_delta AS read_time + , write_time_delta AS write_time + FROM pmk.pmk_snapshot_coordinator_stat dns, snap s + WHERE dns.snapshot_id = s.snapshot_id + ) + ORDER BY node_type, 
node_name, stat_collect_time; + + ELSE + + IF l_node_type = 'D' + THEN + RETURN QUERY + WITH snap AS + ( SELECT snapshot_id + , current_snapshot_time AS pmk_curr_collect_start_time + FROM pmk.pmk_snapshot + WHERE current_snapshot_time BETWEEN i_start_pmk_time AND i_end_pmk_time + ) + SELECT 'D'::char(1) AS node_type + , node_name + , node_host + , pmk_curr_collect_start_time::timestamp AS stat_collect_time + , number_of_files + , physical_reads + , physical_writes + , read_time + , write_time + , ( physical_reads * 1000000.0 / NULLIF(read_time, 0) )::numeric(20,2) AS avg_read_per_sec + , ( read_time * 1.0 / NULLIF(physical_reads, 0) )::numeric(20,3) AS avg_read_time + , ( physical_writes * 1000000.0 / NULLIF(write_time, 0) )::numeric(20,2) AS avg_write_per_sec + , ( write_time * 1.0 / NULLIF(physical_writes, 0) )::numeric(20,3) AS avg_write_time + FROM ( SELECT node_name + , node_host + , pmk_curr_collect_start_time + , number_of_files + , physical_reads_delta AS physical_reads + , physical_writes_delta AS physical_writes + , read_time_delta AS read_time + , write_time_delta AS write_time + FROM pmk.pmk_snapshot_datanode_stat dns, snap s + WHERE dns.snapshot_id = s.snapshot_id + AND UPPER(node_name) = l_node_name + ) + ORDER BY node_name, stat_collect_time; + + ELSE + RETURN QUERY + WITH snap AS + ( SELECT snapshot_id + , current_snapshot_time AS pmk_curr_collect_start_time + FROM pmk.pmk_snapshot + WHERE current_snapshot_time BETWEEN i_start_pmk_time AND i_end_pmk_time + ) + SELECT 'C'::char(1) AS node_type + , node_name + , node_host + , pmk_curr_collect_start_time::timestamp AS stat_collect_time + , number_of_files + , physical_reads + , physical_writes + , read_time + , write_time + , ( physical_reads * 1000000.0 / NULLIF(read_time, 0) )::numeric(20,2) AS avg_read_per_sec + , ( read_time * 1.0 / NULLIF(physical_reads, 0) )::numeric(20,3) AS avg_read_time + , ( physical_writes * 1000000.0 / NULLIF(write_time, 0) )::numeric(20,2) AS avg_write_per_sec + , ( write_time * 1.0 / NULLIF(physical_writes, 0) )::numeric(20,3) AS avg_write_time + FROM ( SELECT node_name + , node_host + , pmk_curr_collect_start_time + , number_of_files + , physical_reads_delta AS physical_reads + , physical_writes_delta AS physical_writes + , read_time_delta AS read_time + , write_time_delta AS write_time + FROM pmk.pmk_snapshot_coordinator_stat dns, snap s + WHERE dns.snapshot_id = s.snapshot_id + AND UPPER(node_name) = l_node_name + ) + ORDER BY node_name, stat_collect_time; + + END IF; -- end of l_node_type = 'D' + END IF; -- end of l_node_name = 'ALL' + +END; +$$ +LANGUAGE plpgsql; + + +-- This function is used to find the TOP N sessions that consume the most CPU time. +-- But this function returns the Top N sessions from each node. +-- The outer function (get_session_cpu_stat) returns the Top N sessions as the final result.
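+-- Illustrative usage (a sketch; 'ALL' and 5 are example arguments, and the explicit
+-- smallint cast matches the declared parameter type):
+--   SELECT * FROM pmk.get_session_cpu_stat('ALL', 5::smallint);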
+ +CREATE OR REPLACE FUNCTION pmk.get_session_cpu_stat + ( IN i_node_name text + , IN i_top_n_sessions smallint + , OUT o_node_name name + , OUT o_db_name name + , OUT o_user_name name + , OUT o_client_hostname text + , OUT o_session_start_time timestamp + , OUT o_xact_start_time timestamp + , OUT o_waiting boolean + , OUT o_state text + , OUT o_query text + , OUT o_session_cpu_time bigint + , OUT o_mppdb_cpu_time bigint + , OUT o_mppdb_cpu_time_perc numeric(5, 2) + , OUT o_avg_sql_exec_time numeric(15, 3) + ) +RETURNS SETOF RECORD +AS +$$ +DECLARE l_node_query text; + l_execute_query text; +BEGIN + + FOR i IN ( SELECT node_name + FROM pgxc_node nl + WHERE UPPER(nl.node_name) = COALESCE(NULLIF(UPPER(i_node_name), 'ALL'), UPPER(nl.node_name)) + ) + LOOP + + l_node_query := 'WITH sess_time_stat0 AS + ( SELECT sessid, stat_name + , (value/1000.0)::numeric AS stat_value -- converting to millisecond + FROM pv_session_time + WHERE stat_name IN ( ''CPU_TIME'', ''EXECUTION_TIME'') + ) + , sess_time_stat AS + ( SELECT DISTINCT stso.sessid + , (SELECT stsi.stat_value FROM sess_time_stat0 stsi WHERE stsi.sessid = stso.sessid AND stsi.stat_name = ''CPU_TIME'') AS session_cpu_time + , (SELECT stsi.stat_value FROM sess_time_stat0 stsi WHERE stsi.sessid = stso.sessid AND stsi.stat_name = ''EXECUTION_TIME'') AS session_sql_time + FROM sess_time_stat0 stso + ) + , mppdb_cpu_time AS + ( SELECT (total_cpu()*10.0)::bigint AS mppdb_cpu_time -- converting to millisecond + ) + , sess_cpu_stat AS + ( SELECT ''' || i.node_name || '''::name AS node_name + , a.datname::name AS db_name + , a.usename::name AS user_name + , a.client_hostname + , date_trunc(''second'', a.backend_start)::timestamp AS session_start_time + , date_trunc(''second'', a.xact_start)::timestamp AS xact_start_time + , a.waiting + , a.state, a.query + , ROUND(st.session_cpu_time)::bigint AS session_cpu_time + , m.mppdb_cpu_time + , ( (st.session_cpu_time * 100.0) / NULLIF(m.mppdb_cpu_time, 0) )::numeric(5, 2) AS mppdb_cpu_time_perc + , st.sessid + , st.session_sql_time + FROM pg_stat_activity a + , sess_time_stat st + , mppdb_cpu_time m + WHERE a.state IN (''active'', ''fastpath function call'', ''retrying'') + AND a.pid = sessionid2pid(st.sessid::cstring) + ORDER BY st.session_cpu_time DESC + , mppdb_cpu_time_perc DESC + LIMIT ' || i_top_n_sessions || ' + ) + SELECT scs.node_name + , scs.db_name + , scs.user_name + , scs.client_hostname + , scs.session_start_time + , scs.xact_start_time + , scs.waiting + , scs.state + , scs.query + , scs.session_cpu_time + , scs.mppdb_cpu_time + , scs.mppdb_cpu_time_perc + , ( scs.session_sql_time / NULLIF(ss.value, 0) )::numeric(15, 3) AS avg_sql_exec_time + FROM sess_cpu_stat scs + , pv_session_stat ss + WHERE ss.sessid = scs.sessid + AND ss.statname = ''n_sql'''; + + + RETURN QUERY + EXECUTE l_node_query; + + END LOOP; + +END; +$$ +LANGUAGE plpgsql; + + + +-- This function is used to find the TOP N sessions ranked by session memory usage (total and used memory size). +-- But this function returns the Top N sessions from each node. +-- The outer function (get_session_memory_stat) returns the Top N sessions as the final result.
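+-- Illustrative usage (a sketch; arguments are examples):
+--   SELECT * FROM pmk.get_session_memory_stat('ALL', 5::smallint);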
+ +CREATE OR REPLACE FUNCTION pmk.get_session_memory_stat + ( IN i_node_name text + , IN i_top_n_sessions smallint + , OUT o_node_name name + , OUT o_db_name name + , OUT o_user_name name + , OUT o_client_hostname text + , OUT o_session_start_time timestamp + , OUT o_xact_start_time timestamp + , OUT o_waiting boolean + , OUT o_state text + , OUT o_query text + , OUT o_session_total_memory_size bigint + , OUT o_session_used_memory_size bigint + , OUT o_buffer_hits bigint + , OUT o_disk_reads bigint + , OUT o_session_buffer_hit_ratio numeric(5, 2) + , OUT o_sorts_in_memory bigint + , OUT o_sorts_in_disk bigint + , OUT o_session_memory_sort_ratio numeric(5, 2) + , OUT o_avg_sql_exec_time numeric(15, 3) + ) +RETURNS SETOF record +AS +$$ +DECLARE l_node_query text; + l_execute_query text; +BEGIN + + FOR i IN ( SELECT node_name + FROM pgxc_node nl + WHERE UPPER(nl.node_name) = COALESCE(NULLIF(UPPER(i_node_name), 'ALL'), UPPER(nl.node_name)) + ) + LOOP + + l_node_query := 'WITH sess_memory_usage AS + ( SELECT sessid + , SUM(totalsize)::bigint AS totalsize + , SUM(usedsize)::bigint AS usedsize + FROM pv_session_memory_detail + GROUP BY sessid + ) + , sess_stat0 AS + ( SELECT ss.sessid + , ss.statname AS statname + , ss.value AS statvalue + FROM sess_memory_usage st, pv_session_stat ss + WHERE ss.sessid = st.sessid + AND ss.statname IN ( ''n_blocks_fetched'' + , ''n_shared_blocks_read'', ''n_local_blocks_read'' + , ''n_sort_in_disk'', ''n_sort_in_memory'' + , ''n_sql'' ) + ) + , sess_stat1 AS + ( SELECT oss.sessid + , oss.totalsize + , oss.usedsize + , (SELECT statvalue FROM sess_stat0 iss WHERE iss.sessid = oss.sessid AND iss.statname = ''n_blocks_fetched'') AS total_reads + , (SELECT statvalue FROM sess_stat0 iss WHERE iss.sessid = oss.sessid AND iss.statname = ''n_shared_blocks_read'') AS disk_to_shared_buffer + , (SELECT statvalue FROM sess_stat0 iss WHERE iss.sessid = oss.sessid AND iss.statname = ''n_local_blocks_read'') AS disk_to_local_buffer + , (SELECT statvalue FROM sess_stat0 iss WHERE iss.sessid = oss.sessid AND iss.statname = ''n_sort_in_disk'') AS sorts_in_disk + , (SELECT statvalue FROM sess_stat0 iss WHERE iss.sessid = oss.sessid AND iss.statname = ''n_sort_in_memory'') AS sorts_in_memory + , (SELECT statvalue FROM sess_stat0 iss WHERE iss.sessid = oss.sessid AND iss.statname = ''n_sql'') AS sql_count + FROM sess_memory_usage oss + ) + , sess_stat AS + ( SELECT ss.sessid + , ss.totalsize + , ss.usedsize + , ss.total_reads + , (ss.disk_to_shared_buffer + ss.disk_to_local_buffer) AS disk_reads + , (ss.total_reads - (ss.disk_to_shared_buffer+ss.disk_to_local_buffer)) AS buffer_hits + , sorts_in_disk + , sorts_in_memory + , sql_count + FROM sess_stat1 ss + ) + , sess_memory_stat AS + ( SELECT ''' || i.node_name || '''::name AS node_name + , a.datname::name AS db_name + , a.usename::name AS user_name + , a.client_hostname + , date_trunc(''second'', a.backend_start)::timestamp AS session_start_time + , date_trunc(''second'', a.xact_start)::timestamp AS xact_start_time + , a.waiting + , a.state, a.query + , st.totalsize AS session_total_memory_size + , st.usedsize AS session_used_memory_size + , st.buffer_hits, st.disk_reads + , ( (st.buffer_hits * 100.0) / NULLIF(st.total_reads, 0) )::numeric(5, 2) AS session_buffer_hit_ratio + , st.sorts_in_memory, st.sorts_in_disk + , ( (st.sorts_in_memory * 100.0) / NULLIF(st.sorts_in_memory + st.sorts_in_disk, 0) )::numeric(5, 2) AS session_memory_sort_ratio + , st.sessid + , st.sql_count + FROM pg_stat_activity a + , sess_stat st + WHERE a.state IN 
(''active'', ''fastpath function call'', ''retrying'') + AND a.pid = sessionid2pid(st.sessid::cstring) + ORDER BY st.totalsize DESC + , st.usedsize DESC + LIMIT ' || i_top_n_sessions || ' + ) + SELECT sms.node_name + , sms.db_name + , sms.user_name + , sms.client_hostname + , sms.session_start_time + , sms.xact_start_time + , sms.waiting + , sms.state + , sms.query + , sms.session_total_memory_size + , sms.session_used_memory_size + , sms.buffer_hits + , sms.disk_reads + , sms.session_buffer_hit_ratio + , sms.sorts_in_memory + , sms.sorts_in_disk + , sms.session_memory_sort_ratio + , ( ss.value / (NULLIF(sms.sql_count, 0) * 1000.0) )::numeric(15, 3) AS avg_sql_exec_time + FROM sess_memory_stat sms + , pv_session_time ss + WHERE ss.sessid = sms.sessid + AND ss.stat_name = ''EXECUTION_TIME'''; + + + RETURN QUERY + EXECUTE l_node_query; + + END LOOP; + +END; +$$ +LANGUAGE plpgsql; + + +-- This function is used to find the TOP N sessions that perform the most physical I/O operations. +-- But this function returns the Top N sessions from each node. +-- The outer function (get_session_io_stat) returns the Top N sessions as the final result. + +CREATE OR REPLACE FUNCTION pmk.get_session_io_stat + ( IN i_node_name text + , IN i_top_n_sessions smallint + , OUT o_node_name name + , OUT o_db_name name + , OUT o_user_name name + , OUT o_client_hostname text + , OUT o_session_start_time timestamp + , OUT o_xact_start_time timestamp + , OUT o_waiting boolean + , OUT o_state text + , OUT o_query text + , OUT o_disk_reads bigint + , OUT o_read_time bigint + , OUT o_avg_read_per_sec numeric(20, 2) + , OUT o_avg_read_time numeric(20, 3) + , OUT o_avg_sql_exec_time numeric(15, 3) + ) +RETURNS SETOF record +AS +$$ +DECLARE l_node_query text; + l_execute_query text; +BEGIN + + FOR i IN ( SELECT node_name + FROM pgxc_node nl + WHERE UPPER(nl.node_name) = COALESCE(NULLIF(UPPER(i_node_name), 'ALL'), UPPER(nl.node_name)) + ) + LOOP + + l_node_query := 'WITH sess_stat0 AS + ( SELECT ss.sessid + , ss.statname AS statname + , ss.value AS statvalue + FROM pv_session_stat ss + WHERE ss.statname IN ( ''n_shared_blocks_read'', ''n_local_blocks_read'' + , ''n_blocks_read_time'', ''n_sql'' ) + ) + , sess_stat1 AS + ( SELECT DISTINCT ss.sessid + , (SELECT statvalue FROM sess_stat0 iss WHERE iss.sessid = ss.sessid AND iss.statname = ''n_shared_blocks_read'') AS disk_to_shared_buffer + , (SELECT statvalue FROM sess_stat0 iss WHERE iss.sessid = ss.sessid AND iss.statname = ''n_local_blocks_read'') AS disk_to_local_buffer + , (SELECT statvalue FROM sess_stat0 iss WHERE iss.sessid = ss.sessid AND iss.statname = ''n_blocks_read_time'') AS read_time + , (SELECT statvalue FROM sess_stat0 iss WHERE iss.sessid = ss.sessid AND iss.statname = ''n_sql'') AS sql_count + FROM sess_stat0 ss + ) + , sess_stat AS + ( SELECT ss.sessid + , (ss.disk_to_shared_buffer + ss.disk_to_local_buffer) AS disk_reads + , ss.read_time + , ss.sql_count + FROM sess_stat1 ss + ) + , sess_io_stat AS + ( SELECT ''' || i.node_name || '''::name AS node_name + , a.datname::name AS db_name + , a.usename::name AS user_name + , a.client_hostname + , date_trunc(''second'', a.backend_start)::timestamp AS session_start_time + , date_trunc(''second'', a.xact_start)::timestamp AS xact_start_time + , a.waiting + , a.state, a.query + , st.disk_reads + , st.read_time + , ( st.disk_reads * 1000000.0 / NULLIF(st.read_time, 0) )::numeric(20,2) AS avg_read_per_sec + , ( st.read_time * 1.0 / NULLIF(st.disk_reads, 0) )::numeric(20,3) AS avg_read_time + , st.sessid + , st.sql_count + FROM
pg_stat_activity a + , sess_stat st + WHERE a.state IN (''active'', ''fastpath function call'', ''retrying'') + AND a.pid = sessionid2pid(st.sessid::cstring) + ORDER BY st.disk_reads DESC + , st.read_time DESC + LIMIT ' || i_top_n_sessions || ' + ) + SELECT sios.node_name + , sios.db_name + , sios.user_name + , sios.client_hostname + , sios.session_start_time + , sios.xact_start_time + , sios.waiting + , sios.state + , sios.query + , sios.disk_reads + , sios.read_time + , sios.avg_read_per_sec + , sios.avg_read_time + , ( ss.value / (NULLIF(sios.sql_count, 0) * 1000.0) )::numeric(15, 3) AS avg_sql_exec_time + FROM sess_io_stat sios + , pv_session_time ss + WHERE ss.sessid = sios.sessid + AND ss.stat_name = ''EXECUTION_TIME'''; + + + RETURN QUERY + EXECUTE l_node_query; + + END LOOP; + +END; +$$ +LANGUAGE plpgsql; + + +-- If config_value = -1, it is treated as infinite (all statistics collections are retained). + +CREATE OR REPLACE FUNCTION pmk.insertBaseValue() +RETURNS TEXT +AS +$$ +DECLARE l_configuration_count_value INT; + l_meta_data_count_value INT; + l_version_string varchar(128); + l_result varchar(128); +BEGIN + SELECT count(config_param_name) + INTO l_configuration_count_value + FROM pmk.pmk_configuration + WHERE config_param_name IN ('Collection Count', 'Enable PMK'); + + IF l_configuration_count_value != 2 + THEN + DELETE FROM pmk.pmk_configuration; + INSERT INTO pmk.pmk_configuration(config_param_name, config_value) VALUES ('Collection Count', '9'), ('Enable PMK', 'TRUE'); + END IF; + + SELECT count(pmk_version) + INTO l_meta_data_count_value + FROM pmk.pmk_meta_data; + + SELECT substring(version() from '[a-zA-Z0-9 ]* V[0-9]{3}R[0-9]{3}C[0-9]{2}') INTO l_version_string; + l_result := l_version_string; + + IF l_meta_data_count_value < 1 + THEN + INSERT INTO pmk.pmk_meta_data (pmk_version, last_snapshot_id, last_snapshot_collect_time) VALUES (l_result, NULL, NULL); + END IF; + + RETURN NULL; +END; +$$ +LANGUAGE plpgsql; + +SELECT pmk.insertBaseValue(); + +-- It ends the transaction started at the beginning of the PMK installation +COMMIT; + +analyze pmk.pmk_configuration; +analyze pmk.pmk_snapshot; +analyze pmk.pmk_snapshot_datanode_stat; +analyze pmk.pmk_snapshot_coordinator_stat; +analyze pmk.pmk_meta_data; \ No newline at end of file diff --git a/script/gspylib/etc/sql/pmk_schema_single_inst.sql b/script/gspylib/etc/sql/pmk_schema_single_inst.sql new file mode 100644 index 0000000..a6ccb5b --- /dev/null +++ b/script/gspylib/etc/sql/pmk_schema_single_inst.sql @@ -0,0 +1,2680 @@ +-- It starts a transaction during the PMK installation +START TRANSACTION; + +CREATE SCHEMA pmk; + +-- PMK Configuration table +CREATE TABLE pmk.pmk_configuration +( + config_param_name varchar(64) NOT NULL +, config_value text NOT NULL +, PRIMARY KEY (config_param_name) +); + +-- Snapshot (statistics collection) information +CREATE TABLE pmk.pmk_snapshot +( + snapshot_id int -- Snapshot ID (Running number) +, current_snapshot_time timestamp with time zone -- Time at the beginning of the snapshot +, last_snapshot_time timestamp with time zone -- Time at the end of the snapshot; the actual time the snapshot was taken +, creation_time timestamp with time zone -- Time the snapshot was created +, PRIMARY KEY (snapshot_id) +); + +CREATE INDEX ix_pmk_snapshot_time ON pmk.pmk_snapshot (current_snapshot_time DESC); + +-- Statistics for each node +CREATE TABLE pmk.pmk_snapshot_datanode_stat +( + snapshot_id int -- Snapshot Id +, node_name text -- node name from pg_node_env +, node_host text -- node host from pg_node_env +, last_startup_time timestamp with
time zone -- last restart time of the node before snapshot starts +, number_of_files int +, physical_reads bigint +, physical_reads_delta bigint +, physical_writes bigint +, physical_writes_delta bigint +, read_time bigint +, read_time_delta bigint +, write_time bigint +, write_time_delta bigint +, db_size bigint +, active_sql_count int +, wait_sql_count int +, session_count int +, xact_commit bigint +, xact_commit_delta bigint +, xact_rollback bigint +, xact_rollback_delta bigint +, checkpoints_timed numeric(40,0) +, checkpoints_timed_delta numeric(40,0) +, checkpoints_req numeric(40,0) +, checkpoints_req_delta numeric(40,0) +, checkpoint_write_time double precision +, checkpoint_write_time_delta double precision +, physical_memory bigint +, db_memory_usage bigint +, shared_buffer_size bigint +, session_memory_total_size bigint +, session_memory_used_size bigint +, blocks_read bigint +, blocks_read_delta bigint +, blocks_hit bigint +, blocks_hit_delta bigint +, work_memory_size bigint +, sorts_in_memory bigint +, sorts_in_memory_delta bigint +, sorts_in_disk bigint +, sorts_in_disk_delta bigint +, busy_time numeric +, busy_time_delta numeric +, idle_time numeric +, idle_time_delta numeric +, iowait_time numeric +, iowait_time_delta numeric +, db_cpu_time numeric +, db_cpu_time_delta numeric +, PRIMARY KEY (snapshot_id) +); + +CREATE INDEX ix_pmk_snapshot_dnode_stat_node_name ON pmk.pmk_snapshot_datanode_stat (UPPER(node_name), snapshot_id); + +-- Table to maintain PMK meta data +CREATE TABLE pmk.pmk_meta_data +( + pmk_version varchar(128) +, last_snapshot_id int +, last_snapshot_collect_time timestamp with time zone +, PRIMARY KEY (pmk_version) +); + +CREATE OR REPLACE FUNCTION pmk.check_node_type ( ) +RETURNS TEXT +AS +$$ +DECLARE l_node_type CHAR(1); +BEGIN + + l_node_type := 'D'; + + RETURN NULL; + +END; +$$ +LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION pmk.check_pmk_enabled ( ) +RETURNS TEXT +AS +$$ +DECLARE l_pmk_enabled_i TEXT; +BEGIN + + SELECT UPPER(config_value) + INTO l_pmk_enabled_i + FROM pmk.pmk_configuration + WHERE config_param_name = 'Enable PMK'; + + IF l_pmk_enabled_i = 'FALSE' + THEN + RETURN 'ERROR:: PMK should be enabled to use the PMK features.'; + ELSE + RETURN NULL; + END IF; + +END; +$$ +LANGUAGE plpgsql; + +-- This function is used to find the PMK version +-- If it is executed from a data-node, it throws the appropriate error. + +CREATE OR REPLACE FUNCTION pmk.pmk_version ( ) +RETURNS varchar(128) +AS +$$ +DECLARE l_pmk_version varchar(128); + l_error_message TEXT; +BEGIN + + l_error_message := pmk.check_node_type(); + + IF l_error_message IS NOT NULL + THEN + raise notice '%',l_error_message; + RETURN 'f'; + END IF; + + l_error_message := pmk.check_pmk_enabled(); + + IF l_error_message IS NOT NULL + THEN + raise notice '%',l_error_message; + RETURN 'f'; + END IF; + + SELECT pmk_version + INTO l_pmk_version + FROM pmk.pmk_meta_data; + + RETURN l_pmk_version; + +END; +$$ +LANGUAGE plpgsql; + +-- This function is used to configure the PMK configuration parameters +-- -1 indicates that all statistics collections should be retained. +-- At least one statistics collection should be retained in the database.
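+-- Illustrative usage (a sketch; the parameter names come from pmk.pmk_configuration
+-- and the values shown are examples):
+--   SELECT pmk.configure_parameter('Collection Count', '12');
+--   SELECT pmk.configure_parameter('Enable PMK', 'TRUE');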
+ +CREATE OR REPLACE FUNCTION pmk.configure_parameter + ( IN i_config_param_name varchar(64) + , IN i_config_value text + ) +RETURNS boolean +AS +$$ +DECLARE l_collect_count_value INT; + l_config_value TEXT; + l_upper_config_param TEXT; + l_error_message TEXT; +BEGIN + + l_error_message := pmk.check_node_type(); + + IF l_error_message IS NOT NULL + THEN + raise notice '%',l_error_message; + RETURN FALSE; + END IF; + + l_upper_config_param := UPPER(TRIM(BOTH ' ' FROM i_config_param_name)); + + l_error_message := pmk.check_pmk_enabled(); + + IF l_error_message IS NOT NULL + AND l_upper_config_param <> 'ENABLE PMK' + THEN + raise notice '%',l_error_message; + RETURN FALSE; + END IF; + + IF i_config_param_name IS NULL + THEN + l_error_message := 'ERROR:: Null should not be provided for configuration parameter name.'; + raise notice '%',l_error_message; + RETURN FALSE; + END IF; + + IF i_config_value IS NULL + THEN + l_error_message := 'ERROR:: Null should not be provided for configuration value.'; + raise notice '%',l_error_message; + RETURN FALSE; + END IF; + + IF l_upper_config_param = 'COLLECTION COUNT' + THEN + l_collect_count_value := i_config_value::int; + + IF l_collect_count_value < -1 + THEN + l_error_message := 'ERROR:: Configuration value "' || i_config_value || '" should not be less than -1.'; + raise notice '%',l_error_message; + RETURN FALSE; + + ELSIF l_collect_count_value = 0 + THEN + l_error_message := 'ERROR:: 0 should not be provided since at least one collection should be retained.'; + raise notice '%',l_error_message; + RETURN FALSE; + + ELSE + l_config_value := l_collect_count_value; + END IF; + + ELSIF l_upper_config_param = 'ENABLE PMK' + THEN + l_config_value := UPPER(TRIM(BOTH ' ' FROM i_config_value)); + + IF l_config_value NOT IN ('TRUE', 'FALSE') + THEN + l_error_message := 'ERROR:: Allowed values are TRUE or FALSE for the configuration parameter "Enable PMK".'; + raise notice '%',l_error_message; + RETURN FALSE; + + END IF; + END IF; + + SET allow_concurrent_tuple_update = ON; + + UPDATE pmk.pmk_configuration + SET config_value = l_config_value + WHERE UPPER(config_param_name) = l_upper_config_param; + + IF NOT FOUND THEN + l_error_message := 'ERROR:: Invalid configuration parameter "' || i_config_param_name || '" provided for configuring PMK parameter ...'; + raise notice '%',l_error_message; + RETURN FALSE; + END IF; + + RETURN TRUE; + +END; +$$ +LANGUAGE plpgsql; + +-- If ALL is provided, it returns the details of all the configuration parameters. +-- If a specific config parameter is provided, it returns the details of that configuration parameter.
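+-- Illustrative usage (a sketch):
+--   SELECT * FROM pmk.get_configuration_parameter('ALL');
+--   SELECT * FROM pmk.get_configuration_parameter('Collection Count');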
+ +CREATE OR REPLACE FUNCTION pmk.get_configuration_parameter + ( IN i_config_param_name TEXT ) +RETURNS TABLE +( + config_param_name varchar(64) +, config_value text +) +AS +$$ +DECLARE l_upper_config_param TEXT; + l_error_message TEXT; +BEGIN + + l_error_message := pmk.check_node_type(); + + IF l_error_message IS NOT NULL + THEN + raise notice '%',l_error_message; + RETURN; + END IF; + + l_error_message := pmk.check_pmk_enabled(); + + IF l_error_message IS NOT NULL + THEN + raise notice '%',l_error_message; + RETURN; + END IF; + + l_upper_config_param := UPPER(TRIM(BOTH ' ' FROM i_config_param_name)); + + IF l_upper_config_param = 'ALL' + THEN + + RETURN QUERY + SELECT config_param_name + , config_value + FROM pmk.pmk_configuration + ORDER BY config_param_name; + + ELSE + + RETURN QUERY + SELECT config_param_name + , config_value + FROM pmk.pmk_configuration + WHERE UPPER(config_param_name) = l_upper_config_param; + + END IF; + +END; +$$ +LANGUAGE plpgsql; + + /* + This function is used to collect statistics for each node (including data node and coordinator) +*/ + +CREATE OR REPLACE FUNCTION pmk.find_perf_stat + ( IN i_skip_supper_role boolean + , OUT o_number_of_files int + , OUT o_physical_reads bigint + , OUT o_physical_writes bigint + , OUT o_read_time bigint + , OUT o_write_time bigint + , OUT o_physical_memory bigint + , OUT o_shared_buffer_size bigint + , OUT o_session_memory_total_size bigint + , OUT o_session_memory_used_size bigint + , OUT o_blocks_read bigint + , OUT o_blocks_hit bigint + , OUT o_db_size bigint + , OUT o_work_memory_size bigint + , OUT o_sorts_in_memory bigint + , OUT o_sorts_in_disk bigint + , OUT o_active_sql_count int + , OUT o_wait_sql_count int + , OUT o_session_count int + , OUT o_busy_time numeric + , OUT o_idle_time numeric + , OUT o_iowait_time numeric + , OUT o_db_cpu_time numeric + , OUT o_db_memory_usage bigint + , OUT o_node_startup_time timestamp with time zone + , OUT o_node_host_name text + , OUT o_xact_commit bigint + , OUT o_xact_rollback bigint + , OUT o_checkpoints_timed numeric(40,0) + , OUT o_checkpoints_req numeric(40,0) + , OUT o_checkpoint_write_time double precision + ) +AS +$$ +DECLARE + l_block_size int; + l_record_chk int; +BEGIN + + o_node_startup_time := pg_postmaster_start_time(); + o_node_host_name := get_hostname(); + + SELECT COUNT(*) AS number_of_files + , SUM(phyrds) AS physical_reads + , SUM(phywrts) AS physical_writes + , SUM(readtim) AS read_time + , SUM(writetim) AS write_time + INTO o_number_of_files + , o_physical_reads + , o_physical_writes + , o_read_time + , o_write_time + FROM gs_file_stat; + + IF o_number_of_files = 0 + THEN + o_physical_reads := 0; + o_physical_writes := 0; + o_read_time := 0; + o_write_time := 0; + END IF; + + WITH os_stat AS + ( + SELECT os.name AS statname + , os.value AS statvalue + FROM gs_os_run_info os + WHERE os.name IN ( 'PHYSICAL_MEMORY_BYTES', 'BUSY_TIME', 'IDLE_TIME', 'IOWAIT_TIME' ) + ) + SELECT (SELECT statvalue FROM os_stat WHERE statname = 'PHYSICAL_MEMORY_BYTES') + , (SELECT statvalue FROM os_stat WHERE statname = 'BUSY_TIME') + , (SELECT statvalue FROM os_stat WHERE statname = 'IDLE_TIME') + , (SELECT statvalue FROM os_stat WHERE statname = 'IOWAIT_TIME') + INTO o_physical_memory + , o_busy_time + , o_idle_time + , o_iowait_time + ; + + -- pv_db_time is not available; temporarily PMK extension is used. 
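+-- Note on units (inferred from the conversions used in this file): total_cpu()
+-- appears to report CPU time in 10 ms ticks (it is multiplied by 10 elsewhere to
+-- obtain milliseconds), and total_memory() appears to report KB, hence the *1024
+-- below to convert to bytes.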
+ o_db_cpu_time := total_cpu(); + o_db_memory_usage := total_memory()*1024; + + WITH config_value AS + ( SELECT name + , setting::bigint AS config_value + FROM pg_settings + WHERE name IN ( 'block_size', 'shared_buffers', 'work_mem' ) + ) + , config_value1 AS + ( SELECT (SELECT config_value FROM config_value WHERE name = 'block_size') AS block_size + , (SELECT config_value FROM config_value WHERE name = 'shared_buffers') AS shared_buffers + , (SELECT config_value FROM config_value WHERE name = 'work_mem') AS work_mem + ) + SELECT block_size + , (shared_buffers * block_size)::bigint + , (work_mem * 1024)::bigint + INTO l_block_size + , o_shared_buffer_size + , o_work_memory_size + FROM config_value1; + + /* Commented since these statistics are not used for node and cluster reports + */ + o_session_memory_total_size := 0; + o_session_memory_used_size := 0; + + SELECT SUM(blks_read)::bigint + , SUM(blks_hit)::bigint + , SUM(xact_commit)::bigint + , SUM(xact_rollback)::bigint + INTO o_blocks_read + , o_blocks_hit + , o_xact_commit + , o_xact_rollback + FROM pg_stat_database; + + o_db_size := 0; + IF i_skip_supper_role = 'TRUE' + THEN + WITH session_state AS + ( SELECT state, waiting , usename + FROM pg_stat_activity a, pg_roles r + WHERE r.rolsuper = 'f' AND a.usename = r.rolname + ) + , active_session AS + ( SELECT state, waiting , usename + FROM session_state s, pg_roles r + WHERE s.state IN ('active', 'fastpath function call', 'retrying') + AND r.rolsuper = 'f' AND s.usename = r.rolname + ) + SELECT ( SELECT COUNT(*) FROM active_session ) + , ( SELECT COUNT(*) FROM active_session WHERE waiting = TRUE ) + , ( SELECT COUNT(*) FROM session_state ) + INTO o_active_sql_count, o_wait_sql_count , o_session_count + ; + ELSE + WITH session_state AS + ( SELECT state, waiting + FROM pg_stat_activity + ) + , active_session AS + ( SELECT state, waiting + FROM session_state + WHERE state IN ('active', 'fastpath function call', 'retrying') + ) + SELECT ( SELECT COUNT(*) FROM active_session ) + , ( SELECT COUNT(*) FROM active_session WHERE waiting = TRUE ) + , ( SELECT COUNT(*) FROM session_state ) + INTO o_active_sql_count, o_wait_sql_count, o_session_count + ; + END IF; + + -- Currently, the below statistics are calculated from pv_session_stat (which is not accurate) since pv_db_stat is not available + WITH sort_state AS + ( SELECT statname + , SUM(value)::bigint AS sorts_cnt + FROM gs_session_stat + WHERE statname IN ('n_sort_in_memory', 'n_sort_in_disk') + GROUP BY statname + ) + SELECT (SELECT sorts_cnt FROM sort_state WHERE statname = 'n_sort_in_memory') + , (SELECT sorts_cnt FROM sort_state WHERE statname = 'n_sort_in_disk') + INTO o_sorts_in_memory + , o_sorts_in_disk + ; + + SELECT SUM(checkpoints_timed)::numeric(40,0) + , SUM(checkpoints_req)::numeric(40,0) + , SUM(checkpoint_write_time)::bigint + INTO o_checkpoints_timed + , o_checkpoints_req + , o_checkpoint_write_time + FROM pg_stat_bgwriter; + +END; +$$ +LANGUAGE plpgsql; + +/* +pmk.find_node_stat +*/ +CREATE OR REPLACE FUNCTION pmk.find_node_stat + (IN i_skip_supper_role boolean + , OUT o_number_of_files_1 int + , OUT o_physical_reads_1 bigint + , OUT o_physical_writes_1 bigint + , OUT o_read_time_1 bigint + , OUT o_write_time_1 bigint + , OUT o_physical_memory_1 bigint + , OUT o_shared_buffer_size_1 bigint + , OUT o_session_memory_total_size_1 bigint + , OUT o_session_memory_used_size_1 bigint + , OUT o_blocks_read_1 bigint + , OUT o_blocks_hit_1 bigint + , OUT o_db_size_1 bigint + , OUT o_work_memory_size_1 bigint + , OUT 
o_sorts_in_memory_1 bigint + , OUT o_sorts_in_disk_1 bigint + , OUT o_active_sql_count_1 int + , OUT o_wait_sql_count_1 int + , OUT o_session_count_1 int + , OUT o_busy_time_1 numeric + , OUT o_idle_time_1 numeric + , OUT o_iowait_time_1 numeric + , OUT o_db_cpu_time_1 numeric + , OUT o_db_memory_usage_1 bigint + , OUT o_node_startup_time_1 timestamp with time zone + , OUT o_node_host_name_1 text + , OUT o_xact_commit_1 bigint + , OUT o_xact_rollback_1 bigint + , OUT o_checkpoints_timed_1 numeric(40,0) + , OUT o_checkpoints_req_1 numeric(40,0) + , OUT o_checkpoint_write_time_1 double precision + ) +AS +$$ +BEGIN + + SELECT o_number_of_files + , o_physical_reads + , o_physical_writes + , o_read_time + , o_write_time + , o_physical_memory + , o_shared_buffer_size + , o_session_memory_total_size + , o_session_memory_used_size + , o_blocks_read + , o_blocks_hit + , o_db_size + , o_work_memory_size + , o_sorts_in_memory + , o_sorts_in_disk + , o_active_sql_count + , o_wait_sql_count + , o_session_count + , o_busy_time + , o_idle_time + , o_iowait_time + , o_db_cpu_time + , o_db_memory_usage + , o_node_startup_time + , o_node_host_name + , o_xact_commit + , o_xact_rollback + , o_checkpoints_timed + , o_checkpoints_req + , o_checkpoint_write_time + INTO o_number_of_files_1 + , o_physical_reads_1 + , o_physical_writes_1 + , o_read_time_1 + , o_write_time_1 + , o_physical_memory_1 + , o_shared_buffer_size_1 + , o_session_memory_total_size_1 + , o_session_memory_used_size_1 + , o_blocks_read_1 + , o_blocks_hit_1 + , o_db_size_1 + , o_work_memory_size_1 + , o_sorts_in_memory_1 + , o_sorts_in_disk_1 + , o_active_sql_count_1 + , o_wait_sql_count_1 + , o_session_count_1 + , o_busy_time_1 + , o_idle_time_1 + , o_iowait_time_1 + , o_db_cpu_time_1 + , o_db_memory_usage_1 + , o_node_startup_time_1 + , o_node_host_name_1 + , o_xact_commit_1 + , o_xact_rollback_1 + , o_checkpoints_timed_1 + , o_checkpoints_req_1 + , o_checkpoint_write_time_1 + FROM pmk.find_perf_stat(i_skip_supper_role); + +END; +$$ +LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION pmk.load_datanode_stat + ( IN i_snapshot_id int + , IN i_last_snapshot_id int + , IN i_pmk_last_collect_start_time timestamp with time zone + , IN i_node_name text + , IN i_number_of_files int + , IN i_physical_reads bigint + , IN i_physical_writes bigint + , IN i_read_time bigint + , IN i_write_time bigint + , IN i_physical_memory bigint + , IN i_shared_buffer_size bigint + , IN i_session_memory_total_size bigint + , IN i_session_memory_used_size bigint + , IN i_blocks_read bigint + , IN i_blocks_hit bigint + , IN i_db_size bigint + , IN i_work_memory_size bigint + , IN i_sorts_in_memory bigint + , IN i_sorts_in_disk bigint + , IN i_active_sql_count int + , IN i_wait_sql_count int + , IN i_session_count int + , IN i_busy_time numeric + , IN i_idle_time numeric + , IN i_iowait_time numeric + , IN i_db_cpu_time numeric + , IN i_db_memory_usage bigint + , IN i_node_startup_time timestamp with time zone + , IN i_node_host_name text + , IN i_xact_commit bigint + , IN i_xact_rollback bigint + , IN i_checkpoints_timed numeric(40,0) + , IN i_checkpoints_req numeric(40,0) + , IN i_checkpoint_write_time double precision + , IN i_skip_supper_role boolean + , OUT o_dn_snapshot_id int + , OUT o_dn_node_name text -- node name from pgxc_node + , OUT o_dn_node_host text -- node host from pgxc_node + , OUT o_dn_last_startup_time timestamp with time zone -- last restart time of the node before snapshot starts + , OUT o_dn_number_of_files int + , OUT o_dn_physical_reads bigint + , 
OUT o_dn_physical_reads_delta bigint + , OUT o_dn_physical_writes bigint + , OUT o_dn_physical_writes_delta bigint + , OUT o_dn_read_time bigint + , OUT o_dn_read_time_delta bigint + , OUT o_dn_write_time bigint + , OUT o_dn_write_time_delta bigint + , OUT o_dn_db_size bigint + , OUT o_dn_active_sql_count int + , OUT o_dn_wait_sql_count int + , OUT o_dn_session_count int + , OUT o_dn_xact_commit bigint + , OUT o_dn_xact_commit_delta bigint + , OUT o_dn_xact_rollback bigint + , OUT o_dn_xact_rollback_delta bigint + , OUT o_dn_checkpoints_timed numeric(40,0) + , OUT o_dn_checkpoints_timed_delta numeric(40,0) + , OUT o_dn_checkpoints_req numeric(40,0) + , OUT o_dn_checkpoints_req_delta numeric(40,0) + , OUT o_dn_checkpoint_write_time double precision + , OUT o_dn_checkpoint_write_time_delta double precision + , OUT o_dn_physical_memory bigint + , OUT o_dn_db_memory_usage bigint + , OUT o_dn_shared_buffer_size bigint + , OUT o_dn_session_memory_total_size bigint + , OUT o_dn_session_memory_used_size bigint + , OUT o_dn_blocks_read bigint + , OUT o_dn_blocks_read_delta bigint + , OUT o_dn_blocks_hit bigint + , OUT o_dn_blocks_hit_delta bigint + , OUT o_dn_work_memory_size bigint + , OUT o_dn_sorts_in_memory bigint + , OUT o_dn_sorts_in_memory_delta bigint + , OUT o_dn_sorts_in_disk bigint + , OUT o_dn_sorts_in_disk_delta bigint + , OUT o_dn_busy_time numeric + , OUT o_dn_busy_time_delta numeric + , OUT o_dn_idle_time numeric + , OUT o_dn_idle_time_delta numeric + , OUT o_dn_iowait_time numeric + , OUT o_dn_iowait_time_delta numeric + , OUT o_dn_db_cpu_time numeric + , OUT o_dn_db_cpu_time_delta numeric + ) +AS +$$ +DECLARE l_physical_reads_delta bigint; + l_physical_writes_delta bigint; + l_read_time_delta bigint; + l_write_time_delta bigint; + l_blocks_read_delta bigint; + l_blocks_hit_delta bigint; + l_sorts_in_memory_delta bigint; + l_sorts_in_disk_delta bigint; + l_busy_time_delta numeric; + l_idle_time_delta numeric; + l_iowait_time_delta numeric; + l_db_cpu_time_delta numeric; + l_xact_commit_delta bigint; + l_xact_rollback_delta bigint; + l_checkpoints_timed_delta numeric(40,0); + l_checkpoints_req_delta numeric(40,0); + l_checkpoint_write_time_delta double precision; + i_skip_supper_role_delta boolean; +BEGIN + + l_physical_reads_delta := i_physical_reads; + l_physical_writes_delta := i_physical_writes; + l_read_time_delta := i_read_time; + l_write_time_delta := i_write_time; + l_xact_commit_delta := i_xact_commit; + l_xact_rollback_delta := i_xact_rollback; + l_checkpoints_timed_delta := i_checkpoints_timed; + l_checkpoints_req_delta := i_checkpoints_req; + l_checkpoint_write_time_delta := i_checkpoint_write_time; + i_skip_supper_role_delta := i_skip_supper_role; + l_blocks_read_delta := i_blocks_read; + l_blocks_hit_delta := i_blocks_hit; + + l_busy_time_delta := i_busy_time; + l_idle_time_delta := i_idle_time; + l_iowait_time_delta := i_iowait_time; + l_db_cpu_time_delta := i_db_cpu_time; + + -- Currently, the below statistics are calculated from pv_session_stat (which is not accurate) since pv_db_stat is not available + -- These statistics are cumulative from instance startup. 
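+-- In this single-instance variant, each *_delta local below is simply assigned the
+-- raw cumulative input value; no subtraction against a previous snapshot is done
+-- here, and i_last_snapshot_id / i_pmk_last_collect_start_time are not referenced
+-- in the assignments that follow.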
+    l_sorts_in_memory_delta := i_sorts_in_memory;
+    l_sorts_in_disk_delta := i_sorts_in_disk;
+
+    o_dn_snapshot_id := i_snapshot_id;
+    o_dn_node_name := i_node_name;
+    o_dn_node_host := i_node_host_name;
+    o_dn_last_startup_time := i_node_startup_time;
+    o_dn_number_of_files := i_number_of_files;
+    o_dn_physical_reads := i_physical_reads;
+    o_dn_physical_reads_delta := l_physical_reads_delta;
+    o_dn_physical_writes := i_physical_writes;
+    o_dn_physical_writes_delta := l_physical_writes_delta;
+    o_dn_read_time := i_read_time;
+    o_dn_read_time_delta := l_read_time_delta;
+    o_dn_write_time := i_write_time;
+    o_dn_write_time_delta := l_write_time_delta;
+    o_dn_db_size := i_db_size;
+    o_dn_active_sql_count := i_active_sql_count;
+    o_dn_wait_sql_count := i_wait_sql_count;
+    o_dn_session_count := i_session_count;
+    o_dn_xact_commit := i_xact_commit;
+    o_dn_xact_commit_delta := l_xact_commit_delta;
+    o_dn_xact_rollback := i_xact_rollback;
+    o_dn_xact_rollback_delta := l_xact_rollback_delta;
+    o_dn_checkpoints_timed := i_checkpoints_timed;
+    o_dn_checkpoints_timed_delta := l_checkpoints_timed_delta;
+    o_dn_checkpoints_req := i_checkpoints_req;
+    o_dn_checkpoints_req_delta := l_checkpoints_req_delta;
+    o_dn_checkpoint_write_time := i_checkpoint_write_time;
+    o_dn_checkpoint_write_time_delta := l_checkpoint_write_time_delta;
+    o_dn_physical_memory := i_physical_memory;
+    o_dn_db_memory_usage := i_db_memory_usage;
+    o_dn_shared_buffer_size := i_shared_buffer_size;
+    o_dn_session_memory_total_size := i_session_memory_total_size;
+    o_dn_session_memory_used_size := i_session_memory_used_size;
+    o_dn_blocks_read := i_blocks_read;
+    o_dn_blocks_read_delta := l_blocks_read_delta;
+    o_dn_blocks_hit := i_blocks_hit;
+    o_dn_blocks_hit_delta := l_blocks_hit_delta;
+    o_dn_work_memory_size := i_work_memory_size;
+    o_dn_sorts_in_memory := i_sorts_in_memory;
+    o_dn_sorts_in_memory_delta := l_sorts_in_memory_delta;
+    o_dn_sorts_in_disk := i_sorts_in_disk;
+    o_dn_sorts_in_disk_delta := l_sorts_in_disk_delta;
+    o_dn_busy_time := i_busy_time;
+    o_dn_busy_time_delta := l_busy_time_delta;
+    o_dn_idle_time := i_idle_time;
+    o_dn_idle_time_delta := l_idle_time_delta;
+    o_dn_iowait_time := i_iowait_time;
+    o_dn_iowait_time_delta := l_iowait_time_delta;
+    o_dn_db_cpu_time := i_db_cpu_time;
+    o_dn_db_cpu_time_delta := l_db_cpu_time_delta;
+
+END;
+$$
+LANGUAGE plpgsql;
+
+/*
+   This function finds the performance statistics of a single node (datanode or coordinator).
+   Once the statistics of each node have been collected, they are inserted into the PMK tables (pmk.pmk_snapshot_datanode_stat for datanodes and pmk.pmk_snapshot_coordinator_stat for coordinators).
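+
+   Example invocation (illustrative; the node name 'datanode1' is hypothetical):
+     SELECT * FROM pmk.load_node_stat(date_trunc('second', current_timestamp), NULL, NULL, 'datanode1', 'D', false);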
+*/ +CREATE OR REPLACE FUNCTION pmk.load_node_stat + ( IN i_pmk_curr_collect_start_time TIMESTAMP WITH TIME ZONE + , IN i_pmk_last_collect_start_time TIMESTAMP WITH TIME ZONE + , IN i_last_snapshot_id INT + , IN i_node_name TEXT + , IN i_node_type char(1) + , IN i_skip_supper_role boolean + ) +RETURNS TABLE +( + snapshot_id int +, node_name text +, node_host text +, last_startup_time timestamp with time zone +, number_of_files int +, physical_reads bigint +, physical_reads_delta bigint +, physical_writes bigint +, physical_writes_delta bigint +, read_time bigint +, read_time_delta bigint +, write_time bigint +, write_time_delta bigint +, db_size bigint +, active_sql_count int +, wait_sql_count int +, session_count int +, xact_commit bigint +, xact_commit_delta bigint +, xact_rollback bigint +, xact_rollback_delta bigint +, checkpoints_timed numeric(40,0) +, checkpoints_timed_delta numeric(40,0) +, checkpoints_req numeric(40,0) +, checkpoints_req_delta numeric(40,0) +, checkpoint_write_time double precision +, checkpoint_write_time_delta double precision +, physical_memory bigint +, db_memory_usage bigint +, shared_buffer_size bigint +, session_memory_total_size bigint +, session_memory_used_size bigint +, blocks_read bigint +, blocks_read_delta bigint +, blocks_hit bigint +, blocks_hit_delta bigint +, work_memory_size bigint +, sorts_in_memory bigint +, sorts_in_memory_delta bigint +, sorts_in_disk bigint +, sorts_in_disk_delta bigint +, busy_time numeric +, busy_time_delta numeric +, idle_time numeric +, idle_time_delta numeric +, iowait_time numeric +, iowait_time_delta numeric +, db_cpu_time numeric +, db_cpu_time_delta numeric +) +AS +$$ +DECLARE l_snapshot_id INT; + l_query_str TEXT; + l_node_stat_cur RECORD; +BEGIN + + IF i_last_snapshot_id IS NULL + OR i_last_snapshot_id = 2147483647 + THEN + l_snapshot_id := 1; + ELSE + l_snapshot_id := i_last_snapshot_id + 1; + END IF; + + FOR l_node_stat_cur IN SELECT * FROM pmk.find_node_stat(i_skip_supper_role) + LOOP + RETURN QUERY + (SELECT * FROM pmk.load_datanode_stat ( l_snapshot_id + , i_last_snapshot_id + , i_pmk_last_collect_start_time + , i_node_name + , l_node_stat_cur.o_number_of_files_1 + , l_node_stat_cur.o_physical_reads_1 + , l_node_stat_cur.o_physical_writes_1 + , l_node_stat_cur.o_read_time_1 + , l_node_stat_cur.o_write_time_1 + , l_node_stat_cur.o_physical_memory_1 + , l_node_stat_cur.o_shared_buffer_size_1 + , l_node_stat_cur.o_session_memory_total_size_1 + , l_node_stat_cur.o_session_memory_used_size_1 + , l_node_stat_cur.o_blocks_read_1 + , l_node_stat_cur.o_blocks_hit_1 + , l_node_stat_cur.o_db_size_1 + , l_node_stat_cur.o_work_memory_size_1 + , l_node_stat_cur.o_sorts_in_memory_1 + , l_node_stat_cur.o_sorts_in_disk_1 + , l_node_stat_cur.o_active_sql_count_1 + , l_node_stat_cur.o_wait_sql_count_1 + , l_node_stat_cur.o_session_count_1 + , l_node_stat_cur.o_busy_time_1 + , l_node_stat_cur.o_idle_time_1 + , l_node_stat_cur.o_iowait_time_1 + , l_node_stat_cur.o_db_cpu_time_1 + , l_node_stat_cur.o_db_memory_usage_1 + , l_node_stat_cur.o_node_startup_time_1 + , l_node_stat_cur.o_node_host_name_1 + , l_node_stat_cur.o_xact_commit_1 + , l_node_stat_cur.o_xact_rollback_1 + , l_node_stat_cur.o_checkpoints_timed_1 + , l_node_stat_cur.o_checkpoints_req_1 + , l_node_stat_cur.o_checkpoint_write_time_1 + , i_skip_supper_role + )); + END LOOP; + +END; +$$ +LANGUAGE plpgsql; + +-- This function is used to delete the statistics snapshots based on "collection count" config param + +CREATE OR REPLACE FUNCTION pmk.delete_expired_snapshots ( ) 
+RETURNS void
+AS
+$$
+DECLARE l_collection_count INT;
+        l_retention_snapshot_id INT;
+BEGIN
+
+    -- Deleting node statistics based on "collection count" config param
+    SELECT config_value
+      INTO l_collection_count
+      FROM pmk.pmk_configuration
+     WHERE config_param_name = 'Collection Count';
+
+    IF l_collection_count > -1
+    THEN
+        IF l_collection_count = 0
+        THEN
+            l_collection_count := 1;
+        END IF;
+
+        SELECT MIN(snapshot_id)
+          INTO l_retention_snapshot_id
+          FROM ( SELECT snapshot_id
+                   FROM pmk.pmk_snapshot
+                  ORDER BY snapshot_id DESC
+                  LIMIT l_collection_count );
+
+        DELETE FROM pmk.pmk_snapshot_datanode_stat
+         WHERE snapshot_id < l_retention_snapshot_id;
+
+        DELETE FROM pmk.pmk_snapshot
+         WHERE snapshot_id < l_retention_snapshot_id;
+
+    END IF;
+
+END;
+$$
+LANGUAGE plpgsql;
+
+CREATE OR REPLACE FUNCTION pmk.get_meta_data
+    ( OUT l_pmk_curr_collect_start_time timestamp with time zone
+    , OUT l_pmk_last_collect_start_time timestamp with time zone
+    , OUT l_last_snapshot_id int
+    )
+AS
+$$
+DECLARE l_error_message TEXT;
+BEGIN
+    l_error_message := pmk.check_node_type();
+
+    IF l_error_message IS NOT NULL
+    THEN
+        raise notice '%',l_error_message;
+        RETURN;
+    END IF;
+
+    l_error_message := pmk.check_pmk_enabled();
+
+    IF l_error_message IS NOT NULL
+    THEN
+        raise notice '%',l_error_message;
+        RETURN;
+    END IF;
+
+    SELECT last_snapshot_id, last_snapshot_collect_time
+      INTO l_last_snapshot_id, l_pmk_last_collect_start_time
+      FROM pmk.pmk_meta_data;
+
+    l_pmk_curr_collect_start_time := date_trunc('second', current_timestamp);
+
+    IF l_pmk_curr_collect_start_time < l_pmk_last_collect_start_time
+    THEN
+        l_error_message := 'ERROR:: There is a change in system time of Gauss MPPDB host. PMK does not support the scenarios related to system time change.';
+        raise notice '%',l_error_message;
+        RETURN;
+    ELSIF l_pmk_curr_collect_start_time = l_pmk_last_collect_start_time
+    THEN
+        l_error_message := 'ERROR:: Multiple statistics-collections can not be done within a second.';
+        raise notice '%',l_error_message;
+        RETURN;
+    END IF;
+END;
+$$
+LANGUAGE plpgsql;
+
+/* This function returns the list of node names known to the instance;
+   every node is reported as a datanode ('D'). */
+CREATE OR REPLACE FUNCTION pmk.get_pgxc_node
+    ( OUT o_node_name TEXT
+    , OUT o_node_type CHAR(1)
+    )
+RETURNS SETOF RECORD
+AS
+$$
+DECLARE l_error_message TEXT;
+        v_rec RECORD;
+BEGIN
+    l_error_message := pmk.check_node_type();
+
+    IF l_error_message IS NOT NULL
+    THEN
+        raise notice '%',l_error_message;
+        RETURN;
+    END IF;
+
+    l_error_message := pmk.check_pmk_enabled();
+
+    IF l_error_message IS NOT NULL
+    THEN
+        raise notice '%',l_error_message;
+        RETURN;
+    END IF;
+
+    FOR v_rec IN (SELECT node_name FROM DBE_PERF.node_name) LOOP
+        o_node_name := v_rec.node_name;
+        o_node_type := 'D';
+        RETURN NEXT;
+    END LOOP;
+
+END;
+$$
+LANGUAGE plpgsql;
+
+/*
+   If start time is greater than the last stat-collect time, it throws an error.
+   If end time is null, it assigns the last statistics-collection time to the end time.
+   If both start time and end time are null, it assigns the last statistics-collection time to both parameters.
+   If start time is null and end time is not null, it throws an error.
+   If start time is greater than end time, it throws an error.
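+
+   Worked example (illustrative times): if the last statistics-collection time is
+   2021-04-10 22:00:00 and both parameters are NULL, both are set to 2021-04-10 22:00:00;
+   a NULL start time combined with a non-NULL end time is rejected as an invalid time-range.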
+*/ +CREATE OR REPLACE FUNCTION pmk.check_start_end_dates + ( INOUT io_start_pmk_time timestamp with time zone + , INOUT io_end_pmk_time timestamp with time zone + , OUT o_error_message text + ) +AS +$$ +DECLARE l_last_collect_time timestamp with time zone; +BEGIN + + SELECT last_snapshot_collect_time + INTO l_last_collect_time + FROM pmk.pmk_meta_data; + + IF io_start_pmk_time > l_last_collect_time + THEN + o_error_message := 'ERROR:: The from-time provided is greater than the last statistics-collection time(' || l_last_collect_time || '). Invalid value(s) provided for the input time-range'; + RETURN; + END IF; + + IF io_end_pmk_time IS NULL + THEN + io_end_pmk_time := l_last_collect_time; + + IF io_start_pmk_time IS NULL + THEN + io_start_pmk_time := io_end_pmk_time; + END IF; + ELSE + IF (io_start_pmk_time IS NULL) OR + (io_start_pmk_time > io_end_pmk_time) + THEN + o_error_message := 'ERROR:: Invalid value(s) provided for the input time-range'; + RETURN; + END IF; + END IF; + +END; +$$ +LANGUAGE plpgsql; + +-- Host CPU statistics at cluster level + +CREATE OR REPLACE FUNCTION pmk.get_cluster_host_cpu_stat + ( IN i_start_pmk_time timestamp with time zone + , IN i_end_pmk_time timestamp with time zone + , OUT o_stat_collect_time timestamp + , OUT o_avg_cpu_total_time numeric(21, 3) + , OUT o_avg_cpu_busy_time numeric(21, 3) + , OUT o_avg_cpu_iowait_time numeric(21, 3) + , OUT o_cpu_busy_perc numeric(5, 2) + , OUT o_cpu_io_wait_perc numeric(5, 2) + , OUT o_min_cpu_busy_perc numeric(5, 2) + , OUT o_max_cpu_busy_perc numeric(5, 2) + , OUT o_min_cpu_iowait_perc numeric(5, 2) + , OUT o_max_cpu_iowait_perc numeric(5, 2) + ) +RETURNS SETOF record +AS +$$ +DECLARE l_error_message text; +BEGIN + + l_error_message := pmk.check_node_type(); + + IF l_error_message IS NOT NULL + THEN + raise notice '%',l_error_message; + RETURN; + END IF; + + l_error_message := pmk.check_pmk_enabled(); + + IF l_error_message IS NOT NULL + THEN + raise notice '%',l_error_message; + RETURN; + END IF; + + -- Verifying the input start and end times + pmk.check_start_end_dates(i_start_pmk_time, i_end_pmk_time, l_error_message); + + IF l_error_message IS NOT NULL + THEN + l_error_message := l_error_message || ' during generation of cluster host CPU statistics ...'; + raise notice '%',l_error_message; + RETURN; + END IF; + + RETURN QUERY + WITH snap AS + ( SELECT snapshot_id + , current_snapshot_time AS pmk_curr_collect_start_time + FROM pmk.pmk_snapshot + WHERE current_snapshot_time BETWEEN i_start_pmk_time AND i_end_pmk_time + ) + , os_cpu_stat AS + ( SELECT s.pmk_curr_collect_start_time + , node_host + , node_name + , (busy_time_delta * 10) AS cpu_busy_time + , (idle_time_delta * 10) AS cpu_idle_time + , (iowait_time_delta * 10) AS cpu_iowait_time + FROM pmk.pmk_snapshot_datanode_stat dns, snap s + WHERE dns.snapshot_id = s.snapshot_id + ) + , os_cpu_stat1 AS + ( SELECT pmk_curr_collect_start_time::timestamp AS stat_collect_time + , node_host + , cpu_busy_time + , cpu_idle_time + , cpu_iowait_time + , (cpu_busy_time+cpu_idle_time+cpu_iowait_time)::numeric AS cpu_total_time + FROM ( SELECT pmk_curr_collect_start_time + , node_host + , cpu_busy_time + , cpu_idle_time + , cpu_iowait_time + , rank() OVER (PARTITION BY pmk_curr_collect_start_time, node_host ORDER BY cpu_busy_time DESC, node_name) AS node_cpu_busy_order + FROM os_cpu_stat + ) + WHERE node_cpu_busy_order = 1 + ) + SELECT hcs.stat_collect_time + , AVG(hcs.cpu_total_time)::numeric(21, 3) AS avg_cpu_total_time + , AVG(hcs.cpu_busy_time)::numeric(21, 3) AS 
avg_cpu_busy_time + , AVG(hcs.cpu_iowait_time)::numeric(21, 3) AS avg_cpu_iowait_time + , ( (SUM(cpu_busy_time) * 100.0) / NULLIF(SUM(cpu_total_time), 0) )::numeric(5, 2) AS cpu_busy_perc + , ( (SUM(cpu_iowait_time) * 100.0) / NULLIF(SUM(cpu_total_time), 0) )::numeric(5, 2) AS cpu_io_wait_perc + , MIN(hcs.cpu_busy_time_perc)::numeric(5, 2) AS min_cpu_busy_perc + , MAX(hcs.cpu_busy_time_perc)::numeric(5, 2) AS max_cpu_busy_perc + , MIN(hcs.cpu_iowait_time_perc)::numeric(5, 2) AS min_cpu_iowait_perc + , MAX(hcs.cpu_iowait_time_perc)::numeric(5, 2) AS max_cpu_iowait_perc + FROM ( SELECT node_host + , stat_collect_time + , cpu_total_time + , cpu_busy_time + , cpu_iowait_time + , ( (cpu_busy_time * 100.0) / NULLIF(cpu_total_time, 0) )::numeric(5, 2) AS cpu_busy_time_perc + , ( (cpu_iowait_time * 100.0) / NULLIF(cpu_total_time, 0) )::numeric(5, 2) AS cpu_iowait_time_perc + FROM os_cpu_stat1 ) hcs + GROUP BY hcs.stat_collect_time + ORDER BY hcs.stat_collect_time; + +END; +$$ +LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION pmk.get_cluster_mppdb_cpu_stat + ( IN i_start_pmk_time timestamp with time zone + , IN i_end_pmk_time timestamp with time zone + , OUT o_stat_collect_time timestamp + , OUT o_avg_mppdb_cpu_time numeric(21, 3) + , OUT o_avg_host_cpu_busy_time numeric(21, 3) + , OUT o_avg_host_cpu_total_time numeric(21, 3) + , OUT o_mppdb_cpu_time_perc_wrt_busy_time numeric(5, 2) + , OUT o_mppdb_cpu_time_perc_wrt_total_time numeric(5, 2) + , OUT o_min_mppdb_cpu_time_perc_wrt_busy_time numeric(5, 2) + , OUT o_max_mppdb_cpu_time_perc_wrt_busy_time numeric(5, 2) + , OUT o_min_mppdb_cpu_time_perc_wrt_total_time numeric(5, 2) + , OUT o_max_mppdb_cpu_time_perc_wrt_total_time numeric(5, 2) + ) +RETURNS SETOF record +AS +$$ +DECLARE l_error_message text; +BEGIN + + l_error_message := pmk.check_node_type(); + + IF l_error_message IS NOT NULL + THEN + raise notice '%',l_error_message; + RETURN; + END IF; + + l_error_message := pmk.check_pmk_enabled(); + + IF l_error_message IS NOT NULL + THEN + raise notice '%',l_error_message; + RETURN; + END IF; + + -- Verifying the input start and end times + pmk.check_start_end_dates(i_start_pmk_time, i_end_pmk_time, l_error_message); + + IF l_error_message IS NOT NULL + THEN + l_error_message := l_error_message || ' during generation of cluster MPPDB CPU statistics ...'; + raise notice '%',l_error_message; + RETURN; + END IF; + + RETURN QUERY + WITH snap AS + ( SELECT snapshot_id + , current_snapshot_time AS pmk_curr_collect_start_time + FROM pmk.pmk_snapshot + WHERE current_snapshot_time BETWEEN i_start_pmk_time AND i_end_pmk_time + ) + , cpu_stat AS + ( SELECT s.snapshot_id + , s.pmk_curr_collect_start_time + , dns.node_name + , dns.node_host + , (dns.busy_time_delta * 10) AS host_cpu_busy_time + , (dns.idle_time_delta * 10) AS host_cpu_idle_time + , (dns.iowait_time_delta * 10) AS host_cpu_iowait_time + , (dns.db_cpu_time_delta * 10) AS mppdb_cpu_time + FROM pmk.pmk_snapshot_datanode_stat dns, snap s + WHERE dns.snapshot_id = s.snapshot_id + ) + , host_cpu_stat AS + ( SELECT pmk_curr_collect_start_time::timestamp AS stat_collect_time + , node_host + , host_cpu_busy_time + , host_cpu_idle_time + , host_cpu_iowait_time + , (host_cpu_busy_time+host_cpu_idle_time+host_cpu_iowait_time)::numeric AS host_cpu_total_time + FROM ( SELECT pmk_curr_collect_start_time + , node_host + , host_cpu_busy_time + , host_cpu_idle_time + , host_cpu_iowait_time + , rank() OVER (PARTITION BY snapshot_id, node_host + ORDER BY host_cpu_busy_time DESC, node_name) AS node_cpu_busy_order + 
FROM cpu_stat + ) + WHERE node_cpu_busy_order = 1 + ) + , host_cpu_stat_summary AS + ( SELECT stat_collect_time + , AVG(host_cpu_busy_time)::numeric(21, 3) AS avg_host_cpu_busy_time + , AVG(host_cpu_total_time)::numeric(21, 3) AS avg_host_cpu_total_time + , SUM(host_cpu_busy_time)::numeric(21, 3) AS tot_host_cpu_busy_time + , SUM(host_cpu_total_time)::numeric(21, 3) AS tot_host_cpu_total_time + FROM host_cpu_stat + GROUP BY stat_collect_time + ) + , mppdb_cpu_stat0 AS + ( SELECT pmk_curr_collect_start_time::timestamp AS stat_collect_time + , node_name + , mppdb_cpu_time + , host_cpu_busy_time + , (host_cpu_busy_time+host_cpu_idle_time+host_cpu_iowait_time)::numeric AS host_cpu_total_time + FROM cpu_stat + ) + , mppdb_cpu_stat AS + ( SELECT stat_collect_time + , node_name + , mppdb_cpu_time + , ( (mppdb_cpu_time * 100.0) / NULLIF(host_cpu_busy_time, 0) )::numeric(5, 2) AS mppdb_cpu_time_perc_wrt_busy_time + , ( (mppdb_cpu_time * 100.0) / NULLIF(host_cpu_total_time, 0) )::numeric(5, 2) AS mppdb_cpu_time_perc_wrt_total_time + FROM mppdb_cpu_stat0 + ) + , mppdb_cpu_stat_summary AS + ( SELECT stat_collect_time + , AVG(mppdb_cpu_time)::numeric(21, 3) AS avg_mppdb_cpu_time + , SUM(mppdb_cpu_time)::numeric(21, 3) AS tot_mppdb_cpu_time + , MIN(mppdb_cpu_time_perc_wrt_busy_time)::numeric(5, 2) AS min_mppdb_cpu_time_perc_wrt_busy_time + , MAX(mppdb_cpu_time_perc_wrt_busy_time)::numeric(5, 2) AS max_mppdb_cpu_time_perc_wrt_busy_time + , MIN(mppdb_cpu_time_perc_wrt_total_time)::numeric(5, 2) AS min_mppdb_cpu_time_perc_wrt_total_time + , MAX(mppdb_cpu_time_perc_wrt_total_time)::numeric(5, 2) AS max_mppdb_cpu_time_perc_wrt_total_time + FROM mppdb_cpu_stat + GROUP BY stat_collect_time + ) + SELECT mcs.stat_collect_time + , mcs.avg_mppdb_cpu_time + , hcs.avg_host_cpu_busy_time + , hcs.avg_host_cpu_total_time + , CASE WHEN mcs.tot_mppdb_cpu_time < hcs.tot_host_cpu_busy_time + THEN ( (mcs.tot_mppdb_cpu_time * 100.0) / NULLIF(hcs.tot_host_cpu_busy_time, 0) )::numeric(5, 2) + ELSE 100.00 + END AS mppdb_cpu_time_perc_wrt_busy_time + , CASE WHEN mcs.tot_mppdb_cpu_time < hcs.tot_host_cpu_total_time + THEN ( (mcs.tot_mppdb_cpu_time * 100.0) / NULLIF(hcs.tot_host_cpu_total_time, 0) )::numeric(5, 2) + ELSE 100.00 + END AS mppdb_cpu_time_perc_wrt_total_time + , mcs.min_mppdb_cpu_time_perc_wrt_busy_time + , mcs.max_mppdb_cpu_time_perc_wrt_busy_time + , mcs.min_mppdb_cpu_time_perc_wrt_total_time + , mcs.max_mppdb_cpu_time_perc_wrt_total_time + FROM mppdb_cpu_stat_summary mcs + , host_cpu_stat_summary hcs + WHERE mcs.stat_collect_time = hcs.stat_collect_time + ORDER BY mcs.stat_collect_time; + +END; +$$ +LANGUAGE plpgsql; + +-- Shared buffer statistics at cluster level + +CREATE OR REPLACE FUNCTION pmk.get_cluster_shared_buffer_stat + ( IN i_start_pmk_time timestamp with time zone + , IN i_end_pmk_time timestamp with time zone + , OUT o_stat_collect_time timestamp + , OUT o_shared_buffer_hit_ratio numeric(5, 2) + , OUT o_min_shared_buffer_hit_ratio numeric(5, 2) + , OUT o_max_shared_buffer_hit_ratio numeric(5, 2) + , OUT o_total_blocks_read bigint + , OUT o_total_blocks_hit bigint + ) +RETURNS SETOF record +AS +$$ +DECLARE l_error_message text; +BEGIN + + l_error_message := pmk.check_node_type(); + + IF l_error_message IS NOT NULL + THEN + raise notice '%',l_error_message; + RETURN; + END IF; + + l_error_message := pmk.check_pmk_enabled(); + + IF l_error_message IS NOT NULL + THEN + raise notice '%',l_error_message; + RETURN; + END IF; + + -- Verifying the input start and end times + 
pmk.check_start_end_dates(i_start_pmk_time, i_end_pmk_time, l_error_message); + + IF l_error_message IS NOT NULL + THEN + l_error_message := l_error_message || ' during generation of cluster shared buffer statistics ...'; + raise notice '%',l_error_message; + RETURN; + END IF; + + RETURN QUERY + WITH snap AS + ( SELECT snapshot_id + , current_snapshot_time AS pmk_curr_collect_start_time + FROM pmk.pmk_snapshot + WHERE current_snapshot_time BETWEEN i_start_pmk_time AND i_end_pmk_time + ) + SELECT pmk_curr_collect_start_time::timestamp AS stat_collect_time + , ( (total_blocks_hit * 100.0) / NULLIF(total_blocks_read+total_blocks_hit, 0) )::numeric(5, 2) AS shared_buffer_hit_ratio + , min_shared_buffer_hit_ratio + , max_shared_buffer_hit_ratio + , total_blocks_read + , total_blocks_hit + FROM ( SELECT pmk_curr_collect_start_time + , SUM(blocks_read)::bigint AS total_blocks_read + , SUM(blocks_hit)::bigint AS total_blocks_hit + , MIN(shared_buffer_hit_ratio)::numeric(5, 2) AS min_shared_buffer_hit_ratio + , MAX(shared_buffer_hit_ratio)::numeric(5, 2) AS max_shared_buffer_hit_ratio + FROM ( SELECT s.pmk_curr_collect_start_time + , node_name + , blocks_read_delta AS blocks_read + , blocks_hit_delta AS blocks_hit + , ( (blocks_hit_delta * 100.0) / NULLIF((blocks_read_delta + blocks_hit_delta), 0) )::numeric(5, 2) AS shared_buffer_hit_ratio + FROM pmk.pmk_snapshot_datanode_stat dns, snap s + WHERE dns.snapshot_id = s.snapshot_id + ) + GROUP BY pmk_curr_collect_start_time + ) + ORDER BY stat_collect_time; + +END; +$$ +LANGUAGE plpgsql; + +-- Sort statistics at cluster level + +CREATE OR REPLACE FUNCTION pmk.get_cluster_memory_sort_stat + ( IN i_start_pmk_time timestamp with time zone + , IN i_end_pmk_time timestamp with time zone + , OUT o_stat_collect_time timestamp + , OUT o_memory_sort_ratio numeric(5, 2) + , OUT o_min_memory_sort_ratio numeric(5, 2) + , OUT o_max_memory_sort_ratio numeric(5, 2) + , OUT o_total_memory_sorts bigint + , OUT o_total_disk_sorts bigint + ) +RETURNS SETOF record +AS +$$ +DECLARE l_error_message text; +BEGIN + + l_error_message := pmk.check_node_type(); + + IF l_error_message IS NOT NULL + THEN + raise notice '%',l_error_message; + RETURN; + END IF; + + l_error_message := pmk.check_pmk_enabled(); + + IF l_error_message IS NOT NULL + THEN + raise notice '%',l_error_message; + RETURN; + END IF; + + -- Verifying the input start and end times + pmk.check_start_end_dates(i_start_pmk_time, i_end_pmk_time, l_error_message); + + IF l_error_message IS NOT NULL + THEN + l_error_message := l_error_message || ' during generation of cluster memory sort statistics ...'; + raise notice '%',l_error_message; + RETURN; + END IF; + + RETURN QUERY + WITH snap AS + ( SELECT snapshot_id + , current_snapshot_time AS pmk_curr_collect_start_time + FROM pmk.pmk_snapshot + WHERE current_snapshot_time BETWEEN i_start_pmk_time AND i_end_pmk_time + ) + SELECT pmk_curr_collect_start_time::timestamp AS stat_collect_time + , ( (total_memory_sorts * 100.0) / NULLIF(total_disk_sorts+total_memory_sorts, 0) )::numeric(5, 2) AS memory_sort_ratio + , min_memory_sort_ratio + , max_memory_sort_ratio + , total_memory_sorts + , total_disk_sorts + FROM ( SELECT pmk_curr_collect_start_time + , SUM(memory_sorts)::bigint AS total_memory_sorts + , SUM(disk_sorts)::bigint AS total_disk_sorts + , MIN(memory_sort_ratio)::numeric(5, 2) AS min_memory_sort_ratio + , MAX(memory_sort_ratio)::numeric(5, 2) AS max_memory_sort_ratio + FROM ( SELECT s.pmk_curr_collect_start_time + , node_name + , sorts_in_memory_delta AS 
memory_sorts + , sorts_in_disk_delta AS disk_sorts + , ( (sorts_in_memory_delta * 100.0) / NULLIF((sorts_in_disk_delta + sorts_in_memory_delta), 0) )::numeric(5, 2) AS memory_sort_ratio + FROM pmk.pmk_snapshot_datanode_stat dns, snap s + WHERE dns.snapshot_id = s.snapshot_id + ) + GROUP BY pmk_curr_collect_start_time + ) + ORDER BY stat_collect_time; + +END; +$$ +LANGUAGE plpgsql; + +-- I/O statistics at cluster level +CREATE OR REPLACE FUNCTION pmk.get_cluster_io_stat + ( IN i_start_pmk_time timestamp with time zone + , IN i_end_pmk_time timestamp with time zone + , OUT o_stat_collect_time timestamp + , OUT o_number_of_files int + , OUT o_physical_reads bigint + , OUT o_physical_writes bigint + , OUT o_read_time bigint + , OUT o_write_time bigint + , OUT o_avg_read_per_sec numeric(20,2) + , OUT o_avg_read_time numeric(20,3) + , OUT o_avg_write_per_sec numeric(20,2) + , OUT o_avg_write_time numeric(20,3) + , OUT o_min_node_read_per_sec numeric(20,2) + , OUT o_max_node_read_per_sec numeric(20,2) + , OUT o_min_node_read_time numeric(20,3) + , OUT o_max_node_read_time numeric(20,3) + , OUT o_min_node_write_per_sec numeric(20,2) + , OUT o_max_node_write_per_sec numeric(20,2) + , OUT o_min_node_write_time numeric(20,3) + , OUT o_max_node_write_time numeric(20,3) + ) +RETURNS SETOF record +AS +$$ +DECLARE l_error_message text; +BEGIN + + l_error_message := pmk.check_node_type(); + + IF l_error_message IS NOT NULL + THEN + raise notice '%',l_error_message; + RETURN; + END IF; + + l_error_message := pmk.check_pmk_enabled(); + + IF l_error_message IS NOT NULL + THEN + raise notice '%',l_error_message; + RETURN; + END IF; + + -- Verifying the input start and end times + pmk.check_start_end_dates(i_start_pmk_time, i_end_pmk_time, l_error_message); + + IF l_error_message IS NOT NULL + THEN + l_error_message := l_error_message || ' during generation of cluster I/O statistics ...'; + raise notice '%',l_error_message; + RETURN; + END IF; + + RETURN QUERY + WITH snap AS + ( SELECT snapshot_id + , current_snapshot_time AS pmk_curr_collect_start_time + FROM pmk.pmk_snapshot + WHERE current_snapshot_time BETWEEN i_start_pmk_time AND i_end_pmk_time + ) + SELECT pmk_curr_collect_start_time::timestamp AS stat_collect_time + , number_of_files + , physical_reads + , physical_writes + , read_time + , write_time + , ( physical_reads * 1000000.0 / NULLIF(read_time, 0) )::numeric(20,2) AS avg_read_per_sec + , ( read_time * 1.0 / NULLIF(physical_reads, 0) )::numeric(20,3) AS avg_read_time + , ( physical_writes * 1000000.0 / NULLIF(write_time, 0) )::numeric(20,2) AS avg_write_per_sec + , ( write_time * 1.0 / NULLIF(physical_writes, 0) )::numeric(20,3) AS avg_write_time + , min_node_read_per_sec + , max_node_read_per_sec + , min_node_read_time + , max_node_read_time + , min_node_write_per_sec + , max_node_write_per_sec + , min_node_write_time + , max_node_write_time + FROM ( SELECT pmk_curr_collect_start_time + , SUM(number_of_files)::int AS number_of_files + , SUM(physical_reads_delta)::bigint AS physical_reads + , SUM(physical_writes_delta)::bigint AS physical_writes + , SUM(read_time_delta)::bigint AS read_time + , SUM(write_time_delta)::bigint AS write_time + , MIN(node_read_per_sec) AS min_node_read_per_sec + , MAX(node_read_per_sec) AS max_node_read_per_sec + , MIN(node_read_time) AS min_node_read_time + , MAX(node_read_time) AS max_node_read_time + , MIN(node_write_per_sec) AS min_node_write_per_sec + , MAX(node_write_per_sec) AS max_node_write_per_sec + , MIN(node_write_time) AS min_node_write_time + , 
MAX(node_write_time) AS max_node_write_time + FROM ( SELECT s.pmk_curr_collect_start_time + , node_name + , number_of_files + , physical_reads_delta + , physical_writes_delta + , read_time_delta + , write_time_delta + , ( physical_reads_delta * 1000000.0 / NULLIF(read_time_delta, 0) )::numeric(20,2) AS node_read_per_sec + , ( read_time_delta * 1.0 / NULLIF(physical_reads_delta, 0) )::numeric(20,3) AS node_read_time + , ( physical_writes_delta * 1000000.0 / NULLIF(write_time_delta, 0) )::numeric(20,2) AS node_write_per_sec + , ( write_time_delta * 1.0 / NULLIF(physical_writes_delta, 0) )::numeric(20,3) AS node_write_time + FROM pmk.pmk_snapshot_datanode_stat dns, snap s + WHERE dns.snapshot_id = s.snapshot_id + ) + GROUP BY pmk_curr_collect_start_time + ) + ORDER BY stat_collect_time; + +END; +$$ +LANGUAGE plpgsql; + +-- Disk usage statistics at cluster level +CREATE OR REPLACE FUNCTION pmk.get_cluster_disk_usage_stat + ( IN i_start_pmk_time timestamp with time zone + , IN i_end_pmk_time timestamp with time zone + , IN i_db_size text + , OUT o_stat_collect_time timestamp + , OUT o_tot_datanode_db_size text + , OUT o_max_datanode_db_size text + , OUT o_tot_physical_writes bigint + , OUT o_max_node_physical_writes bigint + , OUT o_max_node_write_per_sec numeric(20,2) + , OUT o_avg_write_per_sec numeric(20,2) + ) +RETURNS SETOF record +AS +$$ +DECLARE l_error_message text; +DECLARE l_db_size bigint; +BEGIN + + l_error_message := pmk.check_node_type(); + + IF l_error_message IS NOT NULL + THEN + raise notice '%',l_error_message; + RETURN; + END IF; + + l_error_message := pmk.check_pmk_enabled(); + + IF l_error_message IS NOT NULL + THEN + raise notice '%',l_error_message; + RETURN; + END IF; + + -- Verifying the input start and end times + pmk.check_start_end_dates(i_start_pmk_time, i_end_pmk_time, l_error_message); + + IF l_error_message IS NOT NULL + THEN + l_error_message := l_error_message || ' during generation of cluster disk usage statistics ...'; + raise notice '%',l_error_message; + RETURN; + END IF; + + IF i_db_size = '0' + THEN + SELECT SUM(pg_database_size(oid))::bigint + INTO l_db_size + FROM pg_database; + ELSE + SELECT SUM(i_db_size)::bigint + INTO l_db_size; + END IF; + RETURN QUERY + WITH snap AS + ( SELECT snapshot_id + , current_snapshot_time AS pmk_curr_collect_start_time + FROM pmk.pmk_snapshot + WHERE current_snapshot_time BETWEEN i_start_pmk_time AND i_end_pmk_time + ) + , disk_stat AS + ( + SELECT s.pmk_curr_collect_start_time + , db_size + , physical_writes_delta + , write_time_delta + , ( physical_writes_delta * 1000000.0 / NULLIF(write_time_delta, 0) )::numeric(20,2) AS node_write_per_sec + FROM pmk.pmk_snapshot_datanode_stat dns, snap s + WHERE dns.snapshot_id = s.snapshot_id + ) + SELECT pmk_curr_collect_start_time::timestamp AS stat_collect_time + , pg_size_pretty(tot_datanode_db_size) AS tot_datanode_db_size + , pg_size_pretty(max_datanode_db_size) AS max_datanode_db_size + , tot_physical_writes + , max_node_physical_writes + , max_node_write_per_sec + , ( tot_physical_writes * 1000000.0 / NULLIF(tot_write_time, 0) )::numeric(20,2) AS avg_write_per_sec + FROM ( SELECT pmk_curr_collect_start_time + , l_db_size::bigint AS tot_datanode_db_size + , MAX(db_size)::bigint AS max_datanode_db_size + , SUM(physical_writes_delta)::bigint AS tot_physical_writes + , SUM(write_time_delta)::bigint AS tot_write_time + , MAX(physical_writes_delta)::bigint AS max_node_physical_writes + , MAX(node_write_per_sec) AS max_node_write_per_sec + FROM disk_stat + GROUP BY 
pmk_curr_collect_start_time + ) + ORDER BY pmk_curr_collect_start_time; + +END; +$$ +LANGUAGE plpgsql; + +-- Active SQL count statistics at cluster level +CREATE OR REPLACE FUNCTION pmk.get_cluster_active_sql_count + ( IN i_start_pmk_time timestamp with time zone + , IN i_end_pmk_time timestamp with time zone + , OUT o_stat_collect_time timestamp + , OUT o_tot_active_sql_count int + , OUT o_avg_active_sql_count numeric(9, 2) + , OUT o_min_active_sql_count int + , OUT o_max_active_sql_count int + ) +RETURNS SETOF record +AS +$$ +DECLARE l_error_message text; +BEGIN + + l_error_message := pmk.check_node_type(); + + IF l_error_message IS NOT NULL + THEN + raise notice '%',l_error_message; + RETURN; + END IF; + + l_error_message := pmk.check_pmk_enabled(); + + IF l_error_message IS NOT NULL + THEN + raise notice '%',l_error_message; + RETURN; + END IF; + + -- Verifying the input start and end times + pmk.check_start_end_dates(i_start_pmk_time, i_end_pmk_time, l_error_message); + + IF l_error_message IS NOT NULL + THEN + l_error_message := l_error_message || ' during generation of active SQL count statistics ...'; + raise notice '%',l_error_message; + RETURN; + END IF; + + RETURN QUERY + WITH snap AS + ( SELECT snapshot_id + , current_snapshot_time AS pmk_curr_collect_start_time + FROM pmk.pmk_snapshot + WHERE current_snapshot_time BETWEEN i_start_pmk_time AND i_end_pmk_time + ) + SELECT pmk_curr_collect_start_time::timestamp AS stat_collect_time + , tot_active_sql_count + , avg_active_sql_count + , min_active_sql_count + , max_active_sql_count + FROM ( SELECT s.pmk_curr_collect_start_time + , SUM(active_sql_count)::int AS tot_active_sql_count + , ROUND(AVG(active_sql_count), 2)::numeric(9, 2) AS avg_active_sql_count + , MIN(active_sql_count)::int AS min_active_sql_count + , MAX(active_sql_count)::int AS max_active_sql_count + FROM pmk.pmk_snapshot_datanode_stat dns, snap s + WHERE dns.snapshot_id = s.snapshot_id + GROUP BY s.pmk_curr_collect_start_time + ) + ORDER BY stat_collect_time; + +END; +$$ +LANGUAGE plpgsql; + +-- Connected session count statistics at cluster level +CREATE OR REPLACE FUNCTION pmk.get_cluster_session_count + ( IN i_start_pmk_time timestamp with time zone + , IN i_end_pmk_time timestamp with time zone + , OUT o_stat_collect_time timestamp + , OUT o_tot_session_count int + , OUT o_avg_session_count numeric(9, 2) + , OUT o_min_session_count int + , OUT o_max_session_count int + ) +RETURNS SETOF record +AS +$$ +DECLARE l_error_message text; +BEGIN + + l_error_message := pmk.check_node_type(); + + IF l_error_message IS NOT NULL + THEN + raise notice '%',l_error_message; + RETURN; + END IF; + + l_error_message := pmk.check_pmk_enabled(); + + IF l_error_message IS NOT NULL + THEN + raise notice '%',l_error_message; + RETURN; + END IF; + + -- Verifying the input start and end times + pmk.check_start_end_dates(i_start_pmk_time, i_end_pmk_time, l_error_message); + + IF l_error_message IS NOT NULL + THEN + l_error_message := l_error_message || ' during generation of session count statistics ...'; + raise notice '%',l_error_message; + RETURN; + END IF; + + RETURN QUERY + WITH snap AS + ( SELECT snapshot_id + , current_snapshot_time AS pmk_curr_collect_start_time + FROM pmk.pmk_snapshot + WHERE current_snapshot_time BETWEEN i_start_pmk_time AND i_end_pmk_time + ) + SELECT pmk_curr_collect_start_time::timestamp AS stat_collect_time + , tot_session_count + , avg_session_count + , min_session_count + , max_session_count + FROM ( SELECT s.pmk_curr_collect_start_time + , 
SUM(session_count)::int AS tot_session_count + , ROUND(AVG(session_count), 2)::numeric(9, 2) AS avg_session_count + , MIN(session_count)::int AS min_session_count + , MAX(session_count)::int AS max_session_count + FROM pmk.pmk_snapshot_datanode_stat dns, snap s + WHERE dns.snapshot_id = s.snapshot_id + GROUP BY s.pmk_curr_collect_start_time + ) + ORDER BY stat_collect_time; + +END; +$$ +LANGUAGE plpgsql; + +-- CPU statistics at node level +CREATE OR REPLACE FUNCTION pmk.get_node_cpu_stat + ( IN i_node_name text + , IN i_start_pmk_time timestamp with time zone + , IN i_end_pmk_time timestamp with time zone + , OUT o_node_type char(1) + , OUT o_node_name text + , OUT o_node_host text + , OUT o_stat_collect_time timestamp + , OUT o_mppdb_cpu_time bigint + , OUT o_host_cpu_busy_time bigint + , OUT o_host_cpu_total_time bigint + , OUT o_mppdb_cpu_time_perc_wrt_busy_time numeric(5, 2) + , OUT o_mppdb_cpu_time_perc_wrt_total_time numeric(5, 2) + ) +RETURNS SETOF record +AS +$$ +DECLARE l_node_type char(1); + l_node_name text; + l_error_message text; +BEGIN + + l_error_message := pmk.check_node_type(); + + IF l_error_message IS NOT NULL + THEN + raise notice '%',l_error_message; + RETURN; + END IF; + + l_error_message := pmk.check_pmk_enabled(); + + IF l_error_message IS NOT NULL + THEN + raise notice '%',l_error_message; + RETURN; + END IF; + + l_node_name := UPPER(i_node_name); + + IF l_node_name <> 'ALL' + THEN + l_node_type := 'D'; + + IF l_node_type IS NULL + THEN + l_error_message := 'ERROR:: Invalid node name ("' || i_node_name || '") provided during generation of node (MPPDB instance) CPU statistics ...'; + + raise notice '%',l_error_message; + RETURN; + END IF; + END IF; + + -- Verifying the input start and end times + pmk.check_start_end_dates(i_start_pmk_time, i_end_pmk_time, l_error_message); + + IF l_error_message IS NOT NULL + THEN + l_error_message := l_error_message || ' during generation of node (MPPDB instance) CPU statistics ...'; + raise notice '%',l_error_message; + RETURN; + END IF; + + IF l_node_name = 'ALL' + THEN + RETURN QUERY + WITH snap AS + ( SELECT snapshot_id + , current_snapshot_time AS pmk_curr_collect_start_time + FROM pmk.pmk_snapshot + WHERE current_snapshot_time BETWEEN i_start_pmk_time AND i_end_pmk_time + ) + SELECT node_type + , node_name + , node_host + , pmk_curr_collect_start_time::timestamp AS stat_collect_time + , mppdb_cpu_time + , host_cpu_busy_time + , (host_cpu_busy_time+host_cpu_idle_time+host_cpu_iowait_time)::bigint AS host_cpu_total_time + , ( (LEAST(mppdb_cpu_time,host_cpu_busy_time) * 100.0) / NULLIF(host_cpu_busy_time, 0) )::numeric(5, 2) AS mppdb_cpu_time_perc_wrt_busy_time + , ( (LEAST(mppdb_cpu_time,host_total_cpu_time) * 100.0) / NULLIF((host_cpu_busy_time+host_cpu_idle_time+host_cpu_iowait_time), 0) )::numeric(5, 2) AS mppdb_cpu_time_perc_wrt_total_time + FROM ( SELECT 'D'::char(1) AS node_type + , node_name + , node_host + , s.pmk_curr_collect_start_time + , (busy_time_delta * 10)::bigint AS host_cpu_busy_time + , (idle_time_delta * 10)::bigint AS host_cpu_idle_time + , (iowait_time_delta * 10)::bigint AS host_cpu_iowait_time + , ((busy_time_delta+idle_time_delta+iowait_time_delta)*10)::bigint AS host_total_cpu_time + , (db_cpu_time_delta * 10)::bigint AS mppdb_cpu_time + FROM pmk.pmk_snapshot_datanode_stat dns, snap s + WHERE dns.snapshot_id = s.snapshot_id + ) + ORDER BY node_type, node_name, stat_collect_time; + + ELSE + RETURN QUERY + WITH snap AS + ( SELECT snapshot_id + , current_snapshot_time AS pmk_curr_collect_start_time + FROM 
pmk.pmk_snapshot + WHERE current_snapshot_time BETWEEN i_start_pmk_time AND i_end_pmk_time + ) + SELECT 'D'::char(1) AS node_type + , node_name + , node_host + , pmk_curr_collect_start_time::timestamp AS stat_collect_time + , mppdb_cpu_time + , host_cpu_busy_time + , (host_cpu_busy_time+host_cpu_idle_time+host_cpu_iowait_time)::bigint AS host_cpu_total_time + , ( (mppdb_cpu_time * 100.0) / NULLIF(host_cpu_busy_time, 0) )::numeric(5, 2) AS mppdb_cpu_time_perc_wrt_busy_time + , ( (mppdb_cpu_time * 100.0) / NULLIF((host_cpu_busy_time+host_cpu_idle_time+host_cpu_iowait_time), 0) )::numeric(5, 2) AS mppdb_cpu_time_perc_wrt_total_time + FROM ( SELECT node_name + , node_host + , s.pmk_curr_collect_start_time + , (busy_time_delta * 10)::bigint AS host_cpu_busy_time + , (idle_time_delta * 10)::bigint AS host_cpu_idle_time + , (iowait_time_delta * 10)::bigint AS host_cpu_iowait_time + , (db_cpu_time_delta * 10)::bigint AS mppdb_cpu_time + FROM pmk.pmk_snapshot_datanode_stat dns, snap s + WHERE dns.snapshot_id = s.snapshot_id + AND UPPER(node_name) = l_node_name + ) + ORDER BY node_name, stat_collect_time; + END IF; -- end of l_node_name = 'ALL' + +END; +$$ +LANGUAGE plpgsql; + +-- Memory statistics at node level +CREATE OR REPLACE FUNCTION pmk.get_node_memory_stat + ( IN i_node_name text + , IN i_start_pmk_time timestamp with time zone + , IN i_end_pmk_time timestamp with time zone + , OUT o_node_type char(1) + , OUT o_node_name text + , OUT o_node_host text + , OUT o_stat_collect_time timestamp + , OUT o_physical_memory bigint + , OUT o_db_memory_usage bigint + , OUT o_shared_buffer_size bigint + , OUT o_blocks_read bigint + , OUT o_blocks_hit bigint + , OUT o_shared_buffer_hit_ratio numeric(5, 2) + , OUT o_work_memory_size bigint + , OUT o_sorts_in_memory bigint + , OUT o_sorts_in_disk bigint + , OUT o_in_memory_sort_ratio numeric(5, 2) + ) +RETURNS SETOF record +AS +$$ +DECLARE l_node_type char(1); + l_node_name text; + l_error_message text; +BEGIN + + l_error_message := pmk.check_node_type(); + + IF l_error_message IS NOT NULL + THEN + raise notice '%',l_error_message; + RETURN; + END IF; + + l_error_message := pmk.check_pmk_enabled(); + + IF l_error_message IS NOT NULL + THEN + raise notice '%',l_error_message; + RETURN; + END IF; + + l_node_name := UPPER(i_node_name); + + IF l_node_name <> 'ALL' + THEN + l_node_type := 'D'; + + IF l_node_type IS NULL + THEN + l_error_message := 'ERROR:: Invalid node name ("' || i_node_name || '") provided during generation of node (MPPDB instance) memory statistics ...'; + raise notice '%',l_error_message; + RETURN; + END IF; + END IF; + + -- Verifying the input start and end times + pmk.check_start_end_dates(i_start_pmk_time, i_end_pmk_time, l_error_message); + + IF l_error_message IS NOT NULL + THEN + l_error_message := l_error_message || ' during generation of node (MPPDB instance) memory statistics ...'; + raise notice '%',l_error_message; + RETURN; + END IF; + + IF l_node_name = 'ALL' + THEN + + RETURN QUERY + WITH snap AS + ( SELECT snapshot_id + , current_snapshot_time AS pmk_curr_collect_start_time + FROM pmk.pmk_snapshot + WHERE current_snapshot_time BETWEEN i_start_pmk_time AND i_end_pmk_time + ) + SELECT node_type + , node_name + , node_host + , pmk_curr_collect_start_time::timestamp AS stat_collect_time + , physical_memory + , db_memory_usage + , shared_buffer_size + , blocks_read + , blocks_hit + , ( (blocks_hit * 100.0) / NULLIF((blocks_read + blocks_hit), 0) )::numeric(5, 2) AS shared_buffer_hit_ratio + , work_memory_size + , sorts_in_memory + , 
sorts_in_disk + , ( (sorts_in_memory * 100.0) / NULLIF((sorts_in_disk + sorts_in_memory), 0) )::numeric(5, 2) AS in_memory_sort_ratio + FROM ( SELECT 'D'::char(1) AS node_type + , node_name + , node_host + , s.pmk_curr_collect_start_time + , physical_memory + , db_memory_usage + , shared_buffer_size + , blocks_read_delta AS blocks_read + , blocks_hit_delta AS blocks_hit + , work_memory_size + , sorts_in_memory_delta AS sorts_in_memory + , sorts_in_disk_delta AS sorts_in_disk + FROM pmk.pmk_snapshot_datanode_stat dns, snap s + WHERE dns.snapshot_id = s.snapshot_id + ) + ORDER BY node_type, node_name, stat_collect_time; + ELSE + RETURN QUERY + WITH snap AS + ( SELECT snapshot_id + , current_snapshot_time AS pmk_curr_collect_start_time + FROM pmk.pmk_snapshot + WHERE current_snapshot_time BETWEEN i_start_pmk_time AND i_end_pmk_time + ) + SELECT 'D'::char(1) AS node_type + , node_name + , node_host + , pmk_curr_collect_start_time::timestamp AS stat_collect_time + , physical_memory + , db_memory_usage + , shared_buffer_size + , blocks_read + , blocks_hit + , ( (blocks_hit * 100.0) / NULLIF((blocks_read + blocks_hit), 0) )::numeric(5, 2) AS shared_buffer_hit_ratio + , work_memory_size + , sorts_in_memory + , sorts_in_disk + , ( (sorts_in_memory * 100.0) / NULLIF((sorts_in_disk + sorts_in_memory), 0) )::numeric(5, 2) AS in_memory_sort_ratio + FROM ( SELECT node_name + , node_host + , s.pmk_curr_collect_start_time + , physical_memory + , db_memory_usage + , shared_buffer_size + , blocks_read_delta AS blocks_read + , blocks_hit_delta AS blocks_hit + , work_memory_size + , sorts_in_memory_delta AS sorts_in_memory + , sorts_in_disk_delta AS sorts_in_disk + FROM pmk.pmk_snapshot_datanode_stat dns, snap s + WHERE dns.snapshot_id = s.snapshot_id + AND UPPER(node_name) = l_node_name + ) + ORDER BY node_name, stat_collect_time; + END IF; -- end of l_node_name = 'ALL' + +END; +$$ +LANGUAGE plpgsql; + +-- I/O statistics at node level +CREATE OR REPLACE FUNCTION pmk.get_node_io_stat + ( IN i_node_name text + , IN i_start_pmk_time timestamp with time zone + , IN i_end_pmk_time timestamp with time zone + , OUT o_node_type char(1) + , OUT o_node_name text + , OUT o_node_host text + , OUT o_stat_collect_time timestamp + , OUT o_number_of_files int + , OUT o_physical_reads bigint + , OUT o_physical_writes bigint + , OUT o_read_time bigint + , OUT o_write_time bigint + , OUT o_avg_read_per_sec numeric(20,2) + , OUT o_avg_read_time numeric(20,3) + , OUT o_avg_write_per_sec numeric(20,2) + , OUT o_avg_write_time numeric(20,3) + ) +RETURNS SETOF record +AS +$$ +DECLARE l_node_type char(1); + l_node_name text; + l_error_message text; +BEGIN + + l_error_message := pmk.check_node_type(); + + IF l_error_message IS NOT NULL + THEN + raise notice '%',l_error_message; + RETURN; + END IF; + + l_error_message := pmk.check_pmk_enabled(); + + IF l_error_message IS NOT NULL + THEN + raise notice '%',l_error_message; + RETURN; + END IF; + + l_node_name := UPPER(i_node_name); + + IF l_node_name <> 'ALL' + THEN + l_node_type := 'D'; + + IF l_node_type IS NULL + THEN + l_error_message := 'ERROR:: Invalid node name ("' || i_node_name || '") provided during generation of node (MPPDB instance) I/O statistics ...'; + raise notice '%',l_error_message; + RETURN; + END IF; + END IF; + + -- Verifying the input start and end times + pmk.check_start_end_dates(i_start_pmk_time, i_end_pmk_time, l_error_message); + + IF l_error_message IS NOT NULL + THEN + l_error_message := l_error_message || ' during generation of node (MPPDB instance) I/O 
statistics ...';
+        raise notice '%',l_error_message;
+        RETURN;
+    END IF;
+
+    IF l_node_name = 'ALL'
+    THEN
+
+        RETURN QUERY
+        WITH snap AS
+            ( SELECT snapshot_id
+                   , current_snapshot_time AS pmk_curr_collect_start_time
+                FROM pmk.pmk_snapshot
+               WHERE current_snapshot_time BETWEEN i_start_pmk_time AND i_end_pmk_time
+            )
+        SELECT node_type
+             , node_name
+             , node_host
+             , pmk_curr_collect_start_time::timestamp AS stat_collect_time
+             , number_of_files
+             , physical_reads
+             , physical_writes
+             , read_time
+             , write_time
+             , ( physical_reads * 1000000.0 / NULLIF(read_time, 0) )::numeric(20,2) AS avg_read_per_sec
+             , ( read_time * 1.0 / NULLIF(physical_reads, 0) )::numeric(20,3) AS avg_read_time
+             , ( physical_writes * 1000000.0 / NULLIF(write_time, 0) )::numeric(20,2) AS avg_write_per_sec
+             , ( write_time * 1.0 / NULLIF(physical_writes, 0) )::numeric(20,3) AS avg_write_time
+          FROM ( SELECT 'D'::char(1) AS node_type
+                      , node_name
+                      , node_host
+                      , s.pmk_curr_collect_start_time
+                      , number_of_files
+                      , physical_reads_delta AS physical_reads
+                      , physical_writes_delta AS physical_writes
+                      , read_time_delta AS read_time
+                      , write_time_delta AS write_time
+                   FROM pmk.pmk_snapshot_datanode_stat dns, snap s
+                  WHERE dns.snapshot_id = s.snapshot_id
+               )
+         ORDER BY node_type, node_name, stat_collect_time;
+
+    ELSE
+
+        RETURN QUERY
+        WITH snap AS
+            ( SELECT snapshot_id
+                   , current_snapshot_time AS pmk_curr_collect_start_time
+                FROM pmk.pmk_snapshot
+               WHERE current_snapshot_time BETWEEN i_start_pmk_time AND i_end_pmk_time
+            )
+        SELECT 'D'::char(1) AS node_type
+             , node_name
+             , node_host
+             , pmk_curr_collect_start_time::timestamp AS stat_collect_time
+             , number_of_files
+             , physical_reads
+             , physical_writes
+             , read_time
+             , write_time
+             , ( physical_reads * 1000000.0 / NULLIF(read_time, 0) )::numeric(20,2) AS avg_read_per_sec
+             , ( read_time * 1.0 / NULLIF(physical_reads, 0) )::numeric(20,3) AS avg_read_time
+             , ( physical_writes * 1000000.0 / NULLIF(write_time, 0) )::numeric(20,2) AS avg_write_per_sec
+             , ( write_time * 1.0 / NULLIF(physical_writes, 0) )::numeric(20,3) AS avg_write_time
+          FROM ( SELECT node_name
+                      , node_host
+                      , pmk_curr_collect_start_time
+                      , number_of_files
+                      , physical_reads_delta AS physical_reads
+                      , physical_writes_delta AS physical_writes
+                      , read_time_delta AS read_time
+                      , write_time_delta AS write_time
+                   FROM pmk.pmk_snapshot_datanode_stat dns, snap s
+                  WHERE dns.snapshot_id = s.snapshot_id
+                    AND UPPER(node_name) = l_node_name
+               )
+         ORDER BY node_name, stat_collect_time;
+    END IF; -- end of l_node_name = 'ALL'
+
+END;
+$$
+LANGUAGE plpgsql;
+
+-- This function is used to find the TOP N sessions that consume the most CPU time.
+-- But this function returns the Top N sessions from each node.
+-- The outer function (get_session_cpu_stat) returns the Top N sessions as the final result.
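+--
+-- Example invocation (illustrative): the top 10 CPU-consuming sessions on every node:
+--   SELECT * FROM pmk.get_session_cpu_stat('ALL', 10::smallint);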
+
+CREATE OR REPLACE FUNCTION pmk.get_session_cpu_stat
+    ( IN i_node_name text
+    , IN i_top_n_sessions smallint
+    , OUT o_node_name name
+    , OUT o_db_name name
+    , OUT o_user_name name
+    , OUT o_client_hostname text
+    , OUT o_session_start_time timestamp
+    , OUT o_xact_start_time timestamp
+    , OUT o_waiting boolean
+    , OUT o_state text
+    , OUT o_query text
+    , OUT o_session_cpu_time bigint
+    , OUT o_mppdb_cpu_time bigint
+    , OUT o_mppdb_cpu_time_perc numeric(5, 2)
+    , OUT o_avg_sql_exec_time numeric(15, 3)
+    )
+RETURNS SETOF RECORD
+AS
+$$
+DECLARE l_node_query text;
+        l_execute_query text;
+BEGIN
+
+    FOR i IN ( SELECT node_name
+                 FROM DBE_PERF.node_name nl
+                WHERE UPPER(nl.node_name) = COALESCE(NULLIF(UPPER(i_node_name), 'ALL'), UPPER(nl.node_name))
+             )
+    LOOP
+
+        l_node_query := 'WITH sess_time_stat0 AS
+            ( SELECT sessid, stat_name
+                   , (value/1000.0)::numeric AS stat_value -- converting to millisecond
+                FROM gs_session_time
+               WHERE stat_name IN ( ''CPU_TIME'', ''EXECUTION_TIME'')
+            )
+            , sess_time_stat AS
+            ( SELECT DISTINCT stso.sessid
+                   , (SELECT stsi.stat_value FROM sess_time_stat0 stsi WHERE stsi.sessid = stso.sessid AND stsi.stat_name = ''CPU_TIME'') AS session_cpu_time
+                   , (SELECT stsi.stat_value FROM sess_time_stat0 stsi WHERE stsi.sessid = stso.sessid AND stsi.stat_name = ''EXECUTION_TIME'') AS session_sql_time
+                FROM sess_time_stat0 stso
+            )
+            , mppdb_cpu_time AS
+            ( SELECT (total_cpu()*10.0)::bigint AS mppdb_cpu_time -- converting to millisecond
+            )
+            , sess_cpu_stat AS
+            ( SELECT ''' || i.node_name || '''::name AS node_name
+                   , a.datname::name AS db_name
+                   , a.usename::name AS user_name
+                   , a.client_hostname
+                   , date_trunc(''second'', a.backend_start)::timestamp AS session_start_time
+                   , date_trunc(''second'', a.xact_start)::timestamp AS xact_start_time
+                   , a.waiting
+                   , a.state, a.query
+                   , ROUND(st.session_cpu_time)::bigint AS session_cpu_time
+                   , m.mppdb_cpu_time
+                   , ( (st.session_cpu_time * 100.0) / NULLIF(m.mppdb_cpu_time, 0) )::numeric(5, 2) AS mppdb_cpu_time_perc
+                   , st.sessid
+                   , st.session_sql_time
+                FROM pg_stat_activity a
+                   , sess_time_stat st
+                   , mppdb_cpu_time m
+               WHERE a.state IN (''active'', ''fastpath function call'', ''retrying'')
+                 AND a.pid = sessionid2pid(st.sessid::cstring)
+               ORDER BY st.session_cpu_time DESC
+                      , mppdb_cpu_time_perc DESC
+               LIMIT ' || i_top_n_sessions || '
+            )
+            SELECT scs.node_name
+                 , scs.db_name
+                 , scs.user_name
+                 , scs.client_hostname
+                 , scs.session_start_time
+                 , scs.xact_start_time
+                 , scs.waiting
+                 , scs.state
+                 , scs.query
+                 , scs.session_cpu_time
+                 , scs.mppdb_cpu_time
+                 , scs.mppdb_cpu_time_perc
+                 , ( scs.session_sql_time / NULLIF(ss.value, 0) )::numeric(15, 3) AS avg_sql_exec_time
+              FROM sess_cpu_stat scs
+                 , gs_session_stat ss
+             WHERE ss.sessid = scs.sessid
+               AND ss.statname = ''n_sql''';
+        RETURN QUERY
+        EXECUTE l_node_query;
+
+    END LOOP;
+
+END;
+$$
+LANGUAGE plpgsql;
+
+
+
+-- This function is used to find the TOP N sessions that consume the most session memory
+-- (sorted by total memory size DESC and used memory size DESC).
+-- But this function returns the Top N sessions from each node.
+-- The outer function (get_session_memory_stat) returns the Top N sessions as the final result.
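+--
+-- Example invocation (illustrative; 'datanode1' is a hypothetical node name):
+--   SELECT * FROM pmk.get_session_memory_stat('datanode1', 5::smallint);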
+ +CREATE OR REPLACE FUNCTION pmk.get_session_memory_stat + ( IN i_node_name text + , IN i_top_n_sessions smallint + , OUT o_node_name name + , OUT o_db_name name + , OUT o_user_name name + , OUT o_client_hostname text + , OUT o_session_start_time timestamp + , OUT o_xact_start_time timestamp + , OUT o_waiting boolean + , OUT o_state text + , OUT o_query text + , OUT o_session_total_memory_size bigint + , OUT o_session_used_memory_size bigint + , OUT o_buffer_hits bigint + , OUT o_disk_reads bigint + , OUT o_session_buffer_hit_ratio numeric(5, 2) + , OUT o_sorts_in_memory bigint + , OUT o_sorts_in_disk bigint + , OUT o_session_memory_sort_ratio numeric(5, 2) + , OUT o_avg_sql_exec_time numeric(15, 3) + ) +RETURNS SETOF record +AS +$$ +DECLARE l_node_query text; + l_execute_query text; +BEGIN + + FOR i IN ( SELECT node_name + FROM DBE_PERF.node_name nl + WHERE UPPER(nl.node_name) = COALESCE(NULLIF(UPPER(i_node_name), 'ALL'), UPPER(nl.node_name)) + ) + LOOP + + l_node_query := 'WITH sess_memory_usage AS + ( SELECT sessid + , SUM(totalsize)::bigint AS totalsize + , SUM(usedsize)::bigint AS usedsize + FROM gs_session_memory_detail + GROUP BY sessid + ) + , sess_stat0 AS + ( SELECT ss.sessid + , ss.statname AS statname + , ss.value AS statvalue + FROM sess_memory_usage st, gs_session_stat ss + WHERE ss.sessid = st.sessid + AND ss.statname IN ( ''n_blocks_fetched'' + , ''n_shared_blocks_read'', ''n_local_blocks_read'' + , ''n_sort_in_disk'', ''n_sort_in_memory'' + , ''n_sql'' ) + ) + , sess_stat1 AS + ( SELECT oss.sessid + , oss.totalsize + , oss.usedsize + , (SELECT statvalue FROM sess_stat0 iss WHERE iss.sessid = oss.sessid AND iss.statname = ''n_blocks_fetched'') AS total_reads + , (SELECT statvalue FROM sess_stat0 iss WHERE iss.sessid = oss.sessid AND iss.statname = ''n_shared_blocks_read'') AS disk_to_shared_buffer + , (SELECT statvalue FROM sess_stat0 iss WHERE iss.sessid = oss.sessid AND iss.statname = ''n_local_blocks_read'') AS disk_to_local_buffer + , (SELECT statvalue FROM sess_stat0 iss WHERE iss.sessid = oss.sessid AND iss.statname = ''n_sort_in_disk'') AS sorts_in_disk + , (SELECT statvalue FROM sess_stat0 iss WHERE iss.sessid = oss.sessid AND iss.statname = ''n_sort_in_memory'') AS sorts_in_memory + , (SELECT statvalue FROM sess_stat0 iss WHERE iss.sessid = oss.sessid AND iss.statname = ''n_sql'') AS sql_count + FROM sess_memory_usage oss + ) + , sess_stat AS + ( SELECT ss.sessid + , ss.totalsize + , ss.usedsize + , ss.total_reads + , (ss.disk_to_shared_buffer + ss.disk_to_local_buffer) AS disk_reads + , (ss.total_reads - (ss.disk_to_shared_buffer+ss.disk_to_local_buffer)) AS buffer_hits + , sorts_in_disk + , sorts_in_memory + , sql_count + FROM sess_stat1 ss + ) + , sess_memory_stat AS + ( SELECT ''' || i.node_name || '''::name AS node_name + , a.datname::name AS db_name + , a.usename::name AS user_name + , a.client_hostname + , date_trunc(''second'', a.backend_start)::timestamp AS session_start_time + , date_trunc(''second'', a.xact_start)::timestamp AS xact_start_time + , a.waiting + , a.state, a.query + , st.totalsize AS session_total_memory_size + , st.usedsize AS session_used_memory_size + , st.buffer_hits, st.disk_reads + , ( (st.buffer_hits * 100.0) / NULLIF(st.total_reads, 0) )::numeric(5, 2) AS session_buffer_hit_ratio + , st.sorts_in_memory, st.sorts_in_disk + , ( (st.sorts_in_memory * 100.0) / NULLIF(st.sorts_in_memory + st.sorts_in_disk, 0) )::numeric(5, 2) AS session_memory_sort_ratio + , st.sessid + , st.sql_count + FROM pg_stat_activity a + , sess_stat st + WHERE 
a.state IN (''active'', ''fastpath function call'', ''retrying'')
+                 AND a.pid = sessionid2pid(st.sessid::cstring)
+               ORDER BY st.totalsize DESC
+                      , st.usedsize DESC
+               LIMIT ' || i_top_n_sessions || '
+            )
+            SELECT sms.node_name
+                 , sms.db_name
+                 , sms.user_name
+                 , sms.client_hostname
+                 , sms.session_start_time
+                 , sms.xact_start_time
+                 , sms.waiting
+                 , sms.state
+                 , sms.query
+                 , sms.session_total_memory_size
+                 , sms.session_used_memory_size
+                 , sms.buffer_hits
+                 , sms.disk_reads
+                 , sms.session_buffer_hit_ratio
+                 , sms.sorts_in_memory
+                 , sms.sorts_in_disk
+                 , sms.session_memory_sort_ratio
+                 , ( ss.value / (NULLIF(sms.sql_count, 0) * 1000.0) )::numeric(15, 3) AS avg_sql_exec_time
+              FROM sess_memory_stat sms
+                 , gs_session_time ss
+             WHERE ss.sessid = sms.sessid
+               AND ss.stat_name = ''EXECUTION_TIME''';
+
+
+        RETURN QUERY
+        EXECUTE l_node_query;
+
+    END LOOP;
+
+END;
+$$
+LANGUAGE plpgsql;
+
+
+-- This function is used to find the TOP N sessions that perform the most physical I/O operations.
+-- But this function returns the Top N sessions from each node.
+-- The outer function (get_session_io_stat) returns the Top N sessions as the final result.
+
+CREATE OR REPLACE FUNCTION pmk.get_session_io_stat
+    ( IN i_node_name text
+    , IN i_top_n_sessions smallint
+    , OUT o_node_name name
+    , OUT o_db_name name
+    , OUT o_user_name name
+    , OUT o_client_hostname text
+    , OUT o_session_start_time timestamp
+    , OUT o_xact_start_time timestamp
+    , OUT o_waiting boolean
+    , OUT o_state text
+    , OUT o_query text
+    , OUT o_disk_reads bigint
+    , OUT o_read_time bigint
+    , OUT o_avg_read_per_sec numeric(20, 2)
+    , OUT o_avg_read_time numeric(20, 3)
+    , OUT o_avg_sql_exec_time numeric(15, 3)
+    )
+RETURNS SETOF record
+AS
+$$
+DECLARE l_node_query text;
+        l_execute_query text;
+BEGIN
+
+    FOR i IN ( SELECT node_name
+                 FROM DBE_PERF.node_name nl
+                WHERE UPPER(nl.node_name) = COALESCE(NULLIF(UPPER(i_node_name), 'ALL'), UPPER(nl.node_name))
+             )
+    LOOP
+
+        l_node_query := 'WITH sess_stat0 AS
+            ( SELECT ss.sessid
+                   , ss.statname AS statname
+                   , ss.value AS statvalue
+                FROM gs_session_stat ss
+               WHERE ss.statname IN ( ''n_shared_blocks_read'', ''n_local_blocks_read''
+                                    , ''n_blocks_read_time'', ''n_sql'' )
+            )
+            , sess_stat1 AS
+            ( SELECT DISTINCT ss.sessid
+                   , (SELECT statvalue FROM sess_stat0 iss WHERE iss.sessid = ss.sessid AND iss.statname = ''n_shared_blocks_read'') AS disk_to_shared_buffer
+                   , (SELECT statvalue FROM sess_stat0 iss WHERE iss.sessid = ss.sessid AND iss.statname = ''n_local_blocks_read'') AS disk_to_local_buffer
+                   , (SELECT statvalue FROM sess_stat0 iss WHERE iss.sessid = ss.sessid AND iss.statname = ''n_blocks_read_time'') AS read_time
+                   , (SELECT statvalue FROM sess_stat0 iss WHERE iss.sessid = ss.sessid AND iss.statname = ''n_sql'') AS sql_count
+                FROM sess_stat0 ss
+            )
+            , sess_stat AS
+            ( SELECT ss.sessid
+                   , (ss.disk_to_shared_buffer + ss.disk_to_local_buffer) AS disk_reads
+                   , ss.read_time
+                   , ss.sql_count
+                FROM sess_stat1 ss
+            )
+            , sess_io_stat AS
+            ( SELECT ''' || i.node_name || '''::name AS node_name
+                   , a.datname::name AS db_name
+                   , a.usename::name AS user_name
+                   , a.client_hostname
+                   , date_trunc(''second'', a.backend_start)::timestamp AS session_start_time
+                   , date_trunc(''second'', a.xact_start)::timestamp AS xact_start_time
+                   , a.waiting
+                   , a.state, a.query
+                   , st.disk_reads
+                   , st.read_time
+                   , ( st.disk_reads * 1000000.0 / NULLIF(st.read_time, 0) )::numeric(20,2) AS avg_read_per_sec
+                   , ( st.read_time * 1.0 / NULLIF(st.disk_reads, 0) )::numeric(20,3) AS avg_read_time
+                   , st.sessid
st.sql_count
+              FROM pg_stat_activity a
+                 , sess_stat st
+             WHERE a.state IN (''active'', ''fastpath function call'', ''retrying'')
+               AND a.pid = sessionid2pid(st.sessid::cstring)
+             ORDER BY st.disk_reads DESC
+                    , st.read_time DESC
+             LIMIT ' || i_top_n_sessions || '
+           )
+           SELECT sios.node_name
+                , sios.db_name
+                , sios.user_name
+                , sios.client_hostname
+                , sios.session_start_time
+                , sios.xact_start_time
+                , sios.waiting
+                , sios.state
+                , sios.query
+                , sios.disk_reads
+                , sios.read_time
+                , sios.avg_read_per_sec
+                , sios.avg_read_time
+                , ( ss.value / (NULLIF(sios.sql_count, 0) * 1000.0) )::numeric(15, 3) AS avg_sql_exec_time
+             FROM sess_io_stat sios
+                , gs_session_time ss
+            WHERE ss.sessid = sios.sessid
+              AND ss.stat_name = ''EXECUTION_TIME''';
+
+
+       RETURN QUERY
+       EXECUTE l_node_query;
+
+   END LOOP;
+
+END;
+$$
+LANGUAGE plpgsql;
+
+
+-- If config_value = -1, it is treated as infinite.
+
+CREATE OR REPLACE FUNCTION pmk.insertBaseValue()
+RETURNS TEXT
+AS
+$$
+DECLARE l_configuration_count_value INT;
+        l_meta_data_count_value INT;
+        l_version_string varchar(128);
+        l_result varchar(128);
+BEGIN
+    SELECT count(config_param_name)
+      INTO l_configuration_count_value
+      FROM pmk.pmk_configuration
+     WHERE config_param_name IN ('Collection Count', 'Enable PMK');
+
+    IF l_configuration_count_value != 2
+    THEN
+        DELETE FROM pmk.pmk_configuration;
+        INSERT INTO pmk.pmk_configuration(config_param_name, config_value) VALUES ('Collection Count', '9'), ('Enable PMK', 'TRUE');
+    END IF;
+
+    SELECT count(pmk_version)
+      INTO l_meta_data_count_value
+      FROM pmk.pmk_meta_data;
+
+    SELECT substring(version() from '[a-zA-Z0-9 ]* [0-9]+\.[0-9]+\.[0-9]+') INTO l_version_string;
+    l_result := l_version_string;
+
+    IF l_meta_data_count_value < 1
+    THEN
+        INSERT INTO pmk.pmk_meta_data (pmk_version, last_snapshot_id, last_snapshot_collect_time) VALUES (l_result, NULL, NULL);
+    END IF;
+
+    RETURN NULL;
+END;
+$$
+LANGUAGE plpgsql;
+
+SELECT pmk.insertBaseValue();
+
+-- End the transaction started at the beginning of the PMK installation.
+COMMIT;
+
+analyze pmk.pmk_configuration;
+analyze pmk.pmk_snapshot;
+analyze pmk.pmk_snapshot_datanode_stat;
+analyze pmk.pmk_meta_data;
\ No newline at end of file
diff --git a/script/gspylib/etc/sql/test_data_node.sql b/script/gspylib/etc/sql/test_data_node.sql
new file mode 100644
index 0000000..1ffa90b
--- /dev/null
+++ b/script/gspylib/etc/sql/test_data_node.sql
@@ -0,0 +1,2 @@
+--test the data node
+SELECT * FROM pgxc_node WHERE node_type = 'D';
diff --git a/script/gspylib/etc/sql/test_pmk.sql b/script/gspylib/etc/sql/test_pmk.sql
new file mode 100644
index 0000000..d3d5166
--- /dev/null
+++ b/script/gspylib/etc/sql/test_pmk.sql
@@ -0,0 +1,17 @@
+--
+--test the pmk schema
+--
+DECLARE
+    pmk_oid oid;
+    class_count int;
+    proc_count int;
+BEGIN
+    --if the pmk schema does not exist, this raises an error.
+    SELECT oid FROM pg_namespace WHERE nspname='pmk' INTO pmk_oid;
+    --count the relations that belong to the pmk schema
+    SELECT COUNT(*) FROM pg_class WHERE relnamespace=pmk_oid INTO class_count;
+    --count the procedures that belong to the pmk schema
+    SELECT COUNT(*) FROM pg_proc WHERE pronamespace=pmk_oid INTO proc_count;
+    RAISE INFO 'pmk schema exists. 
class count is %, proc count is %.', class_count , proc_count; +END; +/ \ No newline at end of file diff --git a/script/gspylib/etc/sql/unlock_cluster.sql b/script/gspylib/etc/sql/unlock_cluster.sql new file mode 100644 index 0000000..ecc909f --- /dev/null +++ b/script/gspylib/etc/sql/unlock_cluster.sql @@ -0,0 +1,33 @@ +-- +--unlock the cluster +--The query content must be the same as the values of LOCK_CLUSTER_SQL and WAITLOCK_CLUSTER_SQL in the local/LocalQuery.py file. +--The value must be the same. +-- +DECLARE + result BOOL; +--begin unlock the cluster sql +BEGIN + FOR i in (select * from pg_stat_activity where query like 'select case (select pgxc_lock_for_backup()) when true then (select pg_sleep(%)::text) end;' or query like 'select case (select count(*) from pg_advisory_lock(65535,65535)) when true then (select pg_sleep(%)::text) end;') + LOOP + --set info datid datname pid + RAISE INFO 'datid: %, datname: %, pid: %', i.datid, i.datname, i.pid; + --set info usesysid usename application_name + RAISE INFO 'usesysid: %, usename: %, application_name: %', i.usesysid, i.usename, i.application_name; + --set info client_addr client_hostname client_port + RAISE INFO 'client_addr: %, client_hostname: %, client_port: %', i.client_addr, i.client_hostname, i.client_port; + --set info backend_start xact_start + RAISE INFO 'backend_start: %, xact_start: %', i.backend_start, i.xact_start; + --set info query_start state_change + RAISE INFO 'query_start: %, state_change: %', i.query_start, i.state_change; + --set info waiting state + RAISE INFO 'waiting: %, state: %', i.waiting, i.state; + --set info query + RAISE INFO 'query: %', i.query; + --set result false + result := false; + --SELECT pg_cancel_backend + SET xc_maintenance_mode = on; SELECT pg_cancel_backend(i.pid) INTO result; RESET xc_maintenance_mode; + RAISE INFO 'cancel command result: %', result; + END LOOP; +END; +/ \ No newline at end of file diff --git a/script/gspylib/hardware/__init__.py b/script/gspylib/hardware/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/script/gspylib/hardware/gscpu.py b/script/gspylib/hardware/gscpu.py new file mode 100644 index 0000000..09255d6 --- /dev/null +++ b/script/gspylib/hardware/gscpu.py @@ -0,0 +1,86 @@ +# -*- coding:utf-8 -*- +############################################################################# +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +# Description : cpu.py is a utility to do something for cpu information. +############################################################################# +import os +import subprocess +import sys +import multiprocessing + +sys.path.append(sys.path[0] + "/../../") +from gspylib.common.ErrorCode import ErrorCode + +""" +Requirements: +1. getCpuNum() -> get real cpu number. +2. 
getCpuOnlineOfflineInfo(isOnlineCpu) -> get cpu online/offline information +""" + + +class CpuInfo(object): + """ + function: Init the CpuInfo options + """ + + def __init__(self): + """ + function: Init the CpuInfo options + """ + + @staticmethod + def getCpuNum(): + """ + function : get cpu set of current board + input : null + output : total CPU count + """ + total = 0 + try: + total = multiprocessing.cpu_count() + except Exception as e: + raise Exception(ErrorCode.GAUSS_523["GAUSS_52301"] + str(e)) + return total + + @staticmethod + def getCpuOnlineOfflineInfo(isOnlineCpu=True): + """ + cat /sys/devices/system/cpu/online or /sys/devices/system/cpu/offline + """ + onlineFileName = "/sys/devices/system/cpu/online" + offlineFileName = "/sys/devices/system/cpu/offline" + + if (isOnlineCpu): + fileName = onlineFileName + else: + fileName = offlineFileName + + if (not os.path.exists(fileName)): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % fileName) + if (not os.path.isfile(fileName)): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50210"] % fileName) + + cmd = "cat '%s' 2>/dev/null" % fileName + status, output = subprocess.getstatusoutput(cmd) + if (status == 0): + return output + else: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + " Error: \n%s" % str(output)) + + +g_cpu = CpuInfo() diff --git a/script/gspylib/hardware/gsdisk.py b/script/gspylib/hardware/gsdisk.py new file mode 100644 index 0000000..a23455e --- /dev/null +++ b/script/gspylib/hardware/gsdisk.py @@ -0,0 +1,298 @@ +# -*- coding:utf-8 -*- +############################################################################# +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +# Description : disk.py is a utility to do something for disk. +############################################################################# +import os +import subprocess +import sys +import psutil +import math + +sys.path.append(sys.path[0] + "/../../") +from gspylib.common.ErrorCode import ErrorCode +from gspylib.os.gsplatform import g_Platform + +""" +Requirements: +1. getUsageSize(directory) -> get directory or file real size. Unit is byte. +2. getMountPathByDataDir(directory) -> get the input directory of the mount +disk +3. getMountPathAvailSize(directory) -> get the avail size about the input +directory of the mount disk. Unit MB +4. getDiskSpaceUsage(directory) -> get directory or file space size. Unit is +byte. +5. getDiskInodeUsage(directory) -> get directory or file inode uage. Unit is +byte. +6. getDiskMountType(directory) -> get the type about the input directory of +the mount disk. +7. 
getDiskReadWritespeed(inputFile, outputFile, bs, count, iflag = '', +oflag = '') -> get disk read/write speed +""" + + +class diskInfo(): + """ + function: Init the DiskUsage options + """ + + def __init__(self): + self.mtabFile = g_Platform.getMtablFile() + + def getMountInfo(self, allInfo=False): + """ + get mount disk information: device mountpoint fstype opts + input: bool (physical devices and all others) + output: list + """ + return psutil.disk_partitions(allInfo) + + def getUsageSize(self, directory): + """ + get directory or file real size. Unit is byte + """ + cmd = "" + try: + cmd = "%s -l -R %s | %s ^- | %s '{t+=$5;} END {print t}'" % ( + g_Platform.getListCmd(), directory, g_Platform.getGrepCmd(), + g_Platform.getAwkCmd()) + (status, output) = subprocess.getstatusoutput(cmd) + if (status == 0): + return output.split('\t')[0].strip() + else: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + " Error: \n%s" % str(output)) + except Exception as e: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd) + + # Mtab always keeps the partition information already mounted in the + # current system. + # For programs like fdisk and df, + # you must read the mtab file to get the partition mounting status in + # the current system. + def getMountPathByDataDir(self, datadir): + """ + function : Get the disk by the file path + input : datadir the file path + output : device disk + """ + device = "" + mountDisk = {} + if not os.path.exists(datadir): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50228"] % datadir) + try: + datadir = os.path.realpath(datadir) + with open(self.mtabFile, "r") as fp: + for line in fp.readlines(): + if line.startswith('none'): + continue + i_fields = line.split() + if len(i_fields) < 3: + continue + i_device = i_fields[0].strip() + i_mountpoint = i_fields[1].strip() + mountDisk[i_mountpoint] = [i_device, i_mountpoint] + + mountList = mountDisk.keys() + sortedMountList = sorted(mountList, reverse=True) + for mount in sortedMountList: + i_mountpoint = mountDisk[mount][1] + if (i_mountpoint == '/'): + i_mount_dirlst = [''] + else: + i_mount_dirlst = i_mountpoint.split('/') + data_dirlst = datadir.split('/') + if len(i_mount_dirlst) > len(data_dirlst): + continue + if (i_mount_dirlst == data_dirlst[:len(i_mount_dirlst)]): + device = mountDisk[mount][0] + break + + except Exception as e: + raise Exception(ErrorCode.GAUSS_530["GAUSS_53011"] + + " disk mount." + "Error: %s" % str(e)) + return device + + # Mtab always keeps the partition information already mounted in the + # current system. + # For programs like fdisk and df, + # you must read the mtab file to get the partition mounting status in + # the current system. + def getMountPathAvailSize(self, device, sizeUnit='MB'): + """ + function : Get the disk size by the file path + input : device the file path + : sizeUnit byte, GB, MB, KB + output : total disk size + """ + if (not os.path.exists(device)): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50228"] % device) + try: + dev_info = os.statvfs(device) + if (sizeUnit == 'GB'): + total = dev_info.f_bavail * dev_info.f_frsize // ( + 1024 * 1024 * 1024) + elif (sizeUnit == 'MB'): + total = dev_info.f_bavail * dev_info.f_frsize // (1024 * 1024) + elif (sizeUnit == 'KB'): + total = dev_info.f_bavail * dev_info.f_frsize // 1024 + else: + total = dev_info.f_bavail * dev_info.f_frsize + except Exception as e: + raise Exception(ErrorCode.GAUSS_530["GAUSS_53011"] + " disk size." 
+ + "Error: %s" % str(e)) + return total + + # Mtab always keeps the partition information already mounted in the + # current system. + # For programs like fdisk and df, + # you must read the mtab file to get the partition mounting status in + # the current system. + def getDiskSpaceUsage(self, path): + """ + function : Get the disk usage by the file path + method of calculation: + Total capacity (KB)=f_bsize*f_blocks/1024 [1k-blocks] + Usage (KB)= f_bsize*(f_blocks-f_bfree)/1024 [Used] + Valid capacity (KB) = f_bsize*f_bavail/1024 [Available] + Usage (%) = Usage/(Usage + Valid capacity) *100 [Use%] + input : path the file path + output : percent + """ + percent = 0 + if (not os.path.exists(path)): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50228"] % path) + try: + dev_info = os.statvfs(path) + used = dev_info.f_blocks - dev_info.f_bfree + valueable = dev_info.f_bavail + used + percent = math.ceil((float(used) / valueable) * 100) + except Exception as e: + raise Exception(ErrorCode.GAUSS_530["GAUSS_53011"] + " disk space." + + "Error: %s" % str(e)) + return float(percent) + + def getDiskSpaceForShrink(self, path, delta): + """ + function : Get the disk usage by the file path for Shrink + input : path the file path and deltasize + output : percent + """ + percent = 0 + if (not os.path.exists(path)): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50228"] % path) + try: + dev_info = os.statvfs(path) + used = (dev_info.f_blocks - dev_info.f_bfree) * dev_info.f_bsize + valueable = dev_info.f_bavail * dev_info.f_bsize + used + delta + percent = math.ceil((float(used) // valueable) * 100) + except Exception as e: + raise Exception(ErrorCode.GAUSS_530["GAUSS_53011"] + " disk space." + + "Error: %s" % str(e)) + return float(percent) + + def getDiskInodeUsage(self, Path): + """ + function : Get the inode by the file path + input : Path the file path + output : percent + """ + percent = 0 + if (not os.path.exists(Path)): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50228"] % Path) + try: + dev_info = os.statvfs(Path) + used = dev_info.f_files - dev_info.f_ffree + valueable = dev_info.f_favail + used + percent = math.ceil((float(used) // valueable) * 100) + except Exception as e: + raise Exception(ErrorCode.GAUSS_530["GAUSS_53011"] + " disk Inode." + + "Error: %s" % str(e)) + return float(percent) + + def getDiskMountType(self, device): + """ + function : Get the mount type by device + input : device eg:/dev/pts + output : fstype device type + """ + fstype = "" + try: + + with open(self.mtabFile, "r") as fp: + for line in fp.readlines(): + if line.startswith('#'): + continue + i_fields = line.split() + if len(i_fields) < 3: + continue + i_device = i_fields[0].strip() + i_fstype = i_fields[2].strip() + if i_device == device: + fstype = i_fstype + break + except Exception as e: + raise Exception(ErrorCode.GAUSS_530["GAUSS_53011"] + + " disk mount type." 
+ "Error: %s" % str(e)) + return fstype + + def getDiskReadWritespeed(self, inputFile, outputFile, bs, count, iflag='', + oflag=''): + """ + function : Get the disk read or write rate + input : inputFile + : outputFile + : bs + : count + : iflag + : oflag + output : speed + """ + try: + cmd = "%s if=%s of=%s bs=%s count=%s " % ( + g_Platform.getDdCmd(), inputFile, outputFile, bs, count) + if iflag: + cmd += "iflag=%s " % iflag + if oflag: + cmd += "oflag=%s " % oflag + + (status, output) = subprocess.getstatusoutput(cmd) + if (status == 0): + output = output.split("\n") + resultInfolist = output[2].strip().split(",") + if ((resultInfolist[2]).split()[1] == "KB/s"): + speed = float((resultInfolist[2]).split()[0]) * 1024 + elif ((resultInfolist[2]).split()[1] == "MB/s"): + speed = float((resultInfolist[2]).split()[0]) * 1024 * 1024 + elif ((resultInfolist[2]).split()[1] == "GB/s"): + speed = float( + (resultInfolist[2]).split()[0]) * 1024 * 1024 * 1024 + elif ((resultInfolist[2]).split()[1] == "TB/s"): + speed = float((resultInfolist[2]).split()[ + 0]) * 1024 * 1024 * 1024 * 1024 + else: + speed = float((resultInfolist[2]).split()[0]) + return speed + else: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + " Error: \n%s" % str(output)) + except Exception as e: + raise Exception( + ErrorCode.GAUSS_504["GAUSS_50406"] + "Error:\n%s" % str(e)) + + +g_disk = diskInfo() diff --git a/script/gspylib/hardware/gsmemory.py b/script/gspylib/hardware/gsmemory.py new file mode 100644 index 0000000..07f8974 --- /dev/null +++ b/script/gspylib/hardware/gsmemory.py @@ -0,0 +1,101 @@ +# -*- coding:utf-8 -*- +############################################################################# +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +# Description : memory.py is a utility to do something for memory information. 
+############################################################################# +import sys +import psutil + +sys.path.append(sys.path[0] + "/../../") +from gspylib.common.ErrorCode import ErrorCode + +""" +Requirements: +get memory and swap size +""" + + +class memoryInfo(object): + """ + function: Init the MemInfo options + """ + + def __init__(self): + """ + function: Init the MemInfo options + """ + + @staticmethod + def getMemUsedSize(): + """ + get used memory size + """ + return psutil.virtual_memory().used + + @staticmethod + def getMemFreeSize(): + """ + get free memory size + """ + return psutil.virtual_memory().free + + @staticmethod + def getSwapUsedSize(): + """ + get used swap size + """ + return psutil.swap_memory().used + + @staticmethod + def getSwapFreeSize(): + """ + get free swap size + """ + return psutil.swap_memory().free + + @staticmethod + def getSwapTotalSize(): + """ + function : Get swap memory total size + input : null + output : total memory size (byte) + """ + total = 0 + try: + total = psutil.swap_memory().total + except Exception as e: + raise Exception(ErrorCode.GAUSS_505["GAUSS_50502"] + + "Error: %s" % str(e)) + return total + + @staticmethod + def getMemTotalSize(): + """ + function : Get system virtual memory total size + input : null + output : total virtual memory(byte) + """ + total = 0 + try: + total = psutil.virtual_memory().total + except Exception as e: + raise Exception(ErrorCode.GAUSS_505["GAUSS_50502"] + + "Error: %s" % str(e)) + return total + + +g_memory = memoryInfo() diff --git a/script/gspylib/inspection/__init__.py b/script/gspylib/inspection/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/script/gspylib/inspection/common/CheckItem.py b/script/gspylib/inspection/common/CheckItem.py new file mode 100644 index 0000000..94a862f --- /dev/null +++ b/script/gspylib/inspection/common/CheckItem.py @@ -0,0 +1,472 @@ +# -*- coding:utf-8 -*- +############################################################################# +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. 
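+# ----------------------------------------------------------------------------
+# Description : CheckItem.py defines the base class of inspection check
+#               items (BaseItem) and the shared result-analysis helpers.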
+############################################################################# +from gspylib.inspection.common import SharedFuncs + +import json +import imp +import types +from abc import abstractmethod +from gspylib.common.Common import DefaultValue +from gspylib.common.ErrorCode import ErrorCode +from gspylib.inspection.common.Log import LoggerFactory +from gspylib.inspection.common.CheckResult import LocalItemResult, \ + ResultStatus +from gspylib.inspection.common.Exception import CheckNAException + + +def defaultAnalysis(self, itemResult): + # Get the item result information + itemResult.standard = self.standard + itemResult.suggestion = self.suggestion + itemResult.category = self.category + itemResult.title = self.title + + errors = [] + ngs = [] + warnings = [] + vals = {} + for i in itemResult.getLocalItems(): + if (i.rst == ResultStatus.OK or i.rst == ResultStatus.NA): + if (i.val): + vals[i.host] = i.val + continue + elif (i.rst == ResultStatus.ERROR): + errors.append("%s : %s" % (i.host, i.val)) + elif (i.rst == ResultStatus.WARNING): + warnings.append("%s : %s" % (i.host, i.val)) + else: + ngs.append("%s : %s" % (i.host, i.val)) + # Analysis results + if (len(ngs) > 0 or len(errors) > 0 or len(warnings) > 0): + rst = ResultStatus.WARNING + if len(errors) > 0: + rst = ResultStatus.ERROR + elif len(ngs) > 0: + rst = ResultStatus.NG + itemResult.rst = rst + itemResult.analysis = "\n".join(ngs + errors + warnings) + else: + itemResult.rst = ResultStatus.OK + itemResult.analysis = "" + + analysisStrList = [] + nas, oks, ngs, warnings, errors = classifyItemResult(itemResult) + total = len(oks) + len(ngs) + len(warnings) + len(errors) + + rst = ResultStatus.OK + okMsg, okAnalysisList = countItemResult(oks) + warningMsg, warningAnalysisList = countItemResult(warnings) + failedMsg, failedAnalysisList = countItemResult(ngs) + errorMsg, errorAnalysisList = countItemResult(errors) + if (len(warnings) > 0): + rst = ResultStatus.WARNING + if (len(ngs) > 0): + rst = ResultStatus.NG + if (len(errors) > 0): + rst = ResultStatus.ERROR + countMsg = "The item run on %s nodes. %s%s%s%s" % ( + total, okMsg, warningMsg, failedMsg, errorMsg) + analysisStrList.append(countMsg) + if (errorAnalysisList): + analysisStrList.extend(errorAnalysisList) + if (failedAnalysisList): + analysisStrList.extend(failedAnalysisList) + if (warningAnalysisList): + analysisStrList.extend(warningAnalysisList) + if (itemResult.name == 'CheckSysTable'): + value = [vals[key] for key in sorted(vals.keys())] + analysisStrList.extend(value) + itemResult.rst = rst + itemResult.analysis = "\n".join(analysisStrList) + return itemResult + + +def consistentAnalysis(self, itemResult): + # check the rst in each node and make sure the var is consistence + itemResult.standard = self.standard + itemResult.suggestion = self.suggestion + itemResult.category = self.category + itemResult.title = self.title + + analysisStrList = [] + nas, oks, ngs, warnings, errors = classifyItemResult(itemResult) + total = len(oks) + len(ngs) + len(warnings) + len(errors) + + # The item run on %s nodes. 
success: %s warning: %s ng:%s error:% + rst = ResultStatus.OK + if (len(oks) == total): + okMsg, okAnalysisList = countItemResult(oks, True) + else: + okMsg, okAnalysisList = countItemResult(oks) + warningMsg, warningAnalysisList = countItemResult(warnings) + failedMsg, failedAnalysisList = countItemResult(ngs) + errorMsg, errorAnalysisList = countItemResult(errors) + if (len(okAnalysisList) > 0): + okMsg += " (consistent) " if ( + len(okAnalysisList) == 1) else " (not consistent) " + + if (len(warnings) > 0 and rst == ResultStatus.OK): + rst = ResultStatus.WARNING + if (len(okAnalysisList) > 1): + rst = ResultStatus.NG + if (itemResult.name in ["CheckDiskConfig", "CheckCpuCount", + "CheckMemInfo", "CheckStack", + "CheckKernelVer"]): + rst = ResultStatus.WARNING + if (len(ngs) > 0): + rst = ResultStatus.NG + if (len(errors) > 0): + rst = ResultStatus.ERROR + + countMsg = "The item run on %s nodes. %s%s%s%s" % ( + total, okMsg, warningMsg, failedMsg, errorMsg) + analysisStrList.append(countMsg) + if (errorAnalysisList): + analysisStrList.extend(errorAnalysisList) + if (failedAnalysisList): + analysisStrList.extend(failedAnalysisList) + if (warningAnalysisList): + analysisStrList.extend(warningAnalysisList) + if (okAnalysisList): + analysisStrList.extend(okAnalysisList) + itemResult.rst = rst + itemResult.analysis = "\n".join(analysisStrList) + return itemResult + + +def getValsItems(vals): + """ + + :param vals: + :return: + """ + ret = {} + for i_key, i_val in list(vals.items()): + try: + i_val = eval(i_val) + except Exception: + i_val = i_val + if isinstance(i_val, dict): + for j_key, j_val in list(i_val.items()): + ret[j_key] = j_val + + return ret + + +def getCheckType(category): + ''' + function : get check type + input : category + output : 1,2,3 + ''' + if not category: + return 0 + if category == "cluster": + return 1 + elif category == "database": + return 3 + else: + return 2 + + +def classifyItemResult(itemResult): + nas = [] + oks = [] + ngs = [] + wns = [] + ers = [] + # Summary results + for i in itemResult.getLocalItems(): + if (i.rst == ResultStatus.OK): + oks.append(i) + if (i.rst == ResultStatus.NA): + nas.append(i) + if (i.rst == ResultStatus.NG): + ngs.append(i) + if (i.rst == ResultStatus.WARNING): + wns.append(i) + if (i.rst == ResultStatus.ERROR): + ers.append(i) + return (nas, oks, ngs, wns, ers) + + +def countItemResult(itemList, allNode=False): + if (itemList is None or len(itemList) == 0): + return ("", []) + first = itemList[0] + msgTitle = "default" + if (first.rst == ResultStatus.WARNING): + msgTitle = "warning" + if (first.rst == ResultStatus.NG): + msgTitle = "ng" + if (first.rst == ResultStatus.ERROR): + msgTitle = "error" + if (first.rst == ResultStatus.OK): + msgTitle = "success" + countMsg = " %s: %s " % (msgTitle, len(itemList)) + + defaultHosts = [first.host] + diffs = [] + for i in itemList[1:]: + if i.val == first.val: + defaultHosts.append(i.host) + continue + else: + diffs.append("The different[%s] value:\n%s" % (i.host, i.val)) + if (allNode): + analysisStrList = [ + "The %s on all nodes value:\n%s" % (msgTitle, first.val)] + else: + analysisStrList = ["The %s%s value:\n%s" % ( + msgTitle, '[' + ",".join(defaultHosts) + ']', first.val)] + if (len(diffs) > 0): + analysisStrList.extend(diffs) + return (countMsg, analysisStrList) + + +class BaseItem(object): + ''' + base class of check item + ''' + + def __init__(self, name): + ''' + Constructor + ''' + self.name = name + self.title = None + self.set = False + self.log = None + self.suggestion = None 
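+        # reporting metadata: check standard, thresholds and category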
+        self.standard = None
+        self.threshold = {}
+        self.category = 'other'
+        self.permission = 'user'
+        self.analysis = 'default'
+        self.scope = 'all'
+        self.cluster = None
+        self.port = None
+        self.user = None
+        self.nodes = None
+        self.mpprcFile = None
+        self.thresholdDn = None
+        self.context = None
+        self.tmpPath = None
+        self.outPath = None
+        self.host = DefaultValue.GetHostIpOrName()
+        self.result = LocalItemResult(name, self.host)
+        self.routing = None
+        self.skipSetItem = []
+        self.ipAddr = None
+        # cluster name (not only for logical clusters)
+        self.LCName = None
+        self.ShrinkNodes = None
+
+    @abstractmethod
+    def preCheck(self):
+        '''
+        abstract precheck for check item
+        '''
+        pass
+
+    @abstractmethod
+    def doCheck(self):
+        '''
+        check script for each item
+        '''
+        pass
+
+    @abstractmethod
+    def postAnalysis(self, itemResult, category="", name=""):
+        '''
+        analyze the item result collected from each node
+        '''
+        pass
+
+    def initFrom(self, context):
+        '''
+        initialize the check item from context
+        '''
+        item = next((i for i in context.items if i['name'] == self.name),
+                    None)
+        if item:
+            self.title = self.__getLocaleAttr(item, 'title')
+            self.suggestion = self.__getLocaleAttr(item, 'suggestion')
+            self.standard = self.__getLocaleAttr(item, 'standard')
+            if (item.__contains__('category')):
+                self.category = item['category']
+            if (item.__contains__('threshold')):
+                self.threshold = item['threshold']
+            # set pre check method
+            self.setScope(item['scope'])
+            # set post analysis method
+            self.setAnalysis(item['analysis'])
+
+        self.context = context
+        self.cluster = context.cluster
+        self.user = context.user
+        self.nodes = context.nodes
+        self.mpprcFile = context.mpprc
+        self.result.checkID = context.checkID
+        self.result.user = context.user
+        self.tmpPath = context.tmpPath
+        self.outPath = context.outPath
+        self.set = context.set
+        self.log = context.log
+        self.routing = context.routing
+        self.skipSetItem = context.skipSetItem
+        self.__getLocalIP(context.nodes)
+        self.LCName = context.LCName
+        self.ShrinkNodes = context.ShrinkNodes
+        if not context.thresholdDn:
+            self.thresholdDn = 90
+        else:
+            self.thresholdDn = context.thresholdDn
+        # new host without cluster installed
+        if (not self.user):
+            self.host = DefaultValue.GetHostIpOrName()
+            self.result.host = DefaultValue.GetHostIpOrName()
+
+    def __getLocalIP(self, nodeList):
+        for node in nodeList:
+            if (SharedFuncs.is_local_node(node) and SharedFuncs.validate_ipv4(
+                    node)):
+                self.ipAddr = node
+                return
+
+    def __getLocaleAttr(self, obj, attr, language='zh'):
+        '''
+        get attribute value for different languages
+        '''
+        locAttr = str(attr) + '_' + language
+        if (not obj.__contains__(locAttr) or obj[locAttr] == ""):
+            return obj[str(attr) + '_' + 'zh']
+        else:
+            return obj[locAttr]
+
+    def setScope(self, scope):
+        # Choose execution node
+        self.scope = scope
+        # cn node to perform the check
+        if (scope == 'cn'):
+            self.preCheck = self.__cnPreCheck(self.preCheck)
+        # Local implementation of the inspection
+        elif (scope == 'local'):
+            self.preCheck = self.__localPreCheck(self.preCheck)
+
+    def setAnalysis(self, analysis):
+        # Analyze the test results
+        self.analysis = analysis
+        # Consistency analysis for ap
+        if (analysis == 'consistent'):
+            self.postAnalysis = types.MethodType(consistentAnalysis, self)
+        # Default analysis for ap
+        elif (analysis == 'default'):
+            self.postAnalysis = types.MethodType(defaultAnalysis, self)
+
+    def runCheck(self, context, g_logger):
+        '''
+        main process for checking
+        '''
+        try:
+            g_logger.debug("Start to run %s" % self.name)
+            # initialization
+            self.initFrom(context)
+            self.preCheck()
+            # Perform the inspection
+            self.doCheck()
+            if (self.set and (
+                    self.result.rst == ResultStatus.NG
+                    or self.result.rst == ResultStatus.WARNING)
+                    and self.name not in self.skipSetItem):
+                self.doSet()
+                self.doCheck()
+            g_logger.debug("Finish to run %s" % self.name)
+        except CheckNAException:
+            self.result.rst = ResultStatus.NA
+        # An internal error occurred while executing code
+        except Exception as e:
+            self.result.rst = ResultStatus.ERROR
+            self.result.val = str(e)
+            g_logger.debug(
+                "Exception occurred when running %s:\n%s" % (self.name,
+                                                             str(e)))
+        finally:
+            # output result
+            self.result.output(context.tmpPath)
+
+    def __cnPreCheck(self, func):
+        # cn Pre-check node
+        def wrapper():
+            if (not hasattr(self, 'cluster')):
+                raise Exception(ErrorCode.GAUSS_530["GAUSS_53030"]
+                                % "cluster attribute")
+            if (not hasattr(self, 'host')):
+                raise Exception(ErrorCode.GAUSS_530["GAUSS_53030"]
+                                % "host attribute")
+            if (not self.cluster):
+                raise Exception(ErrorCode.GAUSS_530["GAUSS_53031"])
+            dbNode = self.cluster.getDbNodeByName(self.host)
+            # The specified node does not exist or is empty
+            if (dbNode is None or dbNode == ""):
+                raise Exception(ErrorCode.GAUSS_530["GAUSS_53013"]
+                                % "The dbNode")
+            if self.cluster.isSingleInstCluster():
+                masterDn = SharedFuncs.getMasterDnNum(self.user,
+                                                      self.mpprcFile)
+                if len(dbNode.datanodes) < 1 or dbNode.datanodes[
+                    0].instanceId not in masterDn:
+                    raise CheckNAException(
+                        "The node does not contain a master DN instance")
+                self.port = dbNode.datanodes[0].port
+            else:
+                # The specified CN node does not exist
+                if (len(dbNode.coordinators) == 0):
+                    raise CheckNAException(
+                        "The node does not contain a CN instance")
+                # get cn port
+                self.port = dbNode.coordinators[0].port
+                self.cntype = dbNode.coordinators[0].instanceType
+            return func()
+
+        return wrapper
+
+    def __localPreCheck(self, func):
+        def wrapper():
+            return func()
+
+        return wrapper
+
+
+class CheckItemFactory(object):
+    @staticmethod
+    def createItem(name, path, scope='all', analysis='default'):
+        mod = imp.load_source(name, path)
+        clazz = getattr(mod, name)
+        checker = clazz()
+        # set pre check method
+        checker.setScope(scope)
+        # set post analysis method
+        checker.setAnalysis(analysis)
+        return checker
+
+    @staticmethod
+    def createFrom(name, path, context):
+        mod = imp.load_source(name, path)
+        clazz = getattr(mod, name)
+        checker = clazz()
+        checker.initFrom(context)
+        return checker
diff --git a/script/gspylib/inspection/common/CheckResult.py b/script/gspylib/inspection/common/CheckResult.py
new file mode 100644
index 0000000..87281e5
--- /dev/null
+++ b/script/gspylib/inspection/common/CheckResult.py
@@ -0,0 +1,252 @@
+# -*- coding:utf-8 -*-
+#############################################################################
+# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+#
+# openGauss is licensed under Mulan PSL v2.
+# You can use this software according to the terms
+# and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+#
+# http://license.coscl.org.cn/MulanPSL2
+#
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
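+# ----------------------------------------------------------------------------
+# Description : CheckResult.py defines the result containers used by the
+#               inspection framework: LocalItemResult (one item on one host),
+#               ItemResult (one item across hosts) and CheckResult (all items).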
+############################################################################# + +import os +import sys +import json +import time +import pwd +from gspylib.inspection.common import SharedFuncs +from gspylib.common.Common import DefaultValue +from gspylib.inspection.common.Log import LoggerFactory + +class GsCheckEncoder(json.JSONEncoder): + def default(self, obj): + if isinstance(obj, bytes): + return str(obj, encoding='utf-8') + return json.JSONEncoder.default(self, obj) + + +class ResultStatus(object): + OK = "OK" + NA = "NA" + WARNING = "WARNING" + NG = "NG" + ERROR = "ERROR" + + +class LocalItemResult(object): + ''' + the check result running on one host + ''' + + def __init__(self, name, host): + self.name = name + self.host = host + self.raw = "" + self.rst = ResultStatus.NA + self.val = "" + self.checkID = None + self.user = None + + def output(self, outPath): + u""" +[HOST] {host} +[NAM] {name} +[RST] {rst} +[VAL] +{val} +[RAW] +{raw} + """ + + val = self.val if self.val else "" + raw = self.raw if self.raw else "" + try: + content = self.output.__doc__.format(name=self.name, rst=self.rst, + host=self.host, val=val, + raw=raw) + except Exception: + content = self.output.__doc__.encode('utf-8').format( + name=self.name, rst=self.rst, host=self.host, val=val, + raw=raw).decode('utf-8', 'ignore') + fileName = "%s_%s_%s.out" % (self.name, self.host, self.checkID) + # output the result to local path + SharedFuncs.writeFile(fileName, content, outPath, + DefaultValue.KEY_FILE_MODE, self.user) + + +class ItemResult(object): + def __init__(self, name): + self.name = name + self._items = [] + self.rst = ResultStatus.NA + self.standard = "" + self.suggestion = "" + self.category = 'other' + self.analysis = "" + + def __iter__(self): + return iter(self._items) + + def __getitem__(self, idx): + return self._items[idx] + + def append(self, val): + self._items.append(val) + + def formatOutput(self, detail=False): + result = u"{name:.<25}...............{rst:.>6}".format(name=self.name, + rst=self.rst) + result += u"\r\n%s\r\n" % self.analysis + return result + + def getLocalItems(self): + return self._items + + @staticmethod + def parse(output): + itemResult = None + localItemResult = None + host = None + idx = 0 + for line in output.splitlines(): + idx += 1 + if (idx == len( + output.splitlines()) and localItemResult is not None): + itemResult.append(localItemResult) + current = line.strip() + if (not current): + continue + if (current.startswith('[HOST]')): + host = current.split()[1].strip() + if (current.startswith('[NAM]')): + name = current.split()[1].strip() + if (itemResult is None): + itemResult = ItemResult(name) + if (localItemResult is not None): + itemResult.append(localItemResult) + localItemResult = LocalItemResult(current.split()[1].strip(), + host) + if (current.startswith('[RST]')): + localItemResult.rst = current.split()[1].strip() + if (current.startswith('[VAL]')): + localItemResult.val = ItemResult.__parseMultiLine( + output.splitlines()[idx:]) + if (current.startswith('[RAW]')): + localItemResult.raw = ItemResult.__parseMultiLine( + output.splitlines()[idx:]) + return itemResult + + @staticmethod + def __parseMultiLine(lines): + vals = [] + starter = ('[HOST]', '[NAM]', '[RST]', '[VAL]', '[RAW]') + for line in lines: + current = line.strip() + if (current.startswith(starter)): + break + else: + vals.append(current) + return "\n".join(vals) + + +class CheckResult(object): + def __init__(self): + self._items = [] + + def __iter__(self): + return iter(self._items) + + def 
__getitem__(self, idx): + return self._items[idx] + + def append(self, val): + self._items.append(val) + + def outputStatistic(self): + ok = 0 + warning = 0 + ng = 0 + error = 0 + for i in self._items: + if (i.rst == ResultStatus.ERROR): + error += 1 + elif (i.rst == ResultStatus.NG): + ng += 1 + elif (i.rst == ResultStatus.WARNING): + warning += 1 + else: + ok += 1 + okMsg = " Success:%s " % ok if ok > 0 else "" + warningMsg = " Warning:%s " % warning if warning > 0 else "" + ngMsg = " NG:%s " % ng if ng > 0 else "" + errorMsg = " Error:%s " % error if error > 0 else "" + result = "" + result += "Failed." if (ng + error) > 0 else "Success." + result += "\tAll check items run completed. Total:%s %s %s %s %s" % ( + ok + warning + ng + error, okMsg, warningMsg, ngMsg, errorMsg) + return result + + def outputRaw(self): + u""" +{date} [NAM] {name} +{date} [STD] {standard} +{date} [RST] {rst} +{val} +{date} [RAW] +{raw} + """ + + result = "" + for i in self._items: + for j in i._items: + t = time.localtime(time.time()) + dateString = time.strftime("%Y-%m-%d %H:%M:%S", t) + rst = j.rst + if (j.rst == ResultStatus.NA): + rst = "NONE" + elif ( + j.rst == ResultStatus.WARNING + or j.rst == ResultStatus.ERROR): + rst = "NG" + result += self.outputRaw.__doc__.format(date=dateString, + name=j.name, + standard=i.standard, + rst=rst, + val=j.val, raw=j.raw) + result += "\r\n" + return result + + def outputResult(self): + result = "" + for i in self._items: + result += i.formatOutput() + result += "\r\n" + result += self.outputStatistic() + return result + + def outputJson(self): + resultDic = {} + for itemResult in self._items: + resultDic['name'] = itemResult.name + resultDic['category'] = itemResult.category + resultDic['std'] = "" if itemResult.standard.strip() == "" \ + else itemResult.standard.decode('utf-8', 'ignore') + resultDic['rst'] = itemResult.rst + resultDic['analysis'] = itemResult.analysis + resultDic['suggestion'] = itemResult.suggestion + localList = [] + for localitem in itemResult: + local = {} + local['host'] = localitem.host + local['rstd'] = localitem.val + local['raw'] = localitem.raw + localList.append(local) + resultDic['hosts'] = localList + return json.dumps(resultDic, cls=GsCheckEncoder, indent=2) diff --git a/script/gspylib/inspection/common/Exception.py b/script/gspylib/inspection/common/Exception.py new file mode 100644 index 0000000..f4f5884 --- /dev/null +++ b/script/gspylib/inspection/common/Exception.py @@ -0,0 +1,184 @@ +# -*- coding:utf-8 -*- +############################################################################# +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. 
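+# ----------------------------------------------------------------------------
+# Description : Exception.py defines the exception hierarchy raised by the
+#               inspection tool; each class carries a GAUSS-530xx error code.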
+#############################################################################
+import sys
+
+
+class CheckException(Exception):
+    def __init__(self, content):
+        self.code = "GAUSS-53000"
+        self.content = content
+
+    def __str__(self):
+        return "[%s]: ERROR: " % self.code + self.content
+
+
+class ParameterException(CheckException):
+    def __init__(self, content):
+        self.code = "GAUSS-53012"
+        self.content = "Errors occurred when parsing parameters: %s." % content
+
+
+class UnknownParameterException(CheckException):
+    def __init__(self, param):
+        self.code = "GAUSS-53013"
+        self.content = "Unknown parameters were set: %s." % param
+
+
+class EmptyParameterException(CheckException):
+    def __init__(self):
+        self.code = "GAUSS-53014"
+        self.content = "The parameters cannot be empty."
+
+
+class UseBothParameterException(CheckException):
+    def __init__(self, params):
+        self.code = "GAUSS-53015"
+        self.content = \
+            "The parameters '-%s' and '-%s' cannot be used together." % (
+                params[0], params[1])
+
+
+class AvailableParameterException(CheckException):
+    def __init__(self, parent, subs):
+        self.code = "GAUSS-53016"
+        self.content = "The parameters '%s' are not available for '%s'." % (
+            ",".join(subs), parent)
+
+
+class SceneNotFoundException(CheckException):
+    def __init__(self, scene, supportScenes):
+        self.code = "GAUSS-53017"
+        self.content = \
+            "The scene %s and its configuration file scene_%s.xml " \
+            "were not found in the config folder." % (
+                scene, scene) + "\nThe supported scenes are: [%s]" % ",".join(
+                supportScenes)
+
+
+class ParseItemException(CheckException):
+    def __init__(self, items):
+        self.code = "GAUSS-53017"
+        self.content = \
+            "There were errors when parsing these items: %s." % ",".join(
+                items) + \
+            " The item names may be incorrect."
+
+
+class NotEmptyException(CheckException):
+    def __init__(self, elem, detail=""):
+        self.code = "GAUSS-53018"
+        self.content = "The %s cannot be empty. %s" % (elem, detail)
+
+
+class NotExistException(CheckException):
+    def __init__(self, elem, List):
+        self.code = "GAUSS-53019"
+        self.content = "The %s does not exist in %s." % (elem, List)
+
+
+class InterruptException(CheckException):
+    def __init__(self):
+        self.code = "GAUSS-53020"
+        self.content = \
+            "The checking process was interrupted by the user with Ctrl+C."
+
+
+class TrustException(CheckException):
+    def __init__(self, hosts):
+        self.code = "GAUSS-53021"
+        self.content = "Failed to verify SSH trust on hosts: %s" % hosts
+
+
+class ShellCommandException(CheckException):
+    def __init__(self, cmd, output):
+        self.code = "GAUSS-53025"
+        self.cmd = cmd
+        self.output = output
+        self.content = \
+            "Failed to execute shell command: %s. The exception is: %s" % (
+                self.cmd, self.output)
+
+
+class SshCommandException(CheckException):
+    def __init__(self, host, cmd, output):
+        self.code = "GAUSS-53026"
+        self.cmd = cmd
+        self.host = host
+        self.output = output
+        self.content = \
+            "Failed to execute SSH command on host %s. The exception is: " \
+            "%s" % (
+                self.host, self.output)
+
+
+class SQLCommandException(CheckException):
+    def __init__(self, sql, output):
+        self.code = "GAUSS-53027"
+        self.sql = sql
+        self.output = output
+        self.content = \
+            "Failed to execute SQL command: %s. The exception is: %s" % (
+                self.sql, self.output)
+
+
+class TimeoutException(CheckException):
+    def __init__(self, nodes):
+        self.code = "GAUSS-53028"
+        self.content = "Execution timed out on nodes: %s." 
% ",".join(nodes) + + +class ThreadCheckException(CheckException): + def __init__(self, thread, exception): + self.code = "GAUSS-53020" + if (isinstance(exception, ShellCommandException) + or isinstance(exception, SQLCommandException) + or isinstance(exception, SshCommandException)): + output = exception.output + elif (isinstance(exception, TimeoutException)): + output = exception.content + elif (isinstance(exception, CheckException)): + output = exception.content + else: + output = str(exception) + self.content = \ + "The thread %s running checking item but occurs errors: %s" % ( + thread, output) + + +class ContextDumpException(CheckException): + def __init__(self, errors): + self.code = "GAUSS-53030" + self.content = "Dumping context has errors: %s." % str(errors) + + +class ContextLoadException(CheckException): + def __init__(self, errors): + self.code = "GAUSS-53031" + self.content = "Loading context has errors: %s." % str(errors) + + +class CheckErrorException(CheckException): + def __init__(self): + self.code = "GAUSS-53032" + self.content = "An internal error occurred during the checking process" + + +class CheckNAException(CheckException): + def __init__(self, item): + self.code = "GAUSS-53033" + self.content = \ + "Check item %s are not needed at the current node" % item diff --git a/script/gspylib/inspection/common/Log.py b/script/gspylib/inspection/common/Log.py new file mode 100644 index 0000000..55fac35 --- /dev/null +++ b/script/gspylib/inspection/common/Log.py @@ -0,0 +1,181 @@ +# -*- coding:utf-8 -*- +############################################################################# +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +############################################################################# +import sys +import pwd +import time +import subprocess +import logging.handlers +import os +from gspylib.common.Common import DefaultValue +from gspylib.common.ErrorCode import ErrorCode + +# max log file size +# 16M +MAXLOGFILESIZE = 16 * 1024 * 1024 +KEY_FILE_MODE = 600 + + +class LoggerFactory(): + def __init__(self): + pass + + @staticmethod + def getLogger(module, logFile, user=""): + """ + function : config log handler + input : module, logFileName, logLevel + output : log + """ + afilename = LoggerFactory.getLogFileName(os.path.realpath(logFile)) + if (not os.path.exists(afilename)): + dirName = os.path.dirname(afilename) + cmd = "if [ ! 
-d %s ]; then mkdir %s -p -m %s;fi" % ( + dirName, dirName, DefaultValue.KEY_DIRECTORY_MODE) + cmd += ";touch %s && chmod %s %s" % ( + afilename, KEY_FILE_MODE, afilename) + # The user exists and is not the current user + if (user and pwd.getpwnam(user).pw_uid != os.getuid()): + cmd = "su - %s -c \"%s\" " % (user, cmd) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50206"] + % ("log file [%s]" % afilename) + + "Error:\n%s" % output + + "The cmd is %s " % cmd) + log = logging.getLogger(module) + LoggerFactory._addFileHandle(log, afilename) + LoggerFactory._addConsoleHandle(log) + return (log, afilename) + + @staticmethod + def getLogFileName(oldLogFile): + """ + function : Increase the time stamp and check the file size + input : logFileName + output : String + """ + # get current time + currentTime = time.strftime("%Y-%m-%d_%H%M%S") + # Check log file correctness + dirName = os.path.dirname(oldLogFile) + originalFileName = os.path.basename(oldLogFile) + resList = originalFileName.split(".") + if (len(resList) > 2): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50235"] % oldLogFile) + (prefix, suffix) = os.path.splitext(originalFileName) + if (suffix != ".log"): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50212"] + % (oldLogFile, ".log")) + # The log file have time stamped in -L mode + if (len(originalFileName) > 21): + timeStamp = originalFileName[-21:-4] + if (LoggerFactory.is_valid_date(timeStamp)): + return oldLogFile + + # Defaults log file + newLogFile = dirName + "/" + prefix + "-" + currentTime + suffix + if (os.path.isdir(dirName)): + # Check old log file list + cmd = "ls %s | grep '^%s-' | grep '%s$'" % ( + dirName, prefix, suffix) + (status, output) = subprocess.getstatusoutput(cmd) + if (status == 0): + filenameList = [] + for echoLine in output.split("\n"): + filename = echoLine.strip() + existedResList = filename.split(".") + if (len(existedResList) > 2): + continue + existedSuffix = os.path.splitext(filename)[1] + if (existedSuffix != ".log"): + continue + if (len(originalFileName) + 18 != len(filename)): + continue + timeStamp = filename[-21:-4] + # check log file name + if (LoggerFactory.is_valid_date(timeStamp)): + pass + else: + continue + # Add the valid log file + filenameList.append(filename) + + if (len(filenameList)): + fileName = max(filenameList) + logFile = dirName + "/" + fileName.strip() + # check if need switch to an new log file + size = os.path.getsize(logFile) + if (size <= MAXLOGFILESIZE): + newLogFile = logFile + return newLogFile + + @staticmethod + def is_valid_date(datastr): + ''' + Judge if date valid + ''' + try: + time.strptime(datastr, "%Y-%m-%d_%H%M%S") + return True + except Exception: + return False + + @staticmethod + def getScriptLogger(): + filePath = os.path.split(os.path.realpath(__file__))[0] + afilename = "%s/../output/log/script_%s.log" % ( + filePath, DefaultValue.GetHostIpOrName()) + + log = logging.getLogger() + LoggerFactory._addFileHandle(log, afilename) + return log + + @staticmethod + def _addFileHandle(log, fileName): + # create log file + if not os.path.exists(os.path.dirname(fileName)): + dir_permission = 0o700 + os.makedirs(os.path.dirname(fileName), mode=dir_permission) + else: + if oct(os.stat(fileName).st_mode)[-3:] != '600': + os.chmod(fileName, DefaultValue.KEY_FILE_PERMISSION) + + fmt = logging.Formatter( + '[%(asctime)s][%(filename)s][line:%(lineno)d][%(levelname)s] ' + '%(message)s', + '%Y-%m-%d %H:%M:%S') + # output the log to a file + # 16M takes 
precedence over 20M, Here cut the file does not trigger + rthandler = logging.handlers.RotatingFileHandler( + fileName, + maxBytes=20 * 1024 * 1024, + backupCount=2) + rthandler.setFormatter(fmt) + rthandler.setLevel(logging.DEBUG) + log.handlers = [] + log.addHandler(rthandler) + + @staticmethod + def _addConsoleHandle(log): + fmt = logging.Formatter('%(message)s') + # output the log to screen the same time + console = logging.StreamHandler() + console.setFormatter(fmt) + console.setLevel(logging.INFO) + log.addHandler(console) + log.setLevel(logging.DEBUG) diff --git a/script/gspylib/inspection/common/ProgressBar.py b/script/gspylib/inspection/common/ProgressBar.py new file mode 100644 index 0000000..79908cd --- /dev/null +++ b/script/gspylib/inspection/common/ProgressBar.py @@ -0,0 +1,112 @@ +# -*- coding:utf-8 -*- +############################################################################# +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +############################################################################# +import re +import sys +import threading + +CLEAR_TO_END = "\033[K" +UP_ONE_LINE = "\033[F" + + +class ProgressBar(object): + def __init__(self, width=25, title=''): + self.width = width + self.title = ProgressBar.filter_str(title) + self._lock = threading.Lock() + + @property + def lock(self): + return self._lock + + def update(self, progress=0): + pass + + @staticmethod + def filter_str(pending_str): + """remove strings like \r \t \n""" + return re.sub(pattern=r'\r|\t|\n', repl='', string=pending_str) + + +class LineProgress(ProgressBar): + def __init__(self, total=100, symbol='#', width=25, title=''): + """ + @param total : count of progress bar + @param symbol : symbol to show + @param width : width of progress bar + @param title : text before progress bar + """ + super(LineProgress, self).__init__(width=width, title=title) + self.total = total + self.symbol = symbol + self._current_progress = 0 + + def update(self, progress=0): + """ + @param progress : current value + """ + with self.lock: + if progress > 0: + self._current_progress = float(progress) + sys.stdout.write('\r' + CLEAR_TO_END) + hashes = '=' * int( + self._current_progress // self.total * self.width) + spaces = ' ' * (self.width - len(hashes)) + sys.stdout.write("\r%-25s [%s] %d/%d" % ( + self.title, hashes + spaces, self._current_progress, + self.total)) + + +class MultiProgressManager(object): + def __new__(cls, *args, **kwargs): + """singleton""" + if not hasattr(cls, '_instance'): + cls._instance = super(MultiProgressManager, cls).__new__(cls) + return cls._instance + + def __init__(self): + self._progress_dict = {} + self._lock = threading.Lock() + + def put(self, key, progress_bar): + with self._lock: + if key and progress_bar: + self._progress_dict[key] = progress_bar + progress_bar.index = len(self._progress_dict) - 1 + + def clear(self): + with self._lock: + self._progress_dict.clear() + + def update(self, key, progress): + """ + @param key : progress bar key + @param progress : value + """ + with self._lock: + if 
not key: + return + delta_line = len(self._progress_dict) + sys.stdout.write( + UP_ONE_LINE * delta_line if delta_line > 0 else '') + for tmp_key in self._progress_dict.keys(): + progress_bar = self._progress_dict.get(tmp_key) + tmp_progress = 0 + if key == tmp_key: + tmp_progress = progress + progress_bar.update(tmp_progress) + sys.stdout.write('\n') diff --git a/script/gspylib/inspection/common/SharedFuncs.py b/script/gspylib/inspection/common/SharedFuncs.py new file mode 100644 index 0000000..b5370eb --- /dev/null +++ b/script/gspylib/inspection/common/SharedFuncs.py @@ -0,0 +1,982 @@ +# -*- coding:utf-8 -*- +############################################################################# +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +############################################################################# +import sys +import subprocess +import os +import pwd +import time +import re +import multiprocessing +from datetime import datetime, timedelta +from gspylib.common.Common import DefaultValue +from gspylib.common.VersionInfo import VersionInfo +from gspylib.common.ErrorCode import ErrorCode +from multiprocessing.pool import ThreadPool +from gspylib.os.gsfile import g_file +from gspylib.os.gsfile import g_Platform +from gspylib.os.gsnetwork import g_network +from gspylib.inspection.common.Exception import TrustException, \ + ShellCommandException, SshCommandException, SQLCommandException + +localPath = os.path.dirname(__file__) +sys.path.insert(0, localPath + "/../lib") + +FILE_MODE = 640 +FILE_WRITE_MODE = 220 +DIRECTORY_MODE = 750 +KEY_FILE_MODE = 600 +KEY_DIRECTORY_MODE = 700 +MAX_FILE_NODE = 755 +MAX_DIRECTORY_NODE = 755 +INIT_FILE_SUSE = "/etc/init.d/boot.local" +INIT_FILE_REDHAT = "/etc/rc.d/rc.local" + + +def runShellCmd(cmd, user=None, mpprcFile=""): + """ + function: run shell cmd + input : md, user, mpprcFile + output : str + """ + if (mpprcFile): + cmd = "source '%s'; %s" % (mpprcFile, cmd) + # Set the output LANG to English + cmd = "export LC_ALL=C; %s" % cmd + # change user but can not be root user + if (user and user != getCurrentUser()): + cmd = "su - %s -c \"source /etc/profile 2>/dev/null; %s\"" % ( + user, cmd) + cmd = cmd.replace("$", "\$") + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0 and DefaultValue.checkDockerEnv()): + return output + if (status != 0): + raise ShellCommandException(cmd, output) + return output + + +def runSshCmd(cmd, host, user="", mpprcFile="", timeout=""): + """ + function: run ssh cmd + input : cmd, host, user, mpprcFile, timeout + output : str + """ + if (timeout): + timeout = "-o ConnectTimeout=%s" % timeout + if (mpprcFile): + cmd = "source '%s'; %s" % (mpprcFile, cmd) + # Set the output LANG to English + cmd = "export LC_ALL=C; %s" % cmd + # RedHat does not automatically source /etc/profile + # but SuSE executes when using ssh to remotely execute commands + # Some environment variables are written in /etc/profile + # when there is no separation of environment variables + if (host == DefaultValue.GetHostIpOrName()): + sshCmd = cmd 
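+    # Remote host: wrap the command with pssh and source /etc/profile on the
+    # target before executing.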
+    else:
+        sshCmd = "pssh -s -H %s %s 'source /etc/profile 2>/dev/null;%s'" % (
+            host, timeout, cmd)
+    if (user and user != getCurrentUser()):
+        sshCmd = "su - %s -c \"%s\"" % (user, sshCmd)
+    (status, output) = subprocess.getstatusoutput(sshCmd)
+    if (status != 0):
+        raise SshCommandException(host, sshCmd, output)
+    return output
+
+
+def runSshCmdWithPwd(cmd, host, user="", passwd="", mpprcFile=""):
+    """
+    function: run ssh cmd with password
+    input : cmd, host, user, passwd, mpprcFile
+    output : str
+    """
+    # Environment variables separation
+    if (mpprcFile):
+        cmd = "source '%s'; %s" % (mpprcFile, cmd)
+    ssh = None
+    try:
+        if (passwd):
+            import paramiko
+            cmd = "export LC_ALL=C; source /etc/profile 2>/dev/null; %s" % cmd
+            ssh = paramiko.SSHClient()
+            ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
+            # Remote Connection
+            ssh.connect(host, 22, user, passwd)
+            stdout, stderr = ssh.exec_command(cmd)[1:3]
+            output = stdout.read()
+            error = stderr.read()
+            if error:
+                raise SshCommandException(host, cmd, error)
+            return output.decode()
+        else:
+            cmd = \
+                "pssh -s -H %s \"export LC_ALL=C; " \
+                "source /etc/profile 2>/dev/null; %s\"" % (
+                    host, cmd)
+            (status, output) = subprocess.getstatusoutput(cmd)
+            if (status != 0):
+                raise SshCommandException(host, cmd, output)
+            return output
+    except Exception as e:
+        raise Exception(str(e))
+    finally:
+        if (ssh):
+            ssh.close()
+
+
+def runRootCmd(cmd, rootuser, passwd, mpprcFile=''):
+    """
+    function: run root cmd
+    input : cmd, rootuser, passwd, mpprcFile
+    output : str
+    """
+    if (mpprcFile):
+        cmd = "source '%s'; %s" % (mpprcFile, cmd)
+    ssh = None
+    try:
+        import paramiko
+        cmd = "export LC_ALL=C; source /etc/profile 2>/dev/null; %s" % cmd
+        ssh = paramiko.SSHClient()
+        ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
+        ssh.connect('localhost', 22, rootuser, passwd)
+        stdout, stderr = ssh.exec_command(cmd, get_pty=True)[1:3]
+        output = stdout.read()
+        error = stderr.read()
+        if error:
+            raise SshCommandException("localhost", cmd, error)
+        return output.decode()
+    except Exception as e:
+        raise Exception(str(e))
+    finally:
+        if ssh:
+            ssh.close()
+
+
+def verifyPasswd(host, user, pswd=None):
+    """
+    function: verify password
+    Connect to the remote node
+    input : host, user, pswd
+    output : bool
+    """
+    import paramiko
+    ssh = paramiko.Transport((host, 22))
+    try:
+        ssh.connect(username=user, password=pswd)
+        return True
+    except paramiko.AuthenticationException:
+        return False
+    finally:
+        ssh.close()
+
+
+def cleanOutput(output):
+    """
+    function: clean warning or password messages from ssh output
+    input : output
+    output : str
+    """
+    lines = output.splitlines()
+    if (len(lines) == 0):
+        return ''
+    idx = 1
+    for line in lines:
+        if (line.lower().find('password:') != -1):
+            break
+        idx += 1
+    return output if idx == len(lines) + 1 else "\n".join(lines[idx:])
+
+
+def runSqlCmdWithTimeOut(sql, user, host, port, tmpPath, database="postgres",
+                         mpprcFile="", needmpara=False, timeout=60):
+    """
+    function: run sql cmd with timeout
+    input : sql, user, host, port, tmpPath, database,
+            mpprcFile, needmpara, timeout
+    output : str
+    """
+    infoList = [
+        [sql, user, host, port, tmpPath, database, mpprcFile, needmpara]]
+    endTime = datetime.now() + timedelta(seconds=timeout)
+    pool = ThreadPool(1)
+    result = pool.map_async(executeSql, infoList)
+    while datetime.now() < endTime:
+        # poll for completion through the public AsyncResult API
+        if result.ready():
+            pool.close()
+            value = result.get()[0]
+            if value == "NO RESULT":
+                return ""
+            elif value.startswith("ERROR"):
+                raise SQLCommandException(sql, value)
+            else:
+                return value
+        else:
+            time.sleep(1)
+    pool.close()
+    raise SQLCommandException(
+        sql,
+        "Running timeout, exceed the limit %s seconds" % timeout)
+
+
+def executeSql(paraList):
+    """
+    function: execute sql
+    input : paraList
+    output : String
+    """
+    sql = paraList[0]
+    user = paraList[1]
+    host = paraList[2]
+    port = paraList[3]
+    tmpPath = paraList[4]
+    database = paraList[5]
+    mpprcFile = paraList[6]
+    needmpara = paraList[7]
+    try:
+        output = runSqlCmd(sql, user, host, port, tmpPath, database, mpprcFile,
+                           needmpara)
+        if (not output):
+            output = "NO RESULT"
+    except Exception as e:
+        output = "ERROR:%s" % (str(e))
+    return output
+
+
+def runSqlCmd(sql, user, host, port, tmpPath, database="postgres",
+              mpprcFile="", maintenance=False):
+    """
+    function : Execute sql command
+    input : String,String,String,int
+    output : String
+    """
+    database = database.replace('$', '\$')
+    # Get the current time
+    currentTime = time.strftime("%Y-%m-%d_%H%M%S")
+    # Get the process ID
+    pid = os.getpid()
+    # init SQL query file
+    sqlFile = os.path.join(tmpPath,
+                           "check_query.sql_%s_%s_%s" % (
+                               str(port), str(currentTime), str(pid)))
+    # init SQL result file
+    queryResultFile = os.path.join(tmpPath,
+                                   "check_result.sql_%s_%s_%s" % (
+                                       str(port), str(currentTime), str(pid)))
+    # Clean up the files
+    cleanFile("%s,%s" % (queryResultFile, sqlFile))
+
+    # create an empty sql query file
+    try:
+        cmd = "touch %s && chmod %s %s" % \
+              (sqlFile, DefaultValue.MAX_DIRECTORY_MODE, sqlFile)
+        runShellCmd(cmd, user, mpprcFile)
+    except ShellCommandException as e:
+        raise SQLCommandException(sql,
+                                  "create sql query file failed." + e.output)
+
+    # write the SQL command into the sql query file
+    try:
+        with open(sqlFile, 'w') as fp:
+            fp.writelines(sql)
+    except Exception as e:
+        # Clean up the file
+        cleanFile(sqlFile)
+        raise SQLCommandException(sql,
+                                  "write into sql query file failed. " + str(
+                                      e))
+
+    # read the content of query result file.
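+    # the statement runs via "gsql -f sqlFile --output queryResultFile";
+    # both temporary files are removed again on success and on failure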
+ try: + # init host + hostPara = ( + "-h %s" % host) \ + if host != "" and host != "localhost" \ + and host != DefaultValue.GetHostIpOrName() else "" + # build shell command + cmd = "gsql %s -p %s -d %s -f %s --output %s -t -A -X" % ( + hostPara, port, database, sqlFile, queryResultFile) + if (maintenance): + cmd += ' -m' + # Environment variables separation + if mpprcFile != "": + cmd = "source '%s' && " % mpprcFile + cmd + # Execute the shell command + output = runShellCmd(cmd, user) + if findErrorInSqlFile(sqlFile, output): + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + "Error:\n%s" % output) + + # Reading documents + fp = None + with open(queryResultFile, 'r') as fp: + rowList = fp.readlines() + except Exception as e: + cleanFile("%s,%s" % (queryResultFile, sqlFile)) + if isinstance(e, ShellCommandException): + output = e.output + else: + output = str(e) + raise SQLCommandException(sql, output) + + # remove local sqlFile + cleanFile("%s,%s" % (queryResultFile, sqlFile)) + + return "".join(rowList)[:-1] + + +def runSqlSimplely(sql, user, host, port, tmpPath, database="postgres", + mpprcFile="", needmpara=False): + """ + function : Execute sql command + input : String,String,String,int + output : String + """ + # Get the current time + currentTime = time.strftime("%Y-%m-%d_%H%M%S") + # Get the process ID + pid = os.getpid() + # init SQL query file + sqlFile = os.path.join(tmpPath, + "check_query.sql_%s_%s_%s" % ( + str(port), str(currentTime), str(pid))) + + # Clean up the file + if (os.path.exists(sqlFile)): + cleanFile("%s" % (sqlFile)) + + # create an empty sql query file + try: + cmd = "touch %s && chmod %s %s" % \ + (sqlFile, DefaultValue.MAX_DIRECTORY_MODE, sqlFile) + runShellCmd(cmd, user, mpprcFile) + except ShellCommandException as e: + raise SQLCommandException(sql, "create sql query file failed.") + + # write the SQL command into sql query file + try: + with open(sqlFile, 'w') as fp: + fp.writelines(sql) + except Exception as e: + # Clean up the file + cleanFile(sqlFile) + raise SQLCommandException(sql, + "write into sql query file failed. " + str( + e)) + + # read the content of query result file. 
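+    # unlike runSqlCmd, no --output file is used here: gsql's stdout is
+    # returned directly, and -m is appended when maintenance mode is needed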
+ try: + # init host + hostPara = ( + "-h %s" % host) \ + if host != "" and host != "localhost" else "" + # build shell command + if (needmpara): + cmd = "gsql %s -p %s -d %s -f %s -m" % ( + hostPara, port, database, sqlFile) + else: + cmd = "gsql %s -p %s -d %s -f %s" % ( + hostPara, port, database, sqlFile) + # Environment variables separation + if mpprcFile != "": + cmd = "source '%s' && " % mpprcFile + cmd + # Execute the shell command + output = runShellCmd(cmd, user) + if findErrorInSqlFile(sqlFile, output): + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + "Error:\n%s" % output) + + # Reading documents + except Exception as e: + cleanFile("%s" % (sqlFile)) + if isinstance(e, ShellCommandException): + output = e.output + else: + output = str(e) + raise SQLCommandException(sql, output) + + # remove local sqlFile + cleanFile("%s" % (sqlFile)) + + return output + + +def findErrorInSqlFile(sqlFile, output): + """ + function : Find error in the sql file + input : String,String + output : String + """ + GSQL_BIN_FILE = "gsql" + # init flag + ERROR_MSG_FLAG = "(ERROR|FATAL|PANIC)" + GSQL_ERROR_PATTERN = "^%s:%s:(\d*): %s:.*" % ( + GSQL_BIN_FILE, sqlFile, ERROR_MSG_FLAG) + pattern = re.compile(GSQL_ERROR_PATTERN) + for line in output.split("\n"): + line = line.strip() + result = pattern.match(line) + if (result is not None): + return True + return False + + +def cleanFile(fileName, hostname=""): + """ + function : remove file + input : String,hostname + output : NA + """ + fileList = fileName.split(",") + cmd = "" + for fileStr in fileList: + if cmd != "": + cmd += ';(if [ -f %s ];then rm -f %s;fi)' % (fileStr, fileStr) + else: + cmd = '(if [ -f %s ];then rm -f %s;fi)' % (fileStr, fileStr) + if hostname == "": + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + raise Exception(ErrorCode.GAUSS_502["GAUSS-50207"] % "file" + + " Error: \n%s." % output + + "The cmd is %s " % cmd) + else: + sshCmd = "pssh -s -H %s '%s'" % (hostname, cmd) + (status, output) = subprocess.getstatusoutput(sshCmd) + if (status != 0): + raise Exception(ErrorCode.GAUSS_502["GAUSS-50207"] % "file" + + " Error: \n%s." 
% output + + "The cmd is %s " % sshCmd) + + +def checkComplete(checkId, host, hostname, user, tmpPath, passwd=None): + """ + function: check whether has completed or not + input : NA + output : NA + """ + cmd = "cd %s && ls -l |grep %s_%s.out|wc -l" % (tmpPath, hostname, checkId) + if (is_local_node(host)): + output = runShellCmd(cmd, user) + elif (passwd): + output = runSshCmdWithPwd(cmd, host, user, passwd) + else: + output = runSshCmd(cmd, host, user) + if (len(output.splitlines()) > 1): + output = output.splitlines()[-1] + return output + + +def getVersion(): + """ + Get current file version by VersionInfo + + """ + return ("%s %s" % (sys.argv[0].split("/")[-1], VersionInfo.COMMON_VERSION)) + + +def createFolder(folderName, path, permission=DIRECTORY_MODE, user=""): + # Folder path + folderName = os.path.join(path, folderName) + # Create a folder + g_file.createDirectory(folderName, True, permission) + # change owner + if (user): + g_file.changeOwner(user, folderName) + return folderName + + +def createFile(fileName, path, permission=FILE_MODE, user=""): + # file path + fileName = os.path.join(path, fileName) + # Create a file + g_file.createFile(fileName, True, permission) + # change owner + if (user): + g_file.changeOwner(user, fileName) + return fileName + + +def chmodFile(fileName, permission=FILE_MODE, user=""): + # Modify the file permissions + g_file.changeMode(permission, fileName) + if (user): + g_file.changeOwner(user, fileName) + + +def writeFile(fileName, content, path, permission=FILE_MODE, user=""): + """ + function: write file + input : NA + output : NA + """ + filePath = os.path.join(path, fileName) + # Create a file + g_file.createFile(filePath, True, permission) + # Modify the file permissions + if (user): + g_file.changeOwner(user, filePath) + g_file.writeFile(filePath, [content]) + + +def readFile(fileName): + # Get the contents of the file + text = g_file.readFile(fileName) + return "\n".join(text) + + +def sendFile(fileName, host, user, path, passwd=None): + # Copy files remotely + t = None + if (passwd): + try: + import paramiko + t = paramiko.Transport((host, 22)) + t.connect(username=user, password=passwd) + sftp = paramiko.SFTPClient.from_transport(t) + sftp.put(fileName, os.path.join(path, os.path.basename(fileName))) + except Exception as e: + raise Exception(str(e)) + finally: + if (t): + t.close() + else: + if "HOST_IP" not in list(os.environ.keys()): + host = "%s@%s" % (user, host) + cmd = "pscp -H %s '%s' %s" % (host, fileName, path) + if (os.getuid() == 0): + cmd = "su - %s -c \"%s\"" % (user, cmd) + runShellCmd(cmd) + + +def receiveFile(fileName, host, user, path, passwd=None): + # Receive remote files + t = None + if (passwd): + try: + import paramiko + t = paramiko.Transport((host, 22)) + t.connect(username=user, password=passwd) + sftp = paramiko.SFTPClient.from_transport(t) + if (type(fileName) == list): + for fname in fileName: + sftp.get(fname, + os.path.join(path, os.path.basename(fname))) + else: + sftp.get(fileName, os.path.join(path, fileName)) + except Exception as e: + raise Exception(str(e)) + finally: + if (t): + t.close() + else: + if "HOST_IP" not in list(os.environ.keys()): + host = "%s@%s" % (user, host) + cmd = "pssh -s -H %s 'pscp -H %s %s %s' " % ( + host, DefaultValue.GetHostIpOrName(), fileName, path) + if (os.getuid() == 0): + cmd = "su - %s -c \"%s\"" % (user, cmd) + runShellCmd(cmd) + + +def getCurrentUser(): + return pwd.getpwuid(os.getuid())[0] + + +def verifyTrust(hosts, user): + """ + function: Ensure the proper 
password-less access to the remote host.
+    input : hostname
+    output: True/False
+    """
+    try:
+        pool = ThreadPool(multiprocessing.cpu_count())
+        # pair each host with the user explicitly; zip(hosts, [user, ])
+        # would yield a single pair because zip stops at the shortest input
+        params = [(host, user) for host in hosts]
+        results = pool.map(lambda x: checkAuthentication(x[0], x[1]), params)
+        pool.close()
+        pool.join()
+        hostnames = ""
+        for (key, value) in results:
+            if (not key):
+                hostnames = hostnames + ',' + value
+        if (hostnames != ""):
+            raise TrustException(hostnames)
+    except Exception:
+        raise TrustException(",".join(hosts))
+    return True
+
+
+def checkAuthentication(host, user):
+    """
+    function: check authentication
+    input : NA
+    output : NA
+    """
+    cmd = 'pssh -s -H %s true' % host
+    try:
+        runSshCmd(cmd, host, user)
+    except Exception:
+        return (False, host)
+    return (True, host)
+
+
+def checkClusterUser(username, mpprcFile=''):
+    """
+    function: check cluster user
+    input : NA
+    output : NA
+    """
+    try:
+        pwd.getpwnam(username).pw_gid
+    except Exception:
+        return False
+    mpprc = mpprcFile if mpprcFile else '~/.bashrc'
+    cmd = "echo \"%s$GAUSS_ENV\" 2>/dev/null" % (
+        "\\" if (username and username != getCurrentUser()) else "")
+    try:
+        output = runShellCmd(cmd, username, mpprc)
+        gaussEnv = output.split("\n")[0]
+        if not gaussEnv:
+            return False
+    except Exception:
+        return False
+    return True
+
+
+def getMasterDnNum(user, mpprcFile):
+    """
+    function : get cluster master DB number
+    input : string, string
+    output : List
+    """
+    masterDnList = []
+    cmd = "gs_om -t query |grep Primary"
+    output = runShellCmd(cmd, user, mpprcFile)
+    line = output.splitlines()[0]
+    instanceinfo = line.split()
+    for idx in range(len(instanceinfo)):
+        if (instanceinfo[idx] == "Primary"):
+            if (idx > 2 and instanceinfo[idx - 2].isdigit()):
+                masterDnList.append(int(instanceinfo[idx - 2]))
+    return masterDnList
+
+
+def checkBondMode(bondingConfFile):
+    """
+    function : Check Bond mode
+    input : String, bool
+    output : List
+    """
+
+    netNameList = []
+    cmd = "grep -w 'Bonding Mode' %s | awk -F ':' '{print $NF}'" \
+          % bondingConfFile
+    (status, output) = subprocess.getstatusoutput(cmd)
+    if (status != 0 or output.strip() == ""):
+        raise Exception(ErrorCode.GAUSS_514["GAUSS_51403"] % "Bonding Mode" +
+                        "The cmd is %s " % cmd)
+    cmd = "grep -w 'Slave Interface' %s | awk -F ':' '{print $NF}'" \
+          % bondingConfFile
+    (status, output) = subprocess.getstatusoutput(cmd)
+    if (status != 0):
+        raise Exception(ErrorCode.GAUSS_514["GAUSS_51403"] %
+                        "Slave Interface" + "The cmd is %s " % cmd)
+
+    for networkname in output.split('\n'):
+        netNameList.append(networkname.strip())
+    return netNameList
+
+
+def is_local_node(host):
+    """
+    function: check whether host is the local node
+    input : NA
+    output : NA
+    """
+    if (host == DefaultValue.GetHostIpOrName()):
+        return True
+    allNetworkInfo = g_network.getAllNetworkIp()
+    for network in allNetworkInfo:
+        if (host == network.ipAddress):
+            return True
+    return False
+
+
+def validate_ipv4(ip_str):
+    """
+    function: check whether ip_str is a valid IPv4 address
+    input : NA
+    output : NA
+    """
+    sep = ip_str.split('.')
+    if len(sep) != 4:
+        return False
+    for x in sep:
+        try:
+            int_x = int(x)
+            if int_x < 0 or int_x > 255:
+                return False
+        except ValueError:
+            return False
+    return True
+
+
+def SetLimitsConf(typeList, item, value, limitFile):
+    """
+    function: set limits conf
+    input : NA
+    output : NA
+    """
+    for typeName in typeList:
+        cmd = """sed -i '/^.* %s *%s .*$/d' %s &&
+              echo "* %s %s %s" >> %s""" % (
+            typeName, item, limitFile, typeName, item, value, limitFile)
+        (status, output) =
subprocess.getstatusoutput(cmd) + if (status != 0): + return "Failed to set variable '%s %s'. Error: \n%s." % ( + typeName, item, output) + "The cmd is %s " % cmd + return "Success" + + +def isSupportSystemOs(): + """ + function: check whether is or not redhat + input : NA + output : NA + """ + osName = g_Platform.dist()[0] + if osName in ["redhat", "centos", "euleros", "openEuler"]: + return True + else: + return False + + +def getInitFile(): + """ + function: get init file + input : NA + output : NA + """ + if isSupportSystemOs(): + return INIT_FILE_REDHAT + else: + return INIT_FILE_SUSE + + +def getNICNum(ipAddress): + """ + function: get nic num + input : NA + output : NA + """ + if g_Platform.isPlatFormEulerOSOrRHEL7X(): + cmd = "/sbin/ifconfig -a | grep -B1 \"inet %s \" | " \ + "grep -v \"inet %s \" | awk '{print $1}'" % ( + ipAddress, ipAddress) + else: + cmd = "/sbin/ifconfig -a | grep -B1 \"addr:%s \" | " \ + "grep -v \"addr:%s \" | awk '{print $1}'" % ( + ipAddress, ipAddress) + output = runShellCmd(cmd) + if g_Platform.isPlatFormEulerOSOrRHEL7X(): + return output.strip()[:-1] + else: + return output.strip() + + +def getIpByHostName(host): + """ + function: get ip by hostname + input : NA + output : NA + """ + ipList = g_file.readFile("/etc/hosts", host) + + pattern = re.compile( + r'^[1-9 \t].*%s[ \t]*#Gauss.* IP Hosts Mapping' % host) + for ipInfo in ipList: + match = pattern.match(ipInfo.strip()) + if (match): + return match.group().split(' ')[0].strip() + #If no ip address is found, the first ip address + # that is not commented out is returned + for ip_info in ipList: + ip_info = ip_info.replace("\t", " ").strip() + if not ip_info.startswith("#"): + return ip_info.split(' ')[0] + + # get local host by os function + # Replace host with the IP address. 
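+    # last resort: no usable mapping was found in /etc/hosts,
+    # so fall back to the host name itself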
+    hostIp = host
+    return hostIp
+
+
+def isBond(netWorkNum):
+    """
+    function: check whether the NIC is in a bond
+    input : NA
+    output : NA
+    """
+    bondingConfFile = "/proc/net/bonding/%s" % netWorkNum
+    if g_Platform.isPlatFormEulerOSOrRHEL7X():
+        cmd = "/sbin/ifconfig %s " \
+              "| grep -E '\<ether\>' | awk -F ' ' '{print $2}'" % netWorkNum
+    else:
+        cmd = "/sbin/ifconfig %s " \
+              "| grep -E '\<HWaddr\>' | awk -F ' ' '{print $NF}'" % netWorkNum
+    MacAddr = runShellCmd(cmd)
+    cmd = "/sbin/ifconfig -a | grep '\<%s\>' | wc -l" % MacAddr
+    output = runShellCmd(cmd)
+    MacAddrNum = int(output)
+    if (MacAddrNum > 2 and os.path.exists(bondingConfFile)):
+        return True
+    else:
+        return False
+
+
+def getNetWorkConfFile(networkCardNum):
+    """
+    function: get network conf file
+    input : NA
+    output : NA
+    """
+    SuSENetWorkConfPath = "/etc/sysconfig/network"
+    RedHatNetWorkConfPath = "/etc/sysconfig/network-scripts"
+    if isSupportSystemOs():
+        NetWorkConfFile = "%s/ifcfg-%s" % (
+            RedHatNetWorkConfPath, networkCardNum)
+    else:
+        NetWorkConfFile = "%s/ifcfg-%s" % (SuSENetWorkConfPath, networkCardNum)
+
+    if (not os.path.exists(NetWorkConfFile)):
+        if isSupportSystemOs():
+            cmd = "find %s -iname 'ifcfg-*-%s' -print" % (
+                RedHatNetWorkConfPath, networkCardNum)
+        else:
+            cmd = "find %s -iname 'ifcfg-*-%s' -print" % (
+                SuSENetWorkConfPath, networkCardNum)
+        output = runShellCmd(cmd)
+        if (DefaultValue.checkDockerEnv() and
+                output.find("No such file or directory") >= 0):
+            return output.strip()
+        if (output.strip() == "" or len(output.split('\n')) != 1):
+            if DefaultValue.checkDockerEnv():
+                return ""
+            raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"]
+                            % NetWorkConfFile)
+        NetWorkConfFile = output.strip()
+    return NetWorkConfFile
+
+
+def CheckNetWorkBonding(serviceIP):
+    """
+    function: check network bonding
+    input : NA
+    output : NA
+    """
+    networkCardNum = getNICNum(serviceIP)
+    NetWorkConfFile = getNetWorkConfFile(networkCardNum)
+    if ((NetWorkConfFile.find("No such file or directory") >= 0
+         or NetWorkConfFile == "") and DefaultValue.checkDockerEnv()):
+        return "Shell command failed"
+    bondingConfFile = "/proc/net/bonding/%s" % networkCardNum
+    networkCardNumList = [networkCardNum]
+    cmd = "grep -i 'BONDING_OPTS\|BONDING_MODULE_OPTS' %s" % NetWorkConfFile
+    (status, output) = subprocess.getstatusoutput(cmd)
+    if ((status == 0) and (output.strip() != "")):
+        if ((output.find("mode") > 0) and os.path.exists(bondingConfFile)):
+            networkCardNumList = networkCardNumList + checkBondMode(
+                bondingConfFile)
+        else:
+            raise Exception(ErrorCode.GAUSS_506["GAUSS_50611"] +
+                            "The cmd is %s " % cmd)
+    return networkCardNumList
+
+
+def getTHPandOSInitFile():
+    """
+    function : We know that centos has the same init file
+               and THP file as RedHat.
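+               On systemd platforms the init file returned by
+               getOSInitFile() is the generated
+               /usr/local/gauss/script/gauss-OS-set.sh hook.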
+    input : NA
+    output : String, String
+    """
+    THPFile = "/sys/kernel/mm/transparent_hugepage/enabled"
+    initFile = getOSInitFile()
+    if (initFile == ""):
+        raise Exception(ErrorCode.GAUSS_506["GAUSS_50618"]
+                        % "startup file of current OS")
+    return (THPFile, initFile)
+
+
+def getOSInitFile():
+    """
+    function : Get the OS initialization file
+    input : NA
+    output : String
+    """
+    distname = g_Platform.dist()[0]
+    systemd_system_dir = "/usr/lib/systemd/system/"
+    systemd_system_file = "/usr/lib/systemd/system/gs-OS-set.service"
+    # OS init file
+    # now we only support SuSE and RHEL
+    initFileSuse = "/etc/init.d/boot.local"
+    initFileRedhat = "/etc/rc.d/rc.local"
+    # system init file
+    initSystemFile = "/usr/local/gauss/script/gauss-OS-set.sh"
+    dirName = os.path.dirname(os.path.realpath(__file__))
+    # Get the startup file of suse or redhat os
+    if (os.path.isdir(systemd_system_dir)):
+        if (not os.path.exists(systemd_system_file)):
+            cmd = "cp '%s'/gs-OS-set.service '%s'; chmod %s '%s'" % (
+                dirName, systemd_system_file, DefaultValue.KEY_FILE_MODE,
+                systemd_system_file)
+            runShellCmd(cmd)
+            cmd = "systemctl enable gs-OS-set.service"
+            runShellCmd(cmd)
+        if (not os.path.exists(initSystemFile)):
+            cmd = "mkdir -p '%s'" % os.path.dirname(initSystemFile)
+            runShellCmd(cmd)
+            g_file.createFileInSafeMode(initSystemFile)
+            with open(initSystemFile, "w") as fp:
+                fp.write("#!/bin/bash\n")
+        cmd = "chmod %s '%s'" % (DefaultValue.KEY_FILE_MODE, initSystemFile)
+        runShellCmd(cmd)
+        return initSystemFile
+    if (distname == "SuSE" and os.path.isfile(initFileSuse)):
+        initFile = initFileSuse
+    elif (distname in (
+            "redhat", "centos", "euleros", "openEuler") and os.path.isfile(
+            initFileRedhat)):
+        initFile = initFileRedhat
+    else:
+        initFile = ""
+    return initFile
+
+
+def getMaskByIP(IPAddr):
+    """
+    function: get netMask by ip addr
+    """
+    if g_Platform.isPlatFormEulerOSOrRHEL7X():
+        cmd = "/sbin/ifconfig -a |grep -E '\<%s\>'| awk '{print $4}'" % IPAddr
+    else:
+        cmd = \
+            "/sbin/ifconfig -a |grep -E '\<%s\>'| awk -F ':' '{print $NF}'" \
+            % IPAddr
+    netMask = runShellCmd(cmd)
+    return netMask
diff --git a/script/gspylib/inspection/common/TaskPool.py b/script/gspylib/inspection/common/TaskPool.py
new file mode 100644
index 0000000..62bf8a0
--- /dev/null
+++ b/script/gspylib/inspection/common/TaskPool.py
@@ -0,0 +1,204 @@
+# -*- coding:utf-8 -*-
+#############################################################################
+import sys
+import os
+import signal
+import threading
+from queue import Queue
+from gspylib.inspection.common.Exception import InterruptException
+
+
+class TaskThread(threading.Thread):
+    def __init__(self, queWork, queResult, iTimeout):
+        """
+        function: constructor
+        """
+        threading.Thread.__init__(self)
+        # timeout for fetching a task
+        self.m_iTimeout = iTimeout
+        self.m_bRunning = True
+        self.setDaemon(True)
+        self.m_queWork = queWork
+        self.m_queResult = queResult
+        self.start()
+
+    def run(self):
+        """
+        function: run method
+        input : NA
+        output : NA
+        """
+        while self.m_bRunning:
+            if Queue is None:
+                break
+            try:
+                # fetch a task from the queue;
+                # the timeout parameter MUST be assigned here,
+                # otherwise get() would wait forever
+                callableFun, args = self.m_queWork.get(timeout=self.m_iTimeout)
+                # run the task
+                Ret = callableFun(args[0])
+                self.m_queResult.put(Ret)
+            # if the task queue is empty
+            except Exception:
+                self.m_bRunning = False
+                continue
+
+
+class TaskPool:
+    def __init__(self, iNumOfThreads, iTimeOut=1):
+        """
+        function: constructor
+        """
+        # work and result queues (Queue here is queue.Queue)
+        self.m_queWork = Queue()
+        self.m_queResult = Queue()
+        self.m_lstThreads = []
+        self.m_iTimeOut = iTimeOut
+        self.__createThreadPool(iNumOfThreads)
+
+    def __createThreadPool(self, iNumOfThreads):
+        """
+        function: create thread pool
+        input : iNumOfThreads
+        output : NA
+        """
+        for i in range(iNumOfThreads):
+            aThread = TaskThread(self.m_queWork, self.m_queResult,
+                                 self.m_iTimeOut)
+            self.m_lstThreads.append(aThread)
+
+    # add a task into the thread pool
+    def addTask(self, callableFunc, *args):
+        """
+        function: add task
+        input : callableFunc, *args
+        output : NA
+        """
+        self.m_queWork.put((callableFunc, list(args)))
+
+    # get one task executing result
+    def getOneResult(self):
+        """
+        function: get one result
+        input : NA
+        output : NA
+        """
+        try:
+            # get a result from the queue;
+            # get() will not return until a result is available
+            aItem = self.m_queResult.get()
+            return aItem
+        except Exception:
+            return None
+
+    # notify all threads in the thread pool to exit
+    def notifyStop(self):
+        """
+        function: notify stop
+        input : NA
+        output : NA
+        """
+        for aThread in self.m_lstThreads:
+            aThread.m_bRunning = False
+
+    # Waiting for all threads in the thread pool to exit
+    def waitForComplete(self):
+        # wait until all threads terminate
+        while len(self.m_lstThreads):
+            aThread = self.m_lstThreads.pop()
+            # wait for the thread to terminate
+            if aThread.is_alive():
+                aThread.join()
+
+
+class Watcher:
+    """
+    this class solves two problems with multithreaded
+    programs in Python, (1) a signal might be delivered
+    to any thread (which is just a malfeature) and (2) if
+    the thread that gets the signal is waiting, the signal
+    is ignored (which is a bug).
+
+    The watcher is a concurrent process (not thread) that
+    waits for a signal and the process that contains the
+    threads.
+    """
+
+    def __init__(self):
+        """
+        Forks a child process, which returns and continues
+        as the program. The parent waits for a KeyboardInterrupt
+        and then kills the child process.
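+
+        Typical use (sketch): construct Watcher() once in the main
+        process, before any worker threads are started, e.g.
+            Watcher()
+            pool = TaskPool(4)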
+ """ + self.child = os.fork() + if self.child == 0: + return + else: + self.watch() + + def watch(self): + """ + function: watch + input : NA + output : NA + """ + try: + os.wait() + except KeyboardInterrupt: + # I put the capital B in KeyBoardInterrupt so I can + # tell when the Watcher gets the SIGINT + self.kill() + raise InterruptException() + sys.exit() + + def kill(self): + """ + function: kill + input : NA + output : NA + """ + os.kill(self.child, signal.SIGKILL) + + +class CheckThread(threading.Thread): + def __init__(self, name, func, *args): + """ + function: constructor + """ + super(CheckThread, self).__init__(name=name, target=func, args=args) + self._stop_event = threading.Event() + self.setDaemon(True) + self.exitcode = 0 + self.exception = None + self.name = name + self.func = func + self.args = args + self.start() + + def run(self): + """ + function: run + input : NA + output : NA + """ + try: + self.func(*self.args) + except Exception as e: + self.exitcode = 1 + self.exception = e + + def stop(self): + """ + function: stop + input : NA + output : NA + """ + self._stop_event.set() + + def stopped(self): + """ + function: stopped + input : NA + output : NA + """ + return self._stop_event.is_set() diff --git a/script/gspylib/inspection/common/__init__.py b/script/gspylib/inspection/common/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/script/gspylib/inspection/config/check_list_V1R6C10.conf b/script/gspylib/inspection/config/check_list_V1R6C10.conf new file mode 100644 index 0000000..75a2203 --- /dev/null +++ b/script/gspylib/inspection/config/check_list_V1R6C10.conf @@ -0,0 +1,62 @@ +#The file(check_list.conf) is the gs_check and gs_checkos configuration file. +#The file is placed in $GPHOME/script/util + +# the system control parameter +[/etc/sysctl.conf] +net.ipv4.tcp_max_tw_buckets = 10000 +net.ipv4.tcp_tw_reuse = 1 +net.ipv4.tcp_tw_recycle = 1 +net.ipv4.tcp_keepalive_time = 30 +net.ipv4.tcp_keepalive_intvl = 30 +net.ipv4.tcp_keepalive_probes = 9 +net.ipv4.tcp_retries2 = 80 +net.sctp.addip_enable = 0 +net.core.wmem_max = 21299200 +net.core.rmem_max = 21299200 +net.core.wmem_default = 21299200 +net.core.rmem_default = 21299200 +net.sctp.sctp_mem = 94500000 915000000 927000000 +net.sctp.sctp_rmem = 8192 250000 16777216 +net.sctp.sctp_wmem = 8192 250000 16777216 +kernel.sem = 250 6400000 1000 25600 +net.ipv4.tcp_rmem = 8192 250000 16777216 +net.ipv4.tcp_wmem = 8192 250000 16777216 +# vm.min_free_kbytes would set to 5% of total system memory real time, total system memory get with cmd: free -k|grep Mem|awk '{print $2}'. 
+vm.min_free_kbytes = total_system_memory*5% +net.core.netdev_max_backlog = 65535 +net.ipv4.tcp_max_syn_backlog = 65535 +net.core.somaxconn = 65535 +net.ipv4.tcp_syncookies = 1 +vm.overcommit_memory = 0 +vm.panic_on_oom = 0; +vm.oom_kill_allocating_task = 0; +net.sctp.sndbuf_policy = 0 +net.sctp.rcvbuf_policy = 0 + +# if parameter value is not equal to ths OS's value, print the waring, and not error +[SUGGEST:/etc/sysctl.conf] +net.ipv4.tcp_fin_timeout = 60 +net.ipv4.tcp_sack = 1 +net.ipv4.tcp_timestamps = 1 +net.ipv4.tcp_retries1 = 5 +net.ipv4.tcp_syn_retries = 5 +net.ipv4.tcp_synack_retries = 5 +net.sctp.path_max_retrans = 10 +net.sctp.max_init_retransmits = 10 +net.sctp.association_max_retrans = 10 +net.sctp.hb_interval = 30000 + +# open file number, please set it to set '1000000' +[/etc/security/limits.conf] +open files = 1000000 +stack size = 3072 + +# network parameter +# if the level of network is greater or equal to 10000Mb/s, please set RX/TX to 4096; +# we will check if the MTU is greater or equal to 1500, but gs_checkos dose not set it. +# else, skip it. +[/sbin/ifconfig] +MTU = 1500 +RX = 4096 +TX = 4096 + diff --git a/script/gspylib/inspection/config/check_list_V1R7C00.conf b/script/gspylib/inspection/config/check_list_V1R7C00.conf new file mode 100644 index 0000000..eae949f --- /dev/null +++ b/script/gspylib/inspection/config/check_list_V1R7C00.conf @@ -0,0 +1,181 @@ +#The file(check_list.conf) is the gs_check and gs_checkos configuration file. +#The file is placed in $GPHOME/script/util + +# the system control parameter +[/etc/sysctl.conf] +net.ipv4.tcp_max_tw_buckets = 10000 +net.ipv4.tcp_tw_reuse = 1 +net.ipv4.tcp_tw_recycle = 1 +net.ipv4.tcp_keepalive_time = 30 +net.ipv4.tcp_keepalive_intvl = 30 +net.ipv4.tcp_keepalive_probes = 9 +net.ipv4.tcp_retries2 = 80 +net.sctp.addip_enable = 0 +net.core.wmem_max = 21299200 +net.core.rmem_max = 21299200 +net.core.wmem_default = 21299200 +net.core.rmem_default = 21299200 +net.sctp.sctp_mem = 94500000 915000000 927000000 +net.sctp.sctp_rmem = 8192 250000 16777216 +net.sctp.sctp_wmem = 8192 250000 16777216 +kernel.sem = 250 6400000 1000 25600 +net.ipv4.tcp_rmem = 8192 250000 16777216 +net.ipv4.tcp_wmem = 8192 250000 16777216 +# vm.min_free_kbytes would set to 5% of total system memory real time, total system memory get with cmd: free -k|grep Mem|awk '{print $2}'. 
+vm.min_free_kbytes = total_system_memory*5% +net.core.netdev_max_backlog = 65535 +net.ipv4.tcp_max_syn_backlog = 65535 +net.core.somaxconn = 65535 +net.ipv4.tcp_syncookies = 1 +vm.overcommit_memory = 0 +vm.panic_on_oom = 0 +vm.oom_kill_allocating_task = 0 +net.sctp.sndbuf_policy = 0 +net.sctp.rcvbuf_policy = 0 +kernel.shmall = 1152921504606846720 +kernel.shmmax = 18446744073709551615 + +# if parameter value is not equal to ths OS's value, print the waring, and not error +[SUGGEST:/etc/sysctl.conf] +net.ipv4.tcp_fin_timeout = 60 +net.ipv4.tcp_sack = 1 +net.ipv4.tcp_timestamps = 1 +net.ipv4.tcp_retries1 = 5 +net.ipv4.tcp_syn_retries = 5 +net.ipv4.tcp_synack_retries = 5 +net.sctp.path_max_retrans = 10 +net.sctp.max_init_retransmits = 10 +net.sctp.association_max_retrans = 10 +net.sctp.hb_interval = 30000 +vm.extfrag_threshold = 500 +vm.overcommit_ratio = 90 +SctpChecksumErrors = 0 + +# open file number, please set it to set '1000000' +[/etc/security/limits.conf] +open files = 1000000 +stack size = 3072 + +# network parameter +# if the level of network is greater or equal to 10000Mb/s, please set RX/TX to 4096; +# we will check if the MTU is greater or equal to 1500, but gs_checkos dose not set it. +# else, skip it. +[/sbin/ifconfig] +MTU = 8192 +RX = 4096 +TX = 4096 + +#gs_check CheckGucConsistent guc parameter ignore list +[guc_ignore] +listen_addresses = 0 +local_bind_address = 0 +port = 0 +cstore_buffers = 0 +max_connections = 0 +shared_buffers = 0 +work_mem = 0 +maintenance_work_mem = 0 +data_replicate_buffer_size = 0 +pooler_port = 0 +log_directory = 0 +audit_directory = 0 +pgxc_node_name = 0 +ssd_cache_dir = 0 +enable_adio_function = 0 +enable_cstore_ssd_cache = 0 +unix_socket_directory = 0 +unix_socket_permissions = 0 +log_file_mode = 0 +max_coordinators = 0 +max_datanodes = 0 +enable_nestloop = 0 +enable_mergejoin = 0 +comm_tcp_mode = 0 +explain_perf_mode = 0 +log_line_prefix = 0 +max_active_statements = 0 +# Ip and port related +comm_control_port = 0 +comm_sctp_port = 0 +replconninfo2 = 0 +replconninfo1 = 0 +# Instance directory related +ident_file = 0 +config_file = 0 +hba_file = 0 +data_directory = 0 +archive_command = 0 +xc_maintenance_mode = 0 + +[guc_logic] +allow_concurrent_tuple_update = 0 +prefetch_quantity = 0 +backwrite_quantity = 0 +cstore_prefetch_quantity = 0 +cstore_backwrite_max_threshold = 0 +cstore_backwrite_quantity = 0 +fast_extend_file_size = 0 +bgwriter_delay = 0 +bgwriter_lru_maxpages = 0 +bgwriter_flush_after = 0 +autovacuum_naptime = 0 +autovacuum_vacuum_threshold = 0 +autovacuum_analyze_threshold = 0 +autovacuum_vacuum_scale_factor = 0 +autovacuum_analyze_scale_factor = 0 +enable_stream_operator = 0 +enable_data_replicate = 0 +wal_keep_segments = 0 +wal_sender_timeout = 0 +wal_writer_delay = 0 +checkpoint_segments = 0 +checkpoint_timeout = 0 +checkpoint_warning = 0 +checkpoint_flush_after = 0 +checkpoint_wait_timeout = 0 +vacuum_cost_page_hit = 0 +vacuum_cost_page_miss = 0 +vacuum_cost_page_dirty = 0 +vacuum_cost_limit = 0 +vacuum_cost_delay = 0 +autovacuum_vacuum_cost_delay = 0 +autovacuum_vacuum_cost_limit = 0 +full_page_writes = 0 +fsync = 0 +io_limits = 0 +io_priority = 0 +bulk_write_ring_size = 0 +bulk_read_ring_size = 0 +partition_mem_batch = 0 +partition_max_cache_size = 0 +temp_file_limit = 0 +query_mem = 0 +maintenance_work_mem = 0 +synchronous_commit = 0 +work_mem = 0 +dynamic_memory_quota = 0 +temp_buffers = 0 +max_loaded_cudesc = 0 +wal_receiver_status_interval = 0 +wal_receiver_timeout = 0 +wal_receiver_connect_timeout = 0 
+wal_receiver_connect_retries = 0 +wal_receiver_buffer_size = 0 +data_replicate_buffer_size = 0 +max_connections = 0 +max_files_per_process = 0 +shared_buffers = 0 +memorypool_size = 0 +cstore_buffers = 0 +UDFWorkerMemHardLimit = 0 +walsender_max_send_size = 0 +wal_buffers = 0 +max_wal_senders = 0 +autovacuum_freeze_max_age = 0 +autovacuum_max_workers = 0 +track_activity_query_size = 0 +event_source = 0 +zhparser_dict_in_memory = 0 +memorypool_enable = 0 +enable_memory_limit = 0 + diff --git a/script/gspylib/inspection/config/items.xml b/script/gspylib/inspection/config/items.xml new file mode 100644 index 0000000..1dbac79 --- /dev/null +++ b/script/gspylib/inspection/config/items.xml @@ -0,0 +1,1982 @@ + + + + + + + + + + + + + + + <zh>检查NTPD服务</zh> + <en>Check the time consistency status</en> + + + + 检查是否安装并开启ntpq服务,并与ntp时钟源同步。 + + + 检查系统NTPD服务,如果服务开启且各节点时间误差在1分钟以内则检查项通过,否则检查项不通过。 + + os + user + all + custom + + + + + <zh>检查CPU占用率</zh> + <en>Check CPU Idle and I/O wait</en> + + + StandardCPUIdle=30; + StandardWIO=30 + + + 如果idle不足 CPU负载过高,请扩容节点,如果iowait过高,则磁盘为瓶颈,扩容磁盘 + + + 检查主机CPU占用率,如果idle 大于30%,或者 iowait 小于 30%.则检查项通过,否则检查项不通过 + + os + user + all + default + + + + + <zh>检查时区一致性</zh> + <en>Check the usage of time zone</en> + + + + 用正确的时区文件覆盖时区文件 cp /usr/share/zoneinfo/Asia/XXX /etc/localtime 再通过hwclock命令写入BIOS + + + 检查集群内各节点时区,如果时区一致则检查通过,否则检查项不通过 + + os + user + all + consistent + + + + + <zh>检查编码格式</zh> + <en>Check the unicode status</en> + + + + 在异常节点修改/etc/profile文件,添加或修改LANG=XXX信息与其他节点相同。 + + + 通过locale | grep '^LANG='获取节点的系统编码,判断是否一致,如果编码一致则检查项通过,否则检查项不通过。 + + os + user + all + consistent + + + + + <zh>检查防火墙状态</zh> + <en>Check the firewall state</en> + + + + 关闭防火墙服务。 + + + 检查主机防火墙状态,如果防火墙关闭则检查项通过,否则检查项不通过。 + + os + root + all + default + + + + + <zh>检查内核版本</zh> + <en>Check the kernel version status</en> + + + + 确认异常节点的内核版本是否和其他节点存在差异,如果存在差异则可根据资料说明进行节点替换或温备操作(gs_replace) + + + 检查各节点系统内核版本信息,如果版本信息一致则检查项通过,否则报warning。 + + os + user + all + consistent + + + + + <zh>检查句柄最大设置</zh> + <en>Check the Open Files</en> + + + + 设置/etc/security/limits.d/90-nofile.conf文件中的soft nofile和hard nofile参数,然后重启会话。 + + + 检查操作系统最大句柄值,如果该值大于等于1000000则检查项通过,否则检查项不通过。 + + os + user + root + all + default + + + + + <zh>检查系统参数</zh> + <en>Check the system control parameters status</en> + + + version=V1R7C00 + + + 编辑/etc/sysctl.conf写入正确的参数配置,通过sysctl -p生效。 + + + 检查各节点操作系统参数,判断是否等于预期值。所有参数符合预期值则检查项通过,否则检查项不通过并打印不满足预期项 + + os + user + root + all + default + + + + + <zh>检查操作系统版本</zh> + <en>Check the OS version status</en> + + + + 确认异常节点的操作系统版本是否和主节点在同一混搭列表内,如果不在同一列表内需要按照资料说明,进行节点替换或温备操作(gs_replace) + + + 检查集群内各个节点的操作系统版本信息,如果满足版本兼容列表且全集群在同一混搭列表中则检查通过,否则检查不通过 + + os + user + all + custom + + + + + <zh>检查THP服务</zh> + <en>Check the THP service status</en> + + + + 如果THP文件存在,echo never > THPFile 并将关闭指定写入自启动文件sed -i '/^.*transparent_hugepage.*enabled.*echo + never.*$/d' initFile echo "echo never > THPFile" >> initFile + + + + 检查系统THP服务,如果服务未开启则检查项通过,否则检查项不通过。 + + os + user + root + all + default + + + + + <zh>检查sshd服务是否已启动</zh> + <en>Check sshd service status</en> + + + + 如果检查发现某项关键服务未启动,请先启动 + + + 检查系统是否存在sshd服务,若存在则检查项通过,否则检查项不通过 + + os + root + all + default + + + + + <zh>检查crontab服务是否已启动</zh> + <en>Check Crond service status</en> + + + + 如果检查发现某项关键服务未启动,请先启动 + + + 检查系统是否存在crontab服务,若存在则检查项通过,否则检查项不通过 + + os + root + all + default + + + + + <zh>检查crontab是否有残留Gauss相关信息</zh> + <en>Check crontab</en> + + + crontabUser=omm + + + 如果存在,删除此定时任务 + + + 检查crontab是否残留Gauss相关信息,若无该信息则检查项通过,否则检查项不通过 + + os + user + all + default 
+ + + + + <zh>检查文件目录是否有残留(/srv/BigData/ ,/opt/huawei/Bigdata/ ,/var/log/Bigdata/, /home/omm) + </zh> + <en>Check directory</en> + + + directoryList=/opt/huawei/Bigdata/,/var/log/Bigdata/,/home/omm/ + + + 删除存在的目录 + + + + 检查扩容新节点上是否存在(/srv/BigData/ ,/opt/huawei/Bigdata/ ,/var/log/Bigdata/, /home/omm)目录,若不存在则检查项通过,否则检查项不通过 + + + os + user + all + default + + + + + <zh>检查进程是否有残留(检查gauss进程,omm用户进程是否残留)</zh> + <en>Check process</en> + + + + 检查残留的具体进程,确认后kill掉这些进程 + + + 检查扩容新节点上是否残留gaussdb和omm进程,若未残留则检查项通过,否则检查项不通过 + + os + user + all + default + + + + + <zh>检查栈深度</zh> + <en>Check stack depth</en> + + + + 设置栈大小为3072 + + + 检查各个节点栈深度是否一致,若不一致报warning,若大于等于3072则检查项通过,否则不通过 + + os + user + root + all + consistent + + + + + <zh>关键进程oom_adj检查</zh> + <en>Check oom_adj value of key processes</en> + + + + 修改关键进程omm_adj值为0 + + + 检查所有关键进程,如果所有关键进程omm_adj值为0,则通过,否则不通过 + + os + user + all + default + + + + + <zh>检查nochecksum值是否为预期值且一致(默认为N,RedHat6.4/6.5且bond是为Y)</zh> + <en>Check the nochecksum</en> + + + + 修改nochecksum值为一致的预期值 + + + 检查nochecksum值,若符合预期且一致则检查项通过,否则检查项不通过 + + network + root + all + consistent + + + + + <zh>检查omm用户是否已删除</zh> + <en>Check omm user</en> + + + + 如果存在,删除用户(useradd -rf omm) + + + 检查扩容新节点上是否存在omm用户,若不存在omm用户则检查项通过,否则检查项不通过s + + os + root + all + default + + + + + <zh>检查DN,CN端口是否占用</zh> + <en>Check port conflict</en> + + + cooPort=0; + dataPortBase1=0; + dataPortBase2=0; + dataPortStandby1=0; + dataPortStandby2=0; + dataPortDummyStandby1=0; + dataPortDummyStandby2=0; + cmServerPortBase_mpp=0; + cmServerPortStandby_mpp=0; + gtmPortBase_mpp=0; + gtmPortStandby_mpp=0; + cooPortBase_mpp=0; + dataPortBase_mpp=0; + dataPortStandby_mpp=0; + dataPortDummyStandby_mpp=0 + + + 如果发现端口占用,确认占用进程,释放端口 + + + 检查CN端口,DN端口是否已被占用,若未占用则检查项通过,否则检查项不通过 + + os + root + all + default + + + + + <zh>检查ip_local_port_range设置范围</zh> + <en>Check ip_local_port_range</en> + + + + 修改参数范围为26000 65535 + + + 检查ip_local_port_range系统参数范围,若范围在26000 65535则检查项通过,否则检查项不通过 + + os + user + root + all + default + + + + + <zh>检查/etc/hosts中是否有重复地址以及localhost配置</zh> + + + + + /etc/hosts没有配置localhost检查项不通过,存在带有#Gauss200注释的映射报abnormal,相同IP不同hostname报abnormal + + os + user + all + default + + + + + <zh>检查CPU核数</zh> + <en>Check CPU count</en> + + + + 检查CPU + + + CPU核心与可用CPU不符检查项不通过,相符但存在不可用信息Warning。 所有节点CPU信息不相同则检查项不通过。 + + os + user + all + consistent + + + + + <zh>检查sctp服务</zh> + <en>Check sctp service</en> + + + + 安装及加载sctp服务 + + + stcp服务开启且写在开机自启动文件中则检查项通过,否则检查项不通过 + + os + root + all + default + + + + + <zh>检查超线程是否打开</zh> + <en>Check Hyper Threading</en> + + + + 检查超线程 + + + 检查超线程,若打开则检查项通过,否则检查项不通过 + + os + user + all + default + + + + + <zh>检查内存总大小</zh> + <en>Check total memory</en> + + + + 需要保证节点上总内存大小一致 + + + 检查各节点总内存,若内存大小一致则检查项通过,否则报warning + + os + user + all + consistent + + + + + <zh>检查sshd服务配置是否正确</zh> + <en>Check sshd config</en> + + + PasswordAuthentication=yes; + MaxStartups=1000; + UseDNS=no; + ClientAliveInterval=10800 + + + 修改sshd配置 + + + 检查/etc/ssh/sshd_config文件,判断参数配置是否符合预期,若符合则检查项通过,否则检查项不通过 + + os + root + all + default + + + + + <zh>检查max_process_memory参数设置是否合理</zh> + <en>Check max_process_memory setting</en> + + + Threshold_NG=1048576 + + + 使用gs_guc重设max_process_memory的值 + + + 检查cn和主备dn max_process_memory值,判断参数配置是否符合预期,若符合则检查项通过,否则检查项不通过 + + os + root + all + default + + + + + <zh>检查是否有人为添加启动项</zh> + + + + + 检查启动项 + + + 如果有人为添加特定启动项则检查不通过,否则检查通过 + + os + root + all + default + + + + + <zh>文件句柄检测</zh> + + + Threshold_Warning=800000 + + + 检查负载均衡信息 + + + 
检查每个gaussdb进程打开的进程数是否超过80万,超过则检查不通过,是否有slave进程使用的句柄说超过master进程,有则检查不通过 + + os + root + all + default + + + + + <zh>DropCache进程检测</zh> + + + + + 检查dropCache进程 + + + 检查各节点是否有dropcache进程在运行,若是则检查通过,否则检查不通过 + + os + root + all + default + + + + + <zh>检查内存使用是否超标</zh> + <en>Check whether memroy usage exceed threshold or not</en> + + + percent_total=0.8; + percent_max=0.9 + + + 若整体内存使用率超标,检查占用内存高的进程,清理内存空间,若gaussdb进程内存使用超标,需要分析当前高负载业务 + If the overall memory usage exceeds the standard, check the processes with high memory usage and clean up the memory space. If the memory usage of gaussdb process exceeds the standard, it is necessary to analyze the current high-load business. + + + 检查节点整体内存使用率是否超过percent_total,若超过则告警,否则继续检查各个gaussdb进程是否超过max_process_memroy的percent_max + check whether any node's memory usgae exceeded threshold,or any guassdb process's memory usage exceeded threshold + + + 内存资源不足 + No sufficent Meory + + os + user + all + default + + + + + <zh>检查交换内存是否小于总内存</zh> + <en>Check the SwapMemory</en> + + + + 减少交换内存的大小 + + + 检查交换内存和总内存大小,若检查结果为0则检查项通过,否则检查项报Warning 大于总内存时检查项不通过 + + device + user + root + all + default + + + + + <zh>检查磁盘逻辑块</zh> + <en>Check the LogicalBlock</en> + + + + 修改磁盘逻辑块大小为512 + + + 检查磁盘逻辑块大小,若为512则检查项通过,否则检查项不通过 + + device + user + root + all + default + + + + + <zh>检查IO请求</zh> + <en>Check the IO request</en> + + + + + 检查IO值,如果该值为32768则检查项通过,否则报warning + + device + root + root + all + default + + + + + <zh>检查最大异步请求</zh> + <en>Check Max Asy IO requests</en> + + + + + 检查当前异步IO值,若其大于(dn+cn)*1048576和104857600则检查项通过,否则检查项不通过 + + device + user + root + all + default + + + + + <zh>检查IO配置</zh> + <en>Check IO Configure</en> + + + + + 检查IO配置,如果是deadline则检查项通过,否则检查项不通过 + + device + root + all + default + + + + + <zh>检查磁盘预读块</zh> + <en>Check the pre-read block size status</en> + + + BlockSize=16384 + + + 设置/sbin/blockdev --setra 16384 devname预读块大小为16384,并 echo "/sbin/blockdev --setra expecte devname" >> + initFile写入自启动文件 + + + + 检查磁盘预读块大小,如果预读块大小为16384则检查项通过,否则检查项不通过 + + device + root + all + default + + + + + <zh>检查磁盘格式参数</zh> + <en>Check the disk configuration status</en> + + + + 设置/etc/fdisk文件,将xfs格式的磁盘配置为'rw,noatime,inode64,allocsize=16m',并重新mount。 + + + 检查磁盘格式信息,如果格式为ext3/ext4/xfs中的一种且xfs的配置为'rw,noatime,inode64,allocsize=16m'则检查项通过,否则报warning + + device + user + all + default + + + + + <zh>检查磁盘inodes使用率</zh> + <en>Check the usage of disk inodes</en> + + + Threshold_NG=80; + Threshold_Warning=60 + + + 清理磁盘对应中的空间或更换更大的磁盘。 + + + 通过df -i检查磁盘指定目录(目录列表)inodes使用率,如果使用率超过warning阈值(默认值为60%)则报warning,超过NG阈值(默认值为80%)则报NG,inode总数少于5亿则报NG + + device + user + all + default + + + + + <zh>检查磁盘使用率</zh> + <en>Check the usage of disk</en> + + + Threshold_NG=90; + Threshold_Warning=70; + DiskVailPGHOST=5; + DiskVailGPHOME=5; + DiskVailGAUSSHOME=5; + DiskVailGAUSSLOG=20; + DiskVailOS_TMP=5; + DiskVailDATA=20 + + + 清理磁盘对应中的空间或更换更大的磁盘。 + + + "如果使用率超过warning阈值(默认为70%) 报warning,超过NG阈值(默认为90%)则检查项不通过 。集群路径下检查GAUSSHOME/PGHOST/GPHOME/GAUSSLOG/tmp/data路径的剩余空间,不满足阈值则检查项不通过" + + device + user + all + default + + + + + <zh>检查磁盘空间大小一致性</zh> + <en>Check the disk configuration consistent</en> + + + + + 检查磁盘名大小挂载点一致,不满足报warning + + device + user + all + consistent + + + + + <zh>检查CheckXid数值</zh> + <en>Check the value of CheckXid</en> + + + + 暂无安全处理方案 + + + 如果xid的值大于10亿,抛出Warning。如果xid的值大于18亿,抛出Abnormal。 + + database + user + cn + default + + + + + <zh>检查每个实例的系统表容量</zh> + <en>Check the size of system table in every instances</en> + + + + 清理硬盘至容量足够 + + + 如果容量不足,抛出Abnormal。 + + database + user + all + default + 
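Each item above is backed by a Python checker of the same name under script/gspylib/inspection/items/. As a rough sketch of that shape, modeled on CheckCatchup further down in this patch (the class name CheckResidualProcess and its shell command are illustrative, not part of the patch):

# -*- coding:utf-8 -*-
from gspylib.inspection.common import SharedFuncs
from gspylib.inspection.common.CheckItem import BaseItem
from gspylib.inspection.common.CheckResult import ResultStatus


class CheckResidualProcess(BaseItem):
    def __init__(self):
        super(CheckResidualProcess, self).__init__(self.__class__.__name__)

    def doCheck(self):
        # wc -l keeps the pipeline's exit status at 0 even when nothing
        # matches, so runShellCmd does not raise ShellCommandException
        cmd = "ps -ef | grep -w omm | grep -v grep | wc -l"
        output = SharedFuncs.runShellCmd(cmd)
        self.result.raw = cmd
        if int(output.strip()) > 0:
            self.result.rst = ResultStatus.NG
            self.result.val = "Residual omm processes were found."
        else:
            self.result.rst = ResultStatus.OK
            self.result.val = "No residual omm processes were found."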
+ + + + <zh>检查表级别数据倾斜</zh> + <en></en> + + + + 知会客户手工修复 + + + 检查是否存在表级别数据倾斜,若存在则检查不通过 + + database + user + cn + default + + + + + <zh>检查未做analyze的表</zh> + <en></en> + + + + 对没有做analyze的表执行analyze。 + + + 若数据库中存在未做过analyze的表,则检查不通过 + + database + user + cn + default + + + + + <zh>检查DN级别的数据分布倾斜</zh> + <en></en> + + + + 知会客户手工修复 + + + 若dn数据分布倾斜,则报NG,否则报OK + + database + user + all + custom + + + + + + <zh>检查磁盘是否存在故障</zh> + <en></en> + + + + 通知客户手工修复 + + + 对集群中所有数据库所有表做全量查询,如果出现故障则报NG,全部查询成功报OK + + database + user + cn + default + + + + + <zh>检查慢盘</zh> + <en>Check the slow node</en> + + + max=200; + high=100 + + + 更换磁盘 + Replace the disk. + + + 如果不存在磁盘的IO平均服务时间低于普遍水平则检查项通过,否则检查项不通过 + If the IO average service time of the disk does not exist below the general level, the check item passes, Otherwise, the item fails the check. + + + 数据库性能下降 + Database performance degradation. + + device + user + all + default + + + + + <zh>检查集群状态</zh> + <en>Check the cluster status</en> + + + + 查询cm_agent进程并检查集群状态 + + + 检查fenced UDF状态,如果为down则报warning;检查集群状态,如果为Normal则检查项通过,否则检查项不通过 + + cluster + user + local + default + + + + + <zh>检查数据库集群参数</zh> + <en>Check database parameters</en> + + + + 使用GUC工具配置CN、DN的参数。 + + + + "检查CN检查共享缓冲区大小和Sem参数主DN实例检查共享缓冲区大小和最大连接数共享缓冲区需要大于128kB且大于shmmax且大于shmall*PAGESIZE若存在CN,则Sem值需大于(DN最大连接数+150)/16向上取整以上项完全满足则检查项通过,否则检查项不通过" + + + cluster + user + cn + default + + + + + <zh>检查日志级别</zh> + <en>Check debug switch</en> + + + + 使用guc工具将log_min_messages设为指定值。 + + + 在各节点检查各实例的配置文件中log_min_messages参数的值,为空则认为是Warning,判断日志级别是 waring,不是则报warning + + + cluster + user + all + default + + + + + <zh>检查升级版本是否一致</zh> + <en>Check the consistence of upgrade version</en> + + + + 重新进行发包,保证各节点安装包版本一致。 + + + 检查集群各个节点上升级包的版本,如果一致则检查项通过,否则检查项不通过 + + cluster + user + all + consistent + + + + + <zh>检查目录权限</zh> + <en>Check the primitive of key folders</en> + + + + 将对应的目录权限进行修改。 + + + 检查节点目录(实例xlog路径、GAUSSHOME、GPHOME、PGHOST、GAUSSLOG)权限,如果目录有写入权限且不大于750则检查项通过,否则检查项不通过 + + + cluster + user + root + all + default + + + + + <zh>检查环境变量</zh> + <en>Check environment profile</en> + + + + 在用户环境变量文件或/etc/profile中添加统一的$GAUSSHOME,并通过export $GAUSSHOME:$PATH 的方式分别添加到$LD_LIBRARY_PATH、$PATH中 + + + + 检查节点环境变量($GAUSSHOME、$LD_LIBRARY_PATH、$PATH),如果环境变量存在并配置正确则检查项通过,否则检查项不通过 + + + cluster + user + all + consistent + + + + + <zh>检查gaussdb版本</zh> + <en>Check the gaussdb version</en> + + + + 重新进行发包,保证各节点新gaussdb文件版本一致 + + + "检查各个节点gaussdb版本是否一致,如果版本一致则检查项通过,否则检查项不通过 + 通过source env,gsql -V | awk '{print $4""_""$6}'获取新的gaussdb版本信息,判断各节点是否一致 + 判断两种方式获得的版本信息是否一致" + + + cluster + user + all + consistent + + + + + <zh>检查端口范围</zh> + <en>Check the port range</en> + + + ip_local_port_range=26000 65535 + + + 通过gs_om -t changeip 更改集群使用的端口号到合法的范围内。 + + + 若ip_local_port_range的范围在阈值范围内(默认是26000 65535),并且实例端口不在ip_local_port_range范围内则检查项通过,否则检查项不通过 + + + cluster + user + all + default + + + + + <zh>检查只读模式</zh> + <en>Check the readonly mode</en> + + + + 确认磁盘空间足够,未执行其他管理操作后,使用GUC工具关闭只读模式。 + + + 检查集群中所有含CN节点上default_transaction_read_only值若为为off则检查通过,否则不通过 + + cluster + user + cn + default + + + + + <zh>检查Catchup(gaussdb进程堆栈应搜索不到CatchupMain函数)</zh> + <en>Check catchup function</en> + + + + gaussdb进程堆栈搜索到CatchupMain函数,MPPDB重启后需要重新检查 + + + 检查gaussdb进程堆栈是否能搜索到CatchupMain函数,若搜索不到则检查项通过,否则检查项不通过 + + + cluster + user + all + default + + + + + <zh>检查进程状态</zh> + <en>Check the Process Status</en> + + + + 如果存在属主不是omm的进程,则停止集群,kill掉所有残留进程,然后重启集群,再次检查进程的属主是否正确。 + + + 检查其他用户的进程中是否存在gaussdb等MPPDB进程,若无则检查项通过,否则检查项不通过 + + cluster + user + all + default + + + + + 
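The database-side items follow the same pattern but issue their probe through SharedFuncs.runSqlCmd instead of a shell command. A hedged sketch only: the class name CheckIdleCount, the query, and the assumption that the framework supplies self.port alongside self.user, self.tmpPath and self.mpprcFile are all illustrative.

# -*- coding:utf-8 -*-
from gspylib.inspection.common import SharedFuncs
from gspylib.inspection.common.CheckItem import BaseItem
from gspylib.inspection.common.CheckResult import ResultStatus


class CheckIdleCount(BaseItem):
    def __init__(self):
        super(CheckIdleCount, self).__init__(self.__class__.__name__)

    def doCheck(self):
        sql = "SELECT count(*) FROM pg_stat_activity WHERE state = 'idle';"
        # runSqlCmd spools the statement into a temporary file, runs
        # "gsql -f" against the instance and returns the spooled result
        output = SharedFuncs.runSqlCmd(sql, self.user, "", self.port,
                                       self.tmpPath, "postgres",
                                       self.mpprcFile)
        self.result.raw = sql
        if int(output.strip()) == 0:
            self.result.rst = ResultStatus.OK
            self.result.val = "No idle sessions."
        else:
            self.result.rst = ResultStatus.NG
            self.result.val = "%s idle sessions found." % output.strip()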
<zh>特殊文件检查</zh> + <en>Check Special File</en> + + + + 人工排除 + + + + 检查tmp目录(PGHOST)、OM目录(GPHOME)、日志目录(GAUSSLOG)、data目录、程序目录(GAUSSHOME)下文件是否存在特殊字符以及非omm用户的文件,若不存在则检查项通过,否则检查项不通过 + + + cluster + user + all + default + + + + + <zh>检查集群的信息收集</zh> + <en>Check the information of the log collection</en> + + + + 确认信息收集是否成功 + + + 在output目录下查看信息收集是否成功,若收集成功则检查项通过,否则检查项不通过 + If the collection is successful, the check item passes, otherwise the check item does not pass. + + cluster + user + cn + default + + + + + <zh>检查gaussdb数据一致性</zh> + <en>Check the gaussdb sha256</en> + + + + 从统一节点分发gaussdb文件 + + + 检查各个节点gaussdb的sha256值,若各节点一致则检查项通过,否则检查项不通过。 + + + cluster + user + all + consistent + + + + + <zh>检查数据目录大文件</zh> + <en>Check the datadir large file</en> + + + size=4G + + + 删除大文件 + + + 检查各个DN实例目录是否存在过大的文件,存在则检查项不通过,否则检查项通过。 + If there is a large file in the DN instance directory, the check item does not pass. Otherwise, the check item passes. + + cluster + user + all + default + + + + + <zh>系统表膨胀检查</zh> + <en>Check Dilate System Table</en> + + + Threshold_NG=100; + Threshold_Warning=50 + + + + 检查系统表是否膨胀,若膨胀则不通过,否则检查通过 + + cluster + user + cn + default + + + + + <zh>关键进程启动时间检测</zh> + <en>check start time of key process</en> + + + + + 检查关键进程启动时间是否间隔超过一分钟,超过则检查不通过,否则检查通过 + + cluster + user + all + custom + + + + + <zh>检测环境变量分离文件改动</zh> + <en>check if there is any change on mpprc file</en> + + + + + 检查是否存在对环境变量分离文件的改动,若存在则检查不通过,否则检查通过 + + cluster + user + all + default + + + + + <zh>检查锁数量</zh> + <en>Check the lock count</en> + + + + + 检查数据库锁数量,查询成功则检查项通过,否则检查项不通过 + + database + user + cn + default + + + + + <zh>检查归档参数</zh> + + + + + 检查数据库归档参数,如果未打开或打开且在CN下则检查项通过,打开且不在CN目录下则检查项不通过 + + database + user + cn + default + + + + + <zh>检查当前连接数</zh> + <en>Check the current connection count</en> + + + + 断开不使用的连接 + + + 检查数据库连接数,如果连接数小于最大连接数的90%则检查项通过,否则检查项不通过 + + database + user + cn + default + + + + + <zh>检查当前游标数</zh> + <en>Check cursor num</en> + + + + 清理不使用的游标 + + + 检查数据库的游标数,查询成功则检查项通过,否则检查项不通过 + + database + user + cn + default + + + + + <zh>检查comm_max_datanode参数值范围小于DN个数</zh> + <en>Check the parameter value of comm_max_datanode</en> + + + nodeCount=90; + dnCount=5 + + + 使用guc工具将comm_max_datanode调整为更大的值(2的指数) + + + 检查最大DN数,若最大DN数小于xml配置的节点数*DN数(默认值为90*5)报warning,否则检查项通过 + + database + user + cn + default + + + + + <zh>检查残留两阶段事务</zh> + <en>Check the value of pg_prepared_xacts</en> + + + + + 检查pg_prepared_xacts参数,如果不存在二阶段事物则检查项通过,否则检查项不通过 + + database + user + cn + default + + + + + <zh>pgxc_group表中in_redistribution为Y的个数是否为0</zh> + <en>Check the value of pgxc_group</en> + + + version=V1R7C00 + + + + 检查pgxc_group表中in_redistribution为Y的个数,如果数量为0则检查项通过,否则检查项不通过 + + database + user + cn + default + + + + + <zh>集群是否被锁</zh> + <en>Check the cluster lock status</en> + + + + 确认集群未进行其他操作后,kill掉存在的锁集群进程。 + + + 检查集群是否被锁,若集群未锁则检查通过,否则检查项不通过 + + database + user + cn + default + + + + + <zh>检查空闲会话</zh> + <en>Check idle session status</en> + + + + kill掉存在的空会话进程。 + + + 检查空闲会话数,如果没有空闲会话则检查项通过,否则检查项不通过 + + database + user + cn + default + + + + + <zh>检查数据库连接</zh> + <en>Check the database connection</en> + + + + 检查集群状态及实例状态。 + + + 检查能否连接数据库,如果连接成功则检查项通过,否则检查项不通过 + + database + user + all + default + + + + + <zh>GUC参数检查</zh> + <en>Check the GUC value</en> + + + + 修改GUC参数 + + + 检查(max_connections + max_prepared_transactions) * + max_locks_per_transaction的值,若该值大于等于1000000则检查项通过,否则检查项不通过。 + + + database + user + cn + default + + + + + <zh>检查PMK异常数据</zh> + <en>Check pmk exception data</en> + + + + 连接数据库修复异常数据 + + + 检查数据库PMK 
schema是否包含有异常数据,如果不存在异常数据则检查项通过,否则检查项不通过 + + database + user + cn + default + + + + + <zh>检查系统表</zh> + + + database=postgres + + + + 检查系统表,查询成功则检查项通过,否则检查项不通过。 + + database + user + all + default + + + + + <zh>检查表空间</zh> + + + + + + 表空间路径和集群路径之间不能存在嵌套且表空间路径相互不能存在嵌套,则检查项通过,否则检查项不通过 + + database + user + cn + default + + + + + <zh>检查sysadmin用户</zh> + <en>Check sysadmin user</en> + + + + + 检查除集群属主外是否存在sysadmin用户,若存在则不通过,否则检查通过 + Check whether there is a sysadmin user other than cluster user. If it exists, it does not pass. Otherwise, the check is passed. + + database + user + cn + default + + + + + <zh>检查guc参数一致性</zh> + <en>Check guc parameter consistency</en> + + + version=V1R7C00 + + + + 检查各CN/DN实例的guc参数是否一致,若全部一致则检查通过,否则检查不通过 + Check whether the guc parameters of each CN/DN instance are consistent. If all parameters are the same, the check is passed. Otherwise, the check fails. + + database + user + all + custom + + + + + <zh>检查Default表达式中包含nextval(sequence)</zh> + + + + + 1、重建replicate table, 开始的时候就创建好所有列 + 2、退出session即可, 避免使用temp sequence + + + 若Default表达式中包含nextval(sequence)则不通过 + + database + user + cn + default + + + + + <zh>检查是否执行过alter table drop column</zh> + + + + + 1、创建新表 create table new(like old including all); + 2、如原表有索引,需要禁用新表索引alter index idx_name UNUSABLE; + 3、导入数据 insert into new select * from old; + 4、恢复新表索引ALTER INDEX idx_name REBUILD; + 5、删除老表 drop table old; + 6、重命名新表 alter table new rename to old; + + + 若执行过alter table drop column操作未消除影响则检查不通过 + + database + user + cn + default + + + + + <zh>检查是否存在TD模式数据库下的orc表,且包含date类型的列</zh> + + + + 删除表或改为其他类型 + + 若存在TD模式数据库下的orc表,且包含date类型的列则检查不通过 + + database + user + cn + default + + + + + <zh>检查是否存在hash index</zh> + + + + 删除hash index + + 若存在hash index则检查不通过 + + database + user + cn + default + + + + + <zh>检查用户自定义函数是否包含非法返回值</zh> + <en>Check if the user-defined function contains an illegal return value</en> + + + + + 用户自定义函数包含非法返回类型,检查不通过,否则检查通过 + + database + user + cn + default + + + + <zh>检查是否存在非SQL_ASCII字符的node group名称</zh> + <en>Check if there is a node group name with non-SQL_ASCII characters</en> + + + + + 存在非SQL_ASCII字符的node group名称,检查不通过,否则检查通过 + + database + user + cn + default + + + + <zh>检查视图中,子查询是否存在隐式的重命名字段</zh> + <en>Check if there is an implicit rename field in the subquery in the view</en> + + + + + 存在重命名字段,检查不通过,否则检查通过 + + database + user + cn + default + + + + <zh>检查数据库中是否存在重分布残留的临时表pgxc_redistb</zh> + <en>Check if there is a redistributed residual temporary table pgxc_redistb in the database</en> + + + + + 不存在则检查通过,否则检查不通过 + + database + user + cn + default + + + + <zh>检查网络通畅</zh> + <en>Check network ping</en> + + + + 检查异常IP节点网络状况。 + + + 检查集群内所有节点的互通性,如果各节点所有IP均可ping通则检查项通过,否则检查项不通过 + + network + user + all + default + + + + + <zh>检查网卡RXTX值</zh> + <en>Check the network RXTX value</en> + + + + 到异常节点执行ethtool eth* RX/TX 4096 将指定网卡RX/TX设置为4096。 + + + 检查节点backIP对应的网卡的网卡速率,若不是万兆网卡报 warning,检查此节点backIp的RX/TX值,如果该值为4096则检查项通过,否则检查项不通过 + + network + root + all + default + + + + + <zh>检查网卡MTU值</zh> + <en>Check the network care MTU</en> + + + expectMTU1=8192; + expectMTU2=1500 + + + 到异常节点执行ifconfig eth* mtu 8192 将指定网卡MTU值设置为8192。 + + + 检查节点backIP对应的网卡MTU值( bond后的物理网卡要确保一致),如果该值不是8192或1500 报warning 若集群MTU值一致则检查项通过,否则检查项不通过 + + network + root + all + consistent + + + + + <zh>检查网络掉包率</zh> + <en>Check network care Drop</en> + + + + + 检查各IP1分钟内网络掉包率,如果不超过1%则检查项通过,否则检查项不通过。 + + network + user + all + default + + + + + <zh>检查网卡绑定模式</zh> + <en>Check the network care bond mode</en> + + + + 到异常节点将指定网卡bond模式设置为与其他节点相同。 + + + 
检查是否有配置BONDING_OPTS或BONDING_MODULE_OPTS,若没有配置则报NG。检查各节点bond模式是否一致,如果同时满足则检查项通过,否则检查项不通过 + + network + root + all + consistent + + + + + <zh>检查网卡多队列</zh> + <en>Check the network care multi-queue</en> + + + + 开启网卡多队列 + + + 检查cat /proc/interrupts,判断是否开启网卡多队列且绑定不同CPU,如果满足则检查项通过,否则检查项不通过 + + network + root + all + default + + + + + <zh>检查随机端口使用数量</zh> + <en>Check port used number</en> + + + + 增大net.ipv4.ip_local_port_range或降低并发 + + + 检查net.ipv4.ip_local_port_range,范围大于等于OS默认值通过;检查TCP协议随机端口数,小于总随机端口数的80%通过;检查SCTP协议随机端口数,小于总随机端口数的80%通过 + + network + user + all + default + + + + + <zh>网络带宽测试</zh> + <en>Check net speed</en> + + + + 检查网络组网环境 + + + 使用speed_test跑满网络带宽,带宽大于600MB通过;网络满载时,检查网络ping值,小于1秒通过;网络满载时,检查网卡丢包率,小于1%通过 + + network + user + all + default + + + + + <zh>网卡型号检查</zh> + <en>Check NIC model</en> + + + + 使用相同型号及驱动版本的网卡 + + + 检查各节点网卡的型号及驱动版本,各节点一致时通过 + + network + root + all + consistent + + + + + <zh>本地路由表检查</zh> + <en>Check routing table</en> + + + + 设置节点上只有一个业务网段IP + + + 检查单节点上业务网段的IP个数,超过一个则不通过 + + network + user + all + default + + + + + <zh>检查DN磁盘使用率</zh> + <en>Check the usage of DN disk</en> + + + + 清理磁盘对应中的空间或更换更大的磁盘。 + + + 检查磁盘DN目录空间和索引使用率,如果使用率低于90%则检查项通过,否则检查项不通过 + + + other + user + all + default + + + + + <zh>检查系统安装磁盘空间使用率</zh> + <en>Check the usage of install disk</en> + + + + 清理磁盘对应中的空间或更换更大的磁盘。 + + + 检查磁盘系统安装目录空间和索引使用率,如果使用率低于90%则检查项通过,否则检查项不通过 + + + other + user + all + default + + + + + <zh>检查日志磁盘空间使用率</zh> + <en>Check the usage of log disk</en> + + + + 清理磁盘对应中的空间或更换更大的磁盘。 + + + 检查磁盘日志目录空间和索引使用率,如果使用率低于90%则检查项通过,否则检查项不通过 + + + other + user + all + default + + + + + <zh>检查临时磁盘空间使用率</zh> + <en>Check the usage of tmp disk</en> + + + + 清理磁盘对应中的空间或更换更大的磁盘。 + + + 检查磁盘临时目录磁盘和索引使用率,如果使用率低于90%则检查项通过,否则检查项不通过 + + + other + user + all + default + + + \ No newline at end of file diff --git a/script/gspylib/inspection/config/scene_binary_upgrade.xml b/script/gspylib/inspection/config/scene_binary_upgrade.xml new file mode 100644 index 0000000..3044382 --- /dev/null +++ b/script/gspylib/inspection/config/scene_binary_upgrade.xml @@ -0,0 +1,9 @@ + + + + + + + + + \ No newline at end of file diff --git a/script/gspylib/inspection/config/scene_health.xml b/script/gspylib/inspection/config/scene_health.xml new file mode 100644 index 0000000..de7670e --- /dev/null +++ b/script/gspylib/inspection/config/scene_health.xml @@ -0,0 +1,22 @@ + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/script/gspylib/inspection/config/scene_inspect.xml b/script/gspylib/inspection/config/scene_inspect.xml new file mode 100644 index 0000000..463e4b7 --- /dev/null +++ b/script/gspylib/inspection/config/scene_inspect.xml @@ -0,0 +1,66 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/script/gspylib/inspection/config/scene_install.xml b/script/gspylib/inspection/config/scene_install.xml new file mode 100644 index 0000000..a189193 --- /dev/null +++ b/script/gspylib/inspection/config/scene_install.xml @@ -0,0 +1,56 @@ + + + + + + + + + + + + + + + + + + version=V1R7C00 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/script/gspylib/inspection/config/scene_longtime.xml b/script/gspylib/inspection/config/scene_longtime.xml new file mode 100644 index 0000000..7509f67 --- /dev/null +++ b/script/gspylib/inspection/config/scene_longtime.xml @@ -0,0 +1,12 @@ + + + + + + + + + + + + \ No newline at 
end of file
diff --git a/script/gspylib/inspection/config/scene_slow_node.xml b/script/gspylib/inspection/config/scene_slow_node.xml new file mode 100644 index 0000000..b055100 --- /dev/null +++ b/script/gspylib/inspection/config/scene_slow_node.xml @@ -0,0 +1,19 @@ + + + + + + + + StandardCPUIdle=10; + + + + + dropRate=1; + + + + + \ No newline at end of file
diff --git a/script/gspylib/inspection/config/scene_upgrade.xml b/script/gspylib/inspection/config/scene_upgrade.xml new file mode 100644 index 0000000..426785a --- /dev/null +++ b/script/gspylib/inspection/config/scene_upgrade.xml @@ -0,0 +1,60 @@ + + + + + + + + + + + + + + + + + + + + + + + version=V1R7C00 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file
diff --git a/script/gspylib/inspection/items/cluster/CheckCatchup.py b/script/gspylib/inspection/items/cluster/CheckCatchup.py
new file mode 100644
index 0000000..057326c
--- /dev/null
+++ b/script/gspylib/inspection/items/cluster/CheckCatchup.py
@@ -0,0 +1,39 @@
+# -*- coding:utf-8 -*-
+# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+#
+# openGauss is licensed under Mulan PSL v2.
+# You can use this software according to the terms
+# and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+#
+#          http://license.coscl.org.cn/MulanPSL2
+#
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+# ----------------------------------------------------------------------------
+from gspylib.inspection.common import SharedFuncs
+from gspylib.inspection.common.CheckItem import BaseItem
+from gspylib.inspection.common.CheckResult import ResultStatus
+
+
+class CheckCatchup(BaseItem):
+    def __init__(self):
+        super(CheckCatchup, self).__init__(self.__class__.__name__)
+
+    def doCheck(self):
+        cmd = "ps -ef |grep '^\<%s\>' | grep '\<gaussdb\>' | grep -v grep |" \
+              " awk '{print $2}' |(while read arg; do gstack $arg |" \
+              " grep CatchupMain; done) 2>/dev/null" % self.user
+        output = SharedFuncs.runShellCmd(cmd)
+        if (output != ""):
+            self.result.rst = ResultStatus.NG
+            self.result.val = "The gaussdb process stack contains the" \
+                              " CatchupMain function."
+        else:
+            self.result.rst = ResultStatus.OK
+            self.result.val = "The gaussdb process stack does not" \
+                              " contain the CatchupMain function."
+        self.result.raw = cmd
diff --git a/script/gspylib/inspection/items/cluster/CheckClusterState.py b/script/gspylib/inspection/items/cluster/CheckClusterState.py
new file mode 100644
index 0000000..2ddb3c4
--- /dev/null
+++ b/script/gspylib/inspection/items/cluster/CheckClusterState.py
@@ -0,0 +1,81 @@
+# -*- coding:utf-8 -*-
+# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+#
+# openGauss is licensed under Mulan PSL v2.
+# You can use this software according to the terms
+# and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+#
+#          http://license.coscl.org.cn/MulanPSL2
+#
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
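Note: CheckCatchup above reduces to one pattern: run a shell pipeline and flag NG whenever it prints anything. A minimal standalone sketch of the same probe (assuming gstack from gdb is installed; plain subprocess stands in for SharedFuncs.runShellCmd, and the function name is illustrative):

    import subprocess

    def catchup_in_progress(user):
        # List PIDs of gaussdb processes owned by `user`, then search each
        # stack dumped by gstack for the CatchupMain frame.
        cmd = ("ps -ef | grep '^%s' | grep gaussdb | grep -v grep | "
               "awk '{print $2}' | (while read pid; do gstack $pid | "
               "grep CatchupMain; done) 2>/dev/null" % user)
        out = subprocess.run(cmd, shell=True, capture_output=True, text=True)
        return out.stdout.strip() != ""  # non-empty output: catchup running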
+# ---------------------------------------------------------------------------- +import os +import subprocess +from gspylib.inspection.common import SharedFuncs +from gspylib.inspection.common.CheckItem import BaseItem +from gspylib.inspection.common.CheckResult import ResultStatus +from gspylib.common.DbClusterStatus import DbClusterStatus +from gspylib.common.Common import ClusterCommand +from gspylib.os.gsfile import g_file + +KEY_FILE_MODE = 600 + + +class CheckClusterState(BaseItem): + def __init__(self): + super(CheckClusterState, self).__init__(self.__class__.__name__) + + def doCheck(self): + tmpFile = os.path.join(self.tmpPath, "gauss_cluster_status.dat") + tmpFileName = os.path.join(self.tmpPath, "abnormal_node_status.dat") + try: + self.result.val = "" + self.result.raw = "" + # Check the cluster status with cm_ctl + cmd = ClusterCommand.getQueryStatusCmd(self.user, "", tmpFile) + output = SharedFuncs.runShellCmd(cmd, self.user, self.mpprcFile) + self.result.raw += output + # Check whether the cluster needs to be balanced + # Check whether redistribution is required + # Initialize cluster status information for temporary file + clusterStatus = DbClusterStatus() + clusterStatus.initFromFile(tmpFile) + # Get the status of cluster + statusInfo = clusterStatus.getClusterStauts(self.user) + self.result.val = statusInfo + if clusterStatus.isAllHealthy(): + self.result.rst = ResultStatus.OK + if os.path.exists(tmpFile): + os.remove(tmpFile) + return + # If the abnormal node is present, create a temporary file + # and print out the details + g_file.createFile(tmpFileName, True, KEY_FILE_MODE) + with open(tmpFileName, "w+") as tmpFileFp: + for dbNode in clusterStatus.dbNodes: + if not dbNode.isNodeHealthy(): + dbNode.outputNodeStatus(tmpFileFp, self.user, True) + tmpFileFp.flush() + tmpFileFp.seek(0) + self.result.raw = tmpFileFp.read() + if self.result.raw == "": + self.result.raw = "Failed to obtain the cluster status." + self.result.rst = ResultStatus.NG + # Delete the temporary file + if os.path.exists(tmpFileName): + os.remove(tmpFileName) + if os.path.exists(tmpFile): + os.remove(tmpFile) + except Exception as e: + if os.path.exists(tmpFile): + os.remove(tmpFile) + if os.path.exists(tmpFileName): + os.remove(tmpFileName) + raise Exception(str(e)) + + diff --git a/script/gspylib/inspection/items/cluster/CheckCollector.py b/script/gspylib/inspection/items/cluster/CheckCollector.py new file mode 100644 index 0000000..e62c1ed --- /dev/null +++ b/script/gspylib/inspection/items/cluster/CheckCollector.py @@ -0,0 +1,99 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. 
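Note: CheckClusterState above (and several checks below) follows the same temp-file lifecycle: write a command's result file into a scratch path, parse it, and remove the file on every exit path, including failures. A generic sketch of that pattern (cmd_template and parse are placeholders, not names from the patch):

    import os
    import subprocess
    import tempfile

    def run_and_parse(cmd_template, parse):
        # cmd_template must contain one %s for the scratch-file path.
        fd, tmp = tempfile.mkstemp(prefix="gauss_status_")
        os.close(fd)
        try:
            subprocess.run(cmd_template % tmp, shell=True, check=True)
            return parse(tmp)
        finally:
            # Cleanup runs whether parsing succeeded or raised.
            if os.path.exists(tmp):
                os.remove(tmp)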
+# ----------------------------------------------------------------------------
+import os
+from gspylib.inspection.common.CheckItem import BaseItem
+from gspylib.inspection.common import SharedFuncs
+from gspylib.inspection.common.CheckResult import ResultStatus
+from gspylib.os.gsfile import g_file
+from gspylib.common.Common import DefaultValue
+
+SHELLPATH = os.path.realpath(
+    os.path.join(os.path.split(os.path.realpath(__file__))[0],
+                 "../../lib/checkcollector/"))
+# file permission
+FILE_MODE = 700
+
+
+class CheckCollector(BaseItem):
+    def __init__(self):
+        super(CheckCollector, self).__init__(self.__class__.__name__)
+
+    def checkFilePermission(self, filename):
+        """
+        Function : check file: 1.exist 2.isfile 3.permission
+        Note : 1.You must check that the file exists and is a file.
+               2.You can choose whether to check the file's
+               permission: executable.
+        """
+        # Check if the file exists
+        if (not os.path.exists(filename)):
+            raise Exception("The file %s does not exist." % filename)
+        # Check whether the path is a regular file
+        if (not os.path.isfile(filename)):
+            raise Exception("%s is not a file." % filename)
+        # Check the file permissions and
+        # make the file executable if it is not
+        if (not os.access(filename, os.X_OK)):
+            g_file.changeMode(DefaultValue.KEY_DIRECTORY_MODE, filename)
+
+    def genhostfile(self, nodenames):
+        """
+        Function : generate host file
+        """
+        iphostInfo = ""
+        nodenameFile = "hostfile"
+        # the path of script
+        recordFile = os.path.join(SHELLPATH, nodenameFile)
+        for nodename in nodenames:
+            iphostInfo += '%s\n' % nodename
+
+        g_file.createFile(recordFile, True, DefaultValue.KEY_DIRECTORY_MODE)
+
+        # Write IP information to file
+        g_file.writeFile(recordFile, [iphostInfo])
+
+    def doCheck(self):
+        parRes = ""
+        # generate hostfile file, server node name
+        self.genhostfile(self.nodes)
+        # shell name
+        shellName = "getClusterInfo.sh"
+        # the path of script
+        shellName = os.path.join(SHELLPATH, shellName)
+        # check permission
+        self.checkFilePermission(shellName)
+
+        g_file.replaceFileLineContent('omm', self.user, shellName)
+        g_file.replaceFileLineContent(
+            '\/opt\/huawei\/Bigdata\/mppdb\/.mppdbgs_profile',
+            self.mpprcFile.replace('/', '\/'), shellName)
+        # the shell command
+        executeCmd = "cd %s && sh %s -p %s" % (
+            SHELLPATH, shellName, self.port)
+        self.result.raw = executeCmd
+        # Call the shell script
+        SharedFuncs.runShellCmd(executeCmd, self.user, self.mpprcFile)
+        self.result.rst = ResultStatus.OK
+        packageName = os.path.join(self.outPath, "checkcollector_%s"
+                                   % self.context.checkID)
+        # create tar package
+        g_file.compressZipFiles(packageName, os.path.join(SHELLPATH, 'out'))
+        # Check the result information
+        parRes += "The inspection(checkcollector) has been completed!\n"
+        parRes += "Please decompress it first." \
+                  " The log is saved in '%s.zip'" % (packageName)
+        self.result.val = parRes
diff --git a/script/gspylib/inspection/items/cluster/CheckDBParams.py b/script/gspylib/inspection/items/cluster/CheckDBParams.py
new file mode 100644
index 0000000..1d2bbe3
--- /dev/null
+++ b/script/gspylib/inspection/items/cluster/CheckDBParams.py
@@ -0,0 +1,267 @@
+# -*- coding:utf-8 -*-
+# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+#
+# openGauss is licensed under Mulan PSL v2.
+# You can use this software according to the terms
+# and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- + +import os +import math +import subprocess +from gspylib.inspection.common import SharedFuncs +from gspylib.inspection.common.CheckItem import BaseItem +from gspylib.inspection.common.CheckResult import ResultStatus +from gspylib.common.ErrorCode import ErrorCode +from gspylib.common.DbClusterStatus import DbClusterStatus +from gspylib.common.Common import ClusterCommand + + +class CheckDBParams(BaseItem): + def __init__(self): + super(CheckDBParams, self).__init__(self.__class__.__name__) + + def doCheck(self): + # Gets the current node information + nodeInfo = self.cluster.getDbNodeByName(self.host) + # Get the number of instances + InatorsList = nodeInfo.datanodes + # Get local primary DB id + primaryDNidList = self.getLocalPrimaryDNid(nodeInfo) + self.result.raw = "" + # Determine if there are DB instances + if (len(primaryDNidList) < 1): + self.result.raw = "There is no primary database node " \ + "instance in the current node." + self.result.rst = ResultStatus.OK + return + for inst in InatorsList: + self.CheckGaussdbParameters(inst, nodeInfo, primaryDNidList) + if (self.result.rst != ResultStatus.NG): + self.result.rst = ResultStatus.OK + + def getLocalPrimaryDNid(self, nodeInfo): + """ + function: Get local primary DNid + input: NA + output: NA + """ + tmpFile = os.path.join(self.tmpPath, "gauss_dn_status.dat") + primaryDNidList = [] + try: + # Use cm_ctl to query the current node instance + cmd = ClusterCommand.getQueryStatusCmd(self.user, nodeInfo.name, + tmpFile) + SharedFuncs.runShellCmd(cmd, self.user, self.mpprcFile) + # Match query results and cluster configuration + clusterStatus = DbClusterStatus() + clusterStatus.initFromFile(tmpFile) + if (os.path.exists(tmpFile)): + os.remove(tmpFile) + # Find the master DB instance + for dbNode in clusterStatus.dbNodes: + for instance in dbNode.datanodes: + if instance.status == 'Primary': + primaryDNidList.append(instance.instanceId) + return primaryDNidList + except Exception as e: + if (os.path.exists(tmpFile)): + os.remove(tmpFile) + raise Exception(str(e)) + + def CheckSingleGaussdbParameter(self, port, desc, + INDENTATION_VALUE_INT=60): + """ + function: check gaussdb instance parameters + input: int, string, int + output: bool + """ + sqlResultFile = "" + try: + flag = True + # Generate different temporary files when parallel + # Identify by instance number + # Remove parentheses from the instance number + InstNum = desc.replace('(', '') + InstNum = InstNum.replace(')', '') + # get max connection number + sqlcmd = "show max_connections;" + output = SharedFuncs.runSqlCmd(sqlcmd, self.user, "", port, + self.tmpPath, "postgres", + self.mpprcFile) + maxConnections = int(output) + if (desc.find("CN(") < 0): + self.result.raw += "The max number of %s connections " \ + "is %s.\n" % (desc, maxConnections) + # get shared_buffers size + GB = 1 * 1024 * 1024 * 1024 + MB = 1 * 1024 * 1024 + kB = 1 * 1024 + shared_buffers = 0 + # Execute the query command + sqlcmd = "show shared_buffers;" + output = SharedFuncs.runSqlCmd(sqlcmd, self.user, "", port, + self.tmpPath, "postgres", + self.mpprcFile) + shared_buffer_size = 
str(output)
+            # Normalize the queried shared_buffers value into bytes
+            if shared_buffer_size[0:-2].isdigit() and \
+                    shared_buffer_size[-2:] == "GB":
+                shared_buffers = int(shared_buffer_size[0:-2]) * GB
+            if shared_buffer_size[0:-2].isdigit() and \
+                    shared_buffer_size[-2:] == "MB":
+                shared_buffers = int(shared_buffer_size[0:-2]) * MB
+            if shared_buffer_size[0:-2].isdigit() and \
+                    shared_buffer_size[-2:] == "kB":
+                shared_buffers = int(shared_buffer_size[0:-2]) * kB
+            if shared_buffer_size[0:-1].isdigit() and \
+                    shared_buffer_size[-1:] == "B":
+                shared_buffers = int(shared_buffer_size[0:-1])
+
+            # check shared_buffers against the kernel limits
+            strCmd = "cat /proc/sys/kernel/shmmax"
+            status, shmmax = subprocess.getstatusoutput(strCmd)
+            if (status != 0):
+                self.result.raw += "Failed to obtain shmmax parameters." \
+                                   " Command: %s.\n" % strCmd
+                flag = False
+            # check shmall parameters
+            strCmd = "cat /proc/sys/kernel/shmall"
+            status, shmall = subprocess.getstatusoutput(strCmd)
+            if (status != 0):
+                self.result.raw += "Failed to obtain shmall parameters." \
+                                   " Command: %s.\n" % strCmd
+                flag = False
+            # get PAGESIZE
+            strCmd = "getconf PAGESIZE"
+            status, PAGESIZE = subprocess.getstatusoutput(strCmd)
+            if (status != 0):
+                self.result.raw += "Failed to obtain PAGESIZE." \
+                                   " Command: %s.\n" % strCmd
+                flag = False
+            if (shared_buffers < 128 * kB):
+                self.result.raw += "Shared_buffers must be greater " \
+                                   "than or equal to 128KB.\n"
+                flag = False
+            elif (shared_buffers > int(shmmax)):
+                self.result.raw += "Shared_buffers must be less" \
+                                   " than shmmax(%d).\n" % int(shmmax)
+                flag = False
+            elif (shared_buffers > int(shmall) * int(PAGESIZE)):
+                self.result.raw += "Shared_buffers must be less " \
+                                   "than shmall*PAGESIZE(%d).\n" \
+                                   % (int(shmall) * int(PAGESIZE))
+                flag = False
+            else:
+                self.result.raw += "%s Shared buffers size is %s.\n" \
+                                   % (desc, shared_buffer_size)
+            # check sem
+            if (desc.find("CN(") >= 0):
+                strCmd = "cat /proc/sys/kernel/sem"
+                status, output = subprocess.getstatusoutput(strCmd)
+                if (status != 0):
+                    self.result.raw += "Failed to obtain sem parameters." \
+                                       " Error: %s.\n" % output + \
+                                       " Command: %s.\n" % strCmd
+                    flag = False
+                paramList = output.split("\t")
+                if (int(paramList[0]) < 17):
+                    self.result.raw += "The system limit for the maximum" \
+                                       " number of semaphores per set" \
+                                       " (SEMMSL) must be greater than or" \
+                                       " equal to 17. The current SEMMSL " \
+                                       "value is: " + str(paramList[0]) \
+                                       + ".\n"
+                    flag = False
+
+                if (int(paramList[3]) < math.ceil(
+                        (maxConnections + 150) / 16)):
+                    self.result.raw += "The system limit for the maximum" \
+                                       " number of semaphore sets (SEMMNI)" \
+                                       " must be greater than or equal to" \
+                                       " the value(math.ceil((" \
+                                       "maxConnections + 150) / 16)) " + \
+                                       str(math.ceil((maxConnections +
+                                                      150) / 16)) + \
+                                       ", The current SEMMNI value is: " + \
+                                       str(paramList[3]) + ".\n"
+                    flag = False
+                elif (int(paramList[1]) < math.ceil(
+                        (maxConnections + 150) / 16) * 17):
+                    self.result.raw += "The system limit for the maximum" \
+                                       " number of semaphores (SEMMNS) must" \
+                                       " be greater than or equal to the" \
+                                       " value(math.ceil((maxConnections" \
+                                       " + 150) / 16) * 17) " \
+                                       + str(math.ceil((maxConnections +
+                                                        150) / 16) * 17) + \
+                                       ", The current SEMMNS value is: " + \
+                                       str(paramList[1]) + ".\n"
+                    flag = False
+            else:
+                self.result.raw += "The max number of %s connections" \
+                                   " is %s.\n" % (desc, maxConnections)
+            if (os.path.exists(sqlResultFile)):
+                os.remove(sqlResultFile)
+            return flag
+        except Exception as e:
+            if (os.path.exists(sqlResultFile)):
+                os.remove(sqlResultFile)
+            raise Exception(ErrorCode.GAUSS_513["GAUSS_51306"] %
+                            (("The max number of %s connections.\n" %
+                              desc).ljust(INDENTATION_VALUE_INT), str(e)))
+
+    def CheckGaussdbParameters(self, inst, nodeInfo, primaryDNidList):
+        """
+        function: Check gaussdb instance parameters
+        input: instance, node info, primary DN id list
+        output: NA
+        """
+        INDENTATION_VALUE_INT = 50
+        resultList = []
+        try:
+            # Check all primary DB instances
+            if (primaryDNidList != []):
+                if (inst in nodeInfo.datanodes):
+                    if inst.instanceId in primaryDNidList:
+                        resultList.append(
+                            self.CheckSingleGaussdbParameter(
+                                inst.port, "DN(%s)" % str(inst.instanceId),
+                                INDENTATION_VALUE_INT))
+            if (False in resultList):
+                self.result.rst = ResultStatus.NG
+                return
+        except Exception as e:
+            raise Exception(str(e))
+
+    def doSet(self):
+        resultStr = ""
+        cmd = "gs_guc set -N all -I all -c" \
+              " 'shared_buffers=1GB' -c 'max_connections=400'"
+        (status, output) = subprocess.getstatusoutput(cmd)
+        if (status != 0):
+            resultStr += "Failed to set cn shared_buffers.\nError : %s" \
+                         % output + " Command: %s.\n" % cmd
+        cmd = "gs_guc set -N all -I all -c 'shared_buffers=1GB'" \
+              " -c 'max_connections=3000'"
+        (status, output) = subprocess.getstatusoutput(cmd)
+        if (status != 0):
+            resultStr += "Failed to set database node shared_buffers.\n" \
+                         "Error : %s" % output + " Command: %s.\n" % cmd
+        if (len(resultStr) > 0):
+            self.result.val = resultStr
+        else:
+            self.result.val = "Set shared_buffers successfully."
diff --git a/script/gspylib/inspection/items/cluster/CheckDebugSwitch.py b/script/gspylib/inspection/items/cluster/CheckDebugSwitch.py
new file mode 100644
index 0000000..c96a812
--- /dev/null
+++ b/script/gspylib/inspection/items/cluster/CheckDebugSwitch.py
@@ -0,0 +1,146 @@
+# -*- coding:utf-8 -*-
+# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+#
+# openGauss is licensed under Mulan PSL v2.
+# You can use this software according to the terms
+# and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+#
+#          http://license.coscl.org.cn/MulanPSL2
+#
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
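Note: the semaphore branch above encodes the kernel.sem sizing rule the check enforces: with nsets = ceil((max_connections + 150) / 16) semaphore sets of 17 semaphores each, it requires SEMMSL >= 17, SEMMNI >= nsets, and SEMMNS >= 17 * nsets. A small helper that computes those minimums (name is illustrative):

    import math

    def required_sem(max_connections):
        # Semaphore sets needed by the server, with headroom of 150 backends.
        nsets = math.ceil((max_connections + 150) / 16)
        return {"SEMMSL": 17,          # semaphores per set
                "SEMMNI": nsets,       # number of sets
                "SEMMNS": 17 * nsets}  # total semaphores

For example, required_sem(3000) gives SEMMNI 197 and SEMMNS 3349, which is why a default kernel.sem often fails this check on large max_connections settings.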
+# ---------------------------------------------------------------------------- + +import os +import subprocess +from gspylib.inspection.common import SharedFuncs +from gspylib.inspection.common.CheckItem import BaseItem +from gspylib.inspection.common.CheckResult import ResultStatus +from gspylib.os.gsfile import g_file + +# Conf file name constant +POSTGRESQL_CONF = "postgresql.conf" +INSTANCE_ROLE_DATANODE = 4 +g_result = [] + + +class CheckDebugSwitch(BaseItem): + def __init__(self): + super(CheckDebugSwitch, self).__init__(self.__class__.__name__) + + def obtainDataDirLength(self, nodeInfo): + """ + function: Obtain data dir length + input: NA + output: int, list + """ + # Get the longest path + DirLength = 0 + # Get the DB instance and the longest DB path + for inst in nodeInfo.datanodes: + if (len(inst.datadir) > DirLength): + DirLength = len(inst.datadir) + + return DirLength + + def checkSingleParaFile(self, inst, desc, INDENTATION_VALUE_INT): + """ + function: Check the log_min_messages parameter for each instance + input: String, String, int + output: int + """ + # The instance directory must exist + if (not os.path.exists(inst.datadir) or len( + os.listdir(inst.datadir)) == 0): + g_result.append( + "%s: Abnormal reason: The directory doesn't exist" + " or is empty." % ( + "%s(%s) log_min_messages parameter" % ( + desc, inst.datadir)).ljust(INDENTATION_VALUE_INT)) + return -1 + paraPath = "" + # Gets the database node configuration file + if inst.instanceRole == INSTANCE_ROLE_DATANODE: + paraPath = os.path.join(inst.datadir, POSTGRESQL_CONF) + else: + g_result.append( + "%s: Abnormal reason: Invalid instance type: %s." % ( + ("%s(%s) log_min_messages parameter " % ( + desc, inst.datadir)).ljust(INDENTATION_VALUE_INT), + inst.instanceRole)) + return - 1 + # The instance configuration file must exist + if (not os.path.exists(paraPath)): + g_result.append("%s: Abnormal reason: %s does not exist." % ( + ("%s(%s) log_min_messages parameter " % ( + desc, inst.datadir)).ljust(INDENTATION_VALUE_INT), + paraPath)) + return -1 + # Gets the log_min_messages parameter in the configuration file + output = g_file.readFile(paraPath, "log_min_messages") + value = None + for line in output: + line = line.split('#')[0].strip() + if (line.find('log_min_messages') >= 0 and line.find('=') > 0): + value = line.split('=')[1].strip() + break + if not value: + value = "warning" + # Determines whether the log_min_messages parameter is valid + if (value.lower() != "warning"): + g_result.append( + "%s: Warning reason: The parameter 'log_min_messages(%s)'" + " value is incorrect. It should be 'warning'." 
+ % (("%s(%s) log_min_messages parameter(%s)" + % (desc, paraPath, value)).ljust(INDENTATION_VALUE_INT), + value)) + return -1 + g_result.append("%s: Normal" % ( + "%s(%s) log_min_messages parameter(%s)" % ( + desc, paraPath, value)).ljust( + INDENTATION_VALUE_INT)) + return 0 + + def doCheck(self): + global g_result + g_result = [] + nodeInfo = self.cluster.getDbNodeByName(self.host) + intervalLen = self.obtainDataDirLength(nodeInfo) + resultList = [] + self.result.val = "" + INDENTATION_VALUE_INT = intervalLen + 64 + # Check all DB instance debug switch + for inst in nodeInfo.datanodes: + resultList.append( + self.checkSingleParaFile(inst, "DN", INDENTATION_VALUE_INT)) + if (-1 in resultList): + self.result.rst = ResultStatus.WARNING + else: + self.result.rst = ResultStatus.OK + for detail in g_result: + self.result.val = self.result.val + '%s\n' % detail + + def doSet(self): + nodeInfo = self.cluster.getDbNodeByName(self.host) + intervalLen = self.obtainDataDirLength(nodeInfo) + flag = 0 + resultStr = "" + INDENTATION_VALUE_INT = intervalLen + 64 + for inst in nodeInfo.datanodes: + flag = self.checkSingleParaFile(inst, "DN", INDENTATION_VALUE_INT) + if (flag == -1): + cmd = "gs_guc set -N all -I all -c" \ + " 'log_min_messages = warning'" + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + resultStr += "Falied to set database node " \ + "log_min_massages.\n Error : %s" % output + \ + " Command: %s.\n" % cmd + if (len(resultStr) > 0): + self.result.val = resultStr + else: + self.result.val = "Set log_min_messages successfully." diff --git a/script/gspylib/inspection/items/cluster/CheckDilateSysTab.py b/script/gspylib/inspection/items/cluster/CheckDilateSysTab.py new file mode 100644 index 0000000..84c0976 --- /dev/null +++ b/script/gspylib/inspection/items/cluster/CheckDilateSysTab.py @@ -0,0 +1,95 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. 
+# ----------------------------------------------------------------------------
+from gspylib.inspection.common import SharedFuncs
+from gspylib.inspection.common.CheckItem import BaseItem
+from gspylib.inspection.common.CheckResult import ResultStatus
+from gspylib.common.ErrorCode import ErrorCode
+
+dbList = []
+
+
+class CheckDilateSysTab(BaseItem):
+    def __init__(self):
+        super(CheckDilateSysTab, self).__init__(self.__class__.__name__)
+        self.Threshold_NG = None
+        self.Threshold_Warning = None
+
+    def preCheck(self):
+        super(CheckDilateSysTab, self).preCheck()
+        if (not (self.threshold.__contains__(
+                'Threshold_NG') and self.threshold.__contains__(
+                'Threshold_Warning'))):
+            raise Exception(ErrorCode.GAUSS_530["GAUSS_53013"]
+                            % "The threshold Threshold_NG and"
+                              " Threshold_Warning ")
+        if (not self.threshold['Threshold_NG'].isdigit() or not
+                self.threshold['Threshold_Warning'].isdigit()):
+            raise Exception(ErrorCode.GAUSS_530["GAUSS_53014"]
+                            % "The threshold Threshold_NG and"
+                              " Threshold_Warning ")
+        self.Threshold_NG = int(self.threshold['Threshold_NG'])
+        self.Threshold_Warning = int(self.threshold['Threshold_Warning'])
+
+    def doCheck(self):
+        global dbList
+        self.result.rst = ResultStatus.OK
+        sqldb = "select datname from pg_database;"
+        output = SharedFuncs.runSqlCmd(sqldb, self.user, "", self.port,
+                                       self.tmpPath, "postgres",
+                                       self.mpprcFile)
+        dbList = output.split("\n")
+        dbList.remove("template0")
+        sql = "select (pg_table_size(1259)/count(*)/247.172)::numeric(10,3)" \
+              " from pg_class;"
+        result = []
+        for db in dbList:
+            # Calculate the bloat ratio with the sql cmd
+            output = SharedFuncs.runSqlCmd(sql, self.user, "", self.port,
+                                           self.tmpPath, db, self.mpprcFile)
+            if (float(output) > self.Threshold_NG):
+                self.result.rst = ResultStatus.NG
+                result.append(db)
+            elif (float(output) > self.Threshold_Warning):
+                result.append(db)
+                if (self.result.rst == ResultStatus.OK):
+                    self.result.rst = ResultStatus.WARNING
+
+        if (self.result.rst == ResultStatus.OK):
+            self.result.val = "No system table bloat was found."
+        else:
+            self.result.val = "System table bloat exists in the" \
+                              " following databases:\n%s" % "\n".join(result)
+
+    def doSet(self):
+        resultStr = ""
+        sqlCmd = "cluster pg_attribute using" \
+                 " pg_attribute_relid_attnum_index;" \
+                 "cluster pg_class using pg_class_oid_index;" \
+                 "cluster pg_type using pg_type_oid_index;" \
+                 "cluster pg_proc using pg_proc_oid_index;" \
+                 "cluster pg_depend using pg_depend_depender_index;" \
+                 "cluster pg_index using pg_index_indexrelid_index;" \
+                 "cluster pg_namespace using pg_namespace_oid_index;" \
+                 "cluster pgxc_class using pgxc_class_pcrelid_index;" \
+                 "vacuum full pg_statistic;"
+        for databaseName in dbList:
+            for sql in sqlCmd.split(';'):
+                output = SharedFuncs.runSqlCmd(sql, self.user, "", self.port,
+                                               self.tmpPath, databaseName,
+                                               self.mpprcFile)
+                resultStr += output
+        self.result.val = resultStr
diff --git a/script/gspylib/inspection/items/cluster/CheckDirPermissions.py b/script/gspylib/inspection/items/cluster/CheckDirPermissions.py
new file mode 100644
index 0000000..3b55c23
--- /dev/null
+++ b/script/gspylib/inspection/items/cluster/CheckDirPermissions.py
@@ -0,0 +1,187 @@
+# -*- coding:utf-8 -*-
+# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+#
+# openGauss is licensed under Mulan PSL v2.
+# You can use this software according to the terms
+# and conditions of the Mulan PSL v2.
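Note on the bloat query above: OID 1259 is pg_class itself, so pg_table_size(1259)/count(*) is bytes per catalog row, and 247.172 appears to be the expected bytes-per-row baseline, making the result a ratio near 1.0 for an unbloated catalog. The classification the check applies, as a pure function (names and the baseline interpretation are assumptions, not confirmed by the patch):

    def classify_bloat(ratio, warn, ng):
        # ratio: pg_table_size(1259) / rows / 247.172 for one database
        if ratio > ng:
            return "NG"
        if ratio > warn:
            return "WARNING"
        return "OK"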
+# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +import os +import pwd +import grp +import subprocess +from gspylib.common.Common import DefaultValue +from gspylib.inspection.common.CheckItem import BaseItem +from gspylib.inspection.common.CheckResult import ResultStatus +from gspylib.os.gsfile import g_file + +DIRECTORY_MODE = 750 +g_result = [] +g_chList = [] + + +class CheckDirPermissions(BaseItem): + def __init__(self): + super(CheckDirPermissions, self).__init__(self.__class__.__name__) + + def obtainDataDirLength(self, nodeInfo): + """ + function: Obtain data dir length + input: NA + output: int, list + """ + # Get the longest path + DirLength = 0 + dataDirList = [] + # Get the DB instance and the longest DB path + for inst in nodeInfo.datanodes: + dataDirList.append(inst.datadir) + if (len(inst.datadir) > DirLength): + DirLength = len(inst.datadir) + # Get the CMserver instance and longest path in the CMserver, DN + for inst in nodeInfo.cmservers: + dataDirList.append(inst.datadir) + if (len(inst.datadir) > DirLength): + DirLength = len(inst.datadir) + # Get the CMagent instance and longest path in the CM, DN + for inst in nodeInfo.cmagents: + dataDirList.append(inst.datadir) + if (len(inst.datadir) > DirLength): + DirLength = len(inst.datadir) + # Get the CN instance and longest path in the CM, DN, CN + for inst in nodeInfo.coordinators: + dataDirList.append(inst.datadir) + if (len(inst.datadir) > DirLength): + DirLength = len(inst.datadir) + # Get the GTM instance and longest path in the CM, DN, CN, GTM + for inst in nodeInfo.gtms: + dataDirList.append(inst.datadir) + if (len(inst.datadir) > DirLength): + DirLength = len(inst.datadir) + # Get the ETCD instance and longest path in the all instance + if (hasattr(nodeInfo, 'etcds')): + for inst in nodeInfo.etcds: + dataDirList.append(inst.datadir) + if (len(inst.datadir) > DirLength): + DirLength = len(inst.datadir) + + return (DirLength, dataDirList) + + def checkDirWriteable(self, dirPath, user, flag=""): + """ + function : Check if target directory is writeable for user. + input : String,String + output : boolean + """ + return os.access(dirPath, os.W_OK) + + def checkSingleDirectoryPermission(self, singledir, desc, + INDENTATION_VALUE_INT): + """ + function: Check Directory Permissions + input: String, String, int + output: int + """ + # The directory must be a folder + if (not os.path.isdir(singledir)): + g_result.append( + "%s: Abnormal reason: Directory does not exist." % ( + "%s directory(%s)" % (desc, singledir)).ljust( + INDENTATION_VALUE_INT)) + return -1 + # Gets the folder permissions + currentPremission = int(oct(os.stat(singledir).st_mode)[-3:]) + # Check the write access and compare the permission size + if (self.checkDirWriteable(singledir, self.user) + and currentPremission <= DIRECTORY_MODE): + + g_result.append( + "%s: Normal" % ("%s directory(%s) permissions %s" % ( + desc, singledir, str(currentPremission))).ljust( + INDENTATION_VALUE_INT)) + return 0 + elif (currentPremission > DIRECTORY_MODE): + g_result.append( + "%s: Abnormal reason: Directory permission" + " can not exceed 750." 
+ % (("%s directory(%s) permissions %s" + % (desc, singledir, + str(currentPremission))).ljust(INDENTATION_VALUE_INT))) + return -1 + else: + g_result.append( + "%s: Abnormal reason: Directory is not writable for users." + % ("%s directory(%s) permissions %s" + % (desc, singledir, + str(currentPremission))).ljust(INDENTATION_VALUE_INT)) + return -1 + + def doCheck(self): + global g_chList + global g_result + resultList = [] + g_result = [] + nodeInfo = self.cluster.getDbNodeByName(self.host) + tmpDir = DefaultValue.getEnv("PGHOST") + logDir = DefaultValue.getEnv("GAUSSLOG") + toolDir = DefaultValue.getEnv("GPHOME") + (intervalLen, instList) = self.obtainDataDirLength(nodeInfo) + if intervalLen < len(self.cluster.appPath): + intervalLen = len(self.cluster.appPath) + if intervalLen < len(logDir): + intervalLen = len(logDir) + INDENTATION_VALUE_INT = intervalLen + 44 + # Check the permissions for appPath + resultList.append( + self.checkSingleDirectoryPermission(self.cluster.appPath, + "AppPath", + INDENTATION_VALUE_INT)) + g_chList.append(self.cluster.appPath) + # Check the permissions for tmpPath + resultList.append(self.checkSingleDirectoryPermission( + tmpDir, "Tmp", INDENTATION_VALUE_INT)) + # Check the permissions for logPath + g_chList.append(tmpDir) + resultList.append(self.checkSingleDirectoryPermission( + logDir, "Log", INDENTATION_VALUE_INT)) + # Check the permissions for logPath + g_chList.append(logDir) + resultList.append( + self.checkSingleDirectoryPermission(toolDir, "ToolPath", + INDENTATION_VALUE_INT)) + # Check the permissions for all CMserver + g_chList.append(toolDir) + # Check the permissions for all DB instance + for inst in nodeInfo.datanodes: + resultList.append( + self.checkSingleDirectoryPermission(inst.datadir, "DN", + INDENTATION_VALUE_INT)) + # Check the xlog permissions for all DB instance + xlogDir = "%s/pg_xlog" % inst.datadir + resultList.append( + self.checkSingleDirectoryPermission(xlogDir, "DN Xlog", + INDENTATION_VALUE_INT)) + g_chList.append(inst.datadir) + g_chList.append(xlogDir) + if (-1 in resultList): + self.result.rst = ResultStatus.NG + else: + self.result.rst = ResultStatus.OK + self.result.val = "" + for detail in g_result: + self.result.val = self.result.val + '%s\n' % detail + + def doSet(self): + resultStr = "" + for dirName in g_chList: + g_file.changeOwner(self.user, dirName, True) + g_file.changeMode(DIRECTORY_MODE, dirName) + self.result.val = "Set DirPermissions completely." diff --git a/script/gspylib/inspection/items/cluster/CheckEnvProfile.py b/script/gspylib/inspection/items/cluster/CheckEnvProfile.py new file mode 100644 index 0000000..3588ec1 --- /dev/null +++ b/script/gspylib/inspection/items/cluster/CheckEnvProfile.py @@ -0,0 +1,151 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. 
+# ---------------------------------------------------------------------------- +import os +from gspylib.common.Common import DefaultValue +from gspylib.inspection.common import SharedFuncs +from gspylib.inspection.common.CheckItem import BaseItem +from gspylib.inspection.common.CheckResult import ResultStatus +from gspylib.os.gsfile import g_file +from gspylib.common.VersionInfo import VersionInfo + +g_envProfileDist = {} + + +class CheckEnvProfile(BaseItem): + def __init__(self): + super(CheckEnvProfile, self).__init__(self.__class__.__name__) + + def getProcessEnv(self, ProcessNum, Process): + abnormal_flag = False + processEnvDist = {} + # Get environment variables + if (os.path.isfile("/proc/%s/environ" % ProcessNum)): + envInfoList = g_file.readFile("/proc/%s/environ" % ProcessNum)[ + 0].split('\0') + for env in envInfoList: + envName = env.split('=')[0].strip() + processEnvDist[envName] = env.split('=')[-1].strip() + for env in g_envProfileDist.keys(): + # environment variables if exist + if (not env in processEnvDist.keys() or + not processEnvDist[env]): + abnormal_flag = True + self.result.val += "There is no env[%s] in " \ + "process %s[%s].\n " \ + % (env, Process, ProcessNum) + continue + # environment variables is GAUSSHOME + if (env == "GAUSSHOME"): + if (g_envProfileDist[env] != processEnvDist[env]): + abnormal_flag = True + self.result.val += "The env[GAUSSHOME] is " \ + "inconsistent in process %s[%s] " \ + "and system.\nProcess: %s\n" \ + % (Process, ProcessNum, + processEnvDist[env]) + ##environment variables is PATH + elif (env == "PATH"): + binPath = "%s/bin" % g_envProfileDist["GAUSSHOME"] + ProcessEnvList = processEnvDist[env].split(':') + if (binPath not in ProcessEnvList): + abnormal_flag = True + self.result.val += "There is no [%s] in " \ + "process %s[%s]'s environment " \ + "variable [%s].\n " \ + % (binPath, Process, + ProcessNum, env) + else: + libPath = "%s/lib" % g_envProfileDist["GAUSSHOME"] + ProcessEnvList = processEnvDist[env].split(':') + if (libPath not in ProcessEnvList): + abnormal_flag = True + self.result.val += "There is no [%s] in process" \ + " %s[%s]'s environment variable" \ + " [%s].\n " % (libPath, Process, + ProcessNum, env) + + return abnormal_flag + + def doCheck(self): + g_envProfileDist["GAUSSHOME"] = DefaultValue.getEnv("GAUSSHOME") + g_envProfileDist["PATH"] = DefaultValue.getEnv("PATH") + g_envProfileDist["LD_LIBRARY_PATH"] = DefaultValue.getEnv( + "LD_LIBRARY_PATH") + + self.result.val = "" + ProcessList = [] + ProcessDisk = {} + abnormal_flag = False + if (g_envProfileDist["GAUSSHOME"] == ""): + abnormal_flag = True + self.result.val += "The environmental variable " \ + "GAUSSHOME is empty.\n" + else: + self.result.val += "GAUSSHOME %s\n" % g_envProfileDist[ + "GAUSSHOME"] + + libPath = "%s/lib" % g_envProfileDist["GAUSSHOME"] + if (libPath not in g_envProfileDist["LD_LIBRARY_PATH"].split(':')): + abnormal_flag = True + self.result.val += \ + VersionInfo.PRODUCT_NAME + \ + " lib path does not exist in LD_LIBRARY_PATH.\n" + else: + self.result.val += "LD_LIBRARY_PATH %s\n" % libPath + binPath = "%s/bin" % g_envProfileDist["GAUSSHOME"] + # Whether the environment variable bin is in path + if (binPath not in g_envProfileDist["PATH"].split(':')): + abnormal_flag = True + self.result.val += VersionInfo.PRODUCT_NAME + \ + " bin path does not exist in PATH.\n" + else: + self.result.val += "PATH %s\n" % binPath + + if abnormal_flag: + self.result.rst = ResultStatus.NG + return + + # Gets the current node information + nodeInfo = 
self.cluster.getDbNodeByName(self.host)
+        # check the number of instances
+        if len(nodeInfo.datanodes) > 0:
+            ProcessList.append("gaussdb")
+
+        # Query each process
+        for Process in ProcessList:
+            cmd = "ps ux | grep '%s/bin/%s' | grep -v 'grep' |" \
+                  " awk '{print $2}'" % (self.cluster.appPath, Process)
+            output = SharedFuncs.runShellCmd(cmd, self.user, self.mpprcFile)
+            if (output != ""):
+                if (len(output.split('\n')) > 1):
+                    for ProcessNum in output.split('\n'):
+                        ProcessDisk[ProcessNum] = [Process]
+                else:
+                    ProcessDisk[output] = [Process]
+            else:
+                self.result.val += "The process %s does not exist.\n" \
+                                   % Process
+                abnormal_flag = True
+        for ProcessNum in ProcessDisk.keys():
+            # Get the process environment variables
+            result = self.getProcessEnv(ProcessNum, ProcessDisk[ProcessNum])
+            if not abnormal_flag:
+                abnormal_flag = result
+
+        if abnormal_flag:
+            self.result.rst = ResultStatus.NG
+        else:
+            self.result.rst = ResultStatus.OK
diff --git a/script/gspylib/inspection/items/cluster/CheckGaussVer.py b/script/gspylib/inspection/items/cluster/CheckGaussVer.py
new file mode 100644
index 0000000..5ed2a96
--- /dev/null
+++ b/script/gspylib/inspection/items/cluster/CheckGaussVer.py
@@ -0,0 +1,47 @@
+# -*- coding:utf-8 -*-
+# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+#
+# openGauss is licensed under Mulan PSL v2.
+# You can use this software according to the terms
+# and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+#
+#          http://license.coscl.org.cn/MulanPSL2
+#
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+# ----------------------------------------------------------------------------
+from gspylib.inspection.common import SharedFuncs
+from gspylib.inspection.common.CheckItem import BaseItem
+from gspylib.inspection.common.CheckResult import ResultStatus
+
+
+class CheckGaussVer(BaseItem):
+    def __init__(self):
+        super(CheckGaussVer, self).__init__(self.__class__.__name__)
+
+    def doCheck(self):
+        gaussdbVersion = ""
+        gsqlVersion = ""
+        # Get the gaussdb version
+        cmd = "gaussdb -V | awk '{print $4\"_\"$6}'"
+        self.result.raw = cmd + "\n"
+        gaussdbVersion = SharedFuncs.runShellCmd(cmd, "", self.mpprcFile)
+        if (gaussdbVersion[-1] == ")"):
+            gaussdbVersion = gaussdbVersion[:-1]
+        # Get the gsql version
+        cmd = "gsql -V | awk '{print $4\"_\"$6}'"
+        self.result.raw += cmd
+        gsqlVersion = SharedFuncs.runShellCmd(cmd, "", self.mpprcFile)
+        if (gsqlVersion[-1] == ")"):
+            gsqlVersion = gsqlVersion[:-1]
+        # The two version numbers must match
+        if gaussdbVersion and gaussdbVersion == gsqlVersion:
+            self.result.rst = ResultStatus.OK
+        else:
+            self.result.rst = ResultStatus.NG
+        self.result.val = "gaussdb Version: %s \ngsql Version: %s" % (
+            gaussdbVersion, gsqlVersion)
diff --git a/script/gspylib/inspection/items/cluster/CheckIntegrity.py b/script/gspylib/inspection/items/cluster/CheckIntegrity.py
new file mode 100644
index 0000000..7dafbac
--- /dev/null
+++ b/script/gspylib/inspection/items/cluster/CheckIntegrity.py
@@ -0,0 +1,38 @@
+# -*- coding:utf-8 -*-
+# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+#
+# openGauss is licensed under Mulan PSL v2.
+# You can use this software according to the terms
+# and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +import os +from gspylib.inspection.common.CheckItem import BaseItem +from gspylib.inspection.common.CheckResult import ResultStatus +from gspylib.os.gsfile import g_file + + +class CheckIntegrity(BaseItem): + def __init__(self): + super(CheckIntegrity, self).__init__(self.__class__.__name__) + + def doCheck(self): + gaussHome = self.cluster.appPath + gaussdbFile = os.path.join(gaussHome, "bin/gaussdb") + gaussdbIntegrity = g_file.getFileSHA256(gaussdbFile) + self.result.raw = gaussdbIntegrity + if (gaussdbIntegrity != "" and len(gaussdbIntegrity) == 64): + self.result.rst = ResultStatus.OK + self.result.val = "gaussdb sha256sum: %s" % gaussdbIntegrity + else: + self.result.rst = ResultStatus.NG + self.result.val = "Failed to obtain gaussdb sha256 value." \ + " Error:\n%s" % gaussdbIntegrity diff --git a/script/gspylib/inspection/items/cluster/CheckLargeFile.py b/script/gspylib/inspection/items/cluster/CheckLargeFile.py new file mode 100644 index 0000000..73a429b --- /dev/null +++ b/script/gspylib/inspection/items/cluster/CheckLargeFile.py @@ -0,0 +1,96 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. 
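Note: CheckIntegrity above delegates to g_file.getFileSHA256 and only validates that the result looks like a 64-character hex digest. A standard-library equivalent of that digest, streamed so a multi-GB gaussdb binary never has to fit in memory (function name is illustrative):

    import hashlib

    def sha256_of(path, chunk=1024 * 1024):
        # Read the file in fixed-size blocks and fold them into the digest.
        h = hashlib.sha256()
        with open(path, "rb") as fp:
            for block in iter(lambda: fp.read(chunk), b""):
                h.update(block)
        return h.hexdigest()  # 64 hex characters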
+# ---------------------------------------------------------------------------- + +import os +import subprocess +from multiprocessing.dummy import Pool as ThreadPool +from gspylib.common.Common import DefaultValue +from gspylib.inspection.common import SharedFuncs +from gspylib.inspection.common.CheckItem import BaseItem +from gspylib.inspection.common.CheckResult import ResultStatus +from gspylib.common.ErrorCode import ErrorCode + + +class CheckLargeFile(BaseItem): + def __init__(self): + super(CheckLargeFile, self).__init__(self.__class__.__name__) + self.Threshold_SIZE = None + + def preCheck(self): + super(CheckLargeFile, self).preCheck() + if (not (self.threshold.__contains__('size'))): + raise Exception(ErrorCode.GAUSS_530["GAUSS_53013"] + % "The threshold size") + self.Threshold_SIZE = (self.threshold['size']) + + def obtainDataDir(self, nodeInfo): + dataDirList = [] + for inst in nodeInfo.datanodes: + dataDirList.append(inst.datadir) + return dataDirList + + def checkLargeFile(self, path): + fileList = [] + failList = [] + cmd = "find %s -type f -size +%s" % (path, self.Threshold_SIZE) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0 and output.find("Permission denied") > 0): + for fileName in output.splitlines(): + if (fileName.find("Permission denied") > 0): + failList.append(fileName) + else: + for fileName in output.splitlines(): + fileList.append(os.path.join(path, fileName)) + return fileList, failList + + def doCheck(self): + outputList = [] + failList = [] + pathList = [] + if (self.cluster): + paths = self.obtainDataDir( + self.cluster.getDbNodeByName(self.host)) + else: + raise Exception(ErrorCode.GAUSS_530["GAUSS_53013"] % "cluster") + for path in paths: + if (path): + pathList.append(path) + pool = ThreadPool(DefaultValue.getCpuSet()) + results = pool.map(self.checkLargeFile, pathList) + pool.close() + pool.join() + + for outlist, flist in results: + if (outlist): + outputList.extend(outlist) + if (flist): + failList.extend(flist) + + if (len(outputList) == 0 and len(failList) == 0): + self.result.rst = ResultStatus.OK + self.result.val = "No file more than %s" % self.Threshold_SIZE + else: + if (len(outputList) > 0): + self.result.val = "Files more than %s:\n%s" % ( + self.Threshold_SIZE, "\n".join(outputList)) + if (len(failList) > 0): + self.result.val = "Files more than %s:\n%s\n%s" % ( + self.Threshold_SIZE, "\n".join(outputList), + "\n".join(failList)) + else: + self.result.val = "%s" % ("\n".join(failList)) + self.result.rst = ResultStatus.NG diff --git a/script/gspylib/inspection/items/cluster/CheckMpprcFile.py b/script/gspylib/inspection/items/cluster/CheckMpprcFile.py new file mode 100644 index 0000000..54d6b9d --- /dev/null +++ b/script/gspylib/inspection/items/cluster/CheckMpprcFile.py @@ -0,0 +1,97 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. 
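Note: CheckLargeFile above fans one `find` out per data directory through multiprocessing.dummy's thread pool, then merges the per-directory (hits, permission-failures) pairs. The skeleton of that fan-out with a placeholder worker (worker(path) must return such a pair; names are illustrative):

    from multiprocessing.dummy import Pool as ThreadPool

    def scan_all(paths, worker, threads=4):
        # Run worker once per path on `threads` threads, then flatten
        # the two result lists that every worker returns.
        with ThreadPool(threads) as pool:
            results = pool.map(worker, paths)
        hits, failures = [], []
        for h, f in results:
            hits.extend(h)
            failures.extend(f)
        return hits, failures

Threads (rather than processes) are the right fit here because each worker just blocks on an external `find` subprocess.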
+# ----------------------------------------------------------------------------
+
+import os
+from gspylib.inspection.common import SharedFuncs
+from gspylib.inspection.common.CheckItem import BaseItem
+from gspylib.inspection.common.CheckResult import ResultStatus
+
+
+class CheckMpprcFile(BaseItem):
+    def __init__(self):
+        super(CheckMpprcFile, self).__init__(self.__class__.__name__)
+
+    def doCheck(self):
+        self.result.rst = ResultStatus.NG
+        self.result.val = "There are illegal characters in the mpprc file"
+        appPath = self.cluster.appPath
+        mpprcFile = self.mpprcFile
+        bashfile = "/home/%s/.bashrc" % self.user
+        if (mpprcFile == "" or not mpprcFile or mpprcFile == "/etc/profile"
+                or mpprcFile == "~/.bashrc" or mpprcFile == bashfile
+                or not os.path.exists(mpprcFile)):
+            self.result.rst = ResultStatus.NG
+            self.result.val = "There is no mpprc file"
+            return
+        try:
+            with open(mpprcFile, 'r') as fp:
+                env_list = fp.readlines()
+            while '' in env_list:
+                env_list.remove('')
+            # get ec content
+            ec_content = "if [ -f '%s/utilslib/env_ec' ] &&" \
+                         " [ `id -u` -ne 0 ];" \
+                         " then source '%s/utilslib/env_ec'; fi " \
+                         % (appPath, appPath)
+            ec_content_old = "if [ -f '%s/utilslib/env_ec' ] ;" \
+                             " then source '%s/utilslib/env_ec'; fi " \
+                             % (appPath, appPath)
+            # remove ec content from list
+            if ec_content in env_list:
+                env_list.remove(ec_content)
+            if ec_content_old in env_list:
+                env_list.remove(ec_content_old)
+            # whitelisted environment variable names
+            list_white = ["ELK_CONFIG_DIR", "ELK_SYSTEM_TABLESPACE",
+                          "MPPDB_ENV_SEPARATE_PATH", "GPHOME", "PATH",
+                          "LD_LIBRARY_PATH", "PYTHONPATH",
+                          "GAUSS_WARNING_TYPE", "GAUSSHOME", "PATH",
+                          "LD_LIBRARY_PATH",
+                          "S3_CLIENT_CRT_FILE", "GAUSS_VERSION", "PGHOST",
+                          "GS_CLUSTER_NAME", "GAUSSLOG",
+                          "GAUSS_ENV", "KRB5_CONFIG", "PGKRBSRVNAME",
+                          "KRBHOSTNAME", "ETCD_UNSUPPORTED_ARCH"]
+            # blacklisted shell metacharacters
+            list_black = ["|", ";", "&", "<", ">", "`", "\\", "'", "\"",
+                          "{", "}", "(", ")", "[", "]", "~", "*", "?",
+                          "!", "\n"]
+            for env in env_list:
+                env = env.strip()
+                if env == "":
+                    continue
+                if len(env.split()) != 2:
+                    return
+                if env.split()[0] == "umask" and env.split()[1] == "077":
+                    continue
+                for black in list_black:
+                    flag = env.find(black)
+                    if flag >= 0:
+                        return
+                if ((not env.startswith("export")) or (
+                        env.split()[0] != "export")):
+                    return
+                else:
+                    val = env[6:].strip()
+                    if val.find("=") < 0:
+                        return
+                    elif (val.split("=")[0].strip() not in list_white):
+                        return
+            self.result.rst = ResultStatus.OK
+            self.result.val = "Mpprc file is ok"
+        except Exception as e:
+            self.result.rst = ResultStatus.NG
+            self.result.val = "Cannot read the mpprc file"
diff --git a/script/gspylib/inspection/items/cluster/CheckPortRange.py b/script/gspylib/inspection/items/cluster/CheckPortRange.py
new file mode 100644
index 0000000..d65fda3
--- /dev/null
+++ b/script/gspylib/inspection/items/cluster/CheckPortRange.py
@@ -0,0 +1,78 @@
+# -*- coding:utf-8 -*-
+# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+#
+# openGauss is licensed under Mulan PSL v2.
+# You can use this software according to the terms
+# and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+#
+#          http://license.coscl.org.cn/MulanPSL2
+#
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
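Note: the whitelist/blacklist walk in CheckMpprcFile above reduces to: every non-empty line must be `umask 077` or `export NAME=...` with NAME in a known-good set and no shell metacharacters anywhere. A condensed sketch of validating one line (the whitelist is abbreviated from the check above; function name is illustrative):

    WHITELIST = {"GPHOME", "PATH", "LD_LIBRARY_PATH", "GAUSSHOME",
                 "PGHOST", "GAUSSLOG", "GAUSS_ENV"}
    BLACKLIST = "|;&<>`\\'\"{}()[]~*?!"

    def env_line_ok(line):
        line = line.strip()
        if not line or line == "umask 077":
            return True
        if any(ch in line for ch in BLACKLIST):
            return False  # shell metacharacters are rejected outright
        parts = line.split(None, 1)
        if len(parts) != 2 or parts[0] != "export" or "=" not in parts[1]:
            return False
        return parts[1].split("=", 1)[0].strip() in WHITELIST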
+# ---------------------------------------------------------------------------- +from gspylib.inspection.common.CheckItem import BaseItem +from gspylib.inspection.common.CheckResult import ResultStatus +from gspylib.common.ErrorCode import ErrorCode +from gspylib.os.gsfile import g_file + + +class CheckPortRange(BaseItem): + def __init__(self): + super(CheckPortRange, self).__init__(self.__class__.__name__) + self.ip_local_port_range = None + + def preCheck(self): + # check current node contains cn instances if not raise exception + super(CheckPortRange, self).preCheck() + # check the threshold was set correctly + if (not self.threshold.__contains__('ip_local_port_range')): + raise Exception(ErrorCode.GAUSS_530["GAUSS_53013"] + % "threshold ip_local_port_range") + self.ip_local_port_range = self.threshold['ip_local_port_range'] + + def getPort(self): + cooInst = None + portList = {} + dbNode = self.cluster.getDbNodeByName(self.host) + for dnInst in dbNode.datanodes: + portList[dnInst.port] = dnInst.instanceRole + portList[dnInst.haPort] = dnInst.instanceRole + + return portList + + def doCheck(self): + parRes = "" + flag = None + instance = {0: "CMSERVER", 1: "GTM", 2: "ETCD", 3: "COODINATOR", + 4: "DATANODE", 5: "CMAGENT"} + portList = self.getPort() + # Check the port range + output = g_file.readFile('/proc/sys/net/ipv4/ip_local_port_range')[ + 0].strip() + smallValue = output.split('\t')[0].strip() + bigValue = output.split('\t')[1].strip() + expect = self.ip_local_port_range.split() + if (int(smallValue) < int(expect[0].strip()) or int(bigValue) > int( + expect[1].strip())): + parRes += "The value of net.ipv4.ip_local_port_range is" \ + " incorrect, expect value is %s.\n" \ + % self.ip_local_port_range + parRes += "The value of net.ipv4.ip_local_port_range is %d %d." \ + % (int(smallValue), int(bigValue)) + + for port in portList.keys(): + if (int(port) <= int(bigValue) and int(port) >= int(smallValue)): + flag = 1 + parRes += "\n %s" \ + % ("The instance %s port \"%d\" is incorrect." + % (instance[portList[port]], int(port))) + if (flag == 1): + self.result.rst = ResultStatus.NG + else: + self.result.rst = ResultStatus.OK + self.result.val = parRes + self.result.raw = output diff --git a/script/gspylib/inspection/items/cluster/CheckProStartTime.py b/script/gspylib/inspection/items/cluster/CheckProStartTime.py new file mode 100644 index 0000000..d3ff795 --- /dev/null +++ b/script/gspylib/inspection/items/cluster/CheckProStartTime.py @@ -0,0 +1,127 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. 
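Note: CheckPortRange above enforces two things: the kernel's ephemeral range must cover at least the expected bounds, and no instance may listen on a port inside that range, since an outgoing connection can otherwise grab the port before the instance binds it. The collision test in isolation (function name is illustrative):

    def ports_outside_ephemeral(instance_ports,
                                proc="/proc/sys/net/ipv4/ip_local_port_range"):
        # Return the instance ports that fall inside the ephemeral range.
        with open(proc) as fp:
            low, high = (int(x) for x in fp.read().split())
        return [p for p in instance_ports if low <= p <= high]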
+# ---------------------------------------------------------------------------- +import os +from datetime import datetime +from gspylib.inspection.common import SharedFuncs +from gspylib.inspection.common.CheckItem import BaseItem +from gspylib.common.Common import DefaultValue +from gspylib.inspection.common.CheckResult import ResultStatus + +monthdic = {"Jan": 1, "Feb": 2, "Mar": 3, "Apr": 4, "May": 5, "Jun": 6, + "Jul": 7, "Aug": 8, "Sep": 9, "Oct": 10, "Nov": 11, "Dec": 12} + + +class CheckProStartTime(BaseItem): + def __init__(self): + super(CheckProStartTime, self).__init__(self.__class__.__name__) + + def doCheck(self): + self.result.rst = ResultStatus.OK + timelist = [] + gaussPro = "gaussdb" + cmd = "ps -C %s -o lstart,args | grep -v grep | grep -v 'om_monitor'" \ + " 2>/dev/null" % gaussPro + output = SharedFuncs.runShellCmd(cmd, self.user, self.mpprcFile) + for line in output.splitlines()[1:]: + resultList = line.split() + year = resultList[4] + month = monthdic[resultList[1]] + day = resultList[2] + time = resultList[3] + timestring = "%s-%s-%s %s" % (year, month, day, time) + dattime = datetime.strptime(timestring, '%Y-%m-%d %H:%M:%S') + timelist.append(dattime) + if (timelist): + mintime = timelist[0] + maxtime = timelist[0] + else: + mintime = None + maxtime = None + for tmpdatetime in timelist: + if (tmpdatetime < mintime): + mintime = tmpdatetime + elif (tmpdatetime > maxtime): + maxtime = tmpdatetime + if (maxtime and mintime): + if (int((maxtime - mintime).days) > 0 or int( + (maxtime - mintime).seconds) > 300): + self.result.rst = ResultStatus.WARNING + self.result.val = output + else: + self.result.rst = ResultStatus.OK + self.result.val = output + + def postAnalysis(self, itemResult): + errors = [] + timedic = {} + valdic = {} + allhost = [] + nghost = [] + Mintime = None + for v in itemResult.getLocalItems(): + output = v.val + timelist = [] + for line in output.splitlines()[1:]: + resultList = line.split() + year = resultList[4] + month = monthdic[resultList[1]] + day = resultList[2] + time = resultList[3] + timestring = "%s-%s-%s %s" % (year, month, day, time) + dattime = datetime.strptime(timestring, '%Y-%m-%d %H:%M:%S') + timelist.append(dattime) + if (timelist): + mintime = timelist[0] + maxtime = timelist[0] + else: + mintime = None + maxtime = None + for tmpdatetime in timelist: + if (tmpdatetime < mintime): + mintime = tmpdatetime + elif (tmpdatetime > maxtime): + maxtime = tmpdatetime + timelist = [] + if (maxtime and mintime): + timelist.append(mintime) + timelist.append(maxtime) + if (Mintime and Mintime < mintime): + pass + else: + Mintime = mintime + if (timelist): + timedic[v.host] = timelist + valdic[v.host] = output + allhost.append(v.host) + for host in allhost: + hostmax = timedic[host][1] + if (int((hostmax - Mintime).days) > 0 or int( + (hostmax - Mintime).seconds) > 300): + if (host not in nghost): + nghost.append(host) + + if (nghost): + itemResult.rst = ResultStatus.WARNING + resultStr = "" + for host in nghost: + resultStr += "%s:\n%s\n" % (host, valdic[host]) + itemResult.analysis = resultStr + else: + itemResult.rst = ResultStatus.OK + itemResult.analysis = "Basically ,all the gaussdb process" \ + " start at the same time" + return itemResult diff --git a/script/gspylib/inspection/items/cluster/CheckProcessStatus.py b/script/gspylib/inspection/items/cluster/CheckProcessStatus.py new file mode 100644 index 0000000..5a68841 --- /dev/null +++ b/script/gspylib/inspection/items/cluster/CheckProcessStatus.py @@ -0,0 +1,45 @@ +# -*- coding:utf-8 
-*- +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +import subprocess +from gspylib.inspection.common.CheckItem import BaseItem +from gspylib.inspection.common.CheckResult import ResultStatus + + +class CheckProcessStatus(BaseItem): + def __init__(self): + super(CheckProcessStatus, self).__init__(self.__class__.__name__) + + def doCheck(self): + parRes = "" + flag = 0 + self.result.raw = "" + processList = ['gaussdb'] + for process in processList: + # Query process status + cmd = "ps -u %s -N | grep '\<%s\>'" % (self.user, process) + self.result.raw += "%s\n" % cmd + (status, output) = subprocess.getstatusoutput(cmd) + # Resolve and outputs the execution results + if (status == 0 and output.find("%s" % process) >= 0): + parRes += "\n %s" % (output) + flag = 1 + if (flag == 1): + self.result.rst = ResultStatus.NG + self.result.val = parRes + else: + self.result.rst = ResultStatus.OK + self.result.val = "All process Status is Normal." diff --git a/script/gspylib/inspection/items/cluster/CheckReadonlyMode.py b/script/gspylib/inspection/items/cluster/CheckReadonlyMode.py new file mode 100644 index 0000000..3624e3d --- /dev/null +++ b/script/gspylib/inspection/items/cluster/CheckReadonlyMode.py @@ -0,0 +1,36 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +from gspylib.inspection.common import SharedFuncs +from gspylib.inspection.common.CheckItem import BaseItem +from gspylib.inspection.common.CheckResult import ResultStatus + + +class CheckReadonlyMode(BaseItem): + def __init__(self): + super(CheckReadonlyMode, self).__init__(self.__class__.__name__) + + def doCheck(self): + sqlcmd = "show default_transaction_read_only;" + self.result.raw = sqlcmd + output = SharedFuncs.runSqlCmd(sqlcmd, self.user, "", self.port, + self.tmpPath, "postgres", + self.mpprcFile) + if (output == "off"): + self.result.rst = ResultStatus.OK + else: + self.result.rst = ResultStatus.NG + self.result.val = output diff --git a/script/gspylib/inspection/items/cluster/CheckSpecialFile.py b/script/gspylib/inspection/items/cluster/CheckSpecialFile.py new file mode 100644 index 0000000..efd3aea --- /dev/null +++ b/script/gspylib/inspection/items/cluster/CheckSpecialFile.py @@ -0,0 +1,206 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. 
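A note on the `ps` invocation in CheckProcessStatus.doCheck above: `-N` negates the `-u` selection, so the pipeline lists gaussdb processes owned by anyone except the cluster user, and any match means a stray instance under a foreign account. A minimal sketch (the user name is hypothetical):

    import subprocess

    user, proc = 'omm', 'gaussdb'  # hypothetical cluster user
    # -N negates -u: select processes NOT owned by `user`
    cmd = "ps -u %s -N | grep '\\<%s\\>'" % (user, proc)
    status, output = subprocess.getstatusoutput(cmd)
    if status == 0 and proc in output:
        print('gaussdb running under another account:\n' + output)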
+# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +import os +import subprocess +from multiprocessing.dummy import Pool as ThreadPool +from gspylib.common.Common import DefaultValue +from gspylib.inspection.common.CheckItem import BaseItem +from gspylib.inspection.common.CheckResult import ResultStatus +from gspylib.os.gsfile import g_file + + +class CheckSpecialFile(BaseItem): + def __init__(self): + super(CheckSpecialFile, self).__init__(self.__class__.__name__) + + def getDiskPath(self): + nodeDirs = [] + # get PGHOST Dir + tmpDir = DefaultValue.getEnv("PGHOST") + nodeDirs.append(tmpDir) + + # get gphome dir + gphome_path = DefaultValue.getEnv("GPHOME") + nodeDirs.append(gphome_path) + + # get log dir + log_path = DefaultValue.getEnv("GAUSSLOG") + nodeDirs.append(log_path) + + # get gausshome dir + gausshome_path = DefaultValue.getEnv("GAUSSHOME") + nodeDirs.append(os.path.realpath(gausshome_path)) + + hostName = DefaultValue.GetHostIpOrName() + dbNode = self.cluster.getDbNodeByName(hostName) + # including dn + for dbInst in dbNode.datanodes: + nodeDirs.append(dbInst.datadir) + + return nodeDirs + + def checkPathVaild(self, envValue): + """ + function: check path vaild + input : envValue + output: NA + """ + if (envValue.strip() == ""): + return 0 + # check path vaild + for rac in DefaultValue.PATH_CHECK_LIST: + flag = envValue.find(rac) + if flag >= 0: + return 1 + return 0 + + def ignorePath(self, path): + # Part of the root path and file permissions need to be ignored + ignorePathList = [] + toolPath = DefaultValue.getEnv("GPHOME") + sudoPath = os.path.join(toolPath, "sudo") + inspectionPath = os.path.join(toolPath, "script/inspection") + ignorePathList.append("%s/script/gs_preinstall" % toolPath) + ignorePathList.append("%s/script/gs_postuninstall" % toolPath) + ignorePathList.append("%s/script/gs_checkos" % toolPath) + + scriptPath = os.path.join(toolPath, "script") + scriptDirList = scriptPath.split('/') + inspectionDirList = inspectionPath.split('/') + # ignore own special files + if (path in ignorePathList or os.path.dirname(path) == sudoPath): + return True + else: + (filename, suffix) = os.path.splitext(path) + pathDirList = path.split('/') + # ignore .pyc file in GPHOME/script + if (path.find(scriptPath) == 0 and pathDirList[:len( + scriptDirList)] == scriptDirList and suffix == ".pyc"): + return True + # ignore GPHOME/script/inspection dir + elif (path.find(inspectionPath) == 0 and pathDirList[:len( + inspectionDirList)] == inspectionDirList): + return True + else: + return False + + def checkSpecialChar(self): + outputList = [] + failList = [] + pathList = [] + paths = self.getDiskPath() + for path in paths: + if (not path or not os.path.isdir(path)): + continue + else: + pathList.append(path) + pool = ThreadPool(DefaultValue.getCpuSet()) + results = pool.map(self.checkSingleSpecialChar, pathList) + pool.close() + pool.join() + for outlist, flist in results: + if (outlist): + outputList.extend(outlist) + if (flist): + failList.extend(flist) + if (len(outputList) > 0): + outputList = 
DefaultValue.Deduplication(outputList) + if (failList): + failList = DefaultValue.Deduplication(failList) + return outputList, failList + + def checkSingleSpecialChar(self, path): + # Check a single path + outputList = [] + failList = [] + cmd = "find '%s' -name '*'" % path + (status, output) = subprocess.getstatusoutput(cmd) + FileList = output.split('\n') + while '' in FileList: + FileList.remove('') + if (status != 0 and output.find("Permission denied") > 0): + for realPath in FileList: + if (realPath.find("Permission denied") > 0): + failList.append(realPath) + elif (self.checkPathVaild(realPath) != 0): + outputList.append(realPath) + else: + for realPath in FileList: + if (self.checkPathVaild(realPath) != 0): + outputList.append(realPath) + return outputList, failList + + ######################################################### + # get the files which under the all useful directory and + # its owner is not current execute use + ######################################################### + def checkErrorOwner(self, ownername): + outputList = [] + failList = [] + path = "" + for path in self.getDiskPath(): + if (not path or not os.path.isdir(path)): + continue + cmd = "find '%s' -iname '*' ! -user %s -print" % (path, ownername) + (status, output) = subprocess.getstatusoutput(cmd) + if (status == 0 and output != ""): + pathList = output.split("\n") + for path in pathList: + if (self.ignorePath(path)): + continue + outputList.append(path) + elif (output.find("Permission denied") > 0): + pathList = output.split("\n") + for path in pathList: + if (path.find("Permission denied") > 0): + failList.append(path) + continue + if (self.ignorePath(path)): + continue + outputList.append(path) + if (len(outputList) > 0): + outputList = DefaultValue.Deduplication(outputList) + return outputList, failList + + def doCheck(self): + parRes = "" + flag = 0 + output = "" + outputList, failList = self.checkSpecialChar() + for output in outputList: + if (output != ""): + flag = 1 + parRes += "\nSpecial characters file: \"%s\"" % output + + outputList, errorList = self.checkErrorOwner(self.user) + for output in outputList: + if (output != ""): + flag = 1 + parRes += "\nFile owner should be %s." \ + " Incorrect owner file: \"%s\"" \ + % (self.user, output) + failList.extend(errorList) + if (failList): + flag = 1 + failList = DefaultValue.Deduplication(failList) + parRes += "\n%s" % ("\n".join(failList)) + if (flag == 1): + self.result.rst = ResultStatus.NG + self.result.val = parRes + else: + self.result.rst = ResultStatus.OK + self.result.val = "All files are normal." diff --git a/script/gspylib/inspection/items/cluster/CheckUpVer.py b/script/gspylib/inspection/items/cluster/CheckUpVer.py new file mode 100644 index 0000000..960911b --- /dev/null +++ b/script/gspylib/inspection/items/cluster/CheckUpVer.py @@ -0,0 +1,45 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. 
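The fan-out in checkSpecialChar above uses multiprocessing.dummy, which provides a thread pool behind the Pool API: each worker returns an (outputList, failList) pair and the pairs are merged, then deduplicated. A minimal sketch of the pattern (the scan function is a hypothetical stand-in):

    from multiprocessing.dummy import Pool as ThreadPool

    def scan(path):
        # hypothetical stand-in for checkSingleSpecialChar
        hits, failures = [], []
        return hits, failures

    pool = ThreadPool(4)  # the real code sizes this from the CPU set
    results = pool.map(scan, ['/path/a', '/path/b'])
    pool.close()
    pool.join()
    all_hits = sorted({h for hits, _ in results for h in hits})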
+# ---------------------------------------------------------------------------- +import os +from gspylib.inspection.common.CheckItem import BaseItem +from gspylib.inspection.common.CheckResult import ResultStatus +from gspylib.common.ErrorCode import ErrorCode +from gspylib.os.gsfile import g_file + + +class CheckUpVer(BaseItem): + def __init__(self): + super(CheckUpVer, self).__init__(self.__class__.__name__) + self.upgradepath = None + + def preCheck(self): + # check the threshold was set correctly + if (not self.threshold.__contains__("upgradepath")): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50236"] + % "The upgrade path") + self.upgradepath = self.threshold['upgradepath'] + if not os.path.isfile(os.path.join(self.upgradepath, "version.cfg")): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] + % ("new version file[%s]" % + os.path.join(self.upgradepath, "version.cfg"))) + + def doCheck(self): + packageFile = os.path.realpath( + os.path.join(self.upgradepath, "version.cfg")) + output = g_file.readFile(packageFile) + self.result.rst = ResultStatus.OK + self.result.val = "".join(output) diff --git a/script/gspylib/inspection/items/database/CheckArchiveParameter.py b/script/gspylib/inspection/items/database/CheckArchiveParameter.py new file mode 100644 index 0000000..f549456 --- /dev/null +++ b/script/gspylib/inspection/items/database/CheckArchiveParameter.py @@ -0,0 +1,70 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. 
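CheckUpVer.preCheck above follows the validation pattern shared by these items: verify the threshold key exists, then verify the artifact it points at. Stripped to its essentials (path is hypothetical):

    import os

    threshold = {'upgradepath': '/opt/upgrade'}  # hypothetical item threshold
    if 'upgradepath' not in threshold:
        raise Exception("threshold 'upgradepath' is not configured")
    cfg = os.path.join(threshold['upgradepath'], 'version.cfg')
    if not os.path.isfile(cfg):
        raise Exception('new version file [%s] does not exist' % cfg)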
+# ----------------------------------------------------------------------------
+import subprocess
+from gspylib.inspection.common import SharedFuncs
+from gspylib.inspection.common.CheckItem import BaseItem
+from gspylib.inspection.common.CheckResult import ResultStatus
+
+
+class CheckArchiveParameter(BaseItem):
+    def __init__(self):
+        super(CheckArchiveParameter, self).__init__(self.__class__.__name__)
+
+    def doCheck(self):
+        sqlcmd = "show archive_mode;"
+        self.result.raw = sqlcmd
+
+        output = SharedFuncs.runSqlCmd(sqlcmd, self.user, "", self.port,
+                                       self.tmpPath, "postgres",
+                                       self.mpprcFile)
+        if output.strip() == "on":
+            sqlcmd = "show archive_command;"
+            self.result.raw = sqlcmd
+            output = SharedFuncs.runSqlCmd(sqlcmd, self.user, "", self.port,
+                                           self.tmpPath, "postgres",
+                                           self.mpprcFile)
+            dbNode = self.cluster.getDbNodeByName(self.host)
+            # single-instance clusters have no coordinators, so take the
+            # expected archive location from the matching instance type
+            if self.cluster.isSingleInstCluster():
+                archiveDir = dbNode.datanodes[0].datadir
+            else:
+                archiveDir = dbNode.coordinators[0].datadir
+            if output.find(archiveDir) < 0:
+                self.result.rst = ResultStatus.NG
+            else:
+                self.result.rst = ResultStatus.OK
+        else:
+            self.result.rst = ResultStatus.OK
+        self.result.val = output
+
+    def doSet(self):
+        dbNode = self.cluster.getDbNodeByName(self.host)
+        if self.cluster.isSingleInstCluster():
+            archiveDir = dbNode.datanodes[0].datadir
+        else:
+            archiveDir = dbNode.coordinators[0].datadir
+        # %p/%f are expanded by the server, so escape them as %% here
+        cmd = "gs_guc reload -N all -I " \
+              "all -c \"archive_command = 'cp -P --remove-destination" \
+              " %%p %s/pg_xlog/archive/%%f'\"" % archiveDir
+        (status, output) = subprocess.getstatusoutput(cmd)
+        if (status != 0):
+            resultStr = "Failed to set ArchiveMode.\n Error : %s." % output
+            resultStr += "The cmd is %s " % cmd
+        else:
+            resultStr = "Set ArchiveMode successfully."
+        self.result.val = resultStr
diff --git a/script/gspylib/inspection/items/database/CheckCreateView.py b/script/gspylib/inspection/items/database/CheckCreateView.py
new file mode 100644
index 0000000..0d2aaf3
--- /dev/null
+++ b/script/gspylib/inspection/items/database/CheckCreateView.py
@@ -0,0 +1,103 @@
+# -*- coding:utf-8 -*-
+# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+#
+# openGauss is licensed under Mulan PSL v2.
+# You can use this software according to the terms
+# and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+#
+#          http://license.coscl.org.cn/MulanPSL2
+#
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
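A note on the archive_command string in CheckArchiveParameter.doSet above: %p and %f are server-side placeholders (path and file name of the WAL segment, expanded at archive time), hence the %% escaping in the Python format string. Rendered with a hypothetical data directory:

    datadir = '/gaussdb/data/dn1'  # hypothetical
    guc = "archive_command = 'cp -P --remove-destination %%p" \
          " %s/pg_xlog/archive/%%f'" % datadir
    # -> archive_command = 'cp -P --remove-destination %p
    #    /gaussdb/data/dn1/pg_xlog/archive/%f'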
+# ---------------------------------------------------------------------------- +import os +import subprocess +from gspylib.inspection.common import SharedFuncs +from gspylib.inspection.common.CheckItem import BaseItem +from gspylib.inspection.common.CheckResult import ResultStatus +from gspylib.common.Common import DefaultValue + +SQLPATH = os.path.realpath( + os.path.join(os.path.split(os.path.realpath(__file__))[0], + "../../lib/checkcreateview/")) +OUTPUTPATH = os.path.realpath( + os.path.join(os.path.split(os.path.realpath(__file__))[0], + "../../output/")) + + +class CheckCreateView(BaseItem): + def __init__(self): + super(CheckCreateView, self).__init__(self.__class__.__name__) + + def doCheck(self): + flag = 1 + resultStr = "" + databaseListSql = "select datname from pg_database where datname != " \ + "'template0';" + output = SharedFuncs.runSqlCmd(databaseListSql, self.user, "", + self.port, self.tmpPath, "postgres", + self.mpprcFile) + dbList = output.split("\n") + sqlFileName = os.path.join(SQLPATH, "check_viewdef.sql") + cmd = "chmod %s %s" % (DefaultValue.KEY_DIRECTORY_MODE, sqlFileName) + SharedFuncs.runShellCmd(cmd) + for databaseName in dbList: + sqlFile = "%s/viewdef_%s.sql" % (OUTPUTPATH, databaseName) + cmd = "gsql -d %s -p %s -q -t -f %s -o %s/viewdef_%s.sql" % ( + databaseName, self.port, sqlFileName, OUTPUTPATH, databaseName) + if (os.getuid() == 0): + cmd = "su - %s -c \"source %s;%s\"" % ( + self.user, self.mpprcFile, cmd) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + resultStr += "execute %s error. Error:%s\n" % (cmd, output) + flag = 0 + continue + cmd = "gsql -d %s -p %s -f %s -o %s/viewdef_%s.out " % ( + databaseName, self.port, sqlFile, OUTPUTPATH, databaseName) + if (os.getuid() == 0): + cmd = "su - %s -c \"source %s;%s\"" % ( + self.user, self.mpprcFile, cmd) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + resultStr += "execute %s error. Error:%s\n" % (cmd, output) + flag = 0 + continue + else: + for line in output.split("\n"): + line = line.split(":") + for word in line: + if ("ERROR" == word.strip()): + flag = 0 + errorNum = line[line.index(' ERROR') - 1] + with open(sqlFile, 'r') as fp: + sqlLines = fp.readlines() + view = sqlLines[int(errorNum) - 1].split()[ + -1].strip(';') + viewList = view.strip('\"').split('.') + resultStr = "view %s needs to be fixed" % \ + sqlLines[int(errorNum) - 1].split()[ + -1].strip(';') + cmd = "gs_dump %s -p %s -t '\"%s\"'.'\"%s\"' -f " \ + "'%s/%s.sql'" % ( + databaseName, self.port, viewList[0], + viewList[1], + OUTPUTPATH, view.strip('\"')) + if (os.getuid() == 0): + cmd = "su - %s -c \"source %s;%s\"" % ( + self.user, self.mpprcFile, cmd) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + self.result.val += "execute %s error\n" % cmd + else: + continue + if (flag == 0): + self.result.rst = ResultStatus.NG + self.result.val = resultStr + else: + self.result.rst = ResultStatus.OK + self.result.val = " No view needs to be fixed." diff --git a/script/gspylib/inspection/items/database/CheckCurConnCount.py b/script/gspylib/inspection/items/database/CheckCurConnCount.py new file mode 100644 index 0000000..185fbbf --- /dev/null +++ b/script/gspylib/inspection/items/database/CheckCurConnCount.py @@ -0,0 +1,52 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. 
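CheckCreateView above leans on gsql's batch flags: a -q -t -f run dumps the view definitions into a per-database SQL file, and a second -f run replays them so broken views surface as ERROR lines. The core round trip, reduced (database, port, and file names are hypothetical):

    import subprocess

    db, port = 'postgres', 25308  # hypothetical
    dump = "gsql -d %s -p %s -q -t -f check_viewdef.sql -o viewdef.sql" % (db, port)
    replay = "gsql -d %s -p %s -f viewdef.sql -o viewdef.out" % (db, port)
    for cmd in (dump, replay):
        status, output = subprocess.getstatusoutput(cmd)
        if status != 0 or 'ERROR' in output:
            print('views in %s need attention:\n%s' % (db, output))
            break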
+# You may obtain a copy of Mulan PSL v2 at:
+#
+#          http://license.coscl.org.cn/MulanPSL2
+#
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+# ----------------------------------------------------------------------------
+from gspylib.inspection.common import SharedFuncs
+from gspylib.inspection.common.CheckItem import BaseItem
+from gspylib.inspection.common.CheckResult import ResultStatus
+
+
+class CheckCurConnCount(BaseItem):
+    def __init__(self):
+        super(CheckCurConnCount, self).__init__(self.__class__.__name__)
+
+    def doCheck(self):
+        sqlcmd1 = "show max_connections;"
+        sqlcmd2 = "SELECT count(*) FROM pg_stat_activity;"
+        self.result.raw = sqlcmd1 + sqlcmd2
+        output1 = SharedFuncs.runSqlCmd(sqlcmd1, self.user, "", self.port,
+                                        self.tmpPath, "postgres",
+                                        self.mpprcFile)
+        output2 = SharedFuncs.runSqlCmd(sqlcmd2, self.user, "", self.port,
+                                        self.tmpPath, "postgres",
+                                        self.mpprcFile)
+        if (not (output1.isdigit() and output2.isdigit())):
+            self.result.rst = ResultStatus.ERROR
+            self.result.val = "max_connections: %s\nCurConnCount: %s" % (
+                output1, output2)
+            return
+        maxConnections = float(output1)
+        usedConnections = float(output2)
+        if (maxConnections > 0 and usedConnections > 0):
+            # true division: floor division would report 0% for any
+            # partially used connection pool
+            OccupancyRate = usedConnections / maxConnections
+            self.result.val = "%.2f%%" % (OccupancyRate * 100)
+            if (OccupancyRate < 0.9):
+                self.result.rst = ResultStatus.OK
+            else:
+                self.result.rst = ResultStatus.NG
+        else:
+            self.result.rst = ResultStatus.ERROR
+            self.result.val = "max_connections: %s\nCurConnCount: %s" % (
+                maxConnections, usedConnections)
diff --git a/script/gspylib/inspection/items/database/CheckCursorNum.py b/script/gspylib/inspection/items/database/CheckCursorNum.py
new file mode 100644
index 0000000..73f2aaa
--- /dev/null
+++ b/script/gspylib/inspection/items/database/CheckCursorNum.py
@@ -0,0 +1,33 @@
+# -*- coding:utf-8 -*-
+# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+#
+# openGauss is licensed under Mulan PSL v2.
+# You can use this software according to the terms
+# and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+#
+#          http://license.coscl.org.cn/MulanPSL2
+#
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
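The 90% threshold in CheckCurConnCount above is a plain ratio, and true division is essential: floor division would round every partially used pool down to zero. A worked example (values are hypothetical):

    max_connections = 200.0   # hypothetical settings
    cur_connections = 150.0
    occupancy = cur_connections / max_connections  # 0.75, not 150 // 200 == 0
    print('%.2f%%' % (occupancy * 100))            # 75.00%
    ok = occupancy < 0.9                           # NG at 90% or above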
+# ---------------------------------------------------------------------------- +from gspylib.inspection.common import SharedFuncs +from gspylib.inspection.common.CheckItem import BaseItem +from gspylib.inspection.common.CheckResult import ResultStatus + + +class CheckCursorNum(BaseItem): + def __init__(self): + super(CheckCursorNum, self).__init__(self.__class__.__name__) + + def doCheck(self): + sqlcmd = "select count(*) from pg_cursors;" + self.result.raw = sqlcmd + output = SharedFuncs.runSqlCmd(sqlcmd, self.user, "", self.port, + self.tmpPath, "postgres", + self.mpprcFile) + self.result.rst = ResultStatus.OK + self.result.val = output.strip() diff --git a/script/gspylib/inspection/items/database/CheckDBConnection.py b/script/gspylib/inspection/items/database/CheckDBConnection.py new file mode 100644 index 0000000..c493d15 --- /dev/null +++ b/script/gspylib/inspection/items/database/CheckDBConnection.py @@ -0,0 +1,61 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +import os +import subprocess +from gspylib.inspection.common import SharedFuncs +from gspylib.inspection.common.CheckItem import BaseItem +from gspylib.inspection.common.CheckResult import ResultStatus + + +class CheckDBConnection(BaseItem): + def __init__(self): + super(CheckDBConnection, self).__init__(self.__class__.__name__) + + def doCheck(self): + cmd = "gs_om -t status" + output = SharedFuncs.runShellCmd(cmd, self.user, self.mpprcFile) + if (output.find("Normal") < 0 and output.find("Degraded") < 0): + self.result.rst = ResultStatus.NG + self.result.val = "The database can not be connected." + return + instanceList = [] + AbnormalInst = [] + primaryDnList = SharedFuncs.getMasterDnNum(self.user, self.mpprcFile) + nodeInfo = self.cluster.getDbNodeByName(self.host) + localDnList = nodeInfo.datanodes + for dn in localDnList: + if (dn.instanceId in primaryDnList): + instanceList.append(dn) + sqlcmd = "select pg_sleep(1);" + for instance in instanceList: + cmd = "gsql -m -d postgres -p %s -c '%s'" % (instance.port, sqlcmd) + if (self.mpprcFile): + cmd = "source '%s' && %s" % (self.mpprcFile, cmd) + if (os.getuid() == 0): + cmd = "su - %s -c \"%s\" " % (self.user, cmd) + self.result.raw += "\n%s" % cmd + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0 or output.find("connect to server failed") > 0): + AbnormalInst.append(instance.instanceId) + self.result.val += "The install %s can not be connected.\n" \ + % instance.instanceId + self.result.raw += "\nError: %s" % output + if AbnormalInst: + self.result.rst = ResultStatus.NG + else: + self.result.rst = ResultStatus.OK + self.result.val = "The database connection is normal." 
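The probe in CheckDBConnection.doCheck above amounts to running a trivial statement against each primary datanode and treating a failed connection as NG. Reduced to one instance (port is hypothetical; flags as in the check above):

    import subprocess

    port = 40000  # hypothetical primary datanode port
    cmd = "gsql -m -d postgres -p %s -c 'select pg_sleep(1);'" % port
    status, output = subprocess.getstatusoutput(cmd)
    if status != 0 or 'connect to server failed' in output:
        print('instance on port %s is unreachable' % port)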
diff --git a/script/gspylib/inspection/items/database/CheckDNSkew.py b/script/gspylib/inspection/items/database/CheckDNSkew.py new file mode 100644 index 0000000..5452869 --- /dev/null +++ b/script/gspylib/inspection/items/database/CheckDNSkew.py @@ -0,0 +1,89 @@ +# coding: UTF-8 +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +import os +import json +from gspylib.inspection.common import SharedFuncs +from gspylib.inspection.common.CheckItem import BaseItem +from gspylib.inspection.common.CheckResult import ResultStatus +from gspylib.common.ErrorCode import ErrorCode +from gspylib.os.gsfile import g_file + + +class CheckDNSkew(BaseItem): + def __init__(self): + super(CheckDNSkew, self).__init__(self.__class__.__name__) + + def doCheck(self): + nodeInfo = self.cluster.getDbNodeByName(self.host) + maxusage = None + minusage = None + usagedic = {} + val = "" + masterDnList = SharedFuncs.getMasterDnNum(self.user, self.mpprcFile) + for DnInstance in nodeInfo.datanodes: + if (DnInstance.instanceId in masterDnList): + datadir = os.path.join(DnInstance.datadir, "base") + output = g_file.getDirSize(datadir, "m") + output = output.split()[0][:-1] + if (not output.isdigit()): + raise Exception(ErrorCode.GAUSS_504["GAUSS_50412"] + % (DnInstance.instanceId)) + if (not maxusage or int(maxusage) < int(output)): + maxusage = int(output) + if (not minusage or int(minusage) > int(output)): + minusage = int(output) + usagedic[DnInstance.instanceId] = output + val += "\ndn %s: vol %sm" % (DnInstance.instanceId, output) + if (not usagedic): + self.result.rst = ResultStatus.NA + self.result.val = "No master database node in this host" + else: + if (maxusage > minusage * 1.05): + self.result.rst = ResultStatus.NG + self.result.val = "The result is not ok:\n%s" % val + self.result.raw = json.dumps(usagedic) + else: + self.result.rst = ResultStatus.OK + self.result.val = "Data distributed well in local host" + self.result.raw = json.dumps(usagedic) + + def postAnalysis(self, itemResult): + maxusage = None + minusage = None + val = "The result is not ok" + for v in itemResult.getLocalItems(): + try: + tmpdic = json.loads(v.raw) + for key, value in tmpdic.items(): + val += "\ndn %s: vol %sM" % (key, value) + if (not maxusage or int(maxusage) < int(value)): + maxusage = int(value) + if (not minusage or int(minusage) > int(value)): + minusage = int(value) + except Exception as err: + val += str(err) + if (maxusage and minusage): + if (maxusage > minusage * 1.05): + itemResult.rst = ResultStatus.NG + itemResult.analysis = val + else: + itemResult.rst = ResultStatus.OK + itemResult.analysis = "Data distributed well in all dns" + else: + itemResult.rst = ResultStatus.NA + itemResult.analysis = "No master database node in this cluster" + return itemResult diff --git a/script/gspylib/inspection/items/database/CheckDiskFailure.py b/script/gspylib/inspection/items/database/CheckDiskFailure.py new file mode 100644 index 0000000..8c6235e --- /dev/null +++ 
b/script/gspylib/inspection/items/database/CheckDiskFailure.py @@ -0,0 +1,90 @@ +# coding: UTF-8 +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +import os +import json +from gspylib.inspection.common import SharedFuncs +from gspylib.inspection.common.CheckItem import BaseItem +from gspylib.inspection.common.CheckResult import ResultStatus + + +class CheckDiskFailure(BaseItem): + def __init__(self): + super(CheckDiskFailure, self).__init__(self.__class__.__name__) + + def doCheck(self): + if (self.cluster.isSingleInstCluster()): + try: + sqltables = "select b.nspname, a.relname, reloptions from " \ + "pg_class a, pg_namespace b where a.relnamespace" \ + " = " \ + "b.oid and b.nspname !~ '^pg_toast' and " \ + "a.relkind='r' and a.relpersistence='p';" + sqlForCustom = "select b.nspname, a.relname, reloptions " \ + "from pg_class a, pg_namespace b where " \ + "a.relnamespace = b.oid and b.nspname !~ " \ + "'^pg_toast' and a.relkind='r' and " \ + "a.relpersistence='p' and b.nspname <> " \ + "'pg_catalog' and b.nspname <> 'cstore' and" \ + " b.nspname <> 'information_schema' and " \ + "b.nspname <> 'schema_cur_table_col' and " \ + "b.nspname <> 'schema_cur_table' and " \ + "b.nspname !~ '^pg_toast';" + sqldb = "select datname from pg_database;" + output = SharedFuncs.runSqlCmd(sqldb, self.user, "", self.port, + self.tmpPath, "postgres", + self.mpprcFile) + dbList = output.splitlines() + dbList.remove("template0") + dbList.remove("template1") + for db in dbList: + coltable = [] + rowtable = [] + if (db == "postgres"): + sql = sqltables + else: + sql = sqlForCustom + output = SharedFuncs.runSqlCmd(sql, self.user, "", + self.port, self.tmpPath, + db, self.mpprcFile) + tablelist = output.splitlines() + sql = "" + for tableinfo in tablelist: + if (len(tableinfo.split("|")) == 3): + schema = tableinfo.split("|")[0].strip() + tablename = tableinfo.split("|")[1].strip() + reloptions = tableinfo.split("|")[2].strip() + if ("column" in reloptions): + coltable.append("%s.%s" % (schema, tablename)) + else: + rowtable.append("%s.%s" % (schema, tablename)) + else: + pass + for table in rowtable: + sql += "select count(*) from %s;\n" % table + for table in coltable: + sql += "explain analyze select * from %s;\n" % table + SharedFuncs.runSqlCmd(sql, self.user, "", self.port, + self.tmpPath, db, self.mpprcFile) + except Exception as e: + self.result.rst = ResultStatus.NG + self.result.val = str(e) + return + self.result.rst = ResultStatus.OK + self.result.val = "No data is distributed on the fault disk" + else: + self.result.rst = ResultStatus.NA + self.result.val = "First cn is not in this host" diff --git a/script/gspylib/inspection/items/database/CheckDropColumn.py b/script/gspylib/inspection/items/database/CheckDropColumn.py new file mode 100644 index 0000000..53037b0 --- /dev/null +++ b/script/gspylib/inspection/items/database/CheckDropColumn.py @@ -0,0 +1,53 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. 
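CheckDiskFailure above detects bad disks by forcing reads: row tables get a count(*) (a full sequential scan) while column tables get explain analyze select * so every CU is touched, and an I/O error from either aborts the check with NG. The generated batch looks like this (table names are hypothetical):

    row_tables = ['public.orders']       # hypothetical
    col_tables = ['public.orders_col']
    sql = ''
    for t in row_tables:
        sql += 'select count(*) from %s;\n' % t           # sequential scan
    for t in col_tables:
        sql += 'explain analyze select * from %s;\n' % t  # reads every CU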
+#
+# openGauss is licensed under Mulan PSL v2.
+# You can use this software according to the terms
+# and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+#
+#          http://license.coscl.org.cn/MulanPSL2
+#
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+# ----------------------------------------------------------------------------
+from gspylib.inspection.common import SharedFuncs
+from gspylib.inspection.common.CheckItem import BaseItem
+from gspylib.inspection.common.CheckResult import ResultStatus
+
+
+class CheckDropColumn(BaseItem):
+    def __init__(self):
+        super(CheckDropColumn, self).__init__(self.__class__.__name__)
+
+    def doCheck(self):
+        sql1 = """select a.relname, b.attname ,n.nspname||'.'||a.relname
+        from pg_class a, pg_attribute b, pg_namespace n
+        where a.oid = b.attrelid
+        and b.attisdropped and n.oid = a.relnamespace;"""
+        sqldb = "select datname from pg_database;"
+        output = SharedFuncs.runSqlCmd(sqldb, self.user, "", self.port,
+                                       self.tmpPath, "postgres",
+                                       self.mpprcFile)
+        dbList = output.split("\n")
+        dbList.remove("template0")
+        result = ""
+        for db in dbList:
+            # run the catalog query in each database, not just "postgres"
+            output1 = SharedFuncs.runSqlSimplely(sql1, self.user, "",
+                                                 self.port, self.tmpPath,
+                                                 db, self.mpprcFile)
+            if (output1.find("(0 rows)") < 0):
+                result += "%s:\n%s\n" % (db, output1)
+        if (result):
+            self.result.val = "An alter table drop column operation " \
+                              "was performed in:\n%s" % result
+            self.result.rst = ResultStatus.NG
+            self.result.raw = sql1
+        else:
+            self.result.val = "No alter table drop column operation"
+            self.result.rst = ResultStatus.OK
diff --git a/script/gspylib/inspection/items/database/CheckGUCConsistent.py b/script/gspylib/inspection/items/database/CheckGUCConsistent.py
new file mode 100644
index 0000000..6600d99
--- /dev/null
+++ b/script/gspylib/inspection/items/database/CheckGUCConsistent.py
@@ -0,0 +1,242 @@
+# -*- coding:utf-8 -*-
+# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+#
+# openGauss is licensed under Mulan PSL v2.
+# You can use this software according to the terms
+# and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+#
+#          http://license.coscl.org.cn/MulanPSL2
+#
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
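Background for the catalog query in CheckDropColumn above: a column removed with ALTER TABLE ... DROP COLUMN is not physically erased; its pg_attribute row stays behind with attisdropped = true until the table is rewritten. The same join written with explicit JOIN syntax:

    sql = (
        "select n.nspname || '.' || c.relname, a.attname "
        "from pg_class c "
        "join pg_attribute a on a.attrelid = c.oid "
        "join pg_namespace n on n.oid = c.relnamespace "
        "where a.attisdropped;"
    )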
+# ---------------------------------------------------------------------------- +import os +import json +import configparser +import multiprocessing +from gspylib.common.DbClusterInfo import dbClusterInfo +from gspylib.inspection.common import SharedFuncs +from gspylib.common.Common import DefaultValue +from gspylib.common.ErrorCode import ErrorCode +from gspylib.inspection.common.CheckItem import BaseItem +from gspylib.inspection.common.CheckResult import ResultStatus + +# master +MASTER_INSTANCE = 0 +# standby +STANDBY_INSTANCE = 1 +# dummy standby +DUMMY_STANDBY_INSTANCE = 2 + +# cn +INSTANCE_ROLE_COODINATOR = 3 +# dn +INSTANCE_ROLE_DATANODE = 4 + +g_gucDist = {} +g_ignoreList = [] +g_logicList = [] + + +class CheckGUCConsistent(BaseItem): + def __init__(self): + super(CheckGUCConsistent, self).__init__(self.__class__.__name__) + + def preCheck(self): + # check the threshold was set correctly + if (not self.threshold.__contains__('version')): + raise Exception(ErrorCode.GAUSS_530["GAUSS_53013"] % "version") + self.version = self.threshold['version'] + + def checkLogicCluster(self): + clusterInfo = dbClusterInfo() + staticConfigDir = os.path.join(self.cluster.appPath, "bin") + cmd = "find %s -name *.cluster_static_config" % staticConfigDir + output = SharedFuncs.runShellCmd(cmd) + if output: + for staticConfigFile in output.splitlines(): + clusterInfo.initFromStaticConfig(self.user, staticConfigFile, + True) + lcName = os.path.splitext(os.path.basename(staticConfigFile))[ + 0] + for dbnode in clusterInfo.dbNodes: + if (dbnode.name == DefaultValue.GetHostIpOrName()): + return [lcName, dbnode] + return ["", None] + else: + return ["", None] + + def getIgnoreParameters(self, configFile, ignoreSection, logicSection): + global g_ignoreList + global g_logicList + fp = configparser.RawConfigParser() + fp.read(configFile) + secs = fp.sections() + if (ignoreSection not in secs): + return + g_ignoreList = fp.options(ignoreSection) + if self.cluster.isSingleInstCluster(): + g_ignoreList.append("synchronous_standby_names") + g_logicList = fp.options(logicSection) + + def checkInstanceGucValue(self, Instance, needm, lcName="", + logicCluster=False): + """ + get CN/DN instance guc parameters + """ + global g_gucDist + LCInstanceGucDist = {} + lcInstance = {} + sqlcmd = "select name,setting from pg_settings;" + InstanceGucDist = {} + output = SharedFuncs.runSqlCmd(sqlcmd, self.user, "", Instance.port, + self.tmpPath, "postgres", + self.mpprcFile, needm) + gucValueList = output.split('\n') + for gucValue in gucValueList: + if (len(gucValue.split('|')) == 2): + (parameter, value) = gucValue.split('|') + if ( + parameter == "transaction_read_only" + and Instance.instanceRole == INSTANCE_ROLE_DATANODE): + continue + if (parameter not in g_ignoreList): + if (not logicCluster): + InstanceGucDist[parameter] = value + else: + if (parameter not in g_logicList): + InstanceGucDist[parameter] = value + elif (lcName and parameter in g_logicList): + LCInstanceGucDist[parameter] = value + else: + continue + if (lcName): + instanceName = "%s_%s_%s" % (lcName, "DN", Instance.instanceId) + lcInstance[instanceName] = LCInstanceGucDist + return lcInstance + if Instance.instanceRole == INSTANCE_ROLE_DATANODE: + Role = "DN" + instanceName = "%s_%s" % (Role, Instance.instanceId) + g_gucDist[instanceName] = InstanceGucDist + + def doCheck(self): + """ + + """ + global g_gucDist + # get ignore list + dirName = os.path.dirname(os.path.realpath(__file__)) + configFile = "%s/../../config/check_list_%s.conf" % ( + dirName, 
self.version) + self.getIgnoreParameters(configFile, 'guc_ignore', 'guc_logic') + DNidList = [] + result = [] + logicCluster = False + nodeInfo = self.cluster.getDbNodeByName(self.host) + masterDnList = SharedFuncs.getMasterDnNum(self.user, self.mpprcFile) + for DnInstance in nodeInfo.datanodes: + if (DnInstance.instanceType != DUMMY_STANDBY_INSTANCE): + DNidList.append(DnInstance) + if len(DNidList) < 1: + raise Exception(ErrorCode.GAUSS_512["GAUSS_51249"]) + # get information of logicCluster on current node + (lcName, dbnode) = self.checkLogicCluster() + if (dbnode): + logicCluster = True + for DnInstance in dbnode.datanodes: + if (DnInstance.instanceType != DUMMY_STANDBY_INSTANCE): + if (DnInstance.instanceId in masterDnList): + needm = False + else: + needm = True + result.append( + self.checkInstanceGucValue(DnInstance, needm, lcName, + logicCluster)) + g_gucDist[lcName] = result + # test database Connection + for Instance in DNidList: + if not Instance: + continue + sqlcmd = "select pg_sleep(1);" + if Instance.instanceId in masterDnList: + needm = False + else: + needm = True + output = SharedFuncs.runSqlCmd(sqlcmd, self.user, "", + Instance.port, self.tmpPath, + 'postgres', self.mpprcFile, needm) + self.checkInstanceGucValue(Instance, needm, "", logicCluster) + + self.result.val = json.dumps(g_gucDist) + self.result.raw = str(g_gucDist) + self.result.rst = ResultStatus.OK + + def postAnalysis(self, itemResult): + errors = [] + ngs = [] + dnGucDist = {} + lcdnGucDist = {} + for i in itemResult.getLocalItems(): + if (i.rst == ResultStatus.ERROR): + errors.append("%s: %s" % (i.host, i.val)) + if (i.rst == ResultStatus.NG): + ngs.append("%s: %s" % (i.host, i.val)) + if (len(errors) > 0): + itemResult.rst = ResultStatus.ERROR + itemResult.analysis = "\n".join(errors) + return itemResult + if (len(ngs) > 0): + itemResult.rst = ResultStatus.NG + itemResult.analysis = "\n".join(ngs) + return itemResult + for v in itemResult.getLocalItems(): + gucDist = json.loads(v.val) + for InstanceName in gucDist.keys(): + if (InstanceName[:2] == 'DN'): + dnGucDist[InstanceName] = gucDist[InstanceName] + else: + if InstanceName in lcdnGucDist.keys(): + lcdnGucDist[InstanceName].extend(gucDist[InstanceName]) + else: + lcdnGucDist[InstanceName] = gucDist[InstanceName] + for parameter in dnGucDist[list(dnGucDist.keys())[0]].keys(): + InstanceName = list(dnGucDist.keys())[0] + keyValue = dnGucDist[InstanceName][parameter] + relultStr = "\n%s:\n%s: %s\n" % (parameter, InstanceName, keyValue) + flag = True + for dnInstance in list(dnGucDist.keys())[1:]: + value = dnGucDist[dnInstance][parameter] + relultStr += "%s: %s\n" % (dnInstance, value) + if (value != keyValue): + flag = False + if (not flag): + itemResult.analysis += relultStr + + for lcName in lcdnGucDist.keys(): + lcInstanceResult = lcdnGucDist[lcName] + baseDn = lcInstanceResult[0] + baseParameter = baseDn[list(baseDn.keys())[0]] + for parameter in baseParameter.keys(): + keyValue = baseParameter[parameter] + relultStr = "\n%s:\n%s: %s\n" % ( + parameter, list(baseDn.keys())[0], keyValue) + flag = True + for otherDn in lcInstanceResult[1:]: + dnInstance = list(otherDn.keys())[0] + value = otherDn[dnInstance][parameter] + relultStr += "%s: %s\n" % (dnInstance, value) + if (value != keyValue): + flag = False + if (not flag): + itemResult.analysis += relultStr + + if (itemResult.analysis): + itemResult.rst = ResultStatus.NG + else: + itemResult.rst = ResultStatus.OK + itemResult.analysis = "All DN instance guc value is consistent." 
+        return itemResult
diff --git a/script/gspylib/inspection/items/database/CheckGUCValue.py b/script/gspylib/inspection/items/database/CheckGUCValue.py
new file mode 100644
index 0000000..b7e22f4
--- /dev/null
+++ b/script/gspylib/inspection/items/database/CheckGUCValue.py
@@ -0,0 +1,81 @@
+# -*- coding:utf-8 -*-
+# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+#
+# openGauss is licensed under Mulan PSL v2.
+# You can use this software according to the terms
+# and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+#
+#          http://license.coscl.org.cn/MulanPSL2
+#
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+# ----------------------------------------------------------------------------
+from gspylib.inspection.common import SharedFuncs
+from gspylib.inspection.common.CheckItem import BaseItem
+from gspylib.inspection.common.CheckResult import ResultStatus

+g_max = {}
+
+
+class CheckGUCValue(BaseItem):
+    def __init__(self):
+        super(CheckGUCValue, self).__init__(self.__class__.__name__)
+
+    def doCheck(self):
+        global g_max
+        sqlcmd = "show max_connections;"
+        self.result.raw = sqlcmd
+        output = SharedFuncs.runSqlCmd(sqlcmd, self.user, "", self.port,
+                                       self.tmpPath, "postgres",
+                                       self.mpprcFile)
+        max_connections = int(output.strip())
+        g_max['conn'] = max_connections
+        sqlcmd = "show max_prepared_transactions;"
+        self.result.raw += sqlcmd
+        output = SharedFuncs.runSqlCmd(sqlcmd, self.user, "", self.port,
+                                       self.tmpPath, "postgres",
+                                       self.mpprcFile)
+        max_prepared_transactions = int(output.strip())
+        g_max['pre'] = max_prepared_transactions
+        sqlcmd = "show max_locks_per_transaction;"
+        self.result.raw += sqlcmd
+        output = SharedFuncs.runSqlCmd(sqlcmd, self.user, "", self.port,
+                                       self.tmpPath, "postgres",
+                                       self.mpprcFile)
+        max_locks_per_transaction = int(output.strip())
+        g_max['locks'] = max_locks_per_transaction
+        max_value = int(max_locks_per_transaction) * (
+                int(max_connections) + int(max_prepared_transactions))
+        g_max['value'] = max_value
+        self.result.val = "max_locks_per_transaction[%d] * (max_connections[" \
+                          "%d] + max_prepared_transactions[%d]) = %d" % (
+                              max_locks_per_transaction, max_connections,
+                              max_prepared_transactions,
+                              max_value)
+        if (int(max_value) < int(1000000)):
+            self.result.rst = ResultStatus.NG
+            self.result.val += " Must be larger than 1000000"
+        else:
+            self.result.rst = ResultStatus.OK
+
+    def doSet(self):
+        if (g_max['pre'] > 1000):
+            # keep the current connection settings and raise the lock table
+            locksTransaction = int(
+                1000000 // (g_max['pre'] + g_max['conn'])) + 1
+            cmd = "gs_guc set -N all -I all -c " \
+                  "'max_locks_per_transaction=%d'" % locksTransaction
+        else:
+            cmd = "gs_guc set -N all -I all -c " \
+                  "'max_locks_per_transaction=512' -c 'max_connections=1000'" \
+                  " -c 'max_prepared_transactions = 1000'"
+        SharedFuncs.runShellCmd(cmd, self.user, self.mpprcFile)
+        self.result.val = "Set GUCValue successfully."
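The invariant CheckGUCValue above enforces is max_locks_per_transaction * (max_connections + max_prepared_transactions) >= 1,000,000: the shared lock table must be able to hold a million entries. A worked example with the defaults doSet falls back to:

    max_connections = 1000
    max_prepared_transactions = 1000
    max_locks_per_transaction = 512
    capacity = max_locks_per_transaction * (
        max_connections + max_prepared_transactions)
    print(capacity)              # 1024000
    assert capacity >= 1000000   # passes the floor the check requires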
diff --git a/script/gspylib/inspection/items/database/CheckHashIndex.py b/script/gspylib/inspection/items/database/CheckHashIndex.py new file mode 100644 index 0000000..7820f18 --- /dev/null +++ b/script/gspylib/inspection/items/database/CheckHashIndex.py @@ -0,0 +1,65 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +from gspylib.inspection.common import SharedFuncs +from gspylib.inspection.common.CheckItem import BaseItem +from gspylib.inspection.common.CheckResult import ResultStatus + + +class CheckHashIndex(BaseItem): + def __init__(self): + super(CheckHashIndex, self).__init__(self.__class__.__name__) + + def doCheck(self): + databaseListSql = "select datname from pg_database " \ + "where datname != 'template0';" + sqlcmd = """ +SELECT +n.nspname AS schemaname, +c.relname AS tablename, +i.relname AS indexname, +o.amname AS indexmethod, +pg_get_indexdef(i.oid) AS indexdef +FROM pg_index x +JOIN pg_class c ON c.oid = x.indrelid +JOIN pg_class i ON i.oid = x.indexrelid +JOIN pg_am o ON o.oid = i.relam +LEFT JOIN pg_namespace n ON n.oid = c.relnamespace +WHERE c.relkind = 'r'::"char" +AND i.relkind = 'i'::"char" +AND o.amname not in ('btree','gin','psort','cbtree'); +""" + output = SharedFuncs.runSqlCmd(databaseListSql, self.user, "", + self.port, self.tmpPath, "postgres", + self.mpprcFile) + dbList = output.split("\n") + resultStr = "" + for databaseName in dbList: + output = SharedFuncs.runSqlCmd(sqlcmd, self.user, "", self.port, + self.tmpPath, databaseName, + self.mpprcFile, True) + if (output): + self.result.raw += "%s: %s" % (databaseName, output) + tableList = output.split('\n') + resultStr += "database[%s]: %s\n" % ( + databaseName, ",".join(tableList)) + + if (resultStr): + self.result.rst = ResultStatus.NG + self.result.val = resultStr + else: + self.result.rst = ResultStatus.OK + self.result.val = "There is no hash index in all databases." diff --git a/script/gspylib/inspection/items/database/CheckIdleSession.py b/script/gspylib/inspection/items/database/CheckIdleSession.py new file mode 100644 index 0000000..a7612fc --- /dev/null +++ b/script/gspylib/inspection/items/database/CheckIdleSession.py @@ -0,0 +1,45 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. 
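CheckHashIndex above resolves each index's access method by joining pg_index to pg_class (for the index relation) and pg_am (for the method name), then flags anything outside the supported set. The join skeleton on its own:

    sql = (
        "SELECT i.relname, o.amname "
        "FROM pg_index x "
        "JOIN pg_class i ON i.oid = x.indexrelid "
        "JOIN pg_am o ON o.oid = i.relam "
        "WHERE o.amname NOT IN ('btree', 'gin', 'psort', 'cbtree');"
    )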
+# ---------------------------------------------------------------------------- +from gspylib.inspection.common import SharedFuncs +from gspylib.inspection.common.CheckItem import BaseItem +from gspylib.inspection.common.CheckResult import ResultStatus + + +class CheckIdleSession(BaseItem): + def __init__(self): + super(CheckIdleSession, self).__init__(self.__class__.__name__) + + def doCheck(self): + dbNode = self.cluster.getDbNodeByName(self.host) + sqlcmd = "select pid, query_id, application_name, query_start, " \ + "state, " \ + "query from pg_stat_activity where state <> 'idle' and " \ + " application_name not in ('JobScheduler', " \ + "'WorkloadManager', " \ + "'WLMArbiter', 'workload', 'WorkloadMonitor', 'Snapshot', " \ + "'PercentileJob') and " \ + "query_id <> 0 and query not like '%pg_stat_activity%';" + self.result.raw = sqlcmd + output = SharedFuncs.runSqlCmd(sqlcmd, self.user, "", self.port, + self.tmpPath, "postgres", + self.mpprcFile) + if (output != ''): + self.result.rst = ResultStatus.NG + self.result.val = output + else: + self.result.rst = ResultStatus.OK + self.result.val = "No idle process." diff --git a/script/gspylib/inspection/items/database/CheckLockNum.py b/script/gspylib/inspection/items/database/CheckLockNum.py new file mode 100644 index 0000000..5ee48b7 --- /dev/null +++ b/script/gspylib/inspection/items/database/CheckLockNum.py @@ -0,0 +1,33 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +from gspylib.inspection.common import SharedFuncs +from gspylib.inspection.common.CheckItem import BaseItem +from gspylib.inspection.common.CheckResult import ResultStatus + + +class CheckLockNum(BaseItem): + def __init__(self): + super(CheckLockNum, self).__init__(self.__class__.__name__) + + def doCheck(self): + sqlcmd = "select count(*) from pg_locks" + self.result.raw = sqlcmd + output = SharedFuncs.runSqlCmd(sqlcmd, self.user, "", self.port, + self.tmpPath, "postgres", + self.mpprcFile) + self.result.rst = ResultStatus.OK + self.result.val = output diff --git a/script/gspylib/inspection/items/database/CheckLockState.py b/script/gspylib/inspection/items/database/CheckLockState.py new file mode 100644 index 0000000..22fc408 --- /dev/null +++ b/script/gspylib/inspection/items/database/CheckLockState.py @@ -0,0 +1,39 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. 
+# ----------------------------------------------------------------------------
+from gspylib.inspection.common import SharedFuncs
+from gspylib.inspection.common.CheckItem import BaseItem
+from gspylib.inspection.common.CheckResult import ResultStatus
+
+
+class CheckLockState(BaseItem):
+    def __init__(self):
+        super(CheckLockState, self).__init__(self.__class__.__name__)
+
+    def doCheck(self):
+        sqlcmd = "select count(1) from pg_stat_activity " \
+                 "where instr( query, 'pgxc_lock_for_backup()') > 0 " \
+                 "or instr( query, 'pg_advisory_lock(65535,65535)') > 0;"
+        self.result.raw = sqlcmd
+
+        output = SharedFuncs.runSqlCmd(sqlcmd, self.user, "", self.port,
+                                       self.tmpPath, "postgres",
+                                       self.mpprcFile)
+        # the monitoring query matches itself, so subtract one from the count
+        if ((int(output.strip()) - 1) != 0):
+            self.result.rst = ResultStatus.NG
+        else:
+            self.result.rst = ResultStatus.OK
+        self.result.val = output
diff --git a/script/gspylib/inspection/items/database/CheckMaxDatanode.py b/script/gspylib/inspection/items/database/CheckMaxDatanode.py
new file mode 100644
index 0000000..62464c8
--- /dev/null
+++ b/script/gspylib/inspection/items/database/CheckMaxDatanode.py
@@ -0,0 +1,87 @@
+# -*- coding:utf-8 -*-
+# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+#
+# openGauss is licensed under Mulan PSL v2.
+# You can use this software according to the terms
+# and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+#
+#          http://license.coscl.org.cn/MulanPSL2
+#
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+# ----------------------------------------------------------------------------
+import subprocess
+from gspylib.inspection.common import SharedFuncs
+from gspylib.inspection.common.CheckItem import BaseItem
+from gspylib.inspection.common.CheckResult import ResultStatus
+from gspylib.common.ErrorCode import ErrorCode
+
+g_value = 0
+
+
+class CheckMaxDatanode(BaseItem):
+    def __init__(self):
+        super(CheckMaxDatanode, self).__init__(self.__class__.__name__)
+        self.nodeCount = None
+        self.dnCount = None
+
+    def preCheck(self):
+        # check current node contains cn instances if not raise exception
+        super(CheckMaxDatanode, self).preCheck()
+        # check the threshold was set correctly
+        if (not self.threshold.__contains__('nodeCount')):
+            raise Exception(ErrorCode.GAUSS_530["GAUSS_53013"]
+                            % "threshold nodeCount")
+        if (not self.threshold.__contains__('dnCount')):
+            raise Exception(ErrorCode.GAUSS_530["GAUSS_53013"]
+                            % "threshold dnCount")
+
+        self.nodeCount = self.threshold['nodeCount']
+        self.dnCount = self.threshold['dnCount']
+
+    def doCheck(self):
+        global g_value
+        dataNum = int(self.nodeCount) * int(self.dnCount)
+        sqlcmd = "SELECT setting FROM pg_settings WHERE " \
+                 "name='comm_max_datanode';"
+        self.result.raw = sqlcmd
+        comm_max_datanode = SharedFuncs.runSqlCmd(sqlcmd, self.user, "",
+                                                  self.port, self.tmpPath,
+                                                  "postgres", self.mpprcFile)
+
+        if comm_max_datanode.isdigit() and dataNum > int(comm_max_datanode):
+            if (dataNum < 256):
+                g_value = 256
+            elif (dataNum < 512):
+                g_value = 512
+            elif (dataNum < 1024):
+                g_value = 1024
+            else:
+                g_value = 2048
+            self.result.rst = ResultStatus.WARNING
+            self.result.val = "Invalid value for GUC parameter " \
+                              "comm_max_datanode: %s.
Expect value: %s" % (
+                                  comm_max_datanode, g_value)
+        else:
+            self.result.rst = ResultStatus.OK
+            self.result.val = "dataNum: %s, comm_max_datanode: %s" % (
+                dataNum, comm_max_datanode)
+
+        self.result.raw = sqlcmd
+
+    def doSet(self):
+        cmd = "gs_guc set -N all -I all -c " \
+              "'comm_max_datanode=%d'" % g_value
+        (status, output) = subprocess.getstatusoutput(cmd)
+        if (status != 0):
+            self.result.val += "Failed to set comm_max_datanode.\n Error : " \
+                               "%s. " % output
+            self.result.val += "The cmd is %s " % cmd
+        else:
+            self.result.val += "Set comm_max_datanode successfully."
diff --git a/script/gspylib/inspection/items/database/CheckNextvalInDefault.py b/script/gspylib/inspection/items/database/CheckNextvalInDefault.py
new file mode 100644
index 0000000..6bba584
--- /dev/null
+++ b/script/gspylib/inspection/items/database/CheckNextvalInDefault.py
@@ -0,0 +1,79 @@
+# -*- coding:utf-8 -*-
+# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+#
+# openGauss is licensed under Mulan PSL v2.
+# You can use this software according to the terms
+# and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+#
+#          http://license.coscl.org.cn/MulanPSL2
+#
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+# ----------------------------------------------------------------------------
+from gspylib.inspection.common import SharedFuncs
+from gspylib.inspection.common.CheckItem import BaseItem
+from gspylib.inspection.common.CheckResult import ResultStatus
+
+
+class CheckNextvalInDefault(BaseItem):
+    def __init__(self):
+        super(CheckNextvalInDefault, self).__init__(self.__class__.__name__)
+
+    def doCheck(self):
+        sql1 = """select distinct rt.relname from PG_ATTRDEF ad,
+(
+select c.oid,c.relname from pg_class c, pgxc_class xc
+where
+c.oid = xc.pcrelid and
+c.relkind = 'r' and
+xc.pclocatortype = 'R'
+) as rt(oid,relname)
+where ad.adrelid = rt.oid
+and ad.adsrc like '%nextval%';
+        """
+        sql2 = """select relname from pg_class c, pg_namespace n
+where relkind = 'S' and c.relnamespace = n.oid
+and n.nspname like 'pg_temp%';
+"""
+        sqldb = "select datname from pg_database;"
+        output = SharedFuncs.runSqlCmd(sqldb, self.user, "", self.port,
+                                       self.tmpPath, "postgres",
+                                       self.mpprcFile)
+        dbList = output.split("\n")
+        dbList.remove("template0")
+        result = ""
+        for db in dbList:
+            output1 = SharedFuncs.runSqlCmd(sql1, self.user, "", self.port,
+                                            self.tmpPath, db, self.mpprcFile)
+            tmptablist = []
+            if (output1):
+                for tab in output1.splitlines():
+                    tmpsql = "select * from %s limit 1" % tab
+                    tmpout = SharedFuncs.runSqlCmd(tmpsql, self.user, "",
+                                                   self.port, self.tmpPath,
+                                                   db, self.mpprcFile)
+                    if (tmpout):
+                        tmptablist.append(tab)
+            output2 = SharedFuncs.runSqlCmd(sql2, self.user, "", self.port,
+                                            self.tmpPath, db, self.mpprcFile)
+            if (output2):
+                for tab in output2.splitlines():
+                    if (tab not in tmptablist):
+                        tmptablist.append(tab)
+            if (tmptablist):
+                result += "%s:\n%s\n" % (db, "\n".join(tmptablist))
+        if (result):
+            self.result.val = "Some default expressions " \
+                              "contain nextval(sequence):\n%s" % result
+            self.result.rst = ResultStatus.NG
+        else:
+            self.result.val = "No default expression " \
+                              "contains nextval(sequence)"
+            self.result.rst =
diff --git a/script/gspylib/inspection/items/database/CheckNodeGroupName.py b/script/gspylib/inspection/items/database/CheckNodeGroupName.py
new file mode 100644
index 0000000..5c559e5
--- /dev/null
+++ b/script/gspylib/inspection/items/database/CheckNodeGroupName.py
@@ -0,0 +1,72 @@
+# -*- coding:utf-8 -*-
+# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+#
+# openGauss is licensed under Mulan PSL v2.
+# You can use this software according to the terms
+# and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+#
+#          http://license.coscl.org.cn/MulanPSL2
+#
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+# ----------------------------------------------------------------------------
+from gspylib.inspection.common import SharedFuncs
+from gspylib.inspection.common.CheckItem import BaseItem
+from gspylib.inspection.common.CheckResult import ResultStatus
+
+g_setDict = {}
+
+
+class CheckNodeGroupName(BaseItem):
+    def __init__(self):
+        super(CheckNodeGroupName, self).__init__(self.__class__.__name__)
+
+    def doCheck(self):
+        global g_setDict
+        databaseListSql = "select datname from pg_database where datname != " \
+                          "'template0';"
+        sqlCmd = "select group_name from pgxc_group where length(group_name)" \
+                 " != length(group_name::bytea, 'SQL_ASCII');"
+        output = SharedFuncs.runSqlCmd(databaseListSql, self.user, "",
+                                       self.port, self.tmpPath, "postgres",
+                                       self.mpprcFile)
+        dbList = output.split("\n")
+        resultStr = ""
+        for databaseName in dbList:
+            output = SharedFuncs.runSqlCmd(sqlCmd, self.user, "", self.port,
+                                           self.tmpPath, databaseName,
+                                           self.mpprcFile, True)
+            if not output:
+                continue
+            else:
+                # keep one group name per line; iterating the raw string
+                # in doSet would walk it character by character
+                g_setDict[databaseName] = output.splitlines()
+                resultStr += "The node group name in %s contains " \
+                             "non-SQL_ASCII characters.\n " % databaseName
+        if (resultStr):
+            self.result.rst = ResultStatus.NG
+            self.result.val = resultStr
+        else:
+            self.result.rst = ResultStatus.OK
+            self.result.val = "The node group names in all databases" \
+                              " contain only SQL_ASCII characters."
+
+    def doSet(self):
+        resultStr = ""
+        i = 2
+        for dbname in g_setDict.keys():
+            for groupname in g_setDict[dbname]:
+                sqlCmd = "set xc_maintenance_mode=on;"
+                sqlCmd += "alter node group '%s' rename to " \
+                          "'groupversion%d';" % (
+                              groupname, i)
+                sqlCmd += "set xc_maintenance_mode=off;"
+                output = SharedFuncs.runSqlCmd(sqlCmd, self.user, "",
+                                               self.port, self.tmpPath, dbname,
+                                               self.mpprcFile, True)
+                i += 1
+                resultStr += output
+        self.result.val = resultStr
diff --git a/script/gspylib/inspection/items/database/CheckPMKData.py b/script/gspylib/inspection/items/database/CheckPMKData.py
new file mode 100644
index 0000000..40bf260
--- /dev/null
+++ b/script/gspylib/inspection/items/database/CheckPMKData.py
@@ -0,0 +1,46 @@
+# -*- coding:utf-8 -*-
+# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+#
+# openGauss is licensed under Mulan PSL v2.
+# You can use this software according to the terms
+# and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+#
+#          http://license.coscl.org.cn/MulanPSL2
+#
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+# ---------------------------------------------------------------------------- +from gspylib.inspection.common import SharedFuncs +from gspylib.inspection.common.CheckItem import BaseItem +from gspylib.inspection.common.CheckResult import ResultStatus + + +class CheckPMKData(BaseItem): + def __init__(self): + super(CheckPMKData, self).__init__(self.__class__.__name__) + + def doCheck(self): + sqlcmd = "select proname,pronamespace from pg_proc " \ + "where pronamespace not in (select oid from pg_namespace);" + self.result.raw = sqlcmd + output = SharedFuncs.runSqlCmd(sqlcmd, self.user, "", self.port, + self.tmpPath, "postgres", + self.mpprcFile) + if (output == ""): + self.result.rst = ResultStatus.OK + self.result.val = "No exception data in PMK." + else: + self.result.rst = ResultStatus.NG + self.result.val = "PMK contains exception data: \n%s" % output + + def doSet(self): + sqlcmd = "drop schema pmk cascade;" + output = SharedFuncs.runSqlCmd(sqlcmd, self.user, "", self.port, + self.tmpPath, "postgres", + self.mpprcFile) + + self.result.val = output diff --git a/script/gspylib/inspection/items/database/CheckPgPreparedXacts.py b/script/gspylib/inspection/items/database/CheckPgPreparedXacts.py new file mode 100644 index 0000000..ff8d035 --- /dev/null +++ b/script/gspylib/inspection/items/database/CheckPgPreparedXacts.py @@ -0,0 +1,42 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +from gspylib.inspection.common import SharedFuncs +from gspylib.inspection.common.CheckItem import BaseItem +from gspylib.inspection.common.CheckResult import ResultStatus + + +class CheckPgPreparedXacts(BaseItem): + def __init__(self): + super(CheckPgPreparedXacts, self).__init__(self.__class__.__name__) + + def doCheck(self): + sqlcmd = "SELECT * FROM pg_catalog.pgxc_prepared_xacts;" + self.result.raw = sqlcmd + + output = SharedFuncs.runSqlCmd(sqlcmd, self.user, "", self.port, + self.tmpPath, "postgres", + self.mpprcFile) + if (output != ""): + self.result.rst = ResultStatus.NG + else: + self.result.rst = ResultStatus.OK + self.result.val = output + + def doSet(self): + cmd = "gs_clean -s -v -a -p %s -r" % self.port + output = SharedFuncs.runShellCmd(cmd, self.user, self.mpprcFile) + self.result.val = output diff --git a/script/gspylib/inspection/items/database/CheckPgxcRedistb.py b/script/gspylib/inspection/items/database/CheckPgxcRedistb.py new file mode 100644 index 0000000..5671798 --- /dev/null +++ b/script/gspylib/inspection/items/database/CheckPgxcRedistb.py @@ -0,0 +1,67 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. 
+# You may obtain a copy of Mulan PSL v2 at:
+#
+#          http://license.coscl.org.cn/MulanPSL2
+#
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+# ----------------------------------------------------------------------------
+from gspylib.inspection.common import SharedFuncs
+from gspylib.inspection.common.CheckItem import BaseItem
+from gspylib.inspection.common.CheckResult import ResultStatus
+
+
+class CheckPgxcRedistb(BaseItem):
+    def __init__(self):
+        super(CheckPgxcRedistb, self).__init__(self.__class__.__name__)
+        self.version = None
+
+    def doCheck(self):
+        databaseListSql = "select datname from pg_database " \
+                          "where datname != 'template0';"
+        output = SharedFuncs.runSqlCmd(databaseListSql, self.user, "",
+                                       self.port, self.tmpPath, "postgres",
+                                       self.mpprcFile)
+        dbList = output.split("\n")
+        residue = False
+        residueTableSql = "select * from pg_tables " \
+                          "where tablename='pgxc_redistb';"
+        residueSchemaSql = "select * from pg_namespace " \
+                           "where nspname='data_redis';"
+
+        self.result.raw = residueTableSql + residueSchemaSql
+        for dbName in dbList:
+            # Check temporary table residue
+            output = SharedFuncs.runSqlCmd(residueTableSql, self.user, "",
+                                           self.port, self.tmpPath, dbName,
+                                           self.mpprcFile)
+            if output:
+                residue = True
+                self.result.val += "Redistribution " \
+                                   "temporary table pgxc_redistb " \
+                                   "exists in database %s." % dbName
+            # Check temporary schema residue
+            output = SharedFuncs.runSqlCmd(residueSchemaSql, self.user, "",
+                                           self.port, self.tmpPath, dbName,
+                                           self.mpprcFile)
+            if output:
+                residue = True
+                self.result.val += "Redistribution temporary schema " \
+                                   "data_redis exists " \
+                                   "in database %s." % dbName
+
+        if (residue):
+            self.result.rst = ResultStatus.NG
+        else:
+            self.result.rst = ResultStatus.OK
+            self.result.val += "Residual table pgxc_redistb " \
+                               "and residual schema data_redis " \
+                               "do not exist in the cluster."
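The residue check above issues two probes per database. Where round trips
matter, the same information can come back in one statement; a sketch of a
combined probe (ours, not part of the patch):

    residue_sql = (
        "select 'table' as kind, tablename as name"
        "  from pg_tables where tablename = 'pgxc_redistb'"
        " union all "
        "select 'schema', nspname"
        "  from pg_namespace where nspname = 'data_redis';"
    )
    # any returned row means redistribution residue in that database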
diff --git a/script/gspylib/inspection/items/database/CheckPgxcgroup.py b/script/gspylib/inspection/items/database/CheckPgxcgroup.py
new file mode 100644
index 0000000..090c8f4
--- /dev/null
+++ b/script/gspylib/inspection/items/database/CheckPgxcgroup.py
@@ -0,0 +1,52 @@
+# -*- coding:utf-8 -*-
+# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+#
+# openGauss is licensed under Mulan PSL v2.
+# You can use this software according to the terms
+# and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+#
+#          http://license.coscl.org.cn/MulanPSL2
+#
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+# ----------------------------------------------------------------------------
+from gspylib.inspection.common import SharedFuncs
+from gspylib.inspection.common.CheckItem import BaseItem
+from gspylib.inspection.common.CheckResult import ResultStatus
+from gspylib.common.ErrorCode import ErrorCode
+
+
+class CheckPgxcgroup(BaseItem):
+    def __init__(self):
+        super(CheckPgxcgroup, self).__init__(self.__class__.__name__)
+        self.version = None
+
+    def preCheck(self):
+        # check the threshold was set correctly
+        if (not self.threshold.__contains__('version')):
+            raise Exception(ErrorCode.GAUSS_530["GAUSS_53013"] % "version")
+        self.version = self.threshold['version']
+
+    def doCheck(self):
+        if (self.version == "V1R7C10"):
+            sqlcmd = "select count(group_name) from pgxc_group " \
+                     "where in_redistribution='true' OR in_redistribution='y';"
+        else:
+            sqlcmd = "select count(group_name) " \
+                     "from pgxc_group " \
+                     "where in_redistribution='y' OR in_redistribution='t';"
+        self.result.raw = sqlcmd
+        output = SharedFuncs.runSqlCmd(sqlcmd, self.user, "", self.port,
+                                       self.tmpPath, "postgres",
+                                       self.mpprcFile)
+
+        if (output != '0'):
+            self.result.rst = ResultStatus.NG
+            self.result.val = "The cluster has not completed redistribution."
+        else:
+            self.result.rst = ResultStatus.OK
+            self.result.val = "The cluster has been redistributed."
diff --git a/script/gspylib/inspection/items/database/CheckReturnType.py b/script/gspylib/inspection/items/database/CheckReturnType.py
new file mode 100644
index 0000000..e98eddd
--- /dev/null
+++ b/script/gspylib/inspection/items/database/CheckReturnType.py
@@ -0,0 +1,41 @@
+# -*- coding:utf-8 -*-
+# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+#
+# openGauss is licensed under Mulan PSL v2.
+# You can use this software according to the terms
+# and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+#
+#          http://license.coscl.org.cn/MulanPSL2
+#
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+# ----------------------------------------------------------------------------
+from gspylib.inspection.common import SharedFuncs
+from gspylib.inspection.common.CheckItem import BaseItem
+from gspylib.inspection.common.CheckResult import ResultStatus
+
+
+class CheckReturnType(BaseItem):
+    def __init__(self):
+        super(CheckReturnType, self).__init__(self.__class__.__name__)
+
+    def doCheck(self):
+        sqlcmd = "select proname from pg_proc, pg_type " \
+                 "where pg_proc.prorettype=pg_type.oid " \
+                 "and pg_type.typisdefined=false;"
+        self.result.raw = sqlcmd
+        output = SharedFuncs.runSqlCmd(sqlcmd, self.user, "", self.port,
+                                       self.tmpPath, "postgres",
+                                       self.mpprcFile)
+        if not output:
+            self.result.rst = ResultStatus.OK
+            self.result.val = "User-defined functions " \
+                              "do not contain illegal return types."
+        else:
+            self.result.rst = ResultStatus.NG
+            self.result.val = "Some user-defined functions contain " \
+                              "illegal return types:\n%s" % output
diff --git a/script/gspylib/inspection/items/database/CheckSysTabSize.py b/script/gspylib/inspection/items/database/CheckSysTabSize.py
new file mode 100644
index 0000000..679cb5e
--- /dev/null
+++ b/script/gspylib/inspection/items/database/CheckSysTabSize.py
@@ -0,0 +1,144 @@
+# -*- coding:utf-8 -*-
+# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+#
+# openGauss is licensed under Mulan PSL v2.
+# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +from gspylib.inspection.common import SharedFuncs +from gspylib.inspection.common.CheckItem import BaseItem +from gspylib.inspection.common.CheckResult import ResultStatus +import os +from gspylib.os.gsfile import g_file +from gspylib.os.gsplatform import g_Platform + +DUMMY_STANDBY_INSTANCE = 2 +INSTANCE_ROLE_COODINATOR = 3 + + +class CheckSysTabSize(BaseItem): + def __init__(self): + super(CheckSysTabSize, self).__init__(self.__class__.__name__) + + def doCheck(self): + instance = [] + allDisk = [] + + # Get all disk and the avail size + cmd_df = "df -B M" + diskinfo = SharedFuncs.runShellCmd(cmd_df, self.user, self.mpprcFile) + # split with \n and remove the title + diskList_space = diskinfo.split("\n") + diskList_space.remove(diskList_space[0]) + # loop the list, remove space and remove the size unit "MB", + # only keep disk path and avail size + for disk_space in diskList_space: + disk = disk_space.split() + disk_new = [] + disk_new.append(disk[0]) + disk_new.append(int(disk[3].replace("M", ""))) + allDisk.append(disk_new) + + # Get the port and datadir list of instance + nodeInfo = self.cluster.getDbNodeByName(self.host) + CN = nodeInfo.coordinators + # check if CN exists + if (len(CN) > 0): + instance.append(CN[0]) + else: + # no CN in instance, do nothing + pass + for DnInstance in nodeInfo.datanodes: + if (DnInstance.instanceType != DUMMY_STANDBY_INSTANCE): + instance.append(DnInstance) + + # check if no instances in this node + if (len(instance) == 0): + return + else: + pass + + for inst in instance: + cmd_dir = g_Platform.getDiskFreeCmd(inst.datadir) + result = SharedFuncs.runShellCmd(cmd_dir, self.user, + self.mpprcFile) + diskInfo_withspace = result.split("\n") + diskInfo = diskInfo_withspace[1].split() + for disk in allDisk: + if (diskInfo[0] == disk[0]): + disk.append(inst) + masterDnList = SharedFuncs.getMasterDnNum(self.user, self.mpprcFile) + # Get the database in the node, remove template0 + sqldb = "select datname from pg_database;" + needm = False + if (instance[0].instanceRole == INSTANCE_ROLE_COODINATOR): + needm = False + elif (instance[0].instanceId in masterDnList): + needm = False + else: + needm = True + output = SharedFuncs.runSqlCmd(sqldb, self.user, "", instance[0].port, + self.tmpPath, "postgres", + self.mpprcFile, needm) + dbList = output.split("\n") + dbList.remove("template0") + + # loop all database with port list + value = "" + Flag = [] + for disk in allDisk: + sumSize = 0 + for inst in disk[2:]: + for db in dbList: + # Calculate the size with sql cmd + cmd = "select sum(pg_total_relation_size(oid)/1024)/1024" \ + " from pg_class where oid<16384 and relkind='r';" + needm = False + if (inst.instanceRole == INSTANCE_ROLE_COODINATOR): + needm = False + elif (inst.instanceId in masterDnList): + needm = False + else: + needm = True + output = SharedFuncs.runSqlCmd(cmd, self.user, "", + inst.port, self.tmpPath, db, + self.mpprcFile, needm) + sumSize = sumSize + float(output) + # Calculate the size of datadir + strdir = inst.datadir + clog = 
g_file.getDirSize(os.path.join(strdir, 'pg_clog'), "M") + size_clog = int(clog[0].replace("M", "")) + xlog = g_file.getDirSize(os.path.join(strdir, 'pg_xlog'), "M") + size_xlog = int(xlog[0].replace("M", "")) + sumSize = sumSize + size_clog + size_xlog + if (sumSize == 0): + continue + # Compare system table size with avail disk size + if (sumSize < disk[1]): + Flag.append(True) + FileSystem = "FileSystem: %s" % disk[0] + SystemTableSize = "SystemTableSize: %sM" % sumSize + DiskAvailSize = "DiskAvailSize: %sM" % disk[1] + value += FileSystem.ljust(35) + SystemTableSize.ljust(35) \ + + DiskAvailSize.ljust(35) + "Status: OK; \n" + elif (sumSize >= disk[1]): + Flag.append(False) + FileSystem = "FileSystem: %s" % disk[0] + SystemTableSize = "SystemTableSize: %sM" % sumSize + DiskAvailSize = "DiskAvailSize: %sM" % disk[1] + value += FileSystem.ljust(35) + SystemTableSize.ljust(35) \ + + DiskAvailSize.ljust(35) + "Status: NG; \n" + self.result.val = value + if (False not in Flag): + self.result.rst = ResultStatus.OK + else: + self.result.rst = ResultStatus.NG diff --git a/script/gspylib/inspection/items/database/CheckSysTable.py b/script/gspylib/inspection/items/database/CheckSysTable.py new file mode 100644 index 0000000..22f340b --- /dev/null +++ b/script/gspylib/inspection/items/database/CheckSysTable.py @@ -0,0 +1,145 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. 
+# ----------------------------------------------------------------------------
+import os
+import grp
+import pwd
+from multiprocessing.dummy import Pool as ThreadPool
+from gspylib.inspection.common import SharedFuncs
+from gspylib.inspection.common.CheckItem import BaseItem
+from gspylib.inspection.common.CheckResult import ResultStatus
+from gspylib.common.Common import DefaultValue
+from gspylib.common.ErrorCode import ErrorCode
+from gspylib.inspection.common.Exception import CheckNAException
+from gspylib.os.gsfile import g_file
+
+# cn
+INSTANCE_ROLE_COODINATOR = 3
+# dn
+INSTANCE_ROLE_DATANODE = 4
+
+MASTER_INSTANCE = 0
+
+
+class CheckSysTable(BaseItem):
+    def __init__(self):
+        super(CheckSysTable, self).__init__(self.__class__.__name__)
+        self.database = None
+
+    def preCheck(self):
+        # check the threshold was set correctly
+        if (not self.threshold.__contains__('database')):
+            raise Exception(ErrorCode.GAUSS_530["GAUSS_53013"]
+                            % "threshold database")
+        self.database = self.threshold['database']
+
+    def checkSingleSysTable(self, Instance):
+        tablelist = ["pg_attribute", "pg_class", "pg_constraint",
+                     "pg_partition", "pgxc_class", "pg_index", "pg_stats"]
+        localPath = os.path.dirname(os.path.realpath(__file__))
+        resultMap = {}
+        # initialize so the cleanup in the except branch is always safe
+        sqlFile = resFile = ""
+        try:
+            for i in tablelist:
+                sqlFile = "%s/sqlFile_%s_%s.sql" % (
+                    self.tmpPath, i, Instance.instanceId)
+                resFile = "%s/resFile_%s_%s.out" % (
+                    self.tmpPath, i, Instance.instanceId)
+                g_file.createFile(sqlFile, True, DefaultValue.SQL_FILE_MODE)
+                g_file.createFile(resFile, True, DefaultValue.SQL_FILE_MODE)
+                g_file.changeOwner(self.user, sqlFile)
+                g_file.changeOwner(self.user, resFile)
+                sql = "select * from pg_table_size('%s');" % i
+                sql += "select count(*) from %s;" % i
+                sql += "select * from pg_column_size('%s');" % i
+                g_file.writeFile(sqlFile, [sql])
+
+                cmd = "gsql -d %s -p %s -f %s --output %s -t -A -X" % (
+                    self.database, Instance.port, sqlFile, resFile)
+                if (self.mpprcFile != "" and self.mpprcFile is not None):
+                    cmd = "source '%s' && %s" % (self.mpprcFile, cmd)
+                SharedFuncs.runShellCmd(cmd, self.user)
+
+                resLines = g_file.readFile(resFile)
+                g_file.removeFile(sqlFile)
+                g_file.removeFile(resFile)
+
+                size = resLines[0].strip()
+                line = resLines[1].strip()
+                width = resLines[2].strip()
+                Role = ""
+                if (Instance.instanceRole == INSTANCE_ROLE_COODINATOR):
+                    Role = "CN"
+                elif (Instance.instanceRole == INSTANCE_ROLE_DATANODE):
+                    Role = "DN"
+                instanceName = "%s_%s" % (Role, Instance.instanceId)
+                resultMap[i] = [instanceName, size, line, width]
+            return resultMap
+        except Exception as e:
+            if os.path.exists(sqlFile):
+                g_file.removeFile(sqlFile)
+            if os.path.exists(resFile):
+                g_file.removeFile(resFile)
+            raise Exception(str(e))
+
+    def checkSysTable(self):
+        primaryDNidList = []
+        nodeInfo = self.cluster.getDbNodeByName(self.host)
+        CN = nodeInfo.coordinators
+        masterDnList = SharedFuncs.getMasterDnNum(self.user, self.mpprcFile)
+        for DnInstance in nodeInfo.datanodes:
+            if (DnInstance.instanceId in masterDnList):
+                primaryDNidList.append(DnInstance)
+        if (len(CN) < 1 and len(primaryDNidList) < 1):
+            raise CheckNAException(
+                "There is no primary database node instance in the "
+                "current node.")
+
+        # test database connection
+        for Instance in (CN + primaryDNidList):
+            if not Instance:
+                continue
+            sqlcmd = "select pg_sleep(1);"
+            SharedFuncs.runSqlCmd(sqlcmd, self.user, "", Instance.port,
+                                  self.tmpPath, self.database, self.mpprcFile)
+        outputList = []
+        pool = ThreadPool(DefaultValue.getCpuSet())
+        results = pool.map(self.checkSingleSysTable, CN + primaryDNidList)
+        pool.close()
+        pool.join()
+        for result in results:
+            if (result):
+                outputList.append(result)
+        # dict objects are not orderable in Python 3;
+        # sort the result maps by instance name instead
+        outputList.sort(key=lambda m: sorted(v[0] for v in m.values()))
+        return outputList
+
+    def doCheck(self):
+        flag = True
+        resultStr = ""
+        resultStr += "Instance        table           size            " \
+                     "row      width    row*width\n"
+        outputList = self.checkSysTable()
+        for resultMap in outputList:
+            for table in resultMap.keys():
+                resultStr += "%s %s %s %s %s %s\n" % (
+                    resultMap[table][0], table.ljust(15),
+                    resultMap[table][1].ljust(15),
+                    resultMap[table][2].ljust(8),
+                    resultMap[table][3].ljust(5),
+                    int(resultMap[table][2]) * int(resultMap[table][3]))
+
+        self.result.val = resultStr
+        self.result.raw = resultStr
+        self.result.rst = ResultStatus.OK
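checkSysTable above fans one worker per CN/primary-DN instance through
multiprocessing.dummy's thread pool (the patch sizes the pool with
DefaultValue.getCpuSet()). A stripped-down sketch of that fan-out pattern
with a generic worker and instance list, not the patch's own types:

    from multiprocessing.dummy import Pool as ThreadPool

    def probe(instance):
        # placeholder for a per-instance query such as checkSingleSysTable
        return {"instance": instance, "ok": True}

    instances = ["cn_5001", "dn_6001", "dn_6002"]
    pool = ThreadPool(len(instances))  # threads, not processes
    try:
        results = pool.map(probe, instances)
    finally:
        pool.close()
        pool.join()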
diff --git a/script/gspylib/inspection/items/database/CheckSysadminUser.py b/script/gspylib/inspection/items/database/CheckSysadminUser.py
new file mode 100644
index 0000000..442fcaf
--- /dev/null
+++ b/script/gspylib/inspection/items/database/CheckSysadminUser.py
@@ -0,0 +1,40 @@
+# coding: UTF-8
+# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+#
+# openGauss is licensed under Mulan PSL v2.
+# You can use this software according to the terms
+# and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+#
+#          http://license.coscl.org.cn/MulanPSL2
+#
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+# ----------------------------------------------------------------------------
+from gspylib.inspection.common import SharedFuncs
+from gspylib.inspection.common.CheckItem import BaseItem
+from gspylib.inspection.common.CheckResult import ResultStatus
+
+
+class CheckSysadminUser(BaseItem):
+    def __init__(self):
+        super(CheckSysadminUser, self).__init__(self.__class__.__name__)
+
+    def doCheck(self):
+        sqlcmd = "select rolname from pg_roles where rolsystemadmin=True and" \
+                 " rolname !='%s';" % self.user
+        self.result.raw = sqlcmd
+        output = SharedFuncs.runSqlCmd(sqlcmd, self.user, "", self.port,
+                                       self.tmpPath, "postgres",
+                                       self.mpprcFile)
+        if (output):
+            self.result.rst = ResultStatus.NG
+            self.result.val = "There are sysadmin users other than %s:\n%s" % (
+                self.user, output)
+        else:
+            self.result.rst = ResultStatus.OK
+            self.result.val = "No sysadmin user exists except %s." \
+                              % self.user
diff --git a/script/gspylib/inspection/items/database/CheckTDDate.py b/script/gspylib/inspection/items/database/CheckTDDate.py
new file mode 100644
index 0000000..5995ab5
--- /dev/null
+++ b/script/gspylib/inspection/items/database/CheckTDDate.py
@@ -0,0 +1,74 @@
+# -*- coding:utf-8 -*-
+# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+#
+# openGauss is licensed under Mulan PSL v2.
+# You can use this software according to the terms
+# and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+#
+#          http://license.coscl.org.cn/MulanPSL2
+#
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+# ---------------------------------------------------------------------------- +from gspylib.inspection.common import SharedFuncs +from gspylib.inspection.common.CheckItem import BaseItem +from gspylib.inspection.common.CheckResult import ResultStatus + + +class CheckTDDate(BaseItem): + def __init__(self): + super(CheckTDDate, self).__init__(self.__class__.__name__) + + def doCheck(self): + databaseListSql = "select datname from pg_database " \ + "where datcompatibility = 'TD';" + self.result.raw = databaseListSql + output = SharedFuncs.runSqlCmd(databaseListSql, self.user, "", + self.port, self.tmpPath, "postgres", + self.mpprcFile) + if (not output.strip()): + self.result.val = "The database with TD mode does not exist." + self.result.rst = ResultStatus.OK + return + dbList = output.strip().split("\n") + self.result.raw = "The database of TD mode is: %s\n" % ','.join( + output.split('\n')) + resultStr = "" + sqlcmd = """ +select ns.nspname as namespace, c.relname as table_name, +attr.attname as column_name +from pg_attribute attr, pg_class c , pg_namespace ns +where attr.attrelid = c.oid +and ns.oid = c.relnamespace +and array_to_string(c.reloptions, ', ') like '%orientation=orc%' +and attr.atttypid = (select oid from pg_type where typname='date') +union all +select ns.nspname as namespace, c.relname as table_name, +attr.attname as column_name +from pg_attribute attr, pg_class c , pg_namespace ns, pg_foreign_table ft +where attr.attrelid = c.oid +and c.oid = ft.ftrelid +and ns.oid = c.relnamespace +and array_to_string(ft.ftoptions, ', ') like '%format=orc%' +and attr.atttypid = (select oid from pg_type where typname='date'); +""" + for databaseName in dbList: + output = SharedFuncs.runSqlCmd(sqlcmd, self.user, "", self.port, + self.tmpPath, databaseName, + self.mpprcFile, True) + if (output): + self.result.raw += "%s: %s" % (databaseName, output) + tableList = output.split('\n') + resultStr += "database[%s]: %s\n" % ( + databaseName, ",".join(tableList)) + if (resultStr): + self.result.rst = ResultStatus.NG + self.result.val = resultStr + else: + self.result.rst = ResultStatus.OK + self.result.val = "The orc table with the date column " \ + "in the TD schema database does not exist." diff --git a/script/gspylib/inspection/items/database/CheckTableSkew.py b/script/gspylib/inspection/items/database/CheckTableSkew.py new file mode 100644 index 0000000..92d38b7 --- /dev/null +++ b/script/gspylib/inspection/items/database/CheckTableSkew.py @@ -0,0 +1,139 @@ +# coding: UTF-8 +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. 
+# ---------------------------------------------------------------------------- +import os +import json +from gspylib.inspection.common import SharedFuncs +from gspylib.inspection.common.CheckItem import BaseItem +from gspylib.inspection.common.CheckResult import ResultStatus +from gspylib.common.Common import DefaultValue, ClusterCommand +from gspylib.common.ErrorCode import ErrorCode + + +class CheckTableSkew(BaseItem): + def __init__(self): + super(CheckTableSkew, self).__init__(self.__class__.__name__) + + def doCheck(self): + security_mode_value = DefaultValue.getSecurityMode() + if (security_mode_value == "on"): + secMode = True + else: + secMode = False + if (self.cluster.isSingleInstCluster()): + if (secMode): + finalresult = "" + dbList = [] + sqlList = [] + sqlPath = os.path.realpath( + os.path.join(os.path.split(os.path.realpath(__file__))[0], + "../../lib/checkblacklist/")) + sqlFileName = os.path.join(sqlPath, "GetTableSkew.sql") + if (os.path.exists(sqlFileName)): + try: + with open(sqlFileName, "r") as fp: + lines = fp.read() + sqlList = lines.split("--sqlblock") + sqlList.pop() + except Exception as e: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50204"] + % ("file:%s,Error:%s" + % (sqlFileName, str(e)))) + else: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50219"] + % ("sql file:%s" % sqlFileName)) + sqldb = "select datname from pg_database;" + (status, result, error) = ClusterCommand.excuteSqlOnLocalhost( + self.port, sqldb) + if (status != 2): + raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] + % sqldb + (" Error:%s" % error)) + recordsCount = len(result) + for i in range(0, recordsCount): + dbList.append(result[i][0]) + dbList.remove("template0") + dbList.remove("template1") + for db in dbList: + schemaTable = [] + for sql in sqlList: + sql = "set client_min_messages='error';\n" + sql + ClusterCommand.excuteSqlOnLocalhost(self.port, sql, db) + sql = "SELECT schemaname , tablename FROM " \ + "PUBLIC.pgxc_analyzed_skewness WHERE " \ + "skewness_tuple > 100000;" + (status, result, + error) = ClusterCommand.excuteSqlOnLocalhost(self.port, + sql, db) + if (status != 2): + raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] + % sql + (" Error:%s" % error)) + else: + for i in iter(result): + schema = i[0] + table = i[1] + schemaTable.append("%s.%s" % (schema, table)) + if (schemaTable): + finalresult += "%s:\n%s\n" % ( + db, "\n".join(schemaTable)) + if (finalresult): + self.result.rst = ResultStatus.WARNING + self.result.val = "The result is not ok:\n%s" % finalresult + else: + self.result.rst = ResultStatus.OK + self.result.val = "Data is well distributed" + else: + finalresult = "" + sqlPath = os.path.realpath( + os.path.join(os.path.split(os.path.realpath(__file__))[0], + "../../lib/checkblacklist/")) + sqlFileName = os.path.join(sqlPath, "GetTableSkew.sql") + sqldb = "select datname from pg_database;" + output = SharedFuncs.runSqlCmd(sqldb, self.user, "", self.port, + self.tmpPath, "postgres", + self.mpprcFile) + dbList = output.split("\n") + dbList.remove("template0") + dbList.remove("template1") + for db in dbList: + db = db.replace("$", "\\$") + cmd = "gsql -d %s -p %s -f %s" % ( + db, self.port, sqlFileName) + tmpout = "" + output = SharedFuncs.runShellCmd(cmd, self.user, + self.mpprcFile) + if (output.find("(0 rows)") < 0): + tmpresult = output.splitlines() + idxS = 0 + idxE = 0 + for idx in range(len(tmpresult)): + if (not tmpresult[idx].find("---+---") < 0): + idxS = idx - 1 + if (tmpresult[idx].find("row)") > 0 or tmpresult[ + idx].find("rows)") > 0): + idxE = 
idx + for i in range(idxS, idxE): + tmpout += "%s\n" % tmpresult[i] + finalresult += "%s:\n%s\n" % (db, tmpout) + if (finalresult): + self.result.rst = ResultStatus.WARNING + self.result.val = "Data is not well distributed:\n%s" \ + % finalresult + else: + self.result.rst = ResultStatus.OK + self.result.val = "Data is well distributed" + else: + self.result.rst = ResultStatus.NA + self.result.val = "First cn is not in this host" diff --git a/script/gspylib/inspection/items/database/CheckTableSpace.py b/script/gspylib/inspection/items/database/CheckTableSpace.py new file mode 100644 index 0000000..525f2de --- /dev/null +++ b/script/gspylib/inspection/items/database/CheckTableSpace.py @@ -0,0 +1,113 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +import os +from gspylib.common.Common import DefaultValue +from gspylib.inspection.common.CheckItem import BaseItem +from gspylib.inspection.common.CheckResult import ResultStatus + + +class CheckTableSpace(BaseItem): + def __init__(self): + super(CheckTableSpace, self).__init__(self.__class__.__name__) + + def getClusterDirectorys(self, dbNode): + """ + function : Get cluster all directorys + input : NA + output : List + """ + nodeDirs = [] + # including cm_server, cm_agent, cn, dn, gtm, etcd, ssd + for dbInst in dbNode.datanodes: + nodeDirs.append(dbInst.datadir) + if (hasattr(dbInst, 'ssdDir') and len(dbInst.ssdDir) != 0): + nodeDirs.append(dbInst.ssdDir) + return nodeDirs + + def doCheck(self): + self.result.val = "" + nodeInfo = self.cluster.getDbNodeByName(self.host) + clusterPathList = self.getClusterDirectorys(nodeInfo) + clusterPathList.append(self.cluster.appPath) + clusterPathList.append(self.cluster.logPath) + clusterPathList.append(DefaultValue.getEnv('GPHOME')) + clusterPathList.append(DefaultValue.getEnv('PGHOST')) + + nodeInfo = self.cluster.getDbNodeByName(self.host) + if self.cluster.isSingleInstCluster(): + dirPath = nodeInfo.datanodes[0].datadir + else: + dirPath = nodeInfo.coordinators[0].datadir + tableSpaceDir = os.path.join(dirPath, "pg_tblspc") + tableSpaceList = os.listdir(tableSpaceDir) + tablespacePaths = [] + if (len(tableSpaceList)): + for filename in tableSpaceList: + if (os.path.islink(os.path.join(tableSpaceDir, filename))): + linkDir = os.readlink( + os.path.join(tableSpaceDir, filename)) + if (os.path.isdir(linkDir)): + tablespacePaths.append(linkDir) + + flag = "Normal" + for tableSpace in tablespacePaths: + if (tableSpace.find(' ') >= 0): + flag = "Error" + self.result.val += "Table space path[%s] contains spaces.\n" \ + % tableSpace + + # Support create tablespace in pg_location dir for V1R7 + if (tableSpace.find(os.path.join(dirPath, "pg_location")) == 0): + continue + tableSpaces = tableSpace.split('/') + for clusterPath in clusterPathList: + clusterPaths = clusterPath.split('/') + if (tableSpace.find(clusterPath) == 0 and + tableSpaces[:len(clusterPaths)] == clusterPaths): + if (flag == "Normal"): + flag = "Warning" + 
self.result.val += "Table space path[%s] and cluster " \ + "path[%s] are nested.\n" % ( + tableSpace, clusterPath) + elif (clusterPath.find(tableSpace) == 0 and + clusterPaths[:len(tableSpaces)] == tableSpaces): + flag = "Error" + self.result.val += "Table space path[%s] and cluster " \ + "path[%s] are nested.\n" % (tableSpace, + clusterPath) + else: + continue + for tableSpace1 in tablespacePaths: + tableSpaces1 = tableSpace1.split('/') + for tableSpace2 in tablespacePaths: + if (tableSpace1 == tableSpace2): + continue + tableSpaces2 = tableSpace2.split('/') + if (tableSpace1.find(tableSpace2) == 0 and + tableSpaces1[:len(tableSpaces2)] == tableSpaces2): + flag = "Error" + self.result.val += "Table space path[%s] and table space" \ + " path[%s] are nested.\n" \ + % (tableSpace1, tableSpace2) + + if (flag == "Error"): + self.result.rst = ResultStatus.NG + elif (flag == "Warning"): + self.result.rst = ResultStatus.WARNING + else: + self.result.rst = ResultStatus.OK + self.result.val = "All table space path is normal." diff --git a/script/gspylib/inspection/items/database/CheckUnAnalyzeTable.py b/script/gspylib/inspection/items/database/CheckUnAnalyzeTable.py new file mode 100644 index 0000000..e670b8c --- /dev/null +++ b/script/gspylib/inspection/items/database/CheckUnAnalyzeTable.py @@ -0,0 +1,173 @@ +# coding: UTF-8 +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +import os +import json +import subprocess +from gspylib.inspection.common import SharedFuncs +from gspylib.inspection.common.CheckItem import BaseItem +from gspylib.inspection.common.CheckResult import ResultStatus +from gspylib.common.Common import DefaultValue, ClusterCommand +from gspylib.common.ErrorCode import ErrorCode + +g_result = {} + + +class CheckUnAnalyzeTable(BaseItem): + def __init__(self): + super(CheckUnAnalyzeTable, self).__init__(self.__class__.__name__) + + def doCheck(self): + global g_result + if (self.cluster.isSingleInstCluster()): + sql1 = """create or replace FUNCTION get_antiempty_tables( +OUT result_tables text +) +returns text +as $$ +declare + test_sql text; + type cursor_type is ref cursor; + cur_sql_stmts cursor_type; + cur_test_sql_result cursor_type; + test_sql_result int; + result_tables text := ''; +begin + drop table if exists to_be_selected_check; + create temp table to_be_selected_check as select 'select 1 from ' || + nspname || '.' 
|| relname || ' limit 1;' as stmt from pg_class c, + pg_namespace n where c.relnamespace=n.oid and c.reltuples=0 + AND n.nspname <> 'pg_catalog' + AND n.nspname <> 'cstore' + AND n.nspname <> 'information_schema' + AND n.nspname <> 'schema_cur_table_col' + AND n.nspname <> 'schema_cur_table' + AND c.relkind='r' + AND c.relpersistence='p' + AND n.nspname !~ '^pg_toast'; + + open cur_sql_stmts for 'select stmt from to_be_selected_check'; + loop + fetch cur_sql_stmts into test_sql; + exit when cur_sql_stmts%notfound; + open cur_test_sql_result for test_sql; + fetch cur_test_sql_result into test_sql_result; + if not cur_test_sql_result%notfound and 0 = position( + 'to_be_selected_check' in test_sql) then + result_tables = result_tables || replace(replace(replace( + test_sql, 'select 1 from ', ''), ' limit 1', ''), ';', CHR(10)); + end if ; + close cur_test_sql_result; + end loop; + close cur_sql_stmts; + drop table if exists to_be_selected_check; + return result_tables; +end; $$ +LANGUAGE 'plpgsql';""" + + sql2 = "select get_antiempty_tables();" + sqldb = "select datname from pg_database;" + security_mode_value = DefaultValue.getSecurityMode() + if (security_mode_value == "on"): + secMode = True + else: + secMode = False + if (secMode): + dbList = [] + (status, result, error) = ClusterCommand.excuteSqlOnLocalhost( + self.port, sqldb) + if (status != 2): + raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] + % sqldb + (" Error:%s" % error)) + recordsCount = len(result) + for i in range(0, recordsCount): + dbList.append(result[i][0]) + dbList.remove("template0") + finalresult = "" + for db in dbList: + tablelist = [] + ClusterCommand.excuteSqlOnLocalhost(self.port, sql1, db) + ClusterCommand.excuteSqlOnLocalhost( + self.port, "set client_min_messages='error';create " + "table to_be_selected_check(test int);", db) + sql2 = "set client_min_messages='error';" + sql2 + (status, result, + error) = ClusterCommand.excuteSqlOnLocalhost(self.port, + sql2, db) + if (status != 2): + raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] + % sql2 + (" Error:%s" % error)) + if (result and result[0][0]): + for tmptable in result[0][0].splitlines(): + if (db == "postgres" and + tmptable.upper().startswith("PMK.")): + pass + else: + tablelist.append(tmptable) + if (tablelist): + finalresult += "%s:\n%s\n" % ( + db, "\n".join(tablelist)) + g_result[db] = tablelist + if (finalresult): + self.result.val = "The result is not ok:\n%s" % finalresult + self.result.rst = ResultStatus.NG + else: + self.result.val = "All table analyzed" + self.result.rst = ResultStatus.OK + else: + # Get the database in the node, remove template0 + output = SharedFuncs.runSqlCmd(sqldb, self.user, "", self.port, + self.tmpPath, "postgres", + self.mpprcFile) + dbList = output.split("\n") + dbList.remove("template0") + finalresult = "" + for db in dbList: + tablelist = [] + SharedFuncs.runSqlCmd(sql1, self.user, "", self.port, + self.tmpPath, db, self.mpprcFile) + output = SharedFuncs.runSqlCmd(sql2, self.user, "", + self.port, self.tmpPath, db, + self.mpprcFile) + for tmptable in output.splitlines(): + if (db == "postgres" and tmptable.upper().startswith( + "PMK.")): + pass + else: + tablelist.append(tmptable) + if (tablelist): + finalresult += "%s:\n%s\n" % (db, "\n".join(tablelist)) + g_result[db] = tablelist + if (finalresult): + self.result.val = "Tables unanalyzed:\n%s" % finalresult + self.result.rst = ResultStatus.NG + else: + self.result.val = "All table analyzed" + self.result.rst = ResultStatus.OK + else: + self.result.rst = 
ResultStatus.NA + self.result.val = "First cn is not in this host" + + def doSet(self): + resultStr = "" + for db in g_result.keys(): + for table in g_result[db]: + sql = "analyze %s;" % table + output = SharedFuncs.runSqlCmd(sql, self.user, "", self.port, + self.tmpPath, db, + self.mpprcFile) + resultStr += "%s:%s Result: %s.\n" % (db, table, output) + self.result.val = "Analyze %s successfully." % resultStr diff --git a/script/gspylib/inspection/items/database/CheckXid.py b/script/gspylib/inspection/items/database/CheckXid.py new file mode 100644 index 0000000..572554c --- /dev/null +++ b/script/gspylib/inspection/items/database/CheckXid.py @@ -0,0 +1,39 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +from gspylib.inspection.common import SharedFuncs +from gspylib.inspection.common.CheckItem import BaseItem +from gspylib.inspection.common.CheckResult import ResultStatus + + +class CheckXid(BaseItem): + def __init__(self): + super(CheckXid, self).__init__(self.__class__.__name__) + + def doCheck(self): + sqlcmd = "select txid_current();" + output = SharedFuncs.runSqlCmd(sqlcmd, self.user, "", self.port, + self.tmpPath, "postgres", + self.mpprcFile) + num = int(output) + self.result.raw = sqlcmd + self.result.val = "The xid value is %s" % output + if (num <= 1000000000): + self.result.rst = ResultStatus.OK + elif (num <= 1800000000): + self.result.rst = ResultStatus.WARNING + else: + self.result.rst = ResultStatus.NG diff --git a/script/gspylib/inspection/items/device/CheckBlockdev.py b/script/gspylib/inspection/items/device/CheckBlockdev.py new file mode 100644 index 0000000..099bf97 --- /dev/null +++ b/script/gspylib/inspection/items/device/CheckBlockdev.py @@ -0,0 +1,114 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. 
+# ---------------------------------------------------------------------------- +import subprocess +from gspylib.inspection.common import SharedFuncs +from gspylib.inspection.common.CheckItem import BaseItem +from gspylib.inspection.common.CheckResult import ResultStatus +from gspylib.hardware.gsdisk import g_disk +from gspylib.os.gsOSlib import g_OSlib + +expectedReadAhead = "16384" +g_needRepair = [] + + +class blockdev: + def __init__(self): + """ + function : Init class blockdev + input : NA + output : NA + """ + self.ra = dict() # key is device name value is getra value + self.errormsg = '' + + +class CheckBlockdev(BaseItem): + def __init__(self): + super(CheckBlockdev, self).__init__(self.__class__.__name__) + + def getDevices(self): + """ + """ + cmd = "fdisk -l 2>/dev/null | grep \"Disk /dev/\"" \ + " | grep -v \"/dev/mapper/\" | awk '{ print $2 }' " \ + "| awk -F'/' '{ print $NF }' | sed s/:$//g" + output = SharedFuncs.runShellCmd(cmd) + devList = output.split('\n') + return devList + + def collectBlockdev(self): + """ + function : Collector blockdev + input : NA + output : Instantion + """ + data = blockdev() + devices = list() + try: + diskName = '' + # If the directory of '/' is a disk array, + # all disk prereads will be set + devlist = self.getDevices() + allDiskList = g_disk.getMountInfo() + for diskInfo in allDiskList: + if (diskInfo.mountpoint == '/'): + diskName = diskInfo.device.replace('/dev/', '') + for dev in devlist: + if (dev.strip() == diskName.strip()): + continue + devices.append("/dev/%s" % dev) + except Exception as e: + data.errormsg = e.__str__() + for d in devices: + data.ra[d] = g_OSlib.getDeviceIoctls(d) + + return data + + def doCheck(self): + global g_needRepair + data = self.collectBlockdev() + flag = True + abnormalMsg = "" + resultStr = "" + for dev in data.ra.keys(): + ra = data.ra[dev] + if int(ra) < int(expectedReadAhead): + g_needRepair.append(dev) + abnormalMsg += "On device (%s) 'blockdev readahead'" \ + " RealValue '%s' ExpectedValue '%s'\n" % ( + dev, ra, expectedReadAhead) + flag = False + else: + resultStr += "On device (%s) 'blockdev readahead': '%s' \n" % ( + dev, ra) + if flag: + self.result.rst = ResultStatus.OK + else: + self.result.rst = ResultStatus.NG + self.result.val = abnormalMsg + self.result.raw = abnormalMsg + resultStr + + def doSet(self): + for dev in g_needRepair: + self.SetBlockdev(expectedReadAhead, dev) + + def SetBlockdev(self, expectedValue, devname): + (THPFile, initFile) = SharedFuncs.getTHPandOSInitFile() + cmd = "/sbin/blockdev --setra %s %s " % (expectedReadAhead, devname) + cmd += " && echo \"/sbin/blockdev --setra %s %s\" >> %s" % ( + expectedReadAhead, devname, initFile) + SharedFuncs.runShellCmd(cmd) diff --git a/script/gspylib/inspection/items/device/CheckDiskConfig.py b/script/gspylib/inspection/items/device/CheckDiskConfig.py new file mode 100644 index 0000000..24ab50f --- /dev/null +++ b/script/gspylib/inspection/items/device/CheckDiskConfig.py @@ -0,0 +1,41 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. 
+# See the Mulan PSL v2 for more details.
+# ----------------------------------------------------------------------------
+
+from gspylib.inspection.common import SharedFuncs
+from gspylib.inspection.common.CheckItem import BaseItem
+from gspylib.inspection.common.CheckResult import ResultStatus
+
+
+class CheckDiskConfig(BaseItem):
+    def __init__(self):
+        super(CheckDiskConfig, self).__init__(self.__class__.__name__)
+
+    def doCheck(self):
+        DiskInfoDict = {}
+        ResultStr = ""
+        cmd = "df -h -P | awk '{print $1,$2,$6}'"
+        output = SharedFuncs.runShellCmd(cmd)
+        diskList = output.split('\n')[1:]
+        for disk in diskList:
+            diskInfo = disk.split()
+            DiskInfoDict[diskInfo[0]] = disk
+        keys = DiskInfoDict.keys()
+        sortedKeys = sorted(keys)
+        for diskName in sortedKeys:
+            ResultStr += "%s\n" % DiskInfoDict[diskName]
+        self.result.val = ResultStr
+        self.result.rst = ResultStatus.OK
diff --git a/script/gspylib/inspection/items/device/CheckDiskFormat.py b/script/gspylib/inspection/items/device/CheckDiskFormat.py
new file mode 100644
index 0000000..ad146df
--- /dev/null
+++ b/script/gspylib/inspection/items/device/CheckDiskFormat.py
@@ -0,0 +1,69 @@
+# -*- coding:utf-8 -*-
+# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+#
+# openGauss is licensed under Mulan PSL v2.
+# You can use this software according to the terms
+# and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+#
+#          http://license.coscl.org.cn/MulanPSL2
+#
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+# ----------------------------------------------------------------------------
+from gspylib.inspection.common.CheckItem import BaseItem
+from gspylib.inspection.common.CheckResult import ResultStatus
+from gspylib.hardware.gsdisk import g_disk
+
+
+class CheckDiskFormat(BaseItem):
+    def __init__(self):
+        super(CheckDiskFormat, self).__init__(self.__class__.__name__)
+
+    def doCheck(self):
+        self.result.val = ""
+        self.result.raw = ""
+        xfs_mounts = list()
+        expectedOption = "inode64"
+
+        allDiskList = g_disk.getMountInfo()
+        for disk in allDiskList:
+            if (disk.fstype == 'xfs'):
+                xfs_mounts.append(disk)
+        informationlist = []
+        if xfs_mounts == []:
+            self.result.rst = ResultStatus.OK
+            self.result.val = \
+                "There is no XFS-formatted disk on the current node."
+            return
+        for disk in xfs_mounts:
+            if disk.fstype != "xfs":
+                informationlist.append(
+                    "The device '%s' is not XFS filesystem "
+                    "and is expected to be so." % disk.device)
+                continue
+            is_find = "failed"
+            self.result.raw += "[%s]: type='%s' opts='%s'" % (
+                disk.device, disk.fstype, disk.opts)
+            for opt in disk.opts.split(','):
+                if (opt == expectedOption):
+                    is_find = "success"
+                    break
+                else:
+                    continue
+            if (is_find == "failed"):
+                informationlist.append(
+                    "XFS filesystem on device %s "
+                    "is missing the recommended mount option '%s'." % (
+                        disk.device, expectedOption))
+        self.result.rst = ResultStatus.WARNING
+        if (len(informationlist) != 0):
+            for info in informationlist:
+                self.result.val = self.result.val + '%s' % info
+        else:
+            self.result.rst = ResultStatus.OK
+            self.result.val = "All XFS-formatted disks " \
+                              "are normal on the current node."
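CheckDiskFormat relies on the OM wrapper g_disk.getMountInfo(); outside the
OM tree the same inode64 probe can be done straight from /proc/mounts. A
minimal self-contained sketch (assumes Linux; note that newer kernels mount
XFS with inode64 by default and may not list the option explicitly):

    def xfs_mounts_missing_inode64(mounts_path="/proc/mounts"):
        missing = []
        with open(mounts_path) as f:
            for line in f:
                # fields: device mountpoint fstype options dump pass
                fields = line.split()
                if len(fields) >= 4 and fields[2] == "xfs":
                    if "inode64" not in fields[3].split(","):
                        missing.append(fields[0])
        return missing

    print(xfs_mounts_missing_inode64())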
diff --git a/script/gspylib/inspection/items/device/CheckIOConfigure.py b/script/gspylib/inspection/items/device/CheckIOConfigure.py new file mode 100644 index 0000000..1bc1afb --- /dev/null +++ b/script/gspylib/inspection/items/device/CheckIOConfigure.py @@ -0,0 +1,180 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +import platform +import os +from gspylib.common.Common import DefaultValue +from gspylib.inspection.common import SharedFuncs +from gspylib.inspection.common.CheckItem import BaseItem +from gspylib.inspection.common.CheckResult import ResultStatus +from gspylib.os.gsfile import g_file + +deviceNeedRepair = [] + + +class CheckIOConfigure(BaseItem): + def __init__(self): + super(CheckIOConfigure, self).__init__(self.__class__.__name__) + + def obtainDataDir(self, nodeInfo): + dataDirList = [] + for inst in nodeInfo.datanodes: + dataDirList.append(inst.datadir) + for inst in nodeInfo.cmservers: + dataDirList.append(inst.datadir) + for inst in nodeInfo.coordinators: + dataDirList.append(inst.datadir) + for inst in nodeInfo.gtms: + dataDirList.append(inst.datadir) + if (hasattr(nodeInfo, 'etcds')): + for inst in nodeInfo.etcds: + dataDirList.append(inst.datadir) + + dataDirList.append(DefaultValue.getEnv("PGHOST")) + dataDirList.append(DefaultValue.getEnv("GPHOME")) + dataDirList.append(DefaultValue.getEnv("GAUSSHOME")) + dataDirList.append(DefaultValue.getEnv("GAUSSLOG")) + dataDirList.append("/tmp") + return dataDirList + + def obtainDiskDir(self): + cmd = "df -h -P /data* | grep -v 'Mounted' | awk '{print $6}'" + output = SharedFuncs.runShellCmd(cmd) + if output.lower().find("no such") >= 0: + allDiskPath = ["/"] + else: + allDiskPath = output.split('\n') + return allDiskPath + + def getDevices(self): + pathList = [] + devices = [] + diskName = "" + diskDic = {} + diskDic = self.getDisk() + if (self.cluster): + pathList = self.obtainDataDir( + self.cluster.getDbNodeByName(self.host)) + else: + pathList = self.obtainDiskDir() + for path in pathList: + if path.find('No such file or directory') >= 0 or path.find( + 'no file systems processed') >= 0: + self.result.rst = ResultStatus.ERROR + self.result.val += \ + "There are no cluster and no /data* directory." 
+                return
+            cmd = "df -P -i %s" % path
+            output = SharedFuncs.runShellCmd(cmd)
+            # Filesystem    Inodes   IUsed    IFree    IUse%  Mounted on
+            # /dev/xvda2    2363904  233962   2129942  10%    /
+            diskName = output.split('\n')[-1].split()[0]
+            for disk in diskDic.keys():
+                if diskName in diskDic[disk] and disk not in devices:
+                    devices.append(disk)
+        return devices
+
+    def getDisk(self):
+        diskDic = {}
+        cmd = "fdisk -l 2>/dev/null " \
+              "| grep 'Disk /dev/' | grep -v '/dev/mapper/' " \
+              "| awk '{ print $2 }'| awk -F'/' '{ print $NF }'| sed s/:$//g"
+        output = SharedFuncs.runShellCmd(cmd)
+        for disk in output.splitlines():
+            cmd = "fdisk -l 2>/dev/null | grep '%s'" \
+                  "| grep -v '/dev/mapper/'| grep -v 'Disk /dev/'" \
+                  "| awk -F ' ' ' {print $1}'" % disk
+            output = SharedFuncs.runShellCmd(cmd)
+            if output:
+                diskDic[disk] = output.splitlines()
+            else:
+                diskDic[disk] = "/dev/" + disk
+        return diskDic
+
+    def collectIOschedulers(self):
+        devices = set()
+        data = dict()
+        # getDevices() already returns bare device names such as 'sda'
+        for f in self.getDevices():
+            if f:
+                devices.add(f.strip())
+
+        for d in devices:
+            if (not d):
+                continue
+            device = {}
+            scheduler = g_file.readFile("/sys/block/%s/queue/scheduler" % d)[0]
+            words = scheduler.split("[")
+            if len(words) != 2:
+                continue
+            words = words[1].split("]")
+            if len(words) != 2:
+                continue
+            device["request"] = words[0].strip()
+            for dead in scheduler.split():
+                if dead.find("deadline") >= 0:
+                    device["deadvalue"] = dead.split("[")[-1].split("]")[0]
+                else:
+                    continue
+            data[d] = device
+        return data
+
+    def doCheck(self):
+        global deviceNeedRepair
+        deviceNeedRepair = []
+        expectedScheduler = "deadline"
+        data = self.collectIOschedulers()
+        flag = True
+        resultStr = ""
+        for i in data.keys():
+            result = ()
+            # fall back to the default when no deadline variant is listed,
+            # instead of raising KeyError on "deadvalue"
+            expectedScheduler = data[i].get("deadvalue", expectedScheduler)
+            request = data[i]["request"]
+            if (request != expectedScheduler):
+                result = (i, expectedScheduler)
+                deviceNeedRepair.append(result)
+                resultStr += \
+                    "On device (%s) 'IO Request' RealValue '%s' " \
+                    "ExpectedValue '%s'" % (
+                        i, request.strip(), expectedScheduler)
+                flag = False
+        self.result.val = resultStr
+        if flag:
+            self.result.rst = ResultStatus.OK
+            self.result.val = "All disk IO schedulers are deadline."
+        else:
+            self.result.rst = ResultStatus.NG
+
+    def doSet(self):
+        for dev, expectedScheduler in deviceNeedRepair:
+            self.SetIOSchedulers(dev, expectedScheduler)
+
+    def SetIOSchedulers(self, devname, expectedScheduler):
+        """
+        function : Set IO Schedulers
+        input : String
+        output : NA
+        """
+        (THPFile, initFile) = SharedFuncs.getTHPandOSInitFile()
+        cmd = " echo %s >> /sys/block/%s/queue/scheduler" % (
+            expectedScheduler, devname)
+        cmd += " && echo \"echo %s >> /sys/block/%s/queue/scheduler\" >> %s" \
+               % (
+                   expectedScheduler, devname, initFile)
+        SharedFuncs.runShellCmd(cmd)
diff --git a/script/gspylib/inspection/items/device/CheckIOrequestqueue.py b/script/gspylib/inspection/items/device/CheckIOrequestqueue.py
new file mode 100644
index 0000000..4c21314
--- /dev/null
+++ b/script/gspylib/inspection/items/device/CheckIOrequestqueue.py
@@ -0,0 +1,150 @@
+# -*- coding:utf-8 -*-
+# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+#
+# openGauss is licensed under Mulan PSL v2.
+# You can use this software according to the terms
+# and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +import glob +import platform +import os +import subprocess +from gspylib.common.Common import DefaultValue +from gspylib.inspection.common import SharedFuncs +from gspylib.inspection.common.CheckItem import BaseItem +from gspylib.inspection.common.CheckResult import ResultStatus +from gspylib.os.gsfile import g_file + +g_needRepair = [] +expectedScheduler = "32768" + + +class CheckIOrequestqueue(BaseItem): + def __init__(self): + super(CheckIOrequestqueue, self).__init__(self.__class__.__name__) + + def obtainDataDir(self, nodeInfo): + dataDirList = [] + for inst in nodeInfo.datanodes: + dataDirList.append(inst.datadir) + + dataDirList.append(DefaultValue.getEnv("PGHOST")) + dataDirList.append(DefaultValue.getEnv("GPHOME")) + dataDirList.append(DefaultValue.getEnv("GAUSSHOME")) + dataDirList.append(DefaultValue.getEnv("GAUSSLOG")) + dataDirList.append("/tmp") + return dataDirList + + def obtainDisk(self): + """ + function: get disk name by partition + input: partition list + return: disk dict + """ + devices = {} + cmd = "fdisk -l 2>/dev/null | grep \"Disk /dev/\" " \ + "| grep -v \"/dev/mapper/\" | awk '{ print $2 }' " \ + "| awk -F'/' '{ print $NF }' | sed s/:$//g" + output = SharedFuncs.runShellCmd(cmd) + for disk in output.splitlines(): + cmd = "fdisk -l 2>/dev/null | grep \"%s\" " \ + "| grep -v \"Disk\" | grep -v \"/dev/mapper/\" " \ + "| awk '{ print $1 }'" % disk + output = SharedFuncs.runShellCmd(cmd) + if output: + devices[disk] = output.splitlines() + else: + devices[disk] = "/dev/" + disk + return devices + + def obtainDiskDir(self): + cmd = "df -h -P /data* | grep -v 'Mounted' | awk '{print $6}'" + output = SharedFuncs.runShellCmd(cmd) + if output.lower().find("no such") >= 0: + allDiskPath = ["/"] + else: + allDiskPath = output.split('\n') + return allDiskPath + + def collectIORequest(self): + """ + function : Collector ioRequest + input : NA + output : Dict + """ + devices = [] + pathList = [] + + if (self.cluster): + pathList = self.obtainDataDir( + self.cluster.getDbNodeByName(self.host)) + else: + pathList = self.obtainDiskDir() + diskDict = self.obtainDisk() + for path in pathList: + cmd = "df -h %s" % path + output = SharedFuncs.runShellCmd(cmd) + partitionInfo = output.split('\n')[-1] + partitionName = partitionInfo.split()[0] + if (partitionName in devices): + continue + else: + devices.append(partitionName) + result = {} + for d in devices: + for item in diskDict.items(): + if d in item[1]: + request = g_file.readFile( + "/sys/block/%s/queue/nr_requests" % item[0])[0] + result[item[0]] = request.strip() + + return result + + def doCheck(self): + global g_needRepair + data = self.collectIORequest() + flag = True + resultList = [] + if len(data) == 0: + resultList.append("Not find IO Request file.") + for i in data.keys(): + request = data[i] + self.result.raw += "%s %s\n" % (i, request) + if (i.startswith('loop') or i.startswith('ram')): + continue + if int(request) != int(expectedScheduler): + g_needRepair.append(i) + resultList.append( + "On device (%s) 'IO Request' RealValue '%s' " + "ExpectedValue '%s'" % ( + i, request.strip(), 
expectedScheduler)) + flag = False + self.result.val = "\n".join(resultList) + if flag: + self.result.rst = ResultStatus.OK + self.result.val = "All disk IO request are normal." + else: + self.result.rst = ResultStatus.WARNING + + def doSet(self): + resultStr = "" + for dev in g_needRepair: + cmd = 'echo 32768 > /sys/block/%s/queue/nr_requests' % dev + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + resultStr += "Failed to set dev %s.\n Error : %s." % ( + dev, output) + resultStr += "The cmd is %s " % cmd + if (len(resultStr) > 0): + self.result.val = resultStr + else: + self.result.val = "Set IOrequestqueue successfully." diff --git a/script/gspylib/inspection/items/device/CheckInodeUsage.py b/script/gspylib/inspection/items/device/CheckInodeUsage.py new file mode 100644 index 0000000..e915d91 --- /dev/null +++ b/script/gspylib/inspection/items/device/CheckInodeUsage.py @@ -0,0 +1,133 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- + +import os +from gspylib.common.Common import DefaultValue +from gspylib.common.ErrorCode import ErrorCode +from gspylib.inspection.common import SharedFuncs +from gspylib.inspection.common.CheckItem import BaseItem +from gspylib.inspection.common.CheckResult import ResultStatus +from gspylib.hardware.gsdisk import g_disk + +TOTAL_THRESHOLD_NG = 500000000 + + +class CheckInodeUsage(BaseItem): + def __init__(self): + super(CheckInodeUsage, self).__init__(self.__class__.__name__) + self.Threshold_NG = None + self.Threshold_Warning = None + + def preCheck(self): + # check current node contains cn instances if not raise exception + super(CheckInodeUsage, self).preCheck() + # check the threshold was set correctly + if (not (self.threshold.__contains__( + 'Threshold_NG') and self.threshold.__contains__( + 'Threshold_Warning'))): + raise Exception(ErrorCode.GAUSS_530["GAUSS_53013"] + % "The threshold Threshold_NG" + " and Threshold_Warning") + if (not self.threshold['Threshold_NG'].isdigit() or not self.threshold[ + 'Threshold_Warning'].isdigit()): + raise Exception(ErrorCode.GAUSS_530["GAUSS_53014"] + % ("The threshold Threshold_NG[%s]" + " and Threshold_Warning[%s]" % + (self.Threshold_NG, self.Threshold_Warning))) + self.Threshold_NG = int(self.threshold['Threshold_NG']) + self.Threshold_Warning = int(self.threshold['Threshold_Warning']) + if (self.Threshold_NG < self.Threshold_Warning): + raise Exception(ErrorCode.GAUSS_530["GAUSS_53015"] + % (self.Threshold_NG, self.Threshold_Warning)) + if (self.Threshold_NG > 99 or self.Threshold_Warning < 1): + raise Exception(ErrorCode.GAUSS_530["GAUSS_53016"] + % (self.Threshold_NG, self.Threshold_Warning)) + + def obtainDataDir(self, nodeInfo): + dataDirList = [] + for inst in nodeInfo.datanodes: + dataDirList.append(inst.datadir) + dataDirList.append(DefaultValue.getEnv("PGHOST")) + dataDirList.append(DefaultValue.getEnv("GPHOME")) + dataDirList.append(DefaultValue.getEnv("GAUSSHOME")) + 
dataDirList.append(DefaultValue.getEnv("GAUSSLOG")) + dataDirList.append("/tmp") + return dataDirList + + def obtainDiskDir(self): + cmd = "df -h -P | awk '{print $6}'" + output = SharedFuncs.runShellCmd(cmd) + allDiskPath = output.split('\n')[1:] + return allDiskPath + + def doCheck(self): + flag = "Normal" + resultStr = "" + top = "" + DiskList = [] + DiskInfoDict = {} + if (self.cluster): + pathList = self.obtainDataDir( + self.cluster.getDbNodeByName(self.host)) + else: + pathList = self.obtainDiskDir() + + for path in pathList: + diskName = g_disk.getMountPathByDataDir(path) + diskType = g_disk.getDiskMountType(diskName) + if (not diskType in ["xfs", "ext3", "ext4"]): + resultStr += \ + "Path(%s) inodes usage(%s) Warning reason: " \ + "The file system type [%s] is unrecognized " \ + "or not support. Please check it.\n" % ( + path, 0, diskType) + if (flag == "Normal"): + flag = "Warning" + continue + usageInfo = g_disk.getDiskInodeUsage(path) + if (diskName in DiskList): + continue + else: + DiskList.append(diskName) + DiskInfoDict[usageInfo] = "%s %s%%" % (diskName, usageInfo) + if (usageInfo > self.Threshold_NG): + resultStr += "The usage of the device " \ + "disk inodes[%s:%d%%] cannot be greater than" \ + " %d%%.\n" % ( + diskName, usageInfo, self.Threshold_NG) + flag = "Error" + elif (usageInfo > self.Threshold_Warning): + resultStr += \ + "The usage of the device disk inodes[%s:%d%%] " \ + "cannot be greater than %d%%.\n" % ( + diskName, usageInfo, self.Threshold_Warning) + if (flag == "Normal"): + flag = "Warning" + self.result.val = resultStr + if (flag == "Normal"): + self.result.rst = ResultStatus.OK + self.result.val = "All disk inodes are sufficient." + elif (flag == "Warning"): + self.result.rst = ResultStatus.WARNING + else: + self.result.rst = ResultStatus.NG + + keys = DiskInfoDict.keys() + sortedKeys = sorted(keys) + self.result.raw = "diskname inodeUsage" + for diskInfo in map(DiskInfoDict.get, sortedKeys): + self.result.raw += "\n%s" % diskInfo diff --git a/script/gspylib/inspection/items/device/CheckLogicalBlock.py b/script/gspylib/inspection/items/device/CheckLogicalBlock.py new file mode 100644 index 0000000..53fa6f2 --- /dev/null +++ b/script/gspylib/inspection/items/device/CheckLogicalBlock.py @@ -0,0 +1,90 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. 
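For reference, CheckInodeUsage above delegates the percentage to g_disk.getDiskInodeUsage(). A minimal stand-alone equivalent via POSIX statvfs (an illustrative sketch, not the helper actually used; "/tmp" is just an example path):

import os

def inode_usage_percent(path):
    st = os.statvfs(path)
    used = st.f_files - st.f_ffree          # total inodes minus free ones
    return 100.0 * used / st.f_files if st.f_files else 0.0

print("%.1f%%" % inode_usage_percent("/tmp"))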
+# ----------------------------------------------------------------------------
+import glob
+import subprocess
+from gspylib.inspection.common import SharedFuncs
+from gspylib.inspection.common.CheckItem import BaseItem
+from gspylib.inspection.common.CheckResult import ResultStatus
+from gspylib.os.gsfile import g_file
+
+g_expectedScheduler = "512"
+result = {}
+
+
+class CheckLogicalBlock(BaseItem):
+    def __init__(self):
+        super(CheckLogicalBlock, self).__init__(self.__class__.__name__)
+
+    def doCheck(self):
+        global result
+        devices = set()
+        self.result.val = ""
+
+        files = glob.glob("/sys/block/*/queue/logical_block_size")
+        for f in files:
+            words = f.split("/")
+            if len(words) != 6:
+                continue
+            devices.add(words[3].strip())
+
+        for d in devices:
+            request = g_file.readFile(
+                "/sys/block/%s/queue/logical_block_size" % d)[0]
+            result[d] = request.strip()
+
+        if len(result) == 0:
+            self.result.val = "Warning: cannot find the logical block" \
+                              " size file, please check it."
+            self.result.rst = ResultStatus.WARNING
+
+        flag = True
+        for i in result.keys():
+            request = result[i]
+            self.result.raw += "%s %s\n" % (i, request)
+            if (i.startswith('loop') or i.startswith('ram')):
+                continue
+            if int(request) < int(g_expectedScheduler):
+                flag = False
+                self.result.val += "\nWarning: On device (%s) " \
+                                   "'logicalBlock Request' RealValue '%d' " \
+                                   "ExpectedValue '%d'" % (
+                                       i, int(request),
+                                       int(g_expectedScheduler))
+
+        if flag:
+            self.result.rst = ResultStatus.OK
+            self.result.val = "All disk LogicalBlock values are correct."
+        else:
+            self.result.rst = ResultStatus.NG
+
+    def doSet(self):
+        resultStr = ""
+        for dev in result.keys():
+            (THPFile, initFile) = SharedFuncs.getTHPandOSInitFile()
+            cmd = " echo %s >> /sys/block/%s/queue/logical_block_size" % (
+                g_expectedScheduler, dev)
+            cmd += \
+                " && echo \"echo %s >> " \
+                "/sys/block/%s/queue/logical_block_size\" >> %s" % (
+                    g_expectedScheduler, dev, initFile)
+            (status, output) = subprocess.getstatusoutput(cmd)
+            if (status != 0):
+                resultStr = "Failed to set logicalBlock Request.\n " \
+                            "Error : %s." % output
+                resultStr += "The cmd is %s " % cmd
+        self.result.val = resultStr
diff --git a/script/gspylib/inspection/items/device/CheckMaxAsyIOrequests.py b/script/gspylib/inspection/items/device/CheckMaxAsyIOrequests.py
new file mode 100644
index 0000000..f699750
--- /dev/null
+++ b/script/gspylib/inspection/items/device/CheckMaxAsyIOrequests.py
@@ -0,0 +1,107 @@
+# -*- coding:utf-8 -*-
+# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+#
+# openGauss is licensed under Mulan PSL v2.
+# You can use this software according to the terms
+# and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+#
+#          http://license.coscl.org.cn/MulanPSL2
+#
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
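CheckLogicalBlock above flags block devices whose logical block size is below 512 bytes. A self-contained sketch of the same scan (not part of the patch; loop and ram devices skipped, as in the check):

import glob

MIN_BLOCK = 512  # same floor as g_expectedScheduler in the check above

for path in glob.glob("/sys/block/*/queue/logical_block_size"):
    dev = path.split("/")[3]
    if dev.startswith(("loop", "ram")):
        continue
    with open(path) as f:
        size = int(f.read().strip())
    if size < MIN_BLOCK:
        print("device %s: logical block size %d < %d" % (dev, size, MIN_BLOCK))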
+# ----------------------------------------------------------------------------
+import subprocess
+from gspylib.inspection.common.CheckItem import BaseItem
+from gspylib.inspection.common.CheckResult import ResultStatus
+from gspylib.os.gsfile import g_file
+
+MASTER_INSTANCE = 0
+STANDBY_INSTANCE = 1
+
+
+class CheckMaxAsyIOrequests(BaseItem):
+    def __init__(self):
+        super(CheckMaxAsyIOrequests, self).__init__(self.__class__.__name__)
+
+    def collectAsynchronousIORequest(self):
+        result = []
+        request = g_file.readFile("/proc/sys/fs/aio-max-nr")[0]
+        result.append(request.strip())
+        return result
+
+    def getClusterInstancenum(self):
+        cnnum = 0
+        dnnum = 0
+        nodeInfo = self.cluster.getDbNodeByName(self.host)
+        for i in nodeInfo.coordinators:
+            if i.datadir != "":
+                cnnum += 1
+
+        for i in nodeInfo.datanodes:
+            if (i.instanceType == MASTER_INSTANCE):
+                dnnum += 1
+            if (i.instanceType == STANDBY_INSTANCE):
+                dnnum += 1
+
+        return (dnnum + cnnum) * 1048576
+
+    def doCheck(self):
+        # keep the expected value as an int so the comparisons with
+        # instancenum below do not mix str and int
+        expectedScheduler = 104857600
+        flag = True
+        resultStr = ""
+        instancenum = 0
+
+        if (self.cluster):
+            instancenum = self.getClusterInstancenum()
+
+        data = self.collectAsynchronousIORequest()
+
+        if len(data) == 0:
+            flag = False
+            resultStr += "Cannot find the aio-max-nr file.\n"
+            if (instancenum > expectedScheduler):
+                resultStr += "Asy IO requests must be greater than %s.\n" \
+                             % instancenum
+            else:
+                resultStr += "Asy IO requests must be greater than %s.\n" \
+                             % expectedScheduler
+        else:
+            for i in iter(data):
+                request = i
+                if (int(request) < int(instancenum) and int(
+                        expectedScheduler) < int(instancenum)):
+                    flag = False
+                    resultStr += "Asy IO requests '%s' is less than the" \
+                                 " expected value '%s'.\n" % (
+                                     request, instancenum)
+                elif (int(request) < int(expectedScheduler) and int(
+                        instancenum) < int(expectedScheduler)):
+                    flag = False
+                    resultStr += "Asy IO requests '%s' is less than the" \
+                                 " expected value '%s'.\n" % (
+                                     request, expectedScheduler)
+                else:
+                    resultStr += "Asy IO requests is %s\n" % request
+        self.result.val = resultStr
+        if flag:
+            self.result.rst = ResultStatus.OK
+        else:
+            self.result.rst = ResultStatus.WARNING
+
+    def doSet(self):
+        resultStr = ""
+        cmd = "echo 104857600 > /proc/sys/fs/aio-max-nr"
+        (status, output) = subprocess.getstatusoutput(cmd)
+        if (status != 0):
+            resultStr = "Failed to set Asy IO requests.\nError : %s." % \
+                        output + "The cmd is %s " % cmd
+        if (len(resultStr) > 0):
+            self.result.val = resultStr
+        else:
+            self.result.val = "Set Asy IO requests successfully."
diff --git a/script/gspylib/inspection/items/device/CheckSlowDisk.py b/script/gspylib/inspection/items/device/CheckSlowDisk.py
new file mode 100644
index 0000000..7f45593
--- /dev/null
+++ b/script/gspylib/inspection/items/device/CheckSlowDisk.py
@@ -0,0 +1,72 @@
+# coding: UTF-8
+# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+#
+# openGauss is licensed under Mulan PSL v2.
+# You can use this software according to the terms
+# and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+#
+#          http://license.coscl.org.cn/MulanPSL2
+#
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
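CheckMaxAsyIOrequests above sizes the requirement as (CN count + DN count) * 1048576, with 104857600 as the floor. A stand-alone sketch of the comparison (not part of the patch; the instance count of 3 is a hypothetical example):

FLOOR = 104857600
instances = 3                       # hypothetical: CNs + DNs on this host
required = max(instances * 1048576, FLOOR)

with open("/proc/sys/fs/aio-max-nr") as f:
    current = int(f.read().strip())

if current < required:
    print("raise fs.aio-max-nr from %d to %d" % (current, required))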
+# ---------------------------------------------------------------------------- +import os +import csv +import subprocess +from gspylib.inspection.common import SharedFuncs +from gspylib.inspection.common.CheckItem import BaseItem +from gspylib.inspection.common.CheckResult import ResultStatus +from gspylib.common.ErrorCode import ErrorCode + + +class CheckSlowDisk(BaseItem): + def __init__(self): + super(CheckSlowDisk, self).__init__(self.__class__.__name__) + self.max = None + self.high = None + + def preCheck(self): + # check the threshold was set correctly + if (not self.threshold.__contains__('max') + or not self.threshold.__contains__('high')): + raise Exception(ErrorCode.GAUSS_530["GAUSS_53013"] % "threshold") + if (not self.threshold['max'].isdigit() or + not self.threshold['high'].isdigit()): + raise Exception(ErrorCode.GAUSS_530["GAUSS_53013"] + % "The threshold max and high") + self.max = float(self.threshold['max']) + self.high = float(self.threshold['high']) + + def doCheck(self): + # Perform 60-pass disk data collection + dic = {} + slowDiskList = [] + cmd = "for varible1 in {1..30}; do iostat -d -x -k 1 1 " \ + "| grep -E -v \"Linux|Device\"|awk 'NF'" \ + "|awk '{print $1,$(NF-1)}'; " \ + "sleep 1;done" + output = SharedFuncs.runShellCmd(cmd) + for line in output.splitlines(): + diskname = line.split()[0] + svctmValue = line.split()[1] + if (diskname in dic.keys()): + diskList = dic[diskname] + diskList.append(float(svctmValue)) + dic[diskname] = diskList + else: + dic[diskname] = [float(svctmValue)] + for diskname, svctmValues in dic.items(): + diskList = sorted(svctmValues) + if (diskList[-1] > self.max and diskList[-10] > self.high): + slowDiskList.append(diskname) + if (slowDiskList): + self.result.rst = ResultStatus.NG + self.result.val = "Slow Disk Found:\n%s" % ( + "\n".join(slowDiskList)) + else: + self.result.rst = ResultStatus.OK + self.result.val = "No Slow Disk Found" diff --git a/script/gspylib/inspection/items/device/CheckSpaceUsage.py b/script/gspylib/inspection/items/device/CheckSpaceUsage.py new file mode 100644 index 0000000..b4286b4 --- /dev/null +++ b/script/gspylib/inspection/items/device/CheckSpaceUsage.py @@ -0,0 +1,177 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. 
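CheckSlowDisk above collects iostat's svctm column 30 times and sorts the samples per disk; a disk is reported slow only when the worst sample exceeds the max threshold and the 10th-worst still exceeds the high threshold, i.e. sustained latency rather than a single spike. The rule in isolation (an illustrative sketch):

def is_slow(svctm_samples, max_thr, high_thr):
    s = sorted(svctm_samples)
    # worst sample over max_thr AND 10th-worst over high_thr
    return len(s) >= 10 and s[-1] > max_thr and s[-10] > high_thr

# one spike alone is not enough to flag the disk
print(is_slow([2.1] * 25 + [9.0] * 5, max_thr=8.0, high_thr=4.0))  # False
print(is_slow([5.0] * 30, max_thr=4.0, high_thr=3.0))              # True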
+# ---------------------------------------------------------------------------- + +import os +import psutil +from gspylib.common.Common import DefaultValue +from gspylib.common.ErrorCode import ErrorCode +from gspylib.inspection.common import SharedFuncs +from gspylib.inspection.common.CheckItem import BaseItem +from gspylib.inspection.common.CheckResult import ResultStatus +from gspylib.hardware.gsdisk import g_disk + + +class CheckSpaceUsage(BaseItem): + def __init__(self): + super(CheckSpaceUsage, self).__init__(self.__class__.__name__) + self.diskVailPGHOST = None + self.diskVailGPHOME = None + self.diskVailGAUSSHOME = None + self.diskVailGAUSSLOG = None + self.diskVailOS_TMP = None + self.diskVailDATA = None + self.Threshold_NG = None + self.Threshold_Warning = None + + def preCheck(self): + # check current node contains cn instances if not raise exception + super(CheckSpaceUsage, self).preCheck() + # check the threshold was set correctly + if (not self.threshold.__contains__('DiskVailPGHOST')): + raise Exception(ErrorCode.GAUSS_530["GAUSS_53013"] + % "threshold DiskVailPGHOST") + if (not self.threshold.__contains__('DiskVailGPHOME')): + raise Exception(ErrorCode.GAUSS_530["GAUSS_53013"] + % "threshold DiskVailGPHOME") + if (not self.threshold.__contains__('DiskVailGAUSSHOME')): + raise Exception(ErrorCode.GAUSS_530["GAUSS_53013"] + % "threshold DiskVailGAUSSHOME") + if (not self.threshold.__contains__('DiskVailGAUSSLOG')): + raise Exception(ErrorCode.GAUSS_530["GAUSS_53013"] + % "threshold DiskVailGAUSSLOG") + if (not self.threshold.__contains__('DiskVailOS_TMP')): + raise Exception(ErrorCode.GAUSS_530["GAUSS_53013"] + % "threshold DiskVailOS_TMP") + if (not self.threshold.__contains__('DiskVailDATA')): + raise Exception(ErrorCode.GAUSS_530["GAUSS_53013"] + % "threshold DiskVailDATA") + + if (not (self.threshold.__contains__('Threshold_NG') and + self.threshold.__contains__('Threshold_Warning'))): + raise Exception(ErrorCode.GAUSS_530["GAUSS_53013"] % + "The threshold Threshold_NG and Threshold_Warning") + if (not self.threshold['Threshold_NG'].isdigit() or + not self.threshold['Threshold_Warning'].isdigit()): + raise Exception(ErrorCode.GAUSS_530["GAUSS_53013"] % + "The threshold Threshold_NG and Threshold_Warning") + self.Threshold_NG = int(self.threshold['Threshold_NG']) + self.Threshold_Warning = int(self.threshold['Threshold_Warning']) + if (self.Threshold_NG < self.Threshold_Warning): + raise Exception(ErrorCode.GAUSS_530["GAUSS_53015"] + % (self.Threshold_NG, self.Threshold_Warning)) + if (self.Threshold_NG > 99 or self.Threshold_Warning < 1): + raise Exception(ErrorCode.GAUSS_530["GAUSS_53016"] + % (self.Threshold_NG, self.Threshold_Warning)) + + self.diskVailPGHOST = self.threshold['DiskVailPGHOST'] + self.diskVailGPHOME = self.threshold['DiskVailGPHOME'] + self.diskVailGAUSSHOME = self.threshold['DiskVailGAUSSHOME'] + self.diskVailGAUSSLOG = self.threshold['DiskVailGAUSSLOG'] + self.diskVailOS_TMP = self.threshold['DiskVailOS_TMP'] + self.diskVailDATA = self.threshold['DiskVailDATA'] + + def obtainDataDir(self, nodeInfo): + dataDirList = {} + dataDirList[DefaultValue.getEnv("PGHOST")] = ["PGHOST", + self.diskVailPGHOST] + dataDirList[DefaultValue.getEnv("GPHOME")] = ["GPHOME", + self.diskVailGPHOME] + dataDirList[DefaultValue.getEnv("GAUSSHOME")] = \ + ["GAUSSHOME", self.diskVailGAUSSHOME] + dataDirList[DefaultValue.getEnv("GAUSSLOG")] = ["GAUSSLOG", + self.diskVailGAUSSLOG] + dataDirList["/tmp"] = ["OS_TMP", self.diskVailOS_TMP] + for inst in nodeInfo.datanodes: + 
dataDirList[inst.datadir] = ["DN", self.diskVailDATA] + + return dataDirList + + def obtainDiskDir(self): + cmd = "df -h -P | awk '{print $NF}'" + output = SharedFuncs.runShellCmd(cmd) + allDiskPath = output.split('\n')[1:] + return allDiskPath + + def doCheck(self): + flag = "Normal" + resultStr = "" + DiskList = [] + DiskInfoDict = {} + pathDisk = {} + if (self.cluster): + pathDisk = self.obtainDataDir( + self.cluster.getDbNodeByName(self.host)) + pathList = pathDisk.keys() + else: + pathList = self.obtainDiskDir() + for path in pathList: + diskName = g_disk.getMountPathByDataDir(path) + usageInfo = g_disk.getDiskSpaceUsage(path) + diskInfo = "%s %s%%" % (diskName, usageInfo) + if (diskName in DiskList): + continue + else: + DiskList.append(diskName) + DiskInfoDict[usageInfo] = diskInfo + rateNum = usageInfo + if (rateNum > self.Threshold_NG): + resultStr += \ + "The usage of the device disk space[%s:%d%%] " \ + "cannot be greater than %d%%.\n" % ( + diskName, rateNum, self.Threshold_NG) + flag = "Error" + elif (rateNum > self.Threshold_Warning): + resultStr += \ + "The usage of the device disk space[%s:%d%%] " \ + "cannot be greater than %d%%.\n" % ( + diskName, rateNum, self.Threshold_Warning) + if (flag == "Normal"): + flag = "Warning" + + if (pathDisk): + if (diskInfo): + AvailableSpace = psutil.disk_usage( + path).free // 1024 // 1024 // 1024 + minSpace_KB = float(pathDisk[path][1]) + if (AvailableSpace < minSpace_KB): + resultStr += \ + "The %s path [%s] where" \ + " the disk available space[%.1fGB] is less than" \ + " %.1fGB.\n" % ( + pathDisk[path][0], path, AvailableSpace, + minSpace_KB) + flag = "Error" + else: + raise Exception(ErrorCode.GAUSS_504["GAUSS_50413"] + + "Error:\n%s" % diskInfo) + self.result.val = resultStr + if (flag == "Normal"): + self.result.rst = ResultStatus.OK + self.result.val = "All disk space are sufficient.\n" + elif (flag == "Warning"): + self.result.rst = ResultStatus.WARNING + else: + self.result.rst = ResultStatus.NG + + keys = DiskInfoDict.keys() + sortedKeys = sorted(keys) + MaxDisk = list(map(DiskInfoDict.get, sortedKeys))[-1] + MinDisk = list(map(DiskInfoDict.get, sortedKeys))[0] + self.result.val += "\nDisk Filesystem spaceUsage\nMax " \ + "free %s\nMin free %s" % (MaxDisk, MinDisk) + for diskInfo in list(map(DiskInfoDict.get, sortedKeys)): + self.result.raw += "\n%s" % diskInfo diff --git a/script/gspylib/inspection/items/device/CheckSwapMemory.py b/script/gspylib/inspection/items/device/CheckSwapMemory.py new file mode 100644 index 0000000..ac72319 --- /dev/null +++ b/script/gspylib/inspection/items/device/CheckSwapMemory.py @@ -0,0 +1,82 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. 
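CheckSpaceUsage above converts psutil's free-byte count to whole gigabytes with integer division before comparing it to the per-directory minimum. The same comparison in isolation (a sketch; the path and the 30 GB minimum are hypothetical examples):

import psutil

path = "/tmp"                      # hypothetical data directory
min_gb = 30.0                      # hypothetical DiskVailDATA-style minimum

free_gb = psutil.disk_usage(path).free // 1024 // 1024 // 1024
if free_gb < min_gb:
    print("only %dGB free under %s, need %.1fGB" % (free_gb, path, min_gb))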
+# ---------------------------------------------------------------------------- + +import subprocess +from gspylib.inspection.common.CheckItem import BaseItem +from gspylib.inspection.common.CheckResult import ResultStatus +from gspylib.os.gsfile import g_file + + +class CheckSwapMemory(BaseItem): + def __init__(self): + super(CheckSwapMemory, self).__init__(self.__class__.__name__) + + def doCheck(self): + MemSwap = 0 + self.result.raw = "" + result_swap = g_file.readFile('/proc/meminfo', "SwapTotal")[0] + self.result.raw += result_swap + swapInfo = result_swap.strip().split(' ') + val = int(swapInfo[len(swapInfo) - 2]) + factor = swapInfo[len(swapInfo) - 1] + if factor == 'kB': + MemSwap = val * 1024 + elif (factor == ''): + MemSwap = val + + result_mem = g_file.readFile('/proc/meminfo', "MemTotal")[0] + self.result.raw += "\n%s" % result_mem + memInfo = result_mem.strip().split() + val = int(memInfo[len(memInfo) - 2]) + factor = memInfo[len(memInfo) - 1] + if factor == 'kB': + MemTotal = val * 1024 + elif (factor == ''): + MemTotal = val + + if (MemSwap > MemTotal): + self.result.rst = ResultStatus.NG + self.result.val = "SwapMemory(%d) must be 0.\nMemTotal: %d." % ( + MemSwap, MemTotal) + elif (MemSwap != 0): + self.result.rst = ResultStatus.WARNING + self.result.val = "SwapMemory(%d) must be 0.\nMemTotal: %d." % ( + MemSwap, MemTotal) + else: + self.result.rst = ResultStatus.OK + self.result.val = "SwapMemory %d\nMemTotal %d." % ( + MemSwap, MemTotal) + + def doSet(self): + resultStr = "" + configFile = "/etc/fstab" + cmd = "swapoff -a" + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + resultStr += "Failed to close swap information.\n Error : %s." \ + % output + resultStr += "The cmd is %s " % cmd + cmd = "sed -i '/^.*swap/d' %s" % configFile + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + resultStr += "Failed to modify configuration file.\n Error : %s." \ + % output + resultStr += "The cmd is %s " % cmd + if (len(resultStr) > 0): + self.result.val = resultStr + else: + self.result.val = "Set SwapMemory successfully." diff --git a/script/gspylib/inspection/items/network/CheckBond.py b/script/gspylib/inspection/items/network/CheckBond.py new file mode 100644 index 0000000..66c9013 --- /dev/null +++ b/script/gspylib/inspection/items/network/CheckBond.py @@ -0,0 +1,83 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. 
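CheckSwapMemory above parses /proc/meminfo, whose lines look like "SwapTotal: 2097148 kB", scaling to bytes when the unit field is kB. A compact stand-alone equivalent (not part of the patch):

def meminfo_bytes(key):
    with open("/proc/meminfo") as f:
        for line in f:
            if line.startswith(key):
                fields = line.split()
                value = int(fields[1])
                # most keys report kB; no suffix means the value is in bytes
                return value * 1024 if fields[-1] == "kB" else value
    return 0

print(meminfo_bytes("SwapTotal"), meminfo_bytes("MemTotal"))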
+# ---------------------------------------------------------------------------- +import os +import subprocess +import platform +from gspylib.inspection.common import SharedFuncs +from gspylib.inspection.common.CheckItem import BaseItem +from gspylib.inspection.common.CheckResult import ResultStatus +from gspylib.os.gsfile import g_file +from gspylib.os.gsnetwork import g_network +from gspylib.os.gsfile import g_Platform + +networkCards = [] + + +class CheckBond(BaseItem): + def __init__(self): + super(CheckBond, self).__init__(self.__class__.__name__) + + def doCheck(self): + global networkCards + if (self.cluster): + # Get node information + LocalNodeInfo = self.cluster.getDbNodeByName(self.host) + # Get the IP address + serviceIP = LocalNodeInfo.backIps[0] + elif (self.ipAddr): + serviceIP = self.ipAddr + else: + serviceIP = SharedFuncs.getIpByHostName(self.host) + networkCards = g_network.getAllNetworkInfo() + for network in networkCards: + if (network.ipAddress == serviceIP): + networkCardNum = network.NICNum + netBondMode = network.networkBondModeInfo + break + + self.result.val = netBondMode + self.result.rst = ResultStatus.OK + self.result.raw = "%s\n%s\n" % (networkCardNum, netBondMode) + + bondFile = '/proc/net/bonding/%s' % networkCardNum + if (os.path.exists(bondFile)): + self.result.raw += bondFile + flag1 = g_file.readFile(bondFile, 'BONDING_OPTS') + flag2 = g_file.readFile(bondFile, 'BONDING_MODULE_OPTS') + if (not flag1 and not flag2): + self.result.rst = ResultStatus.NG + self.result.val += "\nNo 'BONDING_OPTS' or" \ + " 'BONDING_MODULE_OPTS' in bond" \ + " config file[%s]." % bondFile + + def doSet(self): + ifcfgFileSuse = "/etc/sysconfig/network/ifcfg-%s" % networkCards + ifcfgFileRedhat = "/etc/sysconfig/network-scripts/ifcfg-%s" \ + % networkCards + distname, version, idnum = g_Platform.dist() + if (distname in ["redhat", "centos", "euleros", "openEuler"]): + cmd = "echo BONDING_MODULE_OPTS='mode=%d " \ + "miimon=100 use_carrier=0' >> %s " % (1, ifcfgFileRedhat) + else: + cmd = "echo BONDING_MODULE_OPTS='mode=%d miimon=100" \ + " use_carrier=0' >> %s" % (1, ifcfgFileSuse) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + self.result.val = "Failed to set bond mode.\n" + \ + "The cmd is %s " % cmd + else: + self.result.val = "set bond mode successfully.\n" diff --git a/script/gspylib/inspection/items/network/CheckMTU.py b/script/gspylib/inspection/items/network/CheckMTU.py new file mode 100644 index 0000000..8f57549 --- /dev/null +++ b/script/gspylib/inspection/items/network/CheckMTU.py @@ -0,0 +1,95 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. 
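CheckBond above keys off /proc/net/bonding/<nic>, which the kernel exposes only for bonded interfaces; the slave NICs appear on "Slave Interface:" lines. A stand-alone probe (a sketch; bond0 is a hypothetical interface name):

import os

def bond_slaves(nic):
    path = "/proc/net/bonding/%s" % nic
    if not os.path.exists(path):
        return []                   # interface is not bonded
    with open(path) as f:
        return [line.split(":")[1].strip()
                for line in f if line.startswith("Slave Interface")]

print(bond_slaves("bond0"))         # e.g. ['eth0', 'eth1'] or []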
+# ---------------------------------------------------------------------------- +import subprocess +from gspylib.inspection.common import SharedFuncs +from gspylib.inspection.common.CheckItem import BaseItem +from gspylib.inspection.common.CheckResult import ResultStatus +from gspylib.common.ErrorCode import ErrorCode +from gspylib.os.gsnetwork import g_network + +networkCardNum = "" + + +class CheckMTU(BaseItem): + def __init__(self): + super(CheckMTU, self).__init__(self.__class__.__name__) + self.expectMTU1 = None + self.expectMTU2 = None + + def preCheck(self): + # check current node contains cn instances if not raise exception + super(CheckMTU, self).preCheck() + # check the threshold was set correctly + if (not self.threshold.__contains__('expectMTU1')): + raise Exception(ErrorCode.GAUSS_530["GAUSS_53013"] + % "threshold expectMTU1") + self.expectMTU1 = self.threshold['expectMTU1'] + if (not self.threshold.__contains__('expectMTU2')): + raise Exception(ErrorCode.GAUSS_530["GAUSS_53013"] + % "threshold expectMTU2") + self.expectMTU2 = self.threshold['expectMTU2'] + + def doCheck(self): + global networkCardNum + if self.cluster: + # Get node information + LocalNodeInfo = self.cluster.getDbNodeByName(self.host) + # Get the IP address + backIP = LocalNodeInfo.backIps[0] + else: + backIP = SharedFuncs.getIpByHostName(self.host) + # Get the network card number + networkCards = g_network.getAllNetworkInfo() + for network in networkCards: + if network.ipAddress == backIP: + networkCardNum = network.NICNum + networkMTU = network.MTUValue + break + if not networkCardNum or not networkMTU: + raise Exception(ErrorCode.GAUSS_506["GAUSS_50619"]) + # Check the mtu value obtained is not a number + if not str(networkMTU).isdigit(): + raise Exception(ErrorCode.GAUSS_506["GAUSS_50612"] + % (networkCardNum + " " + "MTU")) + + self.result.val = str(networkMTU) + # Compare the acquired MTU with the threshold + if (int(networkMTU) != int(self.expectMTU1) and int( + networkMTU) != int(self.expectMTU2)): + self.result.rst = ResultStatus.WARNING + self.result.raw = "Warning MTU value[%s]: RealValue '%s' " \ + "ExpectedValue '%s' or '%s'.\n" \ + % (networkCardNum, int(networkMTU), + self.expectMTU1, self.expectMTU2) + else: + self.result.rst = ResultStatus.OK + self.result.raw = "[%s]MTU: %s" \ + % (networkCardNum, str(networkMTU)) + + def doSet(self): + resultStr = "" + (THPFile, initFile) = SharedFuncs.getTHPandOSInitFile() + cmd = "ifconfig %s mtu 1500;" % networkCardNum + cmd += "echo ifconfig %s mtu 1500 >> %s" % (networkCardNum, initFile) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + resultStr += "Set MTU Failed.Error : %s." % output + resultStr += "The cmd is %s " % cmd + if (len(resultStr) > 0): + self.result.val = resultStr + else: + self.result.val = "Set MTU successfully." diff --git a/script/gspylib/inspection/items/network/CheckMultiQueue.py b/script/gspylib/inspection/items/network/CheckMultiQueue.py new file mode 100644 index 0000000..58d5e4b --- /dev/null +++ b/script/gspylib/inspection/items/network/CheckMultiQueue.py @@ -0,0 +1,226 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. 
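CheckMTU above obtains the MTU through g_network; the kernel also exposes it directly under /sys/class/net. A minimal sketch of the same comparison (not part of the patch; eth0 and the (1500, 8192) pair are hypothetical stand-ins for the NIC and the expectMTU1/expectMTU2 thresholds):

def mtu_of(nic):
    with open("/sys/class/net/%s/mtu" % nic) as f:
        return int(f.read().strip())

nic = "eth0"                        # hypothetical NIC name
expected = (1500, 8192)             # hypothetical expectMTU1/expectMTU2
if mtu_of(nic) not in expected:
    print("MTU of %s is outside %s" % (nic, expected))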
+# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +import os +import subprocess +from gspylib.inspection.common import SharedFuncs +from gspylib.inspection.common.CheckItem import BaseItem +from gspylib.inspection.common.CheckResult import ResultStatus +from gspylib.common.ErrorCode import ErrorCode +from gspylib.common.Common import DefaultValue +from gspylib.os.gsnetwork import g_network +from gspylib.os.gsfile import g_file +from gspylib.os.gsplatform import findCmdInPath + +needRepairNetworkCardNum = [] +networkCardNums = [] +netWorkLevel = 10000 + + +class CheckMultiQueue(BaseItem): + def __init__(self): + super(CheckMultiQueue, self).__init__(self.__class__.__name__) + + def doCheck(self): + global needRepairNetworkCardNum + global networkCardNums + flag = "Normal" + + self.result.val = "" + self.result.raw = "" + if self.cluster: + LocalNodeInfo = self.cluster.getDbNodeByName(self.host) + backIP = LocalNodeInfo.backIps[0] + elif self.ipAddr: + backIP = self.ipAddr + else: + backIP = SharedFuncs.getIpByHostName(self.host) + # Get the network card number + allNetworkInfo = g_network.getAllNetworkInfo() + for network in allNetworkInfo: + if network.ipAddress == backIP: + networkNum = network.NICNum + BondMode = network.networkBondModeInfo + confFile = network.networkConfigFile + break + + if not networkNum or not BondMode or not confFile: + if DefaultValue.checkDockerEnv(): + return + raise Exception(ErrorCode.GAUSS_506["GAUSS_50619"]) + if BondMode != "BondMode Null": + bondFile = '/proc/net/bonding/%s' % networkNum + bondInfoList = g_file.readFile(bondFile, "Slave Interface") + for bondInfo in bondInfoList: + networkNum = bondInfo.split(':')[-1].strip() + networkCardNums.append(networkNum) + else: + networkCardNums.append(networkNum) + + for networkCardNum in networkCardNums: + cmdGetSpeedStr = "/sbin/ethtool %s | grep 'Speed:'" \ + % networkCardNum + (status, output) = subprocess.getstatusoutput(cmdGetSpeedStr) + if len(output.split('\n')) > 1: + for line in output.split('\n'): + if line.find("Speed:") >= 0: + output = line + break + if output.find("Speed:") >= 0 and output.find("Mb/s") >= 0: + netLevel = int(output.split(':')[1].strip()[:-4]) + if netLevel >= int(netWorkLevel): + cmd = "for i in `cat /proc/interrupts | grep '%s-' |" \ + " awk -F ' ' '{print $1}' | " \ + "awk -F ':' '{print $1}'`; " \ + "do cat /proc/irq/$i/smp_affinity ; done" \ + % networkCardNum + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + self.result.val += "Failed to obtain network card" \ + " [%s] interrupt value. 
" \ + "Commands for getting interrupt" \ + " value: %s.\n" % (networkCardNum, + cmd) + if networkCardNum not in needRepairNetworkCardNum: + needRepairNetworkCardNum.append(networkCardNum) + flag = "Error" + continue + + # cpu core number followed by 1 2 4 8,every 4 left shift 1 + Mapping = {0: "1", 1: "2", 2: "4", 3: "8"} + for index, eachLine in enumerate(output.split()): + # Remove the ',' + eachLine = eachLine.replace(",", "") + # Replace 0000,00001000 to 1,Remove invalid content + validValue = eachLine.replace("0", "") + # Convert the row index to the expected value + expandNum = Mapping[index % 4] + # Convert line index to expected position + expandBit = index / 4 * -1 - 1 + # value and position is correct + if (len(eachLine) * -1) > expandBit: + self.result.val += "Network card [%s] " \ + "multi-queue support is not" \ + " enabled.\n" % networkCardNum + flag = "Error" + break + if (eachLine[expandBit] == expandNum and + validValue == expandNum): + continue + else: + self.result.val += "Network card [%s] " \ + "multi-queue support is " \ + "not enabled.\n" \ + % networkCardNum + if (networkCardNum not in + needRepairNetworkCardNum): + needRepairNetworkCardNum.append( + networkCardNum) + flag = "Error" + break + + self.result.raw += "%s: \n %s \n" \ + % (networkCardNum, output) + else: + self.result.val += "Warning: The speed of current card" \ + " \"%s\" is less than %s Mb/s.\n" \ + % (networkCardNum, netWorkLevel) + else: + if output.find("Speed:") >= 0: + if (networkCardNum not in + needRepairNetworkCardNum): + needRepairNetworkCardNum.append(networkCardNum) + flag = "Error" + self.result.val += "Failed to obtain the network card" \ + " [%s] speed value. Maybe the network" \ + " card is not working.\n" \ + % networkCardNum + else: + self.result.val += "Failed to obtain the network" \ + " card [%s] speed value. Commands" \ + " for obtain the network card speed:" \ + " %s. Error:\n%s\n" \ + % (networkCardNum, cmdGetSpeedStr, + output) + if flag == "Normal": + self.result.rst = ResultStatus.OK + elif flag == "Warning": + self.result.rst = ResultStatus.WARNING + else: + self.result.rst = ResultStatus.NG + + def doSet(self): + self.result.val = "" + cmd = "ps ax | grep -v grep | grep -q irqbalance; echo $?" + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0 and output.strip() == "0": + subprocess.getstatusoutput("%s irqbalance" % + findCmdInPath("killall")) + for networkCardNum in networkCardNums: + cmd = "cat /proc/interrupts | grep '%s-' | wc -l" % networkCardNum + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + self.result.val += " Failed to obtain network" \ + " card interrupt count numbers. " + if not str(output.strip()).isdigit(): + count = 0 + else: + count = int(output.strip()) + i = 0 + while i < count: + # the dev name type like this: + # eth1-1, eth1-rx-1, eth1-tx-1, eth1-TxRx-1 + # eth1-rx1, eth-tx1 in arm, get all network name interrupt + cmd_IRQ = "cat /proc/interrupts | grep '%s.*-' |" \ + " awk -F ' ' '{print $1}' | " \ + "awk -F ':' '{print $1}'|awk 'NR==%s'" \ + % (networkCardNum, str(i + 1)) + (status, output) = subprocess.getstatusoutput(cmd_IRQ) + if status != 0 or output.strip() == "": + self.result.val = "Failed to obtain network card" \ + " interrupt value. Commands for " \ + "getting interrupt value: %s." % cmd_IRQ + else: + IRQ = output.strip() + self.result.raw += "The network '%s' interrupt" \ + " configuration path:" \ + " /proc/irq/%s/smp_affinity." 
\ + % (networkCardNum, IRQ) + num = 2 ** i + # Under SuSE platform, when the length is greater than 8, + # the ',' must be used. + value = str(hex(num))[2:] + # Decimal 63 or more long number sending in L + if len(value) > 16 and value[-1] == 'L': + value = value[:-1] + result_value = '' + while len(value) > 8: + result_value = ",%s%s" % (value[-8:], result_value) + value = value[:-8] + result_value = "%s%s" % (value, result_value) + + cmd_set = "echo '%s'> /proc/irq/%s/smp_affinity" % ( + result_value, IRQ) + (status, output) = subprocess.getstatusoutput(cmd_set) + if status != 0: + self.result.val += "Failed to set network '%s' IRQ." \ + " Commands for setting: %s." \ + % (networkCardNum, cmd_set) + else: + self.result.val += "Set network card '%s' IRQ" \ + " to \"%s\"." % (networkCardNum, + result_value) + i += 1 diff --git a/script/gspylib/inspection/items/network/CheckNICModel.py b/script/gspylib/inspection/items/network/CheckNICModel.py new file mode 100644 index 0000000..e617128 --- /dev/null +++ b/script/gspylib/inspection/items/network/CheckNICModel.py @@ -0,0 +1,85 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +import os +import subprocess +import platform +from gspylib.inspection.common import SharedFuncs +from gspylib.inspection.common.CheckItem import BaseItem +from gspylib.inspection.common.CheckResult import ResultStatus + + +class CheckNICModel(BaseItem): + def __init__(self): + super(CheckNICModel, self).__init__(self.__class__.__name__) + + def doCheck(self): + if (self.cluster): + LocalNodeInfo = self.cluster.getDbNodeByName(self.host) + backIP = LocalNodeInfo.backIps[0] + elif (self.ipAddr): + backIP = self.ipAddr + else: + backIP = SharedFuncs.getIpByHostName(self.host) + networkCardNumList = SharedFuncs.CheckNetWorkBonding(backIP) + if networkCardNumList == "Shell command faild": + return + networkCardNums = [] + if (len(networkCardNumList) != 1): + networkCardNums = networkCardNumList[1:] + else: + networkCardNums.append(networkCardNumList[0]) + flag = True + for networkCardNum in networkCardNums: + cmd = "/sbin/ethtool -i %s" % networkCardNum + output = SharedFuncs.runShellCmd(cmd) + self.result.raw += "[%s]\n%s\n" % (networkCardNum, output) + NICVer = "" + PCIAddr = "" + for eachLine in output.split("\n"): + if (eachLine.startswith("version:")): + NICVer = eachLine + if (eachLine.startswith('bus-info:')): + if (len(eachLine.split(':')) == 4): + PCIAddr = eachLine.split(':')[2] + ':' + \ + eachLine.split(':')[3] + if (NICVer): + self.result.val += "%s\n" % (NICVer) + else: + self.result.val += "Failed to get NIC %s 'version' info\n" \ + % networkCardNum + flag = False + if (PCIAddr): + cmd = "lspci |grep %s" % PCIAddr + (status, output) = subprocess.getstatusoutput(cmd) + self.result.raw += "%s\n" % (output) + if status == 0 and len(output.split(':')) >= 3: + modelInfo = ':'.join(output.split(':')[2:]).split('(')[0] + self.result.val += "model: %s\n" % (modelInfo.strip()) + 
else: + self.result.val += "Failed to get NIC %s model" \ + " 'bus-info' info\n" % networkCardNum + self.result.val += "The cmd is %s " % cmd + flag = False + else: + self.result.val += "Failed to get NIC %s model" \ + " 'bus-info' info\n" % networkCardNum + flag = False + + if (flag): + self.result.rst = ResultStatus.OK + else: + self.result.rst = ResultStatus.NG diff --git a/script/gspylib/inspection/items/network/CheckNetSpeed.py b/script/gspylib/inspection/items/network/CheckNetSpeed.py new file mode 100644 index 0000000..e53f7a2 --- /dev/null +++ b/script/gspylib/inspection/items/network/CheckNetSpeed.py @@ -0,0 +1,243 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +import os +import pwd +import subprocess +import _thread as thread +import time +import psutil +import multiprocessing +from multiprocessing.pool import ThreadPool +from gspylib.inspection.common import SharedFuncs +from gspylib.inspection.common.CheckItem import BaseItem +from gspylib.inspection.common.CheckResult import ResultStatus +from gspylib.os.gsnetwork import g_network +from gspylib.common.ErrorCode import ErrorCode + +DEFAULT_PARALLEL_NUM = 12 +DEFAULT_LISTEN_PORT = 20000 +DEFINE_DELAY_WARNING = 1000 +DEFINE_SPEED_WARNING = 600000 +DEFINE_DROP_WARNING = 0.005 +g_lock = thread.allocate_lock() +MaxDelayFailFlag = None +errorMsg = [] +speedMsg = "" +serviceIP = [] + + +class CheckNetSpeed(BaseItem): + def __init__(self): + super(CheckNetSpeed, self).__init__(self.__class__.__name__) + + def makeIpList(self): + ip_list = [] + for hostname in self.nodes: + ip_list.append(SharedFuncs.getIpByHostName(hostname)) + + return ip_list + + def runServer(self, serIP): + base_listen_port = DEFAULT_LISTEN_PORT + path = self.context.basePath + + server_count = 0 + max_server = 10 + while server_count < max_server: + listen_port = base_listen_port + server_count + try: + p = subprocess.Popen([path + "/lib/checknetspeed/speed_test", + "recv", serIP, str(listen_port), "tcp"], + shell=False, + stdout=open('/dev/null', 'w')) + except Exception as e: + raise Exception("[GAUSS-52200] :speed_test RuntimeException") + server_count += 1 + + return + + def runClient(self, self_index, ipList): + base_listen_port = DEFAULT_LISTEN_PORT + max_server = 10 + group = self_index // max_server + path = self.context.basePath + port = base_listen_port + self_index % max_server + for ip in ipList: + index = ipList.index(ip) + if (index == self_index): + continue + if (index // max_server != group): + continue + try: + p = subprocess.Popen([path + "/lib/checknetspeed/speed_test", + "send", ip, str(port), "tcp"], + shell=False, + stdout=open('/dev/null', 'w')) + except Exception as e: + raise Exception("[GAUSS-52200] :speed_test RuntimeException") + + return + + def getCpuSet(self): + """ + get cpu set of current board + cat /proc/cpuinfo |grep processor + """ + # do this function to get the parallel number + cpuSet = multiprocessing.cpu_count() + if (cpuSet > 1): + 
return cpuSet
+        else:
+            return DEFAULT_PARALLEL_NUM
+
+    def checkMaxDelay(self, ip):
+        global MaxDelayFailFlag
+        global errorMsg
+        global serviceIP
+        cmd = "ping -s 8192 -c 10 -i 0.3 %s|awk -F / '{print $7}'|" \
+              "awk '{print $1}'" % ip
+        output = SharedFuncs.runShellCmd(cmd)
+        if (output.strip() != ""):
+            try:
+                max_delay = float(output.strip())
+            except Exception as e:
+                errorMsg.append(output.strip())
+                return errorMsg
+        else:
+            MaxDelayFailFlag = True
+            return
+        if (max_delay > DEFINE_DELAY_WARNING):
+            g_lock.acquire()
+            string = "%s ping %s max delay is %.3fms" % (
+                serviceIP, ip, max_delay)
+            errorMsg.append(string)
+            g_lock.release()
+
+        return errorMsg
+
+    def checkSar(self, ethName):
+        global errorMsg
+        global serviceIP
+        global speedMsg
+        cmd = "sar -n DEV 1 10|grep %s|grep Average|awk '{print $6}'" \
+              % ethName
+        output = SharedFuncs.runShellCmd(cmd)
+        if (output.strip() != ""):
+            try:
+                average = float(output.strip())
+            except Exception as e:
+                errorMsg.append(output.strip())
+                return errorMsg
+        else:
+            errorMsg.append(
+                "Failed to get %s RX average. The command is: %s"
+                % (serviceIP, cmd))
+            return errorMsg
+
+        string = "%s RX average is %dkB/s" % (serviceIP, average)
+        if (average < DEFINE_SPEED_WARNING):
+            g_lock.acquire()
+            errorMsg.append(string)
+            g_lock.release()
+        else:
+            speedMsg = string
+        return errorMsg
+
+    def checkDrop(self, ethName, before_recv, before_drop):
+        global errorMsg
+        global serviceIP
+        try:
+            after_recv = psutil.net_io_counters(True)[ethName].packets_recv
+            after_drop = psutil.net_io_counters(True)[ethName].dropin
+        except Exception as e:
+            self.doClean()
+            self.result.rst = ResultStatus.NG
+            self.result.val = "Failed to get %s RX drop percentage." % ethName
+            raise Exception(ErrorCode.GAUSS_506["GAUSS_50620"])
+        self.doClean()
+        if (after_drop == before_drop):
+            return
+
+        percentage = (after_drop - before_drop) / (after_recv - before_recv)
+        if (percentage > DEFINE_DROP_WARNING):
+            g_lock.acquire()
+            string = "%s RX dropped percentage is %.4f" % (
+                serviceIP, percentage * 100)
+            errorMsg.append(string)
+            g_lock.release()
+        return errorMsg
+
+    def doClean(self):
+        currentUser = pwd.getpwuid(os.getuid())[0]
+        while True:
+            cmd = "ps -ef|grep speed_test|grep %s|grep -v grep|" \
+                  "awk '{print $2}'|xargs kill -9" % currentUser
+            (status, _) = subprocess.getstatusoutput(cmd)
+            if (status == 0):
+                break
+            time.sleep(1)
+        return
+
+    def doCheck(self):
+        global errorMsg
+        global serviceIP
+        global MaxDelayFailFlag
+        network_card_num = ""
+        serviceIP = SharedFuncs.getIpByHostName(self.host)
+        for network in g_network.getAllNetworkInfo():
+            if (network.ipAddress == serviceIP):
+                network_card_num = network.NICNum
+                break
+        if (not network_card_num):
+            raise Exception(ErrorCode.GAUSS_506["GAUSS_50619"])
+
+        ethName = network_card_num
+        ipList = self.makeIpList()
+
+        index = ipList.index(serviceIP)
+
+        self.runServer(serviceIP)
+        self.runClient(index, ipList)
+        try:
+            before_recv = psutil.net_io_counters(True)[ethName].packets_recv
+            before_drop = psutil.net_io_counters(True)[ethName].dropin
+        except Exception as e:
+            self.doClean()
+            self.result.rst = ResultStatus.NG
+            self.result.val = "Failed to get %s RX drop percentage." % ethName
+            raise Exception(ErrorCode.GAUSS_506["GAUSS_50621"]
+                            + "Error: %s" % str(e))
+
+        time.sleep(10)
+        MaxDelayMsg = "Failed to get the max delay."
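# Illustrative sketch (not part of the patch): checkMaxDelay() above
# extracts the delay with an awk pipeline over iputils ping, whose summary
# line reads "rtt min/avg/max/mdev = a/b/c/d ms" (some ping builds print
# "round-trip" instead of "rtt"). The same extraction in pure Python:
import subprocess

def ping_max_ms(ip):
    out = subprocess.check_output(
        ["ping", "-s", "8192", "-c", "10", "-i", "0.3", ip], text=True)
    for line in out.splitlines():
        if "min/avg/max" in line:
            # fields after "=" are min/avg/max[/mdev]; index 2 is the max
            return float(line.split("=")[1].split("/")[2])
    return None

print(ping_max_ms("127.0.0.1"))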
+ MaxDelayFailFlag = False + pool = ThreadPool(self.getCpuSet()) + results = pool.map(self.checkMaxDelay, ipList) + pool.close() + pool.join() + + if MaxDelayFailFlag: + errorMsg.append(MaxDelayMsg) + self.checkSar(ethName) + self.checkDrop(ethName, before_recv, before_drop) + + if errorMsg == []: + self.result.rst = ResultStatus.OK + self.result.val = "Check passed.\n%s" % speedMsg + else: + self.result.rst = ResultStatus.WARNING + self.result.val = "Check not passed:\n" + "\n".join( + errorMsg) + "\n%s" % speedMsg diff --git a/script/gspylib/inspection/items/network/CheckNetWorkDrop.py b/script/gspylib/inspection/items/network/CheckNetWorkDrop.py new file mode 100644 index 0000000..94db175 --- /dev/null +++ b/script/gspylib/inspection/items/network/CheckNetWorkDrop.py @@ -0,0 +1,162 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +import os +import time +import platform +from gspylib.inspection.common import SharedFuncs +from gspylib.inspection.common.CheckItem import BaseItem +from gspylib.inspection.common.CheckResult import ResultStatus +from gspylib.os.gsplatform import g_Platform + + +class CheckNetWorkDrop(BaseItem): + def __init__(self): + super(CheckNetWorkDrop, self).__init__(self.__class__.__name__) + + def doCheck(self): + """ + function: Check NetWork care package drop rate in 1 minute + """ + ipMap = {} + netWorkInfo = {} + distname, version, idnum = g_Platform.dist() + for nodeInfo in self.cluster.dbNodes: + ipMap[nodeInfo.sshIps[0]] = nodeInfo.backIps[0] + for sshIp in ipMap.keys(): + backIp = ipMap[sshIp] + # get remote IP network care number + if (g_Platform.isPlatFormEulerOSOrRHEL7X()): + if (os.getuid() == 0): + cmd = """pssh -s -P -H %s \\"/sbin/ifconfig\\"| + grep -B 5 \\"%s\\"|grep \\"RUNNING\\" """ \ + % (sshIp, backIp) + else: + cmd = """pssh -s -P -H %s "/sbin/ifconfig"| + grep -B 5 "%s"|grep "RUNNING" """ % (sshIp, backIp) + else: + if (os.getuid() == 0): + cmd = """pssh -s -P -H %s \\"/sbin/ifconfig\\"| + grep -B 5 \\"%s\\"|grep \\"Link encap\\" """ \ + % (sshIp, backIp) + else: + cmd = """pssh -s -P -H %s "/sbin/ifconfig"| + grep -B 5 "%s"|grep "Link encap" """ \ + % (sshIp, backIp) + output = SharedFuncs.runShellCmd(cmd, self.user) + if (g_Platform.isPlatFormEulerOSOrRHEL7X()): + NetWorkNum = output.split('\n')[-1].split()[0].split(':')[0] + else: + NetWorkNum = output.split('\n')[-1].strip().split()[0] + + if (g_Platform.isPlatFormEulerOSOrRHEL7X()): + if (os.getuid() == 0): + packageCmd1 = """pssh -s -P -H %s \\"/sbin/ifconfig %s| + grep packets|grep RX\\" """ % (sshIp, NetWorkNum) + else: + packageCmd1 = """pssh -s -P -H %s "/sbin/ifconfig %s| + grep packets|grep RX" """ % (sshIp, NetWorkNum) + else: + if (os.getuid() == 0): + packageCmd1 = """pssh -s -P -H %s \\"/sbin/ifconfig %s| + grep dropped|grep RX\\" """ % (sshIp, NetWorkNum) + else: + packageCmd1 = """pssh -s -P -H %s "/sbin/ifconfig %s| + grep dropped|grep RX" """ % (sshIp, NetWorkNum) + output = 
SharedFuncs.runShellCmd(packageCmd1, self.user) + if (g_Platform.isPlatFormEulerOSOrRHEL7X()): + package_begin = output.split('\n')[-1].strip().split()[2] + else: + package_begin = \ + output.split('\n')[-1].split(":")[1].strip().split()[0] + + if (os.getuid() == 0): + dropCmd1 = """pssh -s -P -H %s \\"/sbin/ifconfig %s| + grep dropped|grep RX\\" """ % (sshIp, NetWorkNum) + else: + dropCmd1 = """pssh -s -P -H %s "/sbin/ifconfig %s| + grep dropped|grep RX" """ % (sshIp, NetWorkNum) + output = SharedFuncs.runShellCmd(dropCmd1, self.user) + if (g_Platform.isPlatFormEulerOSOrRHEL7X()): + drop_begin = output.split('\n')[-1].strip().split()[4] + else: + drop_begin = \ + output.split('\n')[-1].split(":")[3].strip().split()[0] + netWorkInfo[backIp] = [NetWorkNum, package_begin, drop_begin, "", + ""] + time.sleep(60) + + for sshIp in ipMap.keys(): + backIp = ipMap[sshIp] + if (g_Platform.isPlatFormEulerOSOrRHEL7X()): + if (os.getuid() == 0): + packageCmd2 = """pssh -s -P -H %s \\"/sbin/ifconfig %s| + grep packets|grep RX\\" """ \ + % (sshIp, netWorkInfo[backIp][0]) + else: + packageCmd2 = """pssh -s -P -H %s "/sbin/ifconfig %s| + grep packets|grep RX" """ % (sshIp, + netWorkInfo[backIp][0]) + else: + if (os.getuid() == 0): + packageCmd2 = """pssh -s -P -H %s \\"/sbin/ifconfig %s| + grep dropped|grep RX\\" """ % (sshIp, + netWorkInfo[backIp][0]) + else: + packageCmd2 = """pssh -s -P -H %s "/sbin/ifconfig %s| + grep dropped|grep RX" """ % (sshIp, + netWorkInfo[backIp][0]) + output = SharedFuncs.runShellCmd(packageCmd2, self.user) + if (g_Platform.isPlatFormEulerOSOrRHEL7X()): + package_end = output.split('\n')[-1].strip().split()[2] + else: + package_end = \ + output.split('\n')[-1].split(":")[1].strip().split()[0] + if (os.getuid() == 0): + dropCmd2 = """pssh -s -P -H %s \\"/sbin/ifconfig %s| + grep dropped|grep RX\\" """ % (sshIp, netWorkInfo[backIp][0]) + else: + dropCmd2 = """pssh -s -P -H %s "/sbin/ifconfig %s| + grep dropped|grep RX" """ % (sshIp, netWorkInfo[backIp][0]) + output = SharedFuncs.runShellCmd(dropCmd2, self.user) + if (g_Platform.isPlatFormEulerOSOrRHEL7X()): + drop_end = output.split('\n')[-1].strip().split()[4] + else: + drop_end = \ + output.split('\n')[-1].split(":")[3].strip().split()[0] + netWorkInfo[backIp][3] = package_end + netWorkInfo[backIp][4] = drop_end + + flag = True + self.result.raw = "" + resultStr = "" + for ip in netWorkInfo.keys(): + packageSum = int(netWorkInfo[ip][3]) - int(netWorkInfo[ip][1]) + dropSum = int(netWorkInfo[ip][4]) - int(netWorkInfo[ip][2]) + dropRate = float(dropSum) / packageSum + if (dropRate > 0.01): + flag = False + resultStr += "\nAddress %s %s communication packet loss" \ + " rate of %.2f%%, more than 1%%." \ + % (ip, netWorkInfo[ip][0], dropRate * 100) + self.result.raw += "\n %s %s %s %s %.2f%%" % ( + ip, netWorkInfo[ip][0], dropSum, packageSum, dropRate * 100) + if flag: + self.result.rst = ResultStatus.OK + self.result.val = "All IP communications are stable." + else: + self.result.rst = ResultStatus.NG + self.result.val = resultStr diff --git a/script/gspylib/inspection/items/network/CheckNoCheckSum.py b/script/gspylib/inspection/items/network/CheckNoCheckSum.py new file mode 100644 index 0000000..64d0e52 --- /dev/null +++ b/script/gspylib/inspection/items/network/CheckNoCheckSum.py @@ -0,0 +1,71 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. 
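CheckNetWorkDrop above takes two ifconfig samples 60 seconds apart and derives the loss rate from the packet and drop deltas, flagging anything above 1%. The same computation via psutil on the local host (a sketch, not part of the patch; eth0 is a hypothetical NIC):

import time
import psutil

nic = "eth0"                        # hypothetical back-network NIC
s0 = psutil.net_io_counters(pernic=True)[nic]
time.sleep(60)                      # same window as the check above
s1 = psutil.net_io_counters(pernic=True)[nic]

recv = s1.packets_recv - s0.packets_recv
drop = s1.dropin - s0.dropin
rate = float(drop) / recv if recv else 0.0
print("RX drop rate: %.2f%%" % (rate * 100))   # the check flags > 1%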
+# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +import os +import platform +from gspylib.inspection.common import SharedFuncs +from gspylib.inspection.common.CheckItem import BaseItem +from gspylib.inspection.common.CheckResult import ResultStatus +from gspylib.os.gsfile import g_file +from gspylib.os.gsnetwork import g_network +from gspylib.os.gsfile import g_Platform +from gspylib.common.ErrorCode import ErrorCode + + +class CheckNoCheckSum(BaseItem): + def __init__(self): + super(CheckNoCheckSum, self).__init__(self.__class__.__name__) + + def getOSversion(self): + distname, version, idnum = g_Platform.dist() + return distname, version + + def doCheck(self): + if (not os.path.isfile("/sys/module/sctp/parameters/no_checksums")): + self.result.rst = ResultStatus.OK + self.result.val = "The SCTP service is not used and the" \ + " check item is skipped" + return + expect = "N" + if (self.cluster): + LocalNodeInfo = self.cluster.getDbNodeByName(self.host) + serviceIP = LocalNodeInfo.backIps[0] + else: + serviceIP = SharedFuncs.getIpByHostName(self.host) + for network in g_network.getAllNetworkInfo(): + if (network.ipAddress == serviceIP): + networkCardNum = network.NICNum + networkBond = network.networkBondModeInfo + break + if (not networkCardNum or not networkBond): + raise Exception(ErrorCode.GAUSS_506["GAUSS_50619"]) + (distname, version) = self.getOSversion() + if ((distname in ("redhat", "centos")) and + (version in ("6.4", "6.5")) and + networkBond != "BondMode Null"): + expect = "Y" + + output = \ + g_file.readFile('/sys/module/sctp/parameters/no_checksums')[0] + if (output.strip() == expect): + self.result.rst = ResultStatus.OK + self.result.val = "Nochecksum value is %s,Check items pass." \ + % output.strip() + else: + self.result.rst = ResultStatus.NG + self.result.val = "Nochecksum value(%s) is not %s," \ + "Check items are not passed." \ + % (output.strip(), expect) diff --git a/script/gspylib/inspection/items/network/CheckPing.py b/script/gspylib/inspection/items/network/CheckPing.py new file mode 100644 index 0000000..17c5814 --- /dev/null +++ b/script/gspylib/inspection/items/network/CheckPing.py @@ -0,0 +1,55 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. 
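The expected no_checksums value in CheckNoCheckSum.py follows a small decision table: "Y" only on RHEL/CentOS 6.4/6.5 with a bonded network card, "N" everywhere else. A condensed restatement of that rule (illustrative sketch with made-up inputs, assuming the SCTP module is loaded so the /sys file exists):

    def expected_no_checksums(distname, version, bond_mode):
        # Only RHEL/CentOS 6.4 and 6.5 with bonding expect checksums off
        if (distname in ("redhat", "centos") and version in ("6.4", "6.5")
                and bond_mode != "BondMode Null"):
            return "Y"
        return "N"

    with open("/sys/module/sctp/parameters/no_checksums") as f:
        actual = f.read().strip()
    print(actual == expected_no_checksums("centos", "6.5", "BondMode 1"))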
+# ----------------------------------------------------------------------------
+import subprocess
+import multiprocessing
+import _thread as thread
+from gspylib.inspection.common.CheckItem import BaseItem
+from gspylib.inspection.common.CheckResult import ResultStatus
+from gspylib.threads.parallelTool import parallelTool
+from gspylib.os.gsnetwork import g_network
+
+DEFAULT_PARALLEL_NUM = 12
+g_lock = thread.allocate_lock()
+noPassIPs = []
+
+
+class CheckPing(BaseItem):
+    def __init__(self):
+        super(CheckPing, self).__init__(self.__class__.__name__)
+
+    def doCheck(self):
+        global noPassIPs
+        allIP = []
+        LocalNodeInfo = self.cluster.getDbNodeByName(self.host)
+        allIP += LocalNodeInfo.backIps
+        allIP += LocalNodeInfo.sshIps
+        for dbInstance in LocalNodeInfo.datanodes:
+            allIP += dbInstance.haIps
+            allIP += dbInstance.listenIps
+
+        # sort the collected addresses, then drop duplicates in place
+        sortedAllIP = sorted(allIP)
+        for i in range(len(sortedAllIP) - 2, -1, -1):
+            if sortedAllIP.count(sortedAllIP[i]) > 1:
+                del sortedAllIP[i]
+        noPassIPs = g_network.checkIpAddressList(sortedAllIP)
+        if noPassIPs == []:
+            self.result.rst = ResultStatus.OK
+            self.result.raw = "All IPs can be pinged."
+        else:
+            self.result.rst = ResultStatus.NG
+            self.result.raw = "The following IPs cannot be pinged: \n%s" \
+                              % noPassIPs
diff --git a/script/gspylib/inspection/items/network/CheckRXTX.py b/script/gspylib/inspection/items/network/CheckRXTX.py
new file mode 100644
index 0000000..33da003
--- /dev/null
+++ b/script/gspylib/inspection/items/network/CheckRXTX.py
@@ -0,0 +1,104 @@
+# -*- coding:utf-8 -*-
+# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+#
+# openGauss is licensed under Mulan PSL v2.
+# You can use this software according to the terms
+# and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+#
+#          http://license.coscl.org.cn/MulanPSL2
+#
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
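The reverse-index loop in CheckPing.py above deduplicates the sorted IP list in place; sorted(set(allIP)) is the compact equivalent. The actual reachability test is delegated to g_network.checkIpAddressList; a hypothetical standalone stand-in with the stock Linux ping flags might look like this (illustrative only):

    import subprocess

    def unreachable_ips(ip_list):
        # one probe per IP with a 2-second deadline; collect the failures
        bad = []
        for ip in sorted(set(ip_list)):
            rc = subprocess.call(["ping", "-c", "1", "-w", "2", ip],
                                 stdout=subprocess.DEVNULL,
                                 stderr=subprocess.DEVNULL)
            if rc != 0:
                bad.append(ip)
        return bad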
+# ----------------------------------------------------------------------------
+from gspylib.inspection.common import SharedFuncs
+from gspylib.inspection.common.CheckItem import BaseItem
+from gspylib.inspection.common.CheckResult import ResultStatus
+from gspylib.os.gsnetwork import g_network
+from gspylib.os.gsfile import g_file
+from gspylib.common.ErrorCode import ErrorCode
+
+EXPECTED_RXTX = 4096
+
+
+class CheckRXTX(BaseItem):
+    def __init__(self):
+        super(CheckRXTX, self).__init__(self.__class__.__name__)
+
+    def doCheck(self):
+        flag = "Normal"
+        networkCardNums = []
+        if (self.cluster):
+            LocalNodeInfo = self.cluster.getDbNodeByName(self.host)
+            backIP = LocalNodeInfo.backIps[0]
+        else:
+            backIP = SharedFuncs.getIpByHostName(self.host)
+
+        # initialize so the GAUSS_50619 check below fires instead of a
+        # NameError when backIP matches no local network card
+        networkNum = None
+        BondMode = None
+        confFile = None
+        allNetworkInfo = g_network.getAllNetworkInfo()
+        for network in allNetworkInfo:
+            if (network.ipAddress == backIP):
+                networkNum = network.NICNum
+                BondMode = network.networkBondModeInfo
+                confFile = network.networkConfigFile
+                break
+
+        if (not networkNum or not BondMode or not confFile):
+            raise Exception(ErrorCode.GAUSS_506["GAUSS_50619"])
+        if (BondMode != "BondMode Null"):
+            bondFile = '/proc/net/bonding/%s' % networkNum
+            bondInfoList = g_file.readFile(bondFile, "Slave Interface")
+            for bondInfo in bondInfoList:
+                networkNum = bondInfo.split(':')[-1].strip()
+                networkCardNums.append(networkNum)
+        else:
+            networkCardNums.append(networkNum)
+
+        for networkCardNum in networkCardNums:
+            RXvalue = ""
+            TXvalue = ""
+            for network in allNetworkInfo:
+                if (network.NICNum == networkCardNum and
+                        str(network.RXValue).strip() != "" and
+                        str(network.TXValue).strip() != ""):
+                    RXvalue = network.RXValue
+                    TXvalue = network.TXValue
+            if (not RXvalue or not TXvalue):
+                flag = "Error"
+                self.result.val += "Failed to obtain network card [%s]" \
+                                   " RX or TX value." % networkCardNum
+                continue
+
+            if (int(RXvalue) < int(EXPECTED_RXTX)) or (
+                    int(TXvalue) < int(EXPECTED_RXTX)):
+                flag = "Error"
+                self.result.val += "NetWork[%s]\nRX: %s\nTX: %s\n" % (
+                    networkCardNum, RXvalue, TXvalue)
+
+        self.result.raw = self.result.val
+        if (flag == "Normal"):
+            self.result.rst = ResultStatus.OK
+        else:
+            self.result.rst = ResultStatus.NG
+
+    def doSet(self):
+        if (self.cluster):
+            LocalNodeInfo = self.cluster.getDbNodeByName(self.host)
+            backIP = LocalNodeInfo.backIps[0]
+        elif (self.ipAddr):
+            backIP = self.ipAddr
+        else:
+            backIP = SharedFuncs.getIpByHostName(self.host)
+        networkCardNumList = SharedFuncs.CheckNetWorkBonding(backIP)
+        if (len(networkCardNumList) != 1):
+            networkCardNums = networkCardNumList[1:]
+        else:
+            networkCardNums = networkCardNumList
+        for networkCardNum in networkCardNums:
+            cmd = "/sbin/ethtool -G %s %s %d" % (
+                networkCardNum, "rx", EXPECTED_RXTX)
+            cmd += ";/sbin/ethtool -G %s %s %d" % (
+                networkCardNum, "tx", EXPECTED_RXTX)
+            SharedFuncs.runShellCmd(cmd)
diff --git a/script/gspylib/inspection/items/network/CheckRouting.py b/script/gspylib/inspection/items/network/CheckRouting.py
new file mode 100644
index 0000000..98a3f6a
--- /dev/null
+++ b/script/gspylib/inspection/items/network/CheckRouting.py
@@ -0,0 +1,75 @@
+# -*- coding:utf-8 -*-
+# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+#
+# openGauss is licensed under Mulan PSL v2.
+# You can use this software according to the terms
+# and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+#
+#          http://license.coscl.org.cn/MulanPSL2
+#
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+# ----------------------------------------------------------------------------
+import os
+import subprocess
+import platform
+from gspylib.inspection.common import SharedFuncs
+from gspylib.inspection.common.CheckItem import BaseItem
+from gspylib.inspection.common.CheckResult import ResultStatus
+from gspylib.os.gsplatform import g_Platform
+
+
+class CheckRouting(BaseItem):
+    def __init__(self):
+        super(CheckRouting, self).__init__(self.__class__.__name__)
+
+    @staticmethod
+    def getBinaryAddr(ipAddr):
+        binaryStr = ""
+        for part in ipAddr.split('.'):
+            binaryStr += "%08d" % int(bin(int(part)).replace('0b', ''))
+        return binaryStr
+
+    def getBinaryRouting(self, ipAndMask):
+        (ip, netMask) = ipAndMask.split(':')
+        ipBinary = self.getBinaryAddr(ip)
+        maskBinary = self.getBinaryAddr(netMask)
+        routingBinary = ""
+        if (not len(ipBinary) == len(maskBinary) == 32):
+            return ""
+        for bit in range(len(ipBinary)):
+            routingBinary += str(int(ipBinary[bit]) & int(maskBinary[bit]))
+        return routingBinary
+
+    def doCheck(self):
+        ipList = []
+        routingBinary = self.getBinaryRouting(self.routing)
+        if g_Platform.isPlatFormEulerOSOrRHEL7X():
+            cmd = "/sbin/ifconfig -a |grep -E '\<inet\>'| awk '{print $2}'"
+        else:
+            cmd = "/sbin/ifconfig -a |grep 'inet addr'|" \
+                  " awk '{print $2}'| awk -F ':' '{print $2}'"
+        output = SharedFuncs.runShellCmd(cmd)
+        for eachLine in output.split('\n'):
+            if (SharedFuncs.validate_ipv4(eachLine)):
+                maskAddr = SharedFuncs.getMaskByIP(eachLine)
+                ipMask = "%s:%s" % (eachLine, maskAddr)
+                ipList.append(ipMask)
+        self.result.raw = "Routing: %s [bit]%s\nlocalIP:\n%s" % (
+            self.routing, routingBinary, "\n".join(ipList))
+
+        commIP = []
+        for ipMask in ipList:
+            ipBinary = self.getBinaryRouting(ipMask)
+            if (ipBinary == routingBinary):
+                commIP.append(ipMask)
+
+        if (len(commIP) > 1):
+            self.result.rst = ResultStatus.WARNING
+        else:
+            self.result.rst = ResultStatus.OK
+        self.result.val = "Business network segment IP: " + ", ".join(commIP)
diff --git a/script/gspylib/inspection/items/network/CheckUsedPort.py b/script/gspylib/inspection/items/network/CheckUsedPort.py
new file mode 100644
index 0000000..8a635ed
--- /dev/null
+++ b/script/gspylib/inspection/items/network/CheckUsedPort.py
@@ -0,0 +1,83 @@
+# -*- coding:utf-8 -*-
+# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+#
+# openGauss is licensed under Mulan PSL v2.
+# You can use this software according to the terms
+# and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+#
+#          http://license.coscl.org.cn/MulanPSL2
+#
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
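getBinaryRouting in CheckRouting.py computes the network prefix by AND-ing the binary forms of an address and its mask; the check then warns when more than one local IP lands in the routing segment. A worked example with made-up addresses (illustrative sketch, not part of the patch):

    def binary_addr(ip):
        # "192.168.1.10" -> "11000000101010000000000100001010"
        return "".join("%08d" % int(bin(int(p))[2:]) for p in ip.split('.'))

    ip = binary_addr("192.168.1.10")
    mask = binary_addr("255.255.255.0")
    network = "".join(str(int(a) & int(b)) for a, b in zip(ip, mask))
    assert network == binary_addr("192.168.1.0")
    # two local IP:mask pairs producing the same prefix as self.routing
    # is what triggers the WARNING branch of the check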
+# ---------------------------------------------------------------------------- + +import subprocess +from gspylib.inspection.common import SharedFuncs +from gspylib.inspection.common.CheckItem import BaseItem +from gspylib.inspection.common.CheckResult import ResultStatus +from gspylib.os.gsfile import g_file + + +class CheckUsedPort(BaseItem): + def __init__(self): + super(CheckUsedPort, self).__init__(self.__class__.__name__) + + def getPortRange(self): + portRangeValue = \ + g_file.readFile('/proc/sys/net/ipv4/ip_local_port_range')[0] + (startPort, endPort) = portRangeValue.split() + portRange = int(endPort) - int(startPort) + + return portRange + + def getTcpUsedPort(self): + if (self.ipAddr): + serviceIP = self.ipAddr + else: + serviceIP = SharedFuncs.getIpByHostName(self.host) + + cmd = "netstat -ano|awk '{print $4}'|grep '%s'|sort|uniq -c|" \ + "grep ' 1 '|wc -l" % serviceIP + tcpUsed = SharedFuncs.runShellCmd(cmd) + + return int(tcpUsed) + + def getSctpUsedPort(self): + cmd = "cat /proc/net/sctp/assocs|" \ + "awk '{print $12}'|sort|uniq -c |wc -l" + sctpUsed = SharedFuncs.runShellCmd(cmd) + + return int(sctpUsed) + + def doCheck(self): + portRange = self.getPortRange() + tcpUsed = self.getTcpUsedPort() + sctpUsed = self.getSctpUsedPort() + defaultPortRange = 60000 - 32768 + if (portRange < defaultPortRange): + self.result.rst = ResultStatus.WARNING + self.result.val = "port range is %s,Check items are not passed." \ + % portRange + return + + if (tcpUsed > portRange * 0.8): + self.result.rst = ResultStatus.WARNING + self.result.val = "tcp port used is %s,Check items are" \ + " not passed." % tcpUsed + return + + if (sctpUsed > portRange * 0.8): + self.result.rst = ResultStatus.WARNING + self.result.val = "sctp port used is %s," \ + "Check items are not passed." % sctpUsed + return + + self.result.rst = ResultStatus.OK + self.result.val = "port range is %s,tcp port used is %s," \ + "sctp port used is %d,Check items pass." \ + % (portRange, tcpUsed, sctpUsed) + return diff --git a/script/gspylib/inspection/items/os/CheckBootItems.py b/script/gspylib/inspection/items/os/CheckBootItems.py new file mode 100644 index 0000000..86cf8e4 --- /dev/null +++ b/script/gspylib/inspection/items/os/CheckBootItems.py @@ -0,0 +1,47 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. 
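CheckUsedPort.py above compares the configured ephemeral port range against the 32768-60000 default (27232 ports) and warns once TCP or SCTP usage crosses 80% of the range. The thresholds in one small sketch (illustrative only):

    with open("/proc/sys/net/ipv4/ip_local_port_range") as f:
        start_port, end_port = map(int, f.read().split())
    port_range = end_port - start_port

    default_range = 60000 - 32768      # 27232 ports
    usage_limit = port_range * 0.8     # WARNING above this many used ports
    print(port_range < default_range, usage_limit)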
+# ---------------------------------------------------------------------------- + +import platform +import subprocess +from gspylib.inspection.common import SharedFuncs +from gspylib.inspection.common.CheckItem import BaseItem +from gspylib.inspection.common.CheckResult import ResultStatus + + +class CheckBootItems(BaseItem): + def __init__(self): + super(CheckBootItems, self).__init__(self.__class__.__name__) + + def doCheck(self): + self.result.rst = ResultStatus.OK + checkItems = ["checksum", "mtu", "cgroup", "rx", "tx"] + bootitem = [] + bootfile = "" + if SharedFuncs.isSupportSystemOs(): + bootfile = "/etc/rc.d/rc.local" + else: + bootfile = "/etc/init.d/boot.local" + for item in checkItems: + cmd = "grep -i %s %s" % (item, bootfile) + (status, output) = subprocess.getstatusoutput(cmd) + if (output): + bootitem.append(item) + self.result.rst = ResultStatus.NG + if (self.result.rst == ResultStatus.OK): + self.result.val = "no boot item added" + else: + self.result.val = "boot items is added:\n%s" % "\n".join(bootitem) diff --git a/script/gspylib/inspection/items/os/CheckCPU.py b/script/gspylib/inspection/items/os/CheckCPU.py new file mode 100644 index 0000000..f09e377 --- /dev/null +++ b/script/gspylib/inspection/items/os/CheckCPU.py @@ -0,0 +1,70 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. 
+# ---------------------------------------------------------------------------- +from gspylib.inspection.common import SharedFuncs +from gspylib.inspection.common.CheckItem import BaseItem +from gspylib.inspection.common.CheckResult import ResultStatus +from gspylib.common.ErrorCode import ErrorCode + + +class CheckCPU(BaseItem): + def __init__(self): + super(CheckCPU, self).__init__(self.__class__.__name__) + self.idle = None + self.wio = None + self.standard = None + + def preCheck(self): + # check the threshold was set correctly + if (not "StandardCPUIdle" in self.threshold.keys() + or not "StandardWIO" in self.threshold.keys()): + raise Exception(ErrorCode.GAUSS_530["GAUSS_53013"] % "threshold") + if (not self.threshold['StandardCPUIdle'].isdigit() or not + self.threshold['StandardWIO'].isdigit()): + raise Exception(ErrorCode.GAUSS_530["GAUSS_53014"] + % "The threshold StandardCPUIdle and StandardWIO") + self.idle = self.threshold['StandardCPUIdle'] + self.wio = self.threshold['StandardWIO'] + + # format the standard by threshold + self.standard = self.standard.decode('utf-8').format(idle=self.idle, + iowait=self.wio) + + def doCheck(self): + cmd = "sar 1 5 2>&1" + output = SharedFuncs.runShellCmd(cmd) + self.result.raw = output + # check the result with threshold + d = next(n.split() for n in output.splitlines() if "Average" in n) + iowait = d[-3] + idle = d[-1] + rst = ResultStatus.OK + vals = [] + if (float(iowait) > float(self.wio)): + rst = ResultStatus.NG + vals.append( + "The %s actual value %s %% is greater than " + "expected value %s %%" % ( + "IOWait", iowait, self.wio)) + if (float(idle) < float(self.idle)): + rst = ResultStatus.NG + vals.append( + "The %s actual value %s %% is less than " + "expected value %s %%" % ( + "Idle", idle, self.idle)) + self.result.rst = rst + if (vals): + self.result.val = "\n".join(vals) diff --git a/script/gspylib/inspection/items/os/CheckCpuCount.py b/script/gspylib/inspection/items/os/CheckCpuCount.py new file mode 100644 index 0000000..aed639f --- /dev/null +++ b/script/gspylib/inspection/items/os/CheckCpuCount.py @@ -0,0 +1,58 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. 
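CheckCPU.py above reads the Average row that sar prints after its last sample; on the standard sar layout %iowait is the third field from the end and %idle the last, which is exactly what the doCheck indexing relies on. A parsing sketch against a fabricated sar line (illustrative only):

    sample = "Average:  all  2.10  0.00  1.30  0.50  0.00  96.10"
    fields = next(line.split() for line in sample.splitlines()
                  if "Average" in line)
    iowait, idle = float(fields[-3]), float(fields[-1])
    # NG when iowait > StandardWIO or idle < StandardCPUIdle
    print(iowait, idle)  # 0.5 96.1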
+# ----------------------------------------------------------------------------
+from gspylib.inspection.common import SharedFuncs
+from gspylib.inspection.common.CheckItem import BaseItem
+from gspylib.inspection.common.CheckResult import ResultStatus
+from gspylib.hardware.gscpu import CpuInfo
+
+
+class CheckCpuCount(BaseItem):
+    def __init__(self):
+        super(CheckCpuCount, self).__init__(self.__class__.__name__)
+
+    def doCheck(self):
+
+        parRes = ""
+        flag = "Normal"
+        cpuCount = CpuInfo.getCpuNum()
+
+        output_online = CpuInfo.getCpuOnlineOfflineInfo()
+        num = len(output_online.split('-'))
+        firstValue = output_online.split('-')[0].strip()
+        lastValue = output_online.split('-')[1].strip()
+
+        output_offline = CpuInfo.getCpuOnlineOfflineInfo(False)
+
+        if (num != 2 or int(firstValue) != 0 or int(
+                lastValue) != cpuCount - 1):
+            flag = "Error"
+            parRes += "There are unavailable CPUs.\n " \
+                      "online: %s.\n offline: %s." % (
+                          output_online, output_offline)
+        if (output_offline.strip() != "" and flag == "Normal"):
+            flag = "Warning"
+
+        if (flag == "Error"):
+            self.result.rst = ResultStatus.NG
+        elif (flag == "Warning"):
+            self.result.rst = ResultStatus.WARNING
+        else:
+            self.result.rst = ResultStatus.OK
+
+        self.result.val = "cpuCount: %d, online: %s, offline: %s." % (
+            cpuCount, output_online, output_offline)
+        self.result.raw = parRes
diff --git a/script/gspylib/inspection/items/os/CheckCrondService.py b/script/gspylib/inspection/items/os/CheckCrondService.py
new file mode 100644
index 0000000..8dbd1a5
--- /dev/null
+++ b/script/gspylib/inspection/items/os/CheckCrondService.py
@@ -0,0 +1,49 @@
+# -*- coding:utf-8 -*-
+# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+#
+# openGauss is licensed under Mulan PSL v2.
+# You can use this software according to the terms
+# and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+#
+#          http://license.coscl.org.cn/MulanPSL2
+#
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+# ----------------------------------------------------------------------------
+import subprocess
+from gspylib.inspection.common import SharedFuncs
+from gspylib.inspection.common.CheckItem import BaseItem
+from gspylib.inspection.common.CheckResult import ResultStatus
+from gspylib.os.gsservice import g_service
+
+
+class CheckCrondService(BaseItem):
+    def __init__(self):
+        super(CheckCrondService, self).__init__(self.__class__.__name__)
+
+    def doCheck(self):
+        (status, crondInfo) = g_service.manageOSService('crond', 'status')
+        self.result.raw = crondInfo
+        # Parse and output the execution result of this node
+        if (status != 0 or crondInfo.find('running') < 0):
+            self.result.val = "There is no cron service."
+            self.result.rst = ResultStatus.NG
+        else:
+            self.result.rst = ResultStatus.OK
+            self.result.val = "The cron service is normal."
+
+    def doSet(self):
+        if SharedFuncs.isSupportSystemOs():
+            cmd = "/sbin/service crond start"
+        else:
+            cmd = "/sbin/service cron start"
+        (status, output) = subprocess.getstatusoutput(cmd)
+        if (status != 0):
+            self.result.val = "Failed to start crond service. " \
+                              "Error: %s\n" % output + "The cmd is %s " % cmd
+        else:
+            self.result.val = "Successfully started the crond service.\n"
diff --git a/script/gspylib/inspection/items/os/CheckCrontabLeft.py b/script/gspylib/inspection/items/os/CheckCrontabLeft.py
new file mode 100644
index 0000000..5ef8d96
--- /dev/null
+++ b/script/gspylib/inspection/items/os/CheckCrontabLeft.py
@@ -0,0 +1,86 @@
+# -*- coding:utf-8 -*-
+# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+#
+# openGauss is licensed under Mulan PSL v2.
+# You can use this software according to the terms
+# and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+#
+#          http://license.coscl.org.cn/MulanPSL2
+#
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+# ----------------------------------------------------------------------------
+import os
+import subprocess
+from gspylib.inspection.common import SharedFuncs
+from gspylib.inspection.common.CheckItem import BaseItem
+from gspylib.inspection.common.CheckResult import ResultStatus
+from gspylib.os.gsplatform import g_Platform
+from gspylib.common.ErrorCode import ErrorCode
+
+
+class CheckCrontabLeft(BaseItem):
+    def __init__(self):
+        super(CheckCrontabLeft, self).__init__(self.__class__.__name__)
+        self.crontabUser = None
+
+    def preCheck(self):
+        super(CheckCrontabLeft, self).preCheck()
+        if not "crontabUser" in self.threshold.keys():
+            raise Exception(ErrorCode.GAUSS_530["GAUSS_53013"]
+                            % "Threshold crontabUser")
+        self.crontabUser = self.threshold['crontabUser']
+
+    def doCheck(self):
+        parRes = ""
+        cmd = g_Platform.getAllCrontabCmd()
+        allCrontab = SharedFuncs.runShellCmd(cmd, self.user)
+        for crontabService in allCrontab.split('\n'):
+            if crontabService.find('om_monitor') >= 0:
+                parRes = "Gauss process om_monitor remains in crontab. " \
+                         "Please delete this Gauss info."
+                self.result.raw += "%s\n" % crontabService
+        if parRes:
+            self.result.rst = ResultStatus.NG
+            self.result.val = parRes
+        else:
+            self.result.rst = ResultStatus.OK
+
+    def doSet(self):
+        # list the crontab of the specified user when running as root
+        if os.getuid() == 0:
+            cmd = "crontab -l -u '%s'" % self.crontabUser
+        else:
+            cmd = "crontab -l"
+        (status, output) = subprocess.getstatusoutput(cmd)
+        if status != 0 or output.find('om_monitor') < 0:
+            self.result.val = "No gauss process in crontab.\n"
+            return
+
+        tmpCrondFileName = "gauss_crond_tmp"
+        tmpCrondFile = os.path.join(self.tmpPath, tmpCrondFileName)
+        try:
+            SharedFuncs.createFile(tmpCrondFile, self.tmpPath)
+            SharedFuncs.writeFile(tmpCrondFile, output, self.tmpPath)
+            cmd = "sed -i '/om_monitor/d' %s" % tmpCrondFile
+            SharedFuncs.runShellCmd(cmd)
+            cmd = "crontab %s " % tmpCrondFile
+            # reload the crontab as the specified user when running as root
+            if os.getuid() == 0:
+                cmd = "su - %s '%s'" % (self.crontabUser, cmd)
+            (status, output) = subprocess.getstatusoutput(cmd)
+            if status != 0:
+                self.result.val = "Failed to clean om_monitor in crontab." \
+                                  " Error: %s\n" % output + "The cmd is %s " \
+                                  % cmd
+            else:
+                self.result.val = "Successfully cleaned om_monitor " \
+                                  "in crontab.\n"
+            SharedFuncs.cleanFile(tmpCrondFile)
+        except Exception as e:
+            if os.path.exists(tmpCrondFile):
+                SharedFuncs.cleanFile(tmpCrondFile)
+            raise Exception(str(e))
+
diff --git a/script/gspylib/inspection/items/os/CheckDirLeft.py b/script/gspylib/inspection/items/os/CheckDirLeft.py
new file mode 100644
index 0000000..a0fa394
--- /dev/null
+++ b/script/gspylib/inspection/items/os/CheckDirLeft.py
@@ -0,0 +1,87 @@
+# -*- coding:utf-8 -*-
+# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+#
+# openGauss is licensed under Mulan PSL v2.
+# You can use this software according to the terms
+# and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+#
+#          http://license.coscl.org.cn/MulanPSL2
+#
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+# ----------------------------------------------------------------------------
+import os
+import subprocess
+from gspylib.inspection.common.CheckItem import BaseItem
+from gspylib.inspection.common.CheckResult import ResultStatus
+from gspylib.os.gsfile import g_file
+from gspylib.os.gsplatform import g_Platform
+from gspylib.common.ErrorCode import ErrorCode
+
+rmDir = []
+
+
+class CheckDirLeft(BaseItem):
+    def __init__(self):
+        super(CheckDirLeft, self).__init__(self.__class__.__name__)
+        self.directoryList = None
+
+    def preCheck(self):
+        # run the base pre-check, then validate the threshold
+        super(CheckDirLeft, self).preCheck()
+        # check the threshold was set correctly
+        if (not self.threshold.__contains__('directoryList')):
+            raise Exception(ErrorCode.GAUSS_530["GAUSS_53013"]
+                            % "threshold directoryList")
+        self.directoryList = self.threshold['directoryList'].split(',')
+
+    def doCheck(self):
+        global rmDir
+        parRes = ""
+        flag = 0
+        mountDir = []
+        mountList = g_file.readFile(g_Platform.getMtablFile())
+        for line in mountList:
+            mountInfo = line.strip()
+            if (not mountInfo.startswith('#') and len(mountInfo.split()) > 5):
+                mountDir.append(mountInfo.split()[1])
+        for dirName in self.directoryList:
+            if (os.path.exists(dirName)):
+                flagNumber = True
+                for mdir in mountDir:
+                    if (len(mdir) >= len(dirName)):
+                        if (mdir[0:len(dirName)] == dirName):
+                            flagNumber = False
+                            break
+                if (not flagNumber):
+                    continue
+
+                parRes += "\nThe directory of %s exists." \
% dirName + rmDir.append(dirName) + flag = 1 + + if (flag == 1): + self.result.rst = ResultStatus.NG + else: + self.result.rst = ResultStatus.OK + self.result.val = parRes + self.result.raw = "mount directory: %s" % mountDir + + def doSet(self): + errMsg = "" + for path in rmDir: + if (os.path.exists(path)): + cmd = "rm -rf %s" % path + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + errMsg += "Failed to delete %s.Error: %s\n" % \ + (path, output) + errMsg += "The cmd is %s " % cmd + if (errMsg): + self.result.val = errMsg + else: + self.result.val = "Successfully clean up file residues.\n" diff --git a/script/gspylib/inspection/items/os/CheckDropCache.py b/script/gspylib/inspection/items/os/CheckDropCache.py new file mode 100644 index 0000000..a5a6edb --- /dev/null +++ b/script/gspylib/inspection/items/os/CheckDropCache.py @@ -0,0 +1,40 @@ +# coding: UTF-8 +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- + +import subprocess +from gspylib.inspection.common import SharedFuncs +from gspylib.inspection.common.CheckItem import BaseItem +from gspylib.inspection.common.CheckResult import ResultStatus + + +class CheckDropCache(BaseItem): + def __init__(self): + super(CheckDropCache, self).__init__(self.__class__.__name__) + + def doCheck(self): + checkdropCacheCmd = "ps -ef| grep 'dropc'|grep -v 'grep'" + (status, output) = subprocess.getstatusoutput(checkdropCacheCmd) + if (status == 0): + if (output): + self.result.rst = ResultStatus.OK + self.result.val = "The DropCache process is running" + else: + self.result.rst = ResultStatus.WARNING + self.result.val = "No DropCache process is running" + else: + self.result.rst = ResultStatus.WARNING + self.result.val = "No DropCache process is running" diff --git a/script/gspylib/inspection/items/os/CheckEncoding.py b/script/gspylib/inspection/items/os/CheckEncoding.py new file mode 100644 index 0000000..045a51f --- /dev/null +++ b/script/gspylib/inspection/items/os/CheckEncoding.py @@ -0,0 +1,35 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. 
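CheckDirLeft.py above deliberately skips a leftover directory when some mount point lives at or under it, since removing the directory would take the mount with it. The prefix test in isolation (sample paths are made up; illustrative only):

    mount_points = ["/", "/boot", "/opt/gauss"]

    def shadowed_by_mount(dir_name):
        # True when a mount point starts with dir_name, e.g. "/opt"
        return any(m[0:len(dir_name)] == dir_name for m in mount_points)

    print(shadowed_by_mount("/opt"))         # True - left alone
    print(shadowed_by_mount("/home/gauss"))  # False - candidate for rmDir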
+# ---------------------------------------------------------------------------- + +from gspylib.inspection.common import SharedFuncs +from gspylib.inspection.common.CheckItem import BaseItem +from gspylib.inspection.common.CheckResult import ResultStatus + + +class CheckEncoding(BaseItem): + def __init__(self): + super(CheckEncoding, self).__init__(self.__class__.__name__) + + def doCheck(self): + cmd = "source /etc/profile 2>/dev/null; locale | grep '^LANG='" + output = SharedFuncs.runShellCmd(cmd) + self.result.raw = cmd + if (output != ""): + self.result.rst = ResultStatus.OK + else: + self.result.rst = ResultStatus.NG + self.result.val = output diff --git a/script/gspylib/inspection/items/os/CheckEtcHosts.py b/script/gspylib/inspection/items/os/CheckEtcHosts.py new file mode 100644 index 0000000..12adc88 --- /dev/null +++ b/script/gspylib/inspection/items/os/CheckEtcHosts.py @@ -0,0 +1,91 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +import subprocess +from gspylib.inspection.common.CheckItem import BaseItem +from gspylib.inspection.common.CheckResult import ResultStatus +from gspylib.os.gsfile import g_file + + +class CheckEtcHosts(BaseItem): + def __init__(self): + super(CheckEtcHosts, self).__init__(self.__class__.__name__) + + def doCheck(self): + flag = "Normal" + conflictsMapping = [] + commentsMapping = [] + IPMapping = {} + IpList = [] + + mappingList = g_file.readFile('/etc/hosts') + for eachLine in mappingList: + eachLine = eachLine.strip() + if (eachLine == ""): + continue + if (not eachLine.startswith('#') and '::' not in eachLine): + mappingInfo = " ".join(eachLine.split()) + IpList.append(mappingInfo) + IpList.sort() + self.result.raw = "\n".join(IpList) + + # Check localhost Mapping + localHost = False + for eachIP in IpList: + if (eachIP.find("127.0.0.1 localhost") == 0): + localHost = True + break + if (not localHost): + self.result.rst = ResultStatus.NG + self.result.val = "The /etc/hosts does not match localhosts." + return + + # Check conflicts Mapping and GAUSS comments Mapping + for IPInfo in IpList: + ipHost = IPInfo.split() + if (len(ipHost) < 2): + continue + ip = IPInfo.split()[0] + host = IPInfo.split()[1] + if (ip == "127.0.0.1"): + continue + if (ip in IPMapping.keys() and host != IPMapping[ip]): + conflictsMapping.append(IPInfo) + conflictsMapping.append("%s %s" % (ip, IPMapping[ip])) + flag = "Error_conflicts" + else: + IPMapping[ip] = host + if (len(IPInfo.split()) > 2 and IPInfo.split()[2] == "#Gauss"): + commentsMapping.append(IPInfo + " IP Hosts Mapping") + flag = "Error_comments" + + if (flag == "Normal"): + self.result.rst = ResultStatus.OK + self.result.val = "The /etc/hosts is configured correctly." 
+ elif (flag == "Error_comments"): + self.result.rst = ResultStatus.NG + self.result.val = "The /etc/hosts has comments Mapping:\n" \ + + "\n".join( + commentsMapping) + else: + self.result.rst = ResultStatus.NG + self.result.val = "The /etc/hosts has conflicts Mapping:\n" \ + + "\n".join( + conflictsMapping) + if (len(commentsMapping) > 0): + self.result.val += "\n\nThe /etc/hosts has " \ + "comments Mapping:\n" + "\n".join( + commentsMapping) diff --git a/script/gspylib/inspection/items/os/CheckFilehandle.py b/script/gspylib/inspection/items/os/CheckFilehandle.py new file mode 100644 index 0000000..c87e870 --- /dev/null +++ b/script/gspylib/inspection/items/os/CheckFilehandle.py @@ -0,0 +1,120 @@ +# coding: UTF-8 +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +import os +import json +import multiprocessing +from gspylib.inspection.common import SharedFuncs +from gspylib.inspection.common.CheckItem import BaseItem +from gspylib.inspection.common.CheckResult import ResultStatus +from gspylib.common.ErrorCode import ErrorCode + +# master +MASTER_INSTANCE = 0 +# standby +STANDBY_INSTANCE = 1 + + +class CheckFilehandle(BaseItem): + def __init__(self): + super(CheckFilehandle, self).__init__(self.__class__.__name__) + self.Threshold_Warning = None + + def preCheck(self): + super(CheckFilehandle, self).preCheck() + if (not self.threshold.__contains__('Threshold_Warning')): + raise Exception(ErrorCode.GAUSS_530["GAUSS_53013"] + % "The Threshold_Warning") + if (not self.threshold['Threshold_Warning'].isdigit()): + raise Exception(ErrorCode.GAUSS_530["GAUSS_53014"] + % "The Threshold_Warning") + self.Threshold_Warning = int(self.threshold['Threshold_Warning']) + + def doCheck(self): + masterDNs = [] + slaveDNs = [] + masterDNhander = {} + salveDNhander = {} + overvalueDNs = [] + overmasterDNs = [] + flag = False + nodeInfo = self.cluster.getDbNodeByName(self.host) + for DnInstance in nodeInfo.datanodes: + if (DnInstance.instanceType == MASTER_INSTANCE): + masterDNs.append(DnInstance) + elif (DnInstance.instanceType == STANDBY_INSTANCE): + slaveDNs.append(DnInstance) + for dn in masterDNs: + getpidcmd = "ps -ef| grep %s|grep -v 'grep'|awk '{print $2}'" \ + % dn.datadir + pid = SharedFuncs.runShellCmd(getpidcmd) + getfilehander = "lsof | grep %s|wc -l" % pid + filehander = SharedFuncs.runShellCmd(getfilehander) + instanceName = "dn_%s" % (dn.instanceId) + if (not filehander.isdigit()): + num = filehander.splitlines() + filehander = num[-1] + if (not filehander.isdigit()): + raise Exception(ErrorCode.GAUSS_530["GAUSS_53018"] + % (pid, getfilehander)) + masterDNhander[instanceName] = filehander + for dn in slaveDNs: + getpidcmd = "ps -ef| grep '%s'|grep -v 'grep'|awk '{print $2}'" \ + % dn.datadir + pid = SharedFuncs.runShellCmd(getpidcmd) + getfilehander = "lsof | grep %s|wc -l" % pid + filehander = SharedFuncs.runShellCmd(getfilehander) + instanceName = "dn_%s" % (dn.instanceId) + if (not filehander.isdigit()): + num = 
filehander.splitlines() + filehander = num[-1] + if (not filehander.isdigit()): + raise Exception(ErrorCode.GAUSS_530["GAUSS_53018"] + % (pid, getfilehander)) + salveDNhander[instanceName] = filehander + for key, value in (masterDNhander.items()): + if (int(value) > self.Threshold_Warning): + overvalueDNs.append(key) + for key, value in (salveDNhander.items()): + if (int(value) > self.Threshold_Warning): + overvalueDNs.append(key) + for key, value in salveDNhander.items(): + for mkey, mastervalue in masterDNhander.items(): + if (int(value) > int(mastervalue)): + overmasterDNs.append(key) + flag = True + if (overvalueDNs and flag): + self.result.val = "Some slave database node open more file " \ + "hander than master database node %s;" \ + "Some gaussdb process open file handler over" \ + " %s:\n%s" % ("\n".join(overmasterDNs), + self.Threshold_Warning, + "\n".join(overvalueDNs)) + self.result.rst = ResultStatus.WARNING + elif (overvalueDNs): + self.result.val = "Some gaussdb process open file handler" \ + " over %s:\n%s" % ( + self.Threshold_Warning, + "\n".join(overvalueDNs)) + self.result.rst = ResultStatus.WARNING + elif (flag): + self.result.val = "There is some slave database node open " \ + "more file hander than master database node" \ + " %s" % "\n".join(overmasterDNs) + self.result.rst = ResultStatus.WARNING + else: + self.result.val = "File hander check pass" + self.result.rst = ResultStatus.OK diff --git a/script/gspylib/inspection/items/os/CheckFirewall.py b/script/gspylib/inspection/items/os/CheckFirewall.py new file mode 100644 index 0000000..c5c23c8 --- /dev/null +++ b/script/gspylib/inspection/items/os/CheckFirewall.py @@ -0,0 +1,67 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. 
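CheckFilehandle.py above counts handles with an `lsof | grep <pid> | wc -l` pipeline, which can also match unrelated lines that merely contain the pid. A cheaper and more precise local alternative, assuming permission to read the process's fd table, is to count the /proc/<pid>/fd entries (illustrative sketch, not part of the patch):

    import os

    def open_fd_count(pid):
        # one directory entry per open descriptor of the process
        return len(os.listdir("/proc/%d/fd" % pid))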
+# ---------------------------------------------------------------------------- +import platform +import subprocess +from gspylib.inspection.common import SharedFuncs +from gspylib.inspection.common.CheckItem import BaseItem +from gspylib.inspection.common.CheckResult import ResultStatus +from gspylib.os.gsservice import g_service +from gspylib.os.gsplatform import g_Platform + +EXPECTED_VALUE = "disabled" +SUSE_FLAG = "SuSEfirewall2 not active" +REDHAT6_FLAG = "Firewall is not running" +REDHAT7_FLAG = "Active: inactive (dead)" + + +class CheckFirewall(BaseItem): + def __init__(self): + super(CheckFirewall, self).__init__(self.__class__.__name__) + + def doCheck(self): + (status, output) = g_service.manageOSService("firewall", "status") + if (output.find(SUSE_FLAG) > 0 or output.find( + REDHAT6_FLAG) > 0 or output.find(REDHAT7_FLAG) > 0): + firewallStatus = "disabled" + else: + firewallStatus = "enabled" + if (firewallStatus == ""): + self.result.rst = ResultStatus.OK + elif (firewallStatus != EXPECTED_VALUE): + self.result.rst = ResultStatus.NG + else: + self.result.rst = ResultStatus.OK + if (not self.result.raw): + self.result.raw = output + else: + self.result.raw = output + self.result.val = firewallStatus + + def doSet(self): + if g_Platform.isPlatFormEulerOSOrRHEL7X(): + cmd = "systemctl stop firewalld.service" + elif SharedFuncs.isSupportSystemOs(): + cmd = "service iptables stop" + else: + cmd = "SuSEfirewall2 stop" + + status, output = subprocess.getstatusoutput(cmd) + if status: + self.result.val = "Failed to stop firewall service. Error: %s\n" \ + % output + "The cmd is %s " % cmd + else: + self.result.val = "Successfully stopped the firewall service.\n" diff --git a/script/gspylib/inspection/items/os/CheckHyperThread.py b/script/gspylib/inspection/items/os/CheckHyperThread.py new file mode 100644 index 0000000..5a24e5f --- /dev/null +++ b/script/gspylib/inspection/items/os/CheckHyperThread.py @@ -0,0 +1,57 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. 
+# ---------------------------------------------------------------------------- +from gspylib.inspection.common.CheckItem import BaseItem +from gspylib.inspection.common.CheckResult import ResultStatus +from gspylib.os.gsfile import g_file +from gspylib.common.Common import DefaultValue + + +class CheckHyperThread(BaseItem): + def __init__(self): + super(CheckHyperThread, self).__init__(self.__class__.__name__) + + def doCheck(self): + flag = "open" + idList = [] + idCount = 0 + cores = 0 + cpuCount = 0 + cpuInfo = g_file.readFile('/proc/cpuinfo') + for eachLine in cpuInfo: + if (eachLine.find('physical id') >= 0): + # get different CPU id + cpuID = eachLine.split(':')[1].strip() + if (not cpuID in idList): + idList.append(cpuID) + # Calculate the number of CPUs + idCount += 1 + if (eachLine.find('cores') >= 0): + cores = int(eachLine.split(':')[1].strip()) + if (eachLine.find('processor') >= 0): + cpuCount += 1 + + if (cpuCount == 2 * idCount * cores): + self.result.rst = ResultStatus.OK + else: + if DefaultValue.checkDockerEnv(): + return + flag = "down" + self.result.rst = ResultStatus.NG + + self.result.val = "Hyper-threading is %s." % flag + self.result.raw = "the number of physical id: %d, cores: %d," \ + " cpu counts: %d" % (idCount, cores, cpuCount) diff --git a/script/gspylib/inspection/items/os/CheckKernelVer.py b/script/gspylib/inspection/items/os/CheckKernelVer.py new file mode 100644 index 0000000..7f67561 --- /dev/null +++ b/script/gspylib/inspection/items/os/CheckKernelVer.py @@ -0,0 +1,35 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +from gspylib.inspection.common import SharedFuncs +from gspylib.inspection.common.CheckItem import BaseItem +from gspylib.inspection.common.CheckResult import ResultStatus + + +class CheckKernelVer(BaseItem): + def __init__(self): + super(CheckKernelVer, self).__init__(self.__class__.__name__) + + def doCheck(self): + cmd = "uname -r" + output = SharedFuncs.runShellCmd(cmd) + if (output != ""): + self.result.rst = ResultStatus.OK + self.result.val = output + else: + self.result.rst = ResultStatus.NG + self.result.val = "Failed to get kernel version." + self.result.raw = cmd diff --git a/script/gspylib/inspection/items/os/CheckKeyProAdj.py b/script/gspylib/inspection/items/os/CheckKeyProAdj.py new file mode 100644 index 0000000..38f32c1 --- /dev/null +++ b/script/gspylib/inspection/items/os/CheckKeyProAdj.py @@ -0,0 +1,59 @@ +# coding: UTF-8 +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. 
+# See the Mulan PSL v2 for more details.
+# ----------------------------------------------------------------------------
+import os
+from gspylib.inspection.common import SharedFuncs
+from gspylib.inspection.common.CheckItem import BaseItem
+from gspylib.inspection.common.CheckResult import ResultStatus
+
+
+class CheckKeyProAdj(BaseItem):
+    def __init__(self):
+        super(CheckKeyProAdj, self).__init__(self.__class__.__name__)
+
+    def doCheck(self):
+        procadj = {}
+        result = ""
+        prolist = ['om_monitor', 'cm_agent', 'gaussdb', 'cm_server', 'gtm',
+                   'etcd']
+        gausshome = self.cluster.appPath
+        gaussdbpath = os.path.join(gausshome, "bin/gaussdb")
+        for process in prolist:
+            if (process == 'gaussdb'):
+                getpidcmd = "ps ux| grep '%s'|grep -v 'grep'|awk '{print " \
+                            "$2}'" \
+                            % gaussdbpath
+            else:
+                getpidcmd = "ps ux| grep '%s'|grep -v 'grep'|awk '{print " \
+                            "$2}'" \
+                            % process
+            pids = SharedFuncs.runShellCmd(getpidcmd)
+            for pid in pids.splitlines():
+                getAdjcmd = "cat /proc/%s/oom_adj" % pid
+                adjValue = SharedFuncs.runShellCmd(getAdjcmd)
+                if (int(adjValue) < 0):
+                    tmpkey = "%s_%s" % (process, pid)
+                    procadj[tmpkey] = adjValue
+        if (procadj):
+            self.result.rst = ResultStatus.NG
+            for key, value in procadj.items():
+                result += "%s : %s \n" % (key, value)
+            self.result.val = "There are processes whose oom_adj value " \
+                              "is less than 0 \n%s" % (result)
+        else:
+            self.result.rst = ResultStatus.OK
+            self.result.val = "No key process has an oom_adj value" \
+                              " less than 0"
diff --git a/script/gspylib/inspection/items/os/CheckMaxHandle.py b/script/gspylib/inspection/items/os/CheckMaxHandle.py
new file mode 100644
index 0000000..ce47462
--- /dev/null
+++ b/script/gspylib/inspection/items/os/CheckMaxHandle.py
@@ -0,0 +1,108 @@
+# -*- coding:utf-8 -*-
+# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+#
+# openGauss is licensed under Mulan PSL v2.
+# You can use this software according to the terms
+# and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+#
+#          http://license.coscl.org.cn/MulanPSL2
+#
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
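CheckKeyProAdj.py above flags key openGauss processes whose oom_adj is negative, i.e. shielded from the OOM killer. Reading the value for one pid is just (illustrative sketch):

    def oom_adj(pid):
        # negative values make the OOM killer avoid the process;
        # newer kernels expose oom_score_adj alongside this legacy file
        with open("/proc/%d/oom_adj" % pid) as f:
            return int(f.read())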
+# ---------------------------------------------------------------------------- +import os +from gspylib.common.Common import DefaultValue +from gspylib.inspection.common import SharedFuncs +from gspylib.inspection.common.CheckItem import BaseItem +from gspylib.inspection.common.CheckResult import ResultStatus +from gspylib.os.gsOSlib import g_OSlib +from gspylib.os.gsfile import g_file + + +class CheckMaxHandle(BaseItem): + def __init__(self): + super(CheckMaxHandle, self).__init__(self.__class__.__name__) + + def doCheck(self): + flag = True + parRes = "" + # Determine if it is an ELK environment + elk_env = DefaultValue.getEnv("ELK_SYSTEM_TABLESPACE") + if (elk_env): + expand_value = 640000 + else: + expand_value = 1000000 + # Check system open files parameter + output = g_OSlib.getUserLimits('open files') + self.result.raw = output + if (output != ""): + self.result.val += output + "\n" + resList = output.split(' ') + limitValue = resList[-1].strip() + # Unlimited check is passed + if limitValue == 'unlimited': + pass + # Open file parameter value is less than 640000 will not pass + if int(limitValue) < int(expand_value): + flag = False + else: + pass + # Write check results + parRes += "Max open files: %s\n" % limitValue + else: + # + flag = False + parRes += "Failed to get system open files parameter.\n" + + # Check cluster process open files parameter + if (self.cluster): + pidList = g_OSlib.getProcess( + os.path.join(self.cluster.appPath, 'bin/gaussdb')) + for pid in pidList: + if (not os.path.isfile( + "/proc/%s/limits" % pid) or not os.access( + "/proc/%s/limits" % pid, os.R_OK)): + continue + openFileInfo = \ + g_file.readFile('/proc/%s/limits' % pid, 'Max open files')[ + 0] + if (openFileInfo): + value = openFileInfo.split()[3] + if (int(value.strip()) < expand_value): + flag = False + parRes += "The value of " \ + "max open files is %s on pid %s. " \ + "it must not be less than %d.\n" % ( + value.strip(), pid, expand_value) + if (flag): + self.result.rst = ResultStatus.OK + else: + self.result.rst = ResultStatus.NG + self.result.val = parRes + + def doSet(self): + self.result.val = "" + self.result.raw = "" + limitPath = '/etc/security/limits.d/' + if (os.path.isfile(os.path.join(limitPath, '91-nofile.conf'))): + limitFile = '91-nofile.conf' + else: + limitFile = '90-nofile.conf' + + elk_env = DefaultValue.getEnv("ELK_SYSTEM_TABLESPACE") + if (elk_env): + expand_value = 640000 + else: + expand_value = 1000000 + + errMsg = SharedFuncs.SetLimitsConf(["soft", "hard"], "nofile", + expand_value, + os.path.join(limitPath, limitFile)) + if errMsg != "Success": + self.result.val = "%s\n" % errMsg + else: + self.result.val = "Success to set openfile to %d\n" % expand_value diff --git a/script/gspylib/inspection/items/os/CheckMaxProcMemory.py b/script/gspylib/inspection/items/os/CheckMaxProcMemory.py new file mode 100644 index 0000000..29ffded --- /dev/null +++ b/script/gspylib/inspection/items/os/CheckMaxProcMemory.py @@ -0,0 +1,166 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. 
+# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +import os +import subprocess +import json +import multiprocessing +from gspylib.inspection.common import SharedFuncs +from gspylib.inspection.common.CheckItem import BaseItem +from gspylib.inspection.common.CheckResult import ResultStatus +from gspylib.inspection.common.Exception import CheckNAException +from gspylib.common.ErrorCode import ErrorCode + +# master +MASTER_INSTANCE = 0 +# standby +STANDBY_INSTANCE = 1 +# dummy standby +DUMMY_STANDBY_INSTANCE = 2 + +# cn +INSTANCE_ROLE_COODINATOR = 3 +# dn +INSTANCE_ROLE_DATANODE = 4 + +g_gucDist = {} +RecommendedMaxMem = 0 + + +class CheckMaxProcMemory(BaseItem): + def __init__(self): + super(CheckMaxProcMemory, self).__init__(self.__class__.__name__) + self.Threshold_NG = None + + def preCheck(self): + super(CheckMaxProcMemory, self).preCheck() + # check the threshold was set correctly + if (not self.threshold.__contains__('Threshold_NG')): + raise Exception(ErrorCode.GAUSS_530["GAUSS_53013"] + % "The threshold Threshold_NG") + if (not self.threshold['Threshold_NG'].isdigit()): + raise Exception(ErrorCode.GAUSS_530["GAUSS_53014"] + % "The threshold Threshold_NG") + self.Threshold_NG = int(self.threshold['Threshold_NG']) + + def checkInstanceGucValue(self, Instance): + """ + get CN/DN instance guc parameters + """ + global g_gucDist + Role = "" + needm = False + if (Instance.instanceRole == INSTANCE_ROLE_COODINATOR): + needm = False + elif (self.checkMaster(Instance.instanceId)): + needm = False + else: + needm = True + sqlcmd = "select setting from pg_settings " \ + "where name='max_process_memory';" + output = SharedFuncs.runSqlCmd(sqlcmd, self.user, "", Instance.port, + self.tmpPath, "postgres", + self.mpprcFile, needm) + if (Instance.instanceRole == INSTANCE_ROLE_COODINATOR): + Role = "CN" + elif (Instance.instanceRole == INSTANCE_ROLE_DATANODE): + Role = "DN" + instanceName = "%s_%s" % (Role, Instance.instanceId) + g_gucDist[instanceName] = output + + def checkMaster(self, instanceId): + cmd = "gs_om -t query |grep %s" % (instanceId) + output = SharedFuncs.runShellCmd(cmd, self.user, self.mpprcFile) + line = output.splitlines()[0] + instanceinfo = line.split() + for idx in range(len(instanceinfo)): + if (instanceinfo[idx] == str(instanceId)): + if (instanceinfo[idx + 2] == "Primary"): + return True + else: + return False + return False + + def doCheck(self): + """ + + """ + global g_gucDist + global RecommendedMaxMem + DNidList = [] + nodeInfo = self.cluster.getDbNodeByName(self.host) + CN = nodeInfo.coordinators + for DnInstance in nodeInfo.datanodes: + if (self.checkMaster(DnInstance.instanceId)): + DNidList.append(DnInstance) + if (len(CN) < 1 and len(DNidList) < 1): + self.result.rst = ResultStatus.NA + self.result.val = "NA" + return + + # test database Connection + for Instance in (CN + DNidList): + if not Instance: + continue + sqlcmd = "select pg_sleep(1);" + output = SharedFuncs.runSqlCmd(sqlcmd, self.user, "", + Instance.port, self.tmpPath, + 'postgres', + self.mpprcFile) + self.checkInstanceGucValue(Instance) + cmd = "/sbin/sysctl -a |grep vm.min_free_kbytes|awk '{print $3}'" + min_free_kbytes = int(SharedFuncs.runShellCmd(cmd).splitlines()[-1]) + cmd = "free -k | grep 'Mem'| grep -v 'grep'|awk '{print $2}'" + raw = int(SharedFuncs.runShellCmd(cmd)) + if (min_free_kbytes * 100 > raw * 5): + RecommendedMaxMem = int((raw * 0.7) // (len(DNidList) + 1)) + else: + RecommendedMaxMem = int((raw * 0.8) // 
(len(DNidList) + 1)) + self.result.rst = ResultStatus.OK + result = "RecommendedMaxMem is %s\n" % RecommendedMaxMem + for key, value in g_gucDist.items(): + if (int(value) > RecommendedMaxMem): + self.result.rst = ResultStatus.NG + result += "%s : %s\n" % (key, value) + if (self.result.rst == ResultStatus.OK): + self.result.val = "parameter max_process_memory setting is ok" + else: + self.result.val = "parameter max_process_memory " \ + "setting should not be bigger than " \ + "recommended(kb):%s:\n%s" % ( + RecommendedMaxMem, result) + + def doSet(self): + resultStr = "" + cmd = "su - %s -c \"source %s;gs_guc set " \ + "-N all -I all -c 'max_process_memory=%s'\"" % ( + self.user, self.mpprcFile, RecommendedMaxMem) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + resultStr += "Set CN instance Failed.\n Error : %s." % output + resultStr += "The cmd is %s " % cmd + cmd = "su - %s -c \"source %s;gs_guc set " \ + "-N all -I all -c 'max_process_memory=%s'\"" % ( + self.user, self.mpprcFile, RecommendedMaxMem) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + resultStr += "Set database node instance Failed.\n " \ + "Error : %s." % output + resultStr += "The cmd is %s " % cmd + if (len(resultStr) > 0): + self.result.val = resultStr + else: + self.result.val = "Set max_process_memory successfully." diff --git a/script/gspylib/inspection/items/os/CheckMemInfo.py b/script/gspylib/inspection/items/os/CheckMemInfo.py new file mode 100644 index 0000000..2230940 --- /dev/null +++ b/script/gspylib/inspection/items/os/CheckMemInfo.py @@ -0,0 +1,32 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +from gspylib.inspection.common import SharedFuncs +from gspylib.inspection.common.CheckItem import BaseItem +from gspylib.inspection.common.CheckResult import ResultStatus +from gspylib.hardware.gsmemory import g_memory + + +class CheckMemInfo(BaseItem): + def __init__(self): + super(CheckMemInfo, self).__init__(self.__class__.__name__) + + def doCheck(self): + totalMem_bit = g_memory.getMemTotalSize() + totalMem_g = totalMem_bit / 1024 / 1024 / 1024 + self.result.rst = ResultStatus.OK + self.result.raw = "%s bit" % totalMem_bit + self.result.val = "totalMem: %sG" % totalMem_g diff --git a/script/gspylib/inspection/items/os/CheckNTPD.py b/script/gspylib/inspection/items/os/CheckNTPD.py new file mode 100644 index 0000000..9979a90 --- /dev/null +++ b/script/gspylib/inspection/items/os/CheckNTPD.py @@ -0,0 +1,122 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. 
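The recommendation logic in CheckMaxProcMemory.py above reserves 20% of physical memory (30% when vm.min_free_kbytes exceeds 5% of RAM) and splits the remainder evenly across the node's primary datanodes plus one. With made-up numbers (illustrative only):

    raw_kb = 256 * 1024 * 1024          # 256 GB of RAM, in kB
    min_free_kbytes = 2 * 1024 * 1024   # sample vm.min_free_kbytes value
    dn_count = 4                        # primary DNs on this node

    factor = 0.7 if min_free_kbytes * 100 > raw_kb * 5 else 0.8
    recommended = int(raw_kb * factor) // (dn_count + 1)
    # any instance whose max_process_memory exceeds this is reported NG
    print(recommended)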
+# You may obtain a copy of Mulan PSL v2 at:
+#
+#          http://license.coscl.org.cn/MulanPSL2
+#
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+# ----------------------------------------------------------------------------
+import subprocess
+import re
+from datetime import datetime, timedelta
+from gspylib.inspection.common.CheckItem import BaseItem
+from gspylib.inspection.common.CheckResult import ResultStatus
+from gspylib.os.gsplatform import g_Platform
+from gspylib.os.gsOSlib import g_OSlib
+
+DEFAULT_INTERVAL = 300
+
+
+class ntp:
+    def __init__(self):
+        """
+        function : Init class ntp
+        input : NA
+        output : NA
+        """
+        self.running = False
+        self.hosts = set()
+        self.currentTime = ""
+        self.errorMsg = None
+
+
+class CheckNTPD(BaseItem):
+    def __init__(self):
+        super(CheckNTPD, self).__init__(self.__class__.__name__)
+
+    def collectNtpd(self):
+        data = ntp()
+        try:
+            p = subprocess.Popen(["/usr/sbin/ntpq", "-p"], shell=False,
+                                 stdout=subprocess.PIPE,
+                                 stderr=subprocess.PIPE)
+            result = p.communicate()
+            data.errorMsg = result[1].decode().strip()
+        except Exception as e:
+            data.errorMsg = str(e)
+            return data
+        if not p.returncode:
+            startHosts = False
+            for line in result[0].decode().splitlines():
+                if startHosts:
+                    words = line.split()
+                    if len(words) < 2:
+                        continue
+                    host = words[0].strip()
+                    if host.startswith("*"):
+                        host = host.lstrip("*")
+                    data.hosts.add(host)
+                else:
+                    if re.search("======", line):
+                        startHosts = True
+        pidList = g_OSlib.getProcess('ntpd')
+        for line in pidList:
+            if (line.strip().isdigit()):
+                data.running = True
+        return data
+
+    def doCheck(self):
+        data = self.collectNtpd()
+        data.currentTime = str(datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
+        if not data.running:
+            self.result.rst = ResultStatus.NG
+            self.result.val = "NTPD service is not running, %s" \
+                              % data.currentTime
+        else:
+            self.result.rst = ResultStatus.OK
+            self.result.val = "NTPD service is running, %s" % data.currentTime
+        self.result.raw = data.errorMsg
+
+    def postAnalysis(self, itemResult, category="", name=""):
+        errors = []
+        for i in itemResult.getLocalItems():
+            if i.rst == ResultStatus.NG:
+                errors.append("%s: %s" % (i.host, i.val))
+        if len(errors) > 0:
+            itemResult.rst = ResultStatus.NG
+            itemResult.analysis = "\n".join(errors)
+            return itemResult
+        keyStr = itemResult.getLocalItems()[0].val.strip().split(',')[
+            1].strip()
+        baseTime = datetime.strptime(keyStr, "%Y-%m-%d %H:%M:%S")
+        startTime = baseTime
+        endTime = baseTime
+
+        rst = ResultStatus.OK
+        analysis = ""
+        for v in itemResult.getLocalItems():
+            analysis += "%s: %s\n" % (v.host, v.val)
+            tmpStr = v.val.strip().split(',')[1].strip()
+            tmpTime = datetime.strptime(tmpStr, "%Y-%m-%d %H:%M:%S")
+            if (tmpTime < startTime):
+                startTime = tmpTime
+            if (tmpTime > endTime):
+                endTime = tmpTime
+
+        if (endTime > (startTime + timedelta(seconds=DEFAULT_INTERVAL))):
+            rst = ResultStatus.NG
+            analysis = "Time difference between nodes is more than 5 " \
+                       "minutes:\n%s" \
+                       % analysis
+        itemResult.rst = rst
+        itemResult.analysis = analysis
+
+        return itemResult
diff --git a/script/gspylib/inspection/items/os/CheckOSVer.py b/script/gspylib/inspection/items/os/CheckOSVer.py
new file mode 100644
index 0000000..ae0b748
--- /dev/null
+++ b/script/gspylib/inspection/items/os/CheckOSVer.py
@@ -0,0 +1,106 @@
+# -*- coding:utf-8 -*-
+# 
Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +import platform +from gspylib.inspection.common import SharedFuncs +from gspylib.inspection.common.CheckItem import BaseItem +from gspylib.inspection.common.CheckResult import ResultStatus +from gspylib.os.gsplatform import g_Platform + + +class CheckOSVer(BaseItem): + def __init__(self): + super(CheckOSVer, self).__init__(self.__class__.__name__) + + def doCheck(self): + (distName, version) = g_Platform.getCurrentPlatForm() + bits, linkage = platform.architecture() + self.result.val = "The current OS is %s %s %s" % ( + distName, version, bits) + if (distName in ("redhat", "centos")): + if (version[0:3] in ( + "6.4", "6.5", "6.6", "6.7", "6.8", "6.9", "7.0", "7.1", + "7.2", + "7.3", "7.4", "7.5", "7.6") and + bits == "64bit"): + self.result.rst = ResultStatus.OK + self.result.val = "The current OS is %s %s %s." % ( + distName, version[0:3], bits) + else: + self.result.rst = ResultStatus.NG + elif (distName == "euleros" and version in ( + "2.0", "2.3") and bits == "64bit"): + self.result.rst = ResultStatus.OK + self.result.val = "The current OS is EULER %s 64bit." % version + elif (distName == "suse" and version in ( + "11.1", "11.2", "11.3", "11.4", "12.0", "12.1", "12.2", + "12.3") and bits == "64bit"): + self.result.rst = ResultStatus.OK + self.result.val = "The current OS is SuSE %s 64bit." % version + elif distName == "openeuler": + self.result.rst = ResultStatus.OK + self.result.val = "The current OS is openEuler %s." % version + else: + self.result.rst = ResultStatus.NG + self.result.val = "The current OS[%s %s] " \ + "does not meet the requirements." 
% ( + distName, version) + + def postAnalysis(self, itemResult, category="", name=""): + errors = [] + for i in itemResult.getLocalItems(): + if (i.rst == ResultStatus.NG): + errors.append("%s: %s" % (i.host, i.val)) + if len(errors) > 0: + itemResult.rst = ResultStatus.NG + itemResult.analysis = "\n".join(errors) + return itemResult + + analysis = "" + VerGroupDisk = {'RedHat6': [], 'RedHat7': [], 'Euler': [], + 'SuSE11SP1': [], 'SuSE11SP234': [], 'SuSE12': [], + 'openEuler': []} + for v in itemResult.getLocalItems(): + analysis += "%s: %s\n" % (v.host, v.val) + verInfo = v.val.strip().split(' ')[4:] + if verInfo[0] in ("redhat", "centos"): + if (verInfo[1][0:3] in ( + "6.4", "6.5", "6.6", "6.7", "6.8", "6.9")): + VerGroupDisk['RedHat6'].append(verInfo) + else: + VerGroupDisk['RedHat7'].append(verInfo) + elif verInfo[0] == "euleros": + VerGroupDisk['Euler'].append(verInfo) + elif verInfo[0] == "openEuler": + VerGroupDisk['openEuler'].append(verInfo) + elif verInfo[0] == "SuSE": + if verInfo[1] == "11.1": + VerGroupDisk['SuSE11SP1'].append(verInfo) + elif verInfo[1] in ("11.2", "11.3", "11.4"): + VerGroupDisk['SuSE11SP234'].append(verInfo) + else: + VerGroupDisk['SuSE12'].append(verInfo) + currentVerGroup = [] + for verGroup in VerGroupDisk.keys(): + if len(VerGroupDisk[verGroup]) != 0: + currentVerGroup.append(verGroup) + if len(currentVerGroup) > 1: + itemResult.rst = ResultStatus.NG + else: + itemResult.rst = ResultStatus.OK + itemResult.analysis = analysis + + return itemResult diff --git a/script/gspylib/inspection/items/os/CheckOmmUserExist.py b/script/gspylib/inspection/items/os/CheckOmmUserExist.py new file mode 100644 index 0000000..97f0bcf --- /dev/null +++ b/script/gspylib/inspection/items/os/CheckOmmUserExist.py @@ -0,0 +1,46 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +import subprocess +from gspylib.inspection.common.CheckItem import BaseItem +from gspylib.inspection.common.CheckResult import ResultStatus + + +class CheckOmmUserExist(BaseItem): + def __init__(self): + super(CheckOmmUserExist, self).__init__(self.__class__.__name__) + + def doCheck(self): + cmd = "id omm" + (status, output) = subprocess.getstatusoutput(cmd) + self.result.raw = output + if (output.lower().find('no such user') < 0): + self.result.rst = ResultStatus.NG + self.result.val = "User omm already exists. " \ + "please delete this omm " \ + "used by 'userdel -rf omm'." + else: + self.result.rst = ResultStatus.OK + self.result.val = output + + def doSet(self): + cmd = "userdel -rf omm" + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + self.result.val += "Failed to delete omm user. 
Error:%s\n" % output
+            self.result.val += "The cmd is %s " % cmd
+        else:
+            self.result.val += "Successfully deleted omm user.\n"
diff --git a/script/gspylib/inspection/items/os/CheckPortConflict.py b/script/gspylib/inspection/items/os/CheckPortConflict.py
new file mode 100644
index 0000000..92e0877
--- /dev/null
+++ b/script/gspylib/inspection/items/os/CheckPortConflict.py
@@ -0,0 +1,65 @@
+# -*- coding:utf-8 -*-
+# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+#
+# openGauss is licensed under Mulan PSL v2.
+# You can use this software according to the terms
+# and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+#
+#          http://license.coscl.org.cn/MulanPSL2
+#
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+# ----------------------------------------------------------------------------
+import subprocess
+from gspylib.inspection.common.CheckItem import BaseItem
+from gspylib.inspection.common.CheckResult import ResultStatus
+
+
+class CheckPortConflict(BaseItem):
+    def __init__(self):
+        super(CheckPortConflict, self).__init__(self.__class__.__name__)
+
+    def doCheck(self):
+        cmd = "netstat -apn | grep 'tcp' " \
+              "| grep 'LISTEN'| awk -F ' ' '$4 ~ /25[0-9][0-9][0-9]/'"
+        (status, output) = subprocess.getstatusoutput(cmd)
+        if (status != 0):
+            self.result.rst = ResultStatus.NG
+            self.result.val = "Failed to execute command: %s\noutput:%s " % (
+                cmd, output)
+        else:
+            if (output.strip() == ""):
+                self.result.rst = ResultStatus.OK
+                self.result.val = "ports are normal"
+            else:
+                self.result.rst = ResultStatus.NG
+                self.result.val = output
+                self.result.raw = "checked ports: (25000-25999)\n" + output
+
+    def doSet(self):
+        pidList = []
+        cmd = "netstat -apn| grep 'tcp'" \
+              "| grep 'LISTEN'| awk -F ' ' '$4 ~ /25[0-9][0-9][0-9]/'" \
+              "| awk '{print $NF}'"
+        (status, output) = subprocess.getstatusoutput(cmd)
+        if (status == 0 and output != ""):
+            for line in output.split('\n'):
+                if (line.find('/') > 0):
+                    pid = line.split('/')[0].strip()
+                    if (pid.isdigit()):
+                        pidList.append(pid)
+        if (pidList):
+            cmd = "kill -9"
+            for pid in pidList:
+                cmd += " %s" % pid
+            (status, output) = subprocess.getstatusoutput(cmd)
+            if (status != 0):
+                self.result.val = "Failed to kill process. Error:%s\n" % output
+                self.result.val += "The cmd is %s " % cmd
+            else:
+                self.result.val = \
+                    "Successfully killed the processes that occupy the ports.\n"
diff --git a/script/gspylib/inspection/items/os/CheckProcMem.py b/script/gspylib/inspection/items/os/CheckProcMem.py
new file mode 100644
index 0000000..443c8f5
--- /dev/null
+++ b/script/gspylib/inspection/items/os/CheckProcMem.py
@@ -0,0 +1,114 @@
+# coding: UTF-8
+# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+#
+# openGauss is licensed under Mulan PSL v2.
+# You can use this software according to the terms
+# and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+#
+#          http://license.coscl.org.cn/MulanPSL2
+#
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
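CheckPortConflict above keys on netstat output: LISTEN sockets whose local port matches 25[0-9][0-9][0-9], i.e. the 25000-25999 band that cluster instances default into. A dependency-free way to probe the same band is a bind attempt per port; a sketch, not part of the patch (port_in_use is an illustrative helper name):

    import socket

    def port_in_use(port, host="0.0.0.0"):
        """Return True when something already listens on the port."""
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
            s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
            try:
                s.bind((host, port))
            except OSError:
                return True
        return False

    conflicts = [p for p in range(25000, 26000) if port_in_use(p)]
    print("conflicting ports:", conflicts or "none")

SO_REUSEADDR lets the bind succeed over TIME_WAIT leftovers, so only sockets actively held by another process are reported.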
+# ----------------------------------------------------------------------------
+
+from gspylib.inspection.common import SharedFuncs
+from gspylib.inspection.common.CheckItem import BaseItem
+from gspylib.inspection.common.CheckResult import ResultStatus
+from gspylib.common.ErrorCode import ErrorCode
+
+# master
+MASTER_INSTANCE = 0
+# standby
+STANDBY_INSTANCE = 1
+
+
+class CheckProcMem(BaseItem):
+    def __init__(self):
+        super(CheckProcMem, self).__init__(self.__class__.__name__)
+        self.percentm = 0.8
+        self.percentt = 0.9
+
+    def preCheck(self):
+        # check the threshold was set correctly
+        if (not self.threshold.__contains__('percent_total')
+                or not self.threshold.__contains__('percent_max')):
+            raise Exception(ErrorCode.GAUSS_530["GAUSS_53013"] % "threshold")
+        try:
+            self.percentt = float(self.threshold['percent_total'])
+            self.percentm = float(self.threshold['percent_max'])
+        except Exception as e:
+            raise Exception(ErrorCode.GAUSS_530["GAUSS_53011"] %
+                            "CheckProcMem" + " Error: %s" % str(e))
+
+    def doCheck(self):
+        cmd = "free -g | grep Mem | awk '{print $2}' 2>/dev/null"
+        totalMem = SharedFuncs.runShellCmd(cmd)
+        cmd = "free -g | grep Mem | awk '{print $3}' 2>/dev/null"
+        usedMem = SharedFuncs.runShellCmd(cmd)
+        if (int(usedMem) > int(totalMem) * self.percentt):
+            self.result.rst = ResultStatus.NG
+            self.result.val = "Memory usage exceeded the threshold"
+            return
+        cmd = "show max_process_memory;"
+        cnPort = None
+        masterDnPort = None
+        slaveDnPort = None
+
+        self.node = self.cluster.getDbNodeByName(self.host)
+        if self.node.coordinators:
+            cnPort = self.node.coordinators[0].port
+
+        masterDnlist = SharedFuncs.getMasterDnNum(self.user, self.mpprcFile)
+        for datanode in self.node.datanodes:
+            if (datanode.instanceId in masterDnlist):
+                masterDnPort = datanode.port
+                break
+            elif (
+                    datanode.instanceType == MASTER_INSTANCE
+                    or datanode.instanceType == STANDBY_INSTANCE):
+                slaveDnPort = datanode.port
+                break
+        if (cnPort):
+            output = SharedFuncs.runSqlCmd(cmd, self.user, "", cnPort,
+                                           self.tmpPath, "postgres",
+                                           self.mpprcFile)
+        elif (masterDnPort):
+            output = SharedFuncs.runSqlCmd(cmd, self.user, "", masterDnPort,
+                                           self.tmpPath, "postgres",
+                                           self.mpprcFile)
+        elif (slaveDnPort):
+            output = SharedFuncs.runSqlCmd(cmd, self.user, "", slaveDnPort,
+                                           self.tmpPath, "postgres",
+                                           self.mpprcFile, True)
+        else:
+            self.result.val = "There is no master database node " \
+                              "or slave database node on this node"
+            self.result.rst = ResultStatus.OK
+            return
+        if (output.upper().endswith("GB")):
+            maxProcessM = int(output[:-2]) * 1024 * 1024 * self.percentm
+        elif (output.upper().endswith("MB")):
+            maxProcessM = int(output[:-2]) * 1024 * self.percentm
+        elif (output.upper().endswith("KB")):
+            maxProcessM = int(output[:-2]) * self.percentm
+        else:
+            self.result.val = \
+                "Can not get the correct value of max_process_memory"
+            self.result.rst = ResultStatus.NG
+            return
+        cmd = "ps ux | grep gaussdb | grep -v grep | awk '{print $6}'"
+        output = SharedFuncs.runShellCmd(cmd)
+        for line in output.splitlines():
+            procM = int(line)
+            if (procM > maxProcessM):
+                self.result.val = \
+                    "Memory usage of some gaussdb process exceeded " \
+                    "the threshold"
+                self.result.rst = ResultStatus.NG
+                return
+
+        self.result.rst = ResultStatus.OK
+        self.result.val = "Memory is sufficient"
diff --git a/script/gspylib/inspection/items/os/CheckProcessLeft.py b/script/gspylib/inspection/items/os/CheckProcessLeft.py
new file mode 100644
index 0000000..0ce5783
--- /dev/null
+++ b/script/gspylib/inspection/items/os/CheckProcessLeft.py
@@ -0,0 +1,58 @@
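The GB/MB/kB suffix handling in CheckProcMem above converts the max_process_memory setting into a kB ceiling before comparing it against each gaussdb process's resident size. Distilled into a standalone sketch (function name and sample value are illustrative):

    def max_process_memory_kb(setting, percent_max=0.8):
        """Normalize a max_process_memory string (GB/MB/kB suffix) to a kB
        ceiling, mirroring the suffix handling in CheckProcMem.doCheck."""
        s = setting.strip().upper()
        if s.endswith("GB"):
            return int(s[:-2]) * 1024 * 1024 * percent_max
        if s.endswith("MB"):
            return int(s[:-2]) * 1024 * percent_max
        if s.endswith("KB"):
            return int(s[:-2]) * percent_max
        raise ValueError("unrecognized unit in %r" % setting)

    print(max_process_memory_kb("12GB"))  # 12 GB -> 10066329.6 kB ceiling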
+# -*- coding:utf-8 -*- +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- + +import subprocess +from gspylib.inspection.common import SharedFuncs +from gspylib.inspection.common.CheckItem import BaseItem +from gspylib.inspection.common.CheckResult import ResultStatus + + +class CheckProcessLeft(BaseItem): + def __init__(self): + super(CheckProcessLeft, self).__init__(self.__class__.__name__) + + def doCheck(self): + parRes = "" + flag = 0 + processList = ['gaussdb', 'omm'] + for process in processList: + cmd = "ps -ef | grep '%s ' -m 20 | grep -v 'grep'" % process + (status, output) = subprocess.getstatusoutput(cmd) + if (output.find(process) >= 0): + parRes += "the process is left over: \n%s" % output + flag = 1 + + if (flag == 1): + self.result.rst = ResultStatus.NG + else: + self.result.rst = ResultStatus.OK + self.result.val = parRes + self.result.raw = output + + def doSet(self): + processList = ['gaussdb', 'omm'] + for process in processList: + cmd = "ps -eo pid,user,comm | grep -E '\<%s\>' " \ + "| grep -v 'grep' | awk '{print $1}'|xargs kill -9" % process + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + self.result.val = "Failed to kill " \ + "process.Error:\n%s\n" % output + \ + "The cmd is %s " % cmd + else: + self.result.val = "Successfully killed the gauss " \ + "and omm user process.\n" diff --git a/script/gspylib/inspection/items/os/CheckSctpService.py b/script/gspylib/inspection/items/os/CheckSctpService.py new file mode 100644 index 0000000..8e00810 --- /dev/null +++ b/script/gspylib/inspection/items/os/CheckSctpService.py @@ -0,0 +1,108 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. 
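CheckProcessLeft above greps `ps -ef` for leftover gaussdb/omm processes. The same survey can be done by walking /proc, which avoids matching the grep command itself; note this sketch matches on the process name (comm) only, while the original's grep also catches processes owned by the omm user column. Illustrative, not part of the patch:

    import os

    def pids_of(names=("gaussdb", "omm")):
        """Collect PIDs whose executable name matches one of the targets."""
        found = {}
        for pid in filter(str.isdigit, os.listdir("/proc")):
            try:
                with open("/proc/%s/comm" % pid) as f:
                    comm = f.read().strip()
            except OSError:
                continue  # process exited while scanning
            if comm in names:
                found.setdefault(comm, []).append(int(pid))
        return found

    print(pids_of() or "no leftover processes")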
+# ----------------------------------------------------------------------------
+import subprocess
+import platform
+from gspylib.inspection.common.CheckItem import BaseItem
+from gspylib.inspection.common.CheckResult import ResultStatus
+from gspylib.common.Common import DefaultValue
+from gspylib.os.gsfile import g_Platform
+
+
+class CheckSctpService(BaseItem):
+    def __init__(self):
+        super(CheckSctpService, self).__init__(self.__class__.__name__)
+
+    def doCheck(self):
+
+        parRes = ""
+        flag = "Normal"
+        cmd = "ls -l /lib/modules/`uname -r`/kernel/net/sctp/sctp.ko*"
+        (status, output) = subprocess.getstatusoutput(cmd)
+        if (status != 0 or output == "" or output.find(
+                "No such file or directory") > 0):
+            if DefaultValue.checkDockerEnv():
+                return
+            flag = "Error"
+            parRes += "There is no sctp service."
+        else:
+            cmd = "modprobe sctp;"
+            cmd += "lsmod |grep sctp"
+            (status, output) = subprocess.getstatusoutput(cmd)
+            if (output == ""):
+                flag = "Error"
+                parRes += "sctp service is not loaded."
+
+        cmd = "cat %s | grep '^insmod.*sctp.ko'" % DefaultValue.getOSInitFile()
+        (status, output) = subprocess.getstatusoutput(cmd)
+        if (status != 0 or output == ""):
+            if (flag == "Normal"):
+                flag = "Warning"
+            parRes += "The sctp service is not configured to load at boot."
+
+        self.result.val = parRes
+        self.result.raw = output
+        if (flag == "Error"):
+            self.result.rst = ResultStatus.NG
+        elif (flag == "Warning"):
+            self.result.rst = ResultStatus.WARNING
+        else:
+            self.result.rst = ResultStatus.OK
+            self.result.val = "Sctp service is normal."
+
+    def doSet(self):
+        self.result.val = ""
+        parRes = ""
+        sctpFile = ""
+        initFileSuse = "/etc/init.d/boot.local"
+        initFileRedhat = "/etc/rc.d/rc.local"
+        cmd = "ls -l /lib/modules/`uname -r`/kernel/net/sctp/sctp.ko*"
+        (status, output) = subprocess.getstatusoutput(cmd)
+        if (status != 0 or output == "" or output.find(
+                "No such file or directory") > 0):
+            parRes = "There is no sctp service.\n"
+        else:
+            sctpFile = output.split()[-1]
+            cmd = "modprobe sctp;"
+            (status, output) = subprocess.getstatusoutput(cmd)
+            if (status != 0):
+                cmd = "insmod %s >/dev/null 2>&1;lsmod |grep sctp" % sctpFile
+                (status, output) = subprocess.getstatusoutput(cmd)
+                if status != 0 or output == "":
+                    parRes = "Failed to load sctp service.\n"
+        distname, version, idnum = g_Platform.dist()
+        if (distname in ["redhat", "centos", "euleros", "openEuler"]):
+            cmd = "cat %s | grep sctp" % initFileRedhat
+            (status, output) = subprocess.getstatusoutput(cmd)
+            if (status != 0 or output == ""):
+                cmd = "echo 'modprobe sctp' >> /etc/rc.d/rc.local;"
+                cmd += "echo" \
+                       " 'insmod %s >/dev/null 2>&1' >> /etc/rc.d/rc.local " \
+                       % sctpFile
+                (status, output) = subprocess.getstatusoutput(cmd)
+                if (status != 0):
+                    parRes += "Failed to add sctp service to boot.\n"
+        else:
+            cmd = "cat %s | grep sctp" % initFileSuse
+            (status, output) = subprocess.getstatusoutput(cmd)
+            if (status != 0 or output == ""):
+                cmd = "echo 'modprobe sctp' >> /etc/init.d/boot.local;"
+                cmd += "echo '%s >/dev/null 2>&1' >> /etc/init.d/boot.local " \
+                       % sctpFile
+                (status, output) = subprocess.getstatusoutput(cmd)
+                if (status != 0):
+                    parRes += "Failed to add sctp service to boot."
+ self.result.val = parRes diff --git a/script/gspylib/inspection/items/os/CheckSshdConfig.py b/script/gspylib/inspection/items/os/CheckSshdConfig.py new file mode 100644 index 0000000..f99e3a9 --- /dev/null +++ b/script/gspylib/inspection/items/os/CheckSshdConfig.py @@ -0,0 +1,118 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +import subprocess +from gspylib.inspection.common import SharedFuncs +from gspylib.inspection.common.CheckItem import BaseItem +from gspylib.inspection.common.CheckResult import ResultStatus +from gspylib.common.ErrorCode import ErrorCode + +setItem = [] + + +class CheckSshdConfig(BaseItem): + def __init__(self): + super(CheckSshdConfig, self).__init__(self.__class__.__name__) + self.sshdThreshold = {} + + def preCheck(self): + self.sshdThreshold = {} + # check the threshold was set correctly + if (not "PasswordAuthentication" in self.threshold.keys() + or not "MaxStartups" in self.threshold.keys() + or not "UseDNS" in self.threshold.keys() + or not "ClientAliveInterval" in self.threshold.keys()): + raise Exception(ErrorCode.GAUSS_530["GAUSS_53013"] % 'threshold') + self.sshdThreshold['PasswordAuthentication'] = self.threshold[ + 'PasswordAuthentication'] + self.sshdThreshold['MaxStartups'] = self.threshold['MaxStartups'] + self.sshdThreshold['UseDNS'] = self.threshold['UseDNS'] + self.sshdThreshold['ClientAliveInterval'] = self.threshold[ + 'ClientAliveInterval'] + + def doCheck(self): + global setItem + flag = "Normal" + resultStr = "" + self.result.raw = "" + WarningItem = ['PasswordAuthentication', 'UseDNS'] + for item in self.sshdThreshold.keys(): + cmd = "cat /etc/ssh/sshd_config | grep -E %s | grep -v '^#' | " \ + "awk '{print $1,$2}'" % item + output = SharedFuncs.runShellCmd(cmd) + self.result.raw += "\n%s" % output + if (item == "ClientAliveInterval"): + if (output == ""): + continue + else: + timeout = int(output.split()[-1]) + if (timeout != 0 and timeout < int( + self.sshdThreshold[item])): + flag = "Abnormal" + resultStr += "\nAbnormal reason: %s; expected: %s" % ( + output, self.sshdThreshold[item]) + setItem.append(output.split()[0]) + else: + if (output != ""): + if (str(output.strip()).lower() != str('%s %s' % ( + item, self.sshdThreshold[item])).lower()): + if (item in WarningItem): + flag = "Warning" + resultStr += "\nWarning reason: %s; expected: %s" \ + % ( + output, self.sshdThreshold[item]) + else: + flag = "Abnormal" + resultStr += "\nAbnormal reason: %s; expected: " \ + "%s" \ + % ( + output, self.sshdThreshold[item]) + setItem.append(output.split()[0]) + else: + if (item in WarningItem): + flag = "Warning" + resultStr += "\nWarning reason: " \ + "%s parameter is not set; expected: %s" \ + % ( + item, self.sshdThreshold[item]) + else: + flag = "Abnormal" + resultStr += "\nAbnormal reason: " \ + "%s parameter is not set; expected: %s" \ + % ( + item, self.sshdThreshold[item]) + setItem.append(output.split()[0]) + self.result.val = resultStr + if (flag == 
"Normal"): + self.result.rst = ResultStatus.OK + elif (flag == "Warning" and len(setItem) == 0): + self.result.rst = ResultStatus.WARNING + else: + self.result.rst = ResultStatus.NG + + def doSet(self): + cmd = "" + for item in setItem: + if (item == "MaxStartups"): + cmd += "sed -i '/^MaxStartups/d' /etc/ssh/sshd_config;" + cmd += "echo 'MaxStartups=1000' >> /etc/ssh/sshd_config;" + else: + cmd = "sed -i '/^ClientAliveInterval/d' /etc/ssh/sshd_config;" + cmd += "echo 'ClientAliveInterval 0' >> /etc/ssh/sshd_config;" + cmd += "service sshd restart" + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + self.result.val = "Failed to set SshdConfig. The cmd is %s" % cmd diff --git a/script/gspylib/inspection/items/os/CheckSshdService.py b/script/gspylib/inspection/items/os/CheckSshdService.py new file mode 100644 index 0000000..00c9ff3 --- /dev/null +++ b/script/gspylib/inspection/items/os/CheckSshdService.py @@ -0,0 +1,35 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- + +from gspylib.inspection.common.CheckItem import BaseItem +from gspylib.inspection.common.CheckResult import ResultStatus +from gspylib.os.gsservice import g_service + + +class CheckSshdService(BaseItem): + def __init__(self): + super(CheckSshdService, self).__init__(self.__class__.__name__) + + def doCheck(self): + (status, output) = g_service.manageOSService('sshd', 'status') + self.result.raw = output + if (status == 0 and output.find('running')): + self.result.rst = ResultStatus.OK + self.result.val = "The sshd service is normal." + else: + self.result.val = "There is no sshd service." + self.result.rst = ResultStatus.NG diff --git a/script/gspylib/inspection/items/os/CheckStack.py b/script/gspylib/inspection/items/os/CheckStack.py new file mode 100644 index 0000000..e522047 --- /dev/null +++ b/script/gspylib/inspection/items/os/CheckStack.py @@ -0,0 +1,83 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. 
+# ----------------------------------------------------------------------------
+
+import os
+from gspylib.inspection.common import SharedFuncs
+from gspylib.inspection.common.CheckItem import BaseItem
+from gspylib.inspection.common.CheckResult import ResultStatus
+from gspylib.os.gsOSlib import g_OSlib
+from gspylib.os.gsfile import g_file
+
+STACK = 3072
+
+
+class CheckStack(BaseItem):
+    def __init__(self):
+        super(CheckStack, self).__init__(self.__class__.__name__)
+
+    def doCheck(self):
+        parRes = ""
+        flag = 0
+        output = g_OSlib.getUserLimits('stack size')
+        self.result.raw = output
+        StackValue = output.split()[-1]
+        if (StackValue == 'unlimited'):
+            pass
+        elif (int(StackValue) < STACK):
+            flag = 1
+            parRes += "The value of stack size is %d. " \
+                      "It cannot be less than 3072." % int(StackValue)
+
+        if (self.cluster):
+            pidList = g_OSlib.getProcess(
+                os.path.join(self.cluster.appPath, 'bin/gaussdb'))
+            for pid in pidList:
+                limitsFile = "/proc/%s/limits" % pid
+                if (not os.path.isfile(limitsFile) or not os.access(limitsFile,
+                                                                    os.R_OK)):
+                    continue
+                output = g_file.readFile(limitsFile, 'Max stack size')[
+                    0].strip()
+                self.result.raw += '\n[pid]%s: %s' % (pid, output)
+                Stack = output.split()[4]
+                if (Stack == 'unlimited'):
+                    pass
+                else:
+                    value = int(Stack) / 1024
+                    if (int(value) < STACK):
+                        flag = 1
+                        parRes += \
+                            "The value of stack size is %s on pid %s. " \
+                            "It cannot be less than 3072.\n" % (value, pid)
+
+        if (flag == 1):
+            self.result.rst = ResultStatus.NG
+            self.result.val = parRes
+        else:
+            self.result.rst = ResultStatus.OK
+            self.result.val = StackValue
+
+    def doSet(self):
+        limitPath = '/etc/security/limits.conf'
+        errMsg = SharedFuncs.SetLimitsConf(["soft", "hard"], "stack", STACK,
+                                           limitPath)
+        if errMsg != "Success":
+            self.result.val = "%s\n" % errMsg
+        else:
+            self.result.val = "Successfully set stack size to %d\n" % STACK
diff --git a/script/gspylib/inspection/items/os/CheckSysParams.py b/script/gspylib/inspection/items/os/CheckSysParams.py
new file mode 100644
index 0000000..947ecc6
--- /dev/null
+++ b/script/gspylib/inspection/items/os/CheckSysParams.py
@@ -0,0 +1,215 @@
+# -*- coding:utf-8 -*-
+# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+#
+# openGauss is licensed under Mulan PSL v2.
+# You can use this software according to the terms
+# and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+#
+#          http://license.coscl.org.cn/MulanPSL2
+#
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
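CheckStack above parses `ulimit` output and /proc/<pid>/limits; for the current process the standard resource module exposes the same soft limit directly. A sketch using the same 3072 kB floor the check enforces:

    import resource

    STACK_KB = 3072  # minimum stack size the check accepts, in kB

    soft, _hard = resource.getrlimit(resource.RLIMIT_STACK)  # bytes
    if soft == resource.RLIM_INFINITY:
        print("stack size unlimited: OK")
    elif soft // 1024 < STACK_KB:
        print("stack size %d kB below the %d kB floor" % (soft // 1024, STACK_KB))
    else:
        print("stack size %d kB: OK" % (soft // 1024))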
+# ---------------------------------------------------------------------------- +import os +import subprocess +import configparser +import platform +from gspylib.inspection.common import SharedFuncs +from gspylib.inspection.common.CheckItem import BaseItem +from gspylib.inspection.common.CheckResult import ResultStatus +from gspylib.os.gsfile import g_file +from gspylib.hardware.gsmemory import g_memory +from gspylib.os.gsfile import g_Platform +from gspylib.common.ErrorCode import ErrorCode + +setParameterList = {} + + +class CheckSysParams(BaseItem): + def __init__(self): + super(CheckSysParams, self).__init__(self.__class__.__name__) + self.version = None + + def preCheck(self): + # check the threshold was set correctly + if (not self.threshold.__contains__('version')): + raise Exception(ErrorCode.GAUSS_530["GAUSS_53013"] % "version") + self.version = self.threshold['version'] + + def SetSysctlForList(self, key, value): + """ + function: Set sysctl parameter + input : key, value + output: NA + """ + kernelParameterFile = "/etc/sysctl.conf" + cmd = """sed -i '/^\\s*%s *=.*$/d' %s && + echo %s = %s >> %s 2>/dev/null""" % ( + key, kernelParameterFile, key, value, kernelParameterFile) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + raise Exception(output + " The cmd is %s " % cmd) + + def getConfigFilePara(self, configFile, section, checkList=None, + optionsName=None): + try: + if checkList is None: + checkList = [] + if optionsName is None: + optionsName = [] + data = {} + fp = configparser.RawConfigParser() + fp.read(configFile) + secs = fp.sections() + if section not in secs: + return data + optionList = fp.options(section) + if (len(optionsName) != 0 and optionsName not in optionList): + return data + elif (len(optionsName) != 0): + optionList = optionsName + for key in optionList: + value = fp.get(section, key) + if (len(value.split()) == 0): + raise Exception(ErrorCode.GAUSS_500["GAUSS_50018"] % key) + value = value.split('#')[0] + if (key in checkList and not value.isdigit()): + raise Exception(ErrorCode.GAUSS_500["GAUSS_50020"] % key) + if ( + section == '/etc/security/limits.conf' + and not value.isdigit() and value != 'unlimited'): + raise Exception(ErrorCode.GAUSS_500["GAUSS_50019"] % key) + data[key] = value + if ("vm.min_free_kbytes" in list(data.keys())): + output = g_memory.getMemTotalSize() + totalMemory_k = output // 1024 + multiple = data["vm.min_free_kbytes"].split('*')[1].split('%')[ + 0].strip() + val = int(totalMemory_k) * int(multiple) // 100 + data["vm.min_free_kbytes"] = str(val) + + return data + except Exception as e: + raise Exception(ErrorCode.GAUSS_512["GAUSS_51234"] % configFile + + " Error: \n%s" % str(e)) + + def doCheck(self): + global setParameterList + patchlevel = "" + resultList = [] + informationlist = [] + dirName = os.path.dirname(os.path.realpath(__file__)) + + configFile = "%s/../../config/check_list_%s.conf" % ( + dirName, self.version) + suggestParameterList = self.getConfigFilePara( + configFile, + 'SUGGEST:/etc/sysctl.conf') + kernelParameter = self.getConfigFilePara(configFile, + '/etc/sysctl.conf') + kernelParameter.update(suggestParameterList) + distname, version, idnum = g_Platform.dist() + if (distname == "SuSE" and version == "11"): + patInfo = g_file.readFile("/etc/SuSE-release", 'PATCHLEVEL')[0] + if (patInfo.find('=') > 0): + output = patInfo.split('=')[1].strip() + if (output != ""): + patchlevel = output + for key in kernelParameter: + if (patchlevel == "1" and key == "vm.extfrag_threshold"): + continue + 
if (key == "sctpchecksumerrors"): + snmpFile = "/proc/net/sctp/snmp" + if (os.path.isfile(snmpFile)): + output = \ + g_file.readFile(snmpFile, 'SctpChecksumErrors')[ + 0].split()[1].strip() + else: + continue + else: + sysFile = "/proc/sys/%s" % key.replace('.', '/') + # High version of linux no longer supports tcp_tw_recycle + if (not os.path.exists( + sysFile) and key == "net.ipv4.tcp_tw_recycle"): + continue + output = g_file.readFile(sysFile)[0].strip() + if (len(output.split()) > 1): + output = ' '.join(output.split()) + + if (output != kernelParameter[key].strip() and key not in list( + suggestParameterList.keys())): + resultList.append(1) + informationlist.append( + "Abnormal reason: variable '%s' " + "RealValue '%s' ExpectedValue '%s'." % ( + key, output, kernelParameter[key])) + setParameterList[key] = kernelParameter[key] + elif output != kernelParameter[key].strip(): + if (key == "vm.overcommit_ratio"): + output = g_file.readFile("/proc/sys/vm/overcommit_memory")[ + 0].strip() + if (output == "0"): + continue + resultList.append(2) + informationlist.append( + "Warning reason: variable '%s' RealValue '%s' " + "ExpectedValue '%s'." % ( + key, output, kernelParameter[key])) + if (1 in resultList): + self.result.rst = ResultStatus.NG + elif (2 in resultList): + self.result.rst = ResultStatus.WARNING + else: + self.result.rst = ResultStatus.OK + self.result.val = "" + for info in informationlist: + self.result.val = self.result.val + '%s\n' % info + + def delSysctlForList(self, key, value): + """ + """ + kernelParameterFile = "/etc/sysctl.conf" + cmd = """sed -i '/^\\s*%s *=.*$/d' %s """ % (key, kernelParameterFile) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + raise Exception(ErrorCode.GAUSS_530["GAUSS_53019"] % (key, value)) + + def doSet(self): + for checkResult in self.result.val.split('\n'): + if (checkResult.startswith("Abnormal reason")): + checkResultList = checkResult.split('\'') + setParameterList[checkResultList[1]] = checkResultList[5] + self.result.val = "" + # The parameter sctpchecksumerrors set method is independent + if ("sctpchecksumerrors" in setParameterList): + cmd = "echo 1 > /sys/module/sctp/parameters/no_checksums" + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + self.result.val += " " \ + " Failed to enforce sysctl kernel " \ + "variable 'sctpchecksumerrors'. " \ + "Error: %s" % output + setParameterList.pop("sctpchecksumerrors") + + if (len(setParameterList) != 0): + for key in setParameterList: + self.SetSysctlForList(key, setParameterList[key]) + self.result.val += "Set variable '%s' to '%s'\n" % ( + key, setParameterList[key]) + cmd = "sysctl -p" + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + cmderrorinfo = "sysctl -p | grep 'No such file or directory'" + (status, outputresult) = subprocess.getstatusoutput( + cmderrorinfo) + if (status != 0 and outputresult == ""): + raise Exception(output) + for key in setParameterList: + tmp = "/proc/sys/%s" % key.replace('.', '/') + if (tmp in outputresult or key in outputresult): + # delete the record about key from the /etc/sysctl.conf + self.delSysctlForList(key, setParameterList[key]) diff --git a/script/gspylib/inspection/items/os/CheckSysPortRange.py b/script/gspylib/inspection/items/os/CheckSysPortRange.py new file mode 100644 index 0000000..11379e5 --- /dev/null +++ b/script/gspylib/inspection/items/os/CheckSysPortRange.py @@ -0,0 +1,61 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. 
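The comparison loop in CheckSysParams.doCheck boils down to: read /proc/sys/<key with dots replaced by slashes>, normalize whitespace, and compare against the expectation from check_list_*.conf. Condensed into a standalone sketch (the two expectations shown are illustrative samples, not the real configuration):

    import os

    expected = {  # sample values; the real list comes from check_list_*.conf
        "net.ipv4.tcp_retries1": "5",
        "vm.overcommit_memory": "0",
    }
    for key, want in expected.items():
        path = "/proc/sys/%s" % key.replace(".", "/")
        if not os.path.exists(path):  # kernel may not expose the knob
            continue
        with open(path) as f:
            got = " ".join(f.read().split())
        if got != want:
            print("variable '%s' RealValue '%s' ExpectedValue '%s'"
                  % (key, got, want))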
+# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +import subprocess +from gspylib.inspection.common import SharedFuncs +from gspylib.inspection.common.CheckItem import BaseItem +from gspylib.inspection.common.CheckResult import ResultStatus +from gspylib.common.ErrorCode import ErrorCode +from gspylib.os.gsfile import g_file + +PORT_RANGE = (26000, 65535) +SYSCTL_FILE = "/etc/sysctl.conf" + + +class CheckSysPortRange(BaseItem): + def __init__(self): + super(CheckSysPortRange, self).__init__(self.__class__.__name__) + + def doCheck(self): + output = g_file.readFile('/proc/sys/net/ipv4/ip_local_port_range')[0] + smallValue = output.split()[0].strip() + bigValue = output.split()[1].strip() + if (int(bigValue) > PORT_RANGE[1] or int(smallValue) < PORT_RANGE[0]): + self.result.val = "The value of net.ipv4.ip_local_port_range " \ + "is %d %d. it should be %d %d" % ( + int(smallValue), int(bigValue), + int(PORT_RANGE[0]), + int(PORT_RANGE[1])) + self.result.rst = ResultStatus.NG + else: + self.result.rst = ResultStatus.OK + self.result.val = "The value of net.ipv4." \ + "ip_local_port_range is %d %d." % ( + int(smallValue), int(bigValue)) + self.result.raw = output + + def doSet(self): + cmd = "sed -i '/net.ipv4.ip_local_port_range/d' %s" % SYSCTL_FILE + cmd += " && echo 'net.ipv4.ip_local_port_range " \ + "= 26000 65535' >> %s" % SYSCTL_FILE + cmd += " && sysctl -p" + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0 and output.find( + '/proc/sys/net/ipv4/ip_local_port_range: N' + 'o such file or directory') >= 0): + raise Exception(ErrorCode.GAUSS_530["GAUSS_53020"] + % "net.ipv4.ip_local_port_range." + + "The cmd is %s " % cmd) diff --git a/script/gspylib/inspection/items/os/CheckTHP.py b/script/gspylib/inspection/items/os/CheckTHP.py new file mode 100644 index 0000000..9b0094f --- /dev/null +++ b/script/gspylib/inspection/items/os/CheckTHP.py @@ -0,0 +1,70 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. 
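CheckSysPortRange above reduces to two integer comparisons against one /proc file; as a standalone sketch:

    PORT_RANGE = (26000, 65535)

    with open("/proc/sys/net/ipv4/ip_local_port_range") as f:
        low, high = (int(v) for v in f.read().split())
    # Ephemeral ports must start at or above 26000 so they cannot collide
    # with cluster listen ports, and must not exceed the TCP maximum.
    ok = low >= PORT_RANGE[0] and high <= PORT_RANGE[1]
    print("ip_local_port_range %d %d: %s" % (low, high, "OK" if ok else "NG"))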
+# ---------------------------------------------------------------------------- +import os +import subprocess +from gspylib.inspection.common import SharedFuncs +from gspylib.inspection.common.CheckItem import BaseItem +from gspylib.inspection.common.CheckResult import ResultStatus +from gspylib.os.gsfile import g_file + +THPFile = "/sys/kernel/mm/transparent_hugepage/enabled" + + +class CheckTHP(BaseItem): + def __init__(self): + super(CheckTHP, self).__init__(self.__class__.__name__) + + def collectTHPServer(self): + if (os.path.exists(THPFile)): + output = g_file.readFile(THPFile)[0] + self.result.raw = output + if (output.find('[never]') > 0): + THPstatus = "disabled" + else: + THPstatus = "enabled" + else: + THPstatus = "disabled" + return THPstatus + + def doCheck(self): + THPstatus = self.collectTHPServer() + if (THPstatus != "disabled"): + self.result.rst = ResultStatus.NG + self.result.val = "The THP server is '%s', " \ + "ExpectedValue: 'disabled'." % THPstatus + else: + self.result.rst = ResultStatus.OK + self.result.val = THPstatus + + def doSet(self): + close_cmd = "(if test -f %s; then echo never > %s;fi)" % ( + THPFile, THPFile) + (status, output) = subprocess.getstatusoutput(close_cmd) + if (status != 0): + self.result.val = "Failed to close THP service, " \ + "Error: %s\n" % output + \ + "The cmd is %s " % close_cmd + return + # 2.add close cmd to init file + initFile = SharedFuncs.getInitFile() + cmd = "sed -i '/^.*transparent_hugepage.*enabled.*echo " \ + "never.*$/d' %s &&" % initFile + cmd += "echo \"%s\" >> %s" % (close_cmd, initFile) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + self.result.val += "Failed to add cmd to init file, " \ + "Error: %s\n" % output + "The cmd is %s " % cmd diff --git a/script/gspylib/inspection/items/os/CheckTimeZone.py b/script/gspylib/inspection/items/os/CheckTimeZone.py new file mode 100644 index 0000000..088f981 --- /dev/null +++ b/script/gspylib/inspection/items/os/CheckTimeZone.py @@ -0,0 +1,32 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +from gspylib.inspection.common import SharedFuncs +from gspylib.inspection.common.CheckItem import BaseItem +from gspylib.inspection.common.CheckResult import ResultStatus +from gspylib.os.gsOSlib import g_OSlib + + +class CheckTimeZone(BaseItem): + def __init__(self): + super(CheckTimeZone, self).__init__(self.__class__.__name__) + + def doCheck(self): + output = g_OSlib.getDate() + timeZone = output.split()[-1] + self.result.rst = ResultStatus.OK + self.result.raw = output + self.result.val = timeZone diff --git a/script/gspylib/inspection/items/other/CheckDataDiskUsage.py b/script/gspylib/inspection/items/other/CheckDataDiskUsage.py new file mode 100644 index 0000000..b0779eb --- /dev/null +++ b/script/gspylib/inspection/items/other/CheckDataDiskUsage.py @@ -0,0 +1,86 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. 
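The THP probe in CheckTHP reads one sysfs file in which the kernel brackets the active mode, so the whole check reduces to a substring test. A sketch (using `in` rather than find() > 0; the two behave the same here because other modes always precede `[never]` in that file):

    import os

    THP_FILE = "/sys/kernel/mm/transparent_hugepage/enabled"

    if os.path.exists(THP_FILE):
        with open(THP_FILE) as f:
            # e.g. "always madvise [never]" -- the bracketed word is active.
            status = "disabled" if "[never]" in f.read() else "enabled"
    else:
        status = "disabled"  # kernel built without THP support
    print("THP:", status)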
+# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- + +import os +from gspylib.inspection.common.CheckItem import BaseItem +from gspylib.inspection.common.CheckResult import ResultStatus +from gspylib.hardware.gsdisk import g_disk + + +class CheckDataDiskUsage(BaseItem): + def __init__(self): + super(CheckDataDiskUsage, self).__init__(self.__class__.__name__) + + def doCheck(self): + flag = "Normal" + resultStr = "" + pathList = [] + if (not self.cluster): + self.result.rst = ResultStatus.NG + self.result.val = "The datanode information is none." + return + + for inst in self.cluster.getDbNodeByName(self.host).datanodes: + pathList.append(inst.datadir) + cnInstList = self.cluster.getDbNodeByName(self.host).coordinators + if (len(cnInstList) > 0): + tblspcDir = os.path.join(cnInstList[0].datadir, 'pg_tblspc') + tblspcList = os.listdir(tblspcDir) + if (tblspcList): + for tblspc in tblspcList: + tblspcPath = os.path.join(tblspcDir, tblspc) + if (os.path.islink(tblspcPath)): + pathList.append(os.path.realpath(tblspcPath)) + + for path in pathList: + rateNum = g_disk.getDiskSpaceUsage(path) + self.result.raw += "[%s] space usage: %s%%\n" % (path, rateNum) + if (rateNum > int(self.thresholdDn)): + resultStr += \ + "Path(%s) space usage(%d%%) Abnormal reason: " \ + "The usage of the device disk space " \ + "cannot be greater than %s%%.\n" % ( + path, rateNum, self.thresholdDn) + flag = "Error" + # Check inode usage + diskName = g_disk.getMountPathByDataDir(path) + diskType = g_disk.getDiskMountType(diskName) + if (not diskType in ["xfs", "ext3", "ext4", "overlay"]): + resultStr += \ + "Path(%s) inodes usage(%s) Warning reason: " \ + "The file system type [%s] is unrecognized " \ + "or not support. Please check it.\n" % ( + path, 0, diskType) + if (flag == "Normal"): + flag = "Warning" + self.result.raw += "[%s] disk type: %s\n" % (path, diskType) + continue + rateNum = g_disk.getDiskInodeUsage(path) + self.result.raw += "[%s] inode usage: %s%%\n" % (path, rateNum) + if (rateNum > int(self.thresholdDn)): + resultStr += \ + "Path(%s) inode usage(%d%%) Abnormal reason: " \ + "The usage of the device disk inode " \ + "cannot be greater than %s%%.\n" % ( + path, rateNum, self.thresholdDn) + flag = "Error" + self.result.val = resultStr + if (flag == "Normal"): + self.result.rst = ResultStatus.OK + self.result.val = "All disk space are sufficient.\n" + else: + self.result.rst = ResultStatus.NG diff --git a/script/gspylib/inspection/items/other/CheckInstallDiskUsage.py b/script/gspylib/inspection/items/other/CheckInstallDiskUsage.py new file mode 100644 index 0000000..a733e50 --- /dev/null +++ b/script/gspylib/inspection/items/other/CheckInstallDiskUsage.py @@ -0,0 +1,65 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. 
+# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- + +from gspylib.inspection.common.CheckItem import BaseItem +from gspylib.inspection.common.CheckResult import ResultStatus +from gspylib.hardware.gsdisk import g_disk + + +class CheckInstallDiskUsage(BaseItem): + def __init__(self): + super(CheckInstallDiskUsage, self).__init__(self.__class__.__name__) + + def doCheck(self): + flag = "Normal" + path = "/boot" + # Check space usage + rateNum = g_disk.getDiskSpaceUsage(path) + self.result.raw += "[%s] space usage: %s%%\n" % (path, rateNum) + if (rateNum > int(self.thresholdDn)): + self.result.val += \ + "Path(%s) space usage(%d%%) " \ + "Abnormal reason: The usage of the device disk space " \ + "cannot be greater than %s%%.\n" % ( + path, rateNum, self.thresholdDn) + flag = "Error" + # Check inode usage + diskName = g_disk.getMountPathByDataDir(path) + diskType = g_disk.getDiskMountType(diskName) + if (not diskType in ["xfs", "ext3", "ext4"]): + self.result.val = \ + "Path(%s) inodes usage(%s) Warning reason: " \ + "The file system type [%s] is unrecognized or not support. " \ + "Please check it.\n" % ( + path, 0, diskType) + self.result.raw = "[%s] disk type: %s\n" % (path, diskType) + self.result.rst = ResultStatus.WARNING + return + rateNum = g_disk.getDiskInodeUsage(path) + self.result.raw += "[%s] inode usage: %s%%\n" % (path, rateNum) + if (rateNum > int(self.thresholdDn)): + self.result.val += \ + "Path(%s) inode usage(%d%%) Abnormal reason: " \ + "The usage of the device disk inode cannot be greater than" \ + " %s%%.\n" % ( + path, rateNum, self.thresholdDn) + flag = "Error" + if (flag == "Normal"): + self.result.rst = ResultStatus.OK + self.result.val = "Install disk space are sufficient.\n" + else: + self.result.rst = ResultStatus.NG diff --git a/script/gspylib/inspection/items/other/CheckLogDiskUsage.py b/script/gspylib/inspection/items/other/CheckLogDiskUsage.py new file mode 100644 index 0000000..b5d5f28 --- /dev/null +++ b/script/gspylib/inspection/items/other/CheckLogDiskUsage.py @@ -0,0 +1,68 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. 
+# ---------------------------------------------------------------------------- + +import os +from gspylib.common.Common import DefaultValue +from gspylib.inspection.common.CheckItem import BaseItem +from gspylib.inspection.common.CheckResult import ResultStatus +from gspylib.hardware.gsdisk import g_disk + + +class CheckLogDiskUsage(BaseItem): + def __init__(self): + super(CheckLogDiskUsage, self).__init__(self.__class__.__name__) + + def doCheck(self): + flag = "Normal" + path = DefaultValue.getEnv("GAUSSLOG", + "/var/log/gaussdb/%s" % self.user) + # Check space usage + rateNum = g_disk.getDiskSpaceUsage(path) + self.result.raw += "[%s] space usage: %s%%\n" % (path, rateNum) + if (rateNum > int(self.thresholdDn)): + self.result.val += \ + "Path(%s) space usage(%d%%) Abnormal reason: " \ + "The usage of the device disk space cannot" \ + " be greater than %s%%.\n" % ( + path, rateNum, self.thresholdDn) + flag = "Error" + # Check inode usage + diskName = g_disk.getMountPathByDataDir(path) + diskType = g_disk.getDiskMountType(diskName) + if (not diskType in ["xfs", "ext3", "ext4"]): + self.result.val = \ + "Path(%s) inodes usage(%s) Warning reason: " \ + "The file system type [%s] is unrecognized or not support. " \ + "Please check it.\n" % ( + path, 0, diskType) + self.result.raw = "[%s] disk type: %s\n" % (path, diskType) + self.result.rst = ResultStatus.WARNING + return + rateNum = g_disk.getDiskInodeUsage(path) + self.result.raw += "[%s] inode usage: %s%%\n" % (path, rateNum) + if (rateNum > int(self.thresholdDn)): + self.result.val += \ + "Path(%s) inode usage(%d%%) Abnormal reason: " \ + "The usage of the device disk inode cannot be" \ + " greater than %s%%.\n" % ( + path, rateNum, self.thresholdDn) + flag = "Error" + if (flag == "Normal"): + self.result.rst = ResultStatus.OK + self.result.val = "Log disk space are sufficient.\n" + else: + self.result.rst = ResultStatus.NG diff --git a/script/gspylib/inspection/items/other/CheckTmpDiskUsage.py b/script/gspylib/inspection/items/other/CheckTmpDiskUsage.py new file mode 100644 index 0000000..2913af7 --- /dev/null +++ b/script/gspylib/inspection/items/other/CheckTmpDiskUsage.py @@ -0,0 +1,69 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. 
+# ---------------------------------------------------------------------------- + +import os +from gspylib.common.Common import DefaultValue +from gspylib.inspection.common.CheckItem import BaseItem +from gspylib.inspection.common.CheckResult import ResultStatus +from gspylib.hardware.gsdisk import g_disk + + +class CheckTmpDiskUsage(BaseItem): + def __init__(self): + super(CheckTmpDiskUsage, self).__init__(self.__class__.__name__) + + def doCheck(self): + flag = "Normal" + path = DefaultValue.getEnv("PGHOST", + os.path.join(DefaultValue.getEnv("GPHOME"), + "%s_mppdb" % self.user)) + # Check space usage + rateNum = g_disk.getDiskSpaceUsage(path) + self.result.raw += "[%s] space usage: %s%%\n" % (path, rateNum) + if (rateNum > int(self.thresholdDn)): + self.result.val += \ + "Path(%s) space usage(%d%%) Abnormal reason: " \ + "The usage of the device disk space cannot be " \ + "greater than %s%%.\n" % ( + path, rateNum, self.thresholdDn) + flag = "Error" + # Check inode usage + diskName = g_disk.getMountPathByDataDir(path) + diskType = g_disk.getDiskMountType(diskName) + if (not diskType in ["xfs", "ext3", "ext4"]): + self.result.val = \ + "Path(%s) inodes usage(%s) Warning reason: " \ + "The file system type [%s] is unrecognized or not support. " \ + "Please check it.\n" % ( + path, 0, diskType) + self.result.raw = "[%s] disk type: %s\n" % (path, diskType) + self.result.rst = ResultStatus.WARNING + return + rateNum = g_disk.getDiskInodeUsage(path) + self.result.raw += "[%s] inode usage: %s%%\n" % (path, rateNum) + if (rateNum > int(self.thresholdDn)): + self.result.val += \ + "Path(%s) inode usage(%d%%) Abnormal reason: " \ + "The usage of the device disk inode cannot be " \ + "greater than %s%%.\n" % ( + path, rateNum, self.thresholdDn) + flag = "Error" + if (flag == "Normal"): + self.result.rst = ResultStatus.OK + self.result.val = "Tmp disk space are sufficient.\n" + else: + self.result.rst = ResultStatus.NG diff --git a/script/gspylib/inspection/lib/__init__.py b/script/gspylib/inspection/lib/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/script/gspylib/inspection/lib/checkblacklist/DBCheck.sh b/script/gspylib/inspection/lib/checkblacklist/DBCheck.sh new file mode 100644 index 0000000..3683ccf --- /dev/null +++ b/script/gspylib/inspection/lib/checkblacklist/DBCheck.sh @@ -0,0 +1,142 @@ +#!/bin/bash +#Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +#openGauss is licensed under Mulan PSL v2. +#You can use this software according to the terms and conditions of the Mulan PSL v2. +#You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. 
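The four *DiskUsage checks above (data, install, log, tmp) all reduce to the same two ratios, which os.statvfs exposes without shelling out to df; a dependency-free sketch (the 90% threshold is illustrative; the real checks read thresholdDn from configuration):

    import os

    def usage_percent(path):
        """Return (space%, inode%) for the filesystem containing path."""
        st = os.statvfs(path)
        space = 100 * (1 - st.f_bavail / st.f_blocks) if st.f_blocks else 0
        inode = 100 * (1 - st.f_favail / st.f_files) if st.f_files else 0
        return space, inode

    space, inode = usage_percent("/tmp")
    threshold = 90  # illustrative threshold
    for kind, rate in (("space", space), ("inode", inode)):
        if rate > threshold:
            print("usage of %s (%.0f%%) exceeds %d%%" % (kind, rate, threshold))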
+#-------------------------------------------------------------------------
+#
+# DBCheck.sh
+#
+# IDENTIFICATION
+#    src/manager/om/script/gspylib/inspection/lib/checkblacklist/DBCheck.sh
+#
+#-------------------------------------------------------------------------
+
+GSPORT=''
+
+function usage()
+{
+    echo "***********************************************************************"
+    echo "*                       DBCheck.sh usage                              *"
+    echo "* -p: coordinator port number                                         *"
+    echo "* example: ./DBCheck.sh -p 25308                                      *"
+    echo "***********************************************************************"
+}
+
+function parse_para()
+{
+    while getopts "p:h" opt
+    do
+        case $opt in
+            p)
+                if [ -z $GSPORT ]; then
+                    let GSPORT=$OPTARG
+                else
+                    echo "ERROR: duplicate port number"
+                    usage
+                    exit 1
+                fi
+                ;;
+            h)
+                usage
+                exit 1
+                ;;
+            ?)
+                echo "ERROR: unknown argument"
+                usage
+                exit 1
+                ;;
+        esac
+    done
+
+    if [ -z $GSPORT ]; then
+        echo "ERROR: must specify -p"
+        usage
+        exit 1
+    fi
+}
+
+function blacklist_check()
+{
+    blacklist_sql="blacklist_check-"$date_flag".sql"
+    #only used from v1r5 to v1r6
+    version=$(gaussdb -V | awk -F 'Gauss200 OLAP' '{print $2}' | awk -F ' ' '{print $1}')
+    sed s/OMVersion/$version/g blacklist_check.sql >$blacklist_sql
+
+
+    echo "==========================================================================================================================="
+    echo "==                                                                                                                       =="
+    echo "==                                  Check Blacklist                                                                     =="
+    echo "==                                                                                                                       =="
+    echo "==========================================================================================================================="
+
+    for db in $(gsql -d postgres -p $GSPORT -c "select datname||' GAUSSDB' from pg_database where datname != 'template0'" | grep GAUSSDB | awk '{print $1}')
+    do
+        echo "Blacklist Check for DataBase: "$db
+        check_log="checklog-"$db"-"$date_flag".log"
+        gsql -d $db -p $GSPORT -f $blacklist_sql > $check_log
+        if [ $(cat $check_log | grep FAILED | wc -l) -gt 0 ]; then
+            echo "NOTICE: Violation of blacklist rule"
+            cat $check_log
+        else
+            echo "NOTICE: Comply with the blacklist rule"
+        fi
+
+        for sqlfile in $(cat $check_log | grep FAILED | awk -F '|' '{print $NF}')
+        do
+            sqlfilelog=$(echo $log_path/$sqlfile|awk -F '.' '{print $1}')-$db.log
+            gsql -d $db -p $GSPORT -f $sqlfile > $sqlfilelog
+            cat $sqlfilelog
+        done
+    done
+}
+
+function dropped_column_table_check()
+{
+    echo "==========================================================================================================================="
+    echo "==                                                                                                                       =="
+    echo "==                                  Check DroppedColumnTable                                                            =="
+    echo "==                                                                                                                       =="
+    echo "==========================================================================================================================="
+
+    dropped_column_table_log=$log_path/GetDroppedColumnTable.log
+    ./ExecuteSQLOnAllDB.sh -p $GSPORT -f GetDroppedColumnTable.sql > $dropped_column_table_log
+    cat $dropped_column_table_log
+}
+
+function recurrent_grant_check()
+{
+    echo "==========================================================================================================================="
+    echo "==                                                                                                                       =="
+    echo "==                                  Check RecurrentGrant                                                                =="
+    echo "==                                                                                                                       =="
+    echo "==========================================================================================================================="
+
+    table_recurrent_grant_log=$log_path/GetTableRecurrentGrant.log
+    ./ExecuteSQLOnAllDB.sh -p $GSPORT -f GetTableRecurrentGrant.sql > $table_recurrent_grant_log
+    cat $table_recurrent_grant_log
+}
+
+function main()
+{
+    date_flag=$(date "+%Y%m%d-%H%M%S")
+    if [ ! -d ./log ]; then mkdir ./log ;fi
+    if [ ! -d ./log/"checkBlack-"$date_flag ]; then mkdir ./log/"checkBlack-"$date_flag ;fi
+    log_path=$(pwd)"/log/checkBlack-"$date_flag
+    source /opt/huawei/Bigdata/mppdb/.mppdbgs_profile
+    parse_para $*
+
+    blacklist_check
+    dropped_column_table_check
+    recurrent_grant_check
+}
+
+main $*
diff --git a/script/gspylib/inspection/lib/checkblacklist/ExecuteSQLOnAllDB.sh b/script/gspylib/inspection/lib/checkblacklist/ExecuteSQLOnAllDB.sh
new file mode 100644
index 0000000..1f6e5c9
--- /dev/null
+++ b/script/gspylib/inspection/lib/checkblacklist/ExecuteSQLOnAllDB.sh
@@ -0,0 +1,100 @@
+#!/bin/bash
+#Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+#
+#openGauss is licensed under Mulan PSL v2.
+#You can use this software according to the terms and conditions of the Mulan PSL v2.
+#You may obtain a copy of Mulan PSL v2 at:
+#
+#          http://license.coscl.org.cn/MulanPSL2
+#
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+#-------------------------------------------------------------------------
+#
+# ExecuteSQLOnAllDB.sh
+#
+# IDENTIFICATION
+#    src/manager/om/script/gspylib/inspection/lib/checkblacklist/ExecuteSQLOnAllDB.sh
+#
+#-------------------------------------------------------------------------
+
+GSPORT=''
+SQLFILE=''
+
+function usage()
+{
+    echo "***********************************************************************"
+    echo "*                  ExecuteSQLOnAllDB.sh usage                         *"
+    echo "* two required parameters, as follows                                 *"
+    echo "* -p: coordinator port number                                         *"
+    echo "* -f: sql file to execute                                             *"
+    echo "* example: ./ExecuteSQLOnAllDB.sh -p 25308 -f blacklist_check.sql     *"
+    echo "***********************************************************************"
+}
+
+function parse_para()
+{
+    while getopts "p:f:h" opt
+    do
+        case $opt in
+            p)
+                if [ -z $GSPORT ]; then
+                    let GSPORT=$OPTARG
+                else
+                    echo "GSPORT: "$GSPORT
+                    echo "SQLFILE: "$SQLFILE
+                    echo "ERROR: duplicate port number"
+                    usage
+                    exit 1
+                fi
+                ;;
+            f)
+                if [ -z $SQLFILE ]; then
+                    SQLFILE=$OPTARG
+                else
+                    echo "GSPORT: "$GSPORT
+                    echo "SQLFILE: "$SQLFILE
+                    echo "ERROR: duplicate sql file"
+                    usage
+                    exit 1
+                fi
+                ;;
+            h)
+                usage
+                exit 1
+                ;;
+            ?)
+                echo "ERROR: unknown argument"
+                echo "GSPORT: "$GSPORT
+                echo "SQLFILE: "$SQLFILE
+                usage
+                exit 1
+                ;;
+        esac
+    done
+
+    if [[ -z $SQLFILE || -z $GSPORT ]]; then
+        echo "GSPORT: "$GSPORT
+        echo "SQLFILE: "$SQLFILE
+        echo "ERROR: must specify -p and -f"
+        usage
+        exit 1
+    fi
+}
+
+parse_para $*
+
+echo "GSPORT: "$GSPORT
+echo "SQLFILE: "$SQLFILE
+
+for db in $(gsql -d postgres -p $GSPORT -c "select datname||' GAUSSDB' from pg_database where datname != 'template0'" | grep GAUSSDB | awk '{print $1}')
+do
+    echo "****************************************Blacklist Check for DataBase: "$db"**************************************************"
+    if [ ! 
-d ./log ]; then mkdir ./log ;fi + sql_log=$(pwd)/log/sqlLog.log + gsql -d $db -p $GSPORT -r -P pager=off -f $SQLFILE >$sql_log + cat $sql_log +done + diff --git a/script/gspylib/inspection/lib/checkblacklist/GetDroppedColumnTable.sql b/script/gspylib/inspection/lib/checkblacklist/GetDroppedColumnTable.sql new file mode 100644 index 0000000..3f14f64 --- /dev/null +++ b/script/gspylib/inspection/lib/checkblacklist/GetDroppedColumnTable.sql @@ -0,0 +1,40 @@ +with relinfo as +( + select + n.nspname as nspname, + c.relname as tablename, + pg_get_userbyid(c.relowner) as relowner, + (c.xmin::text::bigint) as rel_xmin, + c.oid as relid + from pg_class c + left join pg_namespace n on (c.relnamespace = n.oid) +), + +groupinfo as +( + select + c.pcrelid as relid, + c.pgroup as group_name, + (g.xmin::text::bigint) as group_xmin + from pgxc_class c + left join pgxc_group g on (c.pgroup = g.group_name) +) + +select + t.relid, + nspname, + tablename, + relowner, + attname, + attnum, + attisdropped, + t.rel_xmin, + g.group_xmin, + case when rel_xmin > group_xmin then 'unable to do dilatation' else 'already broken by dilatation' end as notice, + group_name +from pg_attribute a +left join relinfo t on a.attrelid = t.relid +left join groupinfo g on a.attrelid = g.relid +where a.attisdropped = true +order by notice, nspname, tablename +; diff --git a/script/gspylib/inspection/lib/checkblacklist/GetInheritTable.sql b/script/gspylib/inspection/lib/checkblacklist/GetInheritTable.sql new file mode 100644 index 0000000..9c812a1 --- /dev/null +++ b/script/gspylib/inspection/lib/checkblacklist/GetInheritTable.sql @@ -0,0 +1,20 @@ +with oid2relname AS +( + SELECT + n.nspname AS schemaname, + c.relname AS tablename, + pg_get_userbyid(c.relowner) as relowner, + c.oid AS relid + FROM pg_class c LEFT JOIN pg_namespace n ON n.oid = c.relnamespace +) +select + a.schemaname as inh_schemaname, + a.tablename as inh_tablename, + a.relowner as inh_owner, + b.schemaname as parent_schemaname, + b.tablename as parent_tablename, + b.relowner as parent_owner +from pg_inherits h +inner join oid2relname a on a.relid = h.inhrelid +inner join oid2relname b on b.relid = h.inhparent +; \ No newline at end of file diff --git a/script/gspylib/inspection/lib/checkblacklist/GetSequenceName.sql b/script/gspylib/inspection/lib/checkblacklist/GetSequenceName.sql new file mode 100644 index 0000000..732afdf --- /dev/null +++ b/script/gspylib/inspection/lib/checkblacklist/GetSequenceName.sql @@ -0,0 +1,6 @@ +SELECT + n.nspname AS schemaname, + c.relname AS sequencename, + pg_get_userbyid(c.relowner) as sequenceowner +FROM pg_class c LEFT JOIN pg_namespace n ON n.oid = c.relnamespace +where c.relkind = 'S'::"char"; \ No newline at end of file diff --git a/script/gspylib/inspection/lib/checkblacklist/GetTableRecurrentGrant.sql b/script/gspylib/inspection/lib/checkblacklist/GetTableRecurrentGrant.sql new file mode 100644 index 0000000..5971479 --- /dev/null +++ b/script/gspylib/inspection/lib/checkblacklist/GetTableRecurrentGrant.sql @@ -0,0 +1,46 @@ +with aclinfo as +( + select + oid as relid, + relname, + relnamespace, + pg_get_userbyid(relowner) as relowner, + relacl, + trim(trim(relacl::text, '}'), '{') as acl + from pg_class where relacl is not null +), + +acl2tab as +( + select + c.relid, + nspname, + relname, + relowner, + relacl, + regexp_split_to_table(acl, ',') as record + from aclinfo c + inner join pg_namespace n on c.relnamespace = n.oid + where relacl is not null +), + +split_acl as +( + select + relid, + nspname, + relname, + relowner, 
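+        -- each "record" is one aclitem rendered as text, in the form
+        -- "grantee=privileges/grantor"; the split_part calls below unpack
+        -- the grantor, the grantee and the privilege string from it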
+ relacl, + split_part(record, '/', 2) as grantUser, + split_part(record, '=', 1) as grantedToUser, + split_part(split_part(record, '=', 2), '/', 1) as privilege + from acl2tab + where relowner != grantUser +) + +select + * +from split_acl +order by nspname, relname +; \ No newline at end of file diff --git a/script/gspylib/inspection/lib/checkblacklist/GetTableSkew.sql b/script/gspylib/inspection/lib/checkblacklist/GetTableSkew.sql new file mode 100644 index 0000000..d5b2e84 --- /dev/null +++ b/script/gspylib/inspection/lib/checkblacklist/GetTableSkew.sql @@ -0,0 +1,98 @@ +analyze pg_catalog.pg_class; +analyze pg_catalog.pg_namespace; +analyze pg_catalog.pgxc_class; +analyze pg_catalog.pg_statistic; +--sqlblock +DROP FUNCTION IF EXISTS PUBLIC.pgxc_analyzed_tuples() CASCADE; +--sqlblock +CREATE OR REPLACE FUNCTION PUBLIC.pgxc_analyzed_tuples +( + OUT schemaname text, + OUT tablename text, + OUT dn_name text, + OUT tuples real +) +RETURNS SETOF record +AS $$ +DECLARE + datanode_rd record; + fetch_tuples record; + fetch_dn text; + fetch_tuple_str text; + BEGIN + fetch_dn := 'SELECT node_name FROM pg_catalog.pgxc_node WHERE node_type=''D'' order by node_name'; + FOR datanode_rd IN EXECUTE(fetch_dn) LOOP + dn_name := datanode_rd.node_name; + fetch_tuple_str := 'EXECUTE DIRECT ON (' || dn_name || ') ''SELECT + n.nspname, + c.relname, + c.reltuples + FROM pg_catalog.pg_class c + INNER JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace + where c.oid >16384 AND c.relkind = ''''r'''' and n.nspname <> ''''cstore'''' and n.nspname <> ''''pmk'''' and n.nspname <> ''''pg_catalog'''' + '''; + FOR fetch_tuples IN EXECUTE(fetch_tuple_str) LOOP + tuples := fetch_tuples.reltuples; + schemaname := fetch_tuples.nspname; + tablename := fetch_tuples.relname; + return next; + END LOOP; + RAISE INFO 'Finished fetching stats info from DataNode % at %',dn_name, clock_timestamp(); + END LOOP; + return; + END; $$ +LANGUAGE 'plpgsql' +ROWS 1000000; +--sqlblock +DROP VIEW IF EXISTS PUBLIC.pgxc_analyzed_skewness; +--sqlblock +CREATE VIEW PUBLIC.pgxc_analyzed_skewness +AS +SELECT + schemaname, + tablename, + (min(ratio)::numeric(6,3)) AS ratio_min, + (max(ratio)::numeric(6,3) ) AS ratio_max, + (max(ratio) - min(ratio))::numeric(6,3) AS skewness_ratio, + ((max(ratio) - min(ratio)) * total_tuples / 100)::numeric(35) as skewness_tuple, + ((max(ratio) - min(ratio)) * relwidth * total_tuples / 100)::numeric(35) as skewness_size, + (stddev_samp(ratio)::numeric(6,3)) AS skewness_stddev +FROM +( + WITH udt AS + ( + SELECT + n.nspname AS schemaname, + c.relname AS tablename, + relwidth + FROM pg_catalog.pg_class c + INNER JOIN pg_catalog.pg_namespace n ON (n.oid = c.relnamespace) + INNER JOIN (SELECT sum(stawidth) as relwidth, starelid FROM pg_catalog.pg_statistic GROUP BY starelid)s ON s.starelid = c.oid + INNER JOIN pg_catalog.pgxc_class x ON c.oid = x.pcrelid + WHERE x.pclocatortype = 'H' AND c.reltuples > 500 + ) + + SELECT + schemaname, + tablename, + total_tuples, + relwidth, + (round(tuples/total_tuples, 4) * 100)AS ratio + FROM + ( + SELECT + t.schemaname, + t.tablename, + t.dn_name, + t.tuples, + relwidth, + sum(tuples) OVER (PARTITION BY t.schemaname, t.tablename) AS total_tuples + FROM PUBLIC.pgxc_analyzed_tuples() t + INNER JOIN udt u on (u.schemaname = t.schemaname and u.tablename = t.tablename) + ) +) +GROUP BY schemaname, tablename, total_tuples, relwidth; +--sqlblock +SELECT * FROM PUBLIC.pgxc_analyzed_skewness +WHERE skewness_tuple > 100000 +ORDER BY skewness_tuple DESC, skewness_ratio DESC, skewness_size 
DESC; \ No newline at end of file diff --git a/script/gspylib/inspection/lib/checkblacklist/GetTableUseRule.sql b/script/gspylib/inspection/lib/checkblacklist/GetTableUseRule.sql new file mode 100644 index 0000000..64168b4 --- /dev/null +++ b/script/gspylib/inspection/lib/checkblacklist/GetTableUseRule.sql @@ -0,0 +1,15 @@ +with oid2relname AS +( + SELECT + n.nspname AS schemaname, + c.relname AS tablename, + c.oid AS relid + FROM pg_class c LEFT JOIN pg_namespace n ON n.oid = c.relnamespace +) +select + rulename, + schemaname, + tablename +FROM + pg_rewrite r inner join oid2relname on (ev_class = relid) +WHERE r.oid > 16384 and rulename != '_RETURN'; \ No newline at end of file diff --git a/script/gspylib/inspection/lib/checkblacklist/GetTableUseToGroup.sql b/script/gspylib/inspection/lib/checkblacklist/GetTableUseToGroup.sql new file mode 100644 index 0000000..f14e957 --- /dev/null +++ b/script/gspylib/inspection/lib/checkblacklist/GetTableUseToGroup.sql @@ -0,0 +1,16 @@ +with oid2relname AS +( + SELECT + n.nspname AS schemaname, + c.relname AS tablename, + c.oid AS relid + FROM pg_class c LEFT JOIN pg_namespace n ON n.oid = c.relnamespace +) + +select + schemaname, + tablename, + pgroup as nodegroup +from pgxc_class +inner join oid2relname on (relid = pcrelid) +where pgroup in (select group_name from pgxc_group offset 1); \ No newline at end of file diff --git a/script/gspylib/inspection/lib/checkblacklist/GetTableUseTonode.sql b/script/gspylib/inspection/lib/checkblacklist/GetTableUseTonode.sql new file mode 100644 index 0000000..b1a0327 --- /dev/null +++ b/script/gspylib/inspection/lib/checkblacklist/GetTableUseTonode.sql @@ -0,0 +1,16 @@ +with oid2relname AS +( + SELECT + n.nspname AS schemaname, + c.relname AS tablename, + c.oid AS relid + FROM pg_class c LEFT JOIN pg_namespace n ON n.oid = c.relnamespace +) + +select + schemaname, + tablename, + pgroup as nodegroup +from pgxc_class +inner join oid2relname on (relid = pcrelid) +where pgroup is null; \ No newline at end of file diff --git a/script/gspylib/inspection/lib/checkblacklist/GetTableUseUnsupportConstraint.sql b/script/gspylib/inspection/lib/checkblacklist/GetTableUseUnsupportConstraint.sql new file mode 100644 index 0000000..0f6d690 --- /dev/null +++ b/script/gspylib/inspection/lib/checkblacklist/GetTableUseUnsupportConstraint.sql @@ -0,0 +1,18 @@ +with oid2relname AS +( + SELECT + n.nspname AS schemaname, + c.relname AS tablename, + c.oid AS relid + FROM pg_class c LEFT JOIN pg_namespace n ON n.oid = c.relnamespace +) + +select + n.schemaname, + n.tablename, + case when contype = 'f' then 'FOREIGN KEY CONSTRAINT' + when contype = 'x' then 'EXCLUSION CONSTRAINT' + when contype = 't' then 'TRIGGER CONSTRAINT' + end as contype +from pg_constraint c +inner join oid2relname n on (n.relid = c.confrelid); \ No newline at end of file diff --git a/script/gspylib/inspection/lib/checkblacklist/GetTableUseUnsupportIndex.sql b/script/gspylib/inspection/lib/checkblacklist/GetTableUseUnsupportIndex.sql new file mode 100644 index 0000000..2013db7 --- /dev/null +++ b/script/gspylib/inspection/lib/checkblacklist/GetTableUseUnsupportIndex.sql @@ -0,0 +1,5 @@ +select + schemaname, + tablename, + indexname +from pg_indexes where indexdef not like '%btree%' and indexdef not like '%psort%'; \ No newline at end of file diff --git a/script/gspylib/inspection/lib/checkblacklist/GetTableUseUnsupportLocatortype.sql b/script/gspylib/inspection/lib/checkblacklist/GetTableUseUnsupportLocatortype.sql new file mode 100644 index 0000000..8f65119 --- 
/dev/null
+++ b/script/gspylib/inspection/lib/checkblacklist/GetTableUseUnsupportLocatortype.sql
@@ -0,0 +1,23 @@
+with oid2relname AS
+(
+    SELECT
+        n.nspname AS schemaname,
+        c.relname AS tablename,
+        c.oid AS relid
+    FROM pg_class c LEFT JOIN pg_namespace n ON n.oid = c.relnamespace
+)
+
+select
+    schemaname,
+    tablename,
+    case when pclocatortype = 'G' then 'RANGE'
+         when pclocatortype = 'N' then 'RROBIN'
+         when pclocatortype = 'C' then 'CUSTOM'
+         when pclocatortype = 'M' then 'MODULO'
+         when pclocatortype = 'O' then 'NONE'
+         when pclocatortype = 'D' then 'DISTRIBUTED'
+    end as locatortype
+from pgxc_class
+inner join oid2relname on (pcrelid = relid)
+where pclocatortype not in ('R', 'H') and pcrelid not in (select oid from pg_class where relkind='f') and pcrelid not in (select oid from pg_class where reloptions::text like '%internal_mask=33029%')
+;
\ No newline at end of file
diff --git a/script/gspylib/inspection/lib/checkblacklist/GetTableWithOids.sql b/script/gspylib/inspection/lib/checkblacklist/GetTableWithOids.sql
new file mode 100644
index 0000000..b325fdf
--- /dev/null
+++ b/script/gspylib/inspection/lib/checkblacklist/GetTableWithOids.sql
@@ -0,0 +1,6 @@
+SELECT
+    n.nspname AS schemaname,
+    c.relname AS tablename
+FROM pg_class c
+INNER JOIN pg_namespace n ON n.oid = c.relnamespace
+WHERE c.relkind = 'r'::"char" and c.oid > 16384 and relhasoids = true;
diff --git a/script/gspylib/inspection/lib/checkblacklist/GetTable_ProcUseUnsupportDataType.sql b/script/gspylib/inspection/lib/checkblacklist/GetTable_ProcUseUnsupportDataType.sql
new file mode 100644
index 0000000..3b95f12
--- /dev/null
+++ b/script/gspylib/inspection/lib/checkblacklist/GetTable_ProcUseUnsupportDataType.sql
@@ -0,0 +1,57 @@
+with oid2relname AS
+(
+    SELECT
+        n.nspname AS schemaname,
+        c.relname AS tablename,
+        pg_get_userbyid(c.relowner) as relowner,
+        c.oid AS relid
+    FROM pg_class c LEFT JOIN pg_namespace n ON n.oid = c.relnamespace
+)
+,
+oid2typename AS
+(
+    SELECT
+        n.nspname AS typschema,
+        t.typname AS typname,
+        pg_get_userbyid(typowner) as typowner,
+        t.oid AS typoid
+    FROM pg_type t LEFT JOIN pg_namespace n ON t.typnamespace = n.oid
+)
+
+select
+    schemaname,
+    tablename,
+    relowner,
+    typschema,
+    typname,
+    typowner
+from pg_attribute
+inner join oid2relname on (attrelid = relid)
+inner join oid2typename on (typoid = atttypid)
+where (atttypid in (628, 629, 142, 194) or (typname like '%reg%') or atttypid > 16384)
+and attrelid > 16384
+;
+
+with oid2typename AS
+(
+    SELECT
+        n.nspname AS typschema,
+        t.typname AS typname,
+        pg_get_userbyid(typowner) as typowner,
+        t.oid AS typoid
+    FROM pg_type t LEFT JOIN pg_namespace n ON t.typnamespace = n.oid
+)
+
+select
+    n.nspname AS proschema,
+    p.proname AS proname,
+    pg_get_userbyid(proowner) as proowner,
+    typschema,
+    typname,
+    typowner
+from pg_proc p
+INNER JOIN pg_namespace n ON n.oid = p.pronamespace
+INNER JOIN oid2typename ON (typoid = prorettype or typoid = any(proargtypes))
+where (typoid in (628, 629, 142, 194) or (typname like '%reg%') or typoid > 16384)
+and p.oid > 16384
+;
diff --git a/script/gspylib/inspection/lib/checkblacklist/GetTable_unsupportHDFSForeignTable.sql b/script/gspylib/inspection/lib/checkblacklist/GetTable_unsupportHDFSForeignTable.sql
new file mode 100644
index 0000000..5ce00ab
--- /dev/null
+++ b/script/gspylib/inspection/lib/checkblacklist/GetTable_unsupportHDFSForeignTable.sql
@@ -0,0 +1,16 @@
+with f_table as
+(
+    select
+        ftrelid
+    from pg_foreign_table t
+    inner join pg_foreign_server s on (t.ftserver = s.oid and s.srvoptions is not null)
+)
+
+select
+    b.nspname,
+    a.relname,
+    pg_get_userbyid(a.relowner) as relowner
+from pg_class a
+inner join pg_namespace b on a.relnamespace = b.oid
+inner join f_table c on a.oid = c.ftrelid
+;
\ No newline at end of file
diff --git a/script/gspylib/inspection/lib/checkblacklist/GetThirdPartExtension.sql b/script/gspylib/inspection/lib/checkblacklist/GetThirdPartExtension.sql
new file mode 100644
index 0000000..e32e0d0
--- /dev/null
+++ b/script/gspylib/inspection/lib/checkblacklist/GetThirdPartExtension.sql
@@ -0,0 +1,7 @@
+select
+    n.nspname as nspname,
+    extname as extensionname,
+    pg_get_userbyid(extowner)
+FROM pg_extension e
+left join pg_namespace n on (n.oid = e.extnamespace)
+where e.oid > 16384
\ No newline at end of file
diff --git a/script/gspylib/inspection/lib/checkblacklist/GetUserDefinedAggregate.sql b/script/gspylib/inspection/lib/checkblacklist/GetUserDefinedAggregate.sql
new file mode 100644
index 0000000..587260b
--- /dev/null
+++ b/script/gspylib/inspection/lib/checkblacklist/GetUserDefinedAggregate.sql
@@ -0,0 +1,6 @@
+SELECT
+    n.nspname AS schemaname,
+    p.proname AS proname
+FROM pg_catalog.pg_proc p
+LEFT JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace
+where p.oid > 16384 and proisagg;
\ No newline at end of file
diff --git a/script/gspylib/inspection/lib/checkblacklist/GetUserDefinedConversion.sql b/script/gspylib/inspection/lib/checkblacklist/GetUserDefinedConversion.sql
new file mode 100644
index 0000000..d682e46
--- /dev/null
+++ b/script/gspylib/inspection/lib/checkblacklist/GetUserDefinedConversion.sql
@@ -0,0 +1,7 @@
+select
+    n.nspname,
+    c.conname,
+    pg_get_userbyid(conowner) as conowner
+from pg_conversion c
+left join pg_namespace n on (c.connamespace = n.oid)
+where c.oid > 16384;
\ No newline at end of file
diff --git a/script/gspylib/inspection/lib/checkblacklist/GetUserDefinedDataType.sql b/script/gspylib/inspection/lib/checkblacklist/GetUserDefinedDataType.sql
new file mode 100644
index 0000000..5f462bd
--- /dev/null
+++ b/script/gspylib/inspection/lib/checkblacklist/GetUserDefinedDataType.sql
@@ -0,0 +1,19 @@
+with oid2typename AS
+(
+    SELECT
+        n.nspname AS typschema,
+        t.typname AS typname,
+        pg_get_userbyid(typowner) as typowner,
+        t.oid AS typoid,
+        typrelid
+    FROM pg_type t LEFT JOIN pg_namespace n ON t.typnamespace = n.oid
+    where t.oid > 16384 and typcategory = 'U'
+)
+
+select
+    typschema,
+    typname,
+    typowner,
+    typoid,
+    typrelid
+from oid2typename;
\ No newline at end of file
diff --git a/script/gspylib/inspection/lib/checkblacklist/GetUserDefinedLanguage.sql b/script/gspylib/inspection/lib/checkblacklist/GetUserDefinedLanguage.sql
new file mode 100644
index 0000000..071cd77
--- /dev/null
+++ b/script/gspylib/inspection/lib/checkblacklist/GetUserDefinedLanguage.sql
@@ -0,0 +1 @@
+select lanname from pg_language where oid > 16384;
\ No newline at end of file
diff --git a/script/gspylib/inspection/lib/checkblacklist/GetUserDefinedNodeGroup.sql b/script/gspylib/inspection/lib/checkblacklist/GetUserDefinedNodeGroup.sql
new file mode 100644
index 0000000..87c44c3
--- /dev/null
+++ b/script/gspylib/inspection/lib/checkblacklist/GetUserDefinedNodeGroup.sql
@@ -0,0 +1,5 @@
+select
+    group_name,
+    group_members,
+    in_redistribution
+from pgxc_group
\ No newline at end of file
diff --git a/script/gspylib/inspection/lib/checkblacklist/Note.txt b/script/gspylib/inspection/lib/checkblacklist/Note.txt
new file mode 100644
index 0000000..ef4af2c
--- /dev/null
+++ 
b/script/gspylib/inspection/lib/checkblacklist/Note.txt
@@ -0,0 +1,29 @@
+--
+---- Prerequisites
+--
+1. Before running the check, upload the DBCheck directory to a Coordinator node of the cluster
+2. Enter the DBCheck directory
+3. Grant execute permission to DBCheck.sh in the directory: chmod +x DBCheck.sh
+4. Grant execute permission to ExecuteSQLOnAllDB.sh in the directory: chmod +x ExecuteSQLOnAllDB.sh
+5. Source the environment first: source /opt/huawei/Bigdata/mppdb/.mppdbgs_profile
+6. Confirm the Coordinator port of the current node; it is assumed to be 25308 here (change it to the actual on-site port)
+7. Run the check: ./DBCheck.sh -p 25308
+
+
+--
+---- Result interpretation
+--
+1. To run a single SQL file on all databases, execute ./ExecuteSQLOnAllDB.sh -f blacklist_check.sql -p 25308
+
+2. Each line of the check result contains
+   a) column one, checkitem: the name of the check item
+   b) column two, result: the actual result
+   c) column three, expected: the expected result
+   d) column four, status: SUCCESS when the check passes, FAILED otherwise; for a FAILED item, run the script named in the corresponding line to fetch more information
+   e) column five, failed_process_script: the script used to fetch more information when a check fails
+
+3. When failed check items are detected, the detailed query information is collected automatically
+
+
+
+
diff --git a/script/gspylib/inspection/lib/checkblacklist/blacklist_check.sql b/script/gspylib/inspection/lib/checkblacklist/blacklist_check.sql
new file mode 100644
index 0000000..dafdd1c
--- /dev/null
+++ b/script/gspylib/inspection/lib/checkblacklist/blacklist_check.sql
@@ -0,0 +1,263 @@
+With BlacklistCheck As
+(
+select
+    'BlacklistCheck - unsupported syntax - inherit table' as CheckItem,
+    count(1) as Result,
+    '0' as Expected,
+    ARRAY['V100R005C10','V100R006C00','V100R006C10'] as Version,
+    ARRAY['upgrade','dilatation','replace'] as Action,
+    'GetInheritTable.sql' as Failed_Process_Script
+from pg_inherits
+
+union all
+
+select
+    'BlacklistCheck - unsupported syntax - create table with oids' as CheckItem,
+    count(1) as Result,
+    '0' as Expected,
+    ARRAY['V100R005C10','V100R006C00','V100R006C10'] as Version,
+    ARRAY['upgrade','dilatation','replace'] as Action,
+    'GetTableWithOids.sql' as Failed_Process_Script
+from pg_class where oid > 16384 and relhasoids = true
+
+union all
+
+select
+    'BlacklistCheck - unsupported syntax - foreign key constraint' as CheckItem,
+    count(1) as Result,
+    '0' as Expected,
+    ARRAY['V100R005C10','V100R006C00','V100R006C10'] as Version,
+    ARRAY['upgrade','dilatation','replace'] as Action,
+    'GetTableUseUnsupportConstraint.sql' as Failed_Process_Script
+from pg_constraint where contype = 'f'
+
+union all
+
+select
+    'BlacklistCheck - unsupported syntax - exclusion constraint' as CheckItem,
+    count(1) as Result,
+    '0' as Expected,
+    ARRAY['V100R005C10','V100R006C00','V100R006C10'] as Version,
+    ARRAY['upgrade','dilatation','replace'] as Action,
+    'GetTableUseUnsupportConstraint.sql' as Failed_Process_Script
+from pg_constraint where contype = 'x'
+
+union all
+
+select
+    'BlacklistCheck - unsupported syntax - trigger constraint' as CheckItem,
+    count(1) as Result,
+    '0' as Expected,
+    ARRAY['V100R005C10','V100R006C00','V100R006C10'] as Version,
+    ARRAY['upgrade','dilatation','replace'] as Action,
+    'GetTableUseUnsupportConstraint.sql' as Failed_Process_Script
+from pg_constraint where contype = 't'
+
+union all
+
+select
+    'BlacklistCheck - unsupported syntax - unsupported indexes' as CheckItem,
+    count(1) as Result,
+    '0' as Expected,
+    ARRAY['V100R005C10','V100R006C00','V100R006C10'] as Version,
+    ARRAY['upgrade','dilatation','replace'] as Action,
+    'GetTableUseUnsupportIndex.sql' as Failed_Process_Script
+from pg_indexes where indexdef not like '%btree%' and indexdef not like '%psort%'
+
+union all
+
+select
+    'BlacklistCheck - unsupported syntax - unsupported locator type' as CheckItem,
+    count(1) as Result,
+    '0' as Expected,
+    ARRAY['V100R005C10','V100R006C00','V100R006C10'] as Version,
+    ARRAY['upgrade','dilatation','replace'] as Action,
+    'GetTableUseUnsupportLocatortype.sql' as Failed_Process_Script
+from pgxc_class where pclocatortype not in ('R', 'H') and pcrelid not in (select oid from pg_class where relkind='f') and pcrelid not in (select oid from pg_class 
where reloptions::text like '%internal_mask=33029%')
+
+union all
+
+select
+    'BlacklistCheck - unsupported syntax - sequence' as CheckItem,
+    count(1) as Result,
+    '0' as Expected,
+    ARRAY['V100R005C10','V100R006C00','V100R006C10'] as Version,
+    ARRAY['upgrade','dilatation','replace'] as Action,
+    'GetSequenceName.sql' as Failed_Process_Script
+from pg_class where relkind = 'S'
+
+union all
+
+SELECT
+    'BlacklistCheck - unsupported syntax - to group' as CheckItem,
+    (case when count(distinct pgroup) < 2 then 1 else count(distinct pgroup) end) as Result,
+    '1' as Expected,
+    ARRAY['V100R005C10','V100R006C00','V100R006C10'] as Version,
+    ARRAY['upgrade','dilatation','replace'] as Action,
+    'GetTableUseToGroup.sql' as Failed_Process_Script
+FROM pgxc_class
+
+union all
+
+SELECT
+    'BlacklistCheck - unsupported syntax - to node' as CheckItem,
+    count(1) as Result,
+    '0' as Expected,
+    ARRAY['V100R005C10','V100R006C00','V100R006C10'] as Version,
+    ARRAY['upgrade','dilatation','replace'] as Action,
+    'GetTableUseTonode.sql' as Failed_Process_Script
+FROM pgxc_class where pgroup is null
+
+union all
+
+SELECT
+    'BlacklistCheck - unsupported syntax - create extension' as CheckItem,
+    count(1) as Result,
+    '0' as Expected,
+    ARRAY['V100R005C10','V100R006C00','V100R006C10'] as Version,
+    ARRAY['upgrade','dilatation','replace'] as Action,
+    'GetThirdPartExtension.sql' as Failed_Process_Script
+FROM pg_extension where oid > 16384
+
+union all
+
+SELECT
+    'BlacklistCheck - unsupported syntax - create rule' as CheckItem,
+    count(1) as Result,
+    '0' as Expected,
+    ARRAY['V100R005C10','V100R006C00','V100R006C10'] as Version,
+    ARRAY['upgrade','dilatation','replace'] as Action,
+    'GetTableUseRule.sql' as Failed_Process_Script
+from pg_rewrite where oid > 16384 and rulename != '_RETURN'
+
+union all
+
+SELECT
+    'BlacklistCheck - unsupported syntax - create language' as CheckItem,
+    count(1) as Result,
+    '0' as Expected,
+    ARRAY['V100R005C10','V100R006C00','V100R006C10'] as Version,
+    ARRAY['upgrade','dilatation','replace'] as Action,
+    'GetUserDefinedLanguage.sql' as Failed_Process_Script
+from pg_language where oid > 16384
+
+union all
+
+select
+    'BlacklistCheck - unsupported datatype - line' as CheckItem,
+    count(1) as Result,
+    '0' as Expected,
+    ARRAY['V100R005C10','V100R006C00','V100R006C10'] as Version,
+    ARRAY['upgrade','dilatation','replace'] as Action,
+    'GetTable_ProcUseUnsupportDataType.sql' as Failed_Process_Script
+from pg_attribute where atttypid in (628, 629)
+
+union all
+
+select
+    'BlacklistCheck - unsupported datatype - xml' as CheckItem,
+    count(1) as Result,
+    '0' as Expected,
+    ARRAY['V100R005C10','V100R006C00','V100R006C10'] as Version,
+    ARRAY['upgrade','dilatation','replace'] as Action,
+    'GetTable_ProcUseUnsupportDataType.sql' as Failed_Process_Script
+from pg_attribute where atttypid in (142)
+
+union all
+
+SELECT
+    'BlacklistCheck - unsupported datatype - reg*' as CheckItem,
+    count(1) as Result,
+    '0' as Expected,
+    ARRAY['V100R005C10','V100R006C00','V100R006C10'] as Version,
+    ARRAY['upgrade','dilatation','replace'] as Action,
+    'GetTable_ProcUseUnsupportDataType.sql' as Failed_Process_Script
+from pg_catalog.pg_attribute where attisdropped = false and atttypid in (select oid from pg_type where typname like '%reg%' and typrelid > 0) and attrelid >16384
+
+union all
+
+select
+    'BlacklistCheck - unsupported datatype - pg_node_tree' as CheckItem,
+    count(1) as Result,
+    '0' as Expected,
+    ARRAY['V100R005C10','V100R006C00','V100R006C10'] as Version,
+    ARRAY['upgrade','dilatation','replace'] as Action,
+    'GetTable_ProcUseUnsupportDataType.sql' as Failed_Process_Script
+from pg_attribute where atttypid in (194) and attrelid > 16384
+
+union all
+
+SELECT
+    'BlacklistCheck - unsupported datatype - user defined type' as CheckItem,
+    count(1) as Result,
+    '0' as Expected,
+    ARRAY['V100R005C10','V100R006C00','V100R006C10'] as Version,
+    ARRAY['upgrade','dilatation','replace'] as Action,
+    'GetTable_ProcUseUnsupportDataType.sql' as Failed_Process_Script
+FROM pg_catalog.pg_attribute where atttypid > 16384
+
+union all
+
+SELECT
+    'BlacklistCheck - unsupported table - HDFS foreign table' as CheckItem,
+    count(1) as Result,
+    '0' as Expected,
+    ARRAY['V100R005C10'] as Version,
+    ARRAY['upgrade','dilatation','replace'] as Action,
+    'GetTable_unsupportHDFSForeignTable.sql' as Failed_Process_Script
+FROM pg_class a, pg_namespace b, (select ftrelid from pg_foreign_table t, pg_foreign_server s where t.ftserver = s.oid and s.srvoptions is not null) c
+where a.oid = c.ftrelid and a.relnamespace = b.oid
+
+union all
+
+SELECT
+    'BlacklistCheck - unsupported syntax - user defined aggregate' as CheckItem,
+    count(1) as Result,
+    '0' as Expected,
+    ARRAY['V100R005C10','V100R006C00','V100R006C10'] as Version,
+    ARRAY['upgrade','dilatation','replace'] as Action,
+    'GetUserDefinedAggregate.sql' as Failed_Process_Script
+FROM pg_catalog.pg_proc where proisagg and oid > 16384
+
+union all
+
+SELECT
+    'BlacklistCheck - unsupported syntax - user defined conversion' as CheckItem,
+    count(1) as Result,
+    '0' as Expected,
+    ARRAY['V100R005C10','V100R006C00','V100R006C10'] as Version,
+    ARRAY['upgrade','dilatation','replace'] as Action,
+    'GetUserDefinedConversion.sql' as Failed_Process_Script
+FROM pg_catalog.pg_conversion where oid > 16384
+
+union all
+
+SELECT
+    'BlacklistCheck - unsupported syntax - user defined nodegroup' as CheckItem,
+    count(1) as Result,
+    '1' as Expected,
+    ARRAY['V100R005C10','V100R006C00','V100R006C10'] as Version,
+    ARRAY['upgrade','dilatation','replace'] as Action,
+    'GetUserDefinedNodeGroup.sql' as Failed_Process_Script
+FROM pg_catalog.pgxc_group
+),
+
+t as
+(
+    select
+        CheckItem,
+        Result,
+        Expected,
+        CASE WHEN Result=Expected THEN 'SUCCESS' ELSE 'FAILED' END as Status,
+        Failed_Process_Script
+    from BlacklistCheck
+    order by CheckItem
+)
+select
+    CheckItem,
+    Result,
+    Expected,
+    Status,
+    Failed_Process_Script
+from t
+;
diff --git a/script/gspylib/inspection/lib/checkcollector/databaseinfo.sql b/script/gspylib/inspection/lib/checkcollector/databaseinfo.sql
new file mode 100644
index 0000000..df8edb5
--- /dev/null
+++ b/script/gspylib/inspection/lib/checkcollector/databaseinfo.sql
@@ -0,0 +1,15 @@
+select 'table count:'||count(1) as point from pg_class where relkind = 'r' and oid > 16384;
+select 'foreign_table count:'||count(1) as point from pg_foreign_table;
+select 'view count:'||count(1) as point from pg_class where relkind = 'v' and oid > 16384;
+select 'index count:'||count(1) as point from pg_class where relkind = 'i' and oid > 16384;
+select 'tablespace count:'||count(1)-2 as point from pg_tablespace;
+select 'database count:'||count(1)-2 as point from pg_database;
+select 'unlogged table count:'||count(*) as point from pg_class where relkind='r' and relpersistence='u';
+select 'schema count:'||count(1) -9 as point from pg_namespace;
+select 'partition table count:'||count(1) as point from DBA_PART_TABLES;
+select 'all partition count:'||sum(partition_count) as point from DBA_PART_TABLES;
+select 'max part_table 
partition count:'||max(partition_count) as point from DBA_PART_TABLES; +select 'row count:'||count(1) as point from pg_class where relkind = 'r' and oid > 16384 and reloptions::text not like '%column%' and reloptions::text not like '%internal_mask%'; +select 'column count:'||count(1) as point from pg_class where relkind = 'r' and oid > 16384 and reloptions::text like '%column%'; +select 'function count:'||count(1)-2943 as point from pg_proc; + diff --git a/script/gspylib/inspection/lib/checkcollector/dndatabaseinfo.sql b/script/gspylib/inspection/lib/checkcollector/dndatabaseinfo.sql new file mode 100644 index 0000000..d8f97a9 --- /dev/null +++ b/script/gspylib/inspection/lib/checkcollector/dndatabaseinfo.sql @@ -0,0 +1,4 @@ +select sysdate, * from pv_total_memory_detail; +select sysdate, * from pv_session_memory_detail order by totalsize desc limit 20; +select sysdate,* from pg_shared_memory_detail; +select * from pg_stat_activity where state='active'; \ No newline at end of file diff --git a/script/gspylib/inspection/lib/checkcollector/getClusterInfo.sh b/script/gspylib/inspection/lib/checkcollector/getClusterInfo.sh new file mode 100644 index 0000000..4371814 --- /dev/null +++ b/script/gspylib/inspection/lib/checkcollector/getClusterInfo.sh @@ -0,0 +1,252 @@ +#!/bin/bash +#Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +#openGauss is licensed under Mulan PSL v2. +#You can use this software according to the terms and conditions of the Mulan PSL v2. +#You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +#------------------------------------------------------------------------- +# +# getClusterInfo.sh +# +# IDENTIFICATION +# src/manager/om/script/gspylib/inspection/lib/checkcollector/getClusterInfo.sh +# +#------------------------------------------------------------------------- + +file_pwd=$(cd $(dirname $0);pwd) +log_dir=$file_pwd/out +PORT='' + +#Base check +if [ $(whoami) == omm ]; then + echo "current user is omm" +else + echo "Please run the scripts as omm user" + exit 0 +fi + +if [ ! -f $file_pwd/databaseinfo.sql ] || [ ! -f $file_pwd/hostfile ];then + echo "Make sure the file \"databaseinfo.sql\" and \"hostfile\" exists in the current path" + exit 0 +fi + +mkdir -p $log_dir + +function usage() +{ + echo "***********************************************************************" + echo "* getClusterInfo.sh usage *" + echo "* -p: coordinator port number *" + echo "* example: ./getClusterInfo.sh -p 25308 *" + echo "***********************************************************************" +} + +function parse_para() +{ + while getopts "p:h" opt + do + case $opt in + p) + if [ -z $PORT ]; then + let PORT=$OPTARG + else + echo "ERROR: duplicate port number" + usage + exit 1 + fi + ;; + h) + usage + exit 1 + ;; + ?) 
+ echo "ERROR: unkonw argument" + usage + exit 1 + ;; + esac + done + + if [ -z $PORT ]; then + echo "ERROR: must designate -p" + usage + exit 1 + fi +} + + + +#Get os info +function os_info_for_remode_host() +{ + echo "CPU info" | tee $log_dir/sysinfo_$host + ssh $host "cat /proc/cpuinfo" >> $log_dir/sysinfo_$host + echo "========================================================================================" >> $log_dir/sysinfo_$host + echo | tee -a $log_dir/sysinfo_$host + echo "Memory info" | tee -a $log_dir/sysinfo_$host + ssh $host "/usr/bin/free -g" >> $log_dir/sysinfo_$host + echo "========================================================================================" >> $log_dir/sysinfo_$host + echo | tee -a $log_dir/sysinfo_$host + echo "Disk info" | tee -a $log_dir/sysinfo_$host + ssh $host "/bin/df -lh" >> $log_dir/sysinfo_$host + ssh $host "/bin/mount" >> $log_dir/sysinfo_$host + echo "========================================================================================" >> $log_dir/sysinfo_$host + echo | tee -a $log_dir/sysinfo_$host + echo "Network info" | tee -a $log_dir/sysinfo_$host + ssh $host "/sbin/ifconfig" >> $log_dir/sysinfo_$host + echo "========================================================================================" >> $log_dir/sysinfo_$host + echo | tee -a $log_dir/sysinfo_$host + echo "Swap info" | tee -a $log_dir/sysinfo_$host + ssh $host "/usr/bin/free | grep Swap" >> $log_dir/sysinfo_$host + echo "========================================================================================" >> $log_dir/sysinfo_$host + echo | tee -a $log_dir/sysinfo_$host + echo "OS info" | tee -a $log_dir/sysinfo_$host + ssh $host "/usr/bin/lsb_release -a" >> $log_dir/sysinfo_$host + echo "========================================================================================" >> $log_dir/sysinfo_$host + echo | tee -a $log_dir/sysinfo_$host + echo "Os parameter info" | tee -a $log_dir/sysinfo_$host + ssh $host "/sbin/sysctl -a" >> $log_dir/sysinfo_$host 2>&1 + echo "========================================================================================" >> $log_dir/sysinfo_$host + echo | tee -a $log_dir/sysinfo_$host + echo "Cluster info" | tee -a $log_dir/sysinfo_$host + ssh $host "source /opt/huawei/Bigdata/mppdb/.mppdbgs_profile;gs_om -t status --detail" >> $log_dir/sysinfo_$host + echo "========================================================================================" >> $log_dir/sysinfo_$host + echo | tee -a $log_dir/sysinfo_$host + echo "Directory of cluster info" | tee -a $log_dir/sysinfo_$host + ssh $host "ps -ef |grep gaussdb|grep /|grep -v grep" >> $log_dir/sysinfo_$host + echo "========================================================================================" >> $log_dir/sysinfo_$host + echo | tee -a $log_dir/sysinfo_$host + echo "MPPDB info" | tee -a $log_dir/sysinfo_$host + ssh $host "source /opt/huawei/Bigdata/mppdb/.mppdbgs_profile;gaussdb -V" >> $log_dir/sysinfo_$host +} + +#Get os info +function os_info_for_local_host() +{ + echo "CPU info" | tee $log_dir/sysinfo_$host + cat /proc/cpuinfo >> $log_dir/sysinfo_$host + echo "========================================================================================" >> $log_dir/sysinfo_$host + echo | tee -a $log_dir/sysinfo_$host + echo "Memory info" | tee -a $log_dir/sysinfo_$host + /usr/bin/free -g >> $log_dir/sysinfo_$host + echo "========================================================================================" >> $log_dir/sysinfo_$host + echo | tee -a $log_dir/sysinfo_$host + 
echo "Disk info" | tee -a $log_dir/sysinfo_$host + /bin/df -lh >> $log_dir/sysinfo_$host + /bin/mount >> $log_dir/sysinfo_$host + echo "========================================================================================" >> $log_dir/sysinfo_$host + echo | tee -a $log_dir/sysinfo_$host + echo "Network info" | tee -a $log_dir/sysinfo_$host + /sbin/ifconfig >> $log_dir/sysinfo_$host + echo "========================================================================================" >> $log_dir/sysinfo_$host + echo | tee -a $log_dir/sysinfo_$host + echo "Swap info" | tee -a $log_dir/sysinfo_$host + /usr/bin/free | grep Swap >> $log_dir/sysinfo_$host + echo "========================================================================================" >> $log_dir/sysinfo_$host + echo | tee -a $log_dir/sysinfo_$host + echo "OS info" | tee -a $log_dir/sysinfo_$host + /usr/bin/lsb_release -a >> $log_dir/sysinfo_$host + echo "========================================================================================" >> $log_dir/sysinfo_$host + echo | tee -a $log_dir/sysinfo_$host + echo "Os parameter info" | tee -a $log_dir/sysinfo_$host + /sbin/sysctl -a >> $log_dir/sysinfo_$host 2>&1 + echo "========================================================================================" >> $log_dir/sysinfo_$host + echo | tee -a $log_dir/sysinfo_$host + echo "Cluster info" | tee -a $log_dir/sysinfo_$host + source /opt/huawei/Bigdata/mppdb/.mppdbgs_profile;gs_om -t status --detail >> $log_dir/sysinfo_$host + echo "========================================================================================" >> $log_dir/sysinfo_$host + echo | tee -a $log_dir/sysinfo_$host + echo "Directory of cluster info" | tee -a $log_dir/sysinfo_$host + ps -ef |grep gaussdb|grep /|grep -v grep >> $log_dir/sysinfo_$host + echo "========================================================================================" >> $log_dir/sysinfo_$host + echo | tee -a $log_dir/sysinfo_$host + echo "MPPDB info" | tee -a $log_dir/sysinfo_$host + source /opt/huawei/Bigdata/mppdb/.mppdbgs_profile;gaussdb -V >> $log_dir/sysinfo_$host +} + +#Get the database info +function database_info() +{ + echo + echo "Database info" + echo + source /opt/huawei/Bigdata/mppdb/.mppdbgs_profile + for db in $(gsql -d postgres -p $PORT -c "select datname||' DB' from pg_database where datname != 'template1' and datname != 'template0'" | grep DB | awk '{print $1}') + do + echo "database name:$db" | tee $log_dir/db_$db.log + gsql -d postgres -p $PORT -A -c "select 'database $db size:'||pg_size_pretty(pg_database_size(:db));" -v db="'$db'" | grep -v "column"| grep -v "row)" | tee -a $log_dir/db_$db.log + gsql -d $db -p $PORT -A -f $file_pwd/databaseinfo.sql | grep -v point | grep -v "row)"| grep -v "rows)" >> $log_dir/db_$db.log + echo + done +} + +#Get the cluster config file +function cluster_config_for_remote_host() +{ + source /opt/huawei/Bigdata/mppdb/.mppdbgs_profile + tmp_file=/tmp/conf$(date "+%H%M%S") + echo ''>$tmp_file + #collect the config file information + for dirname in $(gs_om -t status --detail | grep "/" | awk '{ print $5}' | sort -u) + do + for filename in $(ssh $host "ls $dirname/*.conf" 2>/dev/null) + do + echo $filename >> $tmp_file + done + done + #copy the config file + for conf_file in $(sort -u $tmp_file) + do + path=${conf_file%/*} + instance=${path##*/} + scp $host:$conf_file $log_dir/${host}_${instance}_${conf_file##*/} + done +} + +#Get the cluster config file +function cluster_config_for_local_host() +{ + source 
/opt/huawei/Bigdata/mppdb/.mppdbgs_profile
+    tmp_file=/tmp/conf$(date "+%H%M%S")
+    echo ''>$tmp_file
+    #collect the config file information
+    for dirname in $(gs_om -t status --detail | grep "/" | awk '{ print $5}' | sort -u)
+    do
+        for filename in $(ls $dirname/*.conf 2>/dev/null)
+        do
+            echo $filename >> $tmp_file
+        done
+    done
+    #copy the config file
+    for conf_file in $(sort -u $tmp_file)
+    do
+        path=${conf_file%/*}
+        instance=${path##*/}
+        cp $conf_file $log_dir/${host}_${instance}_${conf_file##*/}
+    done
+}
+
+#obtain cn port
+parse_para $*
+#get information of all
+for host in $(cat $file_pwd/hostfile)
+do
+    if [ $host == $(hostname) ];
+    then
+        os_info_for_local_host
+        echo "get cluster config file"
+        cluster_config_for_local_host
+    else
+        os_info_for_remote_host
+        echo "get cluster config file"
+        cluster_config_for_remote_host
+    fi
+done
+database_info
diff --git a/script/gspylib/inspection/lib/checkcollector/getOSInfo.sh b/script/gspylib/inspection/lib/checkcollector/getOSInfo.sh
new file mode 100644
index 0000000..cb32cea
--- /dev/null
+++ b/script/gspylib/inspection/lib/checkcollector/getOSInfo.sh
@@ -0,0 +1,66 @@
+#!/bin/bash
+#Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+#
+#openGauss is licensed under Mulan PSL v2.
+#You can use this software according to the terms and conditions of the Mulan PSL v2.
+#You may obtain a copy of Mulan PSL v2 at:
+#
+#          http://license.coscl.org.cn/MulanPSL2
+#
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+#-------------------------------------------------------------------------
+#
+# getOSInfo.sh
+#
+# IDENTIFICATION
+#    src/manager/om/script/gspylib/inspection/lib/checkcollector/getOSInfo.sh
+#
+#-------------------------------------------------------------------------
+
+file_pwd=$(cd $(dirname $0);pwd)
+log_dir=$file_pwd/out
+PORT=25308
+
+#Base check
+if [ $(whoami) == root ]; then
+    echo "current user is root"
+else
+    echo "Please run the scripts as root user"
+    exit 0
+fi
+
+if [ ! -f $file_pwd/hostfile ];then
+    echo "Make sure the file \"$file_pwd/hostfile\" exists !!!"
+    exit 0
+fi
+
+mkdir -p $log_dir
+
+#check command
+#get OS info
+function os_info()
+{
+    echo $host
+    #firewall info
+    echo "Get firewall info" | tee $log_dir/${host}_osinfo.log
+    if [ $(cat /etc/*release| grep SUSE|wc -l) -gt 0 ];then
+        echo "OS is SUSE" | tee -a $log_dir/${host}_osinfo.log
+        echo "firewall info" | tee -a $log_dir/${host}_osinfo.log
+        ssh $host "/sbin/SuSEfirewall2 status" >> $log_dir/${host}_osinfo.log 2>&1
+    elif [ $(cat /etc/*release| grep -E 'REDHAT|Red Hat'|wc -l) -gt 0 ];then
+        echo "OS is REDHAT" | tee -a $log_dir/${host}_osinfo.log
+        echo "firewall info" | tee -a $log_dir/${host}_osinfo.log
+        ssh $host "service iptables status" >> $log_dir/${host}_osinfo.log 2>&1
+    else
+        echo "ERROR: unsupported OS !!!"
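+        # neither SUSE nor Red Hat was detected, so no firewall status is collected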
+    fi
+}
+
+for host in $(cat $file_pwd/hostfile)
+do
+    os_info
+done
diff --git a/script/gspylib/inspection/lib/checkcollector/hostfile b/script/gspylib/inspection/lib/checkcollector/hostfile
new file mode 100644
index 0000000..e69de29
diff --git a/script/gspylib/inspection/lib/checkcollector/readme.txt b/script/gspylib/inspection/lib/checkcollector/readme.txt
new file mode 100644
index 0000000..59976a0
--- /dev/null
+++ b/script/gspylib/inspection/lib/checkcollector/readme.txt
@@ -0,0 +1,12 @@
+Usage notes
+1. Upload the check package to a node in the cluster where a CN is installed, and make sure the omm user has write permission on the package path.
+2. Run getClusterInfo.sh as the omm user and getOSInfo.sh as the root user; the root passwords of all nodes are required.
+3. Make sure the port set in the scripts (PORT=25308 in getClusterInfo.sh) is correct; if not, change it to the actual database connection port.
+
+4. Create a file named hostfile in the current directory that lists the IP addresses of the cluster nodes.
+
+5. Switch to the check tool directory on the check node and run:
+sh getClusterInfo.sh
+
+6. The collected results are saved in the "out" directory under the script path.
+
diff --git a/script/gspylib/inspection/lib/checkcreateview/check_viewdef.sql b/script/gspylib/inspection/lib/checkcreateview/check_viewdef.sql
new file mode 100644
index 0000000..e6c61b5
--- /dev/null
+++ b/script/gspylib/inspection/lib/checkcreateview/check_viewdef.sql
@@ -0,0 +1,5 @@
+SELECT
+    'EXPLAIN SELECT * FROM(' || rtrim(pg_catalog.pg_get_viewdef(c.oid), ';') || ') AS "' || n.nspname || '.' || c.relname || '";'
+FROM pg_class c
+LEFT JOIN pg_namespace n ON (n.oid = c.relnamespace AND n.nspname NOT IN('pg_toast', 'pg_catalog', 'information_schema', 'cstore'))
+WHERE c.relkind = 'v'::"char" and c.oid > 16384;
\ No newline at end of file
diff --git a/script/gspylib/inspection/lib/checknetspeed/speed_test b/script/gspylib/inspection/lib/checknetspeed/speed_test
new file mode 100644
index 0000000..3b969d4
--- /dev/null
+++ b/script/gspylib/inspection/lib/checknetspeed/speed_test
@@ -0,0 +1,103 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+#############################################################################
+# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+#
+# openGauss is licensed under Mulan PSL v2.
+# You can use this software according to the terms
+# and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+#
+#          http://license.coscl.org.cn/MulanPSL2
+#
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+# ----------------------------------------------------------------------------
+import socket
+import time
+import sys
+
+listen_ip = "localhost"
+listen_port = 31111
+run_mode = 0  # 0:connect, 1:send, 2:recv
+
+def send_main():
+    try:
+        global listen_ip
+        global listen_port
+        buf = "this is a test !" 
* 512  # buf 8192 block
+        sockets = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+        sockets.setsockopt(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1)
+        print("%s:%s" % (listen_ip, listen_port))
+        while sockets.connect_ex((listen_ip, int(listen_port))) != 0:
+            print("connect failed, retrying\n")
+            time.sleep(1)
+        print("connect succeed, dest[%s:%d], mode[%s]\n" % (listen_ip, int(listen_port), "tcp"))
+        print("send start, dest[%s:%d], mode[%s]\n" % (listen_ip, int(listen_port), "tcp"))
+        i = 0
+        while True:
+            i = i + 1
+            n = sockets.send(buf.encode())
+            if n == 0:
+                print("send failed\n")
+                break
+            print("%d send:%s, len=%d\n" % (i, buf, n))
+    except Exception as e:
+        print(str(e))
+
+def recv_main():
+    try:
+        global listen_ip
+        global listen_port
+        sockets = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+        sockets.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, True)
+        sockets.setsockopt(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1)
+        sockets.bind((listen_ip, int(listen_port)))
+        sockets.listen(128)
+        while True:
+            client, addr = sockets.accept()
+            print('client:', client)
+            print('addr:', addr)
+            while True:
+                data = client.recv(8192)
+                print(data.decode())
+                if not data:
+                    client.close()
+                    break
+    except Exception as e:
+        print(str(e))
+
+def connect_main():
+    sockets = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+    sockets.setsockopt(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1)
+    if sockets.connect_ex((listen_ip, int(listen_port))) != 0:
+        print("Failed to connect %s:%d on %s mode.\n"
+              % (listen_ip, int(listen_port), "tcp"))
+    else:
+        print("Succeed to connect %s:%d on %s mode.\n"
+              % (listen_ip, int(listen_port), "tcp"))
+
+def invalid_argument():
+    print("usage: ./speed_test recv/send/connect recv_ip "
+          "recv_port [sctp] [msg_len] [debug]\n")
+    print("example: ./speed_test recv 127.0.0.1 10001\n")
+    exit(1)
+
+if __name__ == '__main__':
+    if len(sys.argv) < 4:
+        invalid_argument()
+    if sys.argv[1] == "send":
+        run_mode = 1
+    elif sys.argv[1] == "recv":
+        run_mode = 2
+    listen_ip = sys.argv[2]
+    listen_port = sys.argv[3]
+    if run_mode == 1:
+        send_main()
+    elif run_mode == 2:
+        recv_main()
+    else:
+        connect_main()
diff --git a/script/gspylib/inspection/readme.txt b/script/gspylib/inspection/readme.txt
new file mode 100644
index 0000000..6166ded
--- /dev/null
+++ b/script/gspylib/inspection/readme.txt
@@ -0,0 +1,52 @@
+1. Pick a node in the cluster that hosts a CN;
+
+2. Prepare a text file named hostfile that lists the hostnames of all data nodes in the cluster (one hostname per line), and upload it to the /home/omm/ directory
+   For a new-expansion-node check, hostfile should contain the IPs of all new expansion nodes
+   To check consistency between new and old nodes after expansion, hostfile should contain both the new and the old nodes
+
+3. Log in to that CN node and run the following commands as the omm user:
+   su - omm
+   source /opt/huawei/Bigdata/mppdb/.mppdbgs_profile
+   for i in `cat /home/omm/hostfile`;do ssh $i "hostname;rm -rf /home/omm/test_check" ;done
+   for i in `cat /home/omm/hostfile`;do ssh $i "hostname;mkdir /home/omm/test_check" ;done
+
+4. Unpack the Check package and upload it, as the omm user, to /home/omm/test_check on that CN node
+
+5. Change the owner of the test_check directory and the files under it to omm
+
+   chown -R omm:wheel /home/omm/test_check/Check/
+
+6. Grant execute permission to the check scripts
+
+   cd /home/omm/test_check
+
+   chmod +x -R /home/omm/test_check/
+
+
+7. Run the script to check the cluster and collect information
+
+   for i in `cat /home/omm/hostfile`;do scp -r /home/omm/test_check/* $i:/home/omm/test_check/;done
+
+   cd /home/omm/test_check/Check
+
+   source /opt/huawei/Bigdata/mppdb/.mppdbgs_profile
+
+   To inspect the cluster, run
+   ./gs_check -e inspect -U omm
+
+   To run the pre-upgrade check, run
+   ./gs_check -e upgrade -U omm
+
+   To run the pre-expansion check, run
+   ./gs_check -e expand -U omm
+
+   To check new expansion nodes:
+   1. Switch to root or a user with root privileges
+   2. Run ./gs_check -e expand_new_node --hosts=/home/omm/hostfile
+
+   To collect information, run
+   ./gs_check -i CheckCollector -L
+
+   Note: if any of the above check commands involves root check items, you will be prompted for the root password
+
+8. Compress the output folder under /home/omm/test_check/Check/inspection and send it back.
\ No newline at end of file
diff --git a/script/gspylib/os/__init__.py b/script/gspylib/os/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/script/gspylib/os/gsOSlib.py b/script/gspylib/os/gsOSlib.py
new file mode 100644
index 0000000..66ec04f
--- /dev/null
+++ b/script/gspylib/os/gsOSlib.py
@@ -0,0 +1,566 @@
+# -*- coding:utf-8 -*-
+# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+#
+# openGauss is licensed under Mulan PSL v2.
+# You can use this software according to the terms
+# and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+#
+#          http://license.coscl.org.cn/MulanPSL2
+#
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+# ----------------------------------------------------------------------------
+""" The following platform framework is used to handle any differences between
+    the platforms we support. The GenericPlatform class is the base class
+    that a supported platform extends from and overrides any of the methods
+    as necessary.
+"""
+
+import os
+import sys
+import subprocess
+import pwd
+import grp
+
+sys.path.append(sys.path[0] + "/../../")
+from gspylib.common.ErrorCode import ErrorCode
+from gspylib.common.VersionInfo import VersionInfo
+from gspylib.os.gsplatform import g_Platform, findCmdInPath
+from gspylib.os.gsfile import g_file
+
+sys.path.append(sys.path[0] + "/../../../lib")
+import psutil
+
+
+class PlatformCommand():
+    """
+    Command for os
+    """
+
+    def __init__(self):
+        """
+        function : init function
+        input : NA
+        output : NA
+        """
+        pass
+
+    def getDate(self):
+        """
+        function : Get current system time
+        input : NA
+        output: String
+        """
+        dateCmd = g_Platform.getDateCmd() + " -R "
+        (status, output) = subprocess.getstatusoutput(dateCmd)
+        # if cmd failed, then exit
+        if status != 0:
+            raise Exception(ErrorCode.GAUSS_502["GAUSS_50219"] % "date" +
+                            "The cmd is %s" % dateCmd)
+        return output
+
+    def getAllCrontab(self):
+        """
+        function : Get the crontab
+        input : NA
+        output: status, output
+        """
+        cmd = g_Platform.getAllCrontabCmd()
+        (status, output) = subprocess.getstatusoutput(cmd)
+        if output.find("no crontab for") >= 0:
+            output = ""
+            status = 0
+        # if cmd failed, then exit
+        if status != 0:
+            raise Exception(ErrorCode.GAUSS_502["GAUSS_50219"] %
+                            "crontab list" + " Error:%s." 
% output + + "The cmd is %s" % cmd) + return status, output + + def execCrontab(self, path): + """ + function : Get the crontab + input : string + output: True or False + """ + if not os.path.exists(path): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % path) + filePath = os.path.dirname(path) + fileName = os.path.basename(path) + cmd = g_Platform.getCdCmd(filePath) + cmd += " && " + cmd += g_Platform.getCrontabCmd() + cmd += (" ./%s" % fileName) + cmd += " && %s" % g_Platform.getCdCmd("-") + # if cmd failed, then exit + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + " Error:\n%s" % output) + return True + + def source(self, path): + """ + function : Get the source + input : string + output: True or False + """ + cmd = g_Platform.getSourceCmd() + cmd += " %s" % path + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + " Error:\n%s" % output) + return True + + def getGrepValue(self, para="", value="", path=""): + """ + function : grep value + input : string,value,path + output: status, output + """ + if not os.path.exists(path): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % path) + cmd = g_Platform.getGrepCmd() + " %s '%s' '%s'" % (para, value, path) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + " Error:\n%s" % output) + return status, output + + def getHostName(self): + """ + function : Get host name + input : NA + output: string + """ + hostCmd = findCmdInPath("hostname") + (status, output) = subprocess.getstatusoutput(hostCmd) + # if cmd failed, then exit + if status != 0: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50219"] % "host name" + + "The cmd is %s" % hostCmd) + return output + + def getSysConfiguration(self): + """ + function : The size range of PAGE_SIZE obtained by getconf + input : NA + output: string + """ + configCmd = g_Platform.getGetConfValueCmd() + (status, output) = subprocess.getstatusoutput(configCmd) + # if cmd failed, then exit + if status != 0: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50219"] % + "system config pagesize" + + "The cmd is %s" % configCmd) + return output + + def getUserLimits(self, limitType): + """ + function : Get current user process limits + input : string + output: string + """ + limit = g_Platform.getUlimitCmd() + limitCmd = "%s -a | %s -F '%s'" % (limit, g_Platform.getGrepCmd(), + limitType) + (status, output) = subprocess.getstatusoutput(limitCmd) + # if cmd failed, then exit + if status != 0: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % limitCmd + + " Error:\n%s" % output) + return output + + def chageExpiryInformation(self, user): + """ + function : Query user password expiration time + input : user + output: True or False + """ + changeTemp = g_Platform.getPasswordExpiresCmd(user) + changeCmd = "%s | %s -i '^Password expires'" % \ + (changeTemp, g_Platform.getGrepCmd()) + (status, output) = subprocess.getstatusoutput(changeCmd) + # if cmd failed, then exit + if status != 0: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % changeCmd + + " Error:\n%s" % output) + + expireTime = output.split(":")[1] + if expireTime.find("never") == 1: + return False + else: + return True + + def getIOStat(self): + """ + function : Get device IO information + input : NA + output: string + """ + ioStatCmd = g_Platform.getIOStatCmd() + (status, output) = 
subprocess.getstatusoutput(ioStatCmd) + # if cmd failed, then exit + if status != 0: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50219"] % + "IO information" + "The cmd is %s" % ioStatCmd) + return output + + def scpFile(self, ip, sourcePath, targetPath, copyTo=True): + """ + function : if copyTo is True, scp files to remote host else, + scp files to local host + input : destination host ip + source path + target path + copyTo + output: NA + """ + scpCmd = "" + if os.path.isdir(sourcePath): + scpCmd = g_Platform.getRemoteCopyCmd(sourcePath, targetPath, ip, + copyTo, "directory") + elif os.path.exists(sourcePath): + scpCmd = g_Platform.getRemoteCopyCmd(sourcePath, targetPath, ip, + copyTo) + + (status, output) = subprocess.getstatusoutput(scpCmd) + # if cmd failed, then exit + if status != 0: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % scpCmd + + " Error:\n%s" % output) + + def getLocaleInfo(self, para): + """ + function : Get OS character set information + input : para + output: string + """ + localCmd = "%s | grep '^%s='" % (g_Platform.getLocaleCmd(), para) + (status, output) = subprocess.getstatusoutput(localCmd) + # if cmd failed, then exit + if status != 0: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % localCmd + + " Error:\n%s" % output) + return output.split("=")[1][1:-1] + + def mangerSysMode(self, operateType, module): + """ + type: list --list system module + load --load system module + insert --insert system module by force + remove --remove system module + dep --generate modules.dep and map files + """ + modCmd = g_Platform.getSysModManagementCmd(operateType, module) + (status, output) = subprocess.getstatusoutput(modCmd) + # if cmd failed, then exit + if status != 0: + raise Exception(str(output) + " The cmd is %s" % modCmd) + + def getSshCommand(self, ip, cmd): + """ + function : Get ssh command + input : null + output : exe_cmd + """ + exe_cmd = "%s \"%s\"" % (g_Platform.getSshCmd(ip), cmd) + return exe_cmd + + def getProcess(self, processKeywords): + """ + function : Get process id by keywords + input : processKeywords + output : processId + """ + processId = [] + cmd = g_Platform.getProcessIdByKeyWordsCmd(processKeywords) + (status, output) = subprocess.getstatusoutput(cmd) + if status == 0 and str(output.strip()) != "": + # kill process + processId = output.strip().split("\n") + return processId + + def getProcPidList(self, procName): + """ + function : Get process id by procName + input : procName + output : pidList + """ + pidList = [] + for pid in psutil.pids(): + try: + p = psutil.Process(pid) + if procName == p.name(): + pidList.append(pid) + except psutil.NoSuchProcess: + pass + return pidList + + def killProcessByProcName(self, procName, killType=2): + """ + function : Kill the process + input : int, int + output : boolean + """ + try: + pidList = self.getProcPidList(procName) + for pid in pidList: + os.kill(pid, killType) + return True + except Exception: + return False + + def killallProcess(self, userName, procName, killType='2'): + """ + function : Kill all processes by userName and procName. 
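+                   killType is the signal number, passed as a string
+                   (default '2', i.e. SIGINT) to the underlying killall command.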
+ input : userName, procName, killType + output : boolean + """ + cmd = "%s >/dev/null 2>&1" % g_Platform.getKillallProcessCmd(killType, + userName, + procName) + status = subprocess.getstatusoutput(cmd)[0] + if status != 0: + return False + return True + + def cleanCommunicationStatus(self, user): + """ + function : clean semaphore + input : user + output : Successful return True,otherwise return false + """ + cmd = g_Platform.getDeleteSemaphoreCmd(user) + (status, output) = subprocess.getstatusoutput(cmd) + if status == 0: + return True + else: + raise Exception(ErrorCode.GAUSS_504["GAUSS_50407"] + + " Error: \n%s." % str(output) + + "The cmd is %s" % cmd) + + def getUserInfo(self): + """ + function : Get user information + input : null + output : userInfo + """ + userInfo = {"uid": os.getuid(), "name": pwd.getpwuid( + os.getuid()).pw_name, + "gid": pwd.getpwuid(os.getuid()).pw_gid} + userInfo["g_name"] = grp.getgrgid(userInfo["gid"]).gr_name + + return userInfo + + def getDeviceIoctls(self, devName): + """ + function : Get device ioctls + input : devName device name + output : blockSize + """ + blockSize = 0 + cmd = g_Platform.getBlockdevCmd(devName) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_504["GAUSS_50408"] % cmd + + " Error: \n%s" % str(output)) + if str(output.strip()) != "" and output.isdigit(): + blockSize = int(output) + return blockSize + + def addUser(self, userName, groupName): + """ + function : Add the user + input : userName + : groupName + output : Successful return True,otherwise return false + """ + cmd = g_Platform.getUseraddCmd(userName, groupName) + (status, output) = subprocess.getstatusoutput(cmd) + if status == 0: + return True + else: + raise Exception(ErrorCode.GAUSS_503["GAUSS_50318"] % userName + + " Error: \n%s." % str(output) + + "The cmd is %s" % cmd) + + def delUser(self, userName): + """ + function : Delete the user + input : userName + output : Successful return True,otherwise return false + """ + cmd = g_Platform.getUserdelCmd(userName) + (status, output) = subprocess.getstatusoutput(cmd) + if status == 0: + return True + else: + raise Exception(ErrorCode.GAUSS_503["GAUSS_50314"] % userName + + " Error: \n%s." % str(output) + + "The cmd is %s" % cmd) + + def addGroup(self, groupName): + """ + function : Add the group + input : groupName + output : Successful return True,otherwise return false + """ + cmd = g_Platform.getGroupaddCmd(groupName) + (status, output) = subprocess.getstatusoutput(cmd) + if status == 0: + return True + else: + raise Exception(ErrorCode.GAUSS_503["GAUSS_50319"] % groupName + + " Error: \n%s." % str(output) + + "The cmd is %s" % cmd) + + def delGroup(self, groupName): + """ + function : delete the group + input : groupName + output : Successful return True,otherwise return false + """ + cmd = g_Platform.getGroupdelCmd(groupName) + (status, output) = subprocess.getstatusoutput(cmd) + if status == 0: + return True + else: + raise Exception(ErrorCode.GAUSS_503["GAUSS_50313"] % groupName + + " Error:\n%s." % str(output) + + "The cmd is %s" % cmd) + + def getPathOwner(self, pathName): + """ + function : Get the owner user of path. 
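+        Example (illustrative): getPathOwner("/tmp") usually returns
+        ("root", "root"); a path that does not exist yields ("", "").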
+ input : pathName + output : user and group + """ + user = "" + group = "" + # check path + if not os.path.exists(pathName): + return user, group + # get use and group information + try: + user = pwd.getpwuid(os.stat(pathName).st_uid).pw_name + group = grp.getgrgid(os.stat(pathName).st_gid).gr_name + return user, group + except Exception: + return "", "" + + def getPackageFile(self, fileType="tarFile"): + """ + function : Get the path of binary file version. + input : NA + output : String + """ + (distName, version) = g_Platform.getCurrentPlatForm() + return g_Platform.getPackageFile(distName, version, + VersionInfo.getPackageVersion(), + VersionInfo.PRODUCT_NAME_PACKAGE, + fileType) + + def getTarFilePath(self): + """ + function : Get the path of binary file version. + input : NA + output : str + """ + return self.getPackageFile("tarFile") + + def getBz2FilePath(self): + """ + function : Get the path of binary file version. + input : NA + output : str + """ + return self.getPackageFile("bz2File") + + def getBinFilePath(self): + """ + function : Get the path of binary file version.. + input : NA + output : str + """ + return self.getPackageFile("binFile") + + def getSHA256FilePath(self): + """ + function : Get the path of sha256 file version.. + input : NA + output : str + """ + return self.getPackageFile("sha256File") + + def getFileSHA256Info(self): + """ + function: get file sha256 info + input: NA + output: str, str + """ + try: + bz2Path = self.getBz2FilePath() + sha256Path = self.getSHA256FilePath() + + fileSHA256 = g_file.getFileSHA256(bz2Path) + valueList = g_file.readFile(sha256Path) + if len(valueList) != 1: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50204"] % + sha256Path) + sha256Value = valueList[0].strip() + return fileSHA256, sha256Value + except Exception as e: + raise Exception(str(e)) + + def checkLink(self, filePath): + """ + function:check if file is a link + input: filePath + output:NA + """ + if os.path.exists(filePath): + if os.path.islink(filePath): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50210"] % filePath) + + def getGroupByUser(self, user): + """ + function : get group by user + input : user + output : group + """ + try: + group = grp.getgrgid(pwd.getpwnam(user).pw_gid).gr_name + except Exception as e: + raise Exception(ErrorCode.GAUSS_503["GAUSS_50300"] % user + + "Detail msg: %s" % str(e)) + return group + + def getPortProcessInfo(self, port): + """ + function : get port occupation process + input : port + output : process info + """ + try: + processInfo = "" + cmd = "netstat -an | grep -w %s" % port + output = subprocess.getstatusoutput(cmd)[1] + processInfo += "%s\n" % output + return processInfo + except Exception as e: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + " Error:\n%s" % str(e)) + + +g_OSlib = PlatformCommand() diff --git a/script/gspylib/os/gsfile.py b/script/gspylib/os/gsfile.py new file mode 100644 index 0000000..09a6d7c --- /dev/null +++ b/script/gspylib/os/gsfile.py @@ -0,0 +1,977 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. 
+# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- + +import sys +import subprocess +import os +import pwd +import grp +import shutil +import _thread as thread +import time +import stat + +localDirPath = os.path.dirname(os.path.realpath(__file__)) +sys.path.insert(0, localDirPath + "/../../../lib") +try: + import psutil +except ImportError as e: + # mv psutil mode .so file by python version + pythonVer = sys.version[:3] + psutilLinux = os.path.join(localDirPath, + "./../../../lib/psutil/_psutil_linux.so") + psutilPosix = os.path.join(localDirPath, + "./../../../lib/psutil/_psutil_posix.so") + psutilLinuxBak = "%s_%s" % (psutilLinux, pythonVer) + psutilPosixBak = "%s_%s" % (psutilPosix, pythonVer) + + glo_cmd = "rm -rf '%s' && cp -r '%s' '%s' " % (psutilLinux, + psutilLinuxBak, + psutilLinux) + glo_cmd += " && rm -rf '%s' && cp -r '%s' '%s' " % (psutilPosix, + psutilPosixBak, + psutilPosix) + psutilFlag = True + for psutilnum in range(3): + (status_mvPsutil, output_mvPsutil) = subprocess.getstatusoutput( + glo_cmd) + if status_mvPsutil != 0: + psutilFlag = False + time.sleep(1) + else: + psutilFlag = True + break + if not psutilFlag: + print("Failed to execute cmd: %s. Error:\n%s" % (glo_cmd, + output_mvPsutil)) + sys.exit(1) + # del error import and reload psutil + del sys.modules['psutil._common'] + del sys.modules['psutil._psposix'] + import psutil +sys.path.append(sys.path[0] + "/../../") +from gspylib.common.ErrorCode import ErrorCode +from gspylib.os.gsplatform import g_Platform, findCmdInPath + + +class fileManage(): + """ + Class to handle OS file operations + """ + SHELL_CMD_DICT = { + "deleteFile": "(if [ -f '%s' ];" + "then rm -f '%s';fi)", + "deleteLibFile": "cd %s && ls | grep -E '%s'|" + "xargs rm -f", + "cleanDir": "(if [ -d '%s' ];then rm -rf " + "'%s'/* && cd '%s' && ls -A | " + "xargs rm -rf ; fi)", + "execShellFile": "sh %s", + "getFullPathForShellCmd": "which %s", + "deleteDir": "(if [ -d '%s' ];then rm " + "-rf '%s';fi)", + "deleteLib": "(if [ -e '%s' ];then rm " + "-rf '%s';fi)", + "createDir": "(if [ ! -d '%s' ]; " + "then mkdir -p '%s' -m %s;fi)", + "createFile": "touch '%s' && chmod %s '%s'", + "deleteBatchFiles": "rm -f %s*", + "compressTarFile": "cd '%s' && tar -cf " + "'%s' %s && chmod %s '%s'", + "decompressTarFile": "cd '%s' && tar -xf '%s' ", + "copyFile": " cp -rf %s %s ", + "sshCmd": "pssh -s -H %s 'source %s;%s'", + "renameFile": "(if [ -f '%s' ];then mv '%s' " + "'%s';fi)", + "cleanFile": "if [ -f %s ]; then echo '' > " + "%s; fi", + "exeRemoteShellCMD": "pssh -s -H %s 'source %s;%s'", + "exeRemoteShellCMD1": "pssh -s -H %s \"%s\"", + "userExeRemoteShellCmd": "su - %s -c \"pssh -s -H %s " + "'%s'\"", + "checkUserPermission": "su - %s -c \"cd '%s'\"", + "getFileTime": "echo $[`date +%%s`-`stat -c " + "%%Y %s`]", + "scpFileToRemote": "pscp -H '%s' '%s' '%s'", + "scpFileFromRemote": "pssh -s -H '%s' \"pscp -H " + "'%s' '%s' '%s' \"", + "findfiles": "cd %s && find . " + "-type l -print", + "copyFile1": "(if [ -f '%s' ];then cp " + "'%s' '%s';fi)", + "copyFile2": "(if [ -f '%s' ] && [ ! 
-f " + "'%s' ];then cp '%s' '%s';fi)", + "copyRemoteFile": "(if [ -d '%s' ];then pssh " + "-s -H '%s' \"pscp -H '%s' " + "'%s' '%s' \";fi)", + "cleanDir1": "(if [ -d '%s' ]; then cd " + "'%s' && rm -rf '%s' && " + "rm -rf '%s' && cd -; fi)", + "cleanDir2": "(if [ -d '%s' ]; then " + "rm -rf '%s'/* && cd '%s' && " + "ls -A | xargs rm -rf && " + "cd -; fi)", + "cleanDir3": "rm -rf '%s'/* && cd '%s' && " + "ls -A | xargs rm -rf && " + "cd - ", + "cleanDir4": "rm -rf %s/*", + "checkNodeConnection": "ping %s -i 1 -c 3 |grep ttl |" + "wc -l", + "overWriteFile": "echo '%s' > '%s'", + "physicMemory": "cat /proc/meminfo | " + "grep MemTotal", + "findFile": "(if [ -d '%s' ]; then " + "find '%s' -type f;fi)", + "unzipForce": "unzip -o '%s' -d '%s'", + "killAll": findCmdInPath("killall") + " %s", + "sleep": "sleep %s", + "softLink": "ln -s '%s' '%s'", + "findwithcd": "cd %s && find ./ -name %s", + "installRpm": "rpm -ivh --nofiledigest %s " + "--nodeps --force --prefix=%s", + "changeMode": "chmod %s %s", + "checkPassword": "export LC_ALL=C; " + "chage -l %s | " + "grep -i %s" + } + + def __init__(self): + """ + constructor + """ + pass + + def createFile(self, path, overwrite=True, mode=None): + """ + function: create file and set the permission + input: + path: the file path. + overwrite: if file already exists and this parameter is true, + we can overwrtie it. + mode: Specify file permissions, type is int and start with 0. + ex: 0700 + output: + return true or false. + """ + try: + if overwrite: + cmd = g_Platform.getCreateFileCmd(path) + if mode: + cmd += "; %s" % g_Platform.getChmodCmd(str(mode), path) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(output + " The cmd is %s" % cmd) + else: + # create file by python API + if mode: + os.mknod(path, mode) + else: + os.mknod(path) + except Exception as e: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50206"] % path + + " Error:\n%s" % str(e)) + + return True + + def createFileInSafeMode(self, filePath, mode=stat.S_IWUSR | stat.S_IRUSR): + """ + Call this method before open(filePath) functions, + if it may create a new file. + This method guarantees a 0o600 file is created + instead of an arbitrary one. + """ + if os.path.exists(filePath): + return + try: + os.mknod(filePath, mode) + except IOError as e: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50206"] % filePath + + " Error:\n%s." % str(e)) + + def removeFile(self, path, cmdType="shell"): + """ + function: remove a file + input: the path of file(include file name) + output: return true or false + """ + if cmdType == "python": + # no file need remove. + if not os.path.exists(path): + return True + # check if is a file. + if not os.path.isfile(path): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50210"] % path) + try: + # remove file. 
+ os.remove(path) + except Exception: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50207"] % path) + else: + # Support* for fuzzy matching + if "*" in path: + path = self.withAsteriskPath(path) + cmd = g_Platform.getRemoveCmd('file') + path + else: + cmd = g_Platform.getRemoveCmd('file') + "'" + path + "'" + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50207"] % path + + " Error:\n%s" % output + + " The cmd is %s" % cmd) + return True + + def moveFile(self, src, dest, overwrite=True): + """ + function: move a file + input: + src: the dir of file + dest: the dir which want to move + output: + return true or false + """ + # check if can overwrite + if os.path.exists(dest) and not overwrite: + raise Exception(ErrorCode.GAUSS_501["GAUSS_50102"] % ( + "parameter overwrite", dest)) + try: + if overwrite: + cmd = g_Platform.getMoveFileCmd(src, dest) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(output + "The cmd is %s" % cmd) + else: + # move file + shutil.move(src, dest) + except Exception as e: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50232"] % (src, dest) + + " Error:\n%s" % str(e)) + + return True + + def readFile(self, filename, keyword="", rows=0): + """ + function: read the content of a file + input: + filename: the name and path of the file + keyword: read line include keyword + rows: the row number, which want to read + offset: keep the parameter, but do nothing + output:list + """ + listKey = [] + strRows = "" + allLines = [] + # check if file exists. + if not os.path.exists(filename): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % filename) + try: + with open(filename, 'rb') as fp: + for line in fp: + allLines.append(line.decode("utf-8")) + except Exception: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50204"] % filename) + # get keyword lines + if keyword != "": + for line in allLines: + flag = line.find(keyword) + if flag >= 0: + listKey.append(line) + # get content of row + if rows: + if not str(rows).isdigit(): + raise Exception + if rows > 0: + row_num = rows - 1 + else: + row_num = rows + try: + if row_num < (len(allLines)): + strRows = allLines[row_num] + except Exception: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50204"] % ( + "the %s line of the file [%s]" % (rows, filename))) + # check which needs return + if keyword != "" and rows != 0: + return [strRows] + if keyword != "" and rows == 0: + return listKey + if keyword == "" and rows != 0: + return [strRows] + if keyword == "" and rows == 0: + return allLines + + def writeFile(self, path, context=None, mode="a+"): + """ + function: write content in a file + input: + path: the name and path of the file + context: the content, which want to write + mode: the write mode + output: + """ + lock = thread.allocate_lock() + if context is None: + context = [] + # check if not exists. + if not os.path.exists(path): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % path) + # check if is a file. + if not os.path.isfile(path): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50210"] % path) + # if no context, return + if not context: + return False + self.createFileInSafeMode(path) + with open(path, mode) as fp: + fp.writelines(line + os.linesep for line in context) + lock.acquire() + try: + # write context. 
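+                # note: the lock is allocated per call, so it only brackets
+                # this flush and does not serialize concurrent writers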
+ fp.flush() + except Exception as e: + lock.release() + raise Exception(ErrorCode.GAUSS_502["GAUSS_50205"] % path + + "Error:\n%s" % str(e)) + lock.release() + return True + + def withAsteriskPath(self, path): + """ + function: quote a path that contains * + input: the path to deal with + output: cmd + """ + path_dirList = os.path.realpath(path).split(os.path.sep)[1:] + path = "'" + for dirName in path_dirList: + if "*" in dirName: + dirPath = "'" + os.path.sep + dirName + "'" + else: + dirPath = os.path.sep + dirName + path += dirPath + if path[-1] == "'": + path = path[:-1] + else: + path += "'" + return path + + def changeMode(self, mode, path, recursive=False, cmdType="shell", + retryFlag=False, retryTime=15, waiteTime=1): + """ + function: change permission of file + input: + cmdType: use shell or python + mode: permission value, an int that starts with 0, ex: 0700 + path:file path + recursive: recursive or not + output: return true + """ + try: + # do with shell command. + if cmdType == "shell": + if "*" in path: + path = self.withAsteriskPath(path) + else: + path = "'" + path + "'" + cmd = g_Platform.getChmodCmd(str(mode), path, recursive) + if retryFlag: + self.retryGetstatusoutput(cmd, retryTime, waiteTime) + else: + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_501[ + "GAUSS_50107"] % path + + " Error:\n%s." % output + + "The cmd is %s" % cmd) + # do with python API. If the name has special characters. + else: + os.chmod(path, mode) + except Exception as e: + raise Exception(str(e)) + return True + + def changeOwner(self, user, path, recursive=False, cmdType="shell", + retryFlag=False, retryTime=15, waiteTime=1): + """ + function: change the owner of file + input: cmdType, user, path, recursive + output: return true + """ + try: + # get uid and gid by username. + userInfo = pwd.getpwnam(user) + uid = userInfo.pw_uid + gid = userInfo.pw_gid + group = grp.getgrgid(gid).gr_name + except Exception as e: + raise Exception(ErrorCode.GAUSS_503["GAUSS_50308"] + + " Error:\n%s" % str(e)) + try: + # do with shell command. + if cmdType == "shell": + if "*" in path: + path = self.withAsteriskPath(path) + else: + path = "'" + path + "'" + cmd = g_Platform.getChownCmd(user, group, path, recursive) + if retryFlag: + self.retryGetstatusoutput(cmd, retryTime, waiteTime) + else: + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(output + " The cmd is %s" % cmd) + # do with python API. If the name has special characters. + else: + os.chown(path, uid, gid) + except Exception as e: + raise Exception(ErrorCode.GAUSS_501["GAUSS_50106"] % path + + " Error:\n%s." % str(e)) + return True + + def retryGetstatusoutput(self, cmd, retryTime, sleepTime): + """ + function : execute a command and retry it on failure.
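+        Example (illustrative): retryGetstatusoutput("ls '/tmp'", 3, 1)
+        runs the command once, retries up to 3 more times with a
+        1-second sleep between attempts, and raises GAUSS_51400
+        if the command still fails.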
+ input : cmd, retryTime, sleepTime + output: NA + """ + countNum = 0 + (status, output) = subprocess.getstatusoutput(cmd) + while countNum < retryTime: + if status != 0: + sleepCmd = "sleep %s" % sleepTime + subprocess.getstatusoutput(sleepCmd) + (status, output) = subprocess.getstatusoutput(cmd) + countNum = countNum + 1 + else: + break + if status != 0: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + " Error:\n%s" % output) + + def createDirectory(self, path, overwrite=True, mode=None): + """ + function: create a directory + input: path, overwrite + output: true + """ + try: + if os.path.exists(path) and not overwrite: + raise Exception(ErrorCode.GAUSS_501["GAUSS_50102"] % ( + "parameter overwrite", path)) + if overwrite: + cmd = g_Platform.getMakeDirCmd(path, overwrite) + if mode: + cmd += "; %s" % g_Platform.getChmodCmd(str(mode), path) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(output + " The cmd is %s" % cmd) + if not overwrite: + if mode: + os.mkdir(path, mode) + else: + os.mkdir(path) + except Exception as e: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50208"] % path + + " Error:\n%s" % str(e)) + return True + + def cleanDirectoryContent(self, path): + """ + function: clean the content in a directory, + but do not remove directory. + input:path + output:true + """ + rm_Dirfile = "cd %s && ls | xargs -n 100000" % (path) + if not os.path.exists(path): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % path) + cmd = "%s %s && %s '%s'/.[^.]*" % (rm_Dirfile, g_Platform.getRemoveCmd( + "directory"), g_Platform.getRemoveCmd("directory"), path) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50209"] % ( + "content in the directory %s " % path) + + " Error:\n%s." % output + "The cmd is %s" % cmd) + return True + + def removeDirectory(self, path): + """ + function: remove the content in a directory + input:path + output:true + """ + if "*" in path: + path = self.withAsteriskPath(path) + cmd = "%s %s" % (g_Platform.getRemoveCmd("directory"), path) + else: + cmd = "%s '%s'" % (g_Platform.getRemoveCmd("directory"), path) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50209"] % path + + " Error:\n%s."
% output + "The cmd is %s" % cmd) + return True + + def moveDirectory(self, src, dest): + """ + function:move the content in a directory + input:src, dest + output:true + """ + if not os.path.exists(src): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % src) + if not os.path.exists(dest): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % dest) + cmd = g_Platform.getMoveCmd(src, dest) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + " Error:\n%s" % output) + return True + + def getDirectoryList(self, path, keywords="", recursive=False): + """ + function: list the files in a directory + input:path, keywords, recursive + output:list + """ + list_Dir = [] + try: + if keywords == "": + if recursive: + cmd = "%s -R '%s'" % (g_Platform.getListCmd(), path) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(output + "\nThe cmd is %s" % cmd) + list_Dir = output.split('\n') + else: + list_Dir = os.listdir(path) + else: + if recursive: + cmd = "%s -R '%s' |%s -E '%s'" % ( + g_Platform.getListCmd(), path, + g_Platform.getGrepCmd(), keywords) + else: + cmd = "%s '%s' |%s -E '%s'" % ( + g_Platform.getListCmd(), path, + g_Platform.getGrepCmd(), keywords) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0 and output != "": + raise Exception(output + "\nThe cmd is %s" % cmd) + else: + list_Dir = output.split('\n') + except Exception as e: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50219"] % ( + "the list of %s" % path) + " Error:\n%s" % str(e)) + while '' in list_Dir: + list_Dir.remove('') + return list_Dir + + def cpFile(self, src, dest, cmdType="shell", skipCheck=False): + """ + function: copy a file + input:src, dest, cmdType, skipCheck + output:true + """ + if not skipCheck: + if not os.path.exists(src): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % src) + if not os.path.exists(os.path.dirname(dest)): + raise Exception(ErrorCode.GAUSS_502[ + "GAUSS_50201"] % os.path.dirname(dest)) + try: + if cmdType != "shell": + shutil.copy(src, dest) + else: + cmd = g_Platform.getCopyCmd(src, dest, "directory") + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(output + "\nThe cmd is %s" % cmd) + except Exception as e: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50214"] % src + + " Error:\n%s" % str(e)) + return True + + def findFile(self, path, keyword, choice='name'): + """ + function:find a file by name or size or user + input:path, keyword, choice + output:list + """ + if not os.path.exists(path): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % path) + cmd = "%s '%s' -%s %s " % (g_Platform.getFindCmd(), path, + choice, keyword) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50219"] % ( + "the files of path %s" % path) + " Error:\n%s" % output + + "\nThe cmd is %s" % cmd) + list_File = output.split('\n') + while '' in list_File: + list_File.remove('') + return list_File + + def compressFiles(self, tarName, dirPath): + """ + function:compress directory to a package + input:tarName, directory + output:NA + """ + cmd = g_Platform.getCompressFilesCmd(tarName, dirPath) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50227"] % cmd + + " Error:\n%s" % output) + + def decompressFiles(self, srcPackage, dest): + """ + function:decompress package to files + input:srcPackage, dest +
output:NA + """ + if not os.path.exists(srcPackage): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % srcPackage) + if not os.path.exists(dest): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % dest) + cmd = g_Platform.getDecompressFilesCmd(srcPackage, dest) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50231"] % srcPackage + + " Error:\n%s" % output + "\nThe cmd is %s" % cmd) + + def compressZipFiles(self, zipName, dirPath): + """ + function:compress directory to a package + input:zipName, directory + output:NA + """ + if not os.path.exists(dirPath): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % dirPath) + cmd = g_Platform.getCompressZipFilesCmd(zipName, dirPath) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50227"] % cmd + + " Error:\n%s" % output) + + def decompressZipFiles(self, srcPackage, dest): + """ + function:decompress package to files + input:srcPackage, dest + output:NA + """ + if not os.path.exists(srcPackage): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % srcPackage) + if not os.path.exists(dest): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % dest) + cmd = g_Platform.getDecompressZipFilesCmd(srcPackage, dest) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50231"] % srcPackage + + " Error:\n%s" % output + "\nThe cmd is %s" % cmd) + + def getfileUser(self, path): + """ + function: get the info(username group) of a file + input:path + output:list of info + """ + if not os.path.exists(path): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % path) + + user = pwd.getpwuid(os.stat(path).st_uid).pw_name + group = grp.getgrgid(os.stat(path).st_gid).gr_name + return user, group + + def replaceFileLineContent(self, oldLine, newLine, path): + """ + function: replace the line in a file to a new line + input: + oldLine : Need to replace content + newLine : Replaced content + path + output:NA + """ + if not os.path.exists(path): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % path) + + cmd = g_Platform.getReplaceFileLineContentCmd(oldLine, newLine, path) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50223"] % path + + " Error:\n%s" % output + "\nThe cmd is %s" % cmd) + + def checkIsInDirectory(self, fileName, directoryList): + """ + function : Check if the file is in directoryList. + input : String,[] + output : [] + """ + try: + isExist = False + for onePath in directoryList: + dirName = os.path.normpath(fileName) + isExist = False + + while dirName != "/": + if dirName == onePath: + isExist = True + break + dirName = os.path.dirname(dirName) + + if isExist: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50229"] % ( + fileName, onePath)) + except Exception as e: + raise Exception(str(e)) + return isExist + + def checkDirWriteable(self, dirPath): + """ + function : Check if target directory is writeable for execute user. + input : String,String + output : boolean + """ + # if we can touch a tmp file under the path, it is true; + return os.access(dirPath, os.W_OK) + + def checkFilePermission(self, filename, isread=False, iswrite=False, + isexecute=False): + """ + Function : check file: 1.exist 2. isfile 3. permission + Note : 1.You must check that the file exist and is a file. + 2.You can choose whether to check the file's + permission:readable/writable/executable. 
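+        Example (illustrative): checkFilePermission("/etc/hosts",
+        isread=True) returns True only when the file exists, is a
+        regular file, and is readable by the current user.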
+ input : filename, isread, iswrite, isexecute + output : True + """ + if not os.path.exists(filename): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % filename) + if not os.path.isfile(filename): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50210"] % filename) + if isread: + if not os.access(filename, os.R_OK): + raise Exception(ErrorCode.GAUSS_501["GAUSS_50100"] % ( + filename, "the user") + + " Error:\n%s: Permission denied." % filename) + if iswrite: + if not os.access(filename, os.W_OK): + raise Exception(ErrorCode.GAUSS_501["GAUSS_50102"] % ( + filename, "the user") + + " Error:\n%s: Permission denied." % filename) + if isexecute: + if not os.access(filename, os.X_OK): + raise Exception(ErrorCode.GAUSS_501[ + "GAUSS_50101"] % (filename, "the user") + + " Error:\n%s: Permission denied." % filename) + return True + + def getFileSHA256(self, filename): + """ + function : Get the SHA256 checksum of a file + input : String + output : String + """ + if not os.path.exists(filename): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % filename) + if not os.path.isfile(filename): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50210"] % filename) + + strSHA256 = "" + cmd = g_Platform.getFileSHA256Cmd(filename) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + return strSHA256 + strSHA256 = output.strip() + + return strSHA256 + + def getDirSize(self, path, unit=""): + """ + function : Get the directory or file size + input : String, String + output : String + """ + sizeInfo = "" + cmd = g_Platform.getDirSizeCmd(path, unit) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + return sizeInfo + return output.split()[0] + + def getTopPath(self, path): + """ + function: find the top path of the specified path + input : path + output: tmpDir + """ + tmpDir = path + while True: + # find the top path to be created + (tmpDir, topDirName) = os.path.split(tmpDir) + if os.path.exists(tmpDir) or topDirName == "": + tmpDir = os.path.join(tmpDir, topDirName) + break + return tmpDir + + def getFilesType(self, givenPath): + """ + function : get the file and subdirectory type of the given path + input : String + output : dict + """ + if not os.path.exists(givenPath): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % givenPath) + # obtain the file type + tmpFile = "/tmp/fileList_%d" % os.getpid() + cmd = "%s '%s' !
-iname '.*' | %s file -F '::' > %s 2>/dev/null" % ( + g_Platform.getFindCmd(), givenPath, + g_Platform.getXargsCmd(), tmpFile) + subprocess.getstatusoutput(cmd) + # The return code is non-zero when 'file' is given a non-existent + # file on SLES SP4, + # but it is zero on SLES SP1/SP2/SP3 and + # RHEL 6.4/6.5/6.6, so skip checking status and output + resDict = {} + try: + with open(tmpFile, 'r') as fp: + fileNameTypeList = fp.readlines() + os.remove(tmpFile) + for oneItem in fileNameTypeList: + res = oneItem.split("::") + if len(res) != 2: + continue + else: + resDict[res[0]] = res[1] + return resDict + except Exception as e: + if os.path.exists(tmpFile): + g_file.removeFile(tmpFile) + raise Exception(ErrorCode.GAUSS_502["GAUSS_50221"] + + " Error: \n%s" % str(e)) + + # delete lines in a file that match the given regular expression + def deleteLine(self, filePath, lineInfo): + """ + function : delete line in a file + input : filePath, lineInfo + output : NA + """ + cmd = g_Platform.getSedCmd() + cmd += " -i '/%s/d' %s" % (lineInfo, filePath) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50205"] % filePath + + " Error:\n%s" % output + "\nThe cmd is %s" % cmd) + + def deleteLineByRowNum(self, filePath, lineNum): + """ + function : delete line in a file by row num + input : filePath, lineNum + output : NA + """ + cmd = g_Platform.getSedCmd() + cmd += " -i '%sd' %s" % (lineNum, filePath) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50205"] % filePath + + " Error:\n%s" % output + "\nThe cmd is %s" % cmd) + + def rename(self, oldFilePath, newFilePath): + """ + function : rename a file name to new name + input : oldFilePath, newFilePath + output : NA + """ + cmd = g_Platform.getMoveCmd(oldFilePath, newFilePath) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50218"] % oldFilePath + + " Error:\n%s" % output + "\nThe cmd is %s" % cmd) + + def echoLineToFile(self, line, filePath): + """ + function : write a line to a file + input : line, filePath + output : NA (stderr is discarded via 2>/dev/null) + Notice: the line may contain '$' + """ + cmd = g_Platform.echoCmdWithNoReturn(line, filePath) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50205"] % filePath + + " Command:%s.
Error:\n%s" % (cmd, output)) + + def checkClusterPath(self, path_name): + """ + Check the path + :param path_name: + :return: + """ + if not path_name: + return False + + a_ascii = ord('a') + z_ascii = ord('z') + A_ascii = ord('A') + Z_ascii = ord('Z') + num0_ascii = ord('0') + num9_ascii = ord('9') + blank_ascii = ord(' ') + sep1_ascii = ord('/') + sep2_ascii = ord('_') + sep3_ascii = ord('-') + sep4_ascii = ord(':') + sep5_ascii = ord('.') + sep6_ascii = ord(',') + for path_char in path_name: + char_check = ord(path_char) + if (not (a_ascii <= char_check <= z_ascii or A_ascii <= + char_check <= Z_ascii or + num0_ascii <= char_check <= num9_ascii or + char_check == blank_ascii or + char_check == sep1_ascii or + char_check == sep2_ascii or + char_check == sep3_ascii or + char_check == sep4_ascii or + char_check == sep5_ascii or + char_check == sep6_ascii)): + return False + return True + + def checkPathIsLegal(self, path_name): + """ + check if the path is legal + :param file name: + :return: + """ + if not self.checkClusterPath(path_name): + raise Exception(ErrorCode.GAUSS_512["GAUSS_51250"] % path_name) + + def cdDirectory(self, dirPath, user=""): + """ + """ + if (user != "") and (os.getuid() == 0): + cmd = "su - %s 'cd \'%s\' '" % (user, dirPath) + else: + cmd = "cd '%s'" % dirPath + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + " Error: \n%s " % output) + + +g_file = fileManage() diff --git a/script/gspylib/os/gsnetwork.py b/script/gspylib/os/gsnetwork.py new file mode 100644 index 0000000..4093950 --- /dev/null +++ b/script/gspylib/os/gsnetwork.py @@ -0,0 +1,226 @@ +# -*- coding:utf-8 -*- +############################################################################# +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +# Description : gsnetwork.py is a utility to do something for +# network information. 
+############################################################################# +import subprocess +import sys +import _thread as thread +import re +import psutil + +sys.path.append(sys.path[0] + "/../../") + +from gspylib.os.gsplatform import g_Platform +from gspylib.threads.parallelTool import parallelTool + +g_failedAddressList = [] +g_lock = thread.allocate_lock() + +""" +Requirements: +""" + + +class networkInfo(): + """ + Class: networkinfo + """ + + def __init__(self): + """ + constructor + """ + self.NICNum = "" + self.ipAddress = "" + self.networkMask = "" + self.MTUValue = "" + + self.TXValue = "" + self.RXValue = "" + self.networkSpeed = "" + self.networkConfigFile = "" + self.networkBondModeInfo = "" + self.hostName = "" + + def __str__(self): + """ + function: str + """ + return "NICNum=%s,ipAddress=%s,networkMask=%s,MTUValue=%s," \ + "TXValue=%s," \ + "RXValue=%s,networkSpeed=%s,networkConfigFile=%s," \ + "networkBondModeInfo=\"%s\"" % \ + (self.NICNum, self.ipAddress, self.networkMask, self.MTUValue, + self.TXValue, self.RXValue, self.networkSpeed, + self.networkConfigFile, + self.networkBondModeInfo) + + +class Network(): + """ + function: Init the Network options + """ + + def __init__(self): + pass + + def isIpValid(self, ipAddress): + """ + function : check if the input ip address is valid + input : String + output : bool + """ + Valid = re.match("^(25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[1-9][0-9]|" + "[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[1-9][" + "0-9]" + "|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[1-9]" + "[0-9]|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|" + "[1-9][0-9]|[0-9])$", ipAddress) + if (Valid): + if (Valid.group() == ipAddress): + return True + return False + + def executePingCmd(self, ipAddress): + """ + function : Send the network command of ping. + input : String + output : NA + """ + pingCmd = g_Platform.getPingCmd(ipAddress, "5", "1") + cmd = "%s | %s ttl | %s -l" % (pingCmd, g_Platform.getGrepCmd(), + g_Platform.getWcCmd()) + (status, output) = subprocess.getstatusoutput(cmd) + if (str(output) == '0' or status != 0): + g_lock.acquire() + g_failedAddressList.append(ipAddress) + g_lock.release() + + def checkIpAddressList(self, ipAddressList): + """ + function : Check the connection status of network. 
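+        Example (illustrative; 192.0.2.1 is a placeholder test address):
+        checkIpAddressList(["127.0.0.1", "192.0.2.1"]) pings every
+        address in parallel and returns the addresses that did not
+        respond, e.g. ["192.0.2.1"].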
+ input : [] + output : [] + """ + global g_failedAddressList + g_failedAddressList = [] + parallelTool.parallelExecute(self.executePingCmd, ipAddressList) + return g_failedAddressList + + def getAllNetworkIp(self): + """ + function: get All network ip + """ + networkInfoList = [] + mappingList = g_Platform.getIpAddressAndNICList() + for onelist in mappingList: + data = networkInfo() + # NIC number + data.NICNum = onelist[0] + # ip address + data.ipAddress = onelist[1] + networkInfoList.append(data) + return networkInfoList + + def getNetworkMTUValueByNICNum(self, networkCardNum): + """ + function: get Network MTU Value By NICNum + """ + return psutil.net_if_stats()[networkCardNum].mtu + + def getAllNetworkInfo(self): + """ + function: get all network info + """ + networkInfoList = [] + mappingList = g_Platform.getIpAddressAndNICList() + for oneList in mappingList: + data = networkInfo() + # NIC number + data.NICNum = oneList[0] + # ip address + data.ipAddress = oneList[1] + + # host name + try: + data.hostName = g_Platform.getHostNameByIPAddr( + data.ipAddress) + except Exception: + data.hostName = "" + + # network mask + try: + data.networkMask = g_Platform.getNetworkMaskByNICNum( + data.NICNum) + except Exception: + data.networkMask = "" + + # MTU value + try: + data.MTUValue = self.getNetworkMTUValueByNICNum( + data.NICNum) + except Exception: + data.MTUValue = "" + + # TX value + try: + data.TXValue = g_Platform.getNetworkRXTXValueByNICNum( + data.NICNum, 'tx') + except Exception: + data.TXValue = "" + + # RX value + try: + data.RXValue = g_Platform.getNetworkRXTXValueByNICNum( + data.NICNum, 'rx') + except Exception: + data.RXValue = "" + + # network speed + try: + data.networkSpeed = g_Platform.getNetworkSpeedByNICNum( + data.NICNum) + except Exception: + data.networkSpeed = "" + + # network config file + try: + data.networkConfigFile = \ + g_Platform.getNetworkConfigFileByNICNum(data.NICNum) + except Exception: + data.networkConfigFile = "" + + # network bond mode info + try: + data.networkBondModeInfo = g_Platform.getNetworkBondModeInfo( + data.networkConfigFile, data.NICNum) + except Exception: + data.networkBondModeInfo = "" + + networkInfoList.append(data) + return networkInfoList + + def checkNetworkInterruptByNIC(self, networkCardNum): + """ + function: check Network Interrupt By NIC + """ + return g_Platform.checkNetworkInterruptByNIC(networkCardNum) + + +g_network = Network() diff --git a/script/gspylib/os/gsplatform.py b/script/gspylib/os/gsplatform.py new file mode 100644 index 0000000..e9f0fea --- /dev/null +++ b/script/gspylib/os/gsplatform.py @@ -0,0 +1,1999 @@ +# -*- coding:utf-8 -*- +############################################################################# +# Portions Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# Portions Copyright (c) 1999-2000, Marc-Andre Lemburg; mailto:mal@lemburg.com +# Portions Copyright (c) 2000-2010, eGenix.com Software GmbH; mailto:info@egenix.com +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. 
+# ---------------------------------------------------------------------------- +# Description : gsplatform.py is a utility to do something for +# platform information. +############################################################################# + +""" The following platform framework is used to handle any differences between + the platform's we support. The GenericPlatform class is the base class + that a supported platform extends from and overrides any of the methods + as necessary. +""" + +import os +import sys +import re +import subprocess +import platform +import socket +import time + +sys.path.append(sys.path[0] + "/../../") +from gspylib.common.ErrorCode import ErrorCode + +localDirPath = os.path.dirname(os.path.realpath(__file__)) +sys.path.insert(0, localDirPath + "/../../../lib/netifaces/") +sys.path.append(localDirPath + "/../inspection/lib/netifaces/") +try: + from netifaces import interfaces, ifaddresses, AF_INET, AF_INET6 +except ImportError as e: + # get python unicode value. The current environment python is compiled + # with UCS2 or UCS4. + # 1114111 is UCS4 + # 65535 is UCS2 + flagNum = 4 if sys.maxunicode == 1114111 else 2 + omToolsNetifacesPath = os.path.join( + localDirPath, "./../../../lib/netifaces/netifaces.so") + inspectToolsNetifacesPath = os.path.join( + localDirPath, "./../../../script/gspylib/inspection/\ + lib/netifaces/netifaces.so") + newPythonDependNetifacesPath = "%s_UCS%d" % (omToolsNetifacesPath, + flagNum) + glo_cmd = "rm -f '%s' && " \ + "cp -f -p '%s' '%s' " % (omToolsNetifacesPath, + newPythonDependNetifacesPath, + omToolsNetifacesPath) + glo_cmd += " && rm -f '%s' && " \ + "cp -f -p '%s' '%s' " % (inspectToolsNetifacesPath, + newPythonDependNetifacesPath, + inspectToolsNetifacesPath) + flagExce = True + for retryNum in range(3): + (statusExec, outputExec) = subprocess.getstatusoutput(glo_cmd) + if statusExec != 0: + flagExce = False + time.sleep(1) + else: + flagExce = True + break + if not flagExce: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % glo_cmd + + "Error:\n%s" % outputExec) + from netifaces import interfaces, ifaddresses, AF_INET, AF_INET6 + +# ---------------platforms-------------------- +# global variable for our platform +_supported_dists = ( + 'SuSE', 'debian', 'fedora', 'redhat', 'centos', 'euleros', "openEuler", + 'mandrake', 'mandriva', 'rocks', 'slackware', 'yellowdog', 'gentoo', + 'UnitedLinux', 'turbolinux') +_release_filename = re.compile(r'(\w+)[-_](release|version)') +_lsb_release_version = re.compile(r'(.+)' + ' release ' + '([\d.]+)' + '[^(]*(?:\((.+)\))?') +_release_version = re.compile(r'([^0-9]+)' + '(?: release )?' 
+ '([\d.]+)' + '[^(]*(?:\((.+)\))?') +SUSE = "suse" +REDHAT = "redhat" +CENTOS = "centos" +EULEROS = "euleros" +KYLIN = "kylin" +OPENEULER = "openeuler" +ASIANUX = "asianux" +SUPPORT_WHOLE_PLATFORM_LIST = [SUSE, REDHAT, CENTOS, EULEROS, + OPENEULER, KYLIN, ASIANUX] +# RedhatX platform +SUPPORT_RHEL_SERIES_PLATFORM_LIST = [REDHAT, CENTOS, "kylin", "asianux"] +SUPPORT_RHEL6X_VERSION_LIST = ["6.4", "6.5", "6.6", "6.7", "6.8", "6.9", "10"] +SUPPORT_RHEL7X_VERSION_LIST = ["7.0", "7.1", "7.2", "7.3", "7.4", "7.5", "7.6", "10"] +SUPPORT_RHEL_SERIES_VERSION_LIST = (SUPPORT_RHEL6X_VERSION_LIST + + SUPPORT_RHEL7X_VERSION_LIST) +# EulerOS 2.3 -> 2.0 SP3 +SUPPORT_EULEROS_VERSION_LIST = ["2.0"] +# SuSE platform +SUSE11 = "11" +SUSE12 = "12" +SUPPORT_SUSE_VERSION_LIST = [SUSE11, SUSE12] +SUPPORT_SUSE11X_VERSION_LIST = ["1", "2", "3", "4"] +SUPPORT_RHEL12X_VERSION_LIST = ["0", "1", "2", "3"] +BIT_VERSION = "64bit" + +# ---------------command path-------------------- +CMD_PATH = ['/bin', '/usr/local/bin', '/usr/bin', '/sbin', '/usr/sbin'] +CMD_CACHE = {} +BLANK_SPACE = " " +COLON = ":" +# Need to be consistent with the packaging script +PAK_CENTOS = "CentOS" +PAK_EULER = "Euler" +PAK_OPENEULER = "openEuler" +PAK_REDHAT = "RedHat" +PAK_ASIANUX = "asianux" + +####################################################### +_supported_dists = ( + 'SuSE', 'debian', 'fedora', 'redhat', 'centos', 'euleros', "openEuler", + 'mandrake', 'mandriva', 'rocks', 'slackware', 'yellowdog', 'gentoo', + 'UnitedLinux', 'turbolinux', 'kylin', 'asianux') +_release_filename = re.compile(r'(\w+)[-_](release|version)') +_lsb_release_version = re.compile(r'(.+)' + ' release ' + '([\d.]+)' + '[^(]*(?:\((.+)\))?') +_release_version = re.compile(r'([^0-9]+)' + '(?: release )?' + '([\d.]+)' + '[^(]*(?:\((.+)\))?') + + +def _parse_release_file(firstline): + """ + Default to empty 'version' and 'id' strings. Both defaults are used + when 'firstline' is empty. 'id' defaults to empty when an id can not + be deduced. + """ + version = '' + idNum = '' + + # Parse the first line + m = _lsb_release_version.match(firstline) + if m is not None: + # LSB format: "distro release x.x (codename)" + return tuple(m.groups()) + + # Pre-LSB format: "distro x.x (codename)" + m = _release_version.match(firstline) + if m is not None: + return tuple(m.groups()) + + # Unknown format... take the first two words + parts = firstline.strip().split() + if parts: + version = parts[0] + if len(parts) > 1: + idNum = parts[1] + return '', version, idNum + + +def linux_distribution(distname='', version='', idNum='', + supported_dists=_supported_dists, + full_distribution_name=1): + """ + Tries to determine the name of the Linux OS distribution. + + The function first looks for a distribution release file in + /etc and then reverts to _dist_try_harder() in case no + suitable files are found. + + supported_dists may be given to define the set of Linux + distributions to look for. It defaults to a list of currently + supported Linux distributions identified by their release file + name. + + If full_distribution_name is true (default), the full + distribution read from the OS is returned. Otherwise the short + name taken from supported_dists is used. + + Returns a tuple (distname, version, id) which default to the + args given as parameters.
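+    Example (illustrative): on a CentOS 7.6 host this would return
+    something like ('CentOS Linux', '7.6.1810', 'Core'), parsed from
+    /etc/centos-release.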
+ + """ + try: + etc = os.listdir('/etc') + except os.error: + # Probably not a Unix system + return distname, version, idNum + sortEtc = sorted(etc) + gFile = None + for file in sortEtc: + if os.path.islink('/etc/' + file): + continue + m = _release_filename.match(file) + if m is not None: + _distname, dummy = m.groups() + if _distname in supported_dists: + gFile = file + distname = _distname + break + + # Read the first line + if gFile is None: + return distname, version, idNum + with open('/etc/' + gFile, 'r') as f: + firstline = f.readline() + _distname, _version, _id = _parse_release_file(firstline) + + if _distname and full_distribution_name: + distname = _distname + if _version: + version = _version + if _id: + idNum = _id + return distname, version, idNum + + +def dist(supported_dists=_supported_dists): + """ Tries to determine the name of the Linux OS distribution name. + + The function first looks for a distribution release file in + /etc and then reverts to _dist_try_harder() in case no + suitable files are found. + + Returns a tuple (distname,version,id) which default to the + args given as parameters. + + """ + return linux_distribution(supported_dists=supported_dists, + full_distribution_name=0) + + +# ------------------platform module---------------------- +class CommandNotFoundException(Exception): + """ + """ + + def __init__(self, cmd, paths): + """ + function: constructor + """ + self.cmd = cmd + self.paths = paths + + def __str__(self): + """ + function: str + input : NA + output : NA + """ + return "Could not locate command: '%s' in this " \ + "set of paths: %s" % (self.cmd, repr(self.paths)) + + +def findCmdInPath(cmd, additionalPaths=None, printError=True): + """ + function: find cmd in path + input: cmd, additionalPaths, printError + output: NA + """ + global CMD_CACHE + if additionalPaths is None: + additionalPaths = [] + if cmd not in CMD_CACHE: + # Search additional paths and don't add to cache. + for p in additionalPaths: + f = os.path.join(p, cmd) + if os.path.exists(f): + return f + + for p in CMD_PATH: + f = os.path.join(p, cmd) + if os.path.exists(f): + CMD_CACHE[cmd] = f + return f + + if cmd == "killall": + gphome = os.getenv("GPHOME") + if gphome is None or \ + not os.path.exists(os.path.join(gphome, "script/killall")): + gphome = os.path.dirname(os.path.realpath(__file__))\ + + "/../../.." + gphome = gphome.replace("\\", "\\\\").replace('"', '\\"\\"') + for rac in ["|", ";", "&", "$", "<", ">", "`", "\\", "'", "\"", + "{", "}", "(", ")", "[", "]", "~", "*", + "?", " ", "!", "\n"]: + if rac in gphome: + raise Exception( + ErrorCode.GAUSS_502["GAUSS_50219"] % gphome + + " There are illegal characters in the path.") + if gphome != "" and os.path.exists(os.path.join(gphome, + "script/killall")): + return os.path.join(gphome, "script/killall") + else: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % "killall") + + if printError: + print('Command %s not found' % cmd) + search_path = CMD_PATH[:] + search_path.extend(additionalPaths) + raise CommandNotFoundException(cmd, search_path) + else: + return CMD_CACHE[cmd] + + +# Requirements: +# 1. 
ulimit, ntpq, source, kerberos is not found under system path + + +class GenericPlatform: + """ + manage OS command,config or service for muti-platform + """ + + def __init__(self): + """ + function: constructor + """ + pass + + def echoCmdWithNoReturn(self, line, filePath): + """ + function: echo cmd with no return + input : line, filePath + output : str + """ + cmd = "echo %s >> '%s' 2>/dev/null" % (line, filePath) + return cmd + + def getCreateFileCmd(self, path): + """ + function: get create file cmd + input : path + output : str + """ + cmd = "touch '%s'" % path + return cmd + + def getMoveFileCmd(self, src, dest): + """ + function: get move file cmd + input : src, dest + output : str + """ + cmd = "mv '%s' '%s'" % (src, dest) + return cmd + + def get_machine_arch_cmd(self): + """ + function: get machine arch cmd + input : NA + output : str + """ + return 'uname -i' + + def getDefaultLocale(self): + """ + function: get default locale + input : NA + output : str + """ + return 'en_US.utf-8' + + def getRemoveCmd(self, pathType): + """ + function: get remove cmd + input : pathType + output : str + """ + opts = " " + if pathType == "file": + opts = " -f " + elif pathType == "directory": + opts = " -rf " + return findCmdInPath('rm') + opts + + def getChmodCmd(self, Permission, src, recursive=False): + """ + function: get chmod cmd + input : Permission, src, recursive + output : str + """ + return findCmdInPath('chmod') + \ + (" -R " if recursive else BLANK_SPACE) + \ + Permission + BLANK_SPACE + src + + def getChownCmd(self, owner, group, src, recursive=False): + """ + function: get chown cmd + input : owner, group, src, recursive + output : str + """ + return findCmdInPath('chown') + \ + (" -R " if recursive else BLANK_SPACE) + owner + \ + COLON + group + BLANK_SPACE + src + + def getCopyCmd(self, src, dest, pathType=""): + """ + function: get copy cmd + input : src, dest, pathType + output : str + """ + opts = " " + if pathType == "directory": + opts = " -r " + return findCmdInPath('cp') + " -p -f " + opts + BLANK_SPACE + "'" + \ + src + "'" + BLANK_SPACE + "'" + dest + "'" + + def getRemoteCopyCmd(self, src, dest, remoteHost, copyTo=True, + pathType="", otherHost=None): + """ + function: get remote copy cmd + input : src, dest, remoteHost, copyTo=True, + pathType="", otherHost + output : str + """ + opts = " " + if pathType == "directory": + opts = " -r " + if copyTo: + return "pscp -x '%s' -H %s %s %s " % (opts.strip(), + remoteHost, src, dest) + else: + localhost = self.getLocalIp() + if otherHost is not None: + localhost = otherHost + return "pssh -s -H %s \" pscp -x '%s' -H %s %s %s \" " % ( + remoteHost, opts.strip(), localhost, src, dest) + + def getHostName(self): + """ + function : Get host name + input : NA + output: string + """ + hostCmd = findCmdInPath('hostname') + (status, output) = subprocess.getstatusoutput(hostCmd) + # if cmd failed, then exit + if status != 0: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50219"] % "host name" + + "The cmd is %s" % hostCmd) + return output + + def getLocalIp(self): + """ + function: Obtaining the local IP address + input: NA + output: str + """ + return self.getHostName() + + def getScpCmd(self): + """ + Get scp cmd for special remotely copy, just like remote to + remote or remote to local. 
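+        Example (illustrative; "node2" and the paths are placeholders):
+        getScpCmd() + "node2 /tmp/a.log /tmp/" yields the command
+        "pscp -H node2 /tmp/a.log /tmp/".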
+ :return: str + """ + return "pscp -H " + + def getUseraddCmd(self, user, group): + """ + function: get user add cmd + input : user, group + output : str + """ + return findCmdInPath('useradd') + " -m " + user + " -g " + group + + def getUserdelCmd(self, user): + """ + function: get userdel cmd + input : user + output : str + """ + return findCmdInPath('userdel') + " -r " + user + + def getGroupaddCmd(self, group): + """ + function: get group add cmd + input : group + output : str + """ + return findCmdInPath('groupadd') + " " + group + + def getGroupdelCmd(self, group): + """ + function: get group del cmd + input : group + output : str + """ + return findCmdInPath('groupdel') + " " + group + + def getMoveCmd(self, src, dest): + """ + function: get move cmd + input : src, dest + output : str + """ + return findCmdInPath('mv') + " -f " + "'" + src + \ + "'" + BLANK_SPACE + "'" + dest + "'" + + def getMakeDirCmd(self, src, recursive=False): + """ + function: get make dir cmd + input : src, recursive + output : str + """ + return findCmdInPath('mkdir') + \ + (" -p " if recursive else BLANK_SPACE) + "'" + src + "'" + + def getPingCmd(self, host, count, interval, packetSize=56): + """ + function: get ping cmd + input : host, count, interval, packetSize + output : str + """ + opts = " " + if int(packetSize) != int(56): + opts = " -s " + str(packetSize) + return findCmdInPath('ping') + BLANK_SPACE + host + " -c " + \ + count + " -i " + interval + opts + + def getWcCmd(self): + """ + function: get wc cmd + input : NA + output : str + """ + return findCmdInPath('wc') + + def getTarCmd(self): + """ + function: get tar cmd + input : NA + output : str + """ + return findCmdInPath('tar') + + def getZipCmd(self): + """ + function: get zip cmd + input : NA + output : str + """ + return findCmdInPath('zip') + + def getUnzipCmd(self): + """ + function: get unzip cmd + input : NA + output : str + """ + return findCmdInPath('unzip') + + def getEchoCmd(self, echoString): + """ + function: get echo cmd + input : echoString + output : str + """ + cmdStr = '%s "%s"' % (findCmdInPath('echo'), echoString) + return cmdStr + + def getSedCmd(self): + """ + function: get sed cmd + input : NA + output : str + """ + return findCmdInPath('sed') + + def getGrepCmd(self): + """ + function: get grep cmd + input : NA + output : str + """ + return findCmdInPath('grep') + + def getLsofCmd(self): + """ + function: get lsof cmd + input : NA + output : str + """ + return findCmdInPath('lsof') + " -i:" + + def getIfconfigCmd(self): + """ + function: get ifconfig cmd + input : NA + output : str + """ + return findCmdInPath('ifconfig') + + def getIpCmd(self): + """ + function: get ip cmd + input : NA + output : str + """ + return findCmdInPath('ip') + + def getDateCmd(self): + """ + function: get date cmd + input : NA + output : str + """ + return findCmdInPath('date') + + def getAwkCmd(self): + """ + function: get awk cmd + input : NA + output : str + """ + return findCmdInPath('awk') + + def getFindCmd(self): + """ + function: get find cmd + input : NA + output : str + """ + return findCmdInPath('find') + + def getTouchCmd(self, filename): + """ + function: get touch cmd + input : filename + output : str + """ + return findCmdInPath('touch') + BLANK_SPACE + filename + + def getListCmd(self): + """ + function: get list cmd + input : NA + output : str + """ + return findCmdInPath('ls') + + def getSHA256Cmd(self): + """ + function: get sha256 cmd + input : NA + output : str + """ + return findCmdInPath('sha256sum') + + def 
getProcessCmd(self): + """ + function: get process cmd + input : NA + output : str + """ + return findCmdInPath('ps') + + def getCatCmd(self): + """ + function: get cat cmd + input : NA + output : str + """ + return findCmdInPath('cat') + + def getDdCmd(self): + """ + function: get dd cmd + input : NA + output : str + """ + return findCmdInPath('dd') + + def getCdCmd(self, path): + """ + function: get cd cmd + input : path + output : str + """ + return 'cd' + BLANK_SPACE + "'" + path + "'" + + def getAllCrontabCmd(self): + """ + function: get all crontab cmd + input : NA + output : str + """ + cmd = findCmdInPath('crontab') + BLANK_SPACE + " -l" + return cmd + + def getCrontabCmd(self): + """ + function: get crontab cmd + input : NA + output : str + """ + return findCmdInPath('crontab') + + def getKillProcessCmd(self, signal, pid): + """ + function: get kill process cmd + input : signal, pid + output : str + """ + return findCmdInPath('kill') + " -" + signal + BLANK_SPACE + pid + + def getKillallCmd(self): + """ + function: get killall cmd + input : NA + output : str + """ + return findCmdInPath('killall') + + def getKillallProcessCmd(self, signal, username, procName=""): + """ + function: get killall process cmd + input : signal, username, procName + output : str + """ + if procName != "": + return findCmdInPath('killall') + " -s " + signal + " -u " + \ + username + BLANK_SPACE + procName + else: + return findCmdInPath('killall') + " -s " + signal + " -u " + \ + username + + def getXargsCmd(self): + """ + function: get xargs cmd + input : NA + output : str + """ + return findCmdInPath('xargs') + + def getDeleteSemaphoreCmd(self, user): + """ + function: get delete semaphore cmd + input : user + output : str + """ + ipcs = findCmdInPath('ipcs') + ipcrm = findCmdInPath('ipcrm') + xargs = findCmdInPath('xargs') + awk = findCmdInPath('awk') + return "%s -s | %s '/ %s /{print $2}' | %s -n1 %s -s" % ( + ipcs, awk, user, xargs, ipcrm) + + def getProcessIdByKeyWordsCmd(self, keywords): + """ + function: get proecess id by keywords cmd + input : keywords + output : str + """ + ps = findCmdInPath('ps') + grep = findCmdInPath('grep') + awk = findCmdInPath('awk') + return "%s -ef| %s -F '%s' | %s -F -v 'grep'| %s '{print $2}'" % ( + ps, grep, keywords, grep, awk) + + def getSysctlCmd(self): + """ + function: get sysctl cmd + input : NA + output : str + """ + return findCmdInPath('sysctl') + + def getServiceCmd(self, serviceName, action): + """ + function: get service cmd + input : serviceName, action + output : str + """ + return findCmdInPath('service') + BLANK_SPACE + serviceName + \ + BLANK_SPACE + action + + def getSystemctlCmd(self, serviceName, action): + """ + function: get systemctl cmd + input : serviceName, action + output : str + """ + return findCmdInPath('systemctl') + BLANK_SPACE + action + \ + BLANK_SPACE + serviceName + + def getUlimitCmd(self): + """ + function: get ulimit cmd + input : NA + output : str + """ + return 'ulimit' + + def getGetConfValueCmd(self): + """ + function: get conf value cmd + input : NA + output : str + """ + return findCmdInPath('getconf') + " PAGESIZE " + + def getBlockdevCmd(self, device, value="", isSet=False): + """ + function: get block dev cmd + input : device, value, isSet + output : str + """ + if isSet and value != "": + return findCmdInPath('blockdev') + " --setra " + value + \ + BLANK_SPACE + device + else: + return findCmdInPath('blockdev') + " --getra " + device + + def getSysModManagementCmd(self, OperType, module): + + """ + OperType: list 
--list system module
+                  load   --load system module
+                  insert --insert system module by force
+                  remove --remove system module
+                  dep    --generate modules.dep and map files
+        """
+        if OperType == "list":
+            return findCmdInPath('lsmod') + BLANK_SPACE + module
+        elif OperType == "load":
+            return findCmdInPath('modprobe') + BLANK_SPACE + module
+        elif OperType == "insert":
+            return findCmdInPath('insmod') + BLANK_SPACE + module
+        elif OperType == "remove":
+            return findCmdInPath('rmmod') + BLANK_SPACE + module
+        elif OperType == "dep":
+            return findCmdInPath('depmod') + BLANK_SPACE + module
+        else:
+            raise Exception(ErrorCode.GAUSS_514["GAUSS_51404"] + OperType)
+
+    def getMountCmd(self):
+        """
+        function: get mount cmd
+        input : NA
+        output : str
+        """
+        return findCmdInPath('mount')
+
+    def getLocaleCmd(self):
+        """
+        function: get locale cmd
+        input : NA
+        output : str
+        """
+        return findCmdInPath('locale')
+
+    def getPasswordExpiresCmd(self, user):
+        """
+        function: get password expires cmd
+        input : user
+        output : str
+        """
+        return findCmdInPath('chage') + " -l " + user
+
+    def getIOStatCmd(self):
+        """
+        function: get io stat cmd
+        input : NA
+        output : str
+        """
+        return findCmdInPath('iostat') + " -xm 2 3 "
+
+    def getEthtoolCmd(self):
+        """
+        function: get ethtool cmd
+        input : NA
+        output : str
+        """
+        return findCmdInPath('ethtool')
+
+    def getTailCmd(self):
+        """
+        function: get tail cmd
+        input : NA
+        output : str
+        """
+        return findCmdInPath('tail')
+
+    def getSshCmd(self, address):
+        """
+        function: get ssh cmd
+        input : address
+        output : str
+        """
+        return "pssh -s -H " + BLANK_SPACE + address
+
+    def getChkconfigCmd(self, OperType, service=""):
+        """
+        function: get chkconfig cmd
+        input : OperType, service
+        output : str
+        """
+        if OperType == "list":
+            return findCmdInPath('chkconfig') + " --list "
+        elif OperType == "delete" and service:
+            return findCmdInPath('chkconfig') + " --del " + service
+
+    def getManageKerberosCmd(self, OperType):
+        """
+        OperType: init    --init kerberos
+                  destory --destroy kerberos
+        """
+        if OperType == "init":
+            return "kinit -k -t "
+        elif OperType == "destory":
+            return "kdestroy"
+
+    def getManageSSDCmd(self):
+        """
+        function: get manage ssd cmd
+        input : NA
+        output : NA
+        """
+        pass
+
+    def getPythonCmd(self):
+        """
+        function: get python cmd
+        input : NA
+        output : str
+        """
+        return findCmdInPath('python3')
+
+    def getShellCmd(self):
+        """
+        function: get shell cmd
+        input : NA
+        output : str
+        """
+        return findCmdInPath('sh')
+
+    def getSourceCmd(self):
+        """
+        function: get source cmd
+        input : NA
+        output : str
+        """
+        return 'source'
+
+    def getTestCmd(self):
+        """
+        Linux test cmd
+        example: test -f /etc/profile && echo 1 || echo 2
+        """
+        return findCmdInPath("test")
+
+    def getPgrepCmd(self):
+        """
+        Linux pgrep cmd
+        """
+        return findCmdInPath("pgrep")
+
+    def getExportCmd(self, key="", value=""):
+        """
+        Linux export cmd
+        """
+        cmd = findCmdInPath("export")
+        if key:
+            cmd += " %s=%s" % (key, value)
+        return cmd
+
+
+class LinuxPlatform(GenericPlatform):
+    """
+    manage Linux command, config or service for multi-platform
+    """
+
+    def __init__(self):
+        """
+        function: constructor
+        """
+        pass
+
+    def dist(self):
+        """
+        function: dist
+        input : NA
+        output : distname, version, id
+        """
+        return dist()
+
+    def getCpuInfoFile(self):
+        """
+        function: get cpu info file
+        input : NA
+        output : str
+        """
+        return "/proc/cpuinfo"
+
+    def getMemInfoFile(self):
+        """
+        function: get mem info file
+        input : NA
+        output : str
+        """
+        return "/proc/meminfo"
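+
+    # Illustrative usage sketch (an assumption added for clarity, not part
+    # of the original patch): the *Cmd builders above return shell command
+    # strings that callers are expected to run via subprocess, e.g.
+    #     cmd = g_Platform.getChownCmd("omm", "dbgrp", "/opt/data", True)
+    #     (status, output) = subprocess.getstatusoutput(cmd)
+    # where g_Platform is the module-level platform object defined at the
+    # bottom of this file.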
+ + def getBondInfoPath(self): + """ + function: get bond info path + input : NA + output : str + """ + return "/proc/net/bonding/" + + def getSysctlFile(self): + """ + function: get sysctl file + input : NA + output : str + """ + return "/etc/sysctl.conf" + + def getMtablFile(self): + """ + function: get mtab file + input : NA + output : str + """ + return "/etc/mtab" + + def getInterruptFile(self): + """ + function: get interrput file + input : NA + output : str + """ + return "/proc/interrupts" + + def getHostsFile(self): + """ + function: get hostfile + input : NA + output : str + """ + return "/etc/hosts" + + def getName(self): + """ + function: get name + input : NA + output : str + """ + return "linux" + + def getDefaultLocale(self): + """ + function: get default locale + input : NA + output : str + """ + return 'en_US.utf8' + + def getDiskFreeCmd(self, Mounted="", inode=False): + # -P is for POSIX formatting. Prevents error + # on lines that would wrap + return findCmdInPath('df') + " -Pk " + \ + (" -i " if inode else " -h ") + Mounted + + def getDirSizeCmd(self, path, unit=""): + # -s only shows the total size + # unit specify the output size unit + return findCmdInPath('du') + " -s " + (" -B %s " % unit + if unit else " -h ") + path + + def getSadcCmd(self, interval, outFileName): + """ + function: get sadc cmd + input : interval, outFileName + output : str + """ + cmd = "/usr/lib64/sa/sadc -F -d " + str(interval) + " " + outFileName + return cmd + + def getCompressFilesCmd(self, tarName, fileSrc): + """ + function: get compress file cmd + input : tarName, fileSrc + output : str + """ + cmd = "%s -zvcf '%s' %s" % (self.getTarCmd(), tarName, fileSrc) + return cmd + + def getDecompressFilesCmd(self, srcPackage, dest): + """ + function: get decompress file cmd + input : srcPackage, dest + output : str + """ + cmd = "%s -zxvf '%s' -C '%s'" % (self.getTarCmd(), srcPackage, dest) + return cmd + + def getCompressZipFilesCmd(self, zipName, fileSrc): + """ + function: get compress zip files cmd + input : zipName, fileSrc + output : str + """ + cmd = "cd %s && %s -r '%s.zip' ./*" % (fileSrc, self.getZipCmd(), + zipName) + return cmd + + def getDecompressZipFilesCmd(self, srcPackage, dest): + """ + function: get decompress zip files cmd + input : srcPackage, dest + output : str + """ + cmd = "%s -o '%s' -d '%s'" % (self.getUnzipCmd(), srcPackage, dest) + return cmd + + def getReplaceFileLineContentCmd(self, oldLine, newLine, path): + """ + function: get replace file line content cmd + input : oldLine, newLine, path + output : str + """ + cmd = "%s -i \"s/%s/%s/g\" '%s'" % (self.getSedCmd(), oldLine, + newLine, path) + return cmd + + def getDirPermissionCmd(self, dirPath): + """ + function: get dir permission cmd + input : dirPath + output : str + """ + cmd = "%s -ld '%s' | %s -F\" \" '{print $1}' " % (self.getListCmd(), + dirPath, + self.getAwkCmd()) + return cmd + + def getFileSHA256Cmd(self, fileName): + """ + function: get file sha256 cmd + input : fileName + output : str + """ + cmd = "%s '%s' | %s -F\" \" '{print $1}' " % (self.getSHA256Cmd(), + fileName, + self.getAwkCmd()) + return cmd + + def getExecuteCmdWithUserProfile(self, user, userProfile, executeCmd, + ignoreError=True): + """ + function: get execute cmd with user profile + input: user, userProfile, executeCmd, ignoreError + output: str + """ + if (user != "") and (os.getuid() == 0): + cmd = "su - %s -c '%s %s; %s'" % (user, self.getSourceCmd(), + userProfile, executeCmd) + else: + cmd = "%s %s; %s" % (self.getSourceCmd(), 
userProfile, executeCmd)
+        if ignoreError:
+            cmd += " 2>/dev/null"
+        return cmd
+
+    def getUserHomePath(self):
+        """
+        function: get user home path
+        input: NA
+        output: str
+        """
+        # converts the relative path to an absolute path
+        cmd = "echo ~ 2>/dev/null"
+        (status, output) = subprocess.getstatusoutput(cmd)
+        if status != 0:
+            raise Exception(ErrorCode.GAUSS_502["GAUSS_50219"] % "user home"
+                            + "The cmd is %s" % cmd)
+        return output
+
+    def checkProcAlive(self, procPid):
+        """
+        function: check whether the process is alive
+        input: procPid
+        output: True/False
+        """
+        try:
+            os.kill(procPid, 0)
+        except OSError:
+            return False
+        else:
+            return True
+
+    def getIpAddressAndNIC(self, ipType="ipv4"):
+        """
+        function: get ip address and nic
+        input: ipType
+        output: generator of (nic, ipAddress)
+        """
+        if ipType == "ipv4":
+            key = AF_INET
+        else:
+            key = AF_INET6
+
+        for iface in interfaces():
+            if key in ifaddresses(iface):
+                ipAddress = ifaddresses(iface)[key][0]['addr']
+                yield (iface, ipAddress)
+
+    def getIpAddressAndNICList(self, ipType="ipv4"):
+        """
+        function: get ip address and nic list
+        input: ipType
+        output: []
+        """
+        return list(self.getIpAddressAndNIC(ipType))
+
+    def getNetworkNumByIPAddr(self, ipAddress, ipType="ipv4"):
+        """
+        function: get network num by IP addr
+        input: ipAddress, ipType
+        output: str
+        """
+        try:
+            mappingList = self.getIpAddressAndNICList(ipType)
+            for mapInfo in mappingList:
+                if mapInfo[1] == ipAddress:
+                    return mapInfo[0]
+            raise Exception(ErrorCode.GAUSS_506["GAUSS_50612"] % ipAddress)
+        except Exception as e:
+            raise Exception(str(e))
+
+    def getHostNameByIPAddr(self, ipAddress):
+        """
+        function: get host name by ip addr
+        input: ipAddress
+        output: str
+        """
+        try:
+            return socket.gethostbyaddr(ipAddress)[0]
+        except Exception as e:
+            raise Exception(str(e))
+
+    def getLinuxNetworkConfigFile(self, networkConfPath, networkCardNum):
+        """
+        function: get linux network config file
+        input: networkConfPath, networkCardNum
+        output: str
+        """
+        try:
+            networkConfFile = "%sifcfg-%s" % (networkConfPath, networkCardNum)
+            # Network configuration file does not exist
+            if not os.path.exists(networkConfFile):
+                cmd = "%s %s -iname 'ifcfg-*-%s' -print" % (self.getFindCmd(),
+                                                            networkConfPath,
+                                                            networkCardNum)
+                (status, output) = subprocess.getstatusoutput(cmd)
+                if (status != 0 or output.strip() == ""
+                        or len(output.split('\n')) != 1):
+                    raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] %
+                                    networkConfFile + "The cmd is %s" % cmd)
+                networkConfFile = output.strip()
+            return networkConfFile
+        except Exception as e:
+            raise Exception(str(e))
+
+    def getNetworkBondModeByBondConfigFile(self, bondingConfFile):
+        """
+        function: get network bond mode by bond config file
+        input: bondingConfFile
+        output: str
+        """
+        try:
+            # Check the bond mode
+            cmd = "%s -w '\<Bonding Mode\>' %s | %s -F ':' '{print $NF}'" % (
+                self.getGrepCmd(), bondingConfFile, self.getAwkCmd())
+            (status, output) = subprocess.getstatusoutput(cmd)
+            if status != 0:
+                raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd +
+                                " Error: \n%s " % output)
+            return "BondMode %s" % output.strip()
+        except Exception as e:
+            raise Exception(str(e))
+
+    def getNetworkBondModeInfo(self, networkConfFile, networkCardNum):
+        """
+        function: get network bond mode info
+        input: networkConfFile, networkCardNum
+        output: str
+        """
+        # Get the bond profile
+        if not os.path.isfile(networkConfFile):
+            return "BondMode Null"
+
+        bondingConfFile = "%s%s" % (self.getBondInfoPath(), networkCardNum)
+        cmd = "%s -i 'BONDING_OPTS\|BONDING_MODULE_OPTS' %s" % (
self.getGrepCmd(), networkConfFile) + output = subprocess.getstatusoutput(cmd)[1] + # Analysis results + if output.strip() != "": + if (output.find("mode") > 0) and os.path.exists(bondingConfFile): + bondInfo = self.getNetworkBondModeByBondConfigFile( + bondingConfFile) + else: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + " Error: \n%s " % output) + elif os.path.exists(bondingConfFile): + bondInfo = self.getNetworkBondModeByBondConfigFile(bondingConfFile) + bondInfo += "\nNo 'BONDING_OPTS' or \ + 'BONDING_MODULE_OPTS' in bond config file[%s]." % networkConfFile + else: + bondInfo = "BondMode Null" + return bondInfo + + def getNetworkMaskByNICNum(self, networkCardNum, ipType="ipv4"): + """ + function: get Network Mask By NICNum + input: networkCardNum, ipType + output: str + """ + if ipType == "ipv4": + return ifaddresses(networkCardNum)[AF_INET][0]["netmask"] + else: + return ifaddresses(networkCardNum)[AF_INET6][0]["netmask"] + + def getNetworkRXTXValueByNICNum(self, networkCardNum, valueType): + """ + function: get Network RXTX Value By NICNum + input: networkCardNum, valueType + output: int + """ + try: + cmd = "%s -g %s | %s '%s:' | %s -n 1" % (self.getEthtoolCmd(), + networkCardNum, + self.getGrepCmd(), + valueType.upper(), + self.getTailCmd()) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + " Error: \n%s " % output) + value = output.split(':')[-1].split(' ')[0].strip() + if not str(value).isdigit(): + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + " Error: \n%s " % output) + return int(value) + except Exception as e: + raise Exception(str(e)) + + def setNetworkRXTXValue(self, networkCardNum, rxValue=8192, + txValue=8192): + """ + function: set Network RXTX Value + input: networkCardNum, rxValue, txValue + output: NA + """ + try: + cmd = "%s -G %s rx %s tx %s" % ( + self.getEthtoolCmd(), networkCardNum, rxValue, txValue) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + if output.find("no ring parameters changed, aborting") < 0: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + " Error: \n%s " % output) + except Exception as e: + raise Exception(str(e)) + + def getNetworkSpeedByNICNum(self, networkCardNum): + """ + function: get Network Speed By NICNum + input: networkCardNum + output: int + """ + keyWord = "Speed: " + speedUnit = "Mb/s" + try: + cmd = "%s %s | grep '%s'" % (self.getEthtoolCmd(), + networkCardNum, keyWord) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0 or output == "": + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + " Error: \n%s " % output) + if len(output.split('\n')) >= 1: + for line in output.split('\n'): + if line.find(keyWord) >= 0 and line.find(speedUnit) >= 0: + return int(line.split(':')[-1].strip()[:-4]) + return 0 + except Exception as e: + raise Exception(str(e)) + + def checkNetworkInterruptByNIC(self, networkCardNum): + """ + function: check Network Interrupt By NIC + """ + try: + interruptConfFile = self.getInterruptFile() + numberedListCmd = "%s %s | %s '%s-' | \ + %s -F ' ' '{print $1}' | %s -F ':' '{print $1}'" % ( + self.getCatCmd(), interruptConfFile, self.getGrepCmd(), + networkCardNum, self.getAwkCmd(), self.getAwkCmd()) + irqCmd = "%s /proc/irq/$i/smp_affinity" % self.getCatCmd() + cmd = "for i in `%s`; do %s ; done" % (numberedListCmd, irqCmd) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise 
Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + " Error: \n%s " % output) + except Exception as e: + raise Exception(str(e)) + + # cpu core number followed by 1 2 4 8,every 4 left shift one + Mapping = {0: "1", 1: "2", 2: "4", 3: "8"} + flag = True + for index, eachLine in enumerate(output.split()): + # Remove the ',' + eachLine = eachLine.replace(",", "") + # Replace 0000,00001000 to 1,Remove invalid content + validValue = eachLine.replace("0", "") + # Convert the row index to the expected value + expandNum = Mapping[index % 4] + # Convert line index to expected position + expandBit = index // 4 * -1 - 1 + # value and position is correct + if eachLine[expandBit] == expandNum and validValue == expandNum: + continue + else: + print("Network card [%s] multi-queue \ + support is not enabled.\n" % networkCardNum) + flag = False + break + return flag + + def getInterruptCountNum(self, networkCardNum): + """ + function : We can makesure that all dev names is startwith + 'ethX-' and endwith '-X' + input : String + output : Int + """ + try: + interruptConfFile = self.getInterruptFile() + cmd = "%s %s | %s '%s-' | %s -l" % (self.getCatCmd(), + interruptConfFile, + self.getGrepCmd(), + networkCardNum, + self.getWcCmd()) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_506["GAUSS_50622"] % cmd) + if not str(output.strip()).isdigit(): + return 0 + return int(output.strip()) + except Exception as e: + raise Exception(str(e)) + + def getPackageFile(self, distName, version, packageVersion, + productVersion, fileType="tarFile"): + """ + function : Get the path of binary file version. + input : distName, version, packageVersion, + productVersion, fileType + output : String + """ + distname, version, idnum = dist() + distname = distname.lower() + dirName = os.path.dirname(os.path.realpath(__file__)) + prefixStr = productVersion + if fileType == "tarFile": + postfixStr = "tar.gz" + elif fileType == "binFile": + postfixStr = "bin" + elif fileType == "sha256File": + postfixStr = "sha256" + elif fileType == "bz2File": + postfixStr = "tar.bz2" + else: + raise Exception(ErrorCode.GAUSS_500["GAUSS_50019"] % "fileType") + + # RHEL and CentOS have the same kernel version, + # So RHEL cluster package can run directly on CentOS. 
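+        # For example (illustrative): on CentOS with /etc/euleros-release
+        # present, the lookup below first tries the EulerOS package name and
+        # falls back to the CentOS one; otherwise it tries the CentOS name
+        # and falls back to the RedHat name.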
+ if distname in REDHAT: + fileName = os.path.join(dirName, "./../../../", + "%s-%s-%s-%s.%s" % ( + prefixStr, packageVersion, PAK_REDHAT, + BIT_VERSION, postfixStr)) + elif distname in CENTOS: + if os.path.isfile(os.path.join("/etc", "euleros-release")): + fileName = os.path.join(dirName, "./../../../", + "%s-%s-%s-%s.%s" % ( + prefixStr, packageVersion, + PAK_EULER, + BIT_VERSION, postfixStr)) + if not os.path.isfile(fileName): + fileName = os.path.join(dirName, "./../../../", + "%s-%s-%s-%s.%s" % ( + prefixStr, packageVersion, + PAK_CENTOS, BIT_VERSION, + postfixStr)) + else: + fileName = os.path.join(dirName, "./../../../", + "%s-%s-%s-%s.%s" % ( + prefixStr, packageVersion, + PAK_CENTOS, + BIT_VERSION, postfixStr)) + if not os.path.isfile(fileName): + fileName = os.path.join(dirName, "./../../../", + "%s-%s-%s-%s.%s" % ( + prefixStr, packageVersion, + PAK_REDHAT, + BIT_VERSION, postfixStr)) + elif distname in ASIANUX: + fileName = os.path.join(dirName, "./../../../", + "%s-%s-%s-%s.%s" % ( + prefixStr, packageVersion, PAK_ASIANUX, + BIT_VERSION, postfixStr)) + elif distname == SUSE and version.split('.')[0] in ("11", "12"): + fileName = os.path.join(dirName, "./../../../", + "%s-%s-%s-%s.%s" % ( + prefixStr, packageVersion, "SUSE11", + BIT_VERSION, postfixStr)) + elif distname in EULEROS and (idnum in ["SP2", "SP3", "SP5"]): + fileName = os.path.join(dirName, "./../../../", + "%s-%s-%s-%s.%s" % ( + prefixStr, packageVersion, PAK_EULER, + BIT_VERSION, postfixStr)) + if not os.path.isfile(fileName): + fileName = os.path.join(dirName, "./../../../", + "%s-%s-%s-%s.%s" % ( + prefixStr, packageVersion, + PAK_REDHAT, + BIT_VERSION, postfixStr)) + elif distname in EULEROS and (idnum == "SP8"): + fileName = os.path.join(dirName, "./../../../", + "%s-%s-%s-%s.%s" % ( + prefixStr, packageVersion, PAK_EULER, + BIT_VERSION, postfixStr)) + elif distname in EULEROS: + fileName = os.path.join(dirName, "./../../../", + "%s-%s-%s-%s.%s" % ( + prefixStr, packageVersion, PAK_REDHAT, + BIT_VERSION, postfixStr)) + elif distname in OPENEULER or distname in KYLIN: + fileName = os.path.join(dirName, "./../../../", + "%s-%s-%s-%s.%s" % ( + prefixStr, packageVersion, + PAK_OPENEULER, + BIT_VERSION, postfixStr)) + else: + raise Exception(ErrorCode.GAUSS_519["GAUSS_51900"] + + "Supported platforms are: %s." % str( + SUPPORT_WHOLE_PLATFORM_LIST)) + + fileName = os.path.normpath(fileName) + if not os.path.exists(fileName): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % fileName) + if not os.path.isfile(fileName): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50210"] % fileName) + return fileName + + def setKeyValueInSshd(self, key, value): + """ + function: Set a (key, value) pair into /etc/ssh/sshd_config, + before "Match" section. + "Match" section in sshd_config should always places in the end. + Attention: you need to remove the old (key, value) + from sshd_config manually. + input: + key: the configuration name of sshd_config + value: the configuration value(Only single line string + permitted here). 
+        output:
+            void
+        """
+        sshd_config = '/etc/ssh/sshd_config'
+        cmd = "grep -E '^\<Match\>' %s" % sshd_config
+        (status, output) = subprocess.getstatusoutput(cmd)
+
+        if status == 0:
+            cmd = "sed -i '/^\<Match\>.*/i %s %s' %s" % (key, value,
+                                                         sshd_config)
+        else:
+            if output is not None and len(output.strip()) != 0:
+                raise Exception(ErrorCode.GAUSS_503["GAUSS_50321"] %
+                                "Match section" + "Command: %s, Error: %s" %
+                                (cmd, output))
+            cmd = "echo '' >> %s ; echo '%s %s' >> %s" % (sshd_config,
+                                                          key, value,
+                                                          sshd_config)
+
+        (status, output) = subprocess.getstatusoutput(cmd)
+        if status != 0:
+            raise Exception((ErrorCode.GAUSS_503["GAUSS_50320"] % (
+                key, value)) + ("Command: %s, Error: %s" % (cmd, output)))
+
+
+class SLESPlatform(LinuxPlatform):
+    """
+    manage SUSE Linux Enterprise Server command,
+    config or service for multi-platform
+    """
+
+    def __init__(self):
+        self.NetWorkConfPath = "/etc/sysconfig/network/"
+        self.SuSEReleaseFile = "/etc/SuSE-release"
+        self.OSReleaseFile = "/etc/os-release"
+
+    def isPlatFormEulerOSOrRHEL7X(self):
+        """
+        function: check whether the platform is EulerOS or RHEL7X
+        input : NA
+        output : bool
+        """
+        return False
+
+    def getManageFirewallCmd(self, action):
+        """
+        function: get manage firewall cmd
+        input : action
+        output : str
+        """
+        return findCmdInPath('SuSEfirewall2') + BLANK_SPACE + action
+
+    def getLinuxFirewallStatus(self):
+        """
+        function: get Linux firewall status
+        input: NA
+        output: str
+        """
+        try:
+            cmd = self.getManageFirewallCmd("status")
+            (status, output) = subprocess.getstatusoutput(cmd)
+            if status != 0 or output == "":
+                raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd +
+                                " Error: \n%s " % output)
+
+            if output.strip().find("SuSEfirewall2 not active") > 0:
+                firewallStatus = "disabled"
+            else:
+                firewallStatus = "enabled"
+            return firewallStatus
+        except Exception as e:
+            raise Exception(str(e))
+
+    def getManageCrondCmd(self, action):
+        """
+        function: get manage crond cmd
+        input : action
+        output : str
+        """
+        return self.getServiceCmd("cron", action)
+
+    def getManageSshdCmd(self, action):
+        """
+        function: get manage sshd cmd
+        input : action
+        output : str
+        """
+        return self.getServiceCmd("sshd", action)
+
+    def getManageSyslogCmd(self, action):
+        """
+        function: get manage syslog cmd
+        input : action
+        output : str
+        """
+        return self.getServiceCmd("syslog", action)
+
+    def getManageRsyslogCmd(self, action):
+        """
+        function: get manage rsyslog cmd
+        input : action
+        output : str
+        """
+        return self.getServiceCmd("rsyslog", action)
+
+    def getManageSystemdJournaldCmd(self, action):
+        """
+        function: get systemd-journald cmd
+        input : action
+        output : str
+        """
+        return self.getServiceCmd("systemd-journald", action)
+
+    def getManageGsOsServerCmd(self, action):
+        """
+        function: gs-OS-set service cmd; only supported on RHEL/CentOS
+        input : action
+        output : NA
+        """
+        try:
+            raise Exception(ErrorCode.GAUSS_530["GAUSS_53021"]
+                            % ("gs-OS-set service", "RHEL/CentOS"))
+        except Exception as e:
+            raise Exception(str(e))
+
+    def getCurrentPlatForm(self):
+        """
+        function: get current platform
+        input: NA
+        output: str, str
+        """
+        try:
+            distName, version = dist()[0:2]
+            bits = platform.architecture()[0]
+            if (distName.lower() != SUSE or
+                    version not in SUPPORT_SUSE_VERSION_LIST):
+                raise Exception(ErrorCode.GAUSS_530["GAUSS_53022"]
+                                % (distName.lower(), version))
+
+            # os-release is added since SLE 12; SuSE-release will
+            # be removed in a future service pack or release
+            if os.path.exists(self.SuSEReleaseFile):
+                cmd = "%s -i 'PATCHLEVEL' %s | " \
+                      "%s -F '='
'{print $2}'" % (self.getGrepCmd(),
+                                                  self.SuSEReleaseFile,
+                                                  self.getAwkCmd())
+            else:
+                cmd = "%s -i 'VERSION_ID' %s | " \
+                      "%s -F '.' '{print $2}' | %s 's/\"//'" % (
+                          self.getGrepCmd(), self.OSReleaseFile,
+                          self.getAwkCmd(), self.getSedCmd())
+            (status, output) = subprocess.getstatusoutput(cmd)
+            if status == 0 and output != "":
+                patchlevel = output.strip()
+            else:
+                raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd +
+                                " Error: \n%s " % output)
+
+            if (bits == BIT_VERSION and
+                    ((version == SUSE11 and
+                      patchlevel in SUPPORT_SUSE11X_VERSION_LIST) or
+                     (version == SUSE12 and
+                      patchlevel in SUPPORT_RHEL12X_VERSION_LIST))):
+                platformVersion = "%s.%s" % (version, patchlevel)
+                return distName.lower(), platformVersion
+            else:
+                raise Exception(ErrorCode.GAUSS_519["GAUSS_51900"] +
+                                " The current system is: %s%s.%s" % (
+                                    distName.lower(), version, patchlevel))
+        except Exception as e:
+            raise Exception(str(e))
+
+    def getNetworkConfigFileByNICNum(self, networkCardNum):
+        """
+        function: get network config file by NIC num
+        input: networkCardNum
+        output: str
+        """
+        return self.getLinuxNetworkConfigFile(self.NetWorkConfPath,
+                                              networkCardNum)
+
+    def getNetworkConfigFileByIPAddr(self, ipAddress):
+        """
+        function: get network config file by ip addr
+        input: ipAddress
+        output: str
+        """
+        networkCardNum = self.getNetworkNumByIPAddr(ipAddress)
+        return self.getNetworkConfigFileByNICNum(networkCardNum)
+
+
+class RHELPlatform(LinuxPlatform):
+    """
+    manage Red Hat Enterprise Linux command, config or service
+    for multi-platform
+    """
+
+    def __init__(self):
+        """
+        function: constructor
+        """
+        self.NetWorkConfPath = "/etc/sysconfig/network-scripts/"
+
+    def isSupportSystemctl(self):
+        """
+        function: isSupportSystemctl
+        input: NA
+        output: bool
+        """
+        distName, version = dist()[0:2]
+        if ((distName.lower() == EULEROS and version[0:3] in
+                SUPPORT_EULEROS_VERSION_LIST) or
+                (distName.lower() in SUPPORT_RHEL_SERIES_PLATFORM_LIST and
+                 version[0:3] in SUPPORT_RHEL7X_VERSION_LIST) or
+                (distName.lower() == CENTOS and version[0:3] in
+                 SUPPORT_EULEROS_VERSION_LIST and
+                 os.path.isfile(os.path.join("/etc", "euleros-release"))) or
+                distName.lower() == OPENEULER):
+            return True
+        else:
+            return False
+
+    def isPlatFormEulerOSOrRHEL7X(self):
+        """
+        function: check whether the platform is EulerOS or RHEL7X
+        """
+        return self.isSupportSystemctl()
+
+    def getManageFirewallCmd(self, action):
+        """
+        function: get manage firewall cmd
+        input : action
+        output : str
+        """
+        if self.isSupportSystemctl():
+            return self.getSystemctlCmd("firewalld.service", action)
+        else:
+            return self.getServiceCmd("iptables", action)
+
+    def getLinuxFirewallStatus(self):
+        """
+        function: get Linux firewall status
+        """
+        try:
+            cmd = self.getManageFirewallCmd("status")
+            (status, output) = subprocess.getstatusoutput(cmd)
+            if status != 0 and output == "":
+                raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd +
+                                " Error: \n%s " % output)
+
+            if self.isSupportSystemctl():
+                if output.strip().find("Active: active (running)") > 0:
+                    firewallStatus = "enabled"
+                else:
+                    firewallStatus = "disabled"
+            else:
+                if output.strip().find("Firewall is not running") > 0:
+                    firewallStatus = "disabled"
+                else:
+                    firewallStatus = "enabled"
+            return firewallStatus
+        except Exception as e:
+            raise Exception(str(e))
+
+    def getManageCrondCmd(self, action):
+        """
+        function: get crond.service cmd
+        input : action
+        output : str
+        """
+        if self.isSupportSystemctl():
+            return self.getSystemctlCmd("crond.service", action)
+        else:
+            return
self.getServiceCmd("crond", action) + + def getManageSshdCmd(self, action): + """ + function: get sshd.server cmd + input : action + output : str + """ + if self.isSupportSystemctl(): + return self.getSystemctlCmd("sshd.service", action) + else: + return self.getServiceCmd("sshd", action) + + def getManageGsOsServerCmd(self, action): + """ + function: get gs-OS-set.service cmd + input : action + output : str + """ + if self.isSupportSystemctl(): + return self.getSystemctlCmd("gs-OS-set.service", action) + else: + return self.getServiceCmd("gs-OS-set", action) + + def getManageSyslogCmd(self, action): + """ + function: get syslog service cmd + """ + try: + raise Exception(ErrorCode.GAUSS_530["GAUSS_53021"] + % ("Syslog service", "SuSE")) + except Exception as e: + raise Exception(str(e)) + + def getManageRsyslogCmd(self, action): + """ + function: get syslog cmd + """ + try: + raise Exception(ErrorCode.GAUSS_530["GAUSS_53021"] + % ("Rsyslog service", "SuSE")) + except Exception as e: + raise Exception(str(e)) + + def getManageSystemdJournaldCmd(self, action): + """ + function: get systemd journal cmd + """ + try: + raise Exception(ErrorCode.GAUSS_530["GAUSS_53021"] + % ("systemd-journald", "SuSE")) + except Exception as e: + raise Exception(str(e)) + + def getCurrentPlatForm(self): + """ + function: get current platform + """ + try: + distName, version, currentId = dist() + bits = platform.architecture()[0] + if ((bits == BIT_VERSION and + ((distName.lower() == EULEROS and version[0:3] in + SUPPORT_EULEROS_VERSION_LIST) or + (distName.lower() in SUPPORT_RHEL_SERIES_PLATFORM_LIST and + version[0:3] in SUPPORT_RHEL_SERIES_VERSION_LIST)) or + (distName.lower() == OPENEULER) + )): + return distName.lower(), version[0:3] + else: + if distName.lower() == CENTOS and os.path.isfile( + os.path.join("/etc", "euleros-release")) and \ + (version[0:3] in SUPPORT_EULEROS_VERSION_LIST): + return EULEROS, version[0:3] + if distName.lower() == EULEROS: + raise Exception(ErrorCode.GAUSS_519["GAUSS_51900"] + + " The current system is: %s%s%s" % ( + distName.lower(), + version[0:3], currentId)) + else: + raise Exception(ErrorCode.GAUSS_519["GAUSS_51900"] + + " The current system is: %s%s" % ( + distName.lower(), version[0:3])) + except Exception as e: + raise Exception(str(e)) + + def getNetworkConfigFileByIPAddr(self, ipAddress): + """ + function: get Network ConfigFile By IPAddr + """ + networkCardNum = self.getNetworkNumByIPAddr(ipAddress) + return self.getLinuxNetworkConfigFile(self.NetWorkConfPath, + networkCardNum) + + def getNetworkConfigFileByNICNum(self, networkCardNum): + """ + function: get Network ConfigFile By NICNum + """ + return self.getLinuxNetworkConfigFile(self.NetWorkConfPath, + networkCardNum) + + +class UserPlatform(): + """ + manage Red Hat Enterprise Linux command,config or service for muti-platform + """ + + def __init__(self): + """ + function : Check support OS version and init OS class + """ + # now we support this platform: + # RHEL/CentOS "6.4", "6.5", "6.6", "6.7", "6.8", "6.9", + # "7.0", "7.1", "7.2", "7.3", "7.4", "7.5 "64bit + # EulerOS "2.0", "2.3" 64bit + # SuSE11 sp1/2/3/4 64bit + # SuSE12 sp0/1/2/3 64bit + # Kylin "10" 64bit + distName, version, idNum = dist() + if distName.lower() not in SUPPORT_WHOLE_PLATFORM_LIST: + raise Exception(ErrorCode.GAUSS_519["GAUSS_51900"] + + "Supported platforms are: %s." 
% str( + SUPPORT_WHOLE_PLATFORM_LIST)) + + if distName.lower() == SUSE: + # SuSE11.X SUSE12.X + self.userPlatform = SLESPlatform() + elif distName.lower() in SUPPORT_RHEL_SERIES_PLATFORM_LIST: + # RHEL6.X RHEL7.X + self.userPlatform = RHELPlatform() + else: + # EULEROS 2.0/2.3 + self.userPlatform = RHELPlatform() + try: + self.userPlatform.getCurrentPlatForm() + except Exception as e: + raise Exception(str(e)) + + +# global platform class +g_Platform = UserPlatform().userPlatform diff --git a/script/gspylib/os/gsservice.py b/script/gspylib/os/gsservice.py new file mode 100644 index 0000000..c796cb8 --- /dev/null +++ b/script/gspylib/os/gsservice.py @@ -0,0 +1,106 @@ +# -*- coding:utf-8 -*- +############################################################################# +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +# Description : gsservice.py is a utility to do something for service +# information. +############################################################################# +import subprocess +import sys + +sys.path.append(sys.path[0] + "/../../") +from gspylib.common.ErrorCode import ErrorCode +from gspylib.os.gsplatform import g_Platform + +# ---------------OS service action -------------------- +ACTION_LIST = ["start", "stop", "restart", "status", "reload", "enable"] +FIREWALL = "firewall" +CROND = "crond" +SSHD = "sshd" +SYSLOG = "syslog" +RSYSLOG = "rsyslog" +SYSTEMD_JOURNALD = "systemd-journald" +NTPD = "ntp" +GS_OS_SERVER = "gs-OS-set" +SERVICE_LIST = [FIREWALL, CROND, SSHD, SYSLOG, RSYSLOG, SYSTEMD_JOURNALD, NTPD, + GS_OS_SERVER] + + +class Service(): + """ + function: Init the Service options + """ + + def __init__(self): + """ + constructor + """ + pass + + def checkService(self, service): + """ + function: check service + """ + if (service not in SERVICE_LIST): + raise Exception(ErrorCode.GAUSS_500["GAUSS_50011"] % ("service", + service)) + + def checkAction(self, action): + """ + function: check action + """ + if (action not in ACTION_LIST): + raise Exception(ErrorCode.GAUSS_500["GAUSS_50011"] % ("action", + action)) + + def manageOSService(self, service, action): + """ + function: manage OS service + """ + self.checkService(service) + self.checkAction(action) + + # get service commands + if (service == FIREWALL): + cmd = g_Platform.getManageFirewallCmd(action) + elif (service == CROND): + cmd = g_Platform.getManageCrondCmd(action) + elif (service == SSHD): + cmd = g_Platform.getManageSshdCmd(action) + elif (service == SYSTEMD_JOURNALD): + # systemd-journald now only supported on SuSE Platform + cmd = g_Platform.getManageSystemdJournaldCmd(action) + elif (service == SYSLOG): + # syslog-ng only supported on SuSE Platform + cmd = g_Platform.getManageSyslogCmd(action) + elif (service == RSYSLOG): + # rsyslog only supported on SuSE Platform + cmd = g_Platform.getManageRsyslogCmd(action) + elif (service == GS_OS_SERVER): + cmd = g_Platform.getManageGsOsServerCmd(action) + else: + return (1, "Server(%s) is not support." 
% service) + (status, output) = subprocess.getstatusoutput(cmd) + return (status, output) + + def getFirewallStatus(self): + """ + function: get firewall status + """ + return g_Platform.getLinuxFirewallStatus() + + +g_service = Service() diff --git a/script/gspylib/os/gssysctl.py b/script/gspylib/os/gssysctl.py new file mode 100644 index 0000000..3453a40 --- /dev/null +++ b/script/gspylib/os/gssysctl.py @@ -0,0 +1,102 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +import sys +import subprocess + +sys.path.append(sys.path[0] + "/../../") +from gspylib.common.ErrorCode import ErrorCode +from gspylib.os.gsplatform import g_Platform + + +class SysctlInfo: + """ + class: SysctlInfo + """ + + def __init__(self): + """ + function: constructor + """ + self.sysctlFile = g_Platform.getSysctlFile() + + def GetSysPara(self, paraList): + """ + function : Get system parameters by paraList + input : paraList parameters list + output : para_dict parameters dict + """ + para_dict = {} + fullParaDict = {} + try: + cmd = "'%s' -a" % g_Platform.getSysctlCmd() + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + " Error: \n%s" % str(output)) + line_list = output.split('\n') + for line in line_list: + words = line.split('=') + if (len(words) < 2): + continue + fullParaDict[words[0].strip()] = words[1].strip() + # chose para + for para in paraList: + if (para in fullParaDict.keys()): + para_dict[para] = fullParaDict[para] + except Exception as e: + raise Exception(str(e)) + return para_dict + + def SetSysPara(self, paraDict): + """ + function : Set system parameters by dict + input : paraDict parameters dict + output : null + """ + try: + # write or change configure file + configure_file = self.sysctlFile + with open(configure_file, 'r') as fp: + full_line = fp.readlines() + with open(configure_file, 'w') as fp: + for current_line in full_line: + isFind = False + for key in paraDict.keys(): + if current_line.find(key) >= 0 \ + and current_line.strip()[0] != '#': + new_line = "#" + current_line + fp.write(current_line.replace(current_line, + new_line)) + isFind = True + if not isFind: + fp.write(current_line.replace(current_line, + current_line)) + + for key in paraDict.keys(): + new_line = "\n" + key + " =" + paraDict[key] + fp.write(new_line) + # restart server + cmd = "'%s' -p" % g_Platform.getSysctlCmd() + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + " Error: \n%s" % str(output)) + except Exception as e: + raise Exception(str(e)) + + +g_sysctl = SysctlInfo() diff --git a/script/gspylib/pssh/bin/TaskPool.py b/script/gspylib/pssh/bin/TaskPool.py new file mode 100644 index 0000000..fc07553 --- /dev/null +++ b/script/gspylib/pssh/bin/TaskPool.py @@ -0,0 +1,335 @@ +# -*- coding=utf-8 -*- +# 
############################################################################ +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +# Description : TaskPool.py is a utility to manage tasks. +# ############################################################################ + +import os +import signal +import subprocess +import sys +import stat +import threading +import time +from threading import Timer + + +class WriterThread(threading.Thread): + """ + class writer. + Thread that processes the result content from TaskThread + and writes the result content to a file. + """ + + def __init__(self, f_out, f_std): + super(WriterThread, self).__init__() + self.out_file = f_out + self.err_file = f_std + + self.stdout = None + self.stderr = None + + def run(self): + """ + Writing the result content to a file. + """ + if self.out_file: + if not os.path.exists(self.out_file): + try: + os.mknod(self.out_file, stat.S_IWUSR | stat.S_IRUSR) + except IOError as e: + raise Exception("[GAUSS-50206] : Failed to create file" + " or directory. Error:\n%s." % str(e)) + with open(self.out_file, 'wb', buffering=1) as fp_out: + fp_out.write(self.stdout.encode('utf-8')) + + if self.err_file: + if not os.path.exists(self.err_file): + try: + os.mknod(self.err_file, stat.S_IWUSR | stat.S_IRUSR) + except IOError as e: + raise Exception("[GAUSS-50206] : Failed to create file" + " or directory. Error:\n%s." % str(e)) + with open(self.err_file, 'wb', buffering=1) as fp_err: + fp_err.write(self.stderr.encode('utf-8')) + + +class TaskThread(threading.Thread): + """ + class task + Starts a task thread. + """ + + def __init__(self, host, cmd, f_out="", f_err="", + detail=False, timeout=0, shell_mode=False, inline=False): + super(TaskThread, self).__init__() + self.setDaemon(True) + + self.host = host + self.cmd = cmd + self.detail = bool(detail) + self.timeout = timeout + self.shell_mode = shell_mode + self.inline = inline + + self.status = 0 + self.stdout, self.stderr = "", "" + self.failures = [] + self.proc = None + self.timestamp = time.time() + self.isKill = False + self.writer = WriterThread(f_out, f_err) if (f_out or f_err) else None + + def kill(self): + """ + Kill the process of cmd. + :param : NA + :return: NA + """ + self.failures.append("Timed out") + # kill process + if self.proc: + self.proc.kill() + self.isKill = True + # Set the status + self.status = -1 * signal.SIGKILL + self.failures.append("Killed by signal %s" % signal.SIGKILL) + + def get_elapsed_time(self): + """ + Getting elapsed timestamp. + :return: timestamp + """ + return time.time() - self.timestamp + + def check_timeout(self): + """ + check timed-out process + """ + if self.isKill or self.timeout <= 0: + return False + timeleft = self.timeout - self.get_elapsed_time() + if timeleft <= 0: + return True + return False + + def run(self): + """ + Execute the cmd on host. 
+ :return: NA + """ + self.timestamp = time.time() + self.proc = subprocess.Popen(self.cmd, shell=False, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + + stdout, stderr = self.proc.communicate() + self.stdout += stdout.decode('utf-8') + self.stderr += stderr.decode('utf-8') + self.status = self.proc.returncode + + def __print_out(self): + if not self.stdout and not self.stderr: + return + if self.shell_mode: + sys.stderr.write("%s" % self.stderr) + sys.stdout.write("%s" % self.stdout) + else: + if self.stdout: + sys.stdout.write("%s: %s" % (self.host, self.stdout)) + # Use [-1] replace of .endswith, can avoid the problem about + # coding inconsistencies + if self.stdout and self.stdout[-1] != os.linesep: + sys.stdout.write(os.linesep) + if self.shell_mode and self.stderr and self.stderr[-1] != os.linesep: + sys.stderr.write(os.linesep) + + def __print_result(self, index): + """ + Print the result into sys.stdout + :return: NA + """ + if self.shell_mode: + str_ = "" + else: + str_ = "[%s] %s [%s] %s" % ( + index, + time.asctime().split()[3], + "SUCCESS" if not self.status else "FAILURE", + self.host + ) + if self.status > 0: + str_ += " Exited with error code %s" % self.status + + if self.failures: + failures_msg = ", ".join(self.failures) + str_ = str_ + " " + failures_msg + + if str_: + print(str_) + if self.inline: + sys.stdout.write("%s" % self.stdout) + + def write(self, index): + """ + Write the output into sys.stdout and files. + :return: object of writer or None + """ + # Print the stdout into sys.stdout + if self.detail: + self.__print_out() + # Print the status + self.__print_result(index) + + # Write the self.stdout and self.stderr into files. + if self.writer: + self.writer.stdout = self.stdout + self.writer.stderr = self.stderr + self.writer.start() + return self.writer + + +class TaskPool(object): + """ + class manager + """ + + def __init__(self, opts): + """ + Initialize + """ + self.out_path = opts.outdir + self.err_path = opts.errdir + self.detail = True + self.parallel_num = opts.parallel + self.timeout = opts.timeout + self.shell_mode = opts.shellmode + self.inline = opts.inline + + self.tasks = [] + self.running_tasks = [] + self.writers = [] + self.task_status = {} + + def __get_task_files(self, host): + """ + Obtain the result file of the task. + """ + std_path = "" + if self.out_path: + std_path = os.path.join(self.out_path, host) + + err_path = "" + if self.err_path: + err_path = os.path.join(self.err_path, host) + + return std_path, err_path + + def add_task(self, host, cmd): + """ + Adding a Task to the Task Pool + """ + + f_out, f_err = self.__get_task_files(host) + task = TaskThread(host, cmd, f_out, f_err, self.detail, self.timeout, + self.shell_mode, self.inline) + self.tasks.append(task) + + def __get_writing_task(self): + """ + Check the task status and obtain the running tasks. + """ + still_running = [] + not_running = [] + + # Check whether the task times out. If the task times out, + # stop the task. 
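+        # For example (illustrative): with timeout=300, a task whose
+        # elapsed time exceeds 300 seconds is killed; kill() records
+        # "Timed out" in task.failures and sets a negative status.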
+        for task in self.running_tasks:
+            if task.check_timeout():
+                task.kill()
+
+        # filter the still running tasks and not running tasks
+        for task in self.running_tasks:
+            if task.is_alive():
+                still_running.append(task)
+            else:
+                self.task_status[task.host] = task.status
+                not_running.append(task)
+
+        # Start the writing thread of completed tasks
+        for task in not_running:
+            index = len(self.writers) + 1
+            writer = task.write(index)
+            if writer:
+                self.writers.append(writer)
+
+        self.running_tasks = still_running
+
+    def __start_limit_task(self):
+        """
+        Start tasks up to the configured degree of parallelism.
+        """
+        while self.tasks and len(self.running_tasks) < self.parallel_num:
+            task = self.tasks.pop(0)
+            self.running_tasks.append(task)
+            task.start()
+
+    def start(self):
+        """
+        Start to execute all tasks.
+        """
+        # Create the path of stdout and stderr
+        dir_permission = 0o700
+        if self.out_path and not os.path.exists(self.out_path):
+            os.makedirs(self.out_path, mode=dir_permission)
+        if self.err_path and not os.path.exists(self.err_path):
+            os.makedirs(self.err_path, mode=dir_permission)
+
+        # Do cmd
+        while self.tasks or self.running_tasks:
+            self.__get_writing_task()
+            self.__start_limit_task()
+            time.sleep(0.05)
+
+        # Wait for the file writing to complete.
+        for writer in self.writers:
+            writer.join()
+
+        return list(self.task_status.values())
+
+
+def read_host_file(host_file):
+    """
+    Reads the host file.
+    Lines are of the form: host.
+    Returns a list of hosts.
+    """
+    hosts = []
+    try:
+        if not os.path.isfile(host_file):
+            raise Exception("[GAUSS-50201] : The %s does not exist." %
+                            host_file)
+        with open(host_file) as fp:
+            for line in fp:
+                line = line.strip()
+                if line and not line.startswith('#'):
+                    hosts.append(line)
+    except (OSError, IOError) as err:
+        sys.stderr.write('Could not open hosts file: %s\n' % err)
+        sys.exit(1)
+
+    return hosts
diff --git a/script/gspylib/pssh/bin/pscp b/script/gspylib/pssh/bin/pscp
new file mode 100644
index 0000000..637f9ff
--- /dev/null
+++ b/script/gspylib/pssh/bin/pscp
@@ -0,0 +1,179 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# ############################################################################
+# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+#
+# openGauss is licensed under Mulan PSL v2.
+# You can use this software according to the terms
+# and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+#
+# http://license.coscl.org.cn/MulanPSL2
+#
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+# ----------------------------------------------------------------------------
+# Description : Parallel scp to the set of nodes.
+# For each node, do a scp [-r] local ip:remote. Note that
+# remote must be an absolute path.
+# ############################################################################
+try:
+    import optparse
+    import os
+    import shlex
+    import sys
+    import xml.etree.cElementTree as ETree
+    from TaskPool import TaskPool
+    from TaskPool import read_host_file
+except ImportError as e:
+    sys.exit("[GAUSS-52200] : Unable to import module: %s."
% str(e)) + +TIME_OUT = 0 +PARALLEL_NUM = 32 + + +def parse_command(): + """ + :return: parser + """ + parser = optparse.OptionParser(conflict_handler='resolve') + parser.disable_interspersed_args() + parser.usage = "%prog [OPTIONS] localPath remote" + parser.epilog = "Example: pscp -H hostname test.txt /home/omm/test.txt" + parser.add_option('-H', dest='hostname', action='append', + help='Nodes to be connected') + parser.add_option('-h', dest='hostfile', + help='Host file with each line per node') + parser.add_option('-t', dest='timeout', type='int', + help='Timeouts in seconds') + parser.add_option('-p', dest='parallel', type='int', + help='Maximum number of parallel') + parser.add_option('-o', dest='outdir', help='Output results folder') + parser.add_option('-e', dest='errdir', help='Error results folder') + parser.add_option('-r', dest='recursive', action='store_true', + help='recusively copy directories') + parser.add_option('-v', dest='verbose', action='store_true', + help='turn on diagnostic messages') + parser.add_option('-s', dest='shellmode', action='store_true', + help='Output only execution results') + parser.add_option('-x', dest='extra', + help='Additional scp parameters') + parser.add_option('-i', dest='inline', action='store_true', + help='aggregated output and error for each server') + parser.add_option('-O', dest='opt', action='append', + help='Additional scp parameters') + + return parser + + +def check_parse(parser_info): + """ + :param parser_info: Parameter key-value pairs + :return: opts_info: Parameter key-value pairs + args_info: file list + """ + # set defaults parallel and timeout value + defaults = dict(parallel=PARALLEL_NUM, timeout=TIME_OUT) + parser_info.set_defaults(**defaults) + opts_info, args_info = parser_info.parse_args() + + if len(args_info) < 2: + parser_info.error('path not specified.') + + if not opts_info.hostname and not opts_info.hostfile: + parser_info.error('Hosts not specified.') + + return opts_info, args_info + + +def run(hosts, opts, args): + """ + function: do run process + input : hosts, opts, args + output: NA + """ + local_path = args[0:-1] + remote_path = args[-1] + if not os.path.isabs(remote_path): + print("Remote path %s must be an absolute path." % remote_path) + sys.exit(3) + dir_permission = 0o700 + if opts.outdir and not os.path.exists(opts.outdir): + os.makedirs(opts.outdir, mode=dir_permission) + + if opts.errdir and not os.path.exists(opts.errdir): + os.makedirs(opts.errdir, mode=dir_permission) + + manager = TaskPool(opts) + for host in hosts: + env_dist = os.environ + if "HOST_IP" in env_dist.keys(): + tool_path = os.path.dirname( + os.path.dirname( + os.path.dirname( + os.path.dirname( + os.path.dirname(os.path.realpath(sys.argv[0])))))) + uploader_path = os.path.join(tool_path, 'script/uploader.py') + if not os.path.exists(uploader_path): + sys.exit(2) + + xml_path = os.path.join(tool_path, "cluster_default_agent.xml") + agent_port = 0 + try: + dom_tree = ETree.parse(xml_path) + root_node = dom_tree.getroot() + element = root_node.findall('CLUSTER')[0] + elem_array = element.findall('PARAM') + for elem in elem_array: + name = elem.attrib['name'] + if name == "agentPort": + agent_port = int(elem.attrib['value']) + except Exception as ex: + raise Exception("Failed to parsing xml. Error: \n%s." 
% + str(ex)) + for path in local_path: + cmd = ['python3', uploader_path, '-H', host, '-p', + str(agent_port), path, remote_path] + manager.add_task(host, cmd) + else: + cmd = ['scp', '-qCr'] + if opts.extra: + cmd.extend(shlex.split(opts.extra)) + if opts.opt: + for i in opts.opt: + cmd.append("-o") + cmd.append(i) + cmd.extend(local_path) + cmd.append('%s:%s' % (host, remote_path)) + manager.add_task(host, cmd) + try: + statuses = manager.start() + if min(statuses) < 0: + # At least one process was killed + sys.exit(3) + for status in statuses: + if status != 0: + sys.exit(4) + except Exception as ex: + print(str(ex)) + sys.exit(1) + + +if __name__ == "__main__": + """ + main + """ + try: + parsers = parse_command() + opts, args = check_parse(parsers) + if opts.hostfile: + host_list = read_host_file(opts.hostfile) + else: + host_list = opts.hostname + host_list = list(set(host_list)) + run(host_list, opts, args) + except Exception as e: + print(str(e)) + sys.exit(1) diff --git a/script/gspylib/pssh/bin/pssh b/script/gspylib/pssh/bin/pssh new file mode 100644 index 0000000..601ae13 --- /dev/null +++ b/script/gspylib/pssh/bin/pssh @@ -0,0 +1,182 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# ############################################################################ +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +# Description : Parallel ssh to the set of nodes in hosts.txt. +# For each node, this essentially does an "ssh host command". +# from each remote node in a directory. +# Each output file in that directory will be named +# by the corresponding remote node's hostname or IP address. 
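+# Example (illustrative):
+#     pssh -h hosts.txt -t 60 -o /tmp/out 'uname -r'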
+# ############################################################################ +import os +import optparse +import sys +import shlex +import xml.etree.cElementTree as ETree +from TaskPool import TaskPool +from TaskPool import read_host_file + +TIME_OUT = 300 +PARALLEL_NUM = 32 + + +def parse_command(): + """ + return: parser + """ + parser = optparse.OptionParser(conflict_handler='resolve') + parser.disable_interspersed_args() + parser.usage = "%prog [OPTIONS] command" + parser.epilog = "Example: pssh -H hostname 'id'" + parser.add_option('-H', dest='hostname', action='append', + help='Nodes to be connected') + parser.add_option('-h', dest='hostfile', + help='Host file with each line per node') + parser.add_option('-t', dest='timeout', type='int', + help='Timeouts in seconds') + parser.add_option('-p', dest='parallel', type='int', + help='Maximum number of parallel') + parser.add_option('-o', dest='outdir', help='Output results folder') + parser.add_option('-e', dest='errdir', help='Error results folder') + parser.add_option('-P', dest='print', action='store_true', + help='Print output') + parser.add_option('-s', dest='shellmode', action='store_true', + help='Output only execution results') + parser.add_option('-x', dest='extra', + help='Extra command-line arguments') + parser.add_option('-i', dest='inline', action='store_true', + help='aggregated output and error for each server') + parser.add_option('-O', dest='opt', action='append', + help='Additional scp parameters') + return parser + + +def check_parse(parser_info): + """ + :param parser_info: Parameter key-value pairs + :return: opts_info: Parameter key-value pairs + args_info: commands list + """ + # set defaults parallel and timeout value + defaults = dict(parallel=PARALLEL_NUM, timeout=TIME_OUT) + parser_info.set_defaults(**defaults) + opts_info, args_info = parser_info.parse_args() + + if not opts_info: + parser_info.error("The commands is request.") + if not opts_info.hostname and not opts_info.hostfile: + parser_info.error("The host info is request.") + + return opts_info, args_info + + +def run(hosts): + """ + function: do run process + input : hosts + output: NA + """ + + manager = TaskPool(opts) + for host in hosts: + env_dist = os.environ + if "HOST_IP" in env_dist.keys(): + tool_path = os.path.join(os.path.dirname(__file__), "../../../../") + cmd_sender_path = os.path.join(tool_path, 'script/cmd_sender.py') + + if not os.path.exists(cmd_sender_path): + sys.exit(2) + + xml_path = os.path.join(tool_path, "cluster_default_agent.xml") + agent_port = 0 + tmp_dir = "" + try: + dom_tree = ETree.parse(xml_path) + root_node = dom_tree.getroot() + element = root_node.findall('CLUSTER')[0] + elem_array = element.findall('PARAM') + for elem in elem_array: + name = elem.attrib['name'] + if name == "agentPort": + agent_port = int(elem.attrib['value']) + if name == "tmpMppdbPath": + tmp_dir = str(elem.attrib['value']) + except Exception as ex: + raise Exception("Failed to parsing xml. Error: \n%s." 
+                                % str(ex))
+
+            action_file = os.path.join(tmp_dir, ".action_flag_file")
+            if os.path.exists(action_file):
+                with open(action_file, "r") as fp:
+                    result = fp.read()
+                action = result.strip()
+            else:
+                action = "common"
+            cmd = ['python3', cmd_sender_path, '-H', host, '-p',
+                   str(agent_port), '-a', action, '-t', str(opts.timeout)]
+        else:
+            cmd = ["ssh", host, "-q",
+                   "-o", "SendEnv=PSSH_NODENUM PSSH_HOST",
+                   "-o", "BatchMode=yes",
+                   "-o", "ConnectionAttempts=10",
+                   "-o", "ConnectTimeout=30",
+                   "-o", "NumberOfPasswordPrompts=1",
+                   "-o", "ServerAliveCountMax=10",
+                   "-o", "ServerAliveInterval=30",
+                   "-o", "TCPKeepAlive=yes"]
+            if opts.extra:
+                extra_info = shlex.split(opts.extra)
+                cmd.extend(extra_info)
+            if opts.opt:
+                for i in opts.opt:
+                    cmd.append("-o")
+                    cmd.append(i)
+            cmd.extend(args)
+        manager.add_task(host, cmd)
+    try:
+        statuses = manager.start()
+        if min(statuses) < 0:
+            # At least one process was killed.
+            sys.exit(3)
+        for status in statuses:
+            # Exit status 255 means the ssh connection itself failed.
+            if status == 255 and not opts.shellmode:
+                sys.exit(4)
+        for status in statuses:
+            if status != 0 and not opts.shellmode:
+                sys.exit(5)
+            elif status != 0:
+                sys.exit(status)
+
+    except Exception as ex:
+        print(str(ex))
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    """
+    main
+    """
+    try:
+        parsers = parse_command()
+        opts, args = check_parse(parsers)
+        if opts.hostfile:
+            host_list = read_host_file(opts.hostfile)
+        else:
+            host_list = opts.hostname
+        host_list = list(set(host_list))
+        run(host_list)
+    except Exception as e:
+        print(str(e))
+        sys.exit(1)
diff --git a/script/gspylib/threads/SshTool.py b/script/gspylib/threads/SshTool.py
new file mode 100644
index 0000000..90bc95e
--- /dev/null
+++ b/script/gspylib/threads/SshTool.py
@@ -0,0 +1,860 @@
+# -*- coding:utf-8 -*-
+#############################################################################
+# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+#
+# openGauss is licensed under Mulan PSL v2.
+# You can use this software according to the terms
+# and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+#
+#          http://license.coscl.org.cn/MulanPSL2
+#
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+# ----------------------------------------------------------------------------
+# Description : SshTool.py is a utility to support ssh tools
+#############################################################################
+import subprocess
+import os
+import sys
+import datetime
+import weakref
+import getpass
+import time
+import re
+from random import sample
+
+sys.path.append(sys.path[0] + "/../../")
+from gspylib.common.ErrorCode import ErrorCode
+from gspylib.common.Common import DefaultValue
+from gspylib.os.gsfile import g_file
+from gspylib.common.GaussLog import GaussLog
+
+
+class SshTool():
+    """
+    Class for controlling multiple hosts
+    """
+
+    def __init__(self, hostNames, logFile=None,
+                 timeout=DefaultValue.TIMEOUT_PSSH_COMMON, key=""):
+        '''
+        Constructor
+        '''
+        self.hostNames = hostNames
+        self.__logFile = logFile
+        self.__pid = os.getpid()
+        self.__timeout = timeout + 10
+        self._finalizer = weakref.finalize(self, self.clenSshResultFiles)
+
+        currentTime = str(datetime.datetime.now()).replace(" ", "_").replace(
+            ".", "_")
+        randomnum = ''.join(sample('0123456789', 3))
+        # Can the tmp path always be accessed?
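+        # Illustrative note, not from the original code: the pid, timestamp
+        # and three random digits give each SshTool instance unique work
+        # files, e.g. (hypothetical values) a hosts file such as
+        #     /tmp/gauss_hosts_file_12345_2021-04-10_22:03:44_123456_917
+        # when key is "" in the branch below.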
+ if key == "": + self.__hostsFile = "/tmp/gauss_hosts_file_%d_%s_%s" % ( + self.__pid, currentTime, randomnum) + self.__resultFile = "/tmp/gauss_result_%d_%s_%s.log" % ( + self.__pid, currentTime, randomnum) + self.__outputPath = "/tmp/gauss_output_files_%d_%s_%s" % ( + self.__pid, currentTime, randomnum) + self.__errorPath = "/tmp/gauss_error_files_%d_%s_%s" % ( + self.__pid, currentTime, randomnum) + else: + self.__hostsFile = "/tmp/gauss_hosts_file_%d_%s_%s_%s" % ( + self.__pid, key, currentTime, randomnum) + self.__resultFile = "/tmp/gauss_result_%d_%s_%s_%s.log" % ( + self.__pid, key, currentTime, randomnum) + self.__outputPath = "/tmp/gauss_output_files_%d_%s_%s_%s" % ( + self.__pid, key, currentTime, randomnum) + self.__errorPath = "/tmp/gauss_error_files_%d_%s_%s_%s" % ( + self.__pid, key, currentTime, randomnum) + + self.__resultStatus = {} + if logFile is None: + self.__logFile = "/dev/null" + + # before using, clean the old ones + g_file.removeFile(self.__hostsFile) + g_file.removeFile(self.__resultFile) + + if os.path.exists(self.__outputPath): + g_file.removeDirectory(self.__outputPath) + + if os.path.exists(self.__errorPath): + g_file.removeDirectory(self.__errorPath) + + self.__writeHostFiles() + + def clenSshResultFiles(self): + """ + function: Delete file + input : NA + output: NA + """ + if os.path.exists(self.__hostsFile): + g_file.removeFile(self.__hostsFile) + + if os.path.exists(self.__resultFile): + g_file.removeFile(self.__resultFile) + + if os.path.exists(self.__outputPath): + g_file.removeDirectory(self.__outputPath) + + if os.path.exists(self.__errorPath): + g_file.removeDirectory(self.__errorPath) + + def __del__(self): + """ + function: Delete file + input : NA + output: NA + """ + self._finalizer() + + def exchangeHostnameSshKeys(self, user, pwd, mpprcFile=""): + """ + function: Exchange ssh public keys for specified user, using hostname + input : user, pwd, mpprcFile + output: NA + """ + if mpprcFile != "": + exkeyCmd = "su - %s -c 'source %s&&mvxssh-exkeys -f %s -p" \ + " %s' 2>>%s" % (user, mpprcFile, self.__hostsFile, + pwd, self.__logFile) + else: + exkeyCmd = "su - %s -c 'source /etc/profile&&mvxssh-exkeys -f" \ + " %s -p %s' 2>>%s" % (user, self.__hostsFile, + pwd, self.__logFile) + (status, output) = subprocess.getstatusoutput(exkeyCmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_511["GAUSS_51112"] % user + + "Error: \n%s" % output.replace(pwd, "******") + + "\nYou can comment Cipher 3des." 
+ " Ciphers aes128-cbc and MACs in" + " /etc/ssh/ssh_config and try again.") + + def exchangeIpSshKeys(self, user, pwd, ips, mpprcFile=""): + """ + function: Exchange ssh public keys for specified user, + using ip address + input : user, pwd, ips, mpprcFile + output: NA + """ + if mpprcFile != "": + exkeyCmd = "su - %s -c 'source %s&&mvxssh-exkeys " \ + % (user, mpprcFile) + else: + exkeyCmd = "su - %s -c 'source /etc/profile&&mvxssh-exkeys " \ + % user + for ip in ips: + exkeyCmd += " -h %s " % ip.strip() + exkeyCmd += "-p %s' 2>>%s" % (pwd, self.__logFile) + (status, output) = subprocess.getstatusoutput(exkeyCmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_511["GAUSS_51112"] + % user + "Error: \n%s" + % output.replace(pwd, "******") + + "\nYou can comment Cipher 3des, Ciphers" + " aes128-cbc and MACs in /etc/ssh/ssh_config" + " and try again.") + + def createTrust(self, user, pwd, ips=None, mpprcFile="", + skipHostnameSet=False, preMode=False): + """ + function: create trust for specified user with both ip and hostname, + when using N9000 tool create trust failed + do not support using a normal user to create trust for + another user. + input : user, pwd, ips, mpprcFile, skipHostnameSet + output: NA + """ + tmp_hosts = "/tmp/tmp_hosts_%d" % self.__pid + cnt = 0 + status = 0 + output = "" + if ips is None: + ips = [] + try: + g_file.removeFile(tmp_hosts) + # 1.prepare hosts file + for ip in ips: + cmd = "echo %s >> %s 2>/dev/null" % (ip, tmp_hosts) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] + % tmp_hosts + " Error:\n%s." % output + + "The cmd is %s" % cmd) + g_file.changeMode(DefaultValue.KEY_HOSTS_FILE, tmp_hosts, False, + "python") + + # 2.call createtrust script + create_trust_file = "gs_sshexkey" + if pwd is None or len(str(pwd)) == 0: + GaussLog.printMessage("Please enter password for current" + " user[%s]." 
% user) + pwd = getpass.getpass() + + if (mpprcFile != "" and + g_file.checkFilePermission(mpprcFile, True) and + self.checkMpprcfile(user, mpprcFile)): + cmd = "source %s; %s -f %s -l '%s'" % ( + mpprcFile, create_trust_file, tmp_hosts, + self.__logFile) + elif (mpprcFile == "" and g_file.checkFilePermission( + '/etc/profile', True)): + cmd = "source /etc/profile;" \ + " %s -f %s -l '%s'" % (create_trust_file, + tmp_hosts, self.__logFile) + + if skipHostnameSet: + cmd += " --skip-hostname-set" + cmd += " 2>&1" + + tempcmd = ["su", "-", user, "-c"] + tempcmd.append(cmd) + cmd = tempcmd + + p = subprocess.Popen(cmd, stdin=subprocess.PIPE, + stdout=subprocess.PIPE, stderr=subprocess.PIPE) + if os.getuid() != 0: + time.sleep(5) + p.stdin.write((pwd+"\n").encode(encoding="utf-8")) + time.sleep(10) + p.stdin.write((pwd+"\n").encode(encoding="utf-8")) + (output, err) = p.communicate() + # 3.delete hosts file + g_file.removeFile(tmp_hosts) + if output is not None: + output = str(output, encoding='utf-8') + if re.search("\[GAUSS\-", output): + if re.search("Please enter password", output): + GaussLog.printMessage( + ErrorCode.GAUSS_503["GAUSS_50306"] % user) + else: + GaussLog.printMessage(output.strip()) + sys.exit(1) + else: + GaussLog.printMessage(output.strip()) + else: + sys.exit(1) + except Exception as e: + g_file.removeFile(tmp_hosts) + raise Exception(str(e)) + + def checkMpprcfile(self, username, filePath): + """ + function: + check if given user has operation permission for Mpprcfile + precondition: + 1.user should be exist---root/cluster user + 2.filePath should be an absolute path + postcondition: + 1.return True or False + input : username,filePath + output: True/False + """ + ownerPath = os.path.split(filePath)[0] + cmd = "su - %s -c 'cd %s'" % (username, ownerPath) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_500["GAUSS_50004"] + % '-sep-env-file' + " Error:\n%s." 
% output + + "The cmd is %s" % cmd) + + return True + + def getUserOSProfile(self, env_file=""): + """ + function: get user os profile + input : env_file + output: mpprcFile, userProfile, osProfile + """ + if env_file != "": + mpprcFile = env_file + else: + mpprcFile = DefaultValue.getEnv(DefaultValue.MPPRC_FILE_ENV) + + if mpprcFile != "" and mpprcFile is not None: + userProfile = mpprcFile + else: + userProfile = "~/.bashrc" + osProfile = "/etc/profile" + return mpprcFile, userProfile, osProfile + + def getGPHOMEPath(self, osProfile): + """ + function: get GPHOME path + input : osProfile + output: output + """ + try: + cmd = "source %s && echo $GPHOME" % osProfile + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0 or not output or output.strip() == "": + raise Exception(ErrorCode.GAUSS_518["GAUSS_51802"] % "GPHOME" + + "The cmd is %s" % cmd) + return output.strip() + except Exception as e: + raise Exception(str(e)) + + def parseSshResult(self, hostList=None): + """ + function: parse ssh result + input : hostList + output: resultMap, outputCollect + """ + try: + if hostList is None: + hostList = [] + outputCollect = "" + prefix = "" + resultMap = self.__readCmdResult(self.__resultFile, len(hostList)) + for host in hostList: + sshOutPutFile = "%s/%s" % (self.__outputPath, host) + sshErrorPutFile = "%s/%s" % (self.__errorPath, host) + if resultMap[host] == DefaultValue.SUCCESS: + prefix = "SUCCESS" + else: + prefix = "FAILURE" + outputCollect += "[%s] %s:\n" % (prefix, str(host)) + if os.path.isfile(sshOutPutFile): + context = "" + with open(sshOutPutFile, "r") as fp: + context = fp.read() + outputCollect += context + if os.path.isfile(sshErrorPutFile): + context = "" + with open(sshErrorPutFile, "r") as fp: + context = fp.read() + outputCollect += context + except Exception as e: + raise Exception(str(e)) + return resultMap, outputCollect + + def timeOutClean(self, cmd, psshpre, hostList=None, env_file="", + parallel_num=300, signal=9): + """ + function: timeout clean + """ + if hostList is None: + hostList = [] + pstree = "python3 %s -sc" % os.path.realpath(os.path.dirname( + os.path.realpath(__file__)) + "/../../py_pstree.py") + mpprcFile, userProfile, osProfile = self.getUserOSProfile(env_file) + # kill the parent and child process. 
get all process by py_pstree.py + timeOutCmd = "source %s && pidList=\`ps aux | grep \\\"%s\\\" |" \ + " grep -v 'grep' | awk '{print \$2}' | xargs \`; " \ + % (osProfile, cmd) + timeOutCmd += "for pid in \$pidList; do %s \$pid | xargs -r -n 100" \ + " kill -%s; done" % (pstree, str(signal)) + if len(hostList) == 0: + if os.getuid() == 0 and (mpprcFile == "" or not mpprcFile): + sshCmd = "source %s && %s -t %s -h %s -P -p %s -o %s -e" \ + " %s \"source %s; %s\" 2>&1 | tee %s" % \ + ( + osProfile, psshpre, self.__timeout, + self.__hostsFile, + parallel_num, self.__outputPath, + self.__errorPath, osProfile, timeOutCmd, + self.__resultFile) + else: + sshCmd = "source %s && %s -t %s -h %s -P -p %s -o %s -e" \ + " %s \"source %s;source %s;%s\" 2>&1 | tee %s" % \ + ( + osProfile, psshpre, self.__timeout, + self.__hostsFile, + parallel_num, self.__outputPath, + self.__errorPath, osProfile, userProfile, + timeOutCmd, + self.__resultFile) + else: + if os.getuid() == 0 and (mpprcFile == "" or not mpprcFile): + sshCmd = "source %s && %s -t %s -H %s -P -p %s -o %s -e" \ + " %s \"source %s; %s\" 2>&1 | tee %s" % \ + (osProfile, psshpre, self.__timeout, + " -H ".join(hostList), parallel_num, + self.__outputPath, + self.__errorPath, osProfile, timeOutCmd, + self.__resultFile) + else: + sshCmd = "source %s && %s -t %s -H %s -P -p %s -o %s -e" \ + " %s \"source %s;source %s;%s\" 2>&1 | tee %s" % \ + (osProfile, psshpre, self.__timeout, + " -H ".join(hostList), parallel_num, + self.__outputPath, + self.__errorPath, osProfile, userProfile, + timeOutCmd, self.__resultFile) + subprocess.getstatusoutput(sshCmd) + + def executeCommand(self, cmd, descript, cmdReturn=DefaultValue.SUCCESS, + hostList=None, env_file="", parallel_num=300, + checkenv=False): + """ + function: Execute command on all hosts + input : cmd, descript, cmdReturn, hostList, env_file, parallel_num + output: NA + """ + sshCmd = "" + localMode = False + resultMap = {} + outputCollect = "" + isTimeOut = False + if hostList is None: + hostList = [] + try: + mpprcFile, userProfile, osProfile = self.getUserOSProfile( + env_file) + GPHOME = self.getGPHOMEPath(osProfile) + psshpre = "python3 %s/script/gspylib/pssh/bin/pssh" % GPHOME + + # clean result file + if os.path.exists(self.__resultFile): + os.remove(self.__resultFile) + + if len(hostList) == 0: + if os.getuid() == 0 and (mpprcFile == "" or not mpprcFile): + sshCmd = "source %s && %s -t %s -h %s -P -p %s -o %s -e" \ + " %s \"source %s; %s\" 2>&1 | tee %s" \ + % (osProfile, psshpre, self.__timeout, + self.__hostsFile, parallel_num, + self.__outputPath, self.__errorPath, + osProfile, cmd, self.__resultFile) + else: + sshCmd = "source %s && %s -t %s -h %s -P -p %s -o %s -e" \ + " %s \"source %s;source %s;%s\" 2>&1 | tee %s" \ + % (osProfile, psshpre, self.__timeout, + self.__hostsFile, parallel_num, + self.__outputPath, self.__errorPath, + osProfile, userProfile, cmd, + self.__resultFile) + hostList = self.hostNames + else: + if os.getuid() == 0 and (mpprcFile == "" or not mpprcFile): + sshCmd = "source %s && %s -t %s -H %s -P -p %s -o %s -e" \ + " %s \"source %s; %s\" 2>&1 | tee %s" \ + % (osProfile, psshpre, self.__timeout, + " -H ".join(hostList), parallel_num, + self.__outputPath, self.__errorPath, + osProfile, cmd, self.__resultFile) + else: + sshCmd = "source %s && %s -t %s -H %s -P -p %s -o %s -e" \ + " %s \"source %s;source %s;%s\" 2>&1 | tee %s" \ + % (osProfile, psshpre, self.__timeout, + " -H ".join(hostList), parallel_num, + self.__outputPath, self.__errorPath, + osProfile, 
userProfile, cmd, + self.__resultFile) + + # single cluster or execute only in local node. + if (len(hostList) == 1 and + hostList[0] == DefaultValue.GetHostIpOrName() + and cmd.find(" --lock-cluster ") < 0): + localMode = True + if os.getuid() == 0 and (mpprcFile == "" or not mpprcFile): + sshCmd = "source %s ; %s 2>&1" % (osProfile, cmd) + else: + sshCmd = "source %s ; source %s; %s 2>&1" \ + % (osProfile, userProfile, cmd) + + # if it is localMode, it means does not call pssh, + # so there is no time out + (status, output) = subprocess.getstatusoutput(sshCmd) + # when the pssh is time out, kill parent and child process + if not localMode: + if output.find("Timed out, Killed by signal 9") > 0: + self.timeOutClean(cmd, psshpre, hostList, env_file, + parallel_num) + isTimeOut = True + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] + % sshCmd + " Error:\n%s" % output) + if status != 0: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] + % sshCmd + " Error:\n%s" % output) + + if localMode: + resultMap[hostList[0]] = DefaultValue.SUCCESS if status == 0 \ + else DefaultValue.FAILURE + outputCollect = "[%s] %s:\n%s" \ + % ("SUCCESS" if status == 0 else "FAILURE", + hostList[0], output) + else: + # ip and host name should match here + resultMap, outputCollect = self.parseSshResult(hostList) + except Exception as e: + if not isTimeOut: + self.clenSshResultFiles() + raise Exception(str(e)) + + for host in hostList: + if resultMap.get(host) != cmdReturn: + if outputCollect.find("GAUSS-5") == -1: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] + % cmd + " Result:%s.\nError:\n%s" + % (resultMap, outputCollect)) + else: + raise Exception(outputCollect) + if checkenv: + for res in output.split("\n"): + if res.find("[SUCCESS]") >= 0: + continue + elif res == "": + continue + else: + if mpprcFile != "" and mpprcFile is not None: + envfile = mpprcFile + " and /etc/profile" + else: + envfile = "/etc/profile and ~/.bashrc" + raise Exception(ErrorCode.GAUSS_518["GAUSS_51808"] + % res + "Please check %s." 
% envfile) + + def getSshStatusOutput(self, cmd, hostList=None, env_file="", + gp_path="", parallel_num=300, ssh_config=""): + """ + function: Get command status and output + input : cmd, hostList, env_file, gp_path, parallel_num + output: resultMap, outputCollect + """ + sshCmd = "" + localMode = False + resultMap = {} + outputCollect = "" + isTimeOut = False + need_replace_quotes = False + + if hostList is None: + hostList = [] + + if cmd.find("[need_replace_quotes]") != -1: + cmd = cmd.replace("[need_replace_quotes]", "") + need_replace_quotes = True + fp = None + + try: + mpprcFile, userProfile, osProfile = self.getUserOSProfile( + env_file) + # clean result file + if os.path.exists(self.__resultFile): + os.remove(self.__resultFile) + + if gp_path == "": + GPHOME = self.getGPHOMEPath(osProfile) + else: + GPHOME = gp_path.strip() + psshpre = "python3 %s/script/gspylib/pssh/bin/pssh" % GPHOME + if ssh_config: + if os.path.exists(ssh_config) and os.path.isfile(ssh_config): + psshpre += ' -x "-F %s" ' % ssh_config + + if len(hostList) == 0: + if os.getuid() == 0 and (mpprcFile == "" or not mpprcFile): + sshCmd = "source %s && %s -t %s -h %s -P -p %s -o %s -e" \ + " %s \"source %s; %s\" 2>&1 | tee %s" \ + % (osProfile, psshpre, self.__timeout, + self.__hostsFile, parallel_num, + self.__outputPath, self.__errorPath, + osProfile, cmd, self.__resultFile) + else: + sshCmd = "source %s && %s -t %s -h %s -P -p %s -o %s -e" \ + " %s \"source %s;source %s;%s\" 2>&1 | tee %s" \ + % (osProfile, psshpre, self.__timeout, + self.__hostsFile, parallel_num, + self.__outputPath, self.__errorPath, + osProfile, userProfile, cmd, + self.__resultFile) + hostList = self.hostNames + else: + if need_replace_quotes: + remote_cmd = cmd.replace("\"", "\\\"") + else: + remote_cmd = cmd + if os.getuid() == 0 and (mpprcFile == "" or not mpprcFile): + sshCmd = "source %s && %s -t %s -H %s -P -p %s -o %s -e" \ + " %s \"source %s; %s\" 2>&1 | tee %s" \ + % (osProfile, psshpre, self.__timeout, + " -H ".join(hostList), parallel_num, + self.__outputPath, self.__errorPath, + osProfile, remote_cmd, self.__resultFile) + else: + sshCmd = "source %s && %s -t %s -H %s -P -p %s -o %s -e" \ + " %s \"source %s;source %s;%s\" 2>&1 | tee %s" \ + % (osProfile, psshpre, self.__timeout, + " -H ".join(hostList), parallel_num, + self.__outputPath, self.__errorPath, + osProfile, userProfile, remote_cmd, + self.__resultFile) + + # single cluster or execute only in local node. 
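+            # Illustrative note, not from the original code: at this point a
+            # non-local sshCmd is shaped roughly like (hypothetical paths)
+            #     source /etc/profile && python3 .../pssh/bin/pssh -t <timeout>
+            #     -H host1 -H host2 -P -p 300 -o <outdir> -e <errdir>
+            #     "source /etc/profile; <cmd>" 2>&1 | tee <resultFile>
+            # Per-host stdout/stderr land in outdir/errdir, while the
+            # [SUCCESS]/[FAILURE] summary is captured in the result file.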
+ if (len(hostList) == 1 and + hostList[0] == DefaultValue.GetHostIpOrName()): + localMode = True + if os.getuid() == 0 and (mpprcFile == "" or not mpprcFile): + sshCmd = "source %s ; %s 2>&1" % (osProfile, cmd) + else: + sshCmd = "source %s ; source %s; %s 2>&1" % (osProfile, + userProfile, + cmd) + + (status, output) = subprocess.getstatusoutput(sshCmd) + # when the pssh is time out, kill parent and child process + if not localMode: + if output.find("Timed out, Killed by signal 9") > 0: + isTimeOut = True + self.timeOutClean(cmd, psshpre, hostList, env_file, + parallel_num) + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] + % sshCmd + " Error:\n%s" % output) + if status != 0: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] + % sshCmd + " Error:\n%s" % output) + + if localMode: + dir_permission = 0o700 + if status == 0: + resultMap[hostList[0]] = DefaultValue.SUCCESS + outputCollect = "[%s] %s:\n%s" % ("SUCCESS", hostList[0], + output) + + if not os.path.exists(self.__outputPath): + os.makedirs(self.__outputPath, mode=dir_permission) + file_path = os.path.join(self.__outputPath, hostList[0]) + g_file.createFileInSafeMode(file_path) + with open(file_path, "w") as fp: + fp.write(output) + fp.flush() + fp.close() + else: + resultMap[hostList[0]] = DefaultValue.FAILURE + outputCollect = "[%s] %s:\n%s" % ("FAILURE", hostList[0], + output) + + if not os.path.exists(self.__errorPath): + os.makedirs(self.__errorPath, mode=dir_permission) + file_path = os.path.join(self.__errorPath, hostList[0]) + g_file.createFileInSafeMode(file_path) + with open(file_path, "w") as fp: + fp.write(output) + fp.flush() + fp.close() + else: + resultMap, outputCollect = self.parseSshResult(hostList) + except Exception as e: + if fp: + fp.close() + if not isTimeOut: + self.clenSshResultFiles() + raise Exception(str(e)) + + for host in hostList: + if resultMap.get(host) != DefaultValue.SUCCESS: + if outputCollect.find("GAUSS-5") == -1: + outputCollect = ErrorCode.GAUSS_514["GAUSS_51400"] \ + % cmd + " Error:\n%s." 
% outputCollect + break + + return resultMap, outputCollect + + def parseSshOutput(self, hostList): + """ + function: + parse ssh output on every host + input: + hostList: the hostname list of all hosts + output: + a dict, like this "hostname : info of this host" + hiden info: + the output info of all hosts + ppp: + for a host in hostList + if outputfile exists + open file with the same name + read context into a str + close file + save info of this host + else + raise exception + return host info list + """ + resultMap = {} + try: + for host in hostList: + context = "" + sshOutPutFile = "%s/%s" % (self.__outputPath, host) + sshErrorPutFile = "%s/%s" % (self.__errorPath, host) + + if os.path.isfile(sshOutPutFile): + with open(sshOutPutFile, "r") as fp: + context = fp.read() + resultMap[host] = context + if os.path.isfile(sshErrorPutFile): + with open(sshErrorPutFile, "r") as fp: + context += fp.read() + resultMap[host] = context + if (not os.path.isfile(sshOutPutFile) and + not os.path.isfile(sshErrorPutFile)): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] + % "%s or %s" + % (sshOutPutFile, sshErrorPutFile)) + except Exception as e: + raise Exception(str(e)) + + return resultMap + + def scpFiles(self, srcFile, targetDir, hostList=None, env_file="", + gp_path="", parallel_num=300): + """ + function: copy files to other path + input : srcFile, targetDir, hostList, env_file, gp_path, parallel_num + output: NA + """ + scpCmd = "source /etc/profile" + outputCollect = "" + if hostList is None: + hostList = [] + try: + if env_file != "": + mpprcFile = env_file + else: + mpprcFile = DefaultValue.getEnv(DefaultValue.MPPRC_FILE_ENV) + if mpprcFile != "" and mpprcFile is not None: + scpCmd += " && source %s" % mpprcFile + + if gp_path == "": + cmdpre = "%s && echo $GPHOME" % scpCmd + (status, output) = subprocess.getstatusoutput(cmdpre) + if status != 0 or not output or output.strip() == "": + raise Exception(ErrorCode.GAUSS_518["GAUSS_51802"] + % "GPHOME" + "The cmd is %s" % cmdpre) + GPHOME = output.strip() + else: + GPHOME = gp_path.strip() + pscppre = "python3 %s/script/gspylib/pssh/bin/pscp" % GPHOME + + if len(hostList) == 0: + scpCmd += " && %s -r -v -t %s -p %s -h %s -o %s -e %s %s %s" \ + " 2>&1 | tee %s" % (pscppre, self.__timeout, + parallel_num, self.__hostsFile, + self.__outputPath, + self.__errorPath, srcFile, + targetDir, self.__resultFile) + hostList = self.hostNames + else: + scpCmd += " && %s -r -v -t %s -p %s -H %s -o %s -e %s %s %s" \ + " 2>&1 | tee %s" % (pscppre, self.__timeout, + parallel_num, + " -H ".join(hostList), + self.__outputPath, + self.__errorPath, srcFile, + targetDir, self.__resultFile) + (status, output) = subprocess.getstatusoutput(scpCmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50216"] + % ("file [%s]" % srcFile) + + " To directory: %s." + % targetDir + " Error:\n%s" % output) + if output.find("Timed out") > 0: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % scpCmd + + " Error:\n%s" % output) + + # ip and host name should match here + resultMap, outputCollect = self.parseSshResult(hostList) + except Exception as e: + self.clenSshResultFiles() + raise Exception(str(e)) + + for host in hostList: + if resultMap.get(host) != DefaultValue.SUCCESS: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50216"] + % ("file [%s]" % srcFile) + + " To directory: %s." 
% targetDir + + " Command: %s.\nError:\n%s" % (scpCmd, + outputCollect)) + + def checkRemoteFileExist(self, node, fileAbsPath, mpprcFile): + """ + check remote node exist file + this method depend on directory permisstion 'x' + if exist return true,else retrun false + """ + sshcmd = "if [ -e '%s' ];then echo 'exist tar file yes flag';" \ + "else echo 'exist tar file no flag';fi" % fileAbsPath + if node != DefaultValue.GetHostIpOrName(): + outputCollect = self.getSshStatusOutput(sshcmd, + [node], + mpprcFile)[1] + else: + outputCollect = subprocess.getstatusoutput(sshcmd)[1] + if 'exist tar file yes flag' in outputCollect: + return True + elif 'exist tar file no flag' in outputCollect: + return False + else: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % sshcmd + + "On node %s" % node) + + def __writeHostFiles(self): + """ + function: Write all hostname to a file + input : NA + output: NA + """ + try: + g_file.createFileInSafeMode(self.__hostsFile) + with open(self.__hostsFile, "w") as fp: + for host in self.hostNames: + fp.write("%s\n" % host) + fp.flush() + subprocess.getstatusoutput("chmod %s '%s'" + % (DefaultValue.FILE_MODE, + self.__hostsFile)) + except Exception as e: + g_file.removeFile(self.__hostsFile) + raise Exception(ErrorCode.GAUSS_502["GAUSS_50205"] % "host file" + + " Error: \n%s" % str(e)) + + # change the mode + # if it created by root user,and permission is 640, then + # install user will have no permission to read it, so we should set + # its permission 644. + g_file.changeMode(DefaultValue.KEY_HOSTS_FILE, self.__hostsFile, False, + "python") + + def __readCmdResult(self, resultFile, hostNum): + """ + function: Read command result + input : resultFile, hostNum, cmd + output: resultMap + """ + resultMap = {} + try: + with open(resultFile, "r") as fp: + lines = fp.readlines() + context = "".join(lines) + for line in lines: + resultPair = line.strip().split(" ") + if len(resultPair) >= 4 and resultPair[2] == "[FAILURE]": + resultMap[resultPair[3]] = "Failure" + if len(resultPair) >= 4 and resultPair[2] == "[SUCCESS]": + resultMap[resultPair[3]] = "Success" + + if len(resultMap) != hostNum: + raise Exception(ErrorCode.GAUSS_516["GAUSS_51637"] + % ("valid return item number [%d]" + % len(resultMap), "host number[%d]" + % hostNum) + " The return result:\n%s." + % context) + except Exception as e: + raise Exception(str(e)) + + return resultMap + + def setTimeOut(self, timeout): + """ + function: Set a new timeout value for ssh tool. + :param timeout: The new timeout value in seconds. + :return: void + """ + self.__timeout = timeout diff --git a/script/gspylib/threads/__init__.py b/script/gspylib/threads/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/script/gspylib/threads/parallelTool.py b/script/gspylib/threads/parallelTool.py new file mode 100644 index 0000000..ec91788 --- /dev/null +++ b/script/gspylib/threads/parallelTool.py @@ -0,0 +1,95 @@ +# -*- coding:utf-8 -*- +############################################################################# +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. 
+# See the Mulan PSL v2 for more details.
+# ----------------------------------------------------------------------------
+# Description : parallelTool.py is a utility to support parallel control
+#               by multiprocessing
+#############################################################################
+import threading
+import multiprocessing
+import subprocess
+import sys
+
+from multiprocessing.dummy import Pool as ThreadPool
+
+
+class parallelTool:
+    '''
+    Class for multi-parallel control on one host
+    '''
+    DEFAULT_PARALLEL_NUM = 12
+
+    def __init__(self):
+        '''
+        Constructor
+        '''
+
+    @staticmethod
+    def getCpuCount(parallelJobs=0):
+        """
+        function: get the cpu count of the current board
+                  (cat /proc/cpuinfo | grep processor)
+        input: parallelJobs
+        output: cpuSet
+        """
+        if (parallelJobs != 0):
+            return parallelJobs
+        # do this function to get the parallel number
+        cpuSet = multiprocessing.cpu_count()
+        if (cpuSet > 1):
+            return cpuSet
+        else:
+            return parallelTool.DEFAULT_PARALLEL_NUM
+
+    @staticmethod
+    def parallelExecute(func, paraList, parallelJobs=0):
+        """
+        function: execute a python function on each element of a list
+                  in parallel through a thread pool
+        input: func, paraList, parallelJobs
+        output: list
+        """
+        jobs = parallelTool.getCpuCount(parallelJobs)
+        if (jobs > len(paraList)):
+            jobs = len(paraList)
+        pool = ThreadPool(jobs)
+        results = pool.map(func, paraList)
+        pool.close()
+        pool.join()
+        return results
+
+
+class CommandThread(threading.Thread):
+    """
+    The class is used to execute command in thread
+    """
+
+    def __init__(self, cmd):
+        '''
+        Constructor
+        '''
+        threading.Thread.__init__(self)
+        self.command = cmd
+        self.cmdStauts = 0
+        self.cmdOutput = ""
+
+    def run(self):
+        """
+        function: Run command
+        input : NA
+        output: NA
+        """
+        (self.cmdStauts, self.cmdOutput) = subprocess.getstatusoutput(
+            self.command)
diff --git a/script/impl/__init__.py b/script/impl/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/script/impl/backup/BackupImpl.py b/script/impl/backup/BackupImpl.py
new file mode 100644
index 0000000..7dbb98f
--- /dev/null
+++ b/script/impl/backup/BackupImpl.py
@@ -0,0 +1,86 @@
+# -*- coding:utf-8 -*-
+
+#############################################################################
+# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+#
+# openGauss is licensed under Mulan PSL v2.
+# You can use this software according to the terms
+# and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+#
+#          http://license.coscl.org.cn/MulanPSL2
+#
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+# ----------------------------------------------------------------------------
+# Description : gs_backup is a utility to back up
+#               or restore binary files and parameter files.
+############################################################################# +import sys + + +class BackupImpl: + ''' + classdocs + input : NA + output: NA + ''' + ACTION_BACKUP = "backup" + ACTION_RESTORE = "restore" + + def __init__(self, backupObj): + ''' + function: Constructor + input : backupObj + output: NA + ''' + self.context = backupObj + + def run(self): + ''' + function: main flow + input : NA + output: NA + ''' + try: + self.context.initLogger(self.context.action) + except Exception as e: + self.context.logger.closeLog() + raise Exception(str(e)) + + try: + self.parseConfigFile() + if self.context.action == BackupImpl.ACTION_BACKUP: + self.doRemoteBackup() + else: + self.doRemoteRestore() + self.context.logger.closeLog() + except Exception as e: + self.context.logger.logExit(str(e)) + + def parseConfigFile(self): + """ + function: Parsing configuration files + input : NA + output: NA + """ + pass + + def doRemoteBackup(self): + """ + function: Backup cluster config files + input : NA + output: NA + """ + pass + + def doRemoteRestore(self): + """ + function: Restore cluster config files + input : NA + output: NA + """ + pass diff --git a/script/impl/backup/OLAP/BackupImplOLAP.py b/script/impl/backup/OLAP/BackupImplOLAP.py new file mode 100644 index 0000000..03edfd8 --- /dev/null +++ b/script/impl/backup/OLAP/BackupImplOLAP.py @@ -0,0 +1,223 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- + +import subprocess +import os +import sys + +sys.path.append(sys.path[0] + "/../../") +from gspylib.common.Common import DefaultValue +from gspylib.common.OMCommand import OMCommand +from gspylib.common.ErrorCode import ErrorCode +from gspylib.os.gsfile import g_file +from gspylib.os.gsOSlib import g_OSlib +from impl.backup.BackupImpl import BackupImpl + + +class BackupImplOLAP(BackupImpl): + """ + The class is used to do perform backup + or restore binary files and parameter files. + """ + + def __init__(self, backupObj): + """ + function: Constructor + input : backupObj + output: NA + """ + super(BackupImplOLAP, self).__init__(backupObj) + + def parseConfigFile(self): + """ + function: Parsing configuration files + input : NA + output: NA + """ + self.context.logger.log("Parsing configuration files.") + if self.context.isForce and self.context.nodename != "" \ + and self.context.action == BackupImpl.ACTION_RESTORE: + self.context.initSshTool([self.context.nodename], + DefaultValue.TIMEOUT_PSSH_BACKUP) + self.context.logger.log( + "Successfully init restore nodename: %s." 
+ % self.context.nodename) + return + + try: + self.context.initClusterInfoFromStaticFile(self.context.user) + nodeNames = self.context.clusterInfo.getClusterNodeNames() + if self.context.nodename == "": + self.context.nodename = nodeNames + else: + remoteNode = self.context.clusterInfo.getDbNodeByName( + self.context.nodename) + if remoteNode is None: + raise Exception(ErrorCode.GAUSS_512["GAUSS_51209"] % ( + "the node", self.context.nodename)) + self.context.nodename = [self.context.nodename] + + self.context.initSshTool(self.context.nodename, + DefaultValue.TIMEOUT_PSSH_BACKUP) + + except Exception as e: + raise Exception(str(e)) + + self.context.logger.log("Successfully parsed the configuration file.") + + def doRemoteBackup(self): + """ + function: Get user and group + input : NA + output: NA + """ + self.context.logger.log("Performing remote backup.") + localHostName = DefaultValue.GetHostIpOrName() + tmp_backupDir = "%s/backupTemp_%d" % ( + DefaultValue.getTmpDirFromEnv(), os.getpid()) + cmd = "%s -U %s --nodeName %s -P %s -B %s -l %s --ingore_miss" % \ + (OMCommand.getLocalScript("Local_Backup"), + self.context.user, + localHostName, + tmp_backupDir, + self.context.backupDir, + self.context.localLog) + + if self.context.isParameter: + cmd += " -p" + if self.context.isBinary: + cmd += " -b" + + self.context.logger.debug("Remote backup command is %s." % cmd) + + try: + if not os.path.exists(tmp_backupDir): + os.makedirs(tmp_backupDir, + DefaultValue.KEY_DIRECTORY_PERMISSION) + self._runCmd(cmd) + if self.context.isParameter: + self.__distributeBackupFile(tmp_backupDir, "parameter") + if self.context.isBinary: + self.__distributeBackupFile(tmp_backupDir, "binary") + + DefaultValue.cleanFileDir(tmp_backupDir, self.context.sshTool) + + self.context.logger.log("Remote backup succeeded.") + self.context.logger.log("Successfully backed up cluster files.") + except Exception as e: + DefaultValue.cleanFileDir(tmp_backupDir, self.context.sshTool) + raise Exception(str(e)) + + def __distributeBackupFile(self, tmp_backupDir, flag): + """ + function: distribute Backup File + input : tmp_backupDir, flag + output: NA + """ + # compresses the configuration files for all node backups + tarFiles = "%s_*.tar" % flag + tarName = "%s.tar" % flag + cmd = g_file.SHELL_CMD_DICT["compressTarFile"] \ + % (tmp_backupDir, tarName, + tarFiles, DefaultValue.KEY_FILE_MODE, tarName) + if (flag == "parameter"): + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + raise Exception( + ErrorCode.GAUSS_514["GAUSS_51400"] + % cmd + " Error:\n%s" % output) + else: + self._runCmd(cmd) + + # prepares the backup directory for the specified node + cmd = g_file.SHELL_CMD_DICT["createDir"] \ + % (self.context.backupDir, self.context.backupDir, + DefaultValue.KEY_DIRECTORY_MODE) + self._runCmd(cmd, self.context.nodename) + # send backup package to the specified node from the local node + originalFile = "'%s'/%s.tar" % (tmp_backupDir, flag) + if flag == "parameter": + if self.context.nodename != [g_OSlib.getHostName()]: + self.context.sshTool.scpFiles( + originalFile, + self.context.backupDir, self.context.nodename) + else: + g_file.cpFile(originalFile, self.context.backupDir) + else: + targetFile = "'%s'/%s.tar" % (self.context.backupDir, flag) + cmd = g_file.SHELL_CMD_DICT["copyFile"] % ( + originalFile, targetFile) + self._runCmd(cmd) + + def __cleanTmpTar(self): + """ + function: delete tmp tar package + input : NA + output: NA + """ + cmd = "" + if (self.context.isParameter and self.context.isBinary): + 
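+            # Illustrative note, not from the original code: the
+            # "deleteBatchFiles" shell template appears to remove every file
+            # matching the given prefix, so this branch cleans up both the
+            # parameter_*.tar and binary_*.tar packages left under backupDir.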
cmd += g_file.SHELL_CMD_DICT["deleteBatchFiles"] % ( + "%s/parameter_" % self.context.backupDir) + cmd += " ; " + cmd += g_file.SHELL_CMD_DICT["deleteBatchFiles"] % ( + "%s/binary_" % self.context.backupDir) + elif (self.context.isParameter and not self.context.isBinary): + cmd += g_file.SHELL_CMD_DICT["deleteBatchFiles"] % ( + "%s/parameter_" % self.context.backupDir) + elif (not self.context.isParameter and self.context.isBinary): + cmd += g_file.SHELL_CMD_DICT["deleteBatchFiles"] % ( + "%s/binary_" % self.context.backupDir) + + self._runCmd(cmd) + + def doRemoteRestore(self): + """ + function: Get user and group + input : NA + output: NA + """ + self.context.logger.log("Performing remote restoration.") + + cmd = "%s -U %s -l %s " % ( + OMCommand.getLocalScript("Local_Restore"), + self.context.user, + self.context.localLog) + if (self.context.backupDir != ""): + cmd += " -P %s" % self.context.backupDir + if self.context.isParameter: + cmd += " -p" + if self.context.isBinary: + cmd += " -b" + if self.context.isForce: + cmd += " -f" + self.context.logger.debug("Remote restoration command: %s." % cmd) + + try: + self._runCmd(cmd) + self.__cleanTmpTar() + self.context.logger.log("Successfully restored cluster files.") + except Exception as e: + self.__cleanTmpTar() + raise Exception(str(e)) + + def _runCmd(self, cmd, nodes=None): + (status, output) = \ + self.context.sshTool.getSshStatusOutput(cmd, nodes) + for node in status.keys(): + if status[node] != DefaultValue.SUCCESS: + raise Exception(output) diff --git a/script/impl/backup/OLAP/__init__.py b/script/impl/backup/OLAP/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/script/impl/backup/__init__.py b/script/impl/backup/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/script/impl/checkperf/CheckperfImpl.py b/script/impl/checkperf/CheckperfImpl.py new file mode 100644 index 0000000..13577e6 --- /dev/null +++ b/script/impl/checkperf/CheckperfImpl.py @@ -0,0 +1,130 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. 
+# ---------------------------------------------------------------------------- + +import os +import sys +from gspylib.common.Common import DefaultValue +from gspylib.os.gsfile import g_file + + +class CheckperfImpl(): + """ + Class: check perf impl + """ + + def __init__(self): + """ + function: constructor + """ + pass + + def CheckPMKPerf(self, outputInfo): + """ + function: check pmk perf + """ + pass + + def CheckSSDPerf(self, outputInfo): + """ + function: check ssd perf + input : outputInfo + output : NA + """ + pass + + def run(self): + """ + function: the real interface that execute the check method + input : NA + output: NA + """ + try: + outputInfo = None + # check output file + if self.opts.outFile != "": + self.opts.outFile_tmp = os.path.join( + DefaultValue.getTmpDirFromEnv(self.opts.user), + (os.path.split(self.opts.outFile)[1] + + "_tmp_%s" % os.getpid())) + outputInfo = self.setOutFile() + else: + outputInfo = sys.stdout + # check check item + for key in self.opts.checkItem: + if key == "PMK": + # check PMK + self.CheckPMKPerf(outputInfo) + elif key == "SSD": + # check SSD + self.CheckSSDPerf(outputInfo) + + # Follow-up + self.closeFile(outputInfo) + except Exception as e: + # close file handle if outputInfo is out file + if self.opts.outFile and outputInfo: + outputInfo.flush() + outputInfo.close() + if os.path.isfile(self.opts.outFile_tmp): + g_file.removeFile(self.opts.outFile_tmp) + # modify the log file's owner + g_file.changeOwner(self.opts.user, self.logger.logFile) + self.logger.error(str(e)) + sys.exit(1) + + def setOutFile(self): + """ + function: set out file + input : NA + output : NA + """ + # get directory component of a pathname + dirName = os.path.dirname(self.opts.outFile) + # judge if directory + if not os.path.isdir(dirName): + g_file.createDirectory(dirName, True, + DefaultValue.KEY_DIRECTORY_MODE) + # create output file and modify permission + g_file.createFile(self.opts.outFile, True, DefaultValue.KEY_FILE_MODE) + g_file.changeOwner(self.opts.user, self.opts.outFile) + self.logger.log( + "Performing performance check. " + "Output the checking result to the file %s." % self.opts.outFile) + # write file + self.opts.outFile_tmp = os.path.join( + DefaultValue.getTmpDirFromEnv(self.opts.user), + (os.path.split(self.opts.outFile)[1] + "_tmp_%s" % os.getpid())) + if not os.path.isfile(self.opts.outFile_tmp): + g_file.createFile(self.opts.outFile_tmp, True, + DefaultValue.KEY_FILE_MODE) + g_file.changeOwner(self.opts.user, self.opts.outFile_tmp) + fp = open(self.opts.outFile_tmp, "w") + outputInfo = fp + return outputInfo + + def closeFile(self, fp): + """ + function: close file + input : fp + output : NA + """ + if self.opts.outFile and fp: + # close file handle if outputInfo is out file + fp.flush() + fp.close() + g_file.moveFile(self.opts.outFile_tmp, self.opts.outFile) + self.logger.log("Performance check is completed.") diff --git a/script/impl/checkperf/OLAP/CheckperfImplOLAP.py b/script/impl/checkperf/OLAP/CheckperfImplOLAP.py new file mode 100644 index 0000000..bf4bb82 --- /dev/null +++ b/script/impl/checkperf/OLAP/CheckperfImplOLAP.py @@ -0,0 +1,1842 @@ +# -*- coding:utf-8 -*- +# coding: UTF-8 +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. 
+# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- + +import subprocess +import os +import sys +import time +import threading +import glob +import shutil +from functools import cmp_to_key + +from gspylib.common.Common import ClusterCommand, DefaultValue +from gspylib.common.OMCommand import OMCommand +from gspylib.common.ErrorCode import ErrorCode +from gspylib.os.gsfile import g_file +from gspylib.threads.parallelTool import parallelTool +from impl.checkperf.CheckperfImpl import CheckperfImpl +from multiprocessing.dummy import Pool as ThreadPool + +# Database size inspection interval +DB_SIZE_CHECK_INTERVAL = 21600 + + +class CheckperfImplOLAP(CheckperfImpl): + """ + checkperf with OLAP + """ + + def __init__(self): + """ + function: constructor + """ + CheckperfImpl.__init__(self) + self.recordColumn = {} + self.recordPrevStat = {} + self.sessionCpuColumn = [] + self.sessionMemoryColumn = [] + self.sessionIOColumn = [] + # Functional options + self.ACTION_INSTALL_PMK = "install_pmk" + self.ACTION_COLLECT_STAT = "collect_stat" + self.ACTION_DISPLAY_STAT = "display_stat" + self.ACTION_ASYN_COLLECT = "asyn_collect" + self.DWS_mode = False + + def getNormalDatanodes(self): + """ + function: get normal primary datanodes. + input : NA + output: instlist + """ + clusterStatus = OMCommand.getClusterStatus(self.opts.user) + if clusterStatus is None: + raise Exception(ErrorCode.GAUSS_516["GAUSS_51600"]) + + normalDNList = [] + for dbNode in self.clusterInfo.dbNodes: + for dnInst in dbNode.datanodes: + instStatus = clusterStatus.getInstanceStatusById( + dnInst.instanceId) + if (instStatus is not None and + instStatus.isInstanceHealthy() and + instStatus.status == "Primary"): + normalDNList.append(dnInst) + + if (len(normalDNList) == 0): + raise Exception(ErrorCode.GAUSS_516["GAUSS_51601"] % "DN" + + " There is no normal primary datanode.") + + # the cluster must be non-read-only status + (status, output) = DefaultValue.checkTransactionReadonly( + self.opts.user, self.clusterInfo, normalDNList) + if (status != 0): + raise Exception( + ErrorCode.GAUSS_516["GAUSS_51602"] + "Error: \n%s" \ + % output + \ + "\nPlease ensure the database is not read only mode.") + + return normalDNList + + def checkClusterStatus(self): + """ + function: Check cluster status, + should be normal, no redistributing,and degrade(CN deleted only) + input : NA + output: None + """ + self.logger.debug("Checking cluster status.") + + cmd = ClusterCommand.getQueryStatusCmd(self.opts.user, "", "", False) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + raise Exception(ErrorCode.GAUSS_516["GAUSS_51600"] + + "\nCommand:\n %s\nOutput:\n %s" + % (cmd, str(output))) + + cluster_state = None + redistributing = None + for line in output.split('\n'): + line = line.strip() + if line.startswith("cluster_state"): + cluster_state = \ + line.split(":")[1].strip() \ + if len(line.split(":")) == 2 else None + continue + if line.startswith("redistributing"): + redistributing = \ + line.split(":")[1].strip() \ + if len(line.split(":")) == 2 else None + continue + + # cluster status should be Normal or Degraded + if cluster_state != "Normal" and 
cluster_state != "Degraded": + raise Exception(ErrorCode.GAUSS_516["GAUSS_51602"]) + + # redistributing should be No + if (redistributing != "No"): + raise Exception(ErrorCode.GAUSS_516["GAUSS_51602"] + \ + "\nPlease ensure the cluster is not in " + "redistributing.") + + def collectPMKData( + self, pmk_curr_collect_start_time, + pmk_last_collect_start_time, last_snapshot_id, port): + """ + function: collect PMK data + input : pmk_curr_collect_start_time, + pmk_last_collect_start_time, last_snapshot_id, port + output : NA + """ + cmd = "" + failedNodes = [] + if (self.opts.mpprcFile != ""): + cmd += "source %s;" % self.opts.mpprcFile + cmd += \ + "%s -t %s -p %s -u %s -c %s -l %s" \ + % (OMCommand.getLocalScript("UTIL_GAUSS_STAT"), + self.ACTION_COLLECT_STAT, + self.clusterInfo.appPath, + self.opts.user, + str(port), + self.opts.localLog) + if (self.DWS_mode): + cmd += " --dws-mode" + if pmk_curr_collect_start_time != "": + cmd += " --curr-time='%s'" % pmk_curr_collect_start_time + if pmk_last_collect_start_time != "": + cmd += " --last-time='%s'" % pmk_last_collect_start_time + if last_snapshot_id != "": + cmd += " --snapshot-id=%s" % last_snapshot_id + + cmd += " --flag-num=%d" % os.getpid() + + cmd += " --master-host=%s" % DefaultValue.GetHostIpOrName() + + self.logger.debug("Command for executing %s on all hosts" % cmd) + if (os.getuid() == 0): + cmd = """su - %s -c \\\"%s\\\" """ % (self.opts.user, cmd) + (status, output) = self.sshTool.getSshStatusOutput(cmd) + for node in status.keys(): + if (status[node] == DefaultValue.SUCCESS): + pass + else: + failedNodes.append(node) + + if (len(failedNodes) != 0): + self.logger.debug( + "Failed to collect statistics on (%s). Output: \n%s." \ + % (failedNodes, output)) + raise Exception(output) + else: + self.logger.debug( + "Successfully collected statistics on all hosts.") + + def getMetaData(self, hostName, port): + """ + function: get meta data of PMK(curr_collect_start_time, + last_collect_start_time, last_snapshot_id) + input : hostName, port + output: NA + """ + self.logger.debug("Getting PMK meta data.") + try: + local_host = DefaultValue.GetHostIpOrName() + status = 7 + result = None + error_output = "" + querySql = "SELECT l_pmk_curr_collect_start_time, " \ + "l_pmk_last_collect_start_time, l_last_snapshot_id " \ + "FROM pmk.get_meta_data();" + if (self.DWS_mode): + if (hostName == local_host): + # execute sql + (status, result, error_output) = \ + ClusterCommand.excuteSqlOnLocalhost(port, querySql) + else: + # Gets the current time + currentTime = time.strftime("%Y-%m-%d_%H:%M:%S") + pid = os.getpid() + outputfile = \ + "metadata_%s_%s_%s.json" \ + % (hostName, pid, currentTime) + # Get the temporary directory from PGHOST + tmpDir = DefaultValue.getTmpDirFromEnv(self.opts.user) + filepath = os.path.join(tmpDir, outputfile) + # execute SQL on remote host + ClusterCommand.executeSQLOnRemoteHost( + hostName, port, querySql, filepath) + # get sql result from outputfile + (status, result, error_output) = \ + ClusterCommand.getSQLResult(hostName, outputfile) + + if (status != 2 or error_output != ""): + raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] \ + % querySql + " Error: \n%s" \ + % str(error_output)) + self.logger.debug("output: %s" % result) + if (len(result) == 0): + raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] \ + % querySql + " Return record is null") + + recordList = result[0] + + if (recordList[0] != ''): + recordList[0] = (recordList[0]).strip() + if (recordList[1] != ''): + recordList[1] = (recordList[1]).strip() + 
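+                # Illustrative note, not from the original code: recordList
+                # carries the three columns of pmk.get_meta_data(), e.g.
+                # (hypothetical values)
+                #     ['2021-04-10 22:00:00', '2021-04-10 16:00:00', '17']
+                # i.e. the current collect start time, the last collect
+                # start time and the last snapshot id.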
if (recordList[2] != ''): + recordList[2] = (recordList[2]).strip() + + self.logger.debug("Successfully got PMK meta data.") + return recordList[0], recordList[1], recordList[2] + else: + (status, output) = ClusterCommand.remoteSQLCommand( + querySql, self.opts.user, + hostName, port, False, DefaultValue.DEFAULT_DB_NAME) + if (status != 0): + raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] + % querySql + " Error: \n%s" % str(output)) + self.logger.debug("output: %s" % output) + if (output == ""): + raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] + % querySql + " Return record is null") + recordList = output.split('|') + + if (recordList[0] != ''): + recordList[0] = (recordList[0]).strip() + if (recordList[1] != ''): + recordList[1] = (recordList[1]).strip() + if (recordList[2] != ''): + recordList[2] = (recordList[2]).strip() + + self.logger.debug("Successfully got PMK meta data.") + return recordList[0], recordList[1], recordList[2] + except Exception as e: + raise Exception(str(e)) + + def deleteExpiredSnapShots(self, hostName, port): + """ + function: delete expired snapshots records + input : hostName, port + output: NA + """ + self.logger.debug("Deleting expired snapshots records.") + try: + local_host = DefaultValue.GetHostIpOrName() + # execute sql + querySql = "SELECT * FROM pmk.delete_expired_snapshots();" + if (self.DWS_mode): + if (hostName == local_host): + (status, result, error_output) = \ + ClusterCommand.excuteSqlOnLocalhost(port, querySql) + else: + # Gets the current time + currentTime = time.strftime("%Y-%m-%d_%H:%M:%S") + pid = os.getpid() + outputfile = \ + "deleteSnapshots_%s_%s_%s.json" \ + % (hostName, pid, currentTime) + # Create a temporary file + tmpDir = DefaultValue.getTmpDirFromEnv(self.opts.user) + filepath = os.path.join(tmpDir, outputfile) + # execute SQL on remote host + ClusterCommand.executeSQLOnRemoteHost( \ + hostName, port, querySql, filepath) + # get sql result from outputfile + (status, result, error_output) = \ + ClusterCommand.getSQLResult(hostName, outputfile) + if (status != 2): + raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] \ + % querySql \ + + " Error: \n%s" % str(error_output)) + self.logger.debug( + "Successfully deleted expired snapshots records.") + else: + # execute sql + querySql = "SELECT * FROM pmk.delete_expired_snapshots();" + (status, output) = ClusterCommand.remoteSQLCommand( + querySql, self.opts.user, + hostName, port, False, DefaultValue.DEFAULT_DB_NAME) + if (status != 0): + raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] \ + % querySql \ + + " Error: \n%s" % str(output)) + self.logger.debug( + "Successfully deleted expired snapshots records.") + except Exception as e: + raise Exception(str(e)) + + def parseSingleHostNodeStat(self, filePath): + """ + function: parse node stat of single host + input : filePath + output: NA + """ + self.logger.debug( + "Parsing node stat of single host into the file[%s]." \ + % filePath) + try: + # read file + nodtStat = g_file.readFile(filePath) + # parse node stat + for line in nodtStat: + line = line.strip() + recordItem = line.split("::::") + if (len(recordItem) != 2): + continue + column = (recordItem[1]).split('|') + recordNode = (recordItem[0]).strip() + self.recordColumn[recordNode] = column + self.logger.debug( + "Successfully parsed node stat of single " \ + "host into the file[%s]." 
% filePath) + except Exception as e: + raise Exception(str(e)) + + def parseSessionCpuStat(self, filePath): + """ + function: parse session cpu stat of single host + input : filePath + output: NA + """ + self.logger.debug( + "Parsing session cpu stat of single host into the file[%s]." \ + % filePath) + try: + # read file + cpuStat = g_file.readFile(filePath) + # parse session cpu stat + for line in cpuStat: + line = line.strip() + column = line.split('|') + self.sessionCpuColumn.append(column) + self.logger.debug( + "Successfully parsed session cpu " \ + "stat of single host into the file[%s]." % filePath) + except Exception as e: + raise Exception(str(e)) + + def parseSessionMemoryStat(self, filePath): + """ + function: parse session memory of single host + input : filePath + output: NA + """ + self.logger.debug( + "Parsing session memory stat of single host into the file[%s]." \ + % filePath) + try: + # read file + MemoryStat = g_file.readFile(filePath) + for line in MemoryStat: + line = line.strip() + column = line.split('|') + self.sessionMemoryColumn.append(column) + self.logger.debug( + "Successfully parsed session memory stat of " \ + "single host into the file[%s]." % filePath) + except Exception as e: + raise Exception(str(e)) + + def parseSessionIOStat(self, filePath): + """ + function: parse session IO stat of single host + input : filePath + output: NA + """ + self.logger.debug( + "Parsing session IO stat of single host into the file[%s]." \ + % filePath) + try: + IOStat = g_file.readFile(filePath) + for line in IOStat: + line = line.strip() + column = line.split('|') + self.sessionIOColumn.append(column) + self.logger.debug( + "Successfully parsed session IO stat \ + of single host into the file[%s]." % filePath) + except Exception as e: + raise Exception(str(e)) + + def getAllHostsNodeStat(self): + """ + function: get node stat of all hosts + input : NA + output: NA + """ + self.logger.debug("Getting node stat of all hosts.") + resultFiles = [] + instCounts = 0 + try: + # Get the cluster's node names + hostNames = self.clusterInfo.getClusterNodeNames() + # traversing host name + for hostName in hostNames: + recordTempFile = os.path.join( + DefaultValue.getTmpDirFromEnv(self.opts.user), + "recordTempFile_%d_%s" % (os.getpid(), hostName)) + # check if recordTempFile exists + if (os.path.exists(recordTempFile)): + # append recordTempFile to resultFiles + resultFiles.append(recordTempFile) + else: + if (self.clusterInfo.isSingleInstCluster()): + continue + if (hostName != DefaultValue.GetHostIpOrName()): + scpcmd = "pssh -s -H %s 'pscp -H %s %s %s' " \ + % (hostName, DefaultValue.GetHostIpOrName(), + recordTempFile, recordTempFile) + (status, output) = subprocess.getstatusoutput(scpcmd) + if (status != 0): + self.logger.debug( + "Lost file [%s] in current node " \ + " path [%s],the file is " \ + "delivered from node [%s]" \ + "by command 'scp';Error:\n%s" % \ + (recordTempFile, + DefaultValue.getTmpDirFromEnv( + self.opts.user), + hostName, output)) + else: + resultFiles.append(recordTempFile) + else: + self.logger.debug( + "Lost local file [%s] in current " \ + "node path [%s]" % \ + (recordTempFile, + DefaultValue.getTmpDirFromEnv(self.opts.user))) + # check if number matches + if (len(resultFiles) == 0): + raise Exception( + ErrorCode.GAUSS_502["GAUSS_50219"] \ + % "the node stat files of all hosts") + # concurrent execution + parallelTool.parallelExecute( + self.parseSingleHostNodeStat, resultFiles) + + # traverse node item + for nodeItem in self.clusterInfo.dbNodes: + 
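+                # Illustrative note, not from the original code: instCounts
+                # accumulates the data instances of every node; the check
+                # below requires it to equal the number of pgxc_node records
+                # parsed into self.recordColumn, otherwise GAUSS_51637 is
+                # raised.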
instCounts += nodeItem.dataNum + + # judge if number of pgxc_node records is equal to + # the number of data instances(cn and master dn) + if (instCounts != len(self.recordColumn)): + raise Exception( + ErrorCode.GAUSS_516["GAUSS_51637"] \ + % ("number of pgxc_node records[%d]" % \ + (len(self.recordColumn)), + "the number of data instances(cn and master dn)[%d]" \ + % instCounts)) + + # traverse file + for tempFile in resultFiles: + g_file.removeFile(tempFile) + + self.logger.debug("Successfully got node stat of all hosts.") + except Exception as e: + # traverse file + for tempFile in resultFiles: + # close and remove temporary file + g_file.removeFile(tempFile) + raise Exception(str(e)) + + def getAllSessionCpuStat(self): + """ + function: get cpu stat of all sessions + input : NA + output: NA + """ + self.logger.debug("Getting cpu stat of all sessions.") + resultFiles = [] + hostNames = [] + try: + # get host names + hostNames = self.clusterInfo.getClusterNodeNames() + # traverse host names + for hostName in hostNames: + # get session Cpu Temp File + sessionCpuTempFile = os.path.join( + DefaultValue.getTmpDirFromEnv(self.opts.user), + "sessionCpuTempFile_%d_%s" \ + % (os.getpid(), hostName)) + # check if session Cpu Temp File exists + if (os.path.exists(sessionCpuTempFile)): + # append session Cpu Temp File to result Files + resultFiles.append(sessionCpuTempFile) + + if (len(resultFiles) == 0): + self.logger.debug("There are no sessions.") + return + + # Concurrent execution + self.logger.debug("resultFiles: %s" % resultFiles) + parallelTool.parallelExecute( + self.parseSessionCpuStat, resultFiles) + + self.logger.debug("self.sessionCpuColumn: \n") + # traverse record + for record in self.sessionCpuColumn: + self.logger.debug("%s" % record) + + # traverse temp File + for tempFile in resultFiles: + # clean temp File + g_file.removeFile(tempFile) + + self.logger.debug("Successfully got cpu stat of all sessions.") + except Exception as e: + # traverse temp File + for tempFile in resultFiles: + # clean temp File + g_file.removeFile(tempFile) + raise Exception(str(e)) + + def getAllSessionMemoryStat(self): + """ + function: get memory stat of all sessions + input : NA + output: NA + """ + self.logger.debug("Getting memory stat of all sessions.") + resultFiles = [] + hostNames = [] + try: + # get host names + hostNames = self.clusterInfo.getClusterNodeNames() + # traverse host names + for hostName in hostNames: + sessionMemTempFile = os.path.join( + DefaultValue.getTmpDirFromEnv(self.opts.user), + "sessionMemTempFile_%d_%s" \ + % (os.getpid(), hostName)) + # check if session Mem Temp File exists + if (os.path.exists(sessionMemTempFile)): + # append session Mem Temp File to resultFiles + resultFiles.append(sessionMemTempFile) + + # judge if sessions + if (len(resultFiles) == 0): + self.logger.debug("There are no sessions.") + return + + # Concurrent execution + self.logger.debug("resultFiles: %s" % resultFiles) + parallelTool.parallelExecute( + self.parseSessionMemoryStat, resultFiles) + self.logger.debug("self.sessionMemoryColumn: \n") + # traverse record + for record in self.sessionMemoryColumn: + self.logger.debug("%s" % record) + + # traverse temp File + for tempFile in resultFiles: + g_file.removeFile(tempFile) + + self.logger.debug("Successfully got memory stat of all sessions.") + except Exception as e: + # traverse temp File + for tempFile in resultFiles: + # remove temporary file + g_file.removeFile(tempFile) + raise Exception(str(e)) + + def getAllSessionIOStat(self): + """ + 
function: get IO stat of all sessions + input : NA + output: NA + """ + self.logger.debug("Getting IO stat of all sessions.") + resultFiles = [] + hostNames = [] + try: + # get host names + hostNames = self.clusterInfo.getClusterNodeNames() + # traverse host names + for hostName in hostNames: + sessionIOTempFile = os.path.join( + DefaultValue.getTmpDirFromEnv(self.opts.user), + "sessionIOTempFile_%d_%s" % (os.getpid(), hostName)) + # if session IO Temp File exists + if (os.path.exists(sessionIOTempFile)): + # append session IO Temp File to resultFiles + resultFiles.append(sessionIOTempFile) + + # judge if sessions + if (len(resultFiles) == 0): + self.logger.debug("There are no sessions.") + return + + # Concurrent execution + self.logger.debug("resultFiles: %s" % resultFiles) + parallelTool.parallelExecute(self.parseSessionIOStat, resultFiles) + self.logger.debug("self.sessionIOColumn: \n") + # traverse record + for record in self.sessionIOColumn: + self.logger.debug("%s" % record) + + # traverse temp File + for tempFile in resultFiles: + # close and remove temporary file + g_file.removeFile(tempFile) + + self.logger.debug("Successfully got IO stat of all sessions.") + except Exception as e: + # traverse temp File + for tempFile in resultFiles: + # close and remove temporary file + g_file.removeFile(tempFile) + raise Exception(str(e)) + + def getAllHostsPrevNodeStat(self, hostName, port, snapshotId): + """ + function: get prev node stat of all hosts + input : hostName, port, snapshotId + output: NA + """ + self.logger.debug("Getting prev node stat of all hosts.") + dataNum = 0 + cooNum = 0 + try: + for nodeItem in self.clusterInfo.dbNodes: + dataNum += nodeItem.dataNum + cooNum += nodeItem.cooNum + if (self.DWS_mode): + if (snapshotId != ""): + # query CN sql + querySql = "" + querySql += "SELECT node_name, " \ + "COALESCE(pns.physical_reads, 0), " \ + "COALESCE(pns.physical_writes, 0), " + querySql += "COALESCE(pns.read_time, 0), " \ + "COALESCE(pns.write_time, 0), " \ + "COALESCE(pns.xact_commit, 0), " + querySql += "COALESCE(pns.xact_rollback, 0), " \ + "COALESCE(pns.checkpoints_timed, 0), " \ + "COALESCE(pns.checkpoints_req, 0), " + querySql += "COALESCE(pns.checkpoint_write_time, 0)," \ + "COALESCE(pns.blocks_read, 0)," \ + "COALESCE(pns.blocks_hit, 0), " + querySql += "COALESCE(pns.busy_time, 0)," \ + "COALESCE(pns.idle_time, 0), " \ + "COALESCE(pns.iowait_time, 0), " + querySql += "COALESCE(pns.db_cpu_time, 0)FROM " \ + "pmk.pmk_snapshot_coordinator_stat pns " + querySql += "WHERE pns.snapshot_id = %s" % snapshotId + local_host = DefaultValue.GetHostIpOrName() + + if (local_host == hostName): + (status, result, error_output) = \ + ClusterCommand.excuteSqlOnLocalhost(port, querySql) + else: + # Gets the current time + currentTime = time.strftime("%Y-%m-%d_%H:%M:%S") + pid = os.getpid() + outputfile = "nodestat_%s_%s_%s.json" \ + % (hostName, pid, currentTime) + tmpDir = DefaultValue.getTmpDirFromEnv(self.opts.user) + filepath = os.path.join(tmpDir, outputfile) + # execute SQL on remote host + ClusterCommand.executeSQLOnRemoteHost( + hostName, port, querySql, filepath, snapshotId) + (status, result, error_output) = \ + ClusterCommand.getSQLResult(hostName, outputfile) + if (status != 2): + raise Exception( + ErrorCode.GAUSS_513["GAUSS_51300"] \ + % querySql + " Error: \n%s" % str(error_output)) + self.logger.debug("output: %s" % result) + if (len(result) == 0): + raise Exception( + ErrorCode.GAUSS_513["GAUSS_51300"] \ + % querySql + " Return record is null") + + prevStatList = result 
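+                    # each JSON result row is a list of fields; rejoin it
+                    # with '|' and key the previous-snapshot map by the
+                    # node name in the first column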
+                    for i in range(len(prevStatList)):
+                        prevStat = "|".join(prevStatList[i])
+                        column = (prevStat).split('|')
+                        recordName = (column[0]).strip()
+                        self.recordPrevStat[recordName] = column
+
+                    # query DN sql
+                    querySql = ""
+                    querySql += "SELECT node_name, " \
+                                "COALESCE(pns.physical_reads, 0), " \
+                                "COALESCE(pns.physical_writes, 0), "
+                    querySql += "COALESCE(pns.read_time, 0), " \
+                                "COALESCE(pns.write_time, 0), " \
+                                "COALESCE(pns.xact_commit, 0), "
+                    querySql += "COALESCE(pns.xact_rollback, 0)," \
+                                "COALESCE(pns.checkpoints_timed, 0), " \
+                                "COALESCE(pns.checkpoints_req, 0), "
+                    querySql += "COALESCE(pns.checkpoint_write_time, 0), " \
+                                "COALESCE(pns.blocks_read, 0), " \
+                                "COALESCE(pns.blocks_hit, 0), "
+                    querySql += "COALESCE(pns.busy_time, 0)," \
+                                "COALESCE(pns.idle_time, 0), " \
+                                "COALESCE(pns.iowait_time, 0), "
+                    querySql += "COALESCE(pns.db_cpu_time, 0) " \
+                                "FROM pmk.pmk_snapshot_datanode_stat pns "
+                    querySql += "WHERE pns.snapshot_id = %s" % snapshotId
+                    if (local_host == hostName):
+                        (status, result, error_output) = \
+                            ClusterCommand.excuteSqlOnLocalhost(port, querySql)
+                    else:
+                        # Gets the current time
+                        currentTime = time.strftime("%Y-%m-%d_%H:%M:%S")
+                        pid = os.getpid()
+                        outputfile = "nodestat_%s_%s_%s.json" \
+                                     % (hostName, pid, currentTime)
+                        tmpDir = DefaultValue.getTmpDirFromEnv(self.opts.user)
+                        filepath = os.path.join(tmpDir, outputfile)
+                        ClusterCommand.executeSQLOnRemoteHost(
+                            hostName, port, querySql, filepath, snapshotId)
+                        (status, result, error_output) = \
+                            ClusterCommand.getSQLResult(hostName, outputfile)
+                    if (status != 2):
+                        raise Exception(
+                            ErrorCode.GAUSS_513["GAUSS_51300"] \
+                            % querySql + " Error: \n%s" % str(error_output))
+                    self.logger.debug("output: %s" % result)
+                    if (len(result) == 0):
+                        raise Exception(
+                            ErrorCode.GAUSS_513["GAUSS_51300"] \
+                            % querySql + " Return record is null")
+
+                    prevStatList = result
+                    # rejoin and index the DN rows the same way as the CN rows
+                    for i in range(len(prevStatList)):
+                        prevStat = "|".join(prevStatList[i])
+                        column = (prevStat).split('|')
+                        recordName = (column[0]).strip()
+                        self.recordPrevStat[recordName] = column
+
+                    # handle the scenario expand or add-cn or delete-cn
+                    for nodeName in self.recordColumn.keys():
+                        if (nodeName not in self.recordPrevStat):
+                            tempPrevRecord = ['0', '0', '0', '0',
+                                              '0', '0', '0', '0', '0',
+                                              '0', '0', '0', '0', '0', '0']
+                            prevRecord = []
+                            prevRecord.append(nodeName)
+                            prevRecord.extend(tempPrevRecord)
+                            self.recordPrevStat[nodeName] = prevRecord
+                            self.logger.debug(
+                                "The pgxc nodes have been changed.")
+                else:
+                    tempPrevRecord = ['0', '0', '0', '0', '0', '0',
+                                      '0', '0', '0', '0', '0', '0',
+                                      '0', '0', '0']
+                    for nodeName in self.recordColumn.keys():
+                        prevRecord = []
+                        prevRecord.append(nodeName)
+                        prevRecord.extend(tempPrevRecord)
+                        self.recordPrevStat[nodeName] = prevRecord
+
+                self.logger.debug(
+                    "Successfully got prev node stat of all hosts.")
+            else:
+                if (snapshotId != ""):
+                    if (not self.clusterInfo.isSingleInstCluster()):
+                        # query CN sql
+                        querySql = ""
+                        querySql += "SELECT node_name, " \
+                                    "COALESCE(pns.physical_reads, 0), " \
+                                    "COALESCE(pns.physical_writes, 0), "
+                        querySql += "COALESCE(pns.read_time, 0), " \
+                                    "COALESCE(pns.write_time, 0), " \
+                                    "COALESCE(pns.xact_commit, 0), "
+                        querySql += "COALESCE(pns.xact_rollback, 0), " \
+                                    "COALESCE(pns.checkpoints_timed, 0), " \
+                                    "COALESCE(pns.checkpoints_req, 0), "
+                        querySql += "COALESCE(pns.checkpoint_write_time, 0)," \
+                                    " COALESCE(pns.blocks_read, 0), " \
+                                    "COALESCE(pns.blocks_hit, 0), "
+                        querySql += "COALESCE(pns.busy_time, 0)," \
+                                    "COALESCE(pns.idle_time, 0), " \
+                                    "COALESCE(pns.iowait_time, 0), "
+                        querySql += "COALESCE(pns.db_cpu_time, 0) FROM " \
+                                    "pmk.pmk_snapshot_coordinator_stat pns "
+                        querySql += "WHERE pns.snapshot_id = %s" % snapshotId
+
+                        (status, output) = ClusterCommand.remoteSQLCommand(
+                            querySql, self.opts.user,
+                            hostName, port, False,
+                            DefaultValue.DEFAULT_DB_NAME)
+                        if (status != 0):
+                            raise Exception(
+                                ErrorCode.GAUSS_513["GAUSS_51300"] \
+                                % querySql + " Error: \n%s" % str(output))
+                        self.logger.debug("output: %s" % output)
+                        if (output == ""):
+                            raise Exception(
+                                ErrorCode.GAUSS_513["GAUSS_51300"] \
+                                % querySql + " Return record is null")
+
+                        prevStatList = output.split('\n')
+                        for prevStat in prevStatList:
+                            prevStat = prevStat.strip()
+                            column = (prevStat).split('|')
+                            recordName = (column[0]).strip()
+                            self.recordPrevStat[recordName] = column
+
+                    # query DN sql
+                    querySql = ""
+                    querySql += "SELECT node_name, " \
+                                "COALESCE(pns.physical_reads, 0), " \
+                                "COALESCE(pns.physical_writes, 0), "
+                    querySql += "COALESCE(pns.read_time, 0)," \
+                                "COALESCE(pns.write_time, 0), " \
+                                "COALESCE(pns.xact_commit, 0), "
+                    querySql += "COALESCE(pns.xact_rollback, 0), " \
+                                "COALESCE(pns.checkpoints_timed, 0), " \
+                                "COALESCE(pns.checkpoints_req, 0), "
+                    querySql += "COALESCE(pns.checkpoint_write_time, 0), " \
+                                "COALESCE(pns.blocks_read, 0), " \
+                                "COALESCE(pns.blocks_hit, 0), "
+                    querySql += "COALESCE(pns.busy_time, 0)," \
+                                "COALESCE(pns.idle_time, 0), " \
+                                "COALESCE(pns.iowait_time, 0), "
+                    querySql += "COALESCE(pns.db_cpu_time, 0) " \
+                                "FROM pmk.pmk_snapshot_datanode_stat pns "
+                    querySql += "WHERE pns.snapshot_id = %s" % snapshotId
+
+                    # Execute sql command on remote host
+                    (status, output) = ClusterCommand.remoteSQLCommand(
+                        querySql, self.opts.user,
+                        hostName, port, False, DefaultValue.DEFAULT_DB_NAME)
+                    if (status != 0):
+                        raise Exception(
+                            ErrorCode.GAUSS_513["GAUSS_51300"] \
+                            % querySql + " Error: \n%s" % str(output))
+                    self.logger.debug("output: %s" % output)
+                    if (output == ""):
+                        raise Exception(
+                            ErrorCode.GAUSS_513["GAUSS_51300"] \
+                            % querySql + " Return record is null")
+
+                    prevStatList = output.split('\n')
+                    for prevStat in prevStatList:
+                        prevStat = prevStat.strip()
+                        column = (prevStat).split('|')
+                        recordName = (column[0]).strip()
+                        self.recordPrevStat[recordName] = column
+
+                    # handle the scenario expand or add-cn or delete-cn
+                    for nodeName in self.recordColumn.keys():
+                        if (nodeName not in self.recordPrevStat):
+                            tempPrevRecord = ['0', '0', '0', '0',
+                                              '0', '0', '0', '0', '0',
+                                              '0', '0', '0', '0', '0', '0']
+                            prevRecord = []
+                            prevRecord.append(nodeName)
+                            prevRecord.extend(tempPrevRecord)
+                            self.recordPrevStat[nodeName] = prevRecord
+                            self.logger.debug(
+                                "The pgxc nodes have been changed.")
+                else:
+                    tempPrevRecord = ['0', '0', '0', '0', '0', '0',
+                                      '0', '0', '0', '0', '0', '0', '0',
+                                      '0', '0']
+                    for nodeName in self.recordColumn.keys():
+                        prevRecord = []
+                        prevRecord.append(nodeName)
+                        prevRecord.extend(tempPrevRecord)
+                        self.recordPrevStat[nodeName] = prevRecord
+                self.logger.debug(
+                    "Successfully got prev node stat of all hosts.")
+        except Exception as e:
+            raise Exception(str(e))
+
+    def handleNodeStat(self):
+        """
+        function: handle the node stat of all hosts
+        input : NA
+        output: NA
+        """
+        self.logger.debug("Handling the node stat of all hosts.")
+        try:
+            # map each cumulative counter in the current record to its
+            # position in the previous snapshot record (previous records
+            # hold node_name plus 15 counters), then store the delta,
+            # clamped at zero, back into the current record
+            nowIndexes = [6, 8, 10, 12, 18, 20, 22, 24,
+                          26, 33, 35, 42, 44, 46, 48]
+            for record in self.recordColumn.keys():
+                columnNow = self.recordColumn[record]
+                recordName = (columnNow[1]).strip()
+                columnPrev = self.recordPrevStat[recordName]
+                for prevIndex, nowIndex in enumerate(nowIndexes, 1):
+                    tempValue = int(float(columnNow[nowIndex])) \
+                                - int(float(columnPrev[prevIndex]))
+                    if (tempValue < 0):
+                        tempValue = 0
+                    (self.recordColumn[record])[nowIndex] = str(tempValue)
+            self.logger.debug(
+                "Successfully handled the node stat of all hosts.")
+        except Exception as e:
+            raise Exception(str(e))
+
+    def handleSessionCpuStat(self, hostname):
+        """
+        function: handle session cpu stat of all hosts
+        input : hostname
+        output: NA
+        """
+        self.logger.debug("Handling session cpu stat of all hosts.")
+        tempList = []
+        sessionCpuTempResult = ""
+        try:
+            if (len(self.sessionCpuColumn) > 0):
+                for record in self.sessionCpuColumn:
+                    if (len(record) == 1):
+                        continue
+                    tempTuple = tuple(record)
+                    tempList.append(tempTuple)
+                tempList.sort(key=cmp_to_key(
+                    lambda x, y: ((x[0] > y[0]) - (x[0] < y[0]))),
+                    reverse=False)
+                tempList.sort(key=cmp_to_key(
+                    lambda x, y: ((x[5] > y[5]) - (x[5] < y[5]))),
+                    reverse=True)
+                tempList.sort(key=cmp_to_key(
+                    lambda x, y: ((x[3] > y[3]) - (x[3] < y[3]))),
+                    reverse=True)
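+                # list.sort() is stable, so chaining three sorts makes the
+                # last key the primary one: records end up ordered by
+                # column 3 (descending), then column 5 (descending), with
+                # column 0 (ascending) as the final tie-breaker; note that
+                # the fields compare as strings here, not as numbers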
self.logger.debug("tempList: %s" % tempList) + + sessionCpuTempResult = os.path.join( + DefaultValue.getTmpDirFromEnv(self.opts.user), + "sessionCpuTempResult_%d_%s" \ + % (os.getpid(), + DefaultValue.GetHostIpOrName())) + # clean the temp file first + g_file.createFile(sessionCpuTempResult) + + strCmd = "" + for index in range(0, min(10, len(self.sessionCpuColumn))): + strCmd += "%s|%s|%s|%s|%s|%s\n" % \ + ((tempList[index])[0], (tempList[index])[1], + (tempList[index])[2], (tempList[index])[3], + (tempList[index])[4], (tempList[index])[5]) + + g_file.writeFile(sessionCpuTempResult, [strCmd]) + if (hostname != DefaultValue.GetHostIpOrName()): + self.sshTool.scpFiles( + sessionCpuTempResult, + DefaultValue.getTmpDirFromEnv(self.opts.user) \ + + "/", [hostname]) + + g_file.removeFile(sessionCpuTempResult) + else: + self.logger.debug("There are no session cpu statistics.") + self.logger.debug( + "Successfully handled session cpu stat of all hosts.") + except Exception as e: + # close and remove temporary file + g_file.removeFile(sessionCpuTempResult) + raise Exception(str(e)) + + def handleSessionMemoryStat(self, hostname): + """ + function: handle session memory stat of all hosts + input : hostname + output: NA + """ + self.logger.debug("Handling session memory stat of all hosts.") + tempList = [] + sessionMemTempResult = "" + try: + if (len(self.sessionMemoryColumn) > 0): + for record in self.sessionMemoryColumn: + if (len(record) == 1): + continue + tempTuple = tuple(record) + tempList.append(tempTuple) + tempList.sort(key=cmp_to_key( + lambda x, y: ((x[0] > y[0]) - (x[0] < y[0]))), + reverse=False) + tempList.sort(key=cmp_to_key( + lambda x, y: ((x[4] > y[4]) - (x[4] < y[4]))), + reverse=True) + tempList.sort(key=cmp_to_key( + lambda x, y: ((x[3] > y[3]) - (x[3] < y[3]))), + reverse=True) + self.logger.debug("tempList: %s" % tempList) + + # get session Mem Temp Result + sessionMemTempResult = os.path.join( + DefaultValue.getTmpDirFromEnv(self.opts.user), + "sessionMemTempResult_%d_%s" \ + % (os.getpid(), DefaultValue.GetHostIpOrName())) + # clean the temp file first + g_file.createFile(sessionMemTempResult) + + strCmd = "" + for index in range(0, min(10, len(self.sessionMemoryColumn))): + strCmd += "%s|%s|%s|%s|%s|%s|%s|%s|%s|%s\n" % \ + ((tempList[index])[0], (tempList[index])[1], + (tempList[index])[2], (tempList[index])[3], + (tempList[index])[4], (tempList[index])[5], + (tempList[index])[6], (tempList[index])[7], + (tempList[index])[8], (tempList[index])[9]) + + g_file.writeFile(sessionMemTempResult, [strCmd]) + if (hostname != DefaultValue.GetHostIpOrName()): + self.sshTool.scpFiles( + sessionMemTempResult, + DefaultValue.getTmpDirFromEnv(self.opts.user) \ + + "/", [hostname]) + + g_file.removeFile(sessionMemTempResult) + else: + self.logger.debug("There are no session memory statistics.") + self.logger.debug( + "Successfully handled session memory stat of all hosts.") + except Exception as e: + # close and remove temporary file + g_file.removeFile(sessionMemTempResult) + raise Exception(str(e)) + + def handleSessionIOStat(self, hostname): + """ + function: handle session IO stat of all hosts + input : hostname + output: NA + """ + self.logger.debug("Handling session IO stat of all hosts.") + tempList = [] + sessionIOTempResult = "" + try: + if (len(self.sessionIOColumn) > 0): + for record in self.sessionIOColumn: + if (len(record) == 1): + continue + tempTuple = tuple(record) + tempList.append(tempTuple) + tempList.sort(key=cmp_to_key( + lambda x, y: ((x[0] > y[0]) - (x[0] < 
y[0]))), + reverse=False) + tempList.sort(key=cmp_to_key( + lambda x, y: ((x[4] > y[4]) - (x[4] < y[4]))), + reverse=True) + tempList.sort(key=cmp_to_key( + lambda x, y: ((x[3] > y[3]) - (x[3] < y[3]))), + reverse=True) + self.logger.debug("tempList: %s" % tempList) + + sessionIOTempResult = os.path.join( + DefaultValue.getTmpDirFromEnv(self.opts.user), + "sessionIOTempResult_%d_%s" \ + % (os.getpid(), DefaultValue.GetHostIpOrName())) + # clean the temp file first + g_file.createFile(sessionIOTempResult) + + strCmd = "" + for index in range(0, min(10, len(self.sessionIOColumn))): + strCmd += "%s|%s|%s|%s|%s\n" % ((tempList[index])[0], + (tempList[index])[1], + (tempList[index])[2], + (tempList[index])[3], + (tempList[index])[4]) + + g_file.writeFile(sessionIOTempResult, [strCmd]) + if (hostname != DefaultValue.GetHostIpOrName()): + self.sshTool.scpFiles( + sessionIOTempResult, + DefaultValue.getTmpDirFromEnv(self.opts.user) \ + + "/", [hostname]) + + # close and remove temporary file + g_file.removeFile(sessionIOTempResult) + else: + self.logger.debug("There are no session IO statistics.") + self.logger.debug( + "Successfully handled session IO stat of all hosts.") + except Exception as e: + + # close and remove temporary file + g_file.removeFile(sessionIOTempResult) + raise Exception(str(e)) + + def launchAsynCollection(self, host, port): + """ + function: launch asyn collection for database size + input : host, port + output: NA + """ + self.logger.debug("Collecting database size.") + executingNodes = [] + querycmd = "ps -ef |grep '%s' | grep '%s' | grep -v grep" \ + % (OMCommand.getLocalScript("UTIL_GAUSS_STAT"), + self.ACTION_ASYN_COLLECT) + self.logger.debug( + "Command for Querying Collecting database size: %s." \ + % querycmd) + status = self.sshTool.getSshStatusOutput(querycmd)[0] + outputMap = self.sshTool.parseSshOutput(self.sshTool.hostNames) + for node in status.keys(): + if (outputMap[node].find(self.ACTION_ASYN_COLLECT) >= 0): + executingNodes.append(node) + + # judge failed nodes + if (len(executingNodes)): + self.logger.debug( + "Asyn Collection database size is in progress on nodes[%s]." 
+                % ' '.join(executingNodes))
+            return
+
+        # Skip the asynchronous database size collection when the interval
+        # is less than 6 hours
+        if (os.path.isfile(self.opts.databaseSizeFile)):
+            # Get the last modified time of the file
+            statinfo = os.stat(self.opts.databaseSizeFile)
+            lastChangeTime = statinfo.st_mtime
+            localTime = time.time()
+            # Query time interval 6 hours
+            if (int(localTime) - int(lastChangeTime) < DB_SIZE_CHECK_INTERVAL):
+                self.logger.debug(
+                    "Database size was already collected within 6 hours.")
+                return
+
+        # launch asynchronous collection for database size
+        cmd = "pssh -s -H %s \'" % (str(host))
+
+        if (self.opts.mpprcFile != ""):
+            cmd += "source %s;" % self.opts.mpprcFile
+        cmd += "%s -t %s -p %s -u %s -c %s -l %s " \
+               % (OMCommand.getLocalScript("UTIL_GAUSS_STAT"),
+                  self.ACTION_ASYN_COLLECT,
+                  self.clusterInfo.appPath,
+                  self.opts.user,
+                  str(port),
+                  self.opts.localLog)
+        cmd += "\' > /dev/null 2>&1 & "
+        if (os.getuid() == 0):
+            cmd = """su - %s -c "%s" """ % (self.opts.user, cmd)
+
+        self.logger.debug(
+            "Launch asynchronous collection command, executing %s on (%s:%s)" \
+            % (cmd, str(host), str(port)))
+        status = subprocess.getstatusoutput(cmd)[0]
+        if status == 0:
+            self.logger.debug("Successfully launched asynchronous collection.")
+        else:
+            self.logger.debug("Failed to launch asynchronous collection.")
+
+    def getPreviousDbSize(self):
+        """
+        function: get previous database size
+        input : NA
+        output: NA
+        """
+        if (not os.path.isfile(self.opts.databaseSizeFile)):
+            self.logger.debug(
+                "The database size file [%s] does not exist."
+                % self.opts.databaseSizeFile)
+            return
+
+        lines = g_file.readFile(self.opts.databaseSizeFile)
+
+        if (len(lines) == 0):
+            self.logger.debug(
+                "The database size file [%s] is empty." \
+                % self.opts.databaseSizeFile)
+            return
+
+        for line in lines:
+            if (line.find("total_database_size:") >= 0):
+                self.opts.databaseSize = int(
+                    line.strip().split(":")[1].strip())
+                break
+
+        self.logger.debug(
+            "The total database size is [%s]."
\ + % str(self.opts.databaseSize)) + + def insertNodeStat(self, hostName, port, currTime, lastTime, snapshotId): + """ + function: insert the node stat of all hosts into the cluster + input : hostname, port, currTime, lastTime, snapshotId + output: NA + """ + self.logger.debug( + "Inserting the node stat of all hosts into the cluster.") + dnInsts = [] + insertSql = "" + currTimeTemp = "" + lastTimeTemp = "" + snapshotIdTempNum = 0 + snapshotIdTempStr = "" + try: + if (currTime == ""): + currTimeTemp = "NULL" + else: + currTimeTemp = "'%s'" % currTime + + if (lastTime == ""): + lastTimeTemp = "NULL" + else: + lastTimeTemp = "'%s'" % lastTime + + if (snapshotId == ""): + snapshotIdTempStr = "NULL" + else: + snapshotIdTempNum = int(snapshotId) + + if (snapshotIdTempNum == 0 or snapshotIdTempNum == 2147483647): + snapshotIdTempNum = 1 + else: + snapshotIdTempNum += 1 + + snapshotIdTempStr = str(snapshotIdTempNum) + dnInst = None + for dbNode in self.clusterInfo.dbNodes: + # find a dn instance + if len(dbNode.datanodes) > 0: + dntmpInst = dbNode.datanodes[0] + if dntmpInst.hostname == hostName: + dnInst = dntmpInst + break + + for record in self.recordColumn.keys(): + column = self.recordColumn[record] + insertSql += "INSERT INTO pmk.pmk_snapshot_datanode_stat" \ + " VALUES(" + insertSql += "%s, '%s', '%s', '%s', %s," % ( + column[0], column[1], column[2], column[3], column[4]) + insertSql += "%s, %s, %s, %s, %s," % ( + column[5], column[6], column[7], column[8], column[9]) + insertSql += "%s, %s, %s, %s, %s," % ( + column[10], column[11], column[12], column[13], column[14]) + insertSql += "%s, %s, %s, %s, %s," % ( + column[15], column[16], column[17], column[18], column[19]) + insertSql += "%s, %s, %s, %s, %s," % ( + column[20], column[21], column[22], column[23], column[24]) + insertSql += "%s, %s, %s, %s, %s," % ( + column[25], column[26], column[27], column[28], column[29]) + insertSql += "%s, %s, %s, %s, %s," % ( + column[30], column[31], column[32], column[33], column[34]) + insertSql += "%s, %s, %s, %s, %s," % ( + column[35], column[36], column[37], column[38], column[39]) + insertSql += "%s, %s, %s, %s, %s," % ( + column[40], column[41], column[42], column[43], column[44]) + insertSql += "%s, %s, %s, %s);\n" % ( + column[45], column[46], column[47], column[48]) + + if (insertSql != ""): + startSql = "START TRANSACTION;" + commitSql = "COMMIT;" + tempSql = "INSERT INTO pmk.pmk_snapshot VALUES (%s, %s, %s, " \ + "current_timestamp);\n" % (snapshotIdTempStr, + currTimeTemp, + lastTimeTemp) + updateSql = "UPDATE pmk.pmk_meta_data SET last_snapshot_id" \ + " = %s, last_snapshot_collect_time = %s; " % \ + (snapshotIdTempStr, currTimeTemp) + # execute the insert sql + local_host = DefaultValue.GetHostIpOrName() + error_output = "" + if (self.DWS_mode): + if (local_host == hostName): + (status, result, + error_output1) = ClusterCommand.excuteSqlOnLocalhost( + port, tempSql) + (status, result, + error_output2) = ClusterCommand.excuteSqlOnLocalhost( + port, insertSql) + (status, result, + error_output3) = ClusterCommand.excuteSqlOnLocalhost( + port, updateSql) + else: + currentTime = time.strftime("%Y-%m-%d_%H:%M:%S") + pid = os.getpid() + outputfile = "metadata_%s_%s_%s.json" % ( + hostName, pid, currentTime) + tmpDir = DefaultValue.getTmpDirFromEnv() + filepath = os.path.join(tmpDir, outputfile) + ClusterCommand.executeSQLOnRemoteHost(dnInst.hostname, + dnInst.port, + tempSql, + filepath) + (status, result, + error_output1) = ClusterCommand.getSQLResult( + dnInst.hostname, outputfile) + 
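+                        # the same temporary output file is reused for all
+                        # three statements; each getSQLResult call reads back
+                        # the result of the statement just executed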
+                        ClusterCommand.executeSQLOnRemoteHost(dnInst.hostname,
+                                                              dnInst.port,
+                                                              insertSql,
+                                                              filepath)
+                        (status, result,
+                         error_output2) = ClusterCommand.getSQLResult(
+                            dnInst.hostname, outputfile)
+                        ClusterCommand.executeSQLOnRemoteHost(dnInst.hostname,
+                                                              dnInst.port,
+                                                              updateSql,
+                                                              filepath)
+                        (status, result,
+                         error_output3) = ClusterCommand.getSQLResult(
+                            dnInst.hostname, outputfile)
+                    if error_output1 != "":
+                        self.logger.debug(
+                            "Failed to execute SQL: %s" % tempSql +
+                            "\nError: \n%s" % str(error_output1))
+                        raise Exception(ErrorCode.GAUSS_530["GAUSS_53012"] +
+                                        "\nError: \n%s\n" \
+                                        % str(error_output1) +
+                                        "Please check the log for detail.")
+                    if error_output2 != "":
+                        self.logger.debug(
+                            "Failed to execute SQL: %s" % insertSql +
+                            "\nError: \n%s" % str(error_output2))
+                        raise Exception(ErrorCode.GAUSS_530["GAUSS_53012"] +
+                                        "\nError: \n%s\n" \
+                                        % str(error_output2) +
+                                        "Please check the log for detail.")
+                    if error_output3 != "":
+                        self.logger.debug(
+                            "Failed to execute SQL: %s" % updateSql +
+                            "\nError: \n%s" % str(error_output3))
+                        raise Exception(ErrorCode.GAUSS_530["GAUSS_53012"] +
+                                        "\nError: \n%s\n" \
+                                        % str(error_output3) +
+                                        "Please check the log for detail.")
+                else:
+                    sql = startSql + tempSql + insertSql \
+                          + updateSql + commitSql
+                    (status, output) = ClusterCommand.remoteSQLCommand(
+                        sql, self.opts.user,
+                        hostName, port, False, DefaultValue.DEFAULT_DB_NAME)
+                    if status != 0:
+                        self.logger.debug(
+                            "Failed to execute SQL: %s" % sql +
+                            "\nError: \n%s" % str(output))
+                        raise Exception(ErrorCode.GAUSS_530["GAUSS_53012"] +
+                                        "\nError: \n%s\n" % str(output) +
+                                        "Please check the log for detail.")
+            else:
+                raise Exception(
+                    ErrorCode.GAUSS_502["GAUSS_50203"] % ("sql statement"))
+
+            self.logger.debug(
+                "Successfully inserted the node "
+                "stat of all hosts into the cluster.")
+        except Exception as e:
+            raise Exception(str(e))
+
+    def getDWSMode(self):
+        """
+        function: get collect pmk information mode
+        input : NA
+        output: NA
+        """
+        # get security mode
+        security_mode_value = DefaultValue.getSecurityMode()
+        if (security_mode_value == "on"):
+            self.DWS_mode = True
+
+    def installPMKSchema(self, host, port):
+        """
+        function: install PMK schema
+        input : host, port
+        output : NA
+        """
+        try:
+            # install pmk schema
+            cmd = "%s -t %s -p %s -u %s -c %s -l %s" % (
+                OMCommand.getLocalScript("UTIL_GAUSS_STAT"),
+                self.ACTION_INSTALL_PMK,
+                self.clusterInfo.appPath,
+                self.opts.user,
+                str(port),
+                self.opts.localLog)
+            if (self.opts.mpprcFile != ""):
+                cmd = "source %s; %s" % (self.opts.mpprcFile, cmd)
+
+            if (host != DefaultValue.GetHostIpOrName()):
+                cmd = "pssh -s -H %s \'%s\'" % (str(host), cmd)
+
+            if (os.getuid() == 0):
+                cmd = """su - %s -c "%s" """ % (self.opts.user, cmd)
+            self.logger.debug(
+                "Command for installing pmk schema, executing %s on (%s:%s)"
+                % (cmd, str(host), str(port)))
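+            # For illustration only (all values hypothetical): when this runs
+            # as root against a remote host, the composed command takes
+            # roughly this shape:
+            #   su - omm -c "pssh -s -H host2 'GaussStat.py -t <install-pmk
+            #   action> -p <appPath> -u omm -c <port> -l <localLog>'"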
+
+            (status, output) = subprocess.getstatusoutput(cmd)
+            if (status == 0):
+                self.logger.debug("Successfully installed pmk schema.")
+            else:
+                self.logger.debug("Failed to install pmk schema.")
+                raise Exception(output)
+        except Exception as e:
+            raise Exception(str(e))
+
+    def dropPMKSchema(self, host, port):
+        """
+        function: drop PMK schema
+        input : host, port
+        output : NA
+        """
+        try:
+            querySql = "DROP SCHEMA IF EXISTS pmk CASCADE;"
+            local_host = DefaultValue.GetHostIpOrName()
+            if (self.DWS_mode):
+                if (host == local_host):
+                    (status, result,
+                     error_output) = ClusterCommand.excuteSqlOnLocalhost(
+                        port, querySql)
+                else:
+                    currentTime = time.strftime("%Y-%m-%d_%H:%M:%S")
+                    pid = os.getpid()
+                    outputfile = "droppmk_%s_%s_%s.json" \
+                                 % (host, pid, currentTime)
+                    tmpDir = DefaultValue.getTmpDirFromEnv(self.opts.user)
+                    filepath = os.path.join(tmpDir, outputfile)
+                    ClusterCommand.executeSQLOnRemoteHost(
+                        host, port, querySql, filepath)
+                    (status, result, error_output) = \
+                        ClusterCommand.getSQLResult(host, outputfile)
+                if (status != 2):
+                    raise Exception(
+                        ErrorCode.GAUSS_513["GAUSS_51300"] % querySql \
+                        + " Error: \n%s" % str(error_output))
+            else:
+                (status, output) = ClusterCommand.remoteSQLCommand(
+                    querySql, self.opts.user,
+                    host, port,
+                    False, DefaultValue.DEFAULT_DB_NAME)
+                if (status != 0):
+                    raise Exception(
+                        ErrorCode.GAUSS_513["GAUSS_51300"] % querySql \
+                        + " Error: \n%s" % str(output))
+        except Exception as e:
+            raise Exception(str(e))
+
+    def checkPMKMetaData(self, host, port):
+        """
+        function: check PMK meta data
+        input : host, port
+        output : NA
+        """
+        # check pmk_meta_data
+        try:
+            querySql = "SELECT * FROM pmk.pmk_meta_data " \
+                       "WHERE last_snapshot_collect_time >= " \
+                       "date_trunc('second', current_timestamp);"
+            local_host = DefaultValue.GetHostIpOrName()
+            if (self.DWS_mode):
+                if (host == local_host):
+                    (status, result, error_output) = \
+                        ClusterCommand.excuteSqlOnLocalhost(port, querySql)
+                else:
+                    currentTime = time.strftime("%Y-%m-%d_%H:%M:%S")
+                    pid = os.getpid()
+                    outputfile = "checkPMK%s_%s_%s.json" \
+                                 % (host, pid, currentTime)
+                    tmpDir = DefaultValue.getTmpDirFromEnv(self.opts.user)
+                    filepath = os.path.join(tmpDir, outputfile)
+                    ClusterCommand.executeSQLOnRemoteHost(
+                        host, port, querySql, filepath)
+                    (status, result, error_output) = \
+                        ClusterCommand.getSQLResult(host, outputfile)
+                if (status != 2):
+                    raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] \
+                                    % querySql + " Error: \n%s" \
+                                    % str(error_output))
+            else:
+                (status, output) = ClusterCommand.remoteSQLCommand(
+                    querySql, self.opts.user,
+                    host, port, False, DefaultValue.DEFAULT_DB_NAME)
+                if (status != 0):
+                    raise Exception(
+                        ErrorCode.GAUSS_513["GAUSS_51300"] \
+                        % querySql \
+                        + " Error: \n%s" \
+                        % str(output))
+                if (output != ""):
+                    self.logger.debug(
+                        "ERROR: There is a change in system time "
+                        "of Gauss MPPDB host. PMK does not support "
+                        "the scenarios related to system time change. "
+                        "The value of table pmk.pmk_meta_data is \"%s\"."
% output) + # recreate pmk schema + self.dropPMKSchema(host, port) + # install pmk schema + self.installPMKSchema(host, port) + except Exception as e: + raise Exception(str(e)) + + def cleanTempFiles(self): + """ + function: clean temp files + """ + recordTempFilePattern = os.path.join( + DefaultValue.getTmpDirFromEnv(self.opts.user), + 'recordTempFile_*_*') + g_file.removeFile(recordTempFilePattern) + + sessionCpuTempFilePattern = os.path.join( + DefaultValue.getTmpDirFromEnv(self.opts.user), + 'sessionCpuTempFile_*_*') + g_file.removeFile(sessionCpuTempFilePattern) + + sessionMemTempFilePattern = os.path.join( + DefaultValue.getTmpDirFromEnv(self.opts.user), + 'sessionMemTempFile_*_*') + g_file.removeFile(sessionMemTempFilePattern) + + sessionIOTempFilePattern = os.path.join( + DefaultValue.getTmpDirFromEnv(self.opts.user), + 'sessionIOTempFile_*_*') + g_file.removeFile(sessionIOTempFilePattern) + + sessionCpuTempResultPattern = os.path.join( + DefaultValue.getTmpDirFromEnv(self.opts.user), + 'sessionCpuTempResult_*_*') + g_file.removeFile(sessionCpuTempResultPattern) + + sessionMemTempResultPattern = os.path.join( + DefaultValue.getTmpDirFromEnv(self.opts.user), + 'sessionMemTempResult_*_*') + g_file.removeFile(sessionMemTempResultPattern) + + sessionIOTempResultPattern = os.path.join( + DefaultValue.getTmpDirFromEnv(self.opts.user), + 'sessionIOTempResult_*_*') + g_file.removeFile(sessionIOTempResultPattern) + + def CheckPMKPerf(self, outputInfo): + """ + function: check the performance about PMK tool + input : outputInfo + output: NA + """ + self.logger.debug("Checking PMK performance.") + cooInst = None + failedNodes = [] + tmpFiles = [] + try: + # clean all the temp files before start + # collect the performance data + self.cleanTempFiles() + + # Check whether pmk can be done + self.checkClusterStatus() + + nodeNames = self.clusterInfo.getClusterNodeNames() + tmpDir = DefaultValue.getTmpDirFromEnv(self.opts.user) + pid = os.getpid() + for nodeName in nodeNames: + tmpFiles.append(os.path.join(tmpDir, "recordTempFile_%d_%s" % ( + pid, nodeName))) + tmpFiles.append(os.path.join(tmpDir, + "sessionCpuTempFile_%d_%s" % ( + pid, nodeName))) + tmpFiles.append(os.path.join(tmpDir, + "sessionMemTempFile_%d_%s" % ( + pid, nodeName))) + tmpFiles.append(os.path.join(tmpDir, + "sessionIOTempFile_%d_%s" % ( + pid, nodeName))) + tmpFiles.append(os.path.join(tmpDir, + "sessionCpuTempResult_%d_%s" % ( + pid, nodeName))) + tmpFiles.append(os.path.join(tmpDir, + "sessionMemTempResult_%d_%s" % ( + pid, nodeName))) + tmpFiles.append(os.path.join(tmpDir, + "sessionIOTempResult_%d_%s" % ( + pid, nodeName))) + + # get security_mode value from cm_agent conf + self.getDWSMode() + + normalDNs = self.getNormalDatanodes() + hostname = normalDNs[0].hostname + port = normalDNs[0].port + + # install pmk schema + self.installPMKSchema(hostname, port) + + # check pmk_meta_data + self.checkPMKMetaData(hostname, port) + + # get pmk meta data + (pmk_curr_collect_start_time, + pmk_last_collect_start_time, last_snapshot_id) = \ + self.getMetaData(hostname, port) + self.deleteExpiredSnapShots(hostname, port) + + # collect pmk stat + self.collectPMKData(pmk_curr_collect_start_time, + pmk_last_collect_start_time, + last_snapshot_id, port) + + # launch asynchronous collection + self.launchAsynCollection(hostname, port) + + # get database size from previous collection + self.getPreviousDbSize() + + if (not self.DWS_mode): + # get cpu stat of all sessions + self.getAllSessionCpuStat() + # get IO stat of all sessions + 
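+                # (each getAllSession*Stat call gathers the per-host temp
+                # files, and the matching handleSession*Stat call below
+                # sorts them and ships a top-10 result file to the node
+                # that runs the display step)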
self.getAllSessionIOStat() + # get memory stat of all sessions + self.getAllSessionMemoryStat() + # handle session cpu stat of all hosts + self.handleSessionCpuStat(str(hostname)) + # Handle session IO stat of all hosts + self.handleSessionIOStat(str(hostname)) + # handle session memory stat of all hosts + self.handleSessionMemoryStat(str(hostname)) + + # get node stat of all hosts + self.getAllHostsNodeStat() + # get prev node stat of all hosts + self.getAllHostsPrevNodeStat(hostname, port, last_snapshot_id) + # handle the node stat of all hosts + self.handleNodeStat() + # insert the node stat of all hosts into the cluster + self.insertNodeStat(hostname, port, + pmk_curr_collect_start_time, + pmk_last_collect_start_time, last_snapshot_id) + + # display pmk stat + showDetail = "" + if (self.opts.show_detail): + showDetail = "-d" + + cmd = "%s -t %s -p %s -u %s -c %s %s -l %s" \ + % (OMCommand.getLocalScript("UTIL_GAUSS_STAT"), + self.ACTION_DISPLAY_STAT, + self.clusterInfo.appPath, + self.opts.user, + str(port), + showDetail, + self.opts.localLog) + if (self.opts.mpprcFile != ""): + cmd = "source %s; %s" % (self.opts.mpprcFile, cmd) + + if (self.DWS_mode): + cmd += " --dws-mode" + + cmd += " --flag-num=%d" % os.getpid() + + cmd += " --master-host=%s" % DefaultValue.GetHostIpOrName() + + cmd += " --database-size=%s" % str(self.opts.databaseSize) + + if (str(hostname) != DefaultValue.GetHostIpOrName()): + cmd = "pssh -s -H %s \'%s\'" % (str(hostname), cmd) + + if (os.getuid() == 0): + cmd = """su - %s -c "%s" """ % (self.opts.user, cmd) + + self.logger.debug( + "Display pmk stat command for executing %s on (%s:%s)" % \ + (cmd, str(hostname), str(port))) + + (status, output) = subprocess.getstatusoutput(cmd) + if (status == 0): + print("%s\n" % output, end="", file=outputInfo) + self.logger.debug("Successfully display pmk stat.") + else: + self.logger.debug("Failed to display pmk stat.") + raise Exception(output) + + self.logger.debug("Operation succeeded: PMK performance check.") + except Exception as e: + for tmpFile in tmpFiles: + g_file.removeFile(tmpFile) + raise Exception(str(e)) + + def CheckSSDPerf(self, outputInfo): + """ + function: check the performance about SSD + input : outputInfo + output: NA + """ + self.logger.debug("Checking SSD performance.") + # print SSD performance statistics information to output file + print( + "SSD performance statistics information:", + end="", file=outputInfo) + try: + # check SSD + cmd = "%s -t SSDPerfCheck -U %s -l %s" \ + % (OMCommand.getLocalScript("LOCAL_PERFORMANCE_CHECK"), + self.opts.user, self.opts.localLog) + gp_path = os.path.join( + DefaultValue.ROOT_SCRIPTS_PATH, self.opts.user) + (status, output) = self.sshTool.getSshStatusOutput(cmd, + gp_path=gp_path) + outputMap = self.sshTool.parseSshOutput(self.sshTool.hostNames) + for node in status.keys(): + if (status[node] == DefaultValue.SUCCESS): + result = outputMap[node] + print( + " %s:\n%s" % (node, result), + end="", file=outputInfo) + else: + print( + " %s:\n Failed to check SSD performance." 
\ + " Error: %s" % (node, outputMap[node]), + end="", file=outputInfo) + self.logger.debug("Successfully checked SSD performance.") + except Exception as e: + raise Exception(str(e)) diff --git a/script/impl/checkperf/OLAP/__init__.py b/script/impl/checkperf/OLAP/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/script/impl/checkperf/__init__.py b/script/impl/checkperf/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/script/impl/collect/CollectImpl.py b/script/impl/collect/CollectImpl.py new file mode 100644 index 0000000..01cc69b --- /dev/null +++ b/script/impl/collect/CollectImpl.py @@ -0,0 +1,65 @@ +# coding: UTF-8 +############################################################################# +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +# Description : gs_collector is a utility to collect information +# about the cluster. +############################################################################# +import sys + + +class CollectImpl: + ''' + classdocs + input : NA + output: NA + ''' + + def __init__(self, collectObj): + ''' + function: Constructor + input : collectObj + output: NA + ''' + self.context = collectObj + + def run(self): + ''' + function: main flow + input : NA + output: NA + ''' + try: + self.context.initLogger("gs_collector") + except Exception as e: + self.context.logger.closeLog() + raise Exception(str(e)) + + try: + # Perform a log collection + self.doCollector() + + self.context.logger.closeLog() + except Exception as e: + self.context.logger.logExit(str(e)) + + def doCollector(self): + """ + function: collect information + input : strftime + output: Successfully collected catalog statistics + """ + pass diff --git a/script/impl/collect/OLAP/CollectImplOLAP.py b/script/impl/collect/OLAP/CollectImplOLAP.py new file mode 100644 index 0000000..885f651 --- /dev/null +++ b/script/impl/collect/OLAP/CollectImplOLAP.py @@ -0,0 +1,927 @@ +# coding: UTF-8 +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. 
+# ----------------------------------------------------------------------------
+
+import sys
+import time
+import subprocess
+import base64
+import json
+
+sys.path.append(sys.path[0] + "/../../../")
+from gspylib.common.Common import DefaultValue
+from gspylib.common.OMCommand import OMCommand
+from gspylib.common.ErrorCode import ErrorCode
+from gspylib.os.gsfile import g_file
+from gspylib.os.gsplatform import g_Platform
+from gspylib.os.gsOSlib import g_OSlib
+from impl.collect.CollectImpl import CollectImpl
+
+
+class CollectImplOLAP(CollectImpl):
+    """
+    The class is used to perform the collection of log files.
+    """
+
+    def __init__(self, collectObj):
+        """
+        function: Constructor
+        input : collectObj
+        output: NA
+        """
+        self.jobInfo = {}
+        self.nodeJobInfo = {}
+        super(CollectImplOLAP, self).__init__(collectObj)
+
+    def parseConfigFile(self):
+        """
+        function: Parse the configuration file
+        input : NA
+        output: NA
+        """
+        try:
+            # Init the cluster information
+            self.context.initClusterInfoFromStaticFile(self.context.user)
+            self.context.appPath = self.context.clusterInfo.appPath
+
+            # Obtain the cluster installation directory owner and group
+            (self.context.user, self.context.group) = g_OSlib.getPathOwner(
+                self.context.appPath)
+            if (self.context.user == "" or self.context.group == ""):
+                self.context.logger.logExit(ErrorCode.GAUSS_503["GAUSS_50308"])
+
+            # Match the corresponding node
+            for nodename in self.context.nodeName:
+                if not self.context.clusterInfo.getDbNodeByName(nodename):
+                    self.context.logger.logExit(
+                        ErrorCode.GAUSS_516["GAUSS_51619"] % nodename)
+
+            if (len(self.context.nodeName) == 0):
+                self.context.nodeName = \
+                    self.context.clusterInfo.getClusterNodeNames()
+
+            self.context.initSshTool(self.context.nodeName,
+                                     DefaultValue.TIMEOUT_PSSH_COLLECTOR)
+            if (len(self.context.nodeName) == 1 and
+                    self.context.nodeName[0] ==
+                    DefaultValue.GetHostIpOrName()):
+                self.context.localMode = True
+        except Exception as e:
+            raise Exception(str(e))
+        self.context.logger.log("Successfully parsed the configuration file.")
+
+    # Single quotes and double quotes are stripped when a parameter is
+    # passed in from outside, so '#' is used in place of double quotes.
+    def formatJsonString(self, check):
+        """
+        function: format a JSON string for passing on the command line
+        input : check
+        output: json string
+        """
+        if (self.context.isSingle or self.context.localMode):
+            return "\'" + json.dumps(check).replace("\"", "#") + "\'"
+        else:
+            return "\'" + json.dumps(check).replace(
+                "$", "\$").replace("\"", "#") + "\'"
+
+    def checkTmpDir(self):
+        """
+        function: Check the tmp dir; if it does not exist, create it
+        input : TmpDirFromEnv
+        output: NA
+        """
+        try:
+            # Create a temporary file
+            tmpDir = DefaultValue.getTmpDirFromEnv()
+            cmd = "(if [ !
-d '%s' ];then mkdir -p '%s' -m %s;fi)" \ + % (tmpDir, tmpDir, DefaultValue.KEY_DIRECTORY_MODE) + DefaultValue.execCommandWithMode( + cmd, + "Check temporary directory", + self.context.sshTool, + self.context.isSingle or self.context.localMode, + self.context.mpprcFile) + except Exception as e: + self.context.logger.logExit(str(e)) + + def checkCommand(self): + """ + function: check command + output: Successfully command exists + """ + self.context.logger.log("check rsync command.") + # Check the system information on each node + cmd = "source %s; %s -t check_command -U %s -S %d -l %s" % ( + self.context.mpprcFile, + OMCommand.getLocalScript("Local_Collect"), + self.context.user, + self.context.speedLimitFlag, + self.context.localLog) + flag = 0 + failedNodeList = [] + if (self.context.isSingle or self.context.localMode): + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + flag = 1 + self.context.logger.log("The cmd is %s " % cmd) + self.context.logger.logExit( + "rsync command not found on %s. " + "Error:\n%s\n--speed-limit parameters cannot be used" % \ + (self.context.nodeName[0], output)) + else: + (status, output) = self.context.sshTool.getSshStatusOutput( + cmd, self.context.nodeName) + self.context.sshTool.parseSshOutput(self.context.nodeName) + # Gets the execution result + for node in self.context.nodeName: + if (status[node] != DefaultValue.SUCCESS): + flag = 1 + failedNodeList.append(node) + if flag == 0: + self.context.logger.log("Successfully check rsync command.") + else: + self.context.logger.logExit( + "rsync command not found on hosts: %s.\n " + "--speed-limit parameters cannot be used " + % str(failedNodeList)) + + def createStoreDir(self): + """ + :return: + """ + resultdir = "" + # Gets the current time + currentTime = time.strftime("%Y%m%d_%H%M%S") + if (self.context.outFile is not None and self.context.outFile != ""): + # rm the tmpdir + resultdir = self.context.outFile + else: + # rm the tmpdir + resultdir = DefaultValue.getTmpDirFromEnv() + + cmd = \ + "if [ -d '%s'/collector_tmp_* ];" \ + "then rm -rf '%s'/collector_tmp_*; fi" % ( + resultdir, resultdir) + (status, output) = DefaultValue.retryGetstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + "Error:\n%s" % output) + # Get the default path + targetdir = "%s/collector_tmp_%s" % (resultdir, currentTime) + self.context.outFile = "%s/collector_%s" % (targetdir, currentTime) + # Create a folder to store log information + g_file.createDirectory(self.context.outFile) + g_file.changeMode(DefaultValue.KEY_DIRECTORY_MODE, + self.context.outFile, True) + return (currentTime, targetdir, resultdir) + + def createDir(self): + """ + function: create Dir + output: Successfully create dir + """ + self.context.logger.log("create Dir.") + # Check the system information on each node + cmd = "source %s; %s -t create_dir -U %s -l %s" % ( + self.context.mpprcFile, + OMCommand.getLocalScript("Local_Collect"), + self.context.user, + self.context.localLog) + flag = 0 + if (self.context.isSingle or self.context.localMode): + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + flag = 1 + self.context.logger.log("The cmd is %s " % cmd) + self.context.logger.log( + "Failed to create dir on %s. 
Error:\n%s" % \ + (self.context.nodeName[0], output)) + else: + (status, output) = self.context.sshTool.getSshStatusOutput( + cmd, + self.context.nodeName) + outputMap = self.context.sshTool.parseSshOutput( + self.context.nodeName) + # Gets the execution result + for node in self.context.nodeName: + if (status[node] != DefaultValue.SUCCESS): + flag = 1 + self.context.logger.log( + "Failed to create dir on %s. Error:\n%s" % \ + (node, str(outputMap[node]))) + if (flag == 0): + self.context.logger.log("Successfully create dir.") + + def printSummaryInfo(self, resultdir, currentTime): + maxNamelen = len("SUCCESS HOSTNAME") + maxJoblen = len("success") + jobCount = 0 + jobNameList = [] + tag = "" + info = "" + + for host in self.context.nodeName: + maxNamelen = max(maxNamelen, len(host)) + for jobName, jobInfo in self.jobInfo.items(): + subJobCount = len(jobInfo) + jobCount += subJobCount + while subJobCount > 0: + job = "%s-%s" % (jobName, str(subJobCount)) + maxJoblen = max(maxJoblen, len(job)) + jobNameList.append(job) + subJobCount -= 1 + maxJoblen = maxJoblen + 4 + maxNamelen = maxNamelen + 4 + + title = "%s%s%s%s%s%s%s" % ("|", "TASK NAME".center(maxJoblen), "|", + "SUCCESS HOSTNAME".center(maxNamelen), "|", + "FAILED HOSTNAME".center(maxNamelen), "|") + index = len(title) + while index > 0: + tag = "%s%s" % (tag, "-") + index -= 1 + info = "%s%s%s" % (info, tag, "\n") + info = "%s%s%s%s%s%s%s%s" % ( + info, "|", "".center(maxJoblen), "|", "".center(maxNamelen), "|", + "".center(maxNamelen), "|\n") + info = "%s%s%s" % (info, title, "\n") + info = "%s%s%s%s%s%s%s%s" % ( + info, "|", "".center(maxJoblen), "|", "".center(maxNamelen), "|", + "".center(maxNamelen), "|\n") + info = "%s%s%s" % (info, tag, "\n") + for job in jobNameList: + jobName = str(job.split("-")[0]) + i = int(job.split("-")[1]) + len_s = len(self.jobInfo[jobName][i - 1]["successNodes"]) + len_f = len(self.jobInfo[jobName][i - 1]["failedNodes"]) + if len_s >= len_f: + self.jobInfo[jobName][i - 1]["failedNodes"] += [None] * ( + len_s - len_f) + else: + self.jobInfo[jobName][i - 1]["successNodes"] += [None] * ( + len_f - len_s) + + isInitTitle = 0 + for s, f in zip(self.jobInfo[jobName][i - 1]["successNodes"], + self.jobInfo[jobName][i - 1]["failedNodes"]): + if isInitTitle == 1: + job = "" + if str(s) == "None": + s = "" + if str(f) == "None": + f = "" + info = "%s%s%s%s%s%s%s%s%s" % ( + info, "|", job.ljust(maxJoblen), "|", + str(s).center(maxNamelen), "|", str(f).center(maxNamelen), + "|", + "\n") + isInitTitle = 1 + info = "%s%s%s" % (info, tag, "\n") + + cmd = " echo '%s\n' >> %s/collector_tmp_%s/collector_%s/Summary.log" \ + % ( + info, resultdir, currentTime, currentTime) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + print("Generate Summary Info Failed.") + self.context.logger.debug("The cmd is %s " % cmd) + self.context.logger.debug( + "Generate Summary Info Failed %s." 
% output) + + def printDetailSummaryInfo(self, resultdir, currentTime): + statusFiedLen = len("SuccessfulTask") + 4 + for node, jobList in self.nodeJobInfo.items(): + for job in jobList: + jsonJob = json.loads(job) + successLen = 0 + jobName = jsonJob["jobName"] + successInfoList = [] + failedInfolist = [] + Info = "" + tag = [] + successTask = jsonJob["successTask"] + + for i in range(0, len(successTask), 5): + Task = "; ".join(successTask[i: i + 5]) + successLen = max(successLen, len(Task)) + successInfoList.append(Task) + + failedLen = 0 + for failedJob, reason in jsonJob["failedTask"].items(): + failedInfo = failedJob + ": " + reason + failedLen = max(len(failedInfo), failedLen) + failedInfolist.append(failedInfo) + + title = "%s - %s - %s" % ( + node, jobName, "Success" if failedLen == 0 else "Failed") + taskMaxLen = max(failedLen + 4 + 2, successLen + 4 + 2) + maxLen = max(taskMaxLen, len(title)) + titleLen = maxLen + statusFiedLen + 1 + totalLen = titleLen + 2 + i = 0 + while i < totalLen: + tag.append("-") + i += 1 + Info = "%s%s%s" % (Info, "".join(tag), "\n") + Info = "%s%s%s%s%s" % ( + Info, "|", " ".center(titleLen), "|", "\n") + Info = "%s%s%s%s%s" % ( + Info, "|", title.center(titleLen), "|", "\n") + Info = "%s%s%s%s%s" % ( + Info, "|", " ".center(titleLen), "|", "\n") + Info = "%s%s%s" % (Info, "".join(tag), "\n") + for s in successInfoList: + Info = "%s%s%s%s%s%s%s" % ( + Info, "|", "SuccessfulTask".center(statusFiedLen), "|", + s.center(maxLen), "|", "\n") + Info = "%s%s%s" % (Info, "".join(tag), "\n") + for f in failedInfolist: + Info = "%s%s%s%s%s%s%s" % ( + Info, "|", "FailedTask".center(statusFiedLen), "|", + f.center(maxLen), "|", "\n") + Info = "%s%s%s" % (Info, "".join(tag), "\n") + Info = "%s%s" % (Info, "\n\n") + cmd = \ + " echo '%s' " \ + ">> %s/collector_tmp_%s/collector_%s/Detail.log" % ( + Info, resultdir, currentTime, currentTime) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + print("Generate Detail Summary Info Failed") + self.context.logger.debug("The cmd is %s " % cmd) + self.context.logger.debug( + "Generate Detail Summary Info Failed %s." 
% output) + + def generalJobInfo(self, jobName, nodeList): + if self.jobInfo.__contains__(jobName): + self.jobInfo[jobName].append(nodeList) + else: + nodes = [nodeList] + self.jobInfo[jobName] = nodes + + def generalDetailInfo(self, nodeName, job): + if self.nodeJobInfo.__contains__(nodeName): + self.nodeJobInfo[nodeName].append(job) + else: + jobList = [job] + self.nodeJobInfo[nodeName] = jobList + + def generalSummary(self, resultdir, currentTime): + self.printSummaryInfo(resultdir, currentTime) + self.printDetailSummaryInfo(resultdir, currentTime) + + def resultCheck(self, output): + isFailed = 0 + nodeList = {} + successNodeList = [] + failedNodeList = [] + jobName = "UNKNOWN" + try: + if self.context.isSingle or self.context.localMode: + if len(json.loads(output)["failedTask"]) > 0: + isFailed = 1 + failedNodeList.append(self.context.nodeName[0]) + else: + successNodeList.append(self.context.nodeName[0]) + self.generalDetailInfo(self.context.nodeName[0], output) + jobName = json.loads(output)["jobName"] + else: + for node in self.context.nodeName: + if len(json.loads(str(output[node]))["failedTask"]) > 0: + isFailed = 1 + failedNodeList.append(node) + else: + successNodeList.append(node) + self.generalDetailInfo(node, str(output[node])) + jobName = json.loads(output[node])["jobName"] + nodeList["successNodes"] = successNodeList + nodeList["failedNodes"] = failedNodeList + + self.generalJobInfo(jobName, nodeList) + return isFailed + except Exception as e: + self.context.logger.debug("check result failed %s." % str(e)) + return 1 + + def planResultCheck(self, output): + isFailed = 1 + nodeList = {} + successNodeList = [] + failedNodeList = [] + jobName = "UNKNOWN" + try: + if self.context.isSingle or self.context.localMode: + if len(json.loads(output)["failedTask"]) == 0: + isFailed = 0 + successNodeList.append(self.context.nodeName[0]) + else: + failedNodeList.append(self.context.nodeName[0]) + + self.generalDetailInfo(self.context.nodeName[0], output) + jobName = json.loads(output)["jobName"] + else: + for node in self.context.nodeName: + if len(json.loads(str(output[node]))["failedTask"]) == 0: + isFailed = 0 + successNodeList.append(node) + else: + failedNodeList.append(node) + + self.generalDetailInfo(node, str(output[node])) + jobName = json.loads(output[node])["jobName"] + nodeList["successNodes"] = successNodeList + nodeList["failedNodes"] = failedNodeList + self.generalJobInfo(jobName, nodeList) + return isFailed + except Exception as e: + self.context.logger.debug("check plan result failed %s." 
+
+    def systemCheck(self, sysInfo):
+        """
+        function: collect OS information
+        output: Successfully collected OS information
+        """
+        self.context.logger.log("Collecting OS information.")
+        # Check the system information on each node
+        cmd = "source %s; %s -t system_check -U %s -l %s -C %s" % (
+            self.context.mpprcFile,
+            OMCommand.getLocalScript("Local_Collect"),
+            self.context.user,
+            self.context.localLog,
+            self.formatJsonString(sysInfo))
+
+        if (self.context.isSingle or self.context.localMode):
+            output = subprocess.getstatusoutput(cmd)[1]
+            flag = self.resultCheck(output)
+        else:
+            self.context.sshTool.getSshStatusOutput(
+                cmd,
+                self.context.nodeName)
+            outputMap = self.context.sshTool.parseSshOutput(
+                self.context.nodeName)
+            # Get the execution result
+            flag = self.resultCheck(outputMap)
+        if (flag == 0):
+            self.context.logger.log("Successfully collected OS information.")
+        else:
+            self.context.logger.log("The cmd is %s " % cmd)
+            self.context.logger.log("Failed to collect OS information.")
+
+    def databaseCheck(self, data):
+        """
+        function: collect catalog statistics
+        output: Successfully collected catalog statistics.
+        """
+        self.context.logger.log("Collecting catalog statistics.")
+        # Collect catalog statistics on each node
+        cmd = "source %s; %s -t database_check -U %s -l %s -C %s" % (
+            self.context.mpprcFile,
+            OMCommand.getLocalScript("Local_Collect"),
+            self.context.user,
+            self.context.localLog,
+            self.formatJsonString(data))
+
+        if (self.context.isSingle or self.context.localMode):
+            output = subprocess.getstatusoutput(cmd)[1]
+            flag = self.resultCheck(output)
+        else:
+            self.context.sshTool.getSshStatusOutput(
+                cmd,
+                self.context.nodeName)
+            outputMap = self.context.sshTool.parseSshOutput(
+                self.context.nodeName)
+            # Get the execution result
+            flag = self.resultCheck(outputMap)
+        if (flag == 0):
+            self.context.logger.log(
+                "Successfully collected catalog statistics.")
+        else:
+            self.context.logger.log("The cmd is %s " % cmd)
+            self.context.logger.log("Failed to collect catalog statistics.")
+
+    def logCopy(self, log, l):
+        """
+        function: collect log, xlog, or core files
+        output: Successfully collected log files
+        """
+        self.context.logger.log("Collecting %s files." % log)
+        # Copy the log information on each node
+        self.context.keyword = base64.b64encode(
+            bytes(self.context.keyword, 'utf-8')).decode()
+
+        cmd = \
+            "source %s; " \
+            "%s -t %s -U %s -b '%s' -e '%s' -k '%s' -l %s -s %d -S %d -C %s" \
+            % (self.context.mpprcFile,
+               OMCommand.getLocalScript("Local_Collect"),
+               "log_copy" if log == "Log" else (
+                   "xlog_copy" if log == "XLog" else "core_copy"),
+               self.context.user,
+               self.context.begintime,
+               self.context.endtime,
+               self.context.keyword,
+               self.context.localLog,
+               # For local collection,
+               # use the max speed limit, as all nodes do it individually.
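+               # Assumption on units, judging from speedLimitEachNodeKBs in
+               # copyFile() below: speedLimit is configured in MB/s, and the
+               # "* 1024" converts it to the KB/s value expected by -s.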
+               self.context.speedLimit * 1024,
+               self.context.speedLimitFlag,
+               self.formatJsonString(l)
+               )
+
+        if (self.context.isSingle or self.context.localMode):
+            output = subprocess.getstatusoutput(cmd)[1]
+            flag = self.resultCheck(output)
+        else:
+            # Estimated transfer time plus estimated tar time.
+            timeout = int(
+                self.context.LOG_SIZE_PER_DAY_ONE_NODE
+                * self.context.duration // self.context.speedLimit
+                + self.context.LOG_SIZE_PER_DAY_ONE_NODE
+                * self.context.duration // self.context.TAR_SPEED)
+            # The timeout value should be in [10 min, 1 hour]
+            if (timeout < DefaultValue.TIMEOUT_PSSH_COLLECTOR):
+                timeout = DefaultValue.TIMEOUT_PSSH_COLLECTOR
+            elif (timeout > 3600):
+                timeout = 3600
+            self.context.sshTool.setTimeOut(timeout)
+            self.context.logger.debug(
+                "Collection will time out in %ds." % timeout)
+            self.context.sshTool.getSshStatusOutput(
+                cmd,
+                self.context.nodeName)
+            outputMap = self.context.sshTool.parseSshOutput(
+                self.context.nodeName)
+            # Get the execution result
+            flag = self.resultCheck(outputMap)
+        if (flag == 0):
+            self.context.logger.log("Successfully collected %s files." % log)
+        else:
+            self.context.logger.log("The cmd is %s " % cmd)
+            self.context.logger.log("Failed to collect %s files." % log)
+
+    def confGstack(self, check, s):
+        """
+        function: collect configuration files and process stack information
+        output: Successfully collected configuration files
+                and process stack information.
+        """
+        self.context.logger.log("Collecting %s files." % s["TypeName"])
+        # Collect configuration files
+        # and process stack information on each node
+        cmd = "source %s; %s -t %s -U %s -l %s -C %s" % (
+            self.context.mpprcFile,
+            OMCommand.getLocalScript("Local_Collect"),
+            check,
+            self.context.user,
+            self.context.localLog,
+            self.formatJsonString(s))
+
+        if (self.context.isSingle or self.context.localMode):
+            output = subprocess.getstatusoutput(cmd)[1]
+            flag = self.resultCheck(output)
+        else:
+            self.context.sshTool.getSshStatusOutput(
+                cmd,
+                self.context.nodeName)
+            outputMap = self.context.sshTool.parseSshOutput(
+                self.context.nodeName)
+            # Get the execution result
+            flag = self.resultCheck(outputMap)
+        if (flag == 0):
+            self.context.logger.log(
+                "Successfully collected %s files." % s["TypeName"])
+        else:
+            self.context.logger.log("The cmd is %s " % cmd)
+            self.context.logger.log(
+                "Failed to collect %s files." % s["TypeName"])
+
+    def planSimulator(self, data):
+        """
+        function: collect plan simulator files
+        output: Successfully collected files.
+        """
+        self.context.logger.log("Collecting plan simulator statistics.")
+        # Collect plan simulator on each node
+        cmd = "source %s; %s -t plan_simulator_check -U %s -l %s -C %s" % (
+            self.context.mpprcFile,
+            OMCommand.getLocalScript("Local_Collect"),
+            self.context.user,
+            self.context.localLog,
+            self.formatJsonString(data))
+
+        if (self.context.isSingle or self.context.localMode):
+            output = subprocess.getstatusoutput(cmd)[1]
+            flag = self.planResultCheck(output)
+        else:
+            (status, output) = self.context.sshTool.getSshStatusOutput(
+                cmd,
+                self.context.nodeName)
+            outputMap = self.context.sshTool.parseSshOutput(
+                self.context.nodeName)
+            # Get the execution result
+            flag = self.planResultCheck(outputMap)
+        if (flag == 0):
+            self.context.logger.log("Successfully collected plan simulator.")
+        else:
+            self.context.logger.log("The cmd is %s " % cmd)
+            self.context.logger.log("Failed to collect plan simulator.")
+
+    def copyFile(self):
+        """
+        function: collect result files
+        output: Successfully collected files.
+ """ + self.context.logger.log("Collecting files.") + # Collect result files on each node + cmd = "source %s; %s -t copy_file -U %s -o %s -h %s -l %s" % ( + self.context.mpprcFile, + OMCommand.getLocalScript("Local_Collect"), + self.context.user, + self.context.outFile, + DefaultValue.GetHostIpOrName(), + self.context.localLog) + + flag = 0 + if (self.context.isSingle or self.context.localMode): + cmd = cmd + (" -s %d" % self.context.speedLimit * 1024) + cmd = cmd + (" -S %d" % self.context.speedLimitFlag) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + flag = 0 + self.context.logger.log("The cmd is %s " % cmd) + self.context.logger.log( + "Failed to collect files on %s. Error:\n%s" % \ + (self.context.nodeName[0], output)) + else: + flag = 1 + else: + parallelNum = DefaultValue.getCpuSet() + if (len(self.context.nodeName) < parallelNum): + parallelNum = len(self.context.nodeName) + + speedLimitEachNodeKBs = int( + self.context.speedLimit * 1024 // parallelNum) + + # In parallel mode, + # set a bandwidth to collect log files from other nodes + # to avoid too much IO for net card, which is risky for CM things. + cmd = cmd + (" -s %d" % speedLimitEachNodeKBs) + cmd = cmd + (" -S %d" % self.context.speedLimitFlag) + + # The timeout value to remote copy. + timeout = self.context.LOG_SIZE_PER_DAY_ONE_NODE \ + * self.context.duration * 1024 // speedLimitEachNodeKBs + # The timeout value should be in [10 min, 1 hour] + if (timeout < DefaultValue.TIMEOUT_PSSH_COLLECTOR): + timeout = DefaultValue.TIMEOUT_PSSH_COLLECTOR + elif (timeout > 3600): + timeout = 3600 + self.context.sshTool.setTimeOut(timeout) + self.context.logger.debug( + "Copy logs will be timeout in %ds." % timeout) + (status, output) = self.context.sshTool.getSshStatusOutput( + cmd, + self.context.nodeName, + parallel_num=parallelNum) + self.context.sshTool.parseSshOutput( + self.context.nodeName) + # Gets the execution result + for node in self.context.nodeName: + if (status[node] == DefaultValue.SUCCESS): + flag = 1 + + if (flag == 0): + self.context.logger.log( + "Failed to collect files: All collection tasks failed") + else: + self.context.logger.log("Successfully collected files.") + + def tarResultFiles(self, currentTime, targetdir, resultdir): + """ + :return: + """ + # tar the result and delete directory + try: + # tar the result and delete directory + tarFile = "collector_%s.tar.gz" % currentTime + destDir = "collector_%s" % currentTime + cmd = "%s && %s" % (g_Platform.getCdCmd(targetdir), + g_Platform.getCompressFilesCmd(tarFile, + destDir)) + cmd += " && %s" % g_Platform.getChmodCmd( + str(DefaultValue.KEY_FILE_MODE), tarFile) + cmd += " && %s" % g_Platform.getMoveFileCmd(tarFile, "../") + cmd += " && %s '%s'" % ( + g_Platform.getRemoveCmd("directory"), targetdir) + DefaultValue.execCommandLocally(cmd) + self.context.logger.log( + "All results are stored in %s/collector_%s.tar.gz." 
% ( + resultdir, currentTime)) + except Exception as e: + raise Exception(str(e)) + + def getCycle(self, sysInfo): + """ + function: parse interval and count + input : sysInfo + output: count, interval + """ + interval = 0 + if sysInfo.__contains__('Interval'): + interval = int(sysInfo['Interval'].replace(" ", "")) + count = int(sysInfo['Count'].replace(" ", "")) + return interval, count + + def doCollector(self): + """ + function: collect information + input : strftime + output: Successfully collected catalog statistics + """ + # Parsing configuration files + self.parseConfigFile() + + # check rsync command + if self.context.speedLimitFlag == 1: + self.checkCommand() + + # check tmp directory + self.checkTmpDir() + + self.createDir() + # create store dir + (currentTime, targetdir, resultdir) = self.createStoreDir() + + # collect OS information + if self.context.config.__contains__('System'): + sysList = self.context.config['System'] + for sysInfo in sysList: + if sysInfo.__contains__('Count'): + (interval, count) = self.getCycle(sysInfo) + print("do system check interval %s : count %s" % ( + str(interval), str(count))) + while count: + count -= 1 + self.systemCheck(sysInfo) + if count > 0 and interval > 0: + time.sleep(interval) + else: + self.systemCheck(sysInfo) + + # collect catalog statistics + if self.context.config.__contains__('Database'): + dataList = self.context.config['Database'] + for data in dataList: + if data.__contains__('Count'): + (interval, count) = self.getCycle(data) + print("do database check interval %s : count %s" % ( + str(interval), str(count))) + while count: + count -= 1 + self.databaseCheck(data) + if count > 0 and interval > 0: + time.sleep(interval) + else: + self.databaseCheck(data) + + # Collect log files + if self.context.config.__contains__('Log'): + logList = self.context.config['Log'] + for l in logList: + if l.__contains__('Count'): + (interval, count) = self.getCycle(l) + print("do log check interval %s : count %s" % ( + str(interval), str(count))) + if count > 1: + self.context.logger.log( + ErrorCode.GAUSS_512["GAUSS_51246"] % "Log") + count = 1 + while count: + count -= 1 + self.logCopy("Log", l) + if count > 0 and interval > 0: + time.sleep(interval) + else: + self.logCopy("Log", l) + + # Collect xlog files + if self.context.config.__contains__('XLog'): + xloglist = self.context.config['XLog'] + for l in xloglist: + if l.__contains__('Count'): + (interval, count) = self.getCycle(l) + print("do XLog check " + str(interval) + ":" + str(count)) + if count > 1: + self.context.logger.log( + ErrorCode.GAUSS_512["GAUSS_51246"] % "XLog") + count = 1 + while count: + count -= 1 + self.logCopy("XLog", l) + if count > 0 and interval > 0: + time.sleep(interval) + else: + self.logCopy("XLog", l) + + # CoreDump files + if self.context.config.__contains__('CoreDump'): + corelist = self.context.config['CoreDump'] + for l in corelist: + if l.__contains__('Count'): + (interval, count) = self.getCycle(l) + print("do CoreDump check " + str(interval) + ":" + str( + count)) + if count > 1: + self.context.logger.log( + ErrorCode.GAUSS_512["GAUSS_51246"] % "CoreDump") + count = 1 + while count: + count -= 1 + self.logCopy("CoreDump", l) + if count > 0 and interval > 0: + time.sleep(interval) + else: + self.logCopy("CoreDump", l) + + # collect configuration files + if self.context.config.__contains__('Config'): + clist = self.context.config['Config'] + for c in clist: + if c.__contains__('Count'): + (interval, count) = self.getCycle(c) + print("do Config check " + 
str(interval) + ":" + str( + count)) + if count > 1: + self.context.logger.log( + ErrorCode.GAUSS_512["GAUSS_51246"] % "Config") + count = 1 + while count: + count -= 1 + self.confGstack("Config", c) + if count > 0 and interval > 0: + time.sleep(interval) + else: + self.confGstack("Config", c) + + # process stack information + if self.context.config.__contains__('Gstack'): + stacklist = self.context.config['Gstack'] + for s in stacklist: + if s.__contains__('Count'): + (interval, count) = self.getCycle(s) + print("do Gstack check " + str(interval) + ":" + str( + count)) + + while count: + count -= 1 + self.confGstack("Gstack", s) + if count > 0 and interval > 0: + time.sleep(interval) + else: + self.confGstack("Gstack", s) + + # collect configuration files and process stack information + if self.context.config.__contains__('Trace'): + print("do config check") + + # collect plan simulator files + if self.context.config.__contains__('Plan'): + dbList = self.context.config['Plan'] + for s in dbList: + if s.__contains__('Count'): + (interval, count) = self.getCycle(s) + print("do Plan check " + str(interval) + ":" + str(count)) + + while count: + count -= 1 + self.planSimulator(s) + if count > 0 and interval > 0: + time.sleep(interval) + else: + self.planSimulator(s) + + # Collect result files + self.copyFile() + + # generate summary info + self.generalSummary(resultdir, currentTime) + + # tar the result and delete directory + self.tarResultFiles(currentTime, targetdir, resultdir) diff --git a/script/impl/collect/OLAP/__init__.py b/script/impl/collect/OLAP/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/script/impl/collect/__init__.py b/script/impl/collect/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/script/impl/dropnode/DropnodeImpl.py b/script/impl/dropnode/DropnodeImpl.py new file mode 100644 index 0000000..c314a0f --- /dev/null +++ b/script/impl/dropnode/DropnodeImpl.py @@ -0,0 +1,797 @@ +# -*- coding:utf-8 -*- +############################################################################# +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. 
+# ----------------------------------------------------------------------------
+# Description : DropnodeImpl.py
+#############################################################################
+
+import subprocess
+import sys
+import re
+import os
+import pwd
+import datetime
+import grp
+import socket
+import stat
+
+sys.path.append(sys.path[0] + "/../../../../")
+from gspylib.threads.SshTool import SshTool
+from gspylib.common.ErrorCode import ErrorCode
+from gspylib.common.Common import DefaultValue, ClusterCommand
+from gspylib.common.GaussLog import GaussLog
+from gspylib.inspection.common.SharedFuncs import cleanFile
+from gspylib.inspection.common.Exception import CheckException, \
+    SQLCommandException
+
+sys.path.append(sys.path[0] + "/../../../lib/")
+DefaultValue.doConfigForParamiko()
+import paramiko
+
+# mode
+MODE_PRIMARY = "primary"
+MODE_STANDBY = "standby"
+MODE_NORMAL = "normal"
+
+SWITCHOVER_FILE = "/switchover"
+FAILOVER_FILE = "/failover"
+PROMOTE_FILE = "/promote"
+
+# db state
+STAT_NORMAL = "normal"
+
+# master
+MASTER_INSTANCE = 0
+# standby
+STANDBY_INSTANCE = 1
+
+# status failed
+STATUS_FAIL = "Failure"
+
+
+class DropnodeImpl:
+    """
+    class for dropping a standby node.
+    steps:
+        1. check whether all standby nodes can be reached and whether a
+           switchover/failover is in progress
+        2. shut down the program of the target node if it can be reached
+        3. flush the configuration on all nodes if it is still an HA cluster
+        4. flush the configuration on the primary if it is the only one left
+    """
+
+    def __init__(self, dropnode):
+        """
+        """
+        self.context = dropnode
+        self.user = self.context.user
+        self.userProfile = self.context.userProfile
+        self.group = self.context.group
+        self.backupFilePrimary = ''
+        self.localhostname = DefaultValue.GetHostIpOrName()
+        self.logger = self.context.logger
+        self.resultDictOfPrimary = []
+        self.replSlot = ''
+        envFile = DefaultValue.getEnv("MPPDB_ENV_SEPARATE_PATH")
+        if envFile:
+            self.envFile = envFile
+        else:
+            self.envFile = "/etc/profile"
+        gphomepath = DefaultValue.getEnv("GPHOME")
+        if gphomepath:
+            self.gphomepath = gphomepath
+        else:
+            (status, output) = subprocess.getstatusoutput("which gs_om")
+            if "no gs_om in" in output:
+                raise Exception(ErrorCode.GAUSS_518["GAUSS_51800"] % "$GPHOME")
+            self.gphomepath = os.path.normpath(output.replace("/gs_om", ""))
+        self.appPath = self.context.clusterInfo.appPath
+        self.gsql_path = "source %s;%s/bin/gsql" % (self.userProfile,
+                                                    self.appPath)
+
+        currentTime = str(datetime.datetime.now()).replace(" ", "_").replace(
+            ".", "_")
+        self.dnIdForDel = []
+        for hostDelName in self.context.hostMapForDel.keys():
+            self.dnIdForDel += self.context.hostMapForDel[hostDelName]['dn_id']
+        self.commonOper = OperCommon(dropnode)
+
+    def change_user(self):
+        if os.getuid() == 0:
+            user = self.user
+            try:
+                pw_record = pwd.getpwnam(user)
+            # pwd.getpwnam raises KeyError for an unknown user, so catch
+            # that instead of CheckException, which it never raises.
+            except KeyError:
+                GaussLog.exitWithError(ErrorCode.GAUSS_503["GAUSS_50300"]
+                                       % user)
+            user_uid = pw_record.pw_uid
+            user_gid = pw_record.pw_gid
+            os.setgid(user_gid)
+            os.setuid(user_uid)
+
+    def checkAllStandbyState(self):
+        """
+        check all standby state whether switchover is happening
+        """
+        for hostNameLoop in self.context.hostMapForExist.keys():
+            sshtool_host = SshTool([hostNameLoop])
+            for i in self.context.hostMapForExist[hostNameLoop]['datadir']:
+                # check whether switchover/failover is happening
+                self.commonOper.checkStandbyState(hostNameLoop, i,
+                                                  sshtool_host,
+                                                  self.userProfile)
+            self.cleanSshToolFile(sshtool_host)
+
+        for hostNameLoop in
self.context.hostMapForDel.keys(): + if hostNameLoop not in self.context.failureHosts: + sshtool_host = SshTool([hostNameLoop]) + for i in self.context.hostMapForDel[hostNameLoop]['datadir']: + # check whether switchover/failover is happening + self.commonOper.checkStandbyState(hostNameLoop, i, + sshtool_host, + self.userProfile, True) + self.commonOper.stopInstance(hostNameLoop, sshtool_host, i, + self.userProfile) + cmdDelCert = "ls %s/share/sslcert/grpc/* | " \ + "grep -v openssl.cnf | xargs rm -rf" % self.appPath + result, output = sshtool_host.getSshStatusOutput(cmdDelCert, + [hostNameLoop], self.userProfile) + if result[hostNameLoop] != 'Success': + self.logger.debug(output) + self.logger.log("[gs_dropnode]Failed to delete the GRPC " + "sslcert of %s." % hostNameLoop) + self.logger.log("[gs_dropnode]Please check and delete the " + "GRPC sslcert of %s manually." % hostNameLoop) + self.cleanSshToolFile(sshtool_host) + else: + self.logger.log("[gs_dropnode]Cannot connect %s. Please check " + "and delete the GRPC sslcert of %s manually." + % (hostNameLoop, hostNameLoop)) + + def dropNodeOnAllHosts(self): + """ + drop the target node on the other host + """ + for hostNameLoop in self.context.hostMapForExist.keys(): + sshtool_host = SshTool([hostNameLoop]) + # backup + backupfile = self.commonOper.backupConf( + self.gphomepath, self.user, + hostNameLoop, self.userProfile, sshtool_host) + self.logger.log( + "[gs_dropnode]The backup file of " + hostNameLoop + " is " + backupfile) + if hostNameLoop == self.localhostname: + self.backupFilePrimary = backupfile + indexForuse = 0 + for i in self.context.hostMapForExist[hostNameLoop]['datadir']: + # parse + resultDict = self.commonOper.parseConfigFile(hostNameLoop, i, + self.dnIdForDel, + self.context.hostIpListForDel, + sshtool_host, + self.envFile) + resultDictForRollback = self.commonOper.parseBackupFile( + hostNameLoop, backupfile, + self.context.hostMapForExist[hostNameLoop][ + 'dn_id'][indexForuse], + resultDict['replStr'], sshtool_host, + self.envFile) + if hostNameLoop == self.localhostname: + self.resultDictOfPrimary.append(resultDict) + # try set + try: + self.commonOper.SetPgsqlConf(resultDict['replStr'], + hostNameLoop, + resultDict['syncStandbyStr'], + sshtool_host, + self.userProfile, + self.context.hostMapForExist[ + hostNameLoop]['port'][ + indexForuse], + '', + self.context.flagOnlyPrimary) + except ValueError: + self.logger.log("[gs_dropnode]Rollback pgsql process.") + self.commonOper.SetPgsqlConf(resultDict['replStr'], + hostNameLoop, + resultDict['syncStandbyStr'], + sshtool_host, + self.userProfile, + self.context.hostMapForExist[ + hostNameLoop]['port'][ + indexForuse], + resultDictForRollback[ + 'rollbackReplStr']) + try: + repl_slot = self.commonOper.get_repl_slot(hostNameLoop, + sshtool_host, self.userProfile, self.gsql_path, + self.context.hostMapForExist[hostNameLoop]['port'][ + indexForuse]) + self.commonOper.SetReplSlot(hostNameLoop, sshtool_host, + self.userProfile, self.gsql_path, + self.context.hostMapForExist[ + hostNameLoop]['port'][indexForuse + ], self.dnIdForDel, repl_slot) + except ValueError: + self.logger.log("[gs_dropnode]Rollback replslot") + self.commonOper.SetReplSlot(hostNameLoop, sshtool_host, + self.userProfile, self.gsql_path, + self.context.hostMapForExist[ + hostNameLoop]['port'][indexForuse + ], self.dnIdForDel, repl_slot, True) + indexForuse += 1 + self.cleanSshToolFile(sshtool_host) + + def operationOnlyOnPrimary(self): + """ + operation only need to be executed on primary node + """ + for 
hostNameLoop in self.context.hostMapForExist.keys():
+            sshtool_host = SshTool([hostNameLoop])
+            try:
+                self.commonOper.SetPghbaConf(self.userProfile, hostNameLoop,
+                                             self.resultDictOfPrimary[0][
+                                                 'pghbaStr'], False)
+            except ValueError:
+                self.logger.log("[gs_dropnode]Rollback pghba conf.")
+                self.commonOper.SetPghbaConf(self.userProfile, hostNameLoop,
+                                             self.resultDictOfPrimary[0][
+                                                 'pghbaStr'], True)
+            self.cleanSshToolFile(sshtool_host)
+
+    def modifyStaticConf(self):
+        """
+        Modify the cluster static conf and save it
+        """
+        self.logger.log("[gs_dropnode]Start to modify the cluster static conf.")
+        staticConfigPath = "%s/bin/cluster_static_config" % self.appPath
+        # first backup, only need to be done on primary node
+        tmpDir = DefaultValue.getEnvironmentParameterValue("PGHOST", self.user,
+                                                           self.userProfile)
+        cmd = "cp %s %s/%s_BACKUP" % (
+            staticConfigPath, tmpDir, 'cluster_static_config')
+        (status, output) = subprocess.getstatusoutput(cmd)
+        if status:
+            self.logger.debug("[gs_dropnode]Backup cluster_static_config "
+                              "failed: " + output)
+        backIpDict = self.context.backIpNameMap
+        backIpDict_values = list(backIpDict.values())
+        backIpDict_keys = list(backIpDict.keys())
+        for ipLoop in self.context.hostIpListForDel:
+            nameLoop = backIpDict_keys[backIpDict_values.index(ipLoop)]
+            dnLoop = self.context.clusterInfo.getDbNodeByName(nameLoop)
+            self.context.clusterInfo.dbNodes.remove(dnLoop)
+        for dbNode in self.context.clusterInfo.dbNodes:
+            if dbNode.name == self.localhostname:
+                self.context.clusterInfo.saveToStaticConfig(staticConfigPath,
+                                                            dbNode.id)
+                continue
+            staticConfigPath_dn = "%s/cluster_static_config_%s" % (
+                tmpDir, dbNode.name)
+            self.context.clusterInfo.saveToStaticConfig(staticConfigPath_dn,
+                                                        dbNode.id)
+        self.logger.debug(
+            "[gs_dropnode]Start to scp the cluster static conf to all "
+            "other nodes.")
+
+        if not self.context.flagOnlyPrimary:
+            sshtool = SshTool(self.context.clusterInfo.getClusterNodeNames())
+            cmd = "%s/script/gs_om -t refreshconf" % self.gphomepath
+            (status, output) = subprocess.getstatusoutput(cmd)
+            for hostName in self.context.hostMapForExist.keys():
+                hostSsh = SshTool([hostName])
+                if hostName != self.localhostname:
+                    staticConfigPath_name = "%s/cluster_static_config_%s" % (
+                        tmpDir, hostName)
+                    hostSsh.scpFiles(staticConfigPath_name, staticConfigPath,
+                                     [hostName], self.envFile)
+                    try:
+                        os.unlink(staticConfigPath_name)
+                    except FileNotFoundError:
+                        pass
+                self.cleanSshToolFile(hostSsh)
+
+        self.logger.log("[gs_dropnode]End of modifying the cluster static "
+                        "conf.")
+
+    def cleanSshToolFile(self, sshTool):
+        """
+        Clean up the temporary result files created by an SshTool instance.
+        """
+        try:
+            sshTool.clenSshResultFiles()
+        except Exception as e:
+            self.logger.debug(str(e))
+
+    def checkUserAndGroupExists(self):
+        """
+        check that the system user and group exist and match
+        on primary and standby nodes
+        """
+        inputUser = self.user
+        inputGroup = self.group
+        user_group_id = ""
+        isUserExists = False
+        localHost = socket.gethostname()
+        for user in pwd.getpwall():
+            if user.pw_name == self.user:
+                user_group_id = user.pw_gid
+                isUserExists = True
+                break
+        if not isUserExists:
+            GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35704"] \
+                                   % ("User", self.user, localHost))
+
+        isGroupExists = False
+        group_id = ""
+        for group in grp.getgrall():
+            if group.gr_name == self.group:
+                group_id = group.gr_gid
+                isGroupExists = True
+        if not isGroupExists:
+            GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35704"] \
+                                   % ("Group", self.group, localHost))
+        if user_group_id != group_id:
+            GaussLog.exitWithError("User [%s] is not in the group [%s]."
+                                   % (self.user, self.group))
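+
+        # Note: the check above compares the user's primary group id with the
+        # named group; membership through a supplementary group would not
+        # pass it. Requiring the primary group to match appears to be the
+        # intent here.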
group [%s]." \ + % (self.user, self.group)) + + hostNames = list(self.context.hostMapForExist.keys()) + envfile = self.envFile + sshTool = SshTool(hostNames) + + # get username in the other standy nodes + getUserNameCmd = "cat /etc/passwd | grep -w %s" % inputUser + resultMap, outputCollect = sshTool.getSshStatusOutput(getUserNameCmd, + [], envfile) + + for hostKey in resultMap: + if resultMap[hostKey] == STATUS_FAIL: + self.cleanSshToolFile(sshTool) + GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35704"] \ + % ("User", self.user, hostKey)) + + # get groupname in the other standy nodes + getGroupNameCmd = "cat /etc/group | grep -w %s" % inputGroup + resultMap, outputCollect = sshTool.getSshStatusOutput(getGroupNameCmd, + [], envfile) + for hostKey in resultMap: + if resultMap[hostKey] == STATUS_FAIL: + self.cleanSshToolFile(sshTool) + GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35704"] \ + % ("Group", self.group, hostKey)) + self.cleanSshToolFile(sshTool) + + def restartInstance(self): + if self.context.flagOnlyPrimary: + self.logger.log("[gs_dropnode]Remove the dynamic conf.") + dynamicConfigPath = "%s/bin/cluster_dynamic_config" % self.appPath + try: + os.unlink(dynamicConfigPath) + except FileNotFoundError: + pass + flag = input( + "Only one primary node is left." + "It is recommended to restart the node." + "\nDo you want to restart the primary node now (yes/no)? ") + count_f = 2 + while count_f: + if ( + flag.upper() != "YES" + and flag.upper() != "NO" + and flag.upper() != "Y" and flag.upper() != "N"): + count_f -= 1 + flag = input("Please type 'yes' or 'no': ") + continue + break + if flag.upper() != "YES" and flag.upper() != "Y": + GaussLog.exitWithError( + ErrorCode.GAUSS_358["GAUSS_35805"] % flag.upper()) + sshTool = SshTool([self.localhostname]) + for i in self.context.hostMapForExist[self.localhostname]['datadir']: + self.commonOper.stopInstance(self.localhostname, sshTool, i, + self.userProfile) + self.commonOper.startInstance(i, self.userProfile) + self.cleanSshToolFile(sshTool) + else: + pass + + def run(self): + """ + start dropnode + """ + self.change_user() + self.logger.log("[gs_dropnode]Start to drop nodes of the cluster.") + self.checkAllStandbyState() + self.dropNodeOnAllHosts() + self.operationOnlyOnPrimary() + self.modifyStaticConf() + self.restartInstance() + self.logger.log("[gs_dropnode]Success to drop the target nodes.") + + +class OperCommon: + + def __init__(self, dropnode): + """ + """ + self.logger = dropnode.logger + self.user = dropnode.user + + def checkStandbyState(self, host, dirDn, sshTool, envfile, isForDel=False): + """ + check the existed standby node state + Exit if the role is not standby or the state of database is not normal + """ + sshcmd = "gs_ctl query -D %s" % dirDn + (statusMap, output) = sshTool.getSshStatusOutput(sshcmd, [host], + envfile) + if 'Is server running?' in output and not isForDel: + GaussLog.exitWithError(ErrorCode.GAUSS_516["GAUSS_51651"] % host) + elif 'Is server running?' 
in output and isForDel: + return + else: + res = re.findall(r'db_state\s*:\s*(\w+)', output) + if not len(res) and isForDel: + return + elif not len(res): + GaussLog.exitWithError(ErrorCode.GAUSS_516["GAUSS_51651"] % host) + dbState = res[0] + if dbState in ['Promoting', 'Wait', 'Demoting']: + GaussLog.exitWithError(ErrorCode.GAUSS_358["GAUSS_35808"] % host) + + def backupConf(self, appPath, user, host, envfile, sshTool): + """ + backup the configuration file (postgresql.conf and pg_hba.conf) + The Backup.py can do this + """ + self.logger.log( + "[gs_dropnode]Start to backup parameter config file on %s." % host) + tmpPath = '/tmp/gs_dropnode_backup' + \ + str(datetime.datetime.now().strftime('%Y%m%d%H%M%S')) + backupPyPath = os.path.join(appPath, './script/local/Backup.py') + cmd = "(find /tmp -type d | grep gs_dropnode_backup | xargs rm -rf;" \ + "if [ ! -d '%s' ]; then mkdir -p '%s' -m %s;fi)" \ + % (tmpPath, tmpPath, DefaultValue.KEY_DIRECTORY_MODE) + sshTool.executeCommand(cmd, "", DefaultValue.SUCCESS, [host], envfile) + logfile = os.path.join(tmpPath, 'gs_dropnode_call_Backup_py.log') + cmd = "python3 %s -U %s -P %s -p --nodeName=%s -l %s" \ + % (backupPyPath, user, tmpPath, host, logfile) + (statusMap, output) = sshTool.getSshStatusOutput(cmd, [host], envfile) + if statusMap[host] != 'Success': + self.logger.debug( + "[gs_dropnode]Backup parameter config file failed." + output) + GaussLog.exitWithError(ErrorCode.GAUSS_358["GAUSS_35809"]) + self.logger.log( + "[gs_dropnode]End to backup parameter config file on %s." % host) + return '%s/parameter_%s.tar' % (tmpPath, host) + + def parseConfigFile(self, host, dirDn, dnId, hostIpListForDel, sshTool, + envfile): + """ + parse the postgresql.conf file and get the replication info + """ + self.logger.log( + "[gs_dropnode]Start to parse parameter config file on %s." % host) + resultDict = {'replStr': '', 'syncStandbyStr': '*', 'pghbaStr': ''} + pgConfName = os.path.join(dirDn, 'postgresql.conf') + pghbaConfName = os.path.join(dirDn, 'pg_hba.conf') + + cmd = "grep -o '^replconninfo.*' %s | egrep -o '^replconninfo.*'" \ + % pgConfName + (statusMap, output1) = sshTool.getSshStatusOutput(cmd, [host], envfile) + if statusMap[host] != 'Success': + self.logger.debug("[gs_dropnode]Parse replconninfo failed:" + output1) + GaussLog.exitWithError(ErrorCode.GAUSS_358["GAUSS_35809"]) + cmd = "grep -o '^synchronous_standby_names.*' %s" % pgConfName + (statusMap, output) = sshTool.getSshStatusOutput(cmd, [host], envfile) + if statusMap[host] != 'Success': + self.logger.debug( + "[gs_dropnode]Parse synchronous_standby_names failed:" + output) + GaussLog.exitWithError(ErrorCode.GAUSS_358["GAUSS_35809"]) + output_v = output.split("'")[-2] + if output_v == '*': + resultDict['syncStandbyStr'] = output_v + else: + resultDict['syncStandbyStr'] = self.check_syncStandbyStr(dnId, + output_v) + + cmd = "grep '^host.*trust' %s" % pghbaConfName + (statusMap, output) = sshTool.getSshStatusOutput(cmd, [host], envfile) + if statusMap[host] != 'Success': + self.logger.debug("[gs_dropnode]Parse pg_hba file failed:" + output) + for ip in hostIpListForDel: + if ip in output1: + i = output1.rfind('replconninfo', 0, output1.find(ip)) + 12 + resultDict['replStr'] += output1[i] + if ip in output: + s = output.rfind('host', 0, output.find(ip)) + e = output.find('\n', output.find(ip), len(output)) + resultDict['pghbaStr'] += output[s:e] + '|' + self.logger.log( + "[gs_dropnode]End to parse parameter config file on %s." 
% host) + return resultDict + + def check_syncStandbyStr(self, dnlist, output): + output_no = '0' + output_result = output + output_new_no = '1' + if '(' in output: + output_dn = re.findall(r'\((.*)\)', output)[0] + output_no = re.findall(r'.*(\d) *\(.*\)', output)[0] + else: + output_dn = output + output_dn_nospace = re.sub(' *', '', output_dn) + init_no = len(output_dn_nospace.split(',')) + quorum_no = int(init_no / 2) + 1 + half_no = quorum_no - 1 + count_dn = 0 + list_output1 = '*' + for dninst in dnlist: + if dninst in output_dn_nospace: + list_output1 = output_dn_nospace.split(',') + list_output1.remove(dninst) + list_output1 = ','.join(list_output1) + output_dn_nospace = list_output1 + init_no -= 1 + count_dn += 1 + if count_dn == 0: + return output_result + if list_output1 == '': + return '' + if list_output1 != '*': + output_result = output.replace(output_dn, list_output1) + if output_no == '0': + return output_result + if int(output_no) == quorum_no: + output_new_no = str(int(init_no / 2) + 1) + output_result = output_result.replace(output_no, output_new_no, 1) + return output_result + elif int(output_no) > half_no and (int(output_no) - count_dn) > 0: + output_new_no = str(int(output_no) - count_dn) + elif int(output_no) > half_no and (int(output_no) - count_dn) <= 0: + output_new_no = '1' + elif int(output_no) < half_no and int(output_no) <= init_no: + output_new_no = output_no + elif half_no > int(output_no) > init_no: + output_new_no = str(init_no) + output_result = output_result.replace(output_no, output_new_no, 1) + return output_result + + def parseBackupFile(self, host, backupfile, dnId, replstr, sshTool, + envfile): + """ + parse the backup file eg.parameter_host.tar to get the value for rollback + """ + self.logger.log( + "[gs_dropnode]Start to parse backup parameter config file on %s." % host) + resultDict = {'rollbackReplStr': '', 'syncStandbyStr': ''} + backupdir = os.path.dirname(backupfile) + cmd = "tar xf %s -C %s;grep -o '^replconninfo.*' %s/%s/%s_postgresql.conf;" \ + "grep -o '^synchronous_standby_names.*' %s/%s/%s_postgresql.conf;" \ + % ( + backupfile, backupdir, backupdir, 'parameter_' + host, dnId[3:], + backupdir, 'parameter_' + host, dnId[3:]) + (statusMap, output) = sshTool.getSshStatusOutput(cmd, [host], envfile) + if statusMap[host] != 'Success': + self.logger.log( + "[gs_dropnode]Parse backup parameter config file failed:" + output) + GaussLog.exitWithError(ErrorCode.GAUSS_358["GAUSS_35809"]) + for i in replstr: + tmp_v = 'replconninfo' + i + s = output.index(tmp_v) + e = output.find('\n', s, len(output)) + resultDict['rollbackReplStr'] += output[s:e].split("'")[-2] + '|' + s = output.index('synchronous_standby_names') + resultDict['syncStandbyStr'] = output[s:].split("'")[-2] + self.logger.log( + "[gs_dropnode]End to parse backup parameter config file %s." % host) + return resultDict + + def SetPgsqlConf(self, replNo, host, syncStandbyValue, sshTool, envfile, + port, replValue='', singleLeft=False): + """ + Set the value of postgresql.conf + """ + self.logger.log( + "[gs_dropnode]Start to set postgresql config file on %s." 
% host) + sqlExecFile = '/tmp/gs_dropnode_sqlExecFile_' + \ + str(datetime.datetime.now().strftime( + '%Y%m%d%H%M%S')) + host + checkResultFile = '/tmp/gs_dropnode_sqlResultFile_' + \ + str(datetime.datetime.now().strftime( + '%Y%m%d%H%M%S')) + host + sqlvalue = '' + if not replValue and replNo != '': + for i in replNo: + sqlvalue += "ALTER SYSTEM SET replconninfo%s = '';" % i + if len(replValue) > 0: + count = 0 + for i in replNo: + sqlvalue += "ALTER SYSTEM SET replconninfo%s = '%s';" % ( + i, replValue[:-1].split('|')[count]) + count += 1 + if not singleLeft and syncStandbyValue != '*': + sqlvalue += "ALTER SYSTEM SET synchronous_standby_names = '%s';" \ + % syncStandbyValue + if singleLeft: + sqlvalue += "ALTER SYSTEM SET synchronous_standby_names = '';" + if sqlvalue != '': + cmd = "touch %s && chmod %s %s" % \ + (sqlExecFile, DefaultValue.MAX_DIRECTORY_MODE, sqlExecFile) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + self.logger.log( + "[gs_dropnode]Create the SQL command file failed:" + output) + GaussLog.exitWithError(ErrorCode.GAUSS_358["GAUSS_35809"]) + try: + with os.fdopen( + os.open("%s" % sqlExecFile, os.O_WRONLY | os.O_CREAT, + stat.S_IWUSR | stat.S_IRUSR), 'w') as fo: + fo.write(sqlvalue) + fo.close() + except Exception as e: + cleanFile(sqlExecFile) + raise SQLCommandException(sqlExecFile, + "write into sql query file failed. " + + str(e)) + self.logger.debug( + "[gs_dropnode]Start to send the SQL command file to all hosts.") + sshTool.scpFiles(sqlExecFile, '/tmp', [host]) + cmd = "gsql -p %s -d postgres -f %s --output %s;cat %s" % ( + port, sqlExecFile, checkResultFile, checkResultFile) + (statusMap, output) = sshTool.getSshStatusOutput(cmd, [host], envfile) + if "ERROR" in output: + self.logger.debug( + "[gs_dropnode]Failed to execute the SQL command file on all " + "hosts:" + output) + raise ValueError(output) + cmd = "ls /tmp/gs_dropnode_sql* | xargs rm -rf" + sshTool.executeCommand(cmd, "", DefaultValue.SUCCESS, [host], envfile) + try: + os.unlink(sqlExecFile) + os.unlink(checkResultFile) + except FileNotFoundError: + pass + self.logger.log( + "[gs_dropnode]End of set postgresql config file on %s." % host) + + def SetPghbaConf(self, envProfile, host, pgHbaValue, + flagRollback=False): + """ + Set the value of pg_hba.conf + """ + self.logger.log( + "[gs_dropnode]Start of set pg_hba config file on %s." % host) + cmd = 'source %s;' % envProfile + if len(pgHbaValue): + if not flagRollback: + for i in pgHbaValue[:-1].split('|'): + v = i[0:i.find('/32') + 3] + cmd += "gs_guc set -N %s -I all -h '%s';" % (host, v) + if flagRollback: + for i in pgHbaValue[:-1].split('|'): + cmd += "gs_guc set -N %s -I all -h '%s';" \ + % (host, i.strip()) + (status, output) = subprocess.getstatusoutput(cmd) + result_v = re.findall(r'Failed instances: (\d)\.', output) + if status: + self.logger.debug( + "[gs_dropnode]Set pg_hba config file failed:" + output) + raise ValueError(output) + if len(result_v): + if result_v[0] != '0': + self.logger.debug( + "[gs_dropnode]Set pg_hba config file failed:" + output) + raise ValueError(output) + else: + self.logger.debug( + "[gs_dropnode]Set pg_hba config file failed:" + output) + raise ValueError(output) + else: + self.logger.log( + "[gs_dropnode]Nothing need to do with pg_hba config file.") + self.logger.log( + "[gs_dropnode]End of set pg_hba config file on %s." 
% host) + + def get_repl_slot(self, host, ssh_tool, envfile, gsql_path, port): + """ + Get the replication slot on primary node only + """ + self.logger.log("[gs_dropnode]Start to get repl slot on primary node.") + selectSQL = "SELECT slot_name,plugin,slot_type FROM pg_replication_slots;" + querycmd = "%s -p %s postgres -A -t -c '%s'" % (gsql_path, port, selectSQL) + (status, output) = ssh_tool.getSshStatusOutput(querycmd, [host], envfile) + if status[host] != 'Success' or "ERROR" in output: + self.logger.debug( + "[gs_dropnode]Get repl slot failed:" + output) + GaussLog.exitWithError(ErrorCode.GAUSS_358["GAUSS_35809"]) + return ','.join(output.split('\n')[1:]) + + def SetReplSlot(self, host, sshTool, envfile, gsqlPath, port, dnid, + replslot_output, flag_rollback=False): + """ + Drop the replication slot on primary node only + """ + self.logger.log("[gs_dropnode]Start to set repl slot on primary node.") + setcmd = '' + if not flag_rollback: + for i in dnid: + if i in replslot_output: + setcmd += "%s -p %s postgres -A -t -c \\\"SELECT pg_drop_" \ + "replication_slot('%s');\\\";" % \ + (gsqlPath, port, i) + if flag_rollback: + list_o = [i.split('|') for i in replslot_output.split(',')] + for r in list_o: + if r[0] in dnid and r[2] == 'physical': + setcmd += "%s -p %s postgres -A -t -c \\\"SELECT * FROM " \ + "pg_create_physical_replication_slot('%s', false);\\\";" % \ + (gsqlPath, port, r[0]) + elif r[0] in dnid and r[2] == 'logical': + setcmd += "%s -p %s postgres -A -t -c \\\"SELECT * FROM " \ + "pg_create_logical_replication_slot('%s', '%s');\\\";" % \ + (gsqlPath, port, r[0], r[1]) + if setcmd != '': + if host == DefaultValue.GetHostIpOrName(): + setcmd = setcmd.replace("\\", '') + (status, output) = sshTool.getSshStatusOutput(setcmd, [host], envfile) + if status[host] != 'Success' or "ERROR" in output: + self.logger.debug("[gs_dropnode]Set repl slot failed:" + output) + raise ValueError(output) + self.logger.log("[gs_dropnode]End of set repl slot on primary node.") + + def SetSyncCommit(self, dirDn): + """ + Set the synccommit to local when only primary server be left + """ + self.logger.log("[gs_dropnode]Start to set sync_commit on primary node.") + command = "gs_guc set -D %s -c 'synchronous_commit = local'" % dirDn + (status, output) = subprocess.getstatusoutput(command) + if status or '0' not in re.findall(r'Failed instances: (\d)\.', output): + self.logger.debug("[gs_dropnode]Set sync_commit failed:" + output) + raise ValueError(output) + self.logger.log("[gs_dropnode]End of set sync_commit on primary node.") + + def stopInstance(self, host, sshTool, dirDn, env): + """ + """ + self.logger.log("[gs_dropnode]Start to stop the target node %s." % host) + command = "source %s ; gs_ctl stop -D %s -M immediate" % (env, dirDn) + resultMap, outputCollect = sshTool.getSshStatusOutput(command, [host], + env) + if 'Is server running?' in outputCollect: + self.logger.log("[gs_dropnode]End of stop the target node %s." + % host) + return + elif resultMap[host] != 'Success': + self.logger.debug(outputCollect) + self.logger.log( + "[gs_dropnode]Cannot connect the target node %s." % host) + self.logger.log( + "[gs_dropnode]It may be still running.") + return + self.logger.log("[gs_dropnode]End of stop the target node %s." 
+                        % host)
+
+    def startInstance(self, dirDn, env):
+        """
+        Start the local instance, retrying up to 3 times before exiting.
+        """
+        self.logger.log("[gs_dropnode]Start to start the target node.")
+        start_retry_num = 1
+        command = "source %s ; gs_ctl start -D %s" % (env, dirDn)
+        while start_retry_num <= 3:
+            (status, output) = subprocess.getstatusoutput(command)
+            self.logger.debug(output)
+            if 'done' in output and 'server started' in output:
+                self.logger.log("[gs_dropnode]End of start the target node.")
+                return
+            # Keep retrying instead of exiting on the first failure; the
+            # original unconditional exit made the retry loop unreachable.
+            self.logger.debug("[gs_dropnode]Failed to start the node, "
+                              "retrying.")
+            start_retry_num += 1
+        GaussLog.exitWithError(ErrorCode.GAUSS_358["GAUSS_35809"])
diff --git a/script/impl/dropnode/__init__.py b/script/impl/dropnode/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/script/impl/expansion/ExpansionImpl.py b/script/impl/expansion/ExpansionImpl.py
new file mode 100644
index 0000000..331c0ac
--- /dev/null
+++ b/script/impl/expansion/ExpansionImpl.py
@@ -0,0 +1,1519 @@
+# -*- coding:utf-8 -*-
+#############################################################################
+# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+#
+# openGauss is licensed under Mulan PSL v2.
+# You can use this software according to the terms
+# and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+#
+# http://license.coscl.org.cn/MulanPSL2
+#
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+# ----------------------------------------------------------------------------
+# Description : ExpansionImpl.py
+#############################################################################
+
+import subprocess
+import sys
+import re
+import os
+import getpass
+import pwd
+import datetime
+import weakref
+from random import sample
+import time
+import grp
+import socket
+import stat
+from multiprocessing import Process, Value
+
+sys.path.append(sys.path[0] + "/../../../../")
+from gspylib.common.DbClusterInfo import dbClusterInfo, queryCmd
+from gspylib.threads.SshTool import SshTool
+from gspylib.common.DbClusterStatus import DbClusterStatus
+from gspylib.common.ErrorCode import ErrorCode
+from gspylib.common.Common import DefaultValue
+from gspylib.common.GaussLog import GaussLog
+
+# boot/build mode
+MODE_PRIMARY = "primary"
+MODE_STANDBY = "standby"
+MODE_NORMAL = "normal"
+MODE_CASCADE = "cascade_standby"
+
+# instance local_role
+ROLE_NORMAL = "normal"
+ROLE_PRIMARY = "primary"
+ROLE_STANDBY = "standby"
+ROLE_CASCADE = "cascade standby"
+
+# db state
+STATE_NORMAL = "normal"
+STATE_STARTING = "starting"
+STATE_CATCHUP = "catchup"
+
+# master
+MASTER_INSTANCE = 0
+# standby
+STANDBY_INSTANCE = 1
+
+# status failed
+STATUS_FAIL = "Failure"
+
+class ExpansionImpl:
+    """
+    class for expanding standby nodes.
+    steps:
+        1. preinstall database on the new standby nodes
+        2. install as single-node database
+        3.
establish primary-standby relationship of all node + """ + + def __init__(self, expansion): + """ + """ + self.context = expansion + + self.user = self.context.user + self.group = self.context.group + self.existingHosts = [] + self.expansionSuccess = {} + for newHost in self.context.newHostList: + self.expansionSuccess[newHost] = False + self.logger = self.context.logger + + envFile = DefaultValue.getEnv("MPPDB_ENV_SEPARATE_PATH") + if envFile: + self.envFile = envFile + else: + userpath = pwd.getpwnam(self.user).pw_dir + mpprcFile = os.path.join(userpath, ".bashrc") + self.envFile = mpprcFile + + currentTime = str(datetime.datetime.now()).replace(" ", "_").replace( + ".", "_") + + self.commonGsCtl = GsCtlCommon(expansion) + self.tempFileDir = "/tmp/gs_expansion_%s" % (currentTime) + self.logger.debug("tmp expansion dir is %s ." % self.tempFileDir) + # primary's wal_keep_segments value + self.walKeepSegments = -1 + + self._finalizer = weakref.finalize(self, self.final) + + globals()["paramiko"] = __import__("paramiko") + + def queryPrimaryWalKeepSegments(self): + """ + query primary's wal_keep_segments, when current user is root + """ + primaryHostName = self.getPrimaryHostName() + primaryHostIp = self.context.clusterInfoDict[primaryHostName]["backIp"] + primaryDataNode = self.context.clusterInfoDict[primaryHostName]["dataNode"] + status, walKeepSegments = self.commonGsCtl.queryGucParaValue( + primaryHostIp, self.envFile, primaryDataNode, "wal_keep_segments", self.user) + if status != DefaultValue.SUCCESS: + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50021"] % "wal_keep_segments") + return walKeepSegments + + def rollbackPrimaryWalKeepSegments(self): + """ + rollback primary's wal_keep_segments, when current user is root + """ + self.logger.debug("Start to rollback primary's wal_keep_segments") + primary = self.getPrimaryHostName() + primaryDataNode = self.context.clusterInfoDict[primary]["dataNode"] + status = self.commonGsCtl.setGucPara(primary, self.envFile, primaryDataNode, + "wal_keep_segments", self.walKeepSegments, self.user) + if status != DefaultValue.SUCCESS: + self.logger.log("Failed to rollback wal_keep_segments, please manually " + "set it to original value %s." % self.walKeepSegments) + else: + self.reloadPrimaryConf(self.user) + + def final(self): + """ + function: + 1. Make sure primary's wal_keep_segments is restored to its + original value if it has been changed, + 2. rollback, + 3. clear temp file + input : NA + output: NA + """ + if self.walKeepSegments != -1: + currentWalKeepSegments = self.queryPrimaryWalKeepSegments() + if currentWalKeepSegments != "NULL" \ + and self.walKeepSegments != int(currentWalKeepSegments): + self.rollbackPrimaryWalKeepSegments() + self.rollback() + self.clearTmpFile() + + def sendSoftToHosts(self): + """ + create software dir and send it on each nodes + """ + self.logger.log("Start to send soft to each standby nodes.") + srcFile = self.context.packagepath + targetDir = os.path.realpath(os.path.join(srcFile, "../")) + for host in self.context.newHostList: + sshTool = SshTool([host], timeout = 300) + # mkdir package dir and send package to remote nodes. 
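+            # umask 0022 presumably ensures the package directory is created
+            # with 755 permissions, so the cluster user on the remote host
+            # can read the copied software.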
+ sshTool.executeCommand("umask 0022;mkdir -p %s" % srcFile , "", + DefaultValue.SUCCESS, [host]) + sshTool.scpFiles(srcFile, targetDir, [host]) + self.cleanSshToolFile(sshTool) + self.logger.log("End to send soft to each standby nodes.") + + def generateAndSendXmlFile(self): + """ + """ + self.logger.debug("Start to generateAndSend XML file.\n") + + tempXmlFile = "%s/clusterconfig.xml" % self.tempFileDir + cmd = "mkdir -p %s; touch %s; cat /dev/null > %s" % \ + (self.tempFileDir, tempXmlFile, tempXmlFile) + (status, output) = subprocess.getstatusoutput(cmd) + + cmd = "chown -R %s:%s %s" % (self.user, self.group, self.tempFileDir) + (status, output) = subprocess.getstatusoutput(cmd) + + newHosts = self.context.newHostList + for host in newHosts: + # create single deploy xml file for each standby node + xmlContent = self.__generateXml(host) + with os.fdopen(os.open("%s" % tempXmlFile, os.O_WRONLY | os.O_CREAT, + stat.S_IWUSR | stat.S_IRUSR),'w') as fo: + fo.write( xmlContent ) + fo.close() + # send single deploy xml file to each standby node + sshTool = SshTool([host]) + retmap, output = sshTool.getSshStatusOutput("mkdir -p %s" % + self.tempFileDir , [host], self.envFile) + retmap, output = sshTool.getSshStatusOutput("chown %s:%s %s" % + (self.user, self.group, self.tempFileDir), [host], self.envFile) + sshTool.scpFiles("%s" % tempXmlFile, "%s" % + tempXmlFile, [host], self.envFile) + self.cleanSshToolFile(sshTool) + + self.logger.debug("End to generateAndSend XML file.\n") + + def __generateXml(self, backIp): + """ + """ + nodeName = self.context.backIpNameMap[backIp] + nodeInfo = self.context.clusterInfoDict[nodeName] + clusterName = self.context.clusterInfo.name + + backIp = nodeInfo["backIp"] + sshIp = nodeInfo["sshIp"] + port = nodeInfo["port"] + dataNode = nodeInfo["dataNode"] + + appPath = self.context.clusterInfoDict["appPath"] + logPath = self.context.clusterInfoDict["logPath"] + corePath = self.context.clusterInfoDict["corePath"] + toolPath = self.context.clusterInfoDict["toolPath"] + mppdbconfig = "" + tmpMppdbPath = DefaultValue.getEnv("PGHOST") + if tmpMppdbPath: + mppdbconfig = '' % tmpMppdbPath + azName = self.context.hostAzNameMap[backIp] + azPriority = nodeInfo["azPriority"] + + xmlConfig = """\ + + + + + + + + + + {mappdbConfig} + + + + + + + + + + + + + + + + + + """.format(clusterName = clusterName, nodeName = nodeName, backIp = backIp, + appPath = appPath, logPath = logPath, toolPath = toolPath, corePath = corePath, + sshIp = sshIp, port = port, dataNode = dataNode, azName = azName, + azPriority = azPriority, mappdbConfig = mppdbconfig) + return xmlConfig + + def changeUser(self): + user = self.user + try: + pw_record = pwd.getpwnam(user) + except Exception: + GaussLog.exitWithError(ErrorCode.GAUSS_503["GAUSS_50300"] % user) + + user_name = pw_record.pw_name + user_uid = pw_record.pw_uid + user_gid = pw_record.pw_gid + os.setgid(user_gid) + os.setuid(user_uid) + os.environ["HOME"] = pw_record.pw_dir + os.environ["USER"] = user_name + os.environ["LOGNAME"] = user_name + os.environ["SHELL"] = pw_record.pw_shell + + def initSshConnect(self, host, user='root', timeoutNum=0): + try: + self.sshClient = paramiko.SSHClient() + self.sshClient.set_missing_host_key_policy(paramiko.AutoAddPolicy()) + if timeoutNum == 0: + self.sshClient.connect(host, 22, user) + elif timeoutNum < 4: + getPwdStr = "Please enter the password of user [%s] on node [%s]: " % (user, host) + passwd = getpass.getpass(getPwdStr) + try: + self.sshClient.connect(host, 22, user, passwd) + except 
paramiko.ssh_exception.AuthenticationException: + self.logger.log("Authentication failed.") + raise Exception + else: + GaussLog.exitWithError(ErrorCode.GAUSS_511["GAUSS_51109"]) + except Exception: + self.initSshConnect(host, user, timeoutNum + 1) + + def hasNormalStandbyInAZOfCascade(self, cascadeIp, existingStandbys): + """ + check whether there are normal standbies in hostAzNameMap[cascadeIp] azZone + """ + hasStandbyWithSameAZ = False + hostAzNameMap = self.context.hostAzNameMap + for existingStandby in existingStandbys: + existingStandbyName = self.context.backIpNameMap[existingStandby] + existingStandbyDataNode = \ + self.context.clusterInfoDict[existingStandbyName]["dataNode"] + insType, dbState = self.commonGsCtl.queryInstanceStatus( + existingStandby, existingStandbyDataNode, self.envFile) + if dbState != STATE_NORMAL: + continue + if hostAzNameMap[cascadeIp] != hostAzNameMap[existingStandby]: + continue + hasStandbyWithSameAZ = True + break + return hasStandbyWithSameAZ + + def installDatabaseOnHosts(self): + """ + install database on each standby node + """ + standbyHosts = self.context.newHostList + tempXmlFile = "%s/clusterconfig.xml" % self.tempFileDir + installCmd = "source {envFile} ; gs_install -X {xmlFile} "\ + "2>&1".format(envFile = self.envFile, xmlFile = tempXmlFile) + self.logger.debug(installCmd) + primaryHostName = self.getPrimaryHostName() + primaryHostIp = self.context.clusterInfoDict[primaryHostName]["backIp"] + existingStandbys = list(set(self.existingHosts) - (set([primaryHostIp]))) + failedInstallHosts = [] + notInstalledCascadeHosts = [] + for newHost in standbyHosts: + if not self.expansionSuccess[newHost]: + continue + self.logger.log("Installing database on node %s:" % newHost) + hostName = self.context.backIpNameMap[newHost] + sshIp = self.context.clusterInfoDict[hostName]["sshIp"] + if self.context.newHostCasRoleMap[newHost] == "on": + # check whether there are normal standbies in hostAzNameMap[host] azZone + hasStandbyWithSameAZ = self.hasNormalStandbyInAZOfCascade(newHost, + existingStandbys) + if not hasStandbyWithSameAZ: + notInstalledCascadeHosts.append(newHost) + self.expansionSuccess[newHost] = False + continue + self.initSshConnect(sshIp, self.user) + stdin, stdout, stderr = self.sshClient.exec_command(installCmd, + get_pty=True) + channel = stdout.channel + echannel = stderr.channel + + while not channel.exit_status_ready(): + try: + recvOut = channel.recv(1024) + outDecode = recvOut.decode("utf-8") + outStr = outDecode.strip() + if(len(outStr) == 0): + continue + if(outDecode.endswith("\r\n")): + self.logger.log(outStr) + else: + value = "" + if re.match(r".*yes.*no.*", outStr): + value = input(outStr) + while True: + # check the input + if ( + value.upper() != "YES" + and value.upper() != "NO" + and value.upper() != "Y" + and value.upper() != "N"): + value = input("Please type 'yes' or 'no': ") + continue + break + else: + value = getpass.getpass(outStr) + stdin.channel.send("%s\r\n" %value) + stdin.flush() + stdout.flush() + except Exception as e: + sys.exit(1) + if channel.exit_status_ready() and \ + not channel.recv_stderr_ready() and \ + not channel.recv_ready(): + channel.close() + break + stdout.close() + stderr.close() + if channel.recv_exit_status() != 0: + self.expansionSuccess[newHost] = False + failedInstallHosts.append(newHost) + else: + if self.context.newHostCasRoleMap[newHost] == "off": + existingStandbys.append(newHost) + self.logger.log("%s install success." 
% newHost) + if notInstalledCascadeHosts: + self.logger.log("OpenGauss won't be installed on cascade_standby" + " %s, because there is no Normal standby in the same azZone." % + ", ".join(notInstalledCascadeHosts)) + if failedInstallHosts: + self.logger.log(ErrorCode.GAUSS_527["GAUSS_52707"] % + ", ".join(failedInstallHosts)) + self.logger.log("Finish to install database on all nodes.") + if self._isAllFailed(): + GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35706"] % "install") + + def preInstallOnHosts(self): + """ + execute preinstall step + """ + self.logger.log("Start to preinstall database step.") + tempXmlFile = "%s/clusterconfig.xml" % self.tempFileDir + + if not DefaultValue.getEnv("MPPDB_ENV_SEPARATE_PATH"): + preinstallCmd = "{softPath}/script/gs_preinstall -U {user} -G {group} "\ + "-X {xmlFile} --non-interactive 2>&1".format( + softPath = self.context.packagepath, user = self.user, + group = self.group, xmlFile = tempXmlFile) + else: + preinstallCmd = "{softPath}/script/gs_preinstall -U {user} -G {group} "\ + "-X {xmlFile} --sep-env-file={envFile} --non-interactive 2>&1".format( + softPath = self.context.packagepath, user = self.user, + group = self.group, xmlFile = tempXmlFile, envFile = self.envFile) + + failedPreinstallHosts = [] + for host in self.context.newHostList: + sshTool = SshTool([host], timeout = 300) + resultMap, output = sshTool.getSshStatusOutput(preinstallCmd, [], self.envFile) + self.logger.debug(resultMap) + self.logger.debug(output) + if resultMap[host] == DefaultValue.SUCCESS: + self.expansionSuccess[host] = True + self.logger.log("Preinstall %s success" % host) + else: + failedPreinstallHosts.append(host) + self.cleanSshToolFile(sshTool) + if failedPreinstallHosts: + self.logger.log("Failed to preinstall on: \n%s" % ", ".join(failedPreinstallHosts)) + self.logger.log("End to preinstall database step.") + + def buildStandbyRelation(self): + """ + func: after install single database on standby nodes. + build the relation with primary and standby nodes. + step: + 1. set all nodes' guc config parameter: replconninfo, available_zone(only for new) + 2. add trust on all hosts + 3. generate GRPC cert on new hosts, and primary if current cluster is single instance + 4. build new hosts : + (1) restart new instance with standby mode + (2) build new instances + 5. generate cluster static file and send to each node. 
+ """ + self.setGucConfig() + self.addTrust() + self.generateGRPCCert() + self.buildStandbyHosts() + self.generateClusterStaticFile() + + def getExistingHosts(self, isRootUser=True): + """ + get the exiting hosts + """ + self.logger.debug("Get the existing hosts.") + primaryHost = self.getPrimaryHostName() + command = "" + if DefaultValue.getEnv("MPPDB_ENV_SEPARATE_PATH"): + command = "source %s;gs_om -t status --detail" % self.envFile + else: + command = "source /etc/profile;source %s;"\ + "gs_om -t status --detail" % self.envFile + if isRootUser: + command = "su - %s -c '%s'" % (self.user, command) + self.logger.debug(command) + sshTool = SshTool([primaryHost]) + resultMap, outputCollect = sshTool.getSshStatusOutput(command, + [primaryHost], self.envFile) + self.cleanSshToolFile(sshTool) + self.logger.debug(resultMap) + self.logger.debug(outputCollect) + if resultMap[primaryHost] != DefaultValue.SUCCESS: + GaussLog.exitWithError(ErrorCode.GAUSS_516["GAUSS_51600"]) + instances = re.split('(?:\|)|(?:\n)', outputCollect) + self.existingHosts = [] + pattern = re.compile('(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}).*') + for inst in instances: + existingHosts = pattern.findall(inst) + if len(existingHosts) != 0: + self.existingHosts.append(existingHosts[0]) + + def setGucConfig(self): + """ + set replconninfo on all hosts + """ + self.logger.debug("Start to set GUC config on all hosts.\n") + gucDict = self.getGUCConfig() + tempShFile = "%s/guc.sh" % self.tempFileDir + hostIpList = list(self.existingHosts) + for host in self.expansionSuccess: + hostIpList.append(host) + + nodeDict = self.context.clusterInfoDict + backIpNameMap = self.context.backIpNameMap + hostAzNameMap = self.context.hostAzNameMap + for host in hostIpList: + hostName = backIpNameMap[host] + # set Available_zone for the new standby + if host in self.context.newHostList: + dataNode = nodeDict[hostName]["dataNode"] + gucDict[hostName] += """\ +gs_guc set -D {dn} -c "available_zone='{azName}'" + """.format(dn=dataNode, azName=hostAzNameMap[host]) + command = "source %s ; " % self.envFile + gucDict[hostName] + self.logger.debug("[%s] gucCommand:%s" % (host, command)) + + sshTool = SshTool([host]) + # create temporary dir to save guc command bashfile. + mkdirCmd = "mkdir -m a+x -p %s; chown %s:%s %s" % \ + (self.tempFileDir, self.user, self.group, self.tempFileDir) + sshTool.getSshStatusOutput(mkdirCmd, [host], self.envFile) + subprocess.getstatusoutput("if [ ! -e '%s' ]; then mkdir -m a+x -p %s;" + " fi; touch %s; cat /dev/null > %s" % (self.tempFileDir, + self.tempFileDir, tempShFile, tempShFile)) + with os.fdopen(os.open("%s" % tempShFile, os.O_WRONLY | os.O_CREAT, + stat.S_IWUSR | stat.S_IRUSR), 'w') as fo: + fo.write("#bash\n") + fo.write(command) + fo.close() + + # send guc command bashfile to each host and execute it. 
+ sshTool.scpFiles("%s" % tempShFile, "%s" % tempShFile, [host], + self.envFile) + resultMap, outputCollect = sshTool.getSshStatusOutput( + "sh %s" % tempShFile, [host], self.envFile) + self.logger.debug(resultMap) + self.logger.debug(outputCollect) + self.cleanSshToolFile(sshTool) + + def addTrust(self): + """ + add authentication rules about new host ip in existing hosts and + add authentication rules about other all hosts ip in new hosts + """ + self.logger.debug("Start to set host trust on all node.") + allHosts = list(self.existingHosts) + for host in self.context.newHostList: + if self.expansionSuccess[host]: + allHosts.append(host) + for hostExec in allHosts: + hostExecName = self.context.backIpNameMap[hostExec] + dataNode = self.context.clusterInfoDict[hostExecName]["dataNode"] + cmd = "source %s;gs_guc set -D %s" % (self.envFile, dataNode) + if hostExec in self.existingHosts: + for hostParam in self.context.newHostList: + cmd += " -h 'host all all %s/32 trust'" % \ + hostParam + else: + for hostParam in allHosts: + if hostExec != hostParam: + cmd += " -h 'host all all %s/32 trust'" % \ + hostParam + self.logger.debug("[%s] trustCmd:%s" % (hostExec, cmd)) + sshTool = SshTool([hostExec]) + sshTool.getSshStatusOutput(cmd, [hostExec], self.envFile) + self.cleanSshToolFile(sshTool) + self.logger.debug("End to set host trust on all node.") + + def generateGRPCCert(self): + """ + generate GRPC cert for single node + """ + primaryHost = self.getPrimaryHostName() + dataNode = self.context.clusterInfoDict[primaryHost]["dataNode"] + needGRPCHosts = [] + for host in self.expansionSuccess: + if self.expansionSuccess[host]: + needGRPCHosts.append(host) + insType, dbState = self.commonGsCtl.queryInstanceStatus(primaryHost, + dataNode,self.envFile) + if insType != MODE_PRIMARY: + primaryHostIp = self.context.clusterInfoDict[primaryHost]["backIp"] + needGRPCHosts.append(primaryHostIp) + self.logger.debug("Start to generate GRPC cert.") + if needGRPCHosts: + self.context.initSshTool(needGRPCHosts) + self.context.createGrpcCa(needGRPCHosts) + self.logger.debug("End to generate GRPC cert.") + + def reloadPrimaryConf(self, user=""): + """ + """ + primaryHost = self.getPrimaryHostName() + dataNode = self.context.clusterInfoDict[primaryHost]["dataNode"] + command = "" + if user: + command = "su - %s -c 'source %s;gs_ctl reload -D %s'" % \ + (user, self.envFile, dataNode) + else: + command = "gs_ctl reload -D %s " % dataNode + sshTool = SshTool([primaryHost]) + self.logger.debug(command) + resultMap, outputCollect = sshTool.getSshStatusOutput(command, + [primaryHost], self.envFile) + self.logger.debug(outputCollect) + self.cleanSshToolFile(sshTool) + + def getPrimaryHostName(self): + """ + """ + primaryHost = "" + for nodeName in self.context.nodeNameList: + if self.context.clusterInfoDict[nodeName]["instanceType"] \ + == MASTER_INSTANCE: + primaryHost = nodeName + break + return primaryHost + + + def buildStandbyHosts(self): + """ + stop the new standby host`s database and build it as standby mode + """ + self.logger.debug("Start to build new nodes.") + standbyHosts = self.context.newHostList + hostAzNameMap = self.context.hostAzNameMap + primaryHostName = self.getPrimaryHostName() + primaryHost = self.context.clusterInfoDict[primaryHostName]["backIp"] + existingStandbys = list(set(self.existingHosts).difference(set([primaryHost]))) + primaryDataNode = self.context.clusterInfoDict[primaryHostName]["dataNode"] + walKeepSegmentsChanged = False + status, synchronous_commit = 
self.commonGsCtl.queryGucParaValue( + primaryHost, self.envFile, primaryDataNode, "synchronous_commit") + if status != DefaultValue.SUCCESS: + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50021"] % "synchronous_commit") + if synchronous_commit == "off" and self.walKeepSegments < 1024: + status = self.commonGsCtl.setGucPara(primaryHost, self.envFile, primaryDataNode, + "wal_keep_segments", 1024) + if status != DefaultValue.SUCCESS: + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50007"] % "wal_keep_segments") + walKeepSegmentsChanged = True + self.reloadPrimaryConf() + time.sleep(10) + insType, dbState = self.commonGsCtl.queryInstanceStatus( + primaryHost, primaryDataNode, self.envFile) + primaryExceptionInfo = "" + if insType != ROLE_PRIMARY: + primaryExceptionInfo = ErrorCode.GAUSS_357["GAUSS_35709"] % \ + ("local_role", "primary", "primary") + if dbState != STATE_NORMAL: + primaryExceptionInfo = ErrorCode.GAUSS_357["GAUSS_35709"] % \ + ("db_state", "primary", "Normal") + if primaryExceptionInfo != "": + GaussLog.exitWithError(primaryExceptionInfo) + waitChars = ["\\", "|", "/", "-"] + for host in standbyHosts: + if not self.expansionSuccess[host]: + continue + hostName = self.context.backIpNameMap[host] + dataNode = self.context.clusterInfoDict[hostName]["dataNode"] + buildMode = "" + hostRole = "" + if self.context.newHostCasRoleMap[host] == "on": + buildMode = MODE_CASCADE + hostRole = ROLE_CASCADE + # check whether there are normal standbies in hostAzNameMap[host] azZone + hasStandbyWithSameAZ = self.hasNormalStandbyInAZOfCascade(host, + existingStandbys) + if not hasStandbyWithSameAZ: + self.logger.log("There is no Normal standby in %s" % + hostAzNameMap[host]) + self.expansionSuccess[host] = False + continue + else: + buildMode = MODE_STANDBY + hostRole = ROLE_STANDBY + self.logger.log("Start to build %s %s." % (hostRole, host)) + self.checkTmpDir(hostName) + # start new host as standby mode + self.commonGsCtl.stopInstance(hostName, dataNode, self.envFile) + result, output = self.commonGsCtl.startInstanceWithMode(host, + dataNode, MODE_STANDBY, self.envFile) + if result[host] != DefaultValue.SUCCESS: + if "uncompleted build is detected" not in output: + self.expansionSuccess[host] = False + self.logger.log("Failed to start %s as standby " + "before building." % host) + continue + else: + self.logger.debug("Uncompleted build is detected on %s." % + host) + else: + insType, dbState = self.commonGsCtl.queryInstanceStatus( + hostName, dataNode, self.envFile) + if insType != ROLE_STANDBY: + self.logger.log("Build %s failed." % host) + self.expansionSuccess[host] = False + continue + + # build new host + sshTool = SshTool([host]) + tempShFile = "%s/buildStandby.sh" % self.tempFileDir + # create temporary dir to save gs_ctl build command bashfile. + mkdirCmd = "mkdir -m a+x -p %s; chown %s:%s %s" % \ + (self.tempFileDir, self.user, self.group, self.tempFileDir) + sshTool.getSshStatusOutput(mkdirCmd, [host], self.envFile) + subprocess.getstatusoutput("touch %s; cat /dev/null > %s" % + (tempShFile, tempShFile)) + buildCmd = "gs_ctl build -D %s -M %s" % (dataNode, buildMode) + gs_ctlBuildCmd = "source %s ;nohup " % self.envFile + buildCmd + " 1>/dev/null 2>/dev/null &" + self.logger.debug("[%s] gs_ctlBuildCmd: %s" % (host, gs_ctlBuildCmd)) + with os.fdopen(os.open("%s" % tempShFile, os.O_WRONLY | os.O_CREAT, + stat.S_IWUSR | stat.S_IRUSR),'w') as fo: + fo.write("#bash\n") + fo.write(gs_ctlBuildCmd) + fo.close() + # send gs_ctlBuildCmd bashfile to the standby host and execute it. 
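+ # buildStandby.sh launches gs_ctl build with nohup in the background
+ # so the ssh session returns at once; completion is detected below by
+ # polling "ps x" for the build command.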
+ sshTool.scpFiles(tempShFile, tempShFile, [host], self.envFile) + resultMap, outputCollect = sshTool.getSshStatusOutput("sh %s" % \ + tempShFile, [host], self.envFile) + self.logger.debug(resultMap) + self.logger.debug(outputCollect) + if resultMap[host] != DefaultValue.SUCCESS: + self.expansionSuccess[host] = False + self.logger.debug("Failed to send gs_ctlBuildCmd bashfile " + "to %s." % host) + self.logger.log("Build %s %s failed." % (hostRole, host)) + continue + # check whether build process has finished + checkProcessExistCmd = "ps x" + while True: + resultMap, outputCollect = sshTool.getSshStatusOutput( + checkProcessExistCmd, [host]) + if buildCmd not in outputCollect: + self.logger.debug("Build %s complete." % host) + break + timeFlush = 0.5 + for i in range(0, int(60 / timeFlush)): + index = i % 4 + print("\rThe program is running {}".format(waitChars[index]), end="") + time.sleep(timeFlush) + # check build result after build process finished + while True: + timeFlush = 0.5 + for i in range(0, int(60 / timeFlush)): + index = i % 4 + print("\rThe program is running {}".format(waitChars[index]), end="") + time.sleep(timeFlush) + insType, dbState = self.commonGsCtl.queryInstanceStatus( + hostName, dataNode, self.envFile) + if dbState not in [STATE_STARTING, STATE_CATCHUP]: + self.logger.debug("%s starting and catchup complete." % host) + break + insType, dbState = self.commonGsCtl.queryInstanceStatus( + hostName, dataNode, self.envFile) + if insType == hostRole and dbState == STATE_NORMAL: + if self.context.newHostCasRoleMap[host] == "off": + existingStandbys.append(host) + self.logger.log("\rBuild %s %s success." % (hostRole, host)) + else: + self.expansionSuccess[host] = False + self.logger.log("\rBuild %s %s failed." % (hostRole, host)) + if walKeepSegmentsChanged: + self.logger.debug("Start to rollback primary's wal_keep_segments") + status = self.commonGsCtl.setGucPara(primaryHost, self.envFile, primaryDataNode, + "wal_keep_segments", self.walKeepSegments) + if status != DefaultValue.SUCCESS: + self.logger.debug(ErrorCode.GAUSS_500["GAUSS_50007"] % "wal_keep_segments") + self.reloadPrimaryConf() + if self._isAllFailed(): + GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35706"] % "build") + + def checkTmpDir(self, hostName): + """ + if the tmp dir id not exist, create it. + """ + tmpDir = os.path.realpath(DefaultValue.getTmpDirFromEnv()) + checkCmd = 'if [ ! -d "%s" ]; then exit 1;fi;' % (tmpDir) + sshTool = SshTool([hostName]) + resultMap, outputCollect = sshTool.getSshStatusOutput(checkCmd, + [hostName], self.envFile) + ret = resultMap[hostName] + if ret == STATUS_FAIL: + self.logger.debug("Node [%s] does not have tmp dir. 
need to fix." % hostName)
+ fixCmd = "mkdir -p %s" % (tmpDir)
+ sshTool.getSshStatusOutput(fixCmd, [hostName], self.envFile)
+ self.cleanSshToolFile(sshTool)
+
+ def generateClusterStaticFile(self):
+ """
+ generate static_config_files and send them to all hosts
+ """
+ self.logger.log("Start to generate and send cluster static file.")
+
+ primaryHost = self.getPrimaryHostName()
+ result = self.commonGsCtl.queryOmCluster(primaryHost, self.envFile)
+ for nodeName in self.context.nodeNameList:
+ nodeInfo = self.context.clusterInfoDict[nodeName]
+ nodeIp = nodeInfo["backIp"]
+ dataNode = nodeInfo["dataNode"]
+ exist_reg = r"(.*)%s[\s]*%s(.*)%s(.*)" % (nodeName, nodeIp, dataNode)
+ dbNode = self.context.clusterInfo.getDbNodeByName(nodeName)
+ if not re.search(exist_reg, result) and nodeIp not in self.context.newHostList:
+ self.logger.debug("The node ip [%s] will not be added to the cluster." % nodeIp)
+ self.context.clusterInfo.dbNodes.remove(dbNode)
+ if nodeIp in self.context.newHostList and not self.expansionSuccess[nodeIp]:
+ self.context.clusterInfo.dbNodes.remove(dbNode)
+
+ toolPath = self.context.clusterInfoDict["toolPath"]
+ appPath = self.context.clusterInfoDict["appPath"]
+
+ static_config_dir = "%s/script/static_config_files" % toolPath
+ if not os.path.exists(static_config_dir):
+ os.makedirs(static_config_dir)
+
+ # check whether the dynamic config file exists.
+ dynamic_file = "%s/bin/cluster_dynamic_config" % appPath
+ dynamic_file_exist = False
+ if os.path.exists(dynamic_file):
+ dynamic_file_exist = True
+
+ for dbNode in self.context.clusterInfo.dbNodes:
+ hostName = dbNode.name
+ staticConfigPath = "%s/script/static_config_files/cluster_static_config_%s" % \
+ (toolPath, hostName)
+ self.context.clusterInfo.saveToStaticConfig(staticConfigPath, dbNode.id)
+ srcFile = staticConfigPath
+ if not os.path.exists(srcFile):
+ GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35710"] % srcFile)
+ hostSsh = SshTool([hostName])
+ targetFile = "%s/bin/cluster_static_config" % appPath
+ hostSsh.scpFiles(srcFile, targetFile, [hostName], self.envFile)
+ # if the dynamic config file exists, refresh it.
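+ # "gs_om -t refreshconf" regenerates the dynamic config from the
+ # newly distributed static config, keeping the two files consistent.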
+ if dynamic_file_exist:
+ refresh_cmd = "gs_om -t refreshconf"
+ hostSsh.getSshStatusOutput(refresh_cmd, [hostName], self.envFile)
+ self.cleanSshToolFile(hostSsh)
+ self.logger.log("Finished generating and sending the cluster static file.\n")
+
+ self.logger.log("Expansion results:")
+ self.getExistingHosts(False)
+ for newHost in self.context.newHostList:
+ if newHost in self.existingHosts:
+ self.logger.log("%s:\tSuccess" % newHost)
+ else:
+ self.logger.log("%s:\tFailed" % newHost)
+
+ def getGUCConfig(self):
+ """
+ get the guc config of each node:
+ replconninfo[index]
+ """
+ clusterInfoDict = self.context.clusterInfoDict
+ hostIpList = list(self.existingHosts)
+ for host in self.expansionSuccess:
+ hostIpList.append(host)
+ hostNames = []
+ for host in hostIpList:
+ hostNames.append(self.context.backIpNameMap[host])
+
+ gucDict = {}
+ for hostName in hostNames:
+ localeHostInfo = clusterInfoDict[hostName]
+ index = 1
+ guc_template_str = "source %s; " % self.envFile
+ for remoteHost in hostNames:
+ if remoteHost == hostName:
+ continue
+ remoteHostInfo = clusterInfoDict[remoteHost]
+ guc_repl_template = """\
+gs_guc set -D {dn} -c "replconninfo{index}=\
+'localhost={localhost} localport={localport} \
+localheartbeatport={localeHeartPort} \
+localservice={localservice} \
+remotehost={remoteNode} \
+remoteport={remotePort} \
+remoteheartbeatport={remoteHeartPort} \
+remoteservice={remoteservice}'"
+ """.format(dn=localeHostInfo["dataNode"],
+ index=index,
+ localhost=localeHostInfo["sshIp"],
+ localport=localeHostInfo["localport"],
+ localeHeartPort=localeHostInfo["heartBeatPort"],
+ localservice=localeHostInfo["localservice"],
+ remoteNode=remoteHostInfo["sshIp"],
+ remotePort=remoteHostInfo["localport"],
+ remoteHeartPort=remoteHostInfo["heartBeatPort"],
+ remoteservice=remoteHostInfo["localservice"])
+ guc_template_str += guc_repl_template
+ index += 1
+
+ gucDict[hostName] = guc_template_str
+ return gucDict
+
+ def checkGaussdbAndGsomVersionOfStandby(self):
+ """
+ check whether the gaussdb and gs_om versions on the standbys
+ are the same as on the primary
+ """
+ standbyHosts = list(self.context.newHostList)
+ envFile = self.envFile
+ if self.context.standbyLocalMode:
+ for host in standbyHosts:
+ self.expansionSuccess[host] = True
+ self.logger.log("Checking gaussdb and gs_om version.")
+ getGaussdbVersionCmd = "source %s;gaussdb --version" % envFile
+ getGsomVersionCmd = "source %s;gs_om --version" % envFile
+ gaussdbVersionPattern = re.compile("gaussdb \((.*)\) .*")
+ gsomVersionPattern = re.compile("gs_om \(.*\) .*")
+ primaryHostName = self.getPrimaryHostName()
+ sshPrimary = SshTool([primaryHostName])
+ resultMap, outputCollect = sshPrimary.getSshStatusOutput(
+ getGaussdbVersionCmd, [], envFile)
+ self.logger.debug(resultMap)
+ self.logger.debug(outputCollect)
+ if resultMap[primaryHostName] != DefaultValue.SUCCESS:
+ GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35707"] %
+ ("gaussdb", "primary"))
+ primaryGaussdbVersion = gaussdbVersionPattern.findall(outputCollect)[0]
+ resultMap, outputCollect = sshPrimary.getSshStatusOutput(
+ getGsomVersionCmd, [], envFile)
+ self.logger.debug(resultMap)
+ self.logger.debug(outputCollect)
+ if resultMap[primaryHostName] != DefaultValue.SUCCESS:
+ GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35707"] %
+ ("gs_om", "primary"))
+ primaryGsomVersion = gsomVersionPattern.findall(outputCollect)[0]
+ self.cleanSshToolFile(sshPrimary)
+
+ # Compare each new standby's gaussdb and gs_om version strings with
+ # the primary's; hosts that fail the check or report a different
+ # version are excluded from the expansion.
+ failCheckGaussdbVersionHosts = []
+ failCheckGsomVersionHosts = []
+ wrongGaussdbVersionHosts = []
+ wrongGsomVersionHosts = []
+ 
for host in standbyHosts: + if not self.expansionSuccess[host]: + continue + sshTool = SshTool([host]) + # get gaussdb version + resultMap, outputCollect = sshTool.getSshStatusOutput( + getGaussdbVersionCmd, [], envFile) + self.logger.debug(resultMap) + self.logger.debug(outputCollect) + if resultMap[host] != DefaultValue.SUCCESS: + self.expansionSuccess[host] = False + failCheckGaussdbVersionHosts.append(host) + else: + gaussdbVersion = gaussdbVersionPattern.findall(outputCollect)[0] + if gaussdbVersion != primaryGaussdbVersion: + self.expansionSuccess[host] = False + wrongGaussdbVersionHosts.append(host) + self.cleanSshToolFile(sshTool) + continue + # get gs_om version + resultMap, outputCollect = sshTool.getSshStatusOutput( + getGsomVersionCmd, [], envFile) + self.logger.debug(resultMap) + self.logger.debug(outputCollect) + if resultMap[host] != DefaultValue.SUCCESS: + self.expansionSuccess[host] = False + failCheckGsomVersionHosts.append(host) + else: + gsomVersion = gsomVersionPattern.findall(outputCollect)[0] + if gsomVersion != primaryGsomVersion: + self.expansionSuccess[host] = False + wrongGsomVersionHosts.append(host) + self.cleanSshToolFile(sshTool) + if failCheckGaussdbVersionHosts: + self.logger.log(ErrorCode.GAUSS_357["GAUSS_35707"] % + ("gaussdb", ", ".join(failCheckGaussdbVersionHosts))) + if failCheckGsomVersionHosts: + self.logger.log(ErrorCode.GAUSS_357["GAUSS_35707"] % + ("gs_om", ", ".join(failCheckGsomVersionHosts))) + if wrongGaussdbVersionHosts: + self.logger.log(ErrorCode.GAUSS_357["GAUSS_35708"] % + ("gaussdb", ", ".join(wrongGaussdbVersionHosts))) + if wrongGsomVersionHosts: + self.logger.log(ErrorCode.GAUSS_357["GAUSS_35708"] % + ("gs_om", ", ".join(wrongGsomVersionHosts))) + self.logger.log("End to check gaussdb and gs_om version.\n") + if self._isAllFailed(): + GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35706"] % + "check gaussdb and gs_om version") + + def preInstall(self): + """ + preinstall on new hosts. 
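+ steps: send the software package to the new hosts, generate and
+ send the cluster XML file, then run gs_preinstall on each new host.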
+ """ + self.logger.log("Start to preinstall database on new nodes.") + self.sendSoftToHosts() + self.generateAndSendXmlFile() + self.preInstallOnHosts() + self.logger.log("End to preinstall database on new nodes.\n") + if self._isAllFailed(): + GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35706"] % "preinstall") + + def clearTmpFile(self): + """ + clear temporary file after expansion success + """ + self.logger.debug("start to delete temporary file %s" % self.tempFileDir) + clearCmd = "if [ -d '%s' ];then rm -rf %s;fi" % \ + (self.tempFileDir, self.tempFileDir) + hosts = self.existingHosts + self.context.newHostList + try: + sshTool = SshTool(hosts) + result, output = sshTool.getSshStatusOutput(clearCmd, + hosts, self.envFile) + self.logger.debug(output) + self.cleanSshToolFile(sshTool) + except Exception as e: + self.logger.debug(str(e)) + self.cleanSshToolFile(sshTool) + + + def cleanSshToolFile(self, sshTool): + """ + """ + try: + sshTool.clenSshResultFiles() + except Exception as e: + self.logger.debug(str(e)) + + + def checkNodesDetail(self): + """ + """ + self.checkUserAndGroupExists() + self.checkXmlFileAccessToUser() + self.checkClusterStatus() + self.validNodeInStandbyList() + self.checkXMLConsistency() + + def checkXMLConsistency(self): + """ + Check whether XML information is consistent with cluster information + """ + self.logger.debug("Checking whether XML information is " + "consistent with cluster information") + self._checkDataNodes() + self._checkAvailableZone() + + def _checkDataNodes(self): + """ + check datanodes + """ + self.logger.debug("Checking the consistence of datanodes.") + primaryName = self.getPrimaryHostName() + cmd = "" + if DefaultValue.getEnv("MPPDB_ENV_SEPARATE_PATH"): + cmd = "su - %s -c 'source %s;gs_om -t status --detail'" % \ + (self.user, self.envFile) + else: + cmd = "su - %s -c 'source /etc/profile;source %s;"\ + "gs_om -t status --detail'" % (self.user, self.envFile) + sshTool = SshTool([primaryName]) + resultMap, outputCollect = sshTool.getSshStatusOutput(cmd, + [primaryName], self.envFile) + self.logger.debug(resultMap) + self.logger.debug(outputCollect) + if resultMap[primaryName] != DefaultValue.SUCCESS: + GaussLog.exitWithError(ErrorCode.GAUSS_516["GAUSS_51600"]) + self.cleanSshToolFile(sshTool) + pos = outputCollect.rfind("-----") + pos += len("-----") + 1 + allNodesState = outputCollect[pos:] + nodeStates = re.split('(?:\|)|(?:\n)', allNodesState) + dataNodes = {} + for nodeState in nodeStates: + pattern = re.compile("[ ]+[^ ]+[ ]+(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})[ ]+[^ ]+[ ]+([^ ]+)[ ]+") + result = pattern.findall(nodeState) + if len(result) != 0: + result = result[0] + if len(result) != 0: + dataNodes[result[0]] = result[1] + clusterInfoDict = self.context.clusterInfoDict + backIpNameMap = self.context.backIpNameMap + for hostIp in self.existingHosts: + hostName = backIpNameMap[hostIp] + dataNode = clusterInfoDict[hostName]["dataNode"] + if dataNode != dataNodes[hostIp]: + GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35711"] % + ("dataNode of %s" % hostIp)) + + def _checkAvailableZone(self): + """ + check available_zone + """ + self.logger.debug("Checking the consistence of azname") + clusterInfoDict = self.context.clusterInfoDict + backIpNameMap = self.context.backIpNameMap + hostAzNameMap = self.context.hostAzNameMap + primary = self.getPrimaryHostName() + for hostIp in self.existingHosts: + hostName = backIpNameMap[hostIp] + if hostName == primary: + continue + dataNode = clusterInfoDict[hostName]["dataNode"] + if 
DefaultValue.getEnv("MPPDB_ENV_SEPARATE_PATH"): + cmd = "su - %s -c 'source %s;" \ + "gs_guc check -D %s -c \"available_zone\"'" % \ + (self.user, self.envFile, dataNode) + else: + cmd = "su - %s -c 'source /etc/profile;source %s;" \ + "gs_guc check -D %s -c \"available_zone\"'" % \ + (self.user, self.envFile, dataNode) + sshTool = SshTool([hostIp]) + resultMap, output = sshTool.getSshStatusOutput(cmd, + [hostIp], self.envFile) + self.logger.debug(resultMap) + self.logger.debug(output) + if resultMap[hostIp] != DefaultValue.SUCCESS: + GaussLog.exitWithError(ErrorCode.GAUSS_516["GAUSS_51600"]) + self.cleanSshToolFile(sshTool) + azPattern = re.compile("available_zone='(.*)'") + azName = azPattern.findall(output) + if len(azName) != 0: + azName = azName[0] + if azName != hostAzNameMap[hostIp]: + GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35711"] % + ("azName of %s" % hostIp)) + + def checkClusterStatus(self): + """ + Check whether the cluster status is normal before expand. + """ + self.logger.debug("Start to check cluster status.") + + curHostName = socket.gethostname() + command = "" + if DefaultValue.getEnv("MPPDB_ENV_SEPARATE_PATH"): + command = "su - %s -c 'source %s;gs_om -t status --detail'" % \ + (self.user, self.envFile) + else: + command = "su - %s -c 'source /etc/profile;source %s;"\ + "gs_om -t status --detail'" % (self.user, self.envFile) + sshTool = SshTool([curHostName]) + resultMap, outputCollect = sshTool.getSshStatusOutput(command, + [curHostName], self.envFile) + self.logger.debug(resultMap) + self.logger.debug(outputCollect) + if outputCollect.find("Primary Normal") == -1: + GaussLog.exitWithError(ErrorCode.GAUSS_516["GAUSS_51600"]) + + self.logger.debug("The primary database is normal.\n") + currentWalKeepSegments = self.queryPrimaryWalKeepSegments() + if currentWalKeepSegments != "NULL": + self.walKeepSegments = int(currentWalKeepSegments) + else: + self.walKeepSegments = 16 + + def _adjustOrderOfNewHostList(self): + """ + Adjust the order of hostlist so that + standby comes first and cascade standby comes last + """ + newHostList = self.context.newHostList + newHostCasRoleMap = self.context.newHostCasRoleMap + i, j = 0, len(newHostList) - 1 + while i < j: + while i < j and newHostCasRoleMap[newHostList[i]] == "off": + i += 1 + while i < j and newHostCasRoleMap[newHostList[j]] == "on": + j -= 1 + newHostList[i], newHostList[j] = newHostList[j], newHostList[i] + i += 1 + j -= 1 + + def validNodeInStandbyList(self): + """ + check if the node has been installed in the cluster. + """ + self.logger.debug("Start to check if the nodes in standby list.") + self.getExistingHosts() + newHostList = self.context.newHostList + existedNewHosts = \ + [host for host in newHostList if host in self.existingHosts] + if existedNewHosts: + newHostList = \ + [host for host in newHostList if host not in existedNewHosts] + self.context.newHostList = newHostList + self.expansionSuccess = {} + for host in newHostList: + self.expansionSuccess[host] = False + self.logger.log("These nodes [%s] are already in the cluster. " + "Skip expand these nodes." % ",".join(existedNewHosts)) + if len(newHostList) == 0: + self.logger.log("There is no node can be expanded.") + sys.exit(0) + self._adjustOrderOfNewHostList() + + def checkXmlFileAccessToUser(self): + """ + Check if the xml config file has readable access to user. 
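+ If it is not readable, the ownership and mode of the file are
+ adjusted below so that the cluster user can read it.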
+ """ + userInfo = pwd.getpwnam(self.user) + uid = userInfo.pw_uid + gid = userInfo.pw_gid + + xmlFile = self.context.xmlFile + fstat = os.stat(xmlFile) + mode = fstat[stat.ST_MODE] + if (fstat[stat.ST_UID] == uid and (mode & stat.S_IRUSR > 0)) or \ + (fstat[stat.ST_GID] == gid and (mode & stat.S_IRGRP > 0)): + pass + else: + self.logger.debug(ErrorCode.GAUSS_501["GAUSS_50100"] + % (xmlFile, self.user)) + os.chown(xmlFile, uid, gid) + os.chmod(xmlFile, stat.S_IRUSR) + + def checkUserAndGroupExists(self): + """ + check system user and group exists and be same + on primary and standby nodes + """ + inputUser = self.user + inputGroup = self.group + + user_group_id = "" + isUserExits = False + localHost = socket.gethostname() + for user in pwd.getpwall(): + if user.pw_name == self.user: + user_group_id = user.pw_gid + isUserExits = True + break + if not isUserExits: + GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35704"] \ + % ("User", self.user, localHost)) + + isGroupExits = False + group_id = "" + for group in grp.getgrall(): + if group.gr_name == self.group: + group_id = group.gr_gid + isGroupExits = True + if not isGroupExits: + GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35704"] \ + % ("Group", self.group, localHost)) + if user_group_id != group_id: + GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35712"] + % (self.user, self.group)) + + hostNames = self.context.newHostList + envfile = self.envFile + sshTool = SshTool(hostNames) + + #get username in the other standy nodes + getUserNameCmd = "cat /etc/passwd | grep -w %s" % inputUser + resultMap, outputCollect = sshTool.getSshStatusOutput(getUserNameCmd, + [], envfile) + + for hostKey in resultMap: + if resultMap[hostKey] == STATUS_FAIL: + self.cleanSshToolFile(sshTool) + GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35704"] \ + % ("User", self.user, hostKey)) + + #get groupname in the other standy nodes + getGroupNameCmd = "cat /etc/group | grep -w %s" % inputGroup + resultMap, outputCollect = sshTool.getSshStatusOutput(getGroupNameCmd, + [], envfile) + for hostKey in resultMap: + if resultMap[hostKey] == STATUS_FAIL: + self.cleanSshToolFile(sshTool) + GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35704"] \ + % ("Group", self.group, hostKey)) + self.cleanSshToolFile(sshTool) + + + def installAndExpansion(self): + """ + install database and expansion standby node with db om user + """ + pvalue = Value('i', 0) + proc = Process(target=self.installProcess, args=(pvalue,)) + proc.start() + proc.join() + if not pvalue.value: + sys.exit(1) + else: + proc.terminate() + + def installProcess(self, pvalue): + # change to db manager user. the below steps run with db manager user. 
+ self.changeUser() + + if not self.context.standbyLocalMode: + self.logger.log("Start to install database on new nodes.") + self.installDatabaseOnHosts() + self.logger.log("Database on standby nodes installed finished.\n") + self.checkGaussdbAndGsomVersionOfStandby() + self.logger.log("Start to establish the relationship.") + self.buildStandbyRelation() + # process success + pvalue.value = 1 + + def rollback(self): + """ + rollback all hosts' replconninfo about failed hosts + """ + self.getExistingHosts() + failedHosts = list(set(self.context.newHostList) - set(self.existingHosts)) + clusterInfoDict = self.context.clusterInfoDict + for failedHost in failedHosts: + # rollback GRPC cert on failed hosts + self.logger.debug("Start to rollback GRPC cert of %s" % failedHost) + appPath = DefaultValue.getInstallDir(self.user) + removeGRPCCertCmd = "ls %s/share/sslcert/grpc/* | grep -v openssl.cnf | " \ + "xargs rm -rf" % appPath + sshTool = SshTool([failedHost]) + sshTool.getSshStatusOutput(removeGRPCCertCmd, [failedHost]) + self.cleanSshToolFile(sshTool) + for host in self.expansionSuccess: + if not self.expansionSuccess[host]: + sshTool = SshTool([host]) + sshTool.getSshStatusOutput(removeGRPCCertCmd, [host], self.envFile) + self.cleanSshToolFile(sshTool) + self.logger.debug("Start to rollback replconninfo about %s" % failedHost) + for host in self.existingHosts: + hostName = self.context.backIpNameMap[host] + dataNode = clusterInfoDict[hostName]["dataNode"] + confFile = os.path.join(dataNode, "postgresql.conf") + rollbackReplconninfoCmd = "sed -i '/remotehost=%s/s/^/#&/' %s" \ + % (failedHost, confFile) + self.logger.debug("[%s] rollbackReplconninfoCmd:%s" % (host, + rollbackReplconninfoCmd)) + sshTool = SshTool([host]) + sshTool.getSshStatusOutput(rollbackReplconninfoCmd, [host]) + pg_hbaFile = os.path.join(dataNode, "pg_hba.conf") + rollbackPg_hbaCmd = "sed -i '/%s/s/^/#&/' %s" \ + % (failedHost, pg_hbaFile) + self.logger.debug("[%s] rollbackPg_hbaCmd:%s" % (host, + rollbackPg_hbaCmd)) + sshTool.getSshStatusOutput(rollbackPg_hbaCmd, [host]) + reloadGUCCommand = "su - %s -c 'source %s; gs_ctl reload " \ + "-D %s'" % (self.user, self.envFile, dataNode) + self.logger.debug(reloadGUCCommand) + resultMap, outputCollect = sshTool.getSshStatusOutput( + reloadGUCCommand, [host], self.envFile) + self.logger.debug(resultMap) + self.logger.debug(outputCollect) + self.cleanSshToolFile(sshTool) + + def _isAllFailed(self): + """ + check whether all new hosts preinstall/install/build failed + """ + for host in self.expansionSuccess: + if self.expansionSuccess[host]: + return False + return True + + def run(self): + """ + start expansion + """ + self.checkNodesDetail() + # preinstall on standby nodes with root user. 
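+ # gs_preinstall requires root, so this step runs before switching to
+ # the cluster owner; it is skipped when the standby hosts were
+ # prepared locally (standbyLocalMode).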
+ if not self.context.standbyLocalMode:
+ self.preInstall()
+
+ self.installAndExpansion()
+ self.logger.log("Expansion finished.")
+
+
+class GsCtlCommon:
+
+ def __init__(self, expansion):
+ """
+ """
+ self.logger = expansion.logger
+ self.user = expansion.user
+
+ def queryInstanceStatus(self, host, datanode, env):
+ """
+ query the local_role and db_state of an instance with gs_ctl query
+ """
+ command = "source %s ; gs_ctl query -D %s" % (env, datanode)
+ sshTool = SshTool([host])
+ resultMap, outputCollect = sshTool.getSshStatusOutput(command,
+ [host], env)
+ self.logger.debug(outputCollect)
+ localRole = re.findall(r"local_role.*: (.*?)\n", outputCollect)
+ db_state = re.findall(r"db_state.*: (.*?)\n", outputCollect)
+
+ insType = ""
+ if len(localRole) == 0:
+ insType = ""
+ else:
+ insType = localRole[0]
+
+ dbStatus = ""
+ if len(db_state) == 0:
+ dbStatus = ""
+ else:
+ dbStatus = db_state[0]
+ self.cleanSshToolTmpFile(sshTool)
+ return insType.strip().lower(), dbStatus.strip().lower()
+
+ def stopInstance(self, host, datanode, env):
+ """
+ """
+ command = "source %s ; gs_ctl stop -D %s" % (env, datanode)
+ sshTool = SshTool([host])
+ resultMap, outputCollect = sshTool.getSshStatusOutput(command,
+ [host], env)
+ self.logger.debug(host)
+ self.logger.debug(outputCollect)
+ self.cleanSshToolTmpFile(sshTool)
+
+ def startInstanceWithMode(self, host, datanode, mode, env):
+ """
+ """
+ command = "source %s ; gs_ctl start -D %s -M %s" % (env, datanode, mode)
+ self.logger.debug(command)
+ sshTool = SshTool([host])
+ resultMap, outputCollect = sshTool.getSshStatusOutput(command,
+ [host], env)
+ self.logger.debug(host)
+ self.logger.debug(outputCollect)
+ self.cleanSshToolTmpFile(sshTool)
+ return resultMap, outputCollect
+
+ def buildInstance(self, host, datanode, mode, env):
+ command = "source %s ; gs_ctl build -D %s -M %s" % (env, datanode, mode)
+ self.logger.debug(command)
+ sshTool = SshTool([host])
+ resultMap, outputCollect = sshTool.getSshStatusOutput(command,
+ [host], env)
+ self.logger.debug(host)
+ self.logger.debug(outputCollect)
+ self.cleanSshToolTmpFile(sshTool)
+
+ def startOmCluster(self, host, env):
+ """
+ om tool start cluster
+ """
+ command = "source %s ; gs_om -t start" % env
+ self.logger.debug(command)
+ sshTool = SshTool([host])
+ resultMap, outputCollect = sshTool.getSshStatusOutput(command,
+ [host], env)
+ self.logger.debug(host)
+ self.logger.debug(outputCollect)
+ self.cleanSshToolTmpFile(sshTool)
+
+ def queryOmCluster(self, host, env):
+ """
+ query om cluster detail with command:
+ gs_om -t status --detail
+ """
+ command = "source %s ; gs_om -t status --detail" % env
+ sshTool = SshTool([host])
+ resultMap, outputCollect = sshTool.getSshStatusOutput(command,
+ [host], env)
+ self.logger.debug(host)
+ self.logger.debug(outputCollect)
+ if resultMap[host] == STATUS_FAIL:
+ GaussLog.exitWithError(ErrorCode.GAUSS_516["GAUSS_51600"] +
+ "Please check the cluster status or source the environment"
+ " variables of user [%s]." 
% self.user) + self.cleanSshToolTmpFile(sshTool) + return outputCollect + + def queryGucParaValue(self, host, env, datanode, para, user=""): + """ + query guc parameter value + """ + value = "" + command = "" + if user: + command = "su - %s -c 'source %s; gs_guc check -D %s -c \"%s\"'" % \ + (user, env, datanode, para) + else: + command = "source %s; gs_guc check -D %s -c \"%s\"" % \ + (env, datanode, para) + sshTool = SshTool([host]) + resultMap, outputCollect = sshTool.getSshStatusOutput( + command, [host], env) + self.logger.debug(host) + self.logger.debug(outputCollect) + if resultMap[host] == STATUS_FAIL: + return resultMap[host], "" + self.cleanSshToolTmpFile(sshTool) + paraPattern = re.compile(" %s=(.+)" % para) + value = paraPattern.findall(outputCollect) + if len(value) != 0: + value = value[0] + else: + value = "NULL" + return resultMap[host], value + + def setGucPara(self, host, env, datanode, para, value, user=""): + """ + set guc parameter + """ + command = "" + if not user: + command = "source %s; gs_guc set -D %s -c \"%s=%s\"" % \ + (env, datanode, para, value) + else: + command = "su - %s -c 'source %s; gs_guc set -D %s -c \"%s=%s\"'" % \ + (user, env, datanode, para, value) + sshTool = SshTool([host]) + resultMap, outputCollect = sshTool.getSshStatusOutput( + command, [host], env) + self.logger.debug(host) + self.logger.debug(outputCollect) + self.cleanSshToolTmpFile(sshTool) + return resultMap[host] + + def cleanSshToolTmpFile(self, sshTool): + """ + """ + try: + sshTool.clenSshResultFiles() + except Exception as e: + self.logger.debug(str(e)) \ No newline at end of file diff --git a/script/impl/expansion/__init__.py b/script/impl/expansion/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/script/impl/install/InstallImpl.py b/script/impl/install/InstallImpl.py new file mode 100644 index 0000000..02c7477 --- /dev/null +++ b/script/impl/install/InstallImpl.py @@ -0,0 +1,583 @@ +# -*- coding:utf-8 -*- +############################################################################# +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +# Description : gs_install is a utility to deploy a Gauss200 server. 
+#############################################################################
+import os
+import sys
+
+sys.path.append(sys.path[0] + "/../../")
+from gspylib.common.GaussLog import GaussLog
+from gspylib.common.Common import DefaultValue
+from gspylib.common.OMCommand import OMCommand
+from gspylib.os.gsfile import g_file
+from gspylib.common.DbClusterInfo import dbNodeInfo, \
+ dbClusterInfo, compareObject
+
+#############################################################################
+# Const variables
+# INSTALL_STEP: the current install step marker
+# STEPBACKUP_DIR: the backup directory that stores the step information
+# STEP_INIT, STEP_INSTALL, STEP_CONFIG, STEP_START: the names of the
+# install steps
+#############################################################################
+INSTALL_STEP = ""
+STEPBACKUP_DIR = ""
+STEP_INIT = "Init Install"
+STEP_INSTALL = "Install cluster"
+STEP_CONFIG = "Config cluster"
+STEP_START = "Start cluster"
+
+#############################################################################
+# TP cluster type
+#############################################################################
+
+#####################################################
+# Action type
+#####################################################
+ACTION_INSTALL_CLUSTER = "install_cluster"
+ACTION_START_CLUSTER = "start_cluster"
+ACTION_BUILD_STANDBY = "build_standby"
+ACTION_BUILD_CASCADESTANDBY = "build_cascadestandby"
+
+# exit code
+EXEC_SUCCESS = 0
+
+
+#############################################################################
+# Global variables
+# self.context.logger: global logger
+# self.context.clusterInfo: global cluster information
+# self.context.sshTool: global ssh tool interface
+#############################################################################
+
+class InstallImpl:
+ """
+ The class is used to perform the installation
+ """
+
+ def __init__(self, install):
+ """
+ function: constructor, save the command line parameter values
+ """
+ self.context = install
+
+ def run(self):
+ """
+ function: run method
+ """
+ try:
+ # check timeout time.
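+ # (time_out bounds how long the cluster start may take; the OLAP
+ # subclass validates it in checkTimeout)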
+ # Notice: time_out is not supported under TP branch
+ self.checkTimeout()
+
+ # check that preinstall has been done for this user on every node
+ self.checkGaussenvFlag()
+ # check the cluster status
+ self.checkClusterStatus()
+ # creating the backup directory
+ self.prepareBackDir()
+ # Check time consistency (TP only; the skew must be less than 2s)
+ self.checkTimeConsistency()
+ # install cluster
+ self.context.logger.log("begin deploy..")
+ self.doDeploy()
+ self.context.logger.log("end deploy..")
+ # close the log file
+ self.context.logger.closeLog()
+ except Exception as e:
+ GaussLog.exitWithError(str(e))
+
+ def checkTimeout(self):
+ """
+ function: check timeout
+ """
+ pass
+
+ def checkGaussenvFlag(self):
+ """
+ function: check whether preinstall has been done for this user
+ on every node
+ 1 PREINSTALL_FLAG
+ 2 INSTALL_FLAG
+ input : NA
+ output: NA
+ """
+ try:
+ self.context.logger.log("Check preinstall on every node.",
+ "addStep")
+ self.context.checkPreInstall(self.context.user, "preinstall")
+ self.context.logger.log(
+ "Successfully checked preinstall on every node.", "constant")
+ except Exception as e:
+ self.context.logger.logExit(str(e))
+
+ def checkClusterStatus(self):
+ """
+ function: Check if cluster is running
+ input : NA
+ output: NA
+ """
+ pass
+
+ def checkTimeConsistency(self):
+ """
+ Check time consistency between hosts in cluster
+ :return: NA
+ """
+ pass
+
+ def prepareBackDir(self):
+ """
+ function: Creating the backup directory
+ input : NA
+ output: NA
+ """
+ self.context.logger.log("Creating the backup directory.", "addStep")
+ self.context.managerOperateStepDir()
+
+ # if the step file exists
+ if (os.path.exists(self.context.operateStepFile)):
+ # read the step from INSTALL_STEP
+ lastStep = self.context.readOperateStep()
+ # print the step
+ self.context.logger.log("Last run ended with %s." % lastStep)
+ self.context.logger.log("Continue this step.")
+
+ # Successfully created the backup directory
+ self.context.logger.log("Successfully created the backup directory.",
+ "constant")
+
+ def checkPgLogFileMode(self):
+ """
+ function: change pg_log file mode
+ input : NA
+ output: NA
+ """
+ pass
+
+ def compareOldNewClusterConfigInfo(self, clusterInfo, oldClusterInfo):
+ """
+ function: verify cluster config info between old and new cluster
+ input : clusterInfo, oldClusterInfo
+ output: NA
+ """
+
+ # convert the new cluster information for comparison
+ compnew = self.storageDbClusterInfo(clusterInfo)
+ # convert the old cluster information for comparison
+ compold = self.storageDbClusterInfo(oldClusterInfo)
+ # do the comparison
+ # if they are not the same, print the difference
+ theSame, tempbuffer = compareObject(compnew, compold, "clusterInfo",
+ [])
+ if (theSame):
+ self.context.logger.debug(
+ "Static configuration matched with old "
+ "static configuration files.")
+ else:
+ msg = \
+ "Instance [%s] is not the same." \
\ + "\nXmlConfigFile: %s\nStaticConfigFile: %s\n" % \ + (tempbuffer[0], tempbuffer[1], tempbuffer[2]) + self.context.logger.log( + "The cluster's static configuration " + "does not match the new configuration file.") + self.context.logger.log(msg.strip("\n")) + return theSame + + def storageDbClusterInfo(self, dbclusterInfo): + """ + function: covert to comp cluster + input : dbclusterInfo + output: midClusterInfo + """ + # init dbcluster class + midClusterInfo = dbClusterInfo() + # get cluster name + midClusterInfo.name = dbclusterInfo.name + for dbnode in dbclusterInfo.dbNodes: + compNodeInfo = dbNodeInfo() + compNodeInfo.azName = dbnode.azName + compNodeInfo.name = dbnode.name + midClusterInfo.dbNodes.append(compNodeInfo) + return midClusterInfo + + def doDeploy(self): + """ + function: Deploy Application + input : NA + output: NA + """ + # read the install setp from INSTALL_STEP + self.context.logger.debug("Installing application") + # compare xmlconfigInfo with staticConfigInfo + gaussHome = DefaultValue.getInstallDir(self.context.user) + commonStaticConfigFile = "%s/bin/cluster_static_config" % gaussHome + if os.path.exists(commonStaticConfigFile): + self.context.oldClusterInfo = dbClusterInfo() + self.context.oldClusterInfo.initFromStaticConfig( + self.context.user, + commonStaticConfigFile) + sameFlag = self.compareOldNewClusterConfigInfo( + self.context.clusterInfo, self.context.oldClusterInfo) + else: + sameFlag = True + + step = self.context.readOperateStep() + # if step is STEP_INSTALL + if (step == STEP_INSTALL) or (step == STEP_CONFIG and not sameFlag): + # rollback the install + self.rollbackInstall() + # write the install step + self.context.writeOperateStep(STEP_INIT) + + # read the install step from INSTALL_STEP + step = self.context.readOperateStep() + # if step is STEP_INIT + if step == STEP_INIT: + # write the install step STEP_INSTALL into INSTALL_STEP + self.context.writeOperateStep(STEP_INSTALL) + # install Gauss200 DB + self.doInstall() + # write the install step STEP_CONFIG into INSTALL_STEP + self.context.writeOperateStep(STEP_CONFIG) + + # read the install step from INSTALL_STEP + step = self.context.readOperateStep() + # if step is STEP_CONFIG + if step == STEP_CONFIG: + # config Gauss200 DB + self.doConfig() + # write the install step STEP_CONFIG into STEP_START + self.context.writeOperateStep(STEP_START) + + # read the install step from INSTALL_STEP + step = self.context.readOperateStep() + # if step is STEP_START + if step == STEP_START: + # start Gauss200 DB + self.doStart() + # change pg_log file mode in pg_log path (only AP) + self.checkPgLogFileMode() + + # clear the backup directory. 
+ self.context.managerOperateStepDir("delete")
+ self.context.logger.log("Successfully installed application.")
+
+ def prepareInstallCluster(self):
+ """
+ prepare to install the cluster
+ AP: distribute the package
+ and check the installation environment on all nodes
+ TP: skip
+ """
+ pass
+
+ def installClusterApp(self):
+ """
+ function: install cluster instance
+ input : NA
+ output: NA
+ """
+ self.context.logger.log("Installing applications on all nodes.")
+ # Installing applications
+ cmd = "source %s;" % self.context.mpprcFile
+ cmd += "%s -t %s -U %s -X %s -R %s -c %s -l %s %s" % (
+ OMCommand.getLocalScript("Local_Install"),
+ ACTION_INSTALL_CLUSTER,
+ self.context.user + ":" + self.context.group,
+ self.context.xmlFile,
+ self.context.clusterInfo.appPath, self.context.clusterInfo.name,
+ self.context.localLog,
+ self.getCommandOptions())
+ self.context.logger.debug(
+ "Command for installing application: %s" % cmd)
+
+ # exec the cmd for install application on all nodes
+ DefaultValue.execCommandWithMode(cmd,
+ "Install applications",
+ self.context.sshTool,
+ self.context.isSingle,
+ self.context.mpprcFile)
+ self.context.logger.log("Successfully installed APP.")
+
+ def doInstall(self):
+ """
+ function: do install
+ input: NA
+ output: NA
+ """
+ self.context.logger.log("Installing the cluster.", "addStep")
+ try:
+ # prepare to install the cluster
+ # AP: distribute the package
+ self.context.logger.log("begin to prepare for cluster installation..")
+ self.prepareInstallCluster()
+ except Exception as e:
+ self.context.managerOperateStepDir("delete")
+ self.context.logger.logExit(str(e))
+
+ try:
+ # install cluster APP
+ # AP: 1. check env 2. extract the package 3. modify the env flag
+ self.context.logger.log("begin to install the cluster..")
+ self.installClusterApp()
+ self.context.logger.log("begin to init instances..")
+ self.initInstance()
+ self.configZenithInst()
+ self.context.logger.log("encrypt cipher and rand files "
+ "for database.")
+ initPasswd = self.getPasswdFromInitParam()
+ self.context.genCipherAndRandFile(None, initPasswd)
+ self.context.logger.log("begin to create CA cert files")
+ self.context.createServerCa()
+ if not self.context.localMode:
+ self.context.createGrpcCa()
+
+ except Exception as e:
+ self.context.logger.logExit(str(e))
+
+ # Cluster installation is completed
+ self.context.logger.log("Cluster installation is completed.",
+ "constant")
+
+ def getPasswdFromInitParam(self):
+ """
+ function: get passwd from init-parameter
+ return: passwd
+ get passwd from --gsinit-parameter. if the passwd has been assigned,
+ the database will be installed non-interactively.
+ """
+ if len(self.context.dbInitParam) == 0:
+ return None
+ passwd = None
+ pwdIndex = -1
+ for idx, param in enumerate(self.context.dbInitParam):
+ if param.startswith("--pwpasswd="):
+ passwd = param[11:]
+ pwdIndex = idx
+ break
+ elif param.startswith("-w="):
+ passwd = param[3:]
+ pwdIndex = idx
+ break
+
+ # remove initpasswd from dbInitParam.
+ # otherwise it would be printed in the log.
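+ # pop() by index so that the remaining --gsinit-parameter values are
+ # passed through to instance initialization unchanged.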
+ if pwdIndex > -1: + self.context.dbInitParam.pop(pwdIndex) + return passwd + + def configZenithInst(self): + """ + function: config zenith inst + :return: + """ + pass + + def initInstance(self): + """ + function: init instance + :return: + """ + pass + + def getCommandOptions(self): + """ + function: get command options + """ + pass + + def checkNodeConfig(self): + """ + function: Check node config on all nodes + input : NA + output: NA + """ + pass + + # for ap + def prepareConfigCluster(self): + """ + function: install cluster instance + input : NA + output: NA + """ + pass + + def initNodeInstance(self): + """ + function: init instance applications + input : NA + output: NA + """ + pass + + def configInstance(self): + """ + function: config instance + input : NA + output: NA + """ + pass + + def distributeRackInfo(self): + """ + function: Distributing the rack Information File + input : NA + output: NA + """ + pass + + def doConfig(self): + """ + function: Do config action + input : NA + output: NA + """ + self.context.logger.log("Configuring.", "addStep") + try: + # prepared config cluster + # AP: clean instance directory and check node config + self.prepareConfigCluster() + self.initNodeInstance() + self.configInstance() + self.distributeRackInfo() + DefaultValue.enableWhiteList( + self.context.sshTool, + self.context.mpprcFile, + self.context.clusterInfo.getClusterNodeNames(), + self.context.logger) + except Exception as e: + # failed to clear the backup directory + self.context.logger.logExit(str(e)) + # Configuration is completed + self.context.logger.log("Configuration is completed.", "constant") + + def startCluster(self): + """ + function: start cluster + input : NA + output: NA + """ + # Start cluster applications + cmd = "source %s;" % self.context.mpprcFile + cmd += "%s -t %s -U %s -X %s -R %s -c %s -l %s %s" % ( + OMCommand.getLocalScript("Local_Install"), + ACTION_START_CLUSTER, + self.context.user + ":" + self.context.group, + self.context.xmlFile, + self.context.clusterInfo.appPath, + self.context.clusterInfo.name, self.context.localLog, + self.getCommandOptions()) + self.context.logger.debug("Command for start cluster: %s" % cmd) + DefaultValue.execCommandWithMode( + cmd, + "Start cluster", + self.context.sshTool, + self.context.isSingle or self.context.localMode, + self.context.mpprcFile) + + # build stand by + cmd = "source %s;" % self.context.mpprcFile + cmd += "%s -t %s -U %s -X %s -R %s -c %s -l %s %s" % ( + OMCommand.getLocalScript("Local_Install"), + ACTION_BUILD_STANDBY, + self.context.user + ":" + self.context.group, + self.context.xmlFile, + self.context.clusterInfo.appPath, + self.context.clusterInfo.name, self.context.localLog, + self.getCommandOptions()) + self.context.logger.debug("Command for build standby: %s" % cmd) + DefaultValue.execCommandWithMode( + cmd, + "Build standby", + self.context.sshTool, + self.context.isSingle or self.context.localMode, + self.context.mpprcFile) + + # build casecadestand by + cmd = "source %s;" % self.context.mpprcFile + cmd += "%s -t %s -U %s -X %s -R %s -c %s -l %s %s" % ( + OMCommand.getLocalScript("Local_Install"), + ACTION_BUILD_CASCADESTANDBY, + self.context.user + ":" + self.context.group, + self.context.xmlFile, + self.context.clusterInfo.appPath, + self.context.clusterInfo.name, self.context.localLog, + self.getCommandOptions()) + self.context.logger.debug("Command for build cascade standby: %s" % cmd) + for hostname in self.context.sshTool.hostNames: + DefaultValue.execCommandWithMode( + cmd, + "Build cascade 
standby", + self.context.sshTool, + self.context.isSingle or self.context.localMode, + self.context.mpprcFile, [hostname]) + + self.context.logger.log("Successfully started cluster.") + + def doStart(self): + """ + function:start cluster + input : NA + output: NA + """ + self.context.logger.debug("Start the cluster.", "addStep") + try: + tmpGucFile = "" + tmpGucPath = DefaultValue.getTmpDirFromEnv(self.context.user) + tmpGucFile = "%s/tmp_guc" % tmpGucPath + cmd = g_file.SHELL_CMD_DICT["deleteFile"] % ( + tmpGucFile, tmpGucFile) + DefaultValue.execCommandWithMode(cmd, "Install applications", + self.context.sshTool, + self.context.isSingle, + self.context.mpprcFile) + # start cluster in non-native mode + self.startCluster() + except Exception as e: + self.context.logger.logExit(str(e)) + self.context.logger.debug("Successfully started the cluster.", + "constant") + + def rollbackInstall(self): + """ + function: Rollback install + input : NA + output: NA + 0 succeed + 1 failed + 2 rollback succeed + 3 rollback failed + """ + pass + + # for olap + def deleteTempFileForUninstall(self): + """ + function: Rollback install ,delete temporary file + input : NA + output: NA + """ + pass diff --git a/script/impl/install/OLAP/InstallImplOLAP.py b/script/impl/install/OLAP/InstallImplOLAP.py new file mode 100644 index 0000000..ac2668c --- /dev/null +++ b/script/impl/install/OLAP/InstallImplOLAP.py @@ -0,0 +1,490 @@ +# -*- coding:utf-8 -*- +############################################################################# +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +# Description : gs_install is a utility to deploy a Gauss200 server. 
+############################################################################# +import subprocess +import os +import sys + +sys.path.append(sys.path[0] + "/../../../") +from gspylib.common.Common import DefaultValue, ClusterCommand +from gspylib.common.OMCommand import OMCommand +from gspylib.common.ErrorCode import ErrorCode +from gspylib.common.VersionInfo import VersionInfo +from gspylib.os.gsfile import g_file +from impl.install.InstallImpl import InstallImpl + +ROLLBACK_FAILED = 3 + + +class InstallImplOLAP(InstallImpl): + """ + The class is used to do perform installation + """ + """ + init the command options + save command line parameter values + """ + + def __init__(self, install): + """ + function: constructor + """ + super(InstallImplOLAP, self).__init__(install) + + def checkTimeout(self): + """ + function: check timeout + input: NA + output: NA + """ + if (self.context.time_out is None): + # if --time-out is null + self.context.time_out = DefaultValue.TIMEOUT_CLUSTER_START + else: + if (not str(self.context.time_out).isdigit()): + # --time-out is not a digit + raise Exception(ErrorCode.GAUSS_500["GAUSS_50003"] % ( + "-time-out", "a nonnegative integer")) + self.context.time_out = int(self.context.time_out) + if ( + self.context.time_out <= 0 + or self.context.time_out >= 2147483647): + # --time-out is not a int + raise Exception( + ErrorCode.GAUSS_500["GAUSS_50004"] % "-time-out") + + def deleteTempFileForUninstall(self): + """ + function: Rollback install ,delete temporary file + input : NA + output: NA + """ + # Deleting temporary file + self.context.logger.debug("Deleting temporary file.") + tmpFile = "/tmp/temp.%s" % self.context.user + cmd = g_file.SHELL_CMD_DICT["deleteFile"] % (tmpFile, tmpFile) + DefaultValue.execCommandWithMode(cmd, + "delete temporary file", + self.context.sshTool, + self.context.isSingle) + self.context.logger.debug("Successfully deleted temporary file.") + + def prepareInstallCluster(self): + """ + function: prepared install cluster + AP: distribute package + and Check installation environment on all nodes + TP: skip + """ + if (not self.context.dws_mode and not self.context.isSingle): + # distribute package to every host + self.context.distributeFiles() + self.checkNodeInstall() + + def getCommandOptions(self): + """ + function: get command options + input: NA + output: NA + """ + opts = "" + if self.context.alarm_component != "": + opts += " --alarm=%s " % self.context.alarm_component + if self.context.time_out is not None: + opts += " --time_out=%d " % self.context.time_out + return opts + + def prepareConfigCluster(self): + """ + function: install cluster instance + input : NA + output: NA + """ + self.context.cleanNodeConfig() + self.checkNodeConfig() + + def checkNodeConfig(self): + """ + function: Check node config on all nodes + input : NA + output: NA + """ + self.context.logger.log("Checking node configuration on all nodes.") + # Check node config on all nodes + cmdParam = "" + for param in self.context.dataGucParam: + cmdParam += " -D \\\"%s\\\"" % param + + cmd = "source %s;" % self.context.mpprcFile + cmd += "%s -U %s -l %s %s" % ( + OMCommand.getLocalScript("Local_Check_Config"), self.context.user, + self.context.localLog, cmdParam) + self.context.logger.debug( + "Command for checking node configuration: %s." 
% cmd) + + cmd = self.singleCmd(cmd) + + DefaultValue.execCommandWithMode(cmd, + "check node configuration", + self.context.sshTool, + self.context.isSingle) + self.context.logger.debug("Successfully checked node configuration.") + + def checkNodeInstall(self): + """ + function: check node install + input: NA + output: NA + """ + self.context.logger.debug("Checking node's installation.", "constant") + # Checking node's installation + self.context.logger.log( + "Checking the installation environment on all nodes.", "constant") + # Checking the installation environment + cmd = "source %s;" % self.context.mpprcFile + cmd += "%s -U %s -R %s -l %s -X %s" % ( + OMCommand.getLocalScript("Local_Check_Install"), + self.context.user + ":" + self.context.group, + self.context.clusterInfo.appPath, + self.context.localLog, self.context.xmlFile) + self.context.logger.debug( + "Command for checking installation: %s." % cmd) + + cmd = self.singleCmd(cmd) + + DefaultValue.execCommandWithMode(cmd, + "check installation environment", + self.context.sshTool, + self.context.isSingle) + self.context.logger.debug("Successfully checked node's installation.", + "constant") + + def initNodeInstance(self): + """ + function: init instance applications + input : NA + output: NA + """ + self.context.logger.log("Initializing instances on all nodes.") + # init instance applications + cmdParam = "" + # get the --gsinit-parameter parameter values + for param in self.context.dbInitParam: + cmdParam += " -P \\\"%s\\\"" % param + + cmd = "source %s;" % self.context.mpprcFile + # init instances on all nodes + cmd += "%s -U %s %s -l %s" % ( + OMCommand.getLocalScript("Local_Init_Instance"), self.context.user, + cmdParam, self.context.localLog) + self.context.logger.debug( + "Command for initializing instances: %s" % cmd) + + cmd = self.singleCmd(cmd) + + DefaultValue.execCommandWithMode(cmd, + "initialize instances", + self.context.sshTool, + self.context.isSingle) + self.context.logger.debug("Successfully initialized node instance.") + + def configInstance(self): + """ + function: config instance + input : NA + output: NA + """ + # config instance applications + self.updateInstanceConfig() + self.updateHbaConfig() + + def checkMemAndCores(self): + """ + function: memCheck and coresCheck + input : NA + output : False/True + """ + self.context.logger.log( + "Check consistence of memCheck and coresCheck on database nodes.") + self.context.logger.debug( + "Check whether the memory " + "and CPU cores of database nodes meet the requirements.") + self.context.logger.debug("If all database nodes meet follows : ") + self.context.logger.debug("memory=128G and CPU logic_cores=16") + self.context.logger.debug( + "Then we don't use default guc set xmlFile : guc_list.xml") + checkConsistence = False + data_check_info = {} + if self.context.isSingle: + return False + all_dn = [] + for dataNode in self.context.clusterInfo.dbNodes: + if len(dataNode.datanodes) > 0: + all_dn.append(dataNode) + self.context.logger.debug( + "Check consistence of memCheck and coresCheck on database node: %s" + % [node.name for node in all_dn]) + for dbNode in all_dn: + memCheck = "cat /proc/cpuinfo | grep processor | wc -l" + coresCheck = "free -g --si | grep 'Mem' | awk -F ' ' '{print \$2}'" + cmd = "pssh -s -H %s \"%s & %s\"" % ( + dbNode.name, memCheck, coresCheck) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0 or len(output.strip().split()) != 2: + self.context.logger.debug( + ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + " Error: \n%s" % 
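+                    # (note: "memCheck" above actually holds the CPU-count
+                    #  probe and "coresCheck" the memory probe; the combined
+                    #  pssh output is consumed below as exactly two tokens,
+                    #  processor count then memory in GB, and anything else
+                    #  is treated as a probe failure)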
str( + output)) + raise Exception( + ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + " Error: \n%s" % str( + output)) + data_check_info[dbNode.name] = str(output).strip().split() + self.context.logger.debug( + "The check info on each node. \nNode : Info(MemSize | CPUCores)") + for each_node, check_info in data_check_info.items(): + self.context.logger.debug("%s : %s" % (each_node, check_info)) + try: + if len(set([",".join(value) for value in + list(data_check_info.values())])) == 1: + coresNum = int(list(data_check_info.values())[0][0]) + memSize = int(list(data_check_info.values())[0][1]) + if (coresNum == 16 and memSize >= 124 and memSize <= 132): + checkConsistence = True + except Exception as e: + raise Exception(ErrorCode.GAUSS_530["GAUSS_53023"] % str(e)) + self.context.logger.log( + "Successful check consistence of memCheck " + "and coresCheck on all nodes.") + return checkConsistence + + def updateInstanceConfig(self): + """ + function: Update instances config on all nodes + input : NA + output: NA + """ + self.context.logger.log( + "Updating instance configuration on all nodes.") + # update instances config on all nodes + cmdParam = "" + paralistdn = [param.split('=')[0].strip() for param in + self.context.dataGucParam] + if ("autovacuum" not in paralistdn): + self.context.dataGucParam.append("autovacuum=on") + + # get the --dn-guc parameter values + for param in self.context.dataGucParam: + cmdParam += "*==SYMBOL==*-D*==SYMBOL==*%s" % param + # check the --alarm-component parameter + if (self.context.alarm_component != ""): + cmdParam += "*==SYMBOL==*--alarm=%s" % self.context.alarm_component + + # create tmp file for guc parameters + # comm_max_datanode and max_process_memory + self.context.logger.debug("create tmp_guc file.") + tmpGucPath = DefaultValue.getTmpDirFromEnv(self.context.user) + tmpGucFile = "%s/tmp_guc" % tmpGucPath + cmd = g_file.SHELL_CMD_DICT["createFile"] % ( + tmpGucFile, DefaultValue.MAX_DIRECTORY_MODE, tmpGucFile) + DefaultValue.execCommandWithMode(cmd, "Install applications", + self.context.sshTool, + self.context.isSingle, + self.context.mpprcFile) + self.context.logger.debug("Create tmp_guc file successfully.") + + # get the master datanode number + primaryDnNum = DefaultValue.getPrimaryDnNum(self.context.clusterInfo) + self.context.logger.debug( + "get master datanode number : %s" % primaryDnNum) + # get the physic memory of all node and choose the min one + physicMemo = DefaultValue.getPhysicMemo(self.context.sshTool, + self.context.isSingle) + self.context.logger.debug("get physic memory value : %s" % physicMemo) + # get the datanode number in all nodes and choose the max one + dataNodeNum = DefaultValue.getDataNodeNum(self.context.clusterInfo) + self.context.logger.debug("get min datanode number : %s" % dataNodeNum) + + # write the value in tmp file + self.context.logger.debug("Write value in tmp_guc file.") + gucValueContent = str(primaryDnNum) + "," + str( + physicMemo) + "," + str(dataNodeNum) + cmd = g_file.SHELL_CMD_DICT["overWriteFile"] % ( + gucValueContent, tmpGucFile) + DefaultValue.execCommandWithMode(cmd, "Install applications", + self.context.sshTool, + self.context.isSingle, + self.context.mpprcFile) + self.context.logger.debug("Write tmp_guc file successfully.") + + # update instances config + cmd = "source %s;" % self.context.mpprcFile + cmd += "%s " % (OMCommand.getLocalScript("Local_Config_Instance")) + paraLine = \ + "*==SYMBOL==*-U*==SYMBOL==*%s%s*==SYMBOL==*-l*==SYMBOL==*%s" % ( + self.context.user, cmdParam, self.context.localLog) + 
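+        # The "*==SYMBOL==*" sentinel keeps each argument (spaces, quotes
+        # and all) intact until the receiving local script splits it apart.
+        # encodeParaline/BASE_ENCODE used below are defined in Common.py, not
+        # in this hunk; a plausible minimal equivalent, assuming BASE_ENCODE
+        # selects base64, would be:
+        #     import base64
+        #     encoded = base64.b64encode(paraLine.encode()).decode()
+        #     # and on the receiving side:
+        #     argv = base64.b64decode(encoded).decode().split("*==SYMBOL==*")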
if (self.context.dws_mode): + paraLine += "*==SYMBOL==*--dws-mode" + # get the --gucXml parameter + if (self.checkMemAndCores()): + paraLine += "*==SYMBOL==*--gucXml" + paraLine += "*==SYMBOL==*-X*==SYMBOL==*%s" % self.context.xmlFile + cmd += DefaultValue.encodeParaline(paraLine, DefaultValue.BASE_ENCODE) + + self.context.logger.debug( + "Command for updating instances configuration: %s" % cmd) + DefaultValue.execCommandWithMode(cmd, + "update instances configuration", + self.context.sshTool, + self.context.isSingle) + self.context.logger.debug("Successfully configured node instance.") + + def updateHbaConfig(self): + """ + function: config Hba instance + input : NA + output: NA + """ + self.context.logger.log("Configuring pg_hba on all nodes.") + + # Configuring pg_hba + cmd = "source %s;" % self.context.mpprcFile + cmd += "%s -U %s -X '%s' -l '%s' " % ( + OMCommand.getLocalScript("Local_Config_Hba"), self.context.user, + self.context.xmlFile, self.context.localLog) + self.context.logger.debug( + "Command for configuring Hba instance: %s" % cmd) + DefaultValue.execCommandWithMode(cmd, + "config Hba instance", + self.context.sshTool, + self.context.isSingle) + self.context.logger.debug("Successfully configured HBA.") + + def rollbackInstall(self): + """ + function: Rollback install + input : NA + output: NA + 0 succeed + 1 failed + 2 rollback succeed + 3 rollback failed + """ + # Rollback install + self.context.logger.log("Rolling back.") + try: + self.deleteTempFileForUninstall() + # Rollback install + cmd = "source %s;" % self.context.mpprcFile + cmd += "%s -U %s -R '%s' -l '%s' -T" % ( + OMCommand.getLocalScript("Local_Uninstall"), self.context.user, + os.path.realpath(self.context.clusterInfo.appPath), + self.context.localLog) + self.context.logger.debug("Command for rolling back: %s." % cmd) + # exec the cmd for rollback + (status, output) = self.context.sshTool.getSshStatusOutput(cmd) + for ret in list(status.values()): + if (ret != DefaultValue.SUCCESS): + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % + cmd + "Error:\n%s" % str(output)) + self.context.logger.debug(output) + except Exception as e: + # failed to roll back + self.context.logger.error(str(e)) + sys.exit(ROLLBACK_FAILED) + # Rollback succeeded + self.context.logger.log("Rollback succeeded.") + + def checkPgLogFileMode(self): + """ + function: change pg_log file mode + input : NA + output: NA + """ + try: + userDir = "%s/%s" % ( + self.context.clusterInfo.logPath, self.context.user) + # change log file mode + ClusterCommand.getchangeFileModeCmd(userDir) + except Exception as e: + raise Exception(str(e)) + + def checkClusterStatus(self): + """ + function: Check if cluster is running + input : NA + output: NA + """ + # Check if cluster is running + self.context.logger.debug("Checking the cluster status.", "addStep") + try: + cmd = ClusterCommand.getQueryStatusCmd(self.context.user, "", "", + False) + (status, output) = subprocess.getstatusoutput(cmd) + if status == 0: + # You can find the cluster status, + # indicating that the cluster is installed, and exit the error. + self.context.logger.debug("The cmd is %s " % cmd) + raise Exception(ErrorCode.GAUSS_516["GAUSS_51625"] + + " Can not do install now.") + else: + self.context.logger.debug( + "Successfully checked the cluster status.", "constant") + except Exception as e: + self.context.logger.debug("Failed to check cluster status. " + "and the cluster may be not installed.") + + def singleCmd(self, cmd): + """ + function: remove symbol \ if in single mode. 
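+        (in single mode the command is executed locally, so the backslash
+        escaping added for remote ssh execution must be stripped)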
+ input : cmd + output: str + """ + # remove symbol \ if in single mode. + if (self.context.isSingle): + cmd = cmd.replace("\\", "") + return cmd + + def distributeRackInfo(self): + """ + function: Distributing the rack Information File + input : NA + output: NA + """ + node_names = self.context.clusterInfo.getClusterNodeNames() + DefaultValue.distributeRackFile(self.context.sshTool, node_names) + + def deleteSymbolicAppPath(self): + """ + function: delete symbolic app path + input : NA + output : NA + """ + self.context.logger.debug("Delete symbolic link $GAUSSHOME.") + versionFile = VersionInfo.get_version_file() + commitid = VersionInfo.get_version_info(versionFile)[2] + cmd = "rm -rf %s" % self.context.clusterInfo.appPath + self.context.clusterInfo.appPath = \ + self.context.clusterInfo.appPath + "_" + commitid + DefaultValue.execCommandWithMode(cmd, "Delete symbolic link", + self.context.sshTool, + self.context.isSingle, + self.context.mpprcFile) + self.context.logger.debug( + "Successfully delete symbolic link $GAUSSHOME, cmd: %s." % cmd) diff --git a/script/impl/install/OLAP/__init__.py b/script/impl/install/OLAP/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/script/impl/install/__init__.py b/script/impl/install/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/script/impl/om/OLAP/OmImplOLAP.py b/script/impl/om/OLAP/OmImplOLAP.py new file mode 100644 index 0000000..1d326d5 --- /dev/null +++ b/script/impl/om/OLAP/OmImplOLAP.py @@ -0,0 +1,350 @@ +# -*- coding:utf-8 -*- +############################################################################# +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +# Description : omManagerImplOLAP.py is a utility to manage a Gauss200 cluster. 
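+# The KILL_SESSION/QUERY_SESSION statements defined below terminate or list
+# every active backend except the OM/workload management workers; they are
+# intended to be run through gsql against a datanode, e.g. (illustrative
+# invocation only, port and database are placeholders):
+#     gsql -d postgres -p <dn_port> -c "<KILL_SESSION>"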
+############################################################################# +import subprocess +import sys +import re +import time + +sys.path.append(sys.path[0] + "/../../../../") +from gspylib.common.DbClusterInfo import dbClusterInfo, queryCmd +from gspylib.threads.SshTool import SshTool +from gspylib.common.ErrorCode import ErrorCode +from gspylib.common.Common import DefaultValue +from gspylib.common.OMCommand import OMCommand +from impl.om.OmImpl import OmImpl +from gspylib.os.gsfile import g_file + +# action type +ACTION_CHANGEIP = "changeip" + +# tmp file that storage change io information +CHANGEIP_BACKUP_DIR = "%s/change_ip_bak" % DefaultValue.getTmpDirFromEnv() +# cluster_static_config file +CHANGEIP_BAK_STATIC = "%s/cluster_static_config" % CHANGEIP_BACKUP_DIR +# daily alarm timeout waiting for other nodes to complete +DAILY_ALARM_TIME_OUT = 300 +# daily alarm result file validity time +DAILY_ALARM_FILE_VALIDITY_TIME = 60 * 60 * 1 +DAILY_ALARM_OUT_FILE = "" +# The shell script with check remote result file change time +DAILY_ALARM_SHELL_FILE = "/tmp/om_dailyAlarm_%s.sh" % \ + DefaultValue.GetHostIpOrName() +# The tmp file with cluster status +DAILY_ALARM_STATUS_FILE = "/tmp/gauss_cluster_status_dailyAlarm.dat" + +ISOLATE_TIMEOUT = 180 +KILL_SESSION = "select pg_terminate_backend(pid) " \ + "from pg_stat_activity where state " \ + "in ('active', 'fastpath function call', 'retrying') and " \ + "query not like '%terminate%' " \ + "and application_name not " \ + "in('JobScheduler','WorkloadMonitor'," \ + "'workload','WLMArbiter','cm_agent');" +QUERY_SESSION = "select pid from pg_stat_activity" \ + " where state " \ + "in ('active', 'fastpath function call', 'retrying') and " \ + "query not like '%terminate%' " \ + "and application_name " \ + "not in('JobScheduler','WorkloadMonitor'" \ + ",'workload','WLMArbiter','cm_agent');" + + +########################################### +class OmImplOLAP(OmImpl): + """ + class: OmImplOLAP + """ + + def __init__(self, OperationManager=None): + """ + function:class init + input:OperationManager + output:NA + """ + OmImpl.__init__(self, OperationManager) + + def checkNode(self): + """ + function: check if the current node is to be uninstalled + input : NA + output: NA + """ + if (len( + self.context.g_opts.nodeInfo) != 0 + and self.context.g_opts.hostname == + DefaultValue.GetHostIpOrName()): + raise Exception( + ErrorCode.GAUSS_516["GAUSS_51631"] % "coordinate" + + "\nPlease perform this operation on other nodes " + "because this node will be deleted.") + + # AP + def stopCluster(self): + """ + function:Stop cluster + input:NA + output:NA + """ + self.logger.log("Stopping the cluster.") + # Stop cluster in 300 seconds + cmd = "source %s; %s -t %d" % ( + self.context.g_opts.mpprcFile, OMCommand.getLocalScript("Gs_Stop"), + DefaultValue.TIMEOUT_CLUSTER_STOP) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + self.logger.log( + "Warning: Failed to stop cluster within 300 seconds," + "stopping cluster again at immediate mode.") + cmd = "source %s; %s -m immediate -t %d" % ( + self.context.g_opts.mpprcFile, + OMCommand.getLocalScript("Gs_Stop"), + DefaultValue.TIMEOUT_CLUSTER_STOP) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + self.logger.log("The cmd is %s " % cmd) + raise Exception( + ErrorCode.GAUSS_516["GAUSS_51610"] + % "the cluster at immediate mode" + + " Error: \n%s" % output) + + self.logger.log("Successfully stopped the cluster.") + + # AP + def startCluster(self): + """ + 
function: Start cluster
+        input:NA
+        output:NA
+        """
+        self.logger.log("Starting the cluster.", "addStep")
+        # Delete the cluster dynamic config on all nodes if it exists
+        clusterDynamicConf = "%s/bin/cluster_dynamic_config" \
+                             % self.oldClusterInfo.appPath
+        cmd = g_file.SHELL_CMD_DICT["deleteFile"] % (
+            clusterDynamicConf, clusterDynamicConf)
+        self.logger.debug(
+            "Command for removing the cluster dynamic configuration: %s."
+            % cmd)
+        self.sshTool.executeCommand(cmd, "remove dynamic configuration")
+        # Start the cluster within 300 seconds
+        cmd = "source %s; %s -t %s" % (
+            self.context.g_opts.mpprcFile,
+            OMCommand.getLocalScript("Gs_Start"),
+            DefaultValue.TIMEOUT_CLUSTER_START)
+        (status, output) = subprocess.getstatusoutput(cmd)
+        if (status != 0):
+            self.logger.debug("The cmd is %s " % cmd)
+            raise Exception(
+                ErrorCode.GAUSS_516["GAUSS_51607"]
+                % "the cluster" + " Error: \n%s" % output)
+
+        self.logger.log("Successfully started the cluster.", "constant")
+
+    ##########################################################################
+    # Start Flow
+    ##########################################################################
+    def getNodeId(self):
+        """
+        function: get node Id
+        input: NA
+        output: nodeId, clusterType
+        """
+        clusterType = "cluster"
+        nodeId = 0
+        if (self.context.g_opts.nodeName != ""):
+            clusterType = "node"
+            dbNode = self.context.clusterInfo.getDbNodeByName(
+                self.context.g_opts.nodeName)
+            if not dbNode:
+                raise Exception(
+                    ErrorCode.GAUSS_516["GAUSS_51619"]
+                    % self.context.g_opts.nodeName)
+            nodeId = dbNode.id
+        elif (self.context.g_opts.azName != ""):
+            clusterType = self.context.g_opts.azName
+            # check whether the given azName is in the cluster
+            if (
+                    self.context.g_opts.azName
+                    not in self.context.clusterInfo.getazNames()):
+                raise Exception(
+                    ErrorCode.GAUSS_500["GAUSS_50004"]
+                    % '-az' + " The az name [%s] is not in the cluster."
+                    % self.context.g_opts.azName)
+        return nodeId, clusterType
+
+    def doStartCluster(self):
+        """
+        function: do start cluster
+        input: NA
+        output: NA
+        """
+        self.logger.debug("Operating: Starting.")
+        # Determine whether to start a single node or the whole cluster
+        startType = "node" if self.context.g_opts.nodeName != "" else "cluster"
+        # Perform a start operation
+        self.logger.log("Starting %s."
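+                        # (a single node when a node name was supplied on the
+                        #  command line, otherwise the whole cluster followed
+                        #  by the Normal-state poll below)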
% startType) + self.logger.log("=========================================") + hostName = DefaultValue.GetHostIpOrName() + # get the newest dynaminc config and send to other node + self.clusterInfo.checkClusterDynamicConfig(self.context.user, hostName) + if self.context.g_opts.nodeName == "": + hostList = self.clusterInfo.getClusterNodeNames() + else: + hostList = [] + hostList.append(self.context.g_opts.nodeName) + self.sshTool = SshTool(self.clusterInfo.getClusterNodeNames(), None, + DefaultValue.TIMEOUT_CLUSTER_START) + if self.time_out is None: + time_out = DefaultValue.TIMEOUT_CLUSTER_START + else: + time_out = self.time_out + cmd = "source %s; %s -U %s -R %s -t %s --security-mode=%s" % ( + self.context.g_opts.mpprcFile, + OMCommand.getLocalScript("Local_StartInstance"), + self.context.user, self.context.clusterInfo.appPath, time_out, + self.context.g_opts.security_mode) + if self.dataDir != "": + cmd += " -D %s" % self.dataDir + failedOutput = '' + for nodeName in hostList: + (statusMap, output) = self.sshTool.getSshStatusOutput(cmd, [nodeName]) + if statusMap[nodeName] != 'Success': + failedOutput += output + elif re.search("another server might be running", output): + self.logger.log(output) + elif re.search("] WARNING:", output): + tmp = '\n'.join(re.findall(".*] WARNING:.*", output)) + self.logger.log(output[0:output.find(":")] + '\n' + tmp) + if len(failedOutput): + self.logger.log("=========================================") + raise Exception( + ErrorCode.GAUSS_536["GAUSS_53600"] % (cmd, failedOutput)) + if startType == "cluster": + starttime = time.time() + cluster_state = "" + cmd = "source %s; gs_om -t status|grep cluster_state" \ + % self.context.g_opts.mpprcFile + while time.time() <= 30 + starttime: + status, output = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception( + ErrorCode.GAUSS_516["GAUSS_51607"] % "cluster" + + " After startup, check cluster_state failed") + else: + cluster_state = output.split()[-1] + if cluster_state != "Normal": + self.logger.log("Waiting for check cluster state...") + time.sleep(5) + else: + break + if cluster_state != "Normal": + raise Exception(ErrorCode.GAUSS_516["GAUSS_51607"] % "cluster" + + " After startup, the last check results were" + " %s. Please check manually." + % cluster_state) + self.logger.log("=========================================") + self.logger.log("Successfully started.") + self.logger.debug("Operation succeeded: Start.") + + def doStopCluster(self): + """ + function: do stop cluster + input: NA + output: NA + """ + self.logger.debug("Operating: Stopping.") + # Specifies the stop node + # Gets the specified node id + stopType = "node" if self.context.g_opts.nodeName != "" else "cluster" + # Perform a stop operation + self.logger.log("Stopping %s." 
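+                        # (stopType mirrors the start path: one node if a
+                        #  node name was given, otherwise the whole cluster)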
% stopType) + self.logger.log("=========================================") + if self.context.g_opts.nodeName == "": + hostList = self.clusterInfo.getClusterNodeNames() + else: + hostList = [] + hostList.append(self.context.g_opts.nodeName) + self.sshTool = SshTool(self.clusterInfo.getClusterNodeNames(), None, + DefaultValue.TIMEOUT_CLUSTER_START) + if self.time_out is None: + time_out = DefaultValue.TIMEOUT_CLUSTER_STOP + else: + time_out = self.time_out + cmd = "source %s; %s -U %s -R %s -t %s" % ( + self.context.g_opts.mpprcFile, + OMCommand.getLocalScript("Local_StopInstance"), + self.context.user, self.context.clusterInfo.appPath, time_out) + if self.dataDir != "": + cmd += " -D %s" % self.dataDir + if self.mode != "": + cmd += " -m %s" % self.mode + (statusMap, output) = self.sshTool.getSshStatusOutput(cmd, hostList) + for nodeName in hostList: + if statusMap[nodeName] != 'Success': + raise Exception( + ErrorCode.GAUSS_536["GAUSS_53606"] % (cmd, output)) + self.logger.log("Successfully stopped %s." % stopType) + + self.logger.log("=========================================") + self.logger.log("End stop %s." % stopType) + self.logger.debug("Operation succeeded: Stop.") + + def doView(self): + """ + function:get cluster node info + input:NA + output:NA + """ + # view static_config_file + self.context.clusterInfo.printStaticConfig(self.context.user, + self.context.g_opts.outFile) + + def doQuery(self): + """ + function: do query + input : NA + output : NA + """ + hostName = DefaultValue.GetHostIpOrName() + sshtool = SshTool(self.context.clusterInfo.getClusterNodeNames()) + cmd = queryCmd() + if (self.context.g_opts.outFile != ""): + cmd.outputFile = self.context.g_opts.outFile + self.context.clusterInfo.queryClsInfo(hostName, sshtool, + self.context.mpprcFile, cmd) + + def doRefreshConf(self): + """ + function: do refresh conf + input : NA + output : NA + """ + if self.context.clusterInfo.isSingleNode(): + self.logger.log( + "No need to generate dynamic configuration file for one node.") + return + self.logger.log("Generating dynamic configuration file for all nodes.") + hostname = DefaultValue.GetHostIpOrName() + sshtool = SshTool(self.context.clusterInfo.getClusterNodeNames()) + self.context.clusterInfo.doRefreshConf(self.context.user, hostname, + sshtool) + + self.logger.log("Successfully generated dynamic configuration file.") diff --git a/script/impl/om/OLAP/__init__.py b/script/impl/om/OLAP/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/script/impl/om/OmImpl.py b/script/impl/om/OmImpl.py new file mode 100644 index 0000000..4dcaa74 --- /dev/null +++ b/script/impl/om/OmImpl.py @@ -0,0 +1,949 @@ +# -*- coding:utf-8 -*- +############################################################################# +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +# Description : gs_om is a utility to manage a Gauss200 cluster. 
+############################################################################# + +import subprocess +import os +import sys +import pwd + +from datetime import datetime + +sys.path.append(sys.path[0] + "/../../../") +from gspylib.common.DbClusterInfo import dbClusterInfo, queryCmd +from gspylib.threads.SshTool import SshTool +from gspylib.common.DbClusterStatus import DbClusterStatus +from gspylib.common.ErrorCode import ErrorCode +from gspylib.common.Common import DefaultValue, ClusterCommand +from gspylib.common.OMCommand import OMCommand +from gspylib.os.gsOSlib import g_OSlib +from gspylib.os.gsfile import g_file +from gspylib.os.gsplatform import g_Platform + +# Cert +EMPTY_CERT = "emptyCert" +EMPTY_FLAG = "emptyflag" + +# tmp file that storage change io information +CHANGEIP_BACKUP_DIR = "%s/change_ip_bak" % DefaultValue.getTmpDirFromEnv() + +# EC odbc files +ODBC_INI = "odbc.ini" +ODBC_SYS_INI = "odbcsys" +FC_CONF = "fc_conf/" + +# postgis lib file +POSTGIS_FILE_LIST = ["lib/postgresql/postgis-[0-9]+.[0-9]+.so", + "lib/libgeos_c.so.[0-9]+", "lib/libproj.so.[0-9]+", + "lib/libjson-c.so.[0-9]+", + "lib/libgeos-[0-9]+.[0-9]+.[0-9]+so", + "lib/libgcc_s.so.[0-9]+", + "lib/libstdc\+\+.so.[0-9]+", + "share/postgresql/extension/postgis--[0-9]" + "+.[0-9]+.[0-9]+.sql", + "share/postgresql/extension/postgis.control", + "bin/pgsql2shp", "bin/shp2pgsql"] + +# elastic group with node group name +ELASTIC_GROUP = "elastic_group" + +# lib of sparkodbc +LIB_SPARK = ["/usr/lib64/libboost_filesystem.so.1.55.0", + "/usr/lib64/libboost_system.so.1.55.0", + "/usr/lib64/libfb303.so", "/usr/lib64/libhiveclient.so", + "/usr/lib64/libhiveclient.so.1.0.0", + "/usr/lib64/libodbchive.so", "/usr/lib64/libodbchive.so.1.0.0", + "/usr/lib64/libsasl2.so.2", + "/usr/lib64/libsasl2.so.2.0.23", "/usr/lib64/libthrift-0.9.3.so", + "/usr/lib64/libsasl2.so.2.0.22"] + + +########################################### +class OmImpl: + """ + init the command options + save command line parameter values + """ + + def __init__(self, OperationManager): + """ + function: constructor + """ + # global + self.context = OperationManager + self.logger = OperationManager.logger + self.user = OperationManager.user + self.newClusterInfo = None + self.oldClusterInfo = None + self.utilsPath = None + self.mpprcFile = "" + self.nodeId = OperationManager.g_opts.nodeId + self.time_out = OperationManager.g_opts.time_out + self.mode = OperationManager.g_opts.mode + self.clusterInfo = OperationManager.clusterInfo + self.dataDir = OperationManager.g_opts.dataDir + self.sshTool = None + + def doStopCluster(self): + """ + function: do stop cluster + input: NA + output: NA + """ + pass + + def doClusterStatus(self): + """ + function: get cluster + input: NA + output: NA + """ + pass + + def doStart(self): + """ + function:Start cluster or node + input:NA + output:NA + """ + self.doStartCluster() + + def doStop(self): + """ + function:Stop cluster or node + input:NA + output:NA + """ + self.logger.debug("Operating: Stopping.") + self.doStopCluster() + + def getNodeStatus(self, nodename): + """ + function: get node status + input: nodename + output: NA + """ + try: + # Create a temporary file to save cluster status + tmpDir = DefaultValue.getTmpDirFromEnv() + tmpFile = os.path.join(tmpDir, "gauss_cluster_status.dat_" + \ + str(datetime.now().strftime( + '%Y%m%d%H%M%S')) + "_" + str( + os.getpid())) + + # Perform the start operation + # Writes the execution result to a temporary file + cmd = ClusterCommand.getQueryStatusCmd(self.context.g_opts.user, + "", 
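+                # (tmpFile on the next line receives the status report that
+                #  DbClusterStatus.initFromFile() parses below; it is removed
+                #  again via DefaultValue.cleanTmpFile())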
tmpFile, True) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + self.logger.debug("The cmd is %s " % cmd) + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % \ + cmd + "Error: \n%s" % output) + + # Initialize cluster status information for the temporary file + clusterStatus = DbClusterStatus() + clusterStatus.initFromFile(tmpFile) + + # Get node status + nodeStatusInfo = None + for dbNode in clusterStatus.dbNodes: + if (dbNode.name == nodename): + nodeStatusInfo = dbNode + if (nodeStatusInfo and nodeStatusInfo.isNodeHealthy()): + nodeStatus = clusterStatus.OM_NODE_STATUS_NORMAL + else: + nodeStatus = clusterStatus.OM_NODE_STATUS_ABNORMAL + + DefaultValue.cleanTmpFile(tmpFile) + return nodeStatus + except Exception as e: + DefaultValue.cleanTmpFile(tmpFile) + self.logger.debug( + "Failed to get node status. Error: \n%s." % str(e)) + return "Abnormal" + + def doStatus(self): + """ + function:Get the status of cluster or node + input:NA + output:NA + """ + hostName = DefaultValue.GetHostIpOrName() + sshtool = SshTool(self.context.clusterInfo.getClusterNodeNames()) + nodeId = 0 + if (self.context.g_opts.nodeName != ""): + for dbnode in self.context.clusterInfo.dbNodes: + if dbnode.name == self.context.g_opts.nodeName: + nodeId = dbnode.id + if (nodeId == 0): + raise Exception( + ErrorCode.GAUSS_516["GAUSS_51619"] + % self.context.g_opts.nodeName) + cmd = queryCmd() + if (self.context.g_opts.outFile != ""): + cmd.outputFile = self.context.g_opts.outFile + else: + cmd.outputFile = self.logger.logFile + if (self.context.g_opts.show_detail): + if ( + self.context.clusterInfo.clusterType + == DefaultValue.CLUSTER_TYPE_SINGLE_PRIMARY_MULTI_STANDBY): + cmd.dataPathQuery = True + cmd.azNameQuery = True + else: + cmd.dataPathQuery = True + else: + if (nodeId > 0): + self.context.clusterInfo.queryNodeInfo(sshtool, hostName, + nodeId, cmd.outputFile) + return + if (self.context.g_opts.showAll): + self.context.clusterInfo.queryNodeInfo(sshtool, hostName, + nodeId, cmd.outputFile) + return + cmd.clusterStateQuery = True + self.context.clusterInfo.queryClsInfo(hostName, sshtool, + self.context.mpprcFile, cmd) + + def doRebuildConf(self): + """ + generating static configuration files for all nodes + input:NA + output:NA + """ + try: + self.logger.log( + "Generating static configuration files for all nodes.") + # Initialize the cluster information according to the XML file + self.context.clusterInfo = dbClusterInfo() + self.context.clusterInfo.initFromXml(self.context.g_opts.confFile) + + # 1.create a tmp dir + self.logger.log( + "Creating temp directory to store static configuration files.") + dirName = os.path.dirname(os.path.realpath(__file__)) + tmpDirName = os.path.realpath( + "%s/../../static_config_files" % dirName) + cmd = "mkdir -p -m %s '%s'" % ( + DefaultValue.KEY_DIRECTORY_MODE, tmpDirName) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + raise Exception( + ErrorCode.GAUSS_502["GAUSS_50208"] + % "temporary directory" + "\nCommand:%s\nError: %s" + % (cmd, output)) + self.logger.log("Successfully created the temp directory.") + + # create static files + self.logger.log("Generating static configuration files.") + for dbNode in self.context.clusterInfo.dbNodes: + staticConfigPath = "%s/cluster_static_config_%s" % ( + tmpDirName, dbNode.name) + self.context.clusterInfo.saveToStaticConfig(staticConfigPath, + dbNode.id) + self.logger.log( + "Successfully generated static configuration files.") + self.logger.log( + "Static configuration files for all 
nodes are saved in %s." + % tmpDirName) + + # check if need send static config files + if not self.context.g_opts.distribute: + self.logger.debug( + "No need to distribute static configuration files " + "to installation directory.") + return + + # distribute static config file + self.logger.log( + "Distributing static configuration files to all nodes.") + for dbNode in self.context.clusterInfo.dbNodes: + if (dbNode.name != DefaultValue.GetHostIpOrName()): + cmd = \ + "pscp -H %s '%s'/cluster_static_config_%s '%s'" \ + "/bin/cluster_static_config" % ( + dbNode.name, tmpDirName, + dbNode.name, self.context.clusterInfo.appPath) + else: + cmd = \ + "cp '%s'/cluster_static_config_%s '%s'" \ + "/bin/cluster_static_config" % ( + tmpDirName, + dbNode.name, self.context.clusterInfo.appPath) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + raise Exception( + ErrorCode.GAUSS_502["GAUSS_50216"] + % "static configuration file" + + "Node: %s.\nCommand: \n%s\nError: \n%s" + % (dbNode.name, cmd, output)) + self.logger.log( + "Successfully distributed static configuration files.") + + except Exception as e: + g_file.removeDirectory(tmpDirName) + raise Exception(str(e)) + + ########################################################################## + # doReplaceSSLCert start + ########################################################################## + def doReplaceSSLCert(self): + """ + function: replace ssl cert files + input: NA + output: NA + """ + try: + # Initialize the cluster information according to the xml file + self.context.clusterInfo = dbClusterInfo() + self.context.clusterInfo.initFromStaticConfig( + g_OSlib.getPathOwner(self.context.g_opts.certFile)[0]) + self.sshTool = SshTool( + self.context.clusterInfo.getClusterNodeNames(), + self.logger.logFile) + except Exception as e: + raise Exception(str(e)) + + try: + self.logger.log("Starting ssl cert files replace.", "addStep") + tempDir = os.path.join(DefaultValue.getTmpDirFromEnv(), + "tempCertDir") + + # unzip files to temp directory + if (os.path.exists(tempDir)): + g_file.removeDirectory(tempDir) + g_file.createDirectory(tempDir, True, + DefaultValue.KEY_DIRECTORY_MODE) + g_file.decompressZipFiles(self.context.g_opts.certFile, tempDir) + + realCertList = DefaultValue.CERT_FILES_LIST + clientCertList = DefaultValue.CLIENT_CERT_LIST + # check file exists + for clientCert in clientCertList: + sslFile = os.path.join(tempDir, clientCert) + if (not os.path.isfile(sslFile)): + raise Exception( + (ErrorCode.GAUSS_502["GAUSS_50201"] % sslFile) + \ + "Missing SSL client cert file in ZIP file.") + + certList = [] + dnDict = self.getDnNodeDict() + for cert in realCertList: + sslFile = os.path.join(tempDir, cert) + + if (not os.path.isfile( + sslFile) and cert != DefaultValue.SSL_CRL_FILE): + raise Exception( + (ErrorCode.GAUSS_502["GAUSS_50201"] % sslFile) + \ + "Missing SSL server cert file in ZIP file.") + if (os.path.isfile(sslFile)): + certList.append(cert) + + # distribute cert files to datanodes + self.doDNBackup() + self.distributeDNCert(certList, dnDict) + + # clear temp directory + g_file.removeDirectory(tempDir) + if (not self.context.g_opts.localMode): + self.logger.log( + "Successfully distributed cert files on all nodes.") + except Exception as e: + g_file.removeDirectory(tempDir) + raise Exception(str(e)) + + def isDnEmpty(self, nodeName=""): + """ + function: Is there exists empty file in dbnodes directory. 
+ input: node name + output: True/False + """ + allDnNodeDict = self.getDnNodeDict() + nodeDnDir = allDnNodeDict[nodeName] + emptyCert = os.path.join(nodeDnDir, EMPTY_CERT) + status = self.sshTool.checkRemoteFileExist( + nodeName, emptyCert, + self.context.g_opts.mpprcFile) + return status + + def doDNBackup(self): + """ + function: backup SSL cert files on single_inst cluster. + input: backupFlag is a flag of exist DB in node + output: NA + """ + self.logger.log("Backing up old ssl cert files.") + + backupList = DefaultValue.CERT_FILES_LIST[:] + allDnNodeDict = self.getDnNodeDict() + normalNodeList = [] + + tarBackupList = [] + if (self.context.g_opts.localMode): + self.logger.debug("Backing up database node SSL cert files.") + nodeDnDir = allDnNodeDict[DefaultValue.GetHostIpOrName()] + backupFlagFile = os.path.join(nodeDnDir, "certFlag") + if (os.path.isfile(backupFlagFile)): + self.logger.log("There is no need to backup ssl cert files.") + return + + os.mknod(backupFlagFile, DefaultValue.KEY_FILE_PERMISSION) + for certFile in backupList: + realCertFile = os.path.join(nodeDnDir, certFile) + if (os.path.isfile(realCertFile)): + tarBackupList.append(certFile) + + if (len(tarBackupList) == 0): + os.mknod(os.path.join(nodeDnDir, EMPTY_CERT)) + cmd = " %s && " % g_Platform.getCdCmd(nodeDnDir) + cmd += g_Platform.getCompressFilesCmd( + DefaultValue.CERT_BACKUP_FILE, EMPTY_CERT) + else: + cmd = " %s && " % g_Platform.getCdCmd(nodeDnDir) + cmd += "tar -zcvf %s" % (DefaultValue.CERT_BACKUP_FILE) + for certFile in tarBackupList: + cmd += " %s" % certFile + (status, output) = DefaultValue.retryGetstatusoutput(cmd) + if (status != 0): + raise Exception( + ErrorCode.GAUSS_514["GAUSS_51400"] + % cmd + "Failed backup gds cert files on local node." + + "Error: \n%s" % output) + + # Clear empty file + if (os.path.isfile(os.path.join(nodeDnDir, EMPTY_CERT))): + os.remove(os.path.join(nodeDnDir, EMPTY_CERT)) + self.logger.log("Successfully executed local backup.") + return + # 1 check backup flag file on all dbnodes. + for node in allDnNodeDict.keys(): + nodeDnDir = allDnNodeDict[node] + backupFlagFile = os.path.join(nodeDnDir, "certFlag") + status = self.sshTool.checkRemoteFileExist( + node, backupFlagFile, + self.context.g_opts.mpprcFile) + if not status: + normalNodeList.append(node) + # 2 if exists flag file on anyone node, there will be return. + if (len(normalNodeList) != len(allDnNodeDict.keys())): + self.logger.log("There is no need to backup on all dbnodes.") + return + # 3 backup cert files on all dbnodes. + for node in allDnNodeDict.keys(): + nodeDnDir = allDnNodeDict[node] + backupFlagFile = os.path.join(nodeDnDir, "certFlag") + backupTar = os.path.join(nodeDnDir, DefaultValue.CERT_BACKUP_FILE) + sshcmd = g_file.SHELL_CMD_DICT["overWriteFile"] % ( + "backupflagfile", backupFlagFile) + sshcmd += " && " + g_file.SHELL_CMD_DICT["changeMode"] % ( + DefaultValue.KEY_FILE_MODE, backupFlagFile) + self.sshTool.executeCommand(sshcmd, "Make a flag file of backup.", + DefaultValue.SUCCESS, [node], + self.context.g_opts.mpprcFile) + for certFile in backupList: + realCertFile = os.path.join(nodeDnDir, certFile) + status = self.sshTool.checkRemoteFileExist( + node, realCertFile, + self.context.g_opts.mpprcFile) + if status: + tarBackupList.append(certFile) + # if no cert files, + # there will be create a file for '.tar' file. 
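+            # (the EMPTY_CERT marker is tarred up so that a later rollback
+            #  can distinguish "no certs existed" from "backup missing")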
+ if (len(tarBackupList) == 0): + sshcmd = g_Platform.getCreateFileCmd( + os.path.join(nodeDnDir, EMPTY_CERT)) + self.sshTool.executeCommand(sshcmd, + "Backup empty cert file.", + DefaultValue.SUCCESS, [node], + self.context.g_opts.mpprcFile) + sshcmd = " %s && " % g_Platform.getCdCmd(nodeDnDir) + sshcmd += g_Platform.getCompressFilesCmd( + DefaultValue.CERT_BACKUP_FILE, EMPTY_CERT) + else: + sshcmd = " %s && " % g_Platform.getCdCmd(nodeDnDir) + sshcmd += "tar -zcvf %s" % (DefaultValue.CERT_BACKUP_FILE) + for certDir in tarBackupList: + sshcmd += " %s" % certDir + self.sshTool.executeCommand(sshcmd, "Backup cert file.", + DefaultValue.SUCCESS, [node], + self.context.g_opts.mpprcFile) + # Clear empty file + if (self.isDnEmpty(node)): + sshcmd = g_file.SHELL_CMD_DICT["deleteFile"] % ( + os.path.join(nodeDnDir, EMPTY_CERT), + os.path.join(nodeDnDir, EMPTY_CERT)) + self.sshTool.executeCommand(sshcmd, "Clear empty file.", + DefaultValue.SUCCESS, [node], + self.context.g_opts.mpprcFile) + self.logger.log( + "Successfully backup SSL cert files on [%s]." % node) + sshcmd = g_file.SHELL_CMD_DICT["changeMode"] % ( + DefaultValue.KEY_FILE_MODE, backupTar) + self.sshTool.executeCommand(sshcmd, "Chmod back up cert", + DefaultValue.SUCCESS, [node], + self.context.g_opts.mpprcFile) + + def doDNSSLCertRollback(self): + """ + function: rollback SSL cert file in DN instance directory + input: NA + output: NA + """ + self.context.clusterInfo = dbClusterInfo() + self.context.clusterInfo.initFromStaticConfig( + pwd.getpwuid(os.getuid()).pw_name) + self.sshTool = SshTool(self.context.clusterInfo.getClusterNodeNames(), + self.logger.logFile) + backupList = DefaultValue.CERT_FILES_LIST[:] + + allDnNodeDict = self.getDnNodeDict() + noBackupList = [] + + temp = "tempDir" + if self.context.g_opts.localMode: + if ((DefaultValue.GetHostIpOrName() in allDnNodeDict.keys()) and + os.path.isfile(os.path.join( + allDnNodeDict[DefaultValue.GetHostIpOrName()], + DefaultValue.CERT_BACKUP_FILE))): + + localDnDir = allDnNodeDict[DefaultValue.GetHostIpOrName()] + tempDir = os.path.join(localDnDir, temp) + if (os.path.exists(tempDir)): + g_file.removeDirectory(tempDir) + os.mkdir(tempDir, DefaultValue.KEY_DIRECTORY_PERMISSION) + + for certFile in backupList: + realCertFile = os.path.join(localDnDir, certFile) + if (os.path.exists(realCertFile)): + g_file.moveFile(realCertFile, tempDir) + + cmd = "cd '%s' && if [ -f '%s' ];then tar -zxvf %s;fi" % \ + (localDnDir, DefaultValue.CERT_BACKUP_FILE, + DefaultValue.CERT_BACKUP_FILE) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + cmd = "cp '%s'/* '%s' && rm -rf '%s'" % ( + tempDir, localDnDir, tempDir) + (status, output) = subprocess.getstatusoutput(cmd) + raise Exception( + (ErrorCode.GAUSS_514["GAUSS_51400"] % cmd) + + "Failed uncompression SSL backup file." 
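+                        # (the cp fallback above has already moved the
+                        #  original certs back out of tempDir, so this raise
+                        #  leaves the datanode directory as it was)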
+ + "Error: \n%s" % output) + + # remove temp directory + if (os.path.exists(tempDir)): + g_file.removeDirectory(tempDir) + + # set guc option + if (os.path.isfile( + os.path.join(localDnDir, DefaultValue.SSL_CRL_FILE))): + cmd = \ + "gs_guc set -D %s " \ + "-c \"ssl_crl_file=\'%s\'\"" \ + % (localDnDir, DefaultValue.SSL_CRL_FILE) + else: + cmd = \ + "gs_guc set -D %s " \ + "-c \"ssl_crl_file=\'\'\"" % localDnDir + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + raise Exception( + ErrorCode.GAUSS_514["GAUSS_51400"] + % cmd + "Error: \n%s" % output) + + if (os.path.isfile(os.path.join(localDnDir, EMPTY_CERT))): + os.remove(os.path.join(localDnDir, EMPTY_CERT)) + + self.logger.log( + "Successfully rollback SSL cert files with local mode.") + return + else: + self.logger.log("There is not exists backup files.") + return + # 1.check backup file "gsql_cert_backup.tar.gz" on all dbnodes. + for node in allDnNodeDict.keys(): + backupGzFile = os.path.join(allDnNodeDict[node], + DefaultValue.CERT_BACKUP_FILE) + status = self.sshTool.checkRemoteFileExist( + node, backupGzFile, + self.context.g_opts.mpprcFile) + if not status: + noBackupList.append(node) + if (len(noBackupList) > 0): + raise Exception( + (ErrorCode.GAUSS_502["GAUSS_50201"] + % DefaultValue.CERT_BACKUP_FILE) + + "Can't rollback SSL cert files on %s." % noBackupList) + + # 2.perform rollback on all dbnodes. + for node in allDnNodeDict.keys(): + backupGzFile = os.path.join( + allDnNodeDict[node], DefaultValue.CERT_BACKUP_FILE) + # 2-1.move SSL cert files in dn directory to temp directory. + sshcmd = "cd '%s' && if [ -d '%s' ];then rm -rf '%s'" \ + " && mkdir '%s';else mkdir '%s';fi" % \ + (allDnNodeDict[node], temp, temp, temp, temp) + self.sshTool.executeCommand(sshcmd, "Make temp directory.", + DefaultValue.SUCCESS, \ + [node], self.context.g_opts.mpprcFile) + for certFile in backupList: + realCertFile = os.path.join(allDnNodeDict[node], certFile) + sshcmd = " %s && " % g_Platform.getCdCmd( + os.path.join(allDnNodeDict[node], temp)) + sshcmd += g_file.SHELL_CMD_DICT["renameFile"] % ( + realCertFile, realCertFile, "./") + self.sshTool.executeCommand( + sshcmd, + "Backup cert files to temp directory.", + DefaultValue.SUCCESS, + [node], + self.context.g_opts.mpprcFile) + + # 2-2.uncompression "gsql_cert_backup.tar.gz" file + sshcmd = "cd '%s' && if [ -f '%s' ];then tar -zxvf %s;fi" % \ + (allDnNodeDict[node], DefaultValue.CERT_BACKUP_FILE, + DefaultValue.CERT_BACKUP_FILE) + self.sshTool.executeCommand(sshcmd, + "Unzip backup file.", + DefaultValue.SUCCESS, + [node], + self.context.g_opts.mpprcFile) + + # 2-3.clear temp directory + sshcmd = " %s && " % g_Platform.getCdCmd(allDnNodeDict[node]) + sshcmd += g_file.SHELL_CMD_DICT["deleteDir"] % (temp, temp) + self.sshTool.executeCommand(sshcmd, + "Clear backup cert files.", + DefaultValue.SUCCESS, + [node], + self.context.g_opts.mpprcFile) + + # 2-4.is have "sslcrl-file.crl",config 'ssl_crl_file' option + status = self.sshTool.checkRemoteFileExist( + node, os.path.join( + allDnNodeDict[node], + DefaultValue.SSL_CRL_FILE), + self.context.g_opts.mpprcFile) + # exists 'sslcrl-file.crl' file ,config option of 'postgresql.conf' + if (status): + if node == DefaultValue.GetHostIpOrName(): + sshcmd = \ + "gs_guc set -D %s " \ + "-c \"ssl_crl_file='%s'\"" \ + % (allDnNodeDict[node], DefaultValue.SSL_CRL_FILE) + else: + sshcmd = "gs_guc set -D %s " \ + "-c \"ssl_crl_file=\\\\\\'%s\\\\\\'\"" \ + % (allDnNodeDict[node], DefaultValue.SSL_CRL_FILE) + 
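+                # Remote nodes need the longer \\\\\\' ladder: the value must
+                # still read ssl_crl_file='...' after Python unescaping plus
+                # one more shell round-trip on the target host, which is why
+                # the local and remote branches differ only in quoting.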
self.sshTool.executeCommand(sshcmd, + "Exist 'ssl_crl_file'", + DefaultValue.SUCCESS, + [node], + self.context.g_opts.mpprcFile) + else: + if (node == DefaultValue.GetHostIpOrName()): + sshcmd = "gs_guc set " \ + "-D %s -c \"ssl_crl_file=''\"" % ( + allDnNodeDict[node]) + else: + sshcmd = "gs_guc set " \ + "-D %s -c \"ssl_crl_file=\\\\\\'\\\\\\'\"" \ + % (allDnNodeDict[node]) + self.sshTool.executeCommand(sshcmd, + "No exist 'ssl_crl_file'", + DefaultValue.SUCCESS, + [node], + self.context.g_opts.mpprcFile) + + # Clear empty file. + if (self.isDnEmpty(node)): + sshcmd = g_file.SHELL_CMD_DICT["deleteFile"] % ( + os.path.join(allDnNodeDict[node], EMPTY_CERT), + os.path.join(allDnNodeDict[node], EMPTY_CERT)) + self.sshTool.executeCommand(sshcmd, + "Clear empty file.", + DefaultValue.SUCCESS, + [node], + self.context.g_opts.mpprcFile) + self.logger.log( + "Successfully rollback SSL cert files on [%s]." % node) + + def getDnNodeDict(self): + """ + function: get dbnodes information + input: NA + output: dictionary + """ + clusterDnNodes = {} + if (not self.context.clusterInfo.isSingleInstCluster()): + return clusterDnNodes + for node in self.context.clusterInfo.dbNodes: + if (len(node.datanodes) > 0): + clusterDnNodes[node.datanodes[0].hostname] = node.datanodes[ + 0].datadir + self.logger.debug("Successfully get database node dict.") + return clusterDnNodes + + def distributeDNCert(self, certList, dnDict=None): + """ + function: distribute ssl cert files on single_inst cluster + input: certList: cert files list + dnDict: dictionary + output: NA + """ + tempDir = "tempCertDir" + gphost = DefaultValue.getTmpDirFromEnv() + if dnDict is None: + dnDict = {} + dnName = dnDict.keys() + certPathList = [] + self.logger.debug(certList) + + for num in iter(certList): + sslPath = os.path.join(os.path.join(gphost, tempDir), num) + certPathList.append(sslPath) + # local mode + if self.context.g_opts.localMode: + localDnDir = dnDict[DefaultValue.GetHostIpOrName()] + for num in range(len(certList)): + # distribute gsql SSL cert + if (os.path.isfile(os.path.join(localDnDir, certList[num]))): + os.remove(os.path.join(localDnDir, certList[num])) + if (os.path.isfile(certPathList[num])): + g_file.cpFile(certPathList[num], + os.path.join(localDnDir, certList[num])) + g_file.changeMode(DefaultValue.KEY_FILE_MODE, + os.path.join(localDnDir, certList[num])) + + # remove 'sslcrl-file.crl' file + if (DefaultValue.SSL_CRL_FILE not in certList and + os.path.isfile( + os.path.join(localDnDir, DefaultValue.SSL_CRL_FILE))): + os.remove(os.path.join(localDnDir, DefaultValue.SSL_CRL_FILE)) + + # config 'sslcrl-file.crl' option in 'postgresql.conf' + if (os.path.isfile( + os.path.join(localDnDir, DefaultValue.SSL_CRL_FILE))): + cmd = "gs_guc set " \ + "-D %s -c \"ssl_crl_file=\'%s\'\"" % \ + (localDnDir, DefaultValue.SSL_CRL_FILE) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + raise Exception( + (ErrorCode.GAUSS_514["GAUSS_51400"] % cmd) + + "Failed set 'ssl_crl_file' option." + + "Error: \n%s" % output) + else: + cmd = "gs_guc set -D %s -c \"ssl_crl_file=\'\'\"" \ + % localDnDir + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + raise Exception( + (ErrorCode.GAUSS_514["GAUSS_51400"] % cmd) + + "Failed set 'ssl_crl_file' option." 
+ + "Error: \n%s" % output) + # remove backup flag file 'certFlag' + if (os.path.isfile(os.path.join(localDnDir, 'certFlag'))): + os.remove(os.path.join(localDnDir, 'certFlag')) + self.logger.log( + "Replace SSL cert files with local mode successfully.") + return + # not local mode + for node in dnName: + for num in range(len(certList)): + sshcmd = g_file.SHELL_CMD_DICT["deleteFile"] % ( + os.path.join(dnDict[node], certList[num]), + os.path.join(dnDict[node], certList[num])) + self.sshTool.executeCommand(sshcmd, + "Delete read only cert file.", + DefaultValue.SUCCESS, + [node], + self.context.g_opts.mpprcFile) + + if (os.path.exists(certPathList[num])): + self.sshTool.scpFiles(certPathList[num], dnDict[node], + [node]) + + # change permission of cert file 600, + # there no need to is exists file, + # because the files must be exist. + sshcmd = g_file.SHELL_CMD_DICT["changeMode"] % ( + DefaultValue.KEY_FILE_MODE, + os.path.join(dnDict[node], certList[num])) + self.sshTool.executeCommand(sshcmd, + "Change file permisstion.'", + DefaultValue.SUCCESS, + [node], + self.context.g_opts.mpprcFile) + + if (DefaultValue.SSL_CRL_FILE in certList): + if (node == DefaultValue.GetHostIpOrName()): + sshcmd = "gs_guc set " \ + "-D %s -c \"ssl_crl_file='%s'\"" \ + % (dnDict[node], DefaultValue.SSL_CRL_FILE) + else: + sshcmd = "gs_guc set " \ + " -D %s -c \"ssl_crl_file=\\\\\\'%s\\\\\\'\"" \ + % (dnDict[node], DefaultValue.SSL_CRL_FILE) + self.sshTool.executeCommand(sshcmd, + "Find 'ssl_crl_file'", + DefaultValue.SUCCESS, + [node], + self.context.g_opts.mpprcFile) + else: + # no ssl cert file there will delete old cert file, + # and config option ssl_crl_file = '' + sshcmd = g_file.SHELL_CMD_DICT["deleteFile"] % ( + os.path.join(dnDict[node], DefaultValue.SSL_CRL_FILE), + os.path.join(dnDict[node], DefaultValue.SSL_CRL_FILE)) + self.sshTool.executeCommand(sshcmd, + "Find 'ssl_crl_file'", + DefaultValue.SUCCESS, + [node], + self.context.g_opts.mpprcFile) + if (node == DefaultValue.GetHostIpOrName()): + sshcmd = "gs_guc set " \ + "-D %s -c \"ssl_crl_file=\'\'\"" % (dnDict[node]) + else: + sshcmd = \ + "gs_guc set " \ + "-D %s " \ + "-c \"ssl_crl_file=\\\\\\'\\\\\\'\"" % (dnDict[node]) + self.sshTool.executeCommand(sshcmd, + "Find 'ssl_crl_file'", + DefaultValue.SUCCESS, + [node], + self.context.g_opts.mpprcFile) + # remove file 'sslcrl-file.crl' + sshcmd = g_file.SHELL_CMD_DICT["deleteFile"] % ( + os.path.join(dnDict[node], DefaultValue.SSL_CRL_FILE), + os.path.join(dnDict[node], DefaultValue.SSL_CRL_FILE)) + self.sshTool.executeCommand(sshcmd, + "Delete read only cert file.", + DefaultValue.SUCCESS, + [node], + self.context.g_opts.mpprcFile) + # remove backup flag file 'certFlag' + sshcmd = g_file.SHELL_CMD_DICT["deleteFile"] % ( + os.path.join(dnDict[node], "certFlag"), + os.path.join(dnDict[node], "certFlag")) + self.sshTool.executeCommand(sshcmd, + "Delete backup flag file.", + DefaultValue.SUCCESS, + [node], + self.context.g_opts.mpprcFile) + self.logger.log("%s replace SSL cert files successfully." 
% node) + + ########################################################################### + # Kerberos Flow + ########################################################################### + def doKerberos(self): + """ + function: operation kerberos + input: NA + output: NA + """ + try: + if self.context.g_opts.kerberosMode == "install": + self.logger.log("Starting install Kerberos.", "addStep") + cmd = "%s -m %s -U %s --%s" % \ + (OMCommand.getLocalScript("Local_Kerberos"), + "install", + self.context.g_opts.clusterUser, + self.context.g_opts.kerberosType) + # local mode + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % + "Command: %s. Error:\n%s" % (cmd, output)) + self.logger.log("Successfully install Kerberos.") + elif self.context.g_opts.kerberosMode == "uninstall": + self.logger.log("Starting uninstall Kerberos.", "addStep") + cmd = "%s -m %s -U %s" % \ + (OMCommand.getLocalScript("Local_Kerberos"), + "uninstall", + self.context.g_opts.clusterUser) + # local mode + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % + "Command: %s. Error:\n%s" % (cmd, output)) + self.logger.log("Successfully uninstall Kerberos.") + except Exception as e: + raise Exception(str(e)) + + def checkRemoteFileExist(self, filepath): + """ + funciton:check file exist on remote node + input:filepath + output:dictionary + """ + existNodes = [] + for nodeName in self.context.clusterInfo.getClusterNodeNames(): + if (nodeName == DefaultValue.GetHostIpOrName()): + continue + if (self.sshTool.checkRemoteFileExist(nodeName, filepath, "")): + existNodes.append(nodeName) + + return existNodes + + def recursivePath(self, filepath): + """ + function: recursive path + input: filepath + output: NA + """ + fileList = os.listdir(filepath) + for fileName in fileList: + fileName = os.path.join(filepath, fileName) + # change the owner of files + g_file.changeOwner(self.context.g_opts.user, fileName) + if (os.path.isfile(fileName)): + # change fileName permission + g_file.changeMode(DefaultValue.KEY_FILE_MODE, fileName) + else: + # change directory permission + g_file.changeMode(DefaultValue.KEY_DIRECTORY_MODE, fileName, + True) + self.recursivePath(fileName) + + def checkNode(self): + """ + function: check if the current node is to be uninstalled + input : NA + output: NA + """ + pass + + def stopCluster(self): + """ + function:Stop cluster + input:NA + output:NA + """ + pass + + def startCluster(self): + """ + function:Start cluster + input:NA + output:NA + """ + pass diff --git a/script/impl/om/__init__.py b/script/impl/om/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/script/impl/postuninstall/OLAP/PostUninstallImplOLAP.py b/script/impl/postuninstall/OLAP/PostUninstallImplOLAP.py new file mode 100644 index 0000000..6036eef --- /dev/null +++ b/script/impl/postuninstall/OLAP/PostUninstallImplOLAP.py @@ -0,0 +1,58 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. 
+# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +import sys + +sys.path.append(sys.path[0] + "/../") + +from gspylib.common.Common import DefaultValue +from gspylib.common.OMCommand import OMCommand +from impl.postuninstall.PostUninstallImpl import PostUninstallImpl + +############################################################################# +# Global variables +############################################################################# +# action name +ACTION_CLEAN_VIRTUALIP = "clean_virtualIp" + + +class PostUninstallImplOLAP(PostUninstallImpl): + """ + init the command options + input : NA + output: NA + """ + + def __init__(self, GaussPost): + self.xmlFile = GaussPost.xmlFile + self.logFile = GaussPost.logFile + self.deleteUser = GaussPost.deleteUser + self.deleteGroup = GaussPost.deleteGroup + self.nodeList = GaussPost.nodeList + self.localLog = GaussPost.localLog + self.user = GaussPost.user + self.group = GaussPost.group + self.mpprcFile = GaussPost.mpprcFile + self.clusterToolPath = GaussPost.clusterToolPath + self.localMode = GaussPost.localMode + self.logger = GaussPost.logger + self.sshTool = GaussPost.sshTool + self.clusterInfo = GaussPost.clusterInfo + self.clean_gphome = GaussPost.clean_gphome + self.clean_host = GaussPost.clean_host + self.sshpwd = GaussPost.sshpwd + self.userHome = GaussPost.userHome + diff --git a/script/impl/postuninstall/OLAP/__init__.py b/script/impl/postuninstall/OLAP/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/script/impl/postuninstall/PostUninstallImpl.py b/script/impl/postuninstall/PostUninstallImpl.py new file mode 100644 index 0000000..9735be2 --- /dev/null +++ b/script/impl/postuninstall/PostUninstallImpl.py @@ -0,0 +1,781 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. 
+
+    def checkNode(self):
+        """
+        function: check if the current node is to be uninstalled
+        input : NA
+        output: NA
+        """
+        pass
+
+    def stopCluster(self):
+        """
+        function: Stop cluster
+        input : NA
+        output: NA
+        """
+        pass
+
+    def startCluster(self):
+        """
+        function: Start cluster
+        input : NA
+        output: NA
+        """
+        pass
diff --git a/script/impl/om/__init__.py b/script/impl/om/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/script/impl/postuninstall/OLAP/PostUninstallImplOLAP.py b/script/impl/postuninstall/OLAP/PostUninstallImplOLAP.py
new file mode 100644
index 0000000..6036eef
--- /dev/null
+++ b/script/impl/postuninstall/OLAP/PostUninstallImplOLAP.py
@@ -0,0 +1,58 @@
+# -*- coding:utf-8 -*-
+# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+#
+# openGauss is licensed under Mulan PSL v2.
+# You can use this software according to the terms
+# and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+#
+#          http://license.coscl.org.cn/MulanPSL2
+#
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+# ----------------------------------------------------------------------------
+import sys
+
+sys.path.append(sys.path[0] + "/../")
+
+from gspylib.common.Common import DefaultValue
+from gspylib.common.OMCommand import OMCommand
+from impl.postuninstall.PostUninstallImpl import PostUninstallImpl
+
+#############################################################################
+# Global variables
+#############################################################################
+# action name
+ACTION_CLEAN_VIRTUALIP = "clean_virtualIp"
+
+
+class PostUninstallImplOLAP(PostUninstallImpl):
+    """
+    init the command options
+    input : NA
+    output: NA
+    """
+
+    def __init__(self, GaussPost):
+        self.xmlFile = GaussPost.xmlFile
+        self.logFile = GaussPost.logFile
+        self.deleteUser = GaussPost.deleteUser
+        self.deleteGroup = GaussPost.deleteGroup
+        self.nodeList = GaussPost.nodeList
+        self.localLog = GaussPost.localLog
+        self.user = GaussPost.user
+        self.group = GaussPost.group
+        self.mpprcFile = GaussPost.mpprcFile
+        self.clusterToolPath = GaussPost.clusterToolPath
+        self.localMode = GaussPost.localMode
+        self.logger = GaussPost.logger
+        self.sshTool = GaussPost.sshTool
+        self.clusterInfo = GaussPost.clusterInfo
+        self.clean_gphome = GaussPost.clean_gphome
+        self.clean_host = GaussPost.clean_host
+        self.sshpwd = GaussPost.sshpwd
+        self.userHome = GaussPost.userHome
+
diff --git a/script/impl/postuninstall/OLAP/__init__.py b/script/impl/postuninstall/OLAP/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/script/impl/postuninstall/PostUninstallImpl.py b/script/impl/postuninstall/PostUninstallImpl.py
new file mode 100644
index 0000000..9735be2
--- /dev/null
+++ b/script/impl/postuninstall/PostUninstallImpl.py
@@ -0,0 +1,781 @@
+# -*- coding:utf-8 -*-
+# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+#
+# openGauss is licensed under Mulan PSL v2.
+# You can use this software according to the terms
+# and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+#
+#          http://license.coscl.org.cn/MulanPSL2
+#
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+# ----------------------------------------------------------------------------
+
+import os
+import sys
+import subprocess
+import grp
+import pwd
+import getpass
+
+sys.path.append(sys.path[0] + "/../")
+from gspylib.threads.parallelTool import parallelTool
+from gspylib.common.DbClusterInfo import initParserXMLFile, \
+    readOneClusterConfigItem
+from gspylib.common.Common import DefaultValue, ClusterCommand
+from gspylib.common.OMCommand import OMCommand
+from gspylib.common.ErrorCode import ErrorCode
+from gspylib.os.gsfile import g_file
+from gspylib.os.gsfile import g_Platform
+from gspylib.common.VersionInfo import VersionInfo
+import impl.upgrade.UpgradeConst as Const
+
+sys.path.append(sys.path[0] + "/../../../lib/")
+DefaultValue.doConfigForParamiko()
+import paramiko
+
+#############################################################################
+# Global variables
+#############################################################################
+gphome = None
+# system config file
+PROFILE_FILE = '/etc/profile'
+# pssh directory name
+PSSHDIR = 'pssh-2.3.1'
+# action name
+ACTION_CLEAN_TOOL_ENV = "clean_tool_env"
+ACTION_CHECK_UNPREINSTALL = "check_unpreinstall"
+ACTION_CLEAN_GAUSS_ENV = "clean_gauss_env"
+ACTION_DELETE_GROUP = "delete_group"
+ACTION_CLEAN_SYSLOG_CONFIG = 'clean_syslog_config'
+ACTION_CLEAN_DEPENDENCY = "clean_dependency"
+
+
+class PostUninstallImpl:
+    """
+    init the command options
+    input : NA
+    output: NA
+    """
+
+    def __init__(self, GaussPost):
+        """
+        function: constructor
+        """
+        pass
+
+    def checkLogFilePath(self):
+        """
+        function: Check log file path
+        input : NA
+        output: NA
+        """
+        clusterPath = []
+
+        try:
+            self.logger.log("Check log file path.", "addStep")
+            # get tool path
+            clusterPath.append(DefaultValue.getClusterToolPath(self.user))
+
+            # get tmp path
+            tmpDir = DefaultValue.getTmpDir(self.user, self.xmlFile)
+            clusterPath.append(tmpDir)
+
+            # get cluster path
+            hostName = DefaultValue.GetHostIpOrName()
+            dirs = self.clusterInfo.getClusterDirectorys(hostName, False)
+            for checkdir in dirs.values():
+                clusterPath.extend(checkdir)
+
+            self.logger.debug("Cluster paths %s." % clusterPath)
+            # check directory
+            g_file.checkIsInDirectory(self.logFile, clusterPath)
+            self.logger.log("Successfully checked log file path.", "constant")
+        except Exception as e:
+            self.logger.logExit(str(e))
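+
+    # Editor's note: illustrative sketch only, not part of the original
+    # module. checkLogFilePath() relies on g_file.checkIsInDirectory() to
+    # reject a log file that lives inside any cluster-owned directory
+    # (those directories are about to be deleted). Assuming that helper
+    # raises when the file is under one of the given roots, its core test
+    # can be sketched as follows:
+    def isUnderAnyDirSketch(self, filePath, dirList):
+        import os
+        filePath = os.path.realpath(filePath)
+        for rootDir in dirList:
+            rootDir = os.path.realpath(rootDir)
+            if os.path.commonpath([filePath, rootDir]) == rootDir:
+                return True   # filePath sits inside rootDir
+        return False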
+
+    ##########################################################################
+    # Uninstall functions
+    ##########################################################################
+    def doCleanEnvironment(self):
+        """
+        function: Clean Environment
+        input : NA
+        output: NA
+        """
+        self.logger.debug("Do clean Environment.", "addStep")
+        try:
+            # set GPHOME env
+            self.setOrCleanGphomeEnv()
+            # check uninstall
+            self.checkUnPreInstall()
+            # clean app/log/data/temp dirs
+            self.cleanDirectory()
+            # clean OS user on remote nodes
+            self.cleanRemoteOsUser()
+            # clean other nodes environment software and variable
+            self.cleanOtherNodesEnvSoftware()
+            # clean other nodes log
+            self.cleanOtherNodesLog()
+            # clean local node environment software and variable
+            self.cleanLocalNodeEnvSoftware()
+            # clean local user
+            self.cleanLocalOsUser()
+        except Exception as e:
+            self.logger.logExit(str(e))
+        self.logger.debug("Do clean Environment succeeded.", "constant")
+
+    def setOrCleanGphomeEnv(self, setGphomeenv=True):
+        """
+        function: set or clean the GPHOME entry in /etc/profile
+        input : setGphomeenv
+        output: NA
+        """
+        osProfile = "/etc/profile"
+        if setGphomeenv:
+            GphomePath = DefaultValue.getPreClusterToolPath(self.user,
+                                                            self.xmlFile)
+            # set GPHOME
+            g_file.writeFile(osProfile, ["export GPHOME=%s" % GphomePath])
+        else:
+            g_file.deleteLine(osProfile, "^\\s*export\\s*GPHOME=.*$")
+            self.logger.debug(
+                "Deleting stale GPHOME from user environment variables.")
+
+    def checkUnPreInstall(self):
+        """
+        function: check whether gs_uninstall has been run before unpreinstall
+        input : NA
+        output: NA
+        """
+        self.logger.log("Checking unpreinstallation.")
+        if not self.localMode:
+            DefaultValue.checkAllNodesMpprcFile(
+                self.clusterInfo.getClusterNodeNames(),
+                self.clusterInfo.appPath, self.mpprcFile)
+
+        cmd = "%s -t %s -u %s -l '%s' -X '%s'" % (
+            OMCommand.getLocalScript("Local_UnPreInstall"),
+            ACTION_CHECK_UNPREINSTALL,
+            self.user,
+            self.localLog,
+            self.xmlFile)
+        self.logger.debug("Command for checking unpreinstall: %s" % cmd)
+        # check if do postuninstall in all nodes
+        DefaultValue.execCommandWithMode(cmd, "check unpreinstall",
+                                         self.sshTool, self.localMode,
+                                         self.mpprcFile)
+        self.logger.log("Successfully checked unpreinstallation.")
+
+    def cleanDirectory(self):
+        """
+        function: clean install/instance/temp dirs
+        input : NA
+        output: NA
+        """
+        # clean instance path
+        hostName = DefaultValue.GetHostIpOrName()
+        dbNodeInfo = self.clusterInfo.getDbNodeByName(hostName)
+        instanceDirs = []
+        # get DB instance
+        for dbInst in dbNodeInfo.datanodes:
+            instanceDirs.append(dbInst.datadir)
+            if len(dbInst.ssdDir) != 0:
+                instanceDirs.append(dbInst.ssdDir)
+        # clean all instances
+        if len(instanceDirs) > 0:
+            if (os.path.exists(instanceDirs[0]) and
+                    len(os.listdir(instanceDirs[0])) == 0):
+                self.CleanInstanceDir()
+            else:
+                self.logger.debug(
+                    "Instance directory [%s] is not empty. "
+                    "Skip deleting the instance directory." %
+                    instanceDirs[0])
+        else:
+            self.logger.debug(
+                "No instance directory is found. "
+                "Skip deleting the instance directory.")
+
+        # clean install path
+        if os.path.exists(self.clusterInfo.appPath):
+            self.logger.log("Deleting the installation directory.")
+            cmd = "rm -rf '%s'" % self.clusterInfo.appPath
+            self.logger.debug(
+                "Command for deleting the installation path: %s" % cmd)
+            DefaultValue.execCommandWithMode(cmd, "delete install path",
+                                             self.sshTool, self.localMode,
+                                             self.mpprcFile)
+            self.logger.log("Successfully deleted the installation directory.")
+
+        # clean tmp dir
+        self.logger.log("Deleting the temporary directory.")
+        tmpDir = DefaultValue.getTmpDir(self.user, self.xmlFile)
+        cmd = "rm -rf '%s'; rm -rf /tmp/gs_checkos; rm -rf /tmp/gs_virtualip" \
+              % tmpDir
+        self.logger.debug(
+            "Command for deleting the temporary directory: %s" % cmd)
+        DefaultValue.execCommandWithMode(cmd, "delete the temporary directory",
+                                         self.sshTool, self.localMode,
+                                         self.mpprcFile)
+        self.logger.log("Successfully deleted the temporary directory.")
+
+    def CleanInstanceDir(self):
+        """
+        function: Clean instance directory
+        input : NA
+        output: NA
+        """
+        self.logger.log("Deleting the instance's directory.")
+        cmd = "%s -U %s -l '%s' -X '%s'" % (
+            OMCommand.getLocalScript("Local_Clean_Instance"), self.user,
+            self.localLog, self.xmlFile)
+        self.logger.debug("Command for deleting the instance: %s" % cmd)
+        DefaultValue.execCommandWithMode(cmd, "delete the instances data",
+                                         self.sshTool, self.localMode,
+                                         self.mpprcFile)
+
+        # clean upgrade temp backup path
+        cmd = "rm -rf '%s'" % DefaultValue.getBackupDir(self.user, "upgrade")
+        self.logger.debug(
+            "Command for deleting the upgrade temp backup path: %s" % cmd)
+        DefaultValue.execCommandWithMode(cmd,
+                                         "delete backup directory for upgrade",
+                                         self.sshTool, self.localMode,
+                                         self.mpprcFile)
+
+        self.logger.log("Successfully deleted the instance's directory.")
+
+    def cleanRemoteOsUser(self):
+        """
+        function: Clean remote os user
+        input : NA
+        output: NA
+        """
+        # check if local mode
+        if self.localMode:
+            return
+
+        if not self.deleteUser:
+            # clean static config file
+            cmd = "rm -rf '%s'" % self.clusterInfo.appPath
+            DefaultValue.execCommandWithMode(cmd, "delete install directory",
+                                             self.sshTool, self.localMode,
+                                             self.mpprcFile)
+            return
+
+        group = grp.getgrgid(pwd.getpwnam(self.user).pw_gid).gr_name
+
+        # get other nodes; do not remove items from a list while iterating it
+        hostName = DefaultValue.GetHostIpOrName()
+        otherNodes = [node for node in
+                      self.clusterInfo.getClusterNodeNames()
+                      if node != hostName]
+
+        # clean remote user
+        self.logger.log("Deleting remote OS user.")
+        cmd = "%s -U %s -l %s" % (
+            OMCommand.getLocalScript("Local_Clean_OsUser"), self.user,
+            self.localLog)
+        self.logger.debug("Command for deleting remote OS user: %s" % cmd)
+        DefaultValue.execCommandWithMode(cmd, "delete OS user", self.sshTool,
+                                         self.localMode, self.mpprcFile,
+                                         otherNodes)
+        self.logger.log("Successfully deleted remote OS user.")
+
+        if self.deleteGroup:
+            # clean remote group
+            self.logger.debug("Deleting remote OS group.")
+            cmd = "%s -t %s -u %s -l '%s' -X '%s'" % (
+                OMCommand.getLocalScript("Local_UnPreInstall"),
+                ACTION_DELETE_GROUP, group, self.localLog, self.xmlFile)
+            self.logger.debug("Command for deleting remote OS group: %s" % cmd)
+            status = self.sshTool.getSshStatusOutput(cmd, otherNodes,
+                                                     self.mpprcFile)[0]
+            outputMap = self.sshTool.parseSshOutput(otherNodes)
+            for node in status.keys():
+                if status[node] != DefaultValue.SUCCESS:
+                    self.logger.log((outputMap[node]).strip("\n"))
+            self.logger.debug("Deleting remote group is completed.")
+
+    def cleanOtherNodesEnvSoftware(self):
+        """
+        function: clean other nodes environment software and variable
+        input : NA
+        output: NA
+        """
+        # check if local mode
+        if self.localMode:
+            return
+        self.logger.log(
+            "Deleting software packages "
+            "and environmental variables of other nodes.")
+        try:
+            # get other nodes; do not remove items from a list while
+            # iterating it
+            hostName = DefaultValue.GetHostIpOrName()
+            otherNodes = [node for node in
+                          self.clusterInfo.getClusterNodeNames()
+                          if node != hostName]
+            self.logger.debug(
+                "Deleting environmental variables of nodes: %s." % otherNodes)
+
+            # clean $GAUSS_ENV
+            if not self.deleteUser:
+                cmd = "%s -t %s -u %s -l '%s' -X '%s'" % (
+                    OMCommand.getLocalScript("Local_UnPreInstall"),
+                    ACTION_CLEAN_GAUSS_ENV,
+                    self.user,
+                    self.localLog,
+                    self.xmlFile)
+                self.logger.debug("Command for deleting $GAUSS_ENV: %s" % cmd)
+                DefaultValue.execCommandWithMode(cmd, "delete $GAUSS_ENV",
+                                                 self.sshTool, self.localMode,
+                                                 self.mpprcFile, otherNodes)
+            cmd = "%s -t %s -u %s -l '%s' -X '%s'" % (
+                OMCommand.getLocalScript("Local_UnPreInstall"),
+                ACTION_CLEAN_TOOL_ENV,
+                self.user,
+                self.localLog,
+                self.xmlFile)
+            self.logger.debug(
+                "Command for deleting environmental variables: %s" % cmd)
+            DefaultValue.execCommandWithMode(cmd,
+                                             "delete environment variables",
+                                             self.sshTool,
+                                             self.localMode,
+                                             self.mpprcFile,
+                                             otherNodes)
+        except Exception as e:
+            self.logger.logExit(str(e))
+        self.logger.log(
+            "Successfully deleted software packages "
+            "and environmental variables of other nodes.")
+
+    def cleanOtherNodesLog(self):
+        """
+        function: clean other nodes log
+        input : NA
+        output: NA
+        """
+        # check if local mode
+        if self.localMode:
+            return
+        self.logger.log("Deleting logs of other nodes.")
+        try:
+            # get other nodes; do not remove items from a list while
+            # iterating it
+            hostName = DefaultValue.GetHostIpOrName()
+            otherNodes = [node for node in
+                          self.clusterInfo.getClusterNodeNames()
+                          if node != hostName]
+
+            # clean log
+            cmd = "rm -rf '%s/%s'; rm -rf /tmp/gauss_*;" % (
+                self.clusterInfo.logPath, self.user)
+            cmd += "rm -rf '%s/Python-2.7.9'" \
+                   % DefaultValue.getClusterToolPath(self.user)
+            self.logger.debug(
+                "Command for deleting logs of other nodes: %s" % cmd)
+            DefaultValue.execCommandWithMode(cmd,
+                                             "delete user log directory",
+                                             self.sshTool,
+                                             self.localMode,
+                                             self.mpprcFile,
+                                             otherNodes)
+            self.logger.debug(
+                "Successfully deleted logs of the nodes: %s." % otherNodes)
+        except Exception as e:
+            self.logger.logExit(
+                ErrorCode.GAUSS_502["GAUSS_50207"] % "other nodes log" +
+                " Error: \n%s." % str(e))
+        self.logger.log("Successfully deleted logs of other nodes.")
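+
+    # Editor's note: illustrative sketch only, not part of the original
+    # module. Several cleanup commands above splice paths into
+    # "rm -rf '%s'" by hand. When a path is not fully trusted,
+    # shlex.quote() builds the same command more robustly; this is a
+    # general Python idiom, not an API used elsewhere in this module.
+    def buildRemoveCmdSketch(self, paths):
+        import shlex
+        return "; ".join("rm -rf %s" % shlex.quote(p) for p in paths)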
+
+    def cleanLocalNodeEnvSoftware(self):
+        """
+        function: clean local node environment software and variable
+        input : NA
+        output: NA
+        Note: the Gauss-MPPDB* and sctp_patch entries are left over from
+              R5-to-R7 upgrades.
+        """
+        self.logger.log(
+            "Deleting software packages "
+            "and environmental variables of the local node.")
+        try:
+            self.clusterToolPath = DefaultValue.getClusterToolPath(self.user)
+
+            # clean local node environment software
+            path = "%s/%s" % (self.clusterToolPath, PSSHDIR)
+            g_file.removeDirectory(path)
+            path = "%s/upgrade.sh" % self.clusterToolPath
+            g_file.removeFile(path)
+            path = "%s/version.cfg" % self.clusterToolPath
+            g_file.removeFile(path)
+            path = "%s/GaussDB.py" % self.clusterToolPath
+            g_file.removeFile(path)
+            path = "%s/libcgroup" % self.clusterToolPath
+            g_file.removeDirectory(path)
+            path = "%s/unixodbc" % self.clusterToolPath
+            g_file.removeDirectory(path)
+            path = "%s/server.key.cipher" % self.clusterToolPath
+            g_file.removeFile(path)
+            path = "%s/server.key.rand" % self.clusterToolPath
+            g_file.removeFile(path)
+            path = "%s/%s*" % (self.clusterToolPath, VersionInfo.PRODUCT_NAME)
+            g_file.removeDirectory(path)
+            path = "%s/Gauss*" % self.clusterToolPath
+            g_file.removeDirectory(path)
+            path = "%s/sctp_patch" % self.clusterToolPath
+            g_file.removeDirectory(path)
+            # join the tool path first, then the file name
+            path = "%s/%s" % (self.clusterToolPath, Const.UPGRADE_SQL_FILE)
+            g_file.removeFile(path)
+            path = "%s/%s" % (self.clusterToolPath, Const.UPGRADE_SQL_SHA)
+            g_file.removeFile(path)
+            self.logger.debug(
+                "Deleting environmental software of local nodes.")
+
+            # clean local node environment variable
+            cmd = "(if [ -s '%s' ]; then " % PROFILE_FILE
+            cmd += "sed -i -e '/^export PATH=\/root\/gauss_om\/%s\/script:" \
+                   "\$PATH$/d' %s; fi)" % (self.user, PROFILE_FILE)
+            self.logger.debug(
+                "Command for deleting environment variable: %s" % cmd)
+            (status, output) = subprocess.getstatusoutput(cmd)
+            if status != 0:
+                self.logger.logExit(ErrorCode.GAUSS_502["GAUSS_50207"] %
+                                    "environment variables of the local node" +
+                                    " Error: \n%s" % output)
+            # check if user profile exist
+            userProfile = ""
+            if self.mpprcFile is not None and self.mpprcFile != "":
+                userProfile = self.mpprcFile
+            else:
+                userProfile = "/home/%s/.bashrc" % self.user
+            if not os.path.exists(userProfile):
+                self.logger.debug(
+                    "%s does not exist. "
+                    "Skipping cleanup of $GAUSS_ENV." % userProfile)
+                return
+            # clean user's environmental variable
+            DefaultValue.cleanUserEnvVariable(userProfile,
+                                              cleanGAUSS_WARNING_TYPE=True)
+
+            # clean $GAUSS_ENV
+            if not self.deleteUser:
+                envContent = "^\\s*export\\s*GAUSS_ENV=.*$"
+                g_file.deleteLine(userProfile, envContent)
+                self.logger.debug("Command for deleting $GAUSS_ENV: %s" % cmd,
+                                  "constant")
+
+        except Exception as e:
+            self.logger.logExit(str(e))
+        self.logger.log(
+            "Successfully deleted software packages "
+            "and environmental variables of the local node.")
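+
+    # Editor's note: illustrative sketch only, not part of the original
+    # module. cleanLocalNodeEnvSoftware() above removes a long hand-written
+    # list of tool-path entries one by one; the same behaviour can be
+    # table-driven, which makes the list easier to audit.
+    # g_file.removeFile/removeDirectory are the helpers already used above;
+    # the entry list here is abbreviated, not exhaustive.
+    def cleanToolEntriesSketch(self):
+        entries = [
+            ("upgrade.sh", g_file.removeFile),
+            ("version.cfg", g_file.removeFile),
+            ("libcgroup", g_file.removeDirectory),
+            ("unixodbc", g_file.removeDirectory),
+        ]
+        for name, remover in entries:
+            remover("%s/%s" % (self.clusterToolPath, name))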
+
+    def cleanLocalOsUser(self):
+        """
+        function: Clean local os user
+        input : NA
+        output: NA
+        """
+        if not self.deleteUser:
+            if self.localMode:
+                cmd = "rm -rf '%s'" % self.clusterInfo.appPath
+                DefaultValue.execCommandWithMode(cmd,
+                                                 "delete install directory",
+                                                 self.sshTool, self.localMode,
+                                                 self.mpprcFile)
+            return
+
+        group = grp.getgrgid(pwd.getpwnam(self.user).pw_gid).gr_name
+
+        # clean local user
+        self.logger.log("Deleting local OS user.")
+        cmd = "%s -U %s -l %s" % (
+            OMCommand.getLocalScript("Local_Clean_OsUser"), self.user,
+            self.localLog)
+        self.logger.debug("Command for deleting local OS user: %s" % cmd)
+        (status, output) = subprocess.getstatusoutput(cmd)
+        if status != 0:
+            self.logger.logExit(output)
+        self.logger.log("Successfully deleted local OS user.")
+
+        if self.deleteGroup:
+            # clean local user group
+            self.logger.debug("Deleting local OS group.")
+            cmd = "%s -t %s -u %s -l '%s' -X '%s'" % (
+                OMCommand.getLocalScript("Local_UnPreInstall"),
+                ACTION_DELETE_GROUP,
+                group,
+                self.localLog,
+                self.xmlFile)
+            self.logger.debug("Command for deleting local OS group: %s" % cmd)
+            (status, output) = subprocess.getstatusoutput(cmd)
+            if status != 0:
+                self.logger.log(output.strip())
+            self.logger.debug("Deleting local group is completed.")
+
+    def cleanLocalLog(self):
+        """
+        function: Clean default log
+        input : NA
+        output: NA
+        """
+        self.logger.log("Deleting local node's logs.", "addStep")
+        try:
+            # clean log
+            path = "%s/%s" % (self.clusterInfo.logPath, self.user)
+            g_file.removeDirectory(path)
+        except Exception as e:
+            self.logger.logExit(
+                ErrorCode.GAUSS_502["GAUSS_50207"] % "logs" +
+                " Error: \n%s." % str(e))
+        self.logger.log("Successfully deleted local node's logs.", "constant")
+
+    def cleanMpprcFile(self):
+        """
+        function: clean the mpprc file when a separated environment
+                  file is used.
+        input : NA
+        output: NA
+        """
+        self.logger.debug("Clean mpprc file.", "addStep")
+        # check if mpprcfile is null
+        if self.mpprcFile != "":
+            baseCmd = "rm -rf '%s'" % self.mpprcFile
+            # check if local mode
+            if self.localMode:
+                (status, output) = subprocess.getstatusoutput(baseCmd)
+                if status != 0:
+                    self.logger.logExit(
+                        ErrorCode.GAUSS_502["GAUSS_50207"] % "MPPRC file" +
+                        " Command: %s. Error: \n%s" % (baseCmd, output))
+            else:
+                dbNodeNames = self.clusterInfo.getClusterNodeNames()
+                for dbNodeName in dbNodeNames:
+                    cmd = "pssh -s -H %s '%s'" % (dbNodeName, baseCmd)
+                    (status, output) = subprocess.getstatusoutput(cmd)
+                    if status != 0:
+                        self.logger.logExit(
+                            ErrorCode.GAUSS_502["GAUSS_50207"] % "MPPRC file" +
+                            " Command: %s. Error: \n%s" % (cmd, output))
+        self.logger.debug("Successfully cleaned mpprc file.", "constant")
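+
+    # Editor's note: illustrative sketch only, not part of the original
+    # module. cleanMpprcFile() above runs "pssh -s -H <node> '<cmd>'" once
+    # per node; the per-node loop with status checking can be factored out
+    # as below. The helper name is hypothetical, and the sketch assumes the
+    # pssh binary shipped with this toolkit is on PATH.
+    def runOnEachNodeSketch(self, nodes, baseCmd):
+        import subprocess
+        for node in nodes:
+            cmd = "pssh -s -H %s '%s'" % (node, baseCmd)
+            (status, output) = subprocess.getstatusoutput(cmd)
+            if status != 0:
+                raise Exception("Command: %s. Error:\n%s" % (cmd, output))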
+
+    def cleanScript(self):
+        """
+        function: clean script directory
+        input : NA
+        output: NA
+        """
+        self.logger.debug("Clean script path.")
+        cmd = "%s -t %s -u %s -Q %s" % (
+            OMCommand.getLocalScript("Local_UnPreInstall"),
+            ACTION_CLEAN_DEPENDENCY, self.user,
+            self.clusterToolPath)
+        if self.deleteUser:
+            cmd += " -P %s" % self.userHome
+        DefaultValue.execCommandWithMode(cmd, "clean script",
+                                         self.sshTool, self.localMode,
+                                         self.mpprcFile)
+        self.logger.debug("Successfully cleaned script path.")
+
+    def cleanSyslogConfig(self):
+        """
+        function: clean syslog config
+        input : NA
+        output: NA
+        """
+        try:
+            # only SUSE 11/12 support it
+            distname = g_Platform.dist()[0]
+            if distname.upper() != "SUSE":
+                return
+
+            # clean syslog-ng/rsyslog config
+            cmd = "%s -t %s -u %s -l '%s' -X '%s'" % (
+                OMCommand.getLocalScript("Local_UnPreInstall"),
+                ACTION_CLEAN_SYSLOG_CONFIG,
+                self.user,
+                self.localLog,
+                self.xmlFile)
+            self.logger.debug(
+                "Command for clean syslog-ng/rsyslog config: %s" % cmd)
+            DefaultValue.execCommandWithMode(
+                cmd,
+                "clean syslog-ng/rsyslog config",
+                self.sshTool,
+                self.localMode,
+                self.mpprcFile,
+                self.clusterInfo.getClusterNodeNames())
+        except Exception as e:
+            self.logger.logExit(str(e))
+
+    def sshExecWithPwd(self, host):
+        """
+        function: execute command with root password
+        input : host
+        output: NA
+        """
+        cmd = "rm -rf %s/* && echo 'OKOKOK'" % gphome
+        ssh = paramiko.SSHClient()
+        ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
+        ssh.connect(host, 22, "root", self.sshpwd)
+        stdin, stdout, stderr = ssh.exec_command(cmd)
+        # decode the byte stream before searching it
+        output = stdout.read().decode()
+        self.logger.debug("%s: %s" % (str(host), str(output)))
+        if output.find('OKOKOK') < 0:
+            errMsg = stderr.read().decode()
+            raise Exception(
+                ErrorCode.GAUSS_514["GAUSS_51400"] % cmd +
+                "host: %s. Error:\n%s" % (host, errMsg))
+
+    def verifyCleanGphome(self, localMode=True):
+        """
+        function: verify clean gphome and get root password
+        input : localMode
+        output: str
+        """
+        sshpwd = ""
+        flag = input(
+            "Are you sure you want to clean gphome[%s] (yes/no)? " % gphome)
+        while True:
+            if (flag.upper() != "YES" and flag.upper() != "NO"
+                    and flag.upper() != "Y" and flag.upper() != "N"):
+                flag = input("Please type 'yes' or 'no': ")
+                continue
+            break
+        if flag.upper() == "NO" or flag.upper() == "N":
+            sys.exit(0)
+        if "HOST_IP" in os.environ.keys() and not localMode:
+            sshpwd = getpass.getpass("Please enter password for root:")
+            sshpwd_check = getpass.getpass("Please repeat password for root:")
+            if sshpwd_check != sshpwd:
+                sshpwd_check = ""
+                sshpwd = ""
+                raise Exception(ErrorCode.GAUSS_503["GAUSS_50306"] % "root")
+            sshpwd_check = ""
+        return sshpwd
+
+    def checkAuthentication(self, hostname):
+        """
+        function: Ensure the proper password-less access to the remote host.
+        input : hostname
+        output: True/False, hostname
+        """
+        cmd = 'ssh -n %s %s true' % (DefaultValue.SSH_OPTION, hostname)
+        (status, output) = subprocess.getstatusoutput(cmd)
+        if status != 0:
+            self.logger.debug("The cmd is %s " % cmd)
+            self.logger.debug(
+                "Failed to check authentication. Hostname:%s. Error: \n%s" % (
+                    hostname, output))
+            return (False, hostname)
+        return (True, hostname)
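+
+    # Editor's note: illustrative sketch only, not part of the original
+    # module. sshExecWithPwd() above drives paramiko directly; a minimal
+    # variant that also checks the remote exit status looks like this
+    # (SSHClient, exec_command and recv_exit_status are standard paramiko
+    # APIs). Closing the client in a finally block is the main point of
+    # the sketch.
+    def sshRunSketch(self, host, password, cmd):
+        client = paramiko.SSHClient()
+        client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
+        try:
+            client.connect(host, port=22, username="root", password=password)
+            stdin, stdout, stderr = client.exec_command(cmd)
+            rc = stdout.channel.recv_exit_status()
+            if rc != 0:
+                raise Exception(
+                    "host %s: %s" % (host, stderr.read().decode()))
+            return stdout.read().decode()
+        finally:
+            client.close()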
+
+    def getItemValueFromXml(self, itemName):
+        """
+        function: Get item from xml tag CLUSTER.
+        input : itemName
+        output: the value of the item
+        """
+        (retStatus, retValue) = readOneClusterConfigItem(
+            initParserXMLFile(self.xmlFile), itemName, "cluster")
+        if retStatus != 0:
+            raise Exception(
+                ErrorCode.GAUSS_502["GAUSS_50204"] % itemName +
+                " Error: \n%s" % retValue)
+        return retValue
+
+    def cleanGphomeScript(self):
+        """
+        function: clean gphome script
+        input : NA
+        output: NA
+        """
+        try:
+            if not self.clean_gphome:
+                return
+            global gphome
+            gphome = os.path.normpath(
+                self.getItemValueFromXml("gaussdbToolPath"))
+            cmd = "rm -rf %s/*" % gphome
+            if "HOST_IP" in os.environ.keys():
+                # Agent Mode
+                if self.localMode:
+                    # clean gphome in local mode
+                    self.verifyCleanGphome()
+                    (status, output) = subprocess.getstatusoutput(cmd)
+                    if status != 0:
+                        raise Exception(
+                            ErrorCode.GAUSS_514["GAUSS_51400"] % cmd +
+                            " Error:\n%s" % output)
+                    self.logger.logExit("Successfully cleaned gphome locally.")
+                else:
+                    # clean gphome on the specified nodes
+                    self.sshpwd = self.verifyCleanGphome(self.localMode)
+                    parallelTool.parallelExecute(self.sshExecWithPwd,
+                                                 self.clean_host)
+                    self.logger.logExit(
+                        "Successfully cleaned gphome on node %s."
+                        % self.clean_host)
+            else:
+                # SSH Mode
+                SSH_TRUST = True
+                self.nodeList = self.getItemValueFromXml("nodeNames").split(
+                    ",")
+                if len(self.nodeList) == 0:
+                    raise Exception(
+                        ErrorCode.GAUSS_502["GAUSS_50203"] % "nodeList")
+                results = parallelTool.parallelExecute(
+                    self.checkAuthentication, self.nodeList)
+                for (key, value) in results:
+                    if not key:
+                        self.logger.log(
+                            "SSH trust has not been created.\n"
+                            "For node : %s. Only clean local node." % value,
+                            "constant")
+                        SSH_TRUST = False
+                        break
+                if SSH_TRUST and not self.localMode:
+                    # SSH trust has been created
+                    self.verifyCleanGphome()
+                    parallelTool.parallelExecute(self.sshExecWithPwd,
+                                                 self.nodeList)
+                if not SSH_TRUST or self.localMode:
+                    # SSH trust has not been created,
+                    # which means clean gphome locally
+                    self.verifyCleanGphome()
+                    (status, output) = subprocess.getstatusoutput(cmd)
+                    if status != 0:
+                        raise Exception(
+                            ErrorCode.GAUSS_514["GAUSS_51400"] % cmd +
+                            " Error:\n%s" % output)
+                self.logger.logExit("Successfully cleaned gphome.")
+
+        except Exception as e:
+            self.logger.logExit(str(e))
+
+    def run(self):
+        """
+        function: run the postuninstall flow
+        input : NA
+        output: NA
+        """
+        try:
+            self.logger.debug(
+                "gs_postuninstall execution takes %s steps in total"
+                % ClusterCommand.countTotalSteps("gs_postuninstall"))
+            self.cleanGphomeScript()
+            self.checkLogFilePath()
+            self.cleanSyslogConfig()
+            self.doCleanEnvironment()
+            self.logger.closeLog()
+            self.cleanLocalLog()
+            self.cleanMpprcFile()
+            self.cleanScript()
+            self.setOrCleanGphomeEnv(setGphomeenv=False)
+            self.logger.log("Successfully cleaned environment.")
+        except Exception as e:
+            self.logger.logExit(str(e))
+        sys.exit(0)
diff --git a/script/impl/postuninstall/__init__.py b/script/impl/postuninstall/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/script/impl/preinstall/OLAP/PreinstallImplOLAP.py b/script/impl/preinstall/OLAP/PreinstallImplOLAP.py
new file mode 100644
index 0000000..111f786
--- /dev/null
+++ b/script/impl/preinstall/OLAP/PreinstallImplOLAP.py
@@ -0,0 +1,609 @@
+# -*- coding:utf-8 -*-
+# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+#
+# openGauss is licensed under Mulan PSL v2.
+# You can use this software according to the terms
+# and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +import subprocess +import os +import sys +import time + +sys.path.append(sys.path[0] + "/../../") + +from gspylib.common.GaussLog import GaussLog +from gspylib.common.Common import DefaultValue +from gspylib.common.VersionInfo import VersionInfo +from gspylib.common.ErrorCode import ErrorCode +from gspylib.common.OMCommand import OMCommand +from gspylib.os.gsfile import g_file +from impl.preinstall.PreinstallImpl import PreinstallImpl + +# action name +# set the user environment variable +ACTION_SET_USER_ENV = "set_user_env" +# set the tools environment variable +ACTION_SET_TOOL_ENV = "set_tool_env" +# set core path +ACTION_SET_CORE_PATH = "set_core_path" +# set virtual Ip +ACTION_SET_VIRTUALIP = "set_virtualIp" +# clean virtual Ip +ACTION_CLEAN_VIRTUALIP = "clean_virtualIp" +# check platform arm +ACTION_CHECK_PLATFORM_ARM = "check_platform_arm" +# set arm optimization +ACTION_SET_ARM_OPTIMIZATION = "set_arm_optimization" + +ACTION_CHECK_DISK_SPACE = "check_disk_space" +ACTION_FIX_SERVER_PACKAGE_OWNER = "fix_server_package_owner" + +############################################################################# +# Global variables +############################################################################# +toolTopPath = "" + + +class PreinstallImplOLAP(PreinstallImpl): + """ + init the command options + save command line parameter values + """ + + def __init__(self, preinstall): + """ + function: constructor + """ + super(PreinstallImplOLAP, self).__init__(preinstall) + + def makeCompressedToolPackage(self, path): + """ + function: make compressed tool package + input: NA + output: NA + """ + DefaultValue.makeCompressedToolPackage(path) + + def installToolsPhase1(self): + """ + function: install tools to local machine + input: NA + output: NA + """ + self.context.logger.log("Installing the tools on the local node.", + "addStep") + try: + # Determine if the old version of the distribution package + # is in the current directory + oldPackName = "%s-Package-bak.tar.gz" \ + % VersionInfo.PRODUCT_NAME_PACKAGE + oldPackPath = os.path.join(self.context.clusterToolPath, + oldPackName) + if os.path.exists(self.context.clusterToolPath): + versionFile = os.path.join(self.context.clusterToolPath, + "version.cfg") + if os.path.isfile(versionFile): + version, number, commitid = VersionInfo.get_version_info( + versionFile) + newPackName = "%s-Package-bak_%s.tar.gz" % ( + VersionInfo.PRODUCT_NAME_PACKAGE, commitid) + newPackPath = os.path.join(self.context.clusterToolPath, + newPackName) + if os.path.isfile(oldPackPath): + cmd = "(if [ -f '%s' ];then mv -f '%s' '%s';fi)" % ( + oldPackPath, oldPackPath, newPackPath) + self.context.logger.debug( + "Command for rename bak-package: %s." 
% cmd) + DefaultValue.execCommandWithMode( + cmd, "backup bak-package files", + self.context.sshTool, + self.context.localMode or self.context.isSingle, + self.context.mpprcFile) + + if (self.context.mpprcFile != ""): + # check mpprc file + self.checkMpprcFile() + # check the package is not matches the system + DefaultValue.checkPackageOS() + # get the package path + dirName = os.path.dirname(os.path.realpath(__file__)) + packageDir = os.path.join(dirName, "./../../../../") + packageDir = os.path.normpath(packageDir) + + # change logPath owner + self.context.logger.debug("Modifying logPath owner") + dirName = os.path.dirname(self.context.logFile) + topDirFile = "%s/topDirPath.dat" % dirName + keylist = [] + if (self.context.localMode): + if (os.path.exists(topDirFile)): + keylist = g_file.readFile(topDirFile) + if (keylist != []): + for key in keylist: + if (os.path.exists(key.strip())): + g_file.changeOwner(self.context.user, + key.strip(), True, "shell") + else: + self.context.logger.debug( + "Warning: Can not find the " + "path in topDirPath.dat.") + + g_file.removeFile(topDirFile) + self.context.logger.debug("Successfully modified logPath owner") + + # Delete the old bak package in GPHOME before copy the new one. + for bakPack in DefaultValue.PACKAGE_BACK_LIST: + bakFile = os.path.join(self.context.clusterToolPath, bakPack) + if (os.path.isfile(bakFile)): + self.context.logger.debug( + "Remove old bak-package: %s." % bakFile) + g_file.removeFile(bakFile) + + DefaultValue.makeCompressedToolPackage(packageDir) + + # check and create tool package dir + global toolTopPath + ownerPath = self.context.clusterToolPath + clusterToolPathExistAlready = True + # if clusterToolPath exist, + # set the clusterToolPathExistAlready False + if (not os.path.exists(ownerPath)): + clusterToolPathExistAlready = False + ownerPath = DefaultValue.getTopPathNotExist(ownerPath) + toolTopPath = ownerPath + # append clusterToolPath to self.context.needFixOwnerPaths + # self.context.needFixOwnerPaths will be checked the ownet + self.context.needFixOwnerPaths.append(ownerPath) + + # if clusterToolPath is not exist, then create it + + if not os.path.exists(self.context.clusterToolPath): + g_file.createDirectory(self.context.clusterToolPath) + g_file.changeMode(DefaultValue.MAX_DIRECTORY_MODE, + self.context.clusterToolPath, True, "shell") + + # change the clusterToolPath permission + if not clusterToolPathExistAlready: + #check the localMode + if self.context.localMode: + #local mode,change the owner + g_file.changeMode(DefaultValue.DIRECTORY_MODE, ownerPath, + recursive=True, cmdType="shell") + g_file.changeOwner(self.context.user, ownerPath, + recursive=True, cmdType="shell") + #not localMode, only change the permission + else: + g_file.changeMode(DefaultValue.MAX_DIRECTORY_MODE, + ownerPath, recursive=True, + cmdType="shell") + else: + g_file.changeMode(DefaultValue.DIRECTORY_MODE, ownerPath, + recursive=False, cmdType="shell") + + # Send compressed package to local host + if (packageDir != self.context.clusterToolPath): + # copy the package to clusterToolPath + g_file.cpFile(os.path.join( + packageDir, + DefaultValue.get_package_back_name()), + self.context.clusterToolPath) + + # Decompress package on local host + g_file.decompressFiles(os.path.join( + self.context.clusterToolPath, + DefaultValue.get_package_back_name()), + self.context.clusterToolPath) + + # change mode of packages + g_file.changeMode(DefaultValue.DIRECTORY_MODE, + self.context.clusterToolPath, recursive=True, + cmdType="shell") + + # get the 
top path of mpprc file need to be created on local node + # this is used to fix the newly created path owner later + if self.context.mpprcFile != "": + ownerPath = self.context.mpprcFile + if (not os.path.exists(self.context.mpprcFile)): + while True: + # find the top path to be created + (ownerPath, dirName) = os.path.split(ownerPath) + if os.path.exists(ownerPath) or dirName == "": + ownerPath = os.path.join(ownerPath, dirName) + break + self.context.needFixOwnerPaths.append(ownerPath) + + # check the current storage package path is legal + Current_Path = os.path.dirname(os.path.realpath(__file__)) + DefaultValue.checkPathVaild(os.path.normpath(Current_Path)) + # set ENV + cmd = "%s -t %s -u %s -l %s -X '%s' -Q %s" % ( + OMCommand.getLocalScript("Local_PreInstall"), + ACTION_SET_TOOL_ENV, + self.context.user, + self.context.localLog, + self.context.xmlFile, + self.context.clusterToolPath) + if self.context.mpprcFile != "": + cmd += " -s '%s' " % self.context.mpprcFile + #check the localmode,if mode is local then modify user group + if self.context.localMode: + cmd += "-g %s" % self.context.group + (status, output) = subprocess.getstatusoutput(cmd) + # if cmd failed, then exit + if status != 0: + self.context.logger.debug( + "Command for setting %s tool environment variables: %s" % ( + VersionInfo.PRODUCT_NAME, cmd)) + raise Exception(output) + + except Exception as e: + raise Exception(str(e)) + + self.context.logger.log( + "Successfully installed the tools on the local node.", "constant") + + def checkDiskSpace(self): + """ + function: delete step tmp file + input : NA + output: NA + """ + try: + cmd = "%s -t %s -u %s -l %s -R %s" % ( + OMCommand.getLocalScript("Local_PreInstall"), + ACTION_CHECK_DISK_SPACE, + self.context.user, + self.context.localLog, + self.context.clusterInfo.appPath) + if self.context.mpprcFile != "": + cmd += " -s '%s'" % self.context.mpprcFile + self.context.sshTool.executeCommand(cmd, "check disk space") + except Exception as e: + raise Exception(str(e)) + + def setEnvParameter(self): + """ + function: setting DBA environmental variables + input: NA + output: NA + """ + self.context.logger.log("Setting user environmental variables.", + "addStep") + + try: + # Setting DBA environmental variables + cmdParam = "" + # get then envParams + for param in self.context.envParams: + cmdParam += " -e \\\"%s\\\"" % param + + # set the environmental variables on all nodes + cmd = "%s -t %s -u %s %s -l %s" % ( + OMCommand.getLocalScript("Local_PreInstall"), + ACTION_SET_USER_ENV, + self.context.user, + cmdParam, + self.context.localLog) + # check the mpprcFile + if (self.context.mpprcFile != ""): + cmd += " -s '%s'" % self.context.mpprcFile + self.context.logger.debug( + "Command for setting user's environmental variables: %s" % cmd) + + # set user's environmental variables + DefaultValue.execCommandWithMode( + cmd, + "set user's environmental variables.", + self.context.sshTool, + self.context.localMode or self.context.isSingle, + self.context.mpprcFile) + except Exception as e: + raise Exception(str(e)) + + self.context.logger.log( + "Successfully set user environmental variables.", "constant") + + def setCorePath(self): + """ + function: set file size and path with core file + :return: NA + """ + self.context.clusterInfo.corePath = \ + self.context.clusterInfo.readClustercorePath(self.context.xmlFile) + if not self.context.clusterInfo.corePath: + return + self.context.logger.log("Setting Core file", "addStep") + try: + # this is used to fix the newly created path owner 
later + ownerPath = self.context.clusterInfo.corePath + if not os.path.exists(self.context.clusterInfo.corePath): + ownerPath = DefaultValue.getTopPathNotExist(ownerPath) + cmd = "ulimit -c unlimited; ulimit -c unlimited -S" + DefaultValue.execCommandWithMode( + cmd, + "set core file size.", + self.context.sshTool, + self.context.localMode or self.context.isSingle) + cmd = "echo 1 > /proc/sys/kernel/core_uses_pid && " + cmd += "echo '%s" % self.context.clusterInfo.corePath + cmd += "/core-%e-%p-%t' > /proc/sys/kernel/core_pattern " + cmd += "&& if [ ! -d '%s' ]; then mkdir %s -p -m %d;fi" % ( + self.context.clusterInfo.corePath, + self.context.clusterInfo.corePath, + DefaultValue.DIRECTORY_MODE) + cmd += " && chown %s:%s %s -R" % ( + self.context.user, self.context.group, + ownerPath) + DefaultValue.execCommandWithMode( + cmd, + "set core file path.", + self.context.sshTool, + self.context.localMode or self.context.isSingle) + except Exception as e: + raise Exception(str(e)) + self.context.logger.log("Successfully set core path.", "constant") + + def setPssh(self): + """ + function: set pssh + input : NA + output : NA + """ + if "HOST_IP" in os.environ.keys(): + return + self.context.logger.log("Setting pssh path", "addStep") + try: + pssh_path = os.path.join(os.path.dirname(__file__), + "../../../gspylib/pssh/bin/pssh") + pscp_path = os.path.join(os.path.dirname(__file__), + "../../../gspylib/pssh/bin/pscp") + psshlib_path = os.path.join( + os.path.dirname(__file__), + "../../../gspylib/pssh/bin/TaskPool.py") + dest_path = "/usr/bin/" + secbox_path = "/var/chroot/usr/bin/" + cmd = "cp %s %s %s %s" % ( + pssh_path, pscp_path, psshlib_path, dest_path) + cmd += \ + " && chmod %s %s/pssh && chmod %s %s/pscp " \ + "&& chmod %s %s/TaskPool.py" % ( + DefaultValue.MAX_DIRECTORY_MODE, dest_path, + DefaultValue.MAX_DIRECTORY_MODE, dest_path, + DefaultValue.MAX_DIRECTORY_MODE, dest_path) + # Set pssh and pscp path to secbox environment in dwsMode + if (os.path.exists('/var/chroot/') and os.path.exists( + '/rds/datastore/')): + cmd += " && cp %s %s %s %s" % ( + pssh_path, pscp_path, psshlib_path, secbox_path) + cmd += " && chmod %s %s/pssh && chmod %s %s/pscp " \ + "&& chmod %s %s/TaskPool.py" % ( + DefaultValue.MAX_DIRECTORY_MODE, secbox_path, + DefaultValue.MAX_DIRECTORY_MODE, secbox_path, + DefaultValue.MAX_DIRECTORY_MODE, secbox_path) + DefaultValue.execCommandWithMode( + cmd, + "set pssh file.", + self.context.sshTool, + self.context.localMode or self.context.isSingle) + except Exception as e: + raise Exception(str(e)) + self.context.logger.log("Successfully set pssh path.", "constant") + + def setHostIpEnv(self): + """ + function: set host ip env + input : NA + output : NA + """ + self.context.logger.log("Setting pssh path", "addStep") + try: + # remove HOST_IP info with /etc/profile and environ + cmd = "sed -i '/^export[ ]*HOST_IP=/d' /etc/profile" + DefaultValue.execCommandWithMode( + cmd, + "set host_ip env.", + self.context.sshTool, + self.context.localMode or self.context.isSingle) + if "HOST_IP" in os.environ.keys(): + os.environ.pop("HOST_IP") + except Exception as e: + raise Exception(str(e)) + self.context.logger.log("Successfully set core path.", "constant") + + def setArmOptimization(self): + """ + function: setting ARM Optimization + input: NA + output: NA + """ + self.context.logger.log("Set ARM Optimization.", "addStep") + cmd = "python3 -c 'import platform;print(platform.machine())'" + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + 
self.context.logger.logExit("Command for set platform ARM:" + "%s" % cmd + " Error: \n%s" % output) + if str(output) == "aarch64": + pass + else: + self.context.logger.log("No need to set ARM Optimization.", + "constant") + return + try: + # exec cmd for set platform ARM + cmd = "%s -t %s -u %s -l %s -Q %s" % ( + OMCommand.getLocalScript("Local_PreInstall"), + ACTION_SET_ARM_OPTIMIZATION, + self.context.user, + self.context.localLog, + self.context.clusterToolPath) + self.context.logger.debug("Command for set platform ARM: %s" % cmd) + + DefaultValue.execCommandWithMode( + cmd, + "set platform ARM", + self.context.sshTool, + self.context.localMode or self.context.isSingle, + self.context.mpprcFile) + except Exception as e: + raise Exception(str(e)) + # Successfully set ARM Optimization + self.context.logger.log("Successfully set ARM Optimization.", + "constant") + + # AP + def setVirtualIp(self): + """ + function: set the virtual IPs + input: NA + output: NA + """ + # the flag for whether the virtual IP exists + flag = 0 + # all virtual IPs list + allVirtualIP = [] + # get the all virtual IPs + for node in self.context.clusterInfo.dbNodes: + if node.virtualIp != []: + flag = 1 + allVirtualIP.extend(node.virtualIp) + # if flag=0, then return + if (flag == 0): + return + self.context.logger.log("Setting the virtual IP service.", "addStep") + # get the timestamp + currentTime = time.strftime("%Y-%m-%d_%H%M%S") + # temporary files + tmpFile = os.path.join("/tmp/", "gauss_set_virtualIP_%d_%s.dat" % ( + os.getpid(), currentTime)) + try: + # Setting the virtual IP service + setCmd = "%s -t %s -u %s -l '%s' -X '%s' -f '%s'" % ( + OMCommand.getLocalScript("Local_PreInstall"), + ACTION_SET_VIRTUALIP, + self.context.user, + self.context.localLog, + self.context.xmlFile, tmpFile) + self.context.logger.debug( + "Command for setting virtual IP: %s." % setCmd) + # exec cmd for set virtual IP + DefaultValue.execCommandWithMode( + setCmd, + "set virtual IP", + self.context.sshTool, + self.context.localMode or self.context.isSingle, + self.context.mpprcFile) + # if non-native mode + if (not self.context.localMode and not self.context.isSingle): + # check all virtual IP is OK + noPassIPs = DefaultValue.checkIsPing(allVirtualIP) + # virtual IP are not accessible after configuring + if noPassIPs != []: + self.context.logger.error( + ErrorCode.GAUSS_516["GAUSS_51632"] + % "the configuration of virtual IP") + self.context.logger.log( + "These virtual IP(%s) are not accessible after " + "configuring.\nRollback to clean virtual IP " + "service." 
% ",".join(noPassIPs), "constant") + # Rollback to clean virtual IP service + cleanCmd = "%s -t %s -u %s -l '%s' -X '%s' -f '%s'" % ( + OMCommand.getLocalScript("Local_UnPreInstall"), + ACTION_CLEAN_VIRTUALIP, + self.context.user, + self.context.localLog, + self.context.xmlFile, + tmpFile) + # exec the cmd for clean virtual IP service + DefaultValue.execCommandWithMode( + cleanCmd, + "rollback to clean virtual IP service", + self.context.sshTool, + self.context.localMode or self.context.isSingle, + self.context.mpprcFile) + # remove the temporary files + cmd = "rm -rf '%s'" % tmpFile + DefaultValue.execCommandWithMode( + cmd, + "rollback to delete temporary file", + self.context.sshTool, + self.context.localMode or self.context.isSingle, + self.context.mpprcFile) + # exit + raise Exception("Successfully rollback to delete " + "virtual IP service.") + except Exception as e: + # failed set virtual IP service + # remove the temporary files + cmd = "rm -rf '%s'" % tmpFile + DefaultValue.execCommandWithMode( + cmd, + "delete temporary file", + self.context.sshTool, + self.context.localMode or self.context.isSingle, + self.context.mpprcFile) + # exit + raise Exception(str(e)) + # Successfully set virtual IP service + self.context.logger.log("Successfully set virtual IP service.", + "constant") + + def del_remote_pkgpath(self): + """ + delete remote package path om scripts, lib and version.cfg + :return: + """ + if not self.context.is_new_root_path: + current_path = self.get_package_path() + script = os.path.join(current_path, "script") + hostList = self.context.clusterInfo.getClusterNodeNames() + hostList.remove(DefaultValue.GetHostIpOrName()) + if not self.context.localMode and hostList: + cmd = "rm -f %s/gs_*" % script + self.context.sshTool.executeCommand(cmd, "", + DefaultValue.SUCCESS, + hostList, + self.context.mpprcFile) + + def fixServerPackageOwner(self): + """ + function: fix server package. when distribute server package, + the os user has not been created, + so we should fix server package Owner here after user create. + input: NA + output: NA + """ + self.context.logger.log("Fixing server package owner.", "addStep") + try: + # fix server package owner for oltp + cmd = ("%s -t %s -u %s -g %s -X %s -Q %s -l %s" + % (OMCommand.getLocalScript("Local_PreInstall"), + ACTION_FIX_SERVER_PACKAGE_OWNER, + self.context.user, + self.context.group, + self.context.xmlFile, + self.context.clusterToolPath, + self.context.localLog)) + # check the env file + if self.context.mpprcFile != "": + cmd += " -s %s" % self.context.mpprcFile + self.context.logger.debug("Fix server pkg cmd: %s" % cmd) + # exec the cmd + DefaultValue.execCommandWithMode(cmd, + "fix server package owner", + self.context.sshTool, + self.context.localMode, + self.context.mpprcFile) + + self.del_remote_pkgpath() + except Exception as e: + raise Exception(str(e)) diff --git a/script/impl/preinstall/OLAP/__init__.py b/script/impl/preinstall/OLAP/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/script/impl/preinstall/PreinstallImpl.py b/script/impl/preinstall/PreinstallImpl.py new file mode 100644 index 0000000..a35e87a --- /dev/null +++ b/script/impl/preinstall/PreinstallImpl.py @@ -0,0 +1,1941 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. 
+# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +import subprocess +import os +import pwd +import sys +import getpass + +sys.path.append(sys.path[0] + "/../") + +from gspylib.common.DbClusterInfo import dbClusterInfo +from gspylib.common.ErrorCode import ErrorCode +from gspylib.common.Common import ClusterCommand, DefaultValue +from gspylib.common.OMCommand import OMCommand +from gspylib.os.gsfile import g_file +from multiprocessing.dummy import Pool as ThreadPool + +# action name +# prepare cluster tool package path +ACTION_PREPARE_PATH = "prepare_path" +# check the OS version +ACTION_CHECK_OS_VERSION = "check_os_Version" +# create os user +ACTION_CREATE_OS_USER = "create_os_user" +# check os user +ACTION_CHECK_OS_USER = "check_os_user" +# create cluster path +ACTION_CREATE_CLUSTER_PATHS = "create_cluster_paths" +# set the os parameters +ACTION_SET_OS_PARAMETER = "set_os_parameter" +# set finish flag +ACTION_SET_FINISH_FLAG = "set_finish_flag" +# set the user environment variable +ACTION_SET_USER_ENV = "set_user_env" +# set the tools environment variable +ACTION_SET_TOOL_ENV = "set_tool_env" +# prepare CRON service +ACTION_PREPARE_USER_CRON_SERVICE = "prepare_user_cron_service" +# prepare ssh service +ACTION_PREPARE_USER_SSHD_SERVICE = "prepare_user_sshd_service" +# set the dynamic link library +ACTION_SET_LIBRARY = "set_library" +# set sctp service +ACTION_SET_SCTP = "set_sctp" +# set virtual Ip +ACTION_SET_VIRTUALIP = "set_virtualIp" +# clean virtual Ip +ACTION_CLEAN_VIRTUALIP = "clean_virtualIp" +# check hostname on all nodes +ACTION_CHECK_HOSTNAME_MAPPING = "check_hostname_mapping" +# write /etc/hosts flag +HOSTS_MAPPING_FLAG = "#Gauss OM IP Hosts Mapping" +# init Gausslog +ACTION_INIT_GAUSSLOG = "init_gausslog" +# check envfile +ACTION_CHECK_ENVFILE = "check_envfile" +# check path owner +ACTION_CHECK_DIR_OWNER = "check_dir_owner" +# check os software +ACTION_CHECK_OS_SOFTWARE = "check_os_software" +# change tool env +ACTION_CHANGE_TOOL_ENV = "change_tool_env" +############################################################################# +# Global variables +# self.context.logger: globle logger +# self.context.clusterInfo: global clueter information +# self.context.sshTool: globle ssh tool interface +# g_warningTpye: warning type +############################################################################# +iphostInfo = "" +topToolPath = "" +# create the tmp file for dist trust steps +g_stepTrustTmpFile = None +# the tmp file name +TRUST_TMP_FILE = "step_preinstall_file.dat" +# the tmp file path +TRUST_TMP_FILE_DIR = None +createTrustFlag = False + + +class PreinstallImpl: + """ + init the command options + save command line parameter values + """ + + def __init__(self, preinstall): + """ + function: constructor + """ + self.context = preinstall + + def installToolsPhase1(self): + """ + function: install tools to local machine + input: NA + output: NA + """ + pass + + def checkMpprcFile(self): + """ + function: Check mpprc file path + input : NA + output: NA + """ + clusterPath = [] + # get the all directorys list about cluster in the xml file + dirs = self.context.clusterInfo.getClusterDirectorys() + for 
checkdir in list(dirs.values()): + # append directory to clusterPath + clusterPath.extend(checkdir) + # get tool path + clusterPath.append(self.context.clusterToolPath) + # get tmp path + clusterPath.append( + dbClusterInfo.readClusterTmpMppdbPath(self.context.user, + self.context.xmlFile)) + self.context.logger.debug("Cluster paths %s." % clusterPath, + "constant") + # check directory + g_file.checkIsInDirectory(self.context.mpprcFile, clusterPath) + + def getUserName(self): + """ + function: get the user name + input: NA + output: str + """ + return os.environ.get('LOGNAME') or os.environ.get('USER') + + def getUserPasswd(self, name, point=""): + """ + function: + get user passwd + input: name, point + output: str + """ + if point == "": + self.context.logger.log("Please enter password for %s." % name, + "constant") + else: + self.context.logger.log( + "Please enter password for %s %s." % (name, point), "constant") + passwdone = getpass.getpass() + + return passwdone + + def checkRootPasswd(self, ip): + """ + function:check the root passwd is correct or not + input:node ip + output:NA + + """ + ssh = None + try: + import paramiko + except ImportError as e: + raise Exception(ErrorCode.GAUSS_522["GAUSS_52200"] % str(e)) + try: + # ssh the ip + ssh = paramiko.Transport((ip, 22)) + except Exception as e: + raise Exception(ErrorCode.GAUSS_506["GAUSS_50603"] + "IP: %s" % ip) + try: + ssh.connect(username="root", password=self.context.rootPasswd) + except Exception as e: + raise Exception( + ErrorCode.GAUSS_503["GAUSS_50306"] % ip + + " Maybe communication is exception, please check " + "the password and communication." + + " Error: \nWrong password or communication is abnormal.") + finally: + if ssh is not None: + ssh.close() + + def twoMoreChancesForRootPasswd(self): + """ + function:for better user experience, + if the root password is wrong, two more chances should be given + input:ip list of all hosts + output:NA + """ + # save the sshIps + Ips = [] + # get the user sshIps + sshIps = self.context.clusterInfo.getClusterSshIps() + # save the sshIps to Ips + for ips in sshIps: + Ips.extend(ips) + times = 0 + while True: + try: + # get the number of concurrent processes + pool = ThreadPool(DefaultValue.getCpuSet()) + # start the concurrent processes + ipHostname = pool.map(self.checkRootPasswd, Ips) + # close the pool + pool.close() + # wait the return from concurrent processes + pool.join() + break + except Exception as e: + if str(e).find("The IP address is invalid") != -1: + raise Exception(str(e)) + if times == 2: + raise Exception(str(e)) + self.context.logger.log( + "Password authentication failed, please try again.") + self.context.rootPasswd = getpass.getpass() + times += 1 + + def createTrustForRoot(self): + """ + function: + create SSH trust for user who call this script with root privilege + precondition: + 1.create SSH trust tool has been installed on local host + postcondition: + caller's SSH trust has been created + input: NA + output: NA + hideninfo:NA + """ + if self.context.localMode or self.context.isSingle: + return + + try: + # check the interactive mode + # if interactive is True + if not self.context.preMode: + # Ask to create trust for root + flag = input( + "Are you sure you want " + "to create trust for root (yes/no)? 
") + while True: + # If it is not yes or no, it has been imported + # if it is yes or no, it has been break + if ( + flag.upper() != "YES" + and flag.upper() != "NO" + and flag.upper() != "Y" and flag.upper() != "N"): + flag = input("Please type 'yes' or 'no': ") + continue + break + + # Confirm that the user needs to be created trust for root + # Receives the entered password + if flag.upper() == "YES" or flag.upper() == "Y": + self.context.rootPasswd = self.getUserPasswd("root") + # check passwd, if wrong, then give two more chances + self.twoMoreChancesForRootPasswd() + + # save the distribute + result = {} + # save the sshIps + Ips = [] + # create trust for root + # get the user name + username = pwd.getpwuid(os.getuid()).pw_name + # get the user sshIps + sshIps = self.context.clusterInfo.getClusterSshIps() + # save the sshIps to Ips + for ips in sshIps: + Ips.extend(ips) + + Hosts = [] + # get the sshIps and node name + for node in self.context.clusterInfo.dbNodes: + Hosts.append(node.name) + for ip in node.sshIps: + result[ip] = node.name + # get the all hostname + iphostnamedict = self.getAllHosts(Ips, self.context.rootPasswd) + # check the hostname and node name + checkResult = self.checkIpHostname(result, iphostnamedict) + # if check failed, then exit + if checkResult != DefaultValue.SUCCESS: + raise Exception(checkResult) + + # write the /etc/hosts + if not self.context.skipHostnameSet: + # write the ip and hostname to /etc/hosts + self.writeLocalHosts(result) + if self.context.rootPasswd == "": + # write the /etc/hosts to remote node + self.writeRemoteHosts(result) + + # if not provide root passwd, + # then do not create SSH trust for root user + if not self.context.preMode: + if self.context.rootPasswd != "": + self.context.logger.log( + "Creating SSH trust for the root permission user.") + self.context.sshTool.createTrust( + username, + self.context.rootPasswd, + Ips, + self.context.mpprcFile, + self.context.skipHostnameSet) + g_file.changeMode(DefaultValue.HOSTS_FILE, "/etc/hosts", + False, "shell") + + except Exception as e: + raise Exception(str(e)) + if self.context.rootPasswd != "": + self.context.logger.log( + "Successfully created SSH trust for the root permission user.") + + def getAllHostName(self, ip): + """ + function: + Connect to all nodes ,then get all hostaname by threading + precondition: + 1.User's password is correct on each node + postcondition: + NA + input: ip + output:Dictionary ipHostname,key is IP and value is hostname + hideninfo:NA + """ + # ip and hostname + ipHostname = {} + # user name + username = pwd.getpwuid(os.getuid()).pw_name + try: + # load paramiko + import paramiko + except ImportError as e: + raise Exception(ErrorCode.GAUSS_522["GAUSS_52200"] % str(e)) + try: + # ssh the ip + ssh = paramiko.Transport((ip, 22)) + except Exception as e: + raise Exception(ErrorCode.GAUSS_506["GAUSS_50603"] + "IP: %s" % ip) + try: + # connect + ssh.connect(username=username, password=self.context.rootPasswd) + except Exception as e: + ssh.close() + raise Exception( + ErrorCode.GAUSS_503["GAUSS_50306"] % ip + + " Maybe communication is exception, " + "please check the password and communication." 
+ + " Error: \nWrong password or communication is abnormal.") + + check_channel = ssh.open_session() + cmd = "cd" + check_channel.exec_command(cmd) + channel_read = "" + env_msg = check_channel.recv_stderr(9999).decode() + while True: + channel_read = check_channel.recv(9999).decode().strip() + if len(channel_read) != 0: + env_msg += str(channel_read) + else: + break + if env_msg != "": + ipHostname[ + "Node[%s]" % ip] = \ + "Output: [" \ + + env_msg \ + + " ] print by /etc/profile or ~/.bashrc, please check it." + return ipHostname + + # get hostname + cmd = "hostname" + channel = ssh.open_session() + # exec the hostname on remote node + channel.exec_command(cmd) + # recv the result from remote node + hostname = channel.recv(9999).decode().strip() + # save the hostname + ipHostname[ip] = hostname + # close ssh + ssh.close() + + return ipHostname + + def getAllHosts(self, sshIps, passwd): + """ + function: + Connect to all nodes ,then get all hostaname + precondition: + 1.User's password is correct on each node + postcondition: + NA + input: sshIps,passwd + output:Dictionary ipHostname,key is IP and value is hostname + hideninfo:NA + """ + # ip and hostname + # the result for return + result = {} + if passwd != "": + try: + # get the number of concurrent processes + pool = ThreadPool(DefaultValue.getCpuSet()) + # start the concurrent processes + ipHostname = pool.map(self.getAllHostName, sshIps) + # close the pool + pool.close() + # wait the return from concurrent processes + pool.join() + except Exception as e: + if str(e) == "": + raise Exception( + ErrorCode.GAUSS_511["GAUSS_51101"] + % "communication may be abnormal.") + else: + raise Exception(str(e)) + + # save the hostname to result + err_msg = "" + for i in ipHostname: + for (key, value) in list(i.items()): + if key.find("Node") >= 0: + err_msg += str(i) + else: + result[key] = value + if len(err_msg) > 0: + raise Exception(ErrorCode.GAUSS_518["GAUSS_51808"] % err_msg) + # if the passwd is null + else: + cmd = "source /etc/profile " \ + "&& if [ -f ~/.bashrc ]; then source ~/.bashrc; fi" + if self.context.mpprcFile != "": + cmd += "&& if [ -f '%s' ]; then source '%s'; fi" % ( + self.context.mpprcFile, self.context.mpprcFile) + # send the cmd to sshIps + # check the trust and envfile + self.context.sshTool.executeCommand(cmd, + "check cluster trust", + DefaultValue.SUCCESS, + sshIps, + self.context.mpprcFile, + checkenv=True) + + pssh_path = os.path.join(os.path.dirname(__file__), + "../../gspylib/pssh/bin/pssh") + for sship in sshIps: + # set the cmd + cmd = "%s -s -H %s hostname 2>/dev/null" % (pssh_path, sship) + # exec the command + (status, output) = subprocess.getstatusoutput(cmd) + # if cmd failed, then exit + if status != 0: + + raise Exception(ErrorCode.GAUSS_516["GAUSS_51618"] + + "The cmd is %s " % cmd) + result[sship] = output + + return result + + def checkIpHostname(self, srcList, tgtList): + """ + function: + Checking the hostname and IP is matched or not . + precondition: + NA + postcondition: + NA + input: srcList,tgtList + output: retValue ,if srclist and tgtlist is same , + then return Success else return Warning message. 
+        hideninfo:NA
+        """
+        retValue = ""
+        # check whether each hostname matches its IP
+        for (key, value) in list(srcList.items()):
+            if srcList[key] != tgtList[key]:
+                retValue = retValue + ErrorCode.GAUSS_524["GAUSS_52402"] % (
+                    key, value)
+
+        if retValue == "":
+            # the check passed
+            retValue = DefaultValue.SUCCESS
+        return retValue
+
+    def writeLocalHosts(self, result):
+        """
+        function:
+            Write hostname and IP into /etc/hosts
+            when no matching entry exists in /etc/hosts yet
+        precondition:
+            NA
+        postcondition:
+            NA
+        input: Dictionary result,key is IP and value is hostname
+        output: NA
+        hideninfo:NA
+        """
+        writeResult = []
+        hostIPList = []
+        hostIPInfo = ""
+        # temporary file used to rewrite /etc/hosts
+        tmp_hostipname = "./tmp_hostsiphostname_%d" % os.getpid()
+        # delete the lines flagged with HOSTS_MAPPING_FLAG from /etc/hosts
+        cmd = "grep -v '%s' %s > %s ; cp %s %s && rm -rf '%s'" % \
+              ("#Gauss.* IP Hosts Mapping", '/etc/hosts', tmp_hostipname,
+               tmp_hostipname, '/etc/hosts', tmp_hostipname)
+        (status, output) = DefaultValue.retryGetstatusoutput(cmd)
+        # if cmd failed, append the output to writeResult
+        if status != 0:
+            g_file.removeFile(tmp_hostipname)
+            writeResult.append(output)
+        # cmd succeeded
+        else:
+            for (key, value) in list(result.items()):
+                # build the mapping line
+                hostIPInfo = '%s %s %s' % (key, value, HOSTS_MAPPING_FLAG)
+                hostIPList.append(hostIPInfo)
+            # append the ip and hostname lines to /etc/hosts
+            g_file.writeFile("/etc/hosts", hostIPList, mode="a+")
+
+    def writeRemoteHosts(self, result):
+        """
+        function:
+            Write hostname and IP into /etc/hosts on remote nodes
+            when no matching entry exists in /etc/hosts yet
+        precondition:
+            NA
+        postcondition:
+            NA
+        input: Dictionary result,key is IP and value is hostname
+               rootPasswd
+        output: NA
+        hideninfo:NA
+        """
+        # IP and hostname
+        global iphostInfo
+        iphostInfo = ""
+        # remote hosts
+        remoteHosts = []
+
+        # build the lines to write into /etc/hosts
+        for (key, value) in list(result.items()):
+            iphostInfo += '%s %s %s\n' % (key, value, HOSTS_MAPPING_FLAG)
+            if value != DefaultValue.GetHostIpOrName():
+                remoteHosts.append(value)
+        remoteHosts1 = list(set(remoteHosts))
+        iphostInfo = iphostInfo[:-1]
+        if len(remoteHosts1) == 0:
+            return
+        # temporary file used to rewrite /etc/hosts
+        tmp_hostipname = "./tmp_hostsiphostname_%d" % os.getpid()
+        # delete the lines flagged with HOSTS_MAPPING_FLAG from /etc/hosts
+        cmd = "if [ -f '%s' ]; then grep -v '%s' %s > %s " \
+              "; cp %s %s ; rm -rf '%s'; fi" % \
+              ('/etc/hosts', "#Gauss.* IP Hosts Mapping", '/etc/hosts',
+               tmp_hostipname, tmp_hostipname, '/etc/hosts', tmp_hostipname)
+        # exec the cmd on all remote nodes
+        self.context.sshTool.executeCommand(cmd,
+                                            "grep /etc/hosts",
+                                            DefaultValue.SUCCESS,
+                                            remoteHosts1,
+                                            self.context.mpprcFile)
+
+        # write the iphostInfo into /etc/hosts on all remote nodes
+        cmd = "echo '%s' >> /etc/hosts" % iphostInfo
+        self.context.sshTool.executeCommand(cmd,
+                                            "write /etc/hosts",
+                                            DefaultValue.SUCCESS,
+                                            remoteHosts1,
+                                            self.context.mpprcFile)
+
+    def distributePackages(self):
+        """
+        function:
+            distribute packages and xml to all nodes of cluster
+        precondition:
+            1.packages and xml exist on local host
+            2.root SSH trust has been created
+        postcondition:
+            1.packages and xml exist on all hosts
+            2.os user can access package and xml
+        input:NA
+        output:NA
+        information hiding:
+            1.the package and xml path
+            2.node names
+        ppp:
+            check and create the server package path
+            make compressed server package
+            send server package
+            Decompress package on every host
+            change mode of
packages + check and create the xml path + send xml + change mode of xml file + check and create the tool package path + make compressed tool package + send tool package + change mode of packages + """ + if self.context.localMode or self.context.isSingle: + return + + self.context.logger.log("Distributing package.", "addStep") + try: + self.makeCompressedToolPackage(self.context.clusterToolPath) + + # get the all node names in xml file + hosts = self.context.clusterInfo.getClusterNodeNames() + # remove the local node name + hosts.remove(DefaultValue.GetHostIpOrName()) + self.getTopToolPath(self.context.sshTool, + self.context.clusterToolPath, hosts, + self.context.mpprcFile) + + # Delete the old bak package in GPHOME before copy the new one. + for bakPack in DefaultValue.PACKAGE_BACK_LIST: + bakFile = os.path.join(self.context.clusterToolPath, bakPack) + cmd = g_file.SHELL_CMD_DICT["deleteFile"] % (bakFile, bakFile) + self.context.logger.debug( + "Command for deleting bak-package: %s." % cmd) + (status, output) = self.context.sshTool.getSshStatusOutput( + cmd, hosts) + for ret in list(status.values()): + if ret != DefaultValue.SUCCESS: + self.context.logger.debug( + "Failed delete bak-package, result: %s." % output) + + # Retry 3 times, if distribute failed. + for i in range(3): + try: + self.context.logger.log( + "Begin to distribute package to tool path.") + # Send compressed package to every host + DefaultValue.distributePackagesToRemote( + self.context.sshTool, + self.context.clusterToolPath, + self.context.clusterToolPath, + hosts, + self.context.mpprcFile, + self.context.clusterInfo) + # Decompress package on every host + except Exception as e: + # loop 3 times, if still wrong, exit with error code. + if i == 2: + raise Exception(str(e)) + # If error, continue loop. + self.context.logger.log( + "Distributing package failed, retry.") + continue + # If scp success, exit loop. + self.context.logger.log( + "Successfully distribute package to tool path.") + break + # 2.distribute gauss server package + # Get the path to the server package + dirName = os.path.dirname(os.path.realpath(__file__)) + packageDir = os.path.join(dirName, "./../../../") + packageDir = os.path.normpath(packageDir) + for i in range(3): + try: + self.context.logger.log( + "Begin to distribute package to package path.") + # distribute the distribute package to all node names + DefaultValue.distributePackagesToRemote( + self.context.sshTool, + self.context.clusterToolPath, + packageDir, + hosts, + self.context.mpprcFile, + self.context.clusterInfo) + except Exception as e: + # loop 3 times, if still wrong, exit with error code. + if i == 2: + raise Exception(str(e)) + # If error, continue loop. + self.context.logger.log( + "Distributing package failed, retry.") + continue + # If scp success, exit loop. + self.context.logger.log( + "Successfully distribute package to package path.") + break + # 3.distribute xml file + DefaultValue.distributeXmlConfFile(self.context.sshTool, + self.context.xmlFile, hosts, + self.context.mpprcFile) + except Exception as e: + raise Exception(str(e)) + + self.context.logger.log("Successfully distributed package.", + "constant") + + def makeCompressedToolPackage(self, path): + """ + function: make compressed tool package + input : path + output : NA + """ + pass + + def getTopToolPath(self, top_sshTool, clusterToolPath, hosts, mpprcFile): + """ + function: find the top path of GPHOME in remote nodes. 
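+        e.g. (hypothetical value): a clusterToolPath of
+        "/opt/huawei/wisequery" yields the candidate string
+        "/opt /opt/huawei /opt/huawei/wisequery"; on each remote node the
+        first candidate that is not yet a directory becomes that node's
+        top tool path, so it can be chowned later in installToolsPhase2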
+ input: top_sshTool, clusterToolPath, hosts, mpprcFile + output: NA + """ + # get the String of each path & split it with space. + global topToolPath + topToolPath = {} + pathList = clusterToolPath.split("/") + pathStr = "" + # get the string of GPHOME, split it by white spaces + for path in pathList: + if path == pathList[0]: + pathStr = "/" + elif path == pathList[1]: + pathNext = "/" + path + pathStr = pathNext + else: + pathNext = pathNext + "/" + path + pathStr += " " + pathNext + + # use the shell command to get top path of gausstool + cmd = "str='%s'; for item in \$str; " \ + "do if [ ! -d \$item ]; then TopPath=\$item; " \ + "break; fi; done; echo \$TopPath" % ( + pathStr) + top_sshTool.getSshStatusOutput(cmd, hosts, mpprcFile) + outputMap = top_sshTool.parseSshOutput(hosts) + for node in list(outputMap.keys()): + topToolList = outputMap[node].split("\n") + topToolPath[node] = topToolList[0] + + def fixServerPackageOwner(self): + """ + function: when distribute server package, + the os user has not been created, so we should fix + server package Owner here after user create. + input: NA + output: NA + """ + pass + + def installToolsPhase2(self): + """ + function: install the tools + input: NA + output: NA + """ + # check if path have permission. + if self.context.localMode or self.context.isSingle: + # fix new created path's owner + for onePath in self.context.needFixOwnerPaths: + g_file.changeOwner(self.context.user, onePath, recursive=True, + cmdType="shell") + return + + self.context.logger.log("Installing the tools in the cluster.", + "addStep") + try: + self.context.logger.debug( + "Paths need to be fixed owner:%s." + % self.context.needFixOwnerPaths) + # fix new created path's owner + for onePath in self.context.needFixOwnerPaths: + g_file.changeOwner(self.context.user, onePath, recursive=True, + cmdType="shell") + + # fix remote toolpath's owner + for node in list(topToolPath.keys()): + nodelist = [] + nodelist.append(node) + if os.path.exists(topToolPath[node]): + cmd = "chown -R %s:%s '%s'" % ( + self.context.user, self.context.group, + topToolPath[node]) + self.context.sshTool.executeCommand( + cmd, + "authorize top tool path", + DefaultValue.SUCCESS, + nodelist, + self.context.mpprcFile) + + # chown chmod top path file + dirName = os.path.dirname(self.context.logFile) + topDirFile = "%s/topDirPath.dat" % dirName + cmd = "(if [ -f '%s' ];then cat '%s' " \ + "| awk -F = '{print $1}' " \ + "| xargs chown -R %s:%s; rm -rf '%s';fi)" % \ + (topDirFile, topDirFile, self.context.user, + self.context.group, topDirFile) + self.context.sshTool.executeCommand(cmd, + "authorize top path", + DefaultValue.SUCCESS, + [], + self.context.mpprcFile) + + # change owner of packages + self.context.logger.debug("Changing package path permission.") + dirName = os.path.dirname(os.path.realpath(__file__)) + packageDir = os.path.realpath( + os.path.join(dirName, "./../../../")) + "/" + + list_dir = g_file.getDirectoryList(packageDir) + for directory in list_dir: + dirPath = packageDir + directory + dirPath = os.path.normpath(dirPath) + if directory.find('sudo') >= 0: + continue + g_file.changeOwner(self.context.user, dirPath, recursive=True, + cmdType="python") + + # check enter permission + cmd = "su - %s -c 'cd '%s''" % (self.context.user, packageDir) + (status, output) = subprocess.getstatusoutput(cmd) + # if cmd failed, then exit + if status != 0: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + " Error: \n%s" % output) + + # change owner of GaussLog dir + 
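# note: the log dir may still be being created when this runs,
+            # which is why the changeOwner call below retries
+            # (retryFlag=True, retryTime=15, waiteTime=1)
+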
self.context.logger.debug("Changing the owner of Gauss log path.") + user_dir = "%s/%s" % ( + self.context.clusterInfo.logPath, self.context.user) + # the user_dir may not been created now, + # so we need check its exists + if os.path.exists(user_dir): + + g_file.changeOwner(self.context.user, user_dir, recursive=True, + cmdType="shell", retryFlag=True, + retryTime=15, waiteTime=1) + + # check enter permission + cmd = "su - %s -c 'cd '%s''" % (self.context.user, user_dir) + (status, output) = subprocess.getstatusoutput(cmd) + # if cmd failed, then exit + if status != 0: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + " Error: \n%s" % output) + # user can specify log file, + # so we need change the owner of log file alonely + g_file.changeOwner(self.context.user, self.context.logger.logFile, + recursive=False, cmdType="shell") + g_file.changeMode(DefaultValue.FILE_MODE, + self.context.logger.logFile, recursive=False, + cmdType="shell") + + # check enter permission + log_file_dir = os.path.dirname(self.context.logger.logFile) + cmd = "su - %s -c 'cd '%s''" % (self.context.user, log_file_dir) + (status, output) = subprocess.getstatusoutput(cmd) + # if cmd failed, then exit + if status != 0: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + " Error: \n%s" % output) + + # set tool env on all hosts + cmd = "%s -t %s -u %s -l %s -X '%s' -Q %s" % ( + OMCommand.getLocalScript("Local_PreInstall"), + ACTION_SET_TOOL_ENV, + self.context.user, + self.context.localLog, + self.context.xmlFile, + self.context.clusterToolPath) + if self.context.mpprcFile != "": + cmd += " -s '%s' -g %s" % ( + self.context.mpprcFile, self.context.group) + self.context.sshTool.executeCommand(cmd, + "set cluster tool ENV", + DefaultValue.SUCCESS, + [], + self.context.mpprcFile) + cmd = "%s -t %s -u %s -g %s -P %s -l %s" % ( + OMCommand.getLocalScript("Local_PreInstall"), + ACTION_PREPARE_PATH, + self.context.user, + self.context.group, + self.context.clusterToolPath, + self.context.localLog) + # prepare cluster tool package path + self.context.sshTool.executeCommand( + cmd, + "prepare cluster tool package path", + DefaultValue.SUCCESS, + [], + self.context.mpprcFile) + except Exception as e: + raise Exception(str(e)) + + self.context.logger.log( + "Successfully installed the tools in the cluster.", "constant") + + def changeToolEnv(self): + """ + function: + change software tool env path + input:NA + output:NA + """ + try: + # Change software tool env path + cmd = "%s -t %s -u %s -l %s -X '%s' -Q %s" % ( + OMCommand.getLocalScript("Local_PreInstall"), + ACTION_CHANGE_TOOL_ENV, + self.context.user, + self.context.localLog, + self.context.xmlFile, + self.context.clusterToolPath) + if self.context.mpprcFile == "": + DefaultValue.execCommandWithMode( + cmd, + "change software tool env path", + self.context.sshTool) + except Exception as e: + raise Exception(str(e)) + + def checkMappingForHostName(self): + """ + function: check mpping for hostname + input: NA + output: NA + """ + if self.context.localMode or self.context.isSingle: + return + + self.context.logger.log("Checking hostname mapping.", "addStep") + try: + # check hostname mapping + cmd = "%s -t %s -u %s -X '%s' -l '%s'" % ( + OMCommand.getLocalScript("Local_PreInstall"), + ACTION_CHECK_HOSTNAME_MAPPING, + self.context.user, + self.context.xmlFile, + self.context.localLog) + self.context.sshTool.executeCommand(cmd, + "check hostname mapping", + DefaultValue.SUCCESS, + [], + self.context.mpprcFile, + DefaultValue.getCpuSet()) + except Exception 
as e: + raise Exception(str(e)) + + self.context.logger.log("Successfully checked hostname mapping.", + "constant") + + def createTrustForCommonUser(self): + """ + function: + create SSH trust for common user + precodition: + config file /etc/hosts has been modified correctly on local host + input: NA + output: NA + """ + if self.context.localMode or self.context.isSingle: + return + + if createTrustFlag: + return + self.context.logger.log( + "Creating SSH trust for [%s] user." % self.context.user) + try: + # the IP for create trust + allIps = [] + sshIps = self.context.clusterInfo.getClusterSshIps() + # get all IPs + for ips in sshIps: + allIps.extend(ips) + # create trust + self.context.sshTool.createTrust(self.context.user, + self.context.password, allIps, + self.context.mpprcFile) + except Exception as e: + raise Exception(str(e)) + self.context.logger.log( + "Successfully created SSH trust for [%s] user." + % self.context.user) + + def checkOSVersion(self): + """ + function: + check if os version is support + precondition: + postcondition: + input:NA + output:NA + hiden info:support os version + ppp: + """ + self.context.logger.log("Checking OS version.", "addStep") + try: + # Checking OS version + cmd = "%s -t %s -u %s -l %s" % ( + OMCommand.getLocalScript("Local_PreInstall"), + ACTION_CHECK_OS_VERSION, + self.context.user, + self.context.localLog) + DefaultValue.execCommandWithMode( + cmd, + "check OS version", + self.context.sshTool, + self.context.localMode or self.context.isSingle, + self.context.mpprcFile) + except Exception as e: + raise Exception(str(e)) + self.context.logger.log("Successfully checked OS version.", "constant") + + def createOSUser(self): + """ + function: + create os user and create trust for user + precondition: + 1.user group passwd has been initialized + 2.create trust tool has been installed + postcondition: + 1.user has been created + 2.user's trust has been created + input:NA + output:NA + hiden:NA + """ + # single cluster also need to create user without local mode + self.context.logger.debug("Creating OS user and create trust for user") + if self.context.localMode: + return + + global createTrustFlag + try: + # check the interactive mode + # if the interactive mode is True + if not self.context.preMode: + try: + # get the input + if self.context.localMode: + flag = input( + "Are you sure you want to " + "create the user[%s] (yes/no)? " + % self.context.user) + else: + flag = input( + "Are you sure you want to create " + "the user[%s] and create trust for it (yes/no)? 
" + % self.context.user) + while True: + # check the input + if ( + flag.upper() != "YES" + and flag.upper() != "NO" + and flag.upper() != "Y" + and flag.upper() != "N"): + flag = input("Please type 'yes' or 'no': ") + continue + break + + # set the flag for create user trust + self.context.logger.debug( + "Setting the flag for creating user's trust.") + if flag.upper() == "NO" or flag.upper() == "N": + createTrustFlag = True + cmd = "%s -t %s -u %s -l %s" % ( + OMCommand.getLocalScript("Local_PreInstall"), + ACTION_INIT_GAUSSLOG, + self.context.user, + self.context.localLog) + DefaultValue.execCommandWithMode( + cmd, + "init gausslog", + self.context.sshTool, + self.context.isSingle, + self.context.mpprcFile) + return + # check the user is not exist on all nodes + cmd = "%s -t %s -u %s -g %s -l %s" % ( + OMCommand.getLocalScript("Local_PreInstall"), + ACTION_CHECK_OS_USER, + self.context.user, + self.context.group, + self.context.localLog) + DefaultValue.execCommandWithMode(cmd, + "check OS user", + self.context.sshTool, + self.context.isSingle, + self.context.mpprcFile) + self.context.logger.debug( + "Successfully set the flag for creating user's trust") + return + except Exception as e: + i = 0 + # get the password + while i < 3: + self.context.password = self.getUserPasswd( + "cluster user") + DefaultValue.checkPasswordVaild( + self.context.password, + self.context.user, + self.context.clusterInfo) + self.context.passwordsec = self.getUserPasswd( + "cluster user", "again") + + if self.context.password != self.context.passwordsec: + i = i + 1 + self.context.logger.printMessage( + "Sorry. passwords do not match.") + continue + break + + # check the password is not OK + if i == 3: + self.context.logger.printMessage( + "passwd: Have exhausted maximum number " + "of retries for service.") + sys.exit(1) + else: + createTrustFlag = True + cmd = "%s -t %s -u %s -l %s" % ( + OMCommand.getLocalScript("Local_PreInstall"), + ACTION_INIT_GAUSSLOG, + self.context.user, + self.context.localLog) + DefaultValue.execCommandWithMode(cmd, + "init gausslog", + self.context.sshTool, + self.context.isSingle, + self.context.mpprcFile) + return + + self.context.logger.debug( + "Successfully created [%s] user on all nodes." 
+ % self.context.user) + + # create the user on all nodes + # write the password into temporary file + tmp_file = "/tmp/temp.%s" % self.context.user + g_file.createFileInSafeMode(tmp_file) + with open("/tmp/temp.%s" % self.context.user, "w") as fp: + fp.write(self.context.password) + fp.flush() + # change the temporary file permissions + g_file.changeMode(DefaultValue.KEY_FILE_MODE, tmp_file, + recursive=False, cmdType="shell") + + if not self.context.isSingle: + # send the temporary file to all remote nodes + try: + self.context.sshTool.scpFiles( + tmp_file, "/tmp/", + self.context.sshTool.hostNames) + except Exception as e: + cmd = "(if [ -f '/tmp/temp.%s' ];" \ + "then rm -f '/tmp/temp.%s';fi)" % ( + self.context.user, self.context.user) + DefaultValue.execCommandWithMode(cmd, + "delete temporary files", + self.context.sshTool, + self.context.isSingle, + self.context.mpprcFile) + raise Exception(ErrorCode.GAUSS_502["GAUSS_50216"] + % "temporary files") + + # create the user on all nodes + cmd = "%s -t %s -u %s -g %s -l %s" % ( + OMCommand.getLocalScript("Local_PreInstall"), + ACTION_CREATE_OS_USER, + self.context.user, + self.context.group, + self.context.localLog) + DefaultValue.execCommandWithMode(cmd, + "create OS user", + self.context.sshTool, + self.context.isSingle, + self.context.mpprcFile) + + # delete the temporary file on all nodes + cmd = "(if [ -f '/tmp/temp.%s' ];then rm -f '/tmp/temp.%s';fi)" \ + % (self.context.user, self.context.user) + DefaultValue.execCommandWithMode(cmd, + "delete temporary files", + self.context.sshTool, + self.context.isSingle, + self.context.mpprcFile) + + # Successfully created user on all nodes + self.context.logger.log( + "Successfully created [%s] user on all nodes." + % self.context.user) + except Exception as e: + # delete the temporary file on all nodes + cmd = "(if [ -f '/tmp/temp.%s' ];then rm -f '/tmp/temp.%s';fi)" \ + % (self.context.user, self.context.user) + DefaultValue.execCommandWithMode(cmd, + "delete temporary files", + self.context.sshTool, + self.context.isSingle, + self.context.mpprcFile) + raise Exception(str(e)) + + def createDirs(self): + """ + function: create directorys + input: NA + output: NA + """ + self.context.logger.log("Creating cluster's path.", "addStep") + try: + # fix new created path's owner after create user for single cluster + if self.context.isSingle: + self.context.logger.debug( + "Paths need to be fixed owner:%s." 
+ % self.context.needFixOwnerPaths) + for onePath in self.context.needFixOwnerPaths: + g_file.changeOwner(self.context.user, onePath, + recursive=True, cmdType="shell") + + dirName = os.path.dirname(self.context.logFile) + topDirFile = "%s/topDirPath.dat" % dirName + if os.path.exists(topDirFile): + keylist = g_file.readFile(topDirFile) + if keylist != []: + for key in keylist: + g_file.changeOwner(self.context.user, key.strip(), + True, "shell") + + g_file.removeFile(topDirFile) + + # create the directory on all nodes + cmd = "%s -t %s -u %s -g %s -X '%s' -l '%s'" % ( + OMCommand.getLocalScript("Local_PreInstall"), + ACTION_CREATE_CLUSTER_PATHS, + self.context.user, + self.context.group, + self.context.xmlFile, + self.context.localLog) + # check the env file + if self.context.mpprcFile != "": + cmd += " -s '%s'" % self.context.mpprcFile + # exec the cmd + DefaultValue.execCommandWithMode( + cmd, + "create cluster's path", + self.context.sshTool, + self.context.localMode or self.context.isSingle, + self.context.mpprcFile) + except Exception as e: + raise Exception(str(e)) + self.context.logger.log("Successfully created cluster's path.", + "constant") + + def setAndCheckOSParameter(self): + """ + function: set and check OS parameter. + If skipOSSet is true, pass; else call gs_checkos to do it. + input: NA + output: NA + """ + self.context.logger.log("Set and check OS parameter.", "addStep") + try: + # get all node hostnames + NodeNames = self.context.clusterInfo.getClusterNodeNames() + namelist = "" + + # set the localmode + if self.context.localMode or self.context.isSingle: + # localmode + namelist = DefaultValue.GetHostIpOrName() + else: + # Non-native mode + namelist = ",".join(NodeNames) + + # check skip-os-set parameter + if self.context.skipOSSet: + # check the OS parameters + self.checkOSParameter(namelist) + else: + # set and check parameters + self.setOSParameter(namelist) + self.checkOSParameter(namelist) + except Exception as e: + raise Exception(str(e)) + self.context.logger.log("Set and check OS parameter completed.", + "constant") + + def setOSParameter(self, namelist): + """ + function: set and check OS parameter. + If skipOSSet is true, pass; else call gs_checkos to do it. + input: namelist + output: NA + """ + self.context.logger.log("Setting OS parameters.") + + # set OS parameters + cmd = "%s -h %s -i B -l '%s' -X '%s'" % ( + OMCommand.getLocalScript("Gauss_CheckOS"), + namelist, + self.context.localLog, + self.context.xmlFile) + (status, output) = subprocess.getstatusoutput(cmd) + # if cmd failed, then raise + if status != 0 and output.strip() == "": + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + "Error:\n%s." % output) + + self.context.logger.log("Successfully set OS parameters.") + + def checkOSParameter(self, namelist): + """ + check OS parameter. + If skipOSSet is true, pass; else call gs_checkos to do it. + """ + self.context.logger.debug("Checking OS parameters.") + try: + # check the OS parameters + cmd = "%s -h %s -i A -l '%s' -X '%s'" % ( + OMCommand.getLocalScript("Gauss_CheckOS"), + namelist, + self.context.localLog, + self.context.xmlFile) + (status, output) = subprocess.getstatusoutput(cmd) + # if cmd failed, then raise + if status != 0 and output.strip() == "": + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + "Error:\n%s." 
% output) + + # parse the result + result = "" + abnormal_num = 0 + warning_num = 0 + # get the total numbers + for line in output.split('\n'): + if line.find("Total numbers") >= 0: + result = line + break + if result == "": + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + "Error:\n%s." % output) + # type [Total numbers:14. Abnormal numbers:0. Warning number:1.] + try: + # get the abnormal numbers + abnormal_num = int(result.split('.')[1].split(':')[1].strip()) + # get the warning numbers + warning_num = int(result.split('.')[2].split(':')[1].strip()) + except Exception as e: + abnormal_num = 1 + warning_num = 0 + + # get the path where the script is located + current_path = os.path.join( + os.path.dirname(os.path.realpath(__file__)), "./../../") + gs_checkos_path = os.path.realpath( + os.path.join(current_path, "gs_checkos")) + if abnormal_num > 0: + raise Exception( + ErrorCode.GAUSS_524["GAUSS_52400"] + + "\nPlease get more details by \"%s " + "-i A -h %s --detail\"." + % (gs_checkos_path, namelist)) + if warning_num > 0: + self.context.logger.log( + "Warning: Installation environment " + "contains some warning messages." + \ + "\nPlease get more details by \"%s " + "-i A -h %s --detail\"." + % (gs_checkos_path, namelist)) + + except Exception as e: + raise Exception(str(e)) + + self.context.logger.debug("Successfully check OS parameters.") + + def prepareCronService(self): + """ + function: preparing CRON service + input: NA + output: NA + """ + self.context.logger.log("Preparing CRON service.", "addStep") + try: + # Preparing CRON service + cmd = "%s -t %s -u %s -l %s" % ( + OMCommand.getLocalScript("Local_PreInstall"), + ACTION_PREPARE_USER_CRON_SERVICE, + self.context.user, + self.context.localLog) + DefaultValue.execCommandWithMode( + cmd, + "prepare CRON service", + self.context.sshTool, + self.context.localMode or self.context.isSingle, + self.context.mpprcFile) + except Exception as e: + raise Exception(str(e)) + # Successfully prepared CRON service + self.context.logger.log("Successfully prepared CRON service.", + "constant") + + def prepareSshdService(self): + """ + function: preparing SSH service + input: NA + output: NA + """ + self.context.logger.log("Preparing SSH service.", "addStep") + try: + # Preparing SSH service + cmd = "%s -t %s -u %s -X %s -l %s" % ( + OMCommand.getLocalScript("Local_PreInstall"), + ACTION_PREPARE_USER_SSHD_SERVICE, + self.context.user, + self.context.xmlFile, + self.context.localLog) + DefaultValue.execCommandWithMode( + cmd, + "prepare SSH service", + self.context.sshTool, + self.context.localMode or self.context.isSingle, + self.context.mpprcFile) + except Exception as e: + raise Exception(str(e)) + # Successfully prepared SSH service + self.context.logger.log("Successfully prepared SSH service.", + "constant") + + def setEnvParameter(self): + """ + function: setting cluster environmental variables + input: NA + output: NA + """ + pass + + def setLibrary(self): + """ + function: setting the dynamic link library + input: NA + output: NA + """ + self.context.logger.log("Setting the dynamic link library.", "addStep") + try: + # Setting the dynamic link library + cmd = "%s -t %s -u %s -l %s " % ( + OMCommand.getLocalScript("Local_PreInstall"), + ACTION_SET_LIBRARY, + self.context.user, + self.context.localLog) + self.context.logger.debug("Command for setting library: %s" % cmd) + # exec the cmd for set library + DefaultValue.execCommandWithMode( + cmd, + "set library", + self.context.sshTool, + self.context.localMode or 
self.context.isSingle, + self.context.mpprcFile) + except Exception as e: + # failed to set the dynamic link library + raise Exception(str(e)) + # Successfully set the dynamic link library + self.context.logger.log("Successfully set the dynamic link library.", + "constant") + + def setCorePath(self): + """ + function: setting core path + input: NA + output: NA + """ + pass + + def setPssh(self): + """ + function: setting pssh + input: NA + output: NA + """ + pass + + def setSctp(self): + """ + function: setting SCTP service + input: NA + output: NA + """ + self.context.logger.log("Setting SCTP service.", "addStep") + try: + # set SCTP service + cmd = "%s -t %s -u %s -l %s" % ( + OMCommand.getLocalScript("Local_PreInstall"), + ACTION_SET_SCTP, + self.context.user, + self.context.localLog) + # check the mpprcFile + if self.context.mpprcFile != "": + cmd += " -s '%s'" % self.context.mpprcFile + self.context.logger.debug("Command for setting SCTP: %s" % cmd) + + # exec cmd for set SCTP + DefaultValue.execCommandWithMode( + cmd, + "set SCTP", + self.context.sshTool, + self.context.localMode or self.context.isSingle, + self.context.mpprcFile) + except Exception as e: + # failed set SCTP service + raise Exception(str(e)) + # Successfully set SCTP service + self.context.logger.log("Successfully set SCTP service.", "constant") + + def setVirtualIp(self): + """ + function: set the virtual IPs + input: NA + output: NA + """ + pass + + def doPreInstallSucceed(self): + """ + function: setting finish flag + input: NA + output: NA + """ + # Before set finish flag, + # we need to check if path permission is correct in local mode. + self.checkLocalPermission() + + self.context.logger.log("Setting finish flag.", "addStep") + try: + # set finish flag + cmd = "%s -t %s -u %s -l '%s' -X '%s' -Q %s" % ( + OMCommand.getLocalScript("Local_PreInstall"), + ACTION_SET_FINISH_FLAG, + self.context.user, + self.context.localLog, + self.context.xmlFile, + self.context.clusterToolPath) + # check the mpprcFile + if self.context.mpprcFile != "": + cmd += " -s '%s'" % self.context.mpprcFile + # exec the cmd for set finish flag + DefaultValue.execCommandWithMode( + cmd, + "setting finish flag", + self.context.sshTool, + self.context.localMode or self.context.isSingle, + self.context.mpprcFile) + except Exception as e: + # failed set finish flag + raise Exception(str(e)) + # Successfully set finish flag + self.context.logger.log("Successfully set finish flag.", "constant") + + def checkLocalPermission(self): + """ + function: check if path have permission in local mode or single mode. + input : NA + output: NA + """ + # check if path have permission in local mode or single mode. 
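+        # each probe below starts a login shell as the cluster user and
+        # tries to enter the directory, e.g. (hypothetical user/path):
+        #     su - omm -c 'cd /opt/software/openGauss/'
+        # a non-zero exit means some path component is not accessible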
+        if self.context.localMode or self.context.isSingle:
+            dirName = os.path.dirname(os.path.realpath(__file__))
+            packageDir = os.path.realpath(
+                os.path.join(dirName, "./../../../")) + "/"
+
+            # check enter permission
+            cmd = "su - %s -c 'cd '%s''" % (self.context.user, packageDir)
+            (status, output) = subprocess.getstatusoutput(cmd)
+            # if cmd failed, then exit
+            if status != 0:
+                raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd
+                                + " Error: \n%s" % output)
+
+            user_dir = "%s/%s" % (
+                self.context.clusterInfo.logPath, self.context.user)
+
+            # the user_dir may not have been created yet,
+            # so we need to check that it exists
+            if os.path.exists(user_dir):
+                # check enter permission
+                cmd = "su - %s -c 'cd '%s''" % (self.context.user, user_dir)
+                (status, output) = subprocess.getstatusoutput(cmd)
+                # if cmd failed, then exit
+                if status != 0:
+                    raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd
+                                    + " Error: \n%s" % output)
+
+            # check enter permission
+            log_file_dir = os.path.dirname(self.context.logger.logFile)
+
+            cmd = "su - %s -c 'cd '%s''" % (self.context.user, log_file_dir)
+            (status, output) = subprocess.getstatusoutput(cmd)
+            # if cmd failed, then exit
+            if status != 0:
+                raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd
+                                + " Error: \n%s" % output)
+
+    def createStepTmpFile(self):
+        """
+        function: create step tmp file
+        input : NA
+        output: NA
+        """
+        if self.context.localMode or self.context.isSingle:
+            return
+
+        try:
+            global g_stepTrustTmpFile
+            global TRUST_TMP_FILE_DIR
+            TRUST_TMP_FILE_DIR = "/tmp/%s" % TRUST_TMP_FILE
+            g_file.createFileInSafeMode(TRUST_TMP_FILE_DIR)
+            with open(TRUST_TMP_FILE_DIR, "w") as g_stepTrustTmpFile:
+                g_stepTrustTmpFile.flush()
+        except Exception as e:
+            raise Exception(str(e))
+
+    def deleteStepTmpFile(self):
+        """
+        function: delete step tmp file
+        input : NA
+        output: NA
+        """
+        if self.context.localMode or self.context.isSingle:
+            return
+
+        try:
+            cmd = "rm -rf '%s'" % TRUST_TMP_FILE_DIR
+            self.context.sshTool.executeCommand(cmd, "delete step tmp file")
+        except Exception as e:
+            self.context.logger.error(str(e))
+
+    def checkEnvFile(self):
+        """
+        function: check the env file on all nodes
+        input : NA
+        output: NA
+        """
+        if self.context.localMode or self.context.isSingle:
+            return
+
+        try:
+            cmd = "%s -t %s -u %s -l %s" % (
+                OMCommand.getLocalScript("Local_PreInstall"),
+                ACTION_CHECK_ENVFILE,
+                self.context.user,
+                self.context.localLog)
+            if self.context.mpprcFile != "":
+                cmd += " -s '%s'" % self.context.mpprcFile
+            self.context.sshTool.executeCommand(cmd, "check env file")
+        except Exception as e:
+            raise Exception(str(e))
+
+    def checkDiskSpace(self):
+        """
+        function: check the remaining disk space of GAUSSHOME for OLAP
+        input: NA
+        output: NA
+        """
+        pass
+
+    def setHostIpEnv(self):
+        """
+        function: set host ip env
+        input : NA
+        output : NA
+        """
+        pass
+
+    def checkRepeat(self):
+        """
+        function: check for a repeated preinstall
+        input : NA
+        output : NA
+        """
+        gphome = gausshome = pghost = gausslog \
+            = agent_path = agent_log_path = ""
+        if self.context.mpprcFile and os.path.isfile(self.context.mpprcFile):
+            source_file = self.context.mpprcFile
+        elif self.context.mpprcFile:
+            self.context.logger.debug(
+                "Environment file does not exist,"
+                " skip the repeat check.")
+            return
+        elif os.path.isfile(
+                os.path.join("/home", "%s/.bashrc" % self.context.user)):
+            source_file = os.path.join("/home",
+                                       "%s/.bashrc" % self.context.user)
+        else:
+            self.context.logger.debug(
+                "There is no environment file, skip the repeat check.")
+            return
+        with
open(source_file, 'r') as f: + env_list = f.readlines() + new_env_list = [] + if not self.context.mpprcFile: + with open(os.path.join("/etc", "profile"), "r") as etc_file: + gp_home_env = etc_file.readlines() + gphome_env_list = [env.replace('\n', '') for env in gp_home_env] + for env in gphome_env_list: + if env.startswith("export GPHOME="): + if len(new_env_list) != 0: + new_env_list = [] + new_env_list.append(env.strip()) + + new_env_list.extend([env.replace('\n', '') for env in env_list]) + if "export GAUSS_ENV=2" not in new_env_list: + self.context.logger.debug( + "There is no install cluster exist. " + "Skip check repeat install.") + return + for env in new_env_list: + if env.startswith("export GPHOME=") and env.split('=')[1] != "": + gphome = env.split('=')[1] + if env.startswith("export GAUSSHOME="): + gausshome = env.split('=')[1] + if env.startswith("export PGHOST="): + pghost = env.split('=')[1] + if env.startswith("export GAUSSLOG="): + gausslog = env.split('=')[1] + if env.startswith("export AGENTPATH="): + agent_path = env.split('=')[1] + if env.startswith("export AGENTLOGPATH="): + agent_log_path = env.split('=')[1] + + gaussdbToolPath = DefaultValue.getPreClusterToolPath( + self.context.user, + self.context.xmlFile) + gaussdbAppPath = self.context.getOneClusterConfigItem( + "gaussdbAppPath", + self.context.xmlFile) + DefaultValue.checkPathVaild(gaussdbAppPath) + tmpMppdbPath = self.context.clusterInfo.readClusterTmpMppdbPath( + self.context.user, self.context.xmlFile) + gaussdbLogPath = self.context.clusterInfo.readClusterLogPath( + self.context.xmlFile) + agentToolPath = self.context.getOneClusterConfigItem( + "agentToolPath", + self.context.xmlFile) + DefaultValue.checkPathVaild(agentToolPath) + agentLogPath = self.context.getOneClusterConfigItem( + "agentLogPath", + self.context.xmlFile) + DefaultValue.checkPathVaild(agentLogPath) + if gphome and gphome.strip() != gaussdbToolPath: + raise Exception( + ErrorCode.GAUSS_527["GAUSS_52704"] % "preinstall repeat" + + "gaussdbToolPath [%s] is not same with environment[%s]" % ( + gaussdbToolPath, gphome)) + if gausshome and gausshome.strip() != gaussdbAppPath: + raise Exception( + ErrorCode.GAUSS_527["GAUSS_52704"] % "preinstall repeat" + + "gaussdbAppPath [%s] is not same with environment[%s]" % ( + gaussdbAppPath, gausshome)) + if pghost and pghost.strip() != tmpMppdbPath: + raise Exception( + ErrorCode.GAUSS_527["GAUSS_52704"] % "preinstall repeat" + + "tmpMppdbPath [%s] is not same with environment[%s]" % ( + tmpMppdbPath, pghost)) + if gausslog and gausslog.strip() != os.path.join( + gaussdbLogPath.strip(), self.context.user): + raise Exception( + ErrorCode.GAUSS_527["GAUSS_52704"] % "preinstall repeat" + + "gaussdbLogPath [%s] is not same with environment[%s]" + % (os.path.join(gaussdbLogPath.strip(), self.context.user), + gausslog)) + if agent_path and agentToolPath \ + and agent_path.strip() != agentToolPath.strip(): + raise Exception( + ErrorCode.GAUSS_527["GAUSS_52704"] % "preinstall repeat" + + "agentToolPath [%s] is not same with environment[%s]" % ( + agentToolPath, agent_path)) + if agent_log_path \ + and agentLogPath \ + and agent_log_path.strip() != agentLogPath.strip(): + raise Exception( + ErrorCode.GAUSS_527["GAUSS_52704"] % "preinstall repeat" + + "agentLogPath [%s] is not same with environment[%s]" % ( + agentLogPath, agent_log_path)) + + self.context.logger.debug("Preinstall check repeat success.") + + def checkInstanceDir(self): + """ + function : Check whether the instance path is in the gausshome path + 
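e.g. (hypothetical paths): a datanode datadir of /gauss/app/dn1
+        directly under appPath /gauss/app is rejected with GAUSS_50232
+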
input : None + output : None + """ + appPath = self.context.clusterInfo.appPath + self.checkRepeat() + for dbNode in self.context.clusterInfo.dbNodes: + # dn + for dataInst in dbNode.datanodes: + if os.path.dirname(dataInst.datadir) == appPath: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50232"] % ( + dataInst.datadir, appPath)) + + def checkOSSoftware(self): + """ + function: setting the dynamic link library + input: NA + output: NA + """ + self.context.logger.log("Checking OS software.", "addStep") + try: + # Checking software + cmd = "%s -t %s -u %s -l %s " % ( + OMCommand.getLocalScript("Local_PreInstall"), + ACTION_CHECK_OS_SOFTWARE, + self.context.user, + self.context.localLog) + self.context.logger.debug("Checking OS software: %s" % cmd) + # exec the cmd for Checking software + DefaultValue.execCommandWithMode( + cmd, + "check software", + self.context.sshTool, + self.context.localMode or self.context.isSingle, + self.context.mpprcFile) + except Exception as e: + # failed to Check software + raise Exception(str(e)) + # Successfully Check software + self.context.logger.log("Successfully check os software.", + "constant") + + def get_package_path(self): + """ + get package path, then can get script path, /package_path/script/ + :return: + """ + dir_name = os.path.dirname(os.path.realpath(__file__)) + package_dir = os.path.join(dir_name, "./../../../") + return os.path.realpath(package_dir) + + def doPreInstall(self): + """ + function: the main process of preinstall + input: NA + output: NA + """ + self.context.logger.debug( + "gs_preinstall execution takes %s steps in total" % \ + ClusterCommand.countTotalSteps( + "gs_preinstall", "", + self.context.localMode or self.context.isSingle)) + # Check whether the instance directory + # conflicts with the application directory. + self.checkInstanceDir() + # install tools phase1 + self.installToolsPhase1() + + # no need do the following steps in local mode + # create tmp file + self.createStepTmpFile() + # exchange user key for root user + self.createTrustForRoot() + # distribute server package + # set HOST_IP env + self.setHostIpEnv() + self.distributePackages() + # create user and exchange keys for database user + self.createOSUser() + # prepare sshd service for user. + # This step must be nearly after createOSUser, + # which needs sshd service to be restarted. 
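+        # note: the cluster-wide steps around here (root trust creation,
+        # package distribution, hostname-mapping check, common-user trust,
+        # step tmp file handling) each return early in local or
+        # single-node mode; see the guards at the top of those methods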
+        self.prepareSshdService()
+        # check env file
+        self.checkEnvFile()
+        # install tools phase2
+        self.installToolsPhase2()
+        # check whether the /etc/hosts file is correct
+        self.checkMappingForHostName()
+        # exchange user keys for the common user
+        self.createTrustForCommonUser()
+        # change tool env path
+        self.changeToolEnv()
+        # delete tmp file
+        self.deleteStepTmpFile()
+        # end of the steps that are skipped in local mode
+        # check software
+        self.checkOSSoftware()
+        # check os version
+        self.checkOSVersion()
+        # create path and set mode
+        self.createDirs()
+
+        # set Sctp
+        if not DefaultValue.checkDockerEnv():
+            self.setSctp()
+        # set os parameters
+        self.setAndCheckOSParameter()
+        # prepare cron service for user
+        self.prepareCronService()
+        # set environment parameters
+        self.setEnvParameter()
+        # set virtual IP
+        self.setVirtualIp()
+        # set library
+        self.setLibrary()
+        # set core path
+        self.setCorePath()
+        # set pssh
+        self.setPssh()
+        self.setArmOptimization()
+        # fix server package mode
+        self.fixServerPackageOwner()
+
+        # set the user env and a flag indicating that
+        # preinstall.py has been executed successfully
+        self.doPreInstallSucceed()
+
+        self.context.logger.log("Preinstallation succeeded.")
+
+    def run(self):
+        """
+        function: run method
+        """
+        try:
+            # do preinstall option
+            self.doPreInstall()
+            # close log file
+            self.context.logger.closeLog()
+        except Exception as e:
+            self.deleteStepTmpFile()
+            for rmPath in self.context.needFixOwnerPaths:
+                if os.path.isfile(rmPath):
+                    g_file.removeFile(rmPath)
+                elif os.path.isdir(rmPath):
+                    g_file.removeDirectory(rmPath)
+            self.context.logger.logExit(str(e))
+        sys.exit(0)
diff --git a/script/impl/preinstall/__init__.py b/script/impl/preinstall/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/script/impl/uninstall/OLAP/UninstallImplOLAP.py b/script/impl/uninstall/OLAP/UninstallImplOLAP.py
new file mode 100644
index 0000000..c9743be
--- /dev/null
+++ b/script/impl/uninstall/OLAP/UninstallImplOLAP.py
@@ -0,0 +1,96 @@
+# -*- coding:utf-8 -*-
+# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+#
+# openGauss is licensed under Mulan PSL v2.
+# You can use this software according to the terms
+# and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+#
+# http://license.coscl.org.cn/MulanPSL2
+#
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+# ---------------------------------------------------------------------------- +import sys + +sys.path.append(sys.path[0] + "/../../") + +from gspylib.common.Common import DefaultValue +from gspylib.os.gsfile import g_file +from impl.uninstall.UninstallImpl import UninstallImpl + + +class UninstallImplOLAP(UninstallImpl): + """ + init the command options + save command line parameter values + """ + + def __init__(self, unstallation): + self.logFile = unstallation.logFile + self.cleanInstance = unstallation.cleanInstance + + self.localLog = unstallation.localLog + self.user = unstallation.user + self.group = unstallation.group + self.mpprcFile = unstallation.mpprcFile + self.localMode = unstallation.localMode + self.logger = unstallation.logger + self.sshTool = unstallation.sshTool + self.tmpDir = DefaultValue.getTmpDirFromEnv(self.user) + try: + # Initialize the unstallation.clusterInfo variable + unstallation.initClusterInfoFromStaticFile(self.user) + self.clusterInfo = unstallation.clusterInfo + nodeNames = self.clusterInfo.getClusterNodeNames() + # Initialize the self.sshTool variable + unstallation.initSshTool(nodeNames, + DefaultValue.TIMEOUT_PSSH_UNINSTALL) + self.sshTool = unstallation.sshTool + except Exception as e: + self.logger.logExit(str(e)) + + def checkEnv(self): + """ + function: check if GAUSS_ENV is 2 + input : NA + output: NA + """ + try: + DefaultValue.checkUser(self.user) + except Exception as e: + self.logger.exitWithError(str(e)) + + def ReCleanEtcdPath(self): + """ + function: make sure the etcd path is clean. + input : NA + output: NA + """ + # check if need delete instance + if (not self.cleanInstance): + self.logger.debug("No need to redelete etcd path.") + return + + if (self.localMode): + for dbnode in self.clusterInfo.dbNodes: + if (dbnode.name == DefaultValue.GetHostIpOrName()): + if (len(dbnode.etcds) > 0): + etcdDir = dbnode.etcds[0].datadir + self.logger.debug("Clean etcd path %s in node: %s." % ( + etcdDir, dbnode.name)) + g_file.cleanDirectoryContent(etcdDir) + else: + for dbnode in self.clusterInfo.dbNodes: + if (len(dbnode.etcds) > 0): + etcdDir = dbnode.etcds[0].datadir + cmd = g_file.SHELL_CMD_DICT["cleanDir4"] % etcdDir + self.logger.debug("Clean etcd path %s in node: %s." % ( + etcdDir, dbnode.name)) + (status, output) = self.sshTool.getSshStatusOutput(cmd, [ + dbnode.name], self.mpprcFile) + if (status[dbnode.name] != DefaultValue.SUCCESS): + self.logger.debug("Clean etcd failed: %s" % output) diff --git a/script/impl/uninstall/OLAP/__init__.py b/script/impl/uninstall/OLAP/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/script/impl/uninstall/UninstallImpl.py b/script/impl/uninstall/UninstallImpl.py new file mode 100644 index 0000000..e7be509 --- /dev/null +++ b/script/impl/uninstall/UninstallImpl.py @@ -0,0 +1,362 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. 
+# ---------------------------------------------------------------------------- +import sys +import subprocess +import time +import os + +sys.path.append(sys.path[0] + "/../") + +from gspylib.common.Common import DefaultValue +from gspylib.common.OMCommand import OMCommand +from gspylib.common.ErrorCode import ErrorCode +from gspylib.os.gsfile import g_file + + +class UninstallImpl: + """ + init the command options + save command line parameter values + """ + + def __init__(self, unstallation): + """ + function: constructor + """ + pass + + def checkLogFilePath(self): + """ + function: Check log file path + input : NA + output: NA + """ + clusterPath = [] + try: + # get tool path + clusterPath.append(DefaultValue.getClusterToolPath(self.user)) + # get tmp path + tmpDir = DefaultValue.getTmpDirFromEnv() + clusterPath.append(tmpDir) + # get cluster path + hostName = DefaultValue.GetHostIpOrName() + dirs = self.clusterInfo.getClusterDirectorys(hostName, False) + # loop all cluster path + for checkdir in dirs.values(): + clusterPath.extend(checkdir) + self.logger.debug("Cluster paths %s." % clusterPath) + + # check directory + g_file.checkIsInDirectory(self.logFile, clusterPath) + except Exception as e: + self.logger.logExit(str(e)) + + def checkUninstall(self): + """ + function: Check uninstall + input : NA + output: NA + """ + # Checking uninstallation + self.logger.log("Checking uninstallation.", "addStep") + # use check uninstall to check every nodes + cmd = "%s -R '%s' -U %s -l %s" % ( + OMCommand.getLocalScript("Local_Check_Uninstall"), + self.clusterInfo.appPath, self.user, self.localLog) + # check if need to clean instance + if (self.cleanInstance): + cmd += " -d" + self.logger.debug("Command for checking uninstallation: " + cmd) + DefaultValue.execCommandWithMode(cmd, "check uninstallation.", + self.sshTool, self.localMode, + self.mpprcFile) + self.logger.log("Successfully checked uninstallation.", "constant") + + def StopCluster(self): + """ + function: Stopping the cluster + input : NA + output: NA + """ + self.logger.log("Stopping the cluster.", "addStep") + # get the static config + static_config = \ + "%s/bin/cluster_static_config" % self.clusterInfo.appPath + static_config_bak = \ + "%s/bin/cluster_static_config_bak" % self.clusterInfo.appPath + # if cluster_static_config_bak exists + # and static_config does not exists, mv it to static_config + if (not os.path.exists(static_config) and os.path.exists( + static_config_bak)): + cmd = "mv %s %s" % (static_config_bak, static_config) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + self.logger.debug("The cmd is %s " % cmd) + self.logger.error("rename cluster_static_config_bak failed") + self.logger.debug("Error:\n%s" % output) + # if path not exits, can not stop cluster + if (not os.path.exists(static_config)): + self.logger.debug("Failed to stop the cluster.", "constant") + return + + # Stop cluster applications + cmd = "source %s; %s -U %s -R %s -l %s" % ( + self.mpprcFile, OMCommand.getLocalScript("Local_StopInstance"), + self.user, self.clusterInfo.appPath, self.localLog) + self.logger.debug("Command for stop cluster: %s" % cmd) + DefaultValue.execCommandWithMode(cmd, "Stop cluster", self.sshTool, + self.localMode, self.mpprcFile) + self.logger.log("Successfully stopped cluster.") + + def CheckAndKillAliveProc(self, procFileName): + """ + function: When uninstall gaussdb cluster. After it is stopped, + We must make sure that all process + about gaussdb cluster have been stopped. 
Not including + om_monitor. + input : procFileName + output: NA + """ + try: + failedNodes = [] + validNodeName = self.clusterInfo.getClusterNodeNames() + # the command for killing all process + cmd_check_kill = DefaultValue.killInstProcessCmd(procFileName, + True, 9, False) + # use sshTool to kill process in all nodes + (status, output) = self.sshTool.getSshStatusOutput(cmd_check_kill, + validNodeName) + # get the node which not be killed + for node in validNodeName: + if (status[node] != DefaultValue.SUCCESS): + failedNodes.append(node) + # kill process in nodes again + if (len(failedNodes)): + time.sleep(1) + (status, output) = self.sshTool.getSshStatusOutput( + cmd_check_kill, failedNodes) + for node in failedNodes: + # if still fail, throw error + if (status[node] != DefaultValue.SUCCESS): + raise Exception(output) + + except Exception as e: + raise Exception(str(e)) + + def CleanInstance(self): + """ + function: clean instance + input : NA + output : NA + """ + self.logger.debug("Deleting instance.", "addStep") + # check if need delete instance + if (not self.cleanInstance): + self.logger.debug("No need to delete data.", "constant") + return + + # Clean instance data + cmd = "%s -U %s -l %s" % ( + OMCommand.getLocalScript("Local_Clean_Instance"), self.user, + self.localLog) + self.logger.debug("Command for deleting instance: %s" % cmd) + DefaultValue.execCommandWithMode(cmd, "delete instances data.", + self.sshTool, self.localMode, + self.mpprcFile) + + # clean upgrade temp backup path + upgrade_bak_dir = DefaultValue.getBackupDir(self.user, "upgrade") + cmd = g_file.SHELL_CMD_DICT["cleanDir"] % ( + upgrade_bak_dir, upgrade_bak_dir, upgrade_bak_dir) + DefaultValue.execCommandWithMode(cmd, + "delete backup directory for upgrade", + self.sshTool, self.localMode, + self.mpprcFile) + + self.logger.log("Successfully deleted instances.", "constant") + + def CleanTmpFiles(self): + """ + function: clean temp files + input : NA + output: NA + """ + self.logger.debug("Deleting temporary files.", "addStep") + try: + # copy record_app_directory file + tmpDir = DefaultValue.getTmpDirFromEnv(self.user) + if tmpDir == "": + raise Exception(ErrorCode.GAUSS_518["GAUSS_51800"] % "$PGHOST") + upgradeBackupPath = os.path.join(tmpDir, "binary_upgrade") + copyPath = os.path.join(upgradeBackupPath, "record_app_directory") + appPath = DefaultValue.getInstallDir(self.user) + if appPath == "": + raise Exception(ErrorCode.GAUSS_518["GAUSS_51800"] % "$PGHOST") + if copyPath != "": + copyCmd = "(if [ -f '%s' ];then cp -f -p '%s' '%s/';fi)" % ( + copyPath, copyPath, appPath) + DefaultValue.execCommandWithMode( + copyCmd, + "copy record_app_directory file", + self.sshTool, self.localMode, + self.mpprcFile) + + cmd = g_file.SHELL_CMD_DICT["cleanDir"] % ( + self.tmpDir, self.tmpDir, self.tmpDir) + # clean dir of PGHOST + DefaultValue.execCommandWithMode(cmd, "delete temporary files", + self.sshTool, self.localMode, + self.mpprcFile) + except Exception as e: + self.logger.logExit(str(e)) + self.logger.debug("Successfully deleted temporary files.", "constant") + + def UninstallApp(self): + """ + function: Uninstall application + input : NA + output: NA + """ + self.logger.log("Uninstalling application.", "addStep") + cmd = "%s -R '%s' -U %s -l %s -T" % ( + OMCommand.getLocalScript("Local_Uninstall"), + self.clusterInfo.appPath, + self.user, self.localLog) + self.logger.debug("Command for Uninstalling: %s" % cmd) + # clean application + DefaultValue.execCommandWithMode(cmd, "uninstall application", + self.sshTool, 
self.localMode, + self.mpprcFile) + self.logger.log("Successfully uninstalled application.", "constant") + + def CleanStaticConfFile(self): + """ + function: clean static conf file + input : NA + output: NA + """ + self.logger.debug("Deleting static configuration file.", "addStep") + try: + cmd = "rm -rf '%s'/bin " % self.clusterInfo.appPath + # delete bin dir in GAUSSHOME + DefaultValue.execCommandWithMode( + cmd, + "delete cluster static configuration file.", + self.sshTool, self.localMode, + self.mpprcFile) + except Exception as e: + self.logger.exitWithError(str(e)) + self.logger.debug("Successfully deleted static configuration file.", + "constant") + + def CleanRackFile(self): + """ + function: clean rack information file + input : NA + output: NA + """ + gp_home = DefaultValue.getEnv("GPHOME") + if os.path.exists(gp_home): + gp_home = os.path.realpath(gp_home) + rack_conf_file = os.path.realpath( + os.path.join(gp_home, "script/gspylib/etc/conf/rack_info.conf")) + if os.path.isfile(rack_conf_file): + cmd = "rm -f %s" % rack_conf_file + DefaultValue.execCommandWithMode(cmd, + "Deleted rack information file.", + self.sshTool, self.localMode, + mpprcFile=self.mpprcFile) + self.logger.debug("Successfully deleted rack information file.") + + def CleanLog(self): + """ + function: Clean default log + input : NA + output: NA + """ + self.logger.debug("Deleting log.", "addStep") + # check if need delete instance + if (not self.cleanInstance): + self.logger.debug("No need to delete data.", "constant") + return + + try: + # clean log + userLogDir = DefaultValue.getUserLogDirWithUser(self.user) + cmd = g_file.SHELL_CMD_DICT["cleanDir"] % ( + userLogDir, userLogDir, userLogDir) + # delete log dir + DefaultValue.execCommandWithMode(cmd, "delete user log directory", + self.sshTool, self.localMode, + self.mpprcFile) + except Exception as e: + self.logger.exitWithError(str(e)) + self.logger.debug("Successfully deleted log.", "constant") + + def checkEnv(self): + """ + function: check if GAUSS_ENV is 2 + input : NA + output: NA + """ + pass + + def ReCleanEtcdPath(self): + """ + function: make sure the etcd path is clean. + input : NA + output: NA + """ + pass + + def ReKillEtcdProcess(self): + """ + function: make sure the etcd process is clean. + input : NA + output: NA + """ + if (self.localMode): + DefaultValue.KillAllProcess(self.user, "etcd") + # kill process in all nodes + else: + etcd_file = "%s/bin/etcd" % self.clusterInfo.appPath + self.CheckAndKillAliveProc(etcd_file) + + def run(self): + """ + function: Uninstall database cluster + input : NA + output: NA + """ + try: + self.checkEnv() + self.checkLogFilePath() + # do uninstall + self.checkUninstall() + self.StopCluster() + self.CleanInstance() + self.CleanTmpFiles() + self.UninstallApp() + self.ReCleanEtcdPath() + self.ReKillEtcdProcess() + self.logger.closeLog() + self.CleanStaticConfFile() + self.CleanRackFile() + self.CleanLog() + self.logger.log("Uninstallation succeeded.") + except Exception as e: + self.logger.logExit(str(e)) diff --git a/script/impl/uninstall/__init__.py b/script/impl/uninstall/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/script/impl/upgrade/OLAP/UpgradeImplOLAP.py b/script/impl/upgrade/OLAP/UpgradeImplOLAP.py new file mode 100644 index 0000000..f8fbd23 --- /dev/null +++ b/script/impl/upgrade/OLAP/UpgradeImplOLAP.py @@ -0,0 +1,34 @@ +#-*- coding:utf-8 -*- +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. 
+# You can use this software according to the terms
+# and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+#
+#          http://license.coscl.org.cn/MulanPSL2
+#
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+# ----------------------------------------------------------------------------
+
+import sys
+
+sys.path.append(sys.path[0] + "/../../../")
+from impl.upgrade.UpgradeImpl import UpgradeImpl
+
+
+#############################################################################
+# Global variables
+#############################################################################
+
+
+class UpgradeImplOLAP(UpgradeImpl):
+    """
+    The class is used to perform the upgrade
+    """
+    def __init__(self, upgrade):
+        super(UpgradeImplOLAP, self).__init__(upgrade)
diff --git a/script/impl/upgrade/OLAP/__init__.py b/script/impl/upgrade/OLAP/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/script/impl/upgrade/UpgradeConst.py b/script/impl/upgrade/UpgradeConst.py
new file mode 100644
index 0000000..0280a19
--- /dev/null
+++ b/script/impl/upgrade/UpgradeConst.py
@@ -0,0 +1,162 @@
+#-*- coding:utf-8 -*-
+
+#############################################################################
+# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+#
+# openGauss is licensed under Mulan PSL v2.
+# You can use this software according to the terms
+# and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+#
+#          http://license.coscl.org.cn/MulanPSL2
+#
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+# ----------------------------------------------------------------------------
+# Const values
+#############################################################################
+
+UPGRADE_TIMEOUT_CLUSTER_START = 600
+UPGRADE_TIMEOUT_CLUSTER_STOP = 1800
+
+# because the numbers are floats, mind the precision
+DELTA_NUM = 0.000001
+# external action
+ACTION_CHOSE_STRATEGY = "chose-strategy"
+ACTION_INPLACE_UPGRADE = "inplace-binary-upgrade"
+# grey upgrade
+ACTION_SMALL_UPGRADE = "small-binary-upgrade"
+ACTION_LARGE_UPGRADE = "large-binary-upgrade"
+# ACTION_ONLINE_UPGRADE is only used to record the online upgrade step;
+# it is not provided to users as an external action. If we used
+# ACTION_BINARY_UPGRADE, it would be confused with off-line binary upgrade.
+ACTION_AUTO_UPGRADE = "auto-upgrade"
+ACTION_AUTO_ROLLBACK = "auto-rollback"
+ACTION_COMMIT_UPGRADE = "commit-upgrade"
+
+ACTION_SYNC_CONFIG = "sync_config"
+ACTION_SWITCH_PROCESS = "switch_little_effect_process"
+ACTION_SWITCH_BIN = "switch_bin"
+ACTION_COPY_CERTS = "copy_certs"
+ACTION_CLEAN_INSTALL_PATH = "clean_install_path"
+
+ACTION_TOUCH_INIT_FILE = "touch_init_file"
+ACTION_CHECK_VERSION = "check_version"
+
+ACTION_BACKUP_CONFIG = "backup_config"
+ACTION_RESTORE_CONFIG = "restore_config"
+ACTION_INPLACE_BACKUP = "inplace_backup"
+ACTION_INPLACE_RESTORE = "inplace_restore"
+ACTION_CHECK_GUC = "check_guc"
+ACTION_BACKUP_HOTPATCH = "backup_hotpatch"
+ACTION_ROLLBACK_HOTPATCH = "rollback_hotpatch"
+ACTION_UPGRADE_SQL_FOLDER = "prepare_upgrade_sql_folder"
+ACTION_BACKUP_OLD_CLUSTER_DB_AND_REL = "backup_old_cluster_db_and_rel"
+ACTION_UPDATE_CATALOG = "update_catalog"
+ACTION_BACKUP_OLD_CLUSTER_CATALOG_PHYSICAL_FILES = \
+    "backup_old_cluster_catalog_physical_files"
+ACTION_RESTORE_OLD_CLUSTER_CATALOG_PHYSICAL_FILES = \
+    "restore_old_cluster_catalog_physical_files"
+ACTION_CLEAN_OLD_CLUSTER_CATALOG_PHYSICAL_FILES = \
+    "clean_old_cluster_catalog_physical_files"
+ACTION_REPLACE_PG_PROC_FILES = "replace_pg_proc_files"
+ACTION_CREATE_PG_PROC_MAPPING_FILE = "create_pg_proc_mapping_file"
+ACTION_CREATE_NEW_CSV_FILE = "create_new_csv_file"
+ACTION_RESTORE_DYNAMIC_CONFIG_FILE = "restore_dynamic_config_file"
+ACTION_GREY_SYNC_GUC = "grey_sync_guc"
+ACTION_GREY_UPGRADE_CONFIG_SYNC = "grey_upgrade_config_sync"
+ACTION_SWITCH_DN = "switch_dn"
+ACTION_GET_LSN_INFO = "get_lsn_info"
+ACTION_GREY_RESTORE_CONFIG = "grey_restore_config"
+ACTION_GREY_RESTORE_GUC = "grey_restore_guc"
+ACTION_CLEAN_CONF_BAK_OLD = "clean_conf_bak_old"
+
+OPTION_PRECHECK = "before"
+OPTION_POSTCHECK = "after"
+INPLACE_UPGRADE_STEP_FILE = "upgrade_step.dat"
+GREY_UPGRADE_STEP_FILE = "upgrade_step.csv"
+CLUSTER_CMSCONF_FILE = "cluster_cmsconf.json"
+CLUSTER_CNSCONF_FILE = "cluster_cnconf.json"
+READONLY_MODE = "read_only_mode"
+TMP_DYNAMIC_DN_INFO = "upgrade_gauss_dn_status.dat"
+GET_LSN_SQL_FILE = "get_lsn_sql"
+INPLACE_UPGRADE_FLAG_FILE = "inplace_upgrade_flag"
+POSTGRESQL_CONF_BAK_OLD = "postgresql.conf.bak.old"
+
+# step flag
+BINARY_UPGRADE_NO_NEED_ROLLBACK = -2
+INVALID_UPRADE_STEP = -1
+# binary upgrade step
+BINARY_UPGRADE_STEP_INIT_STATUS = 0
+BINARY_UPGRADE_STEP_BACKUP_STATUS = 1
+BINARY_UPGRADE_STEP_STOP_NODE = 2
+BINARY_UPGRADE_STEP_BACKUP_VERSION = 3
+BINARY_UPGRADE_STEP_UPGRADE_APP = 4
+BINARY_UPGRADE_STEP_START_NODE = 5
+BINARY_UPGRADE_STEP_PRE_COMMIT = 6
+
+ERR_GREP_NO_RESULT = 256
+
+
+# grey upgrade
+class GreyUpgradeStep:
+    def __init__(self):
+        pass
+
+    (STEP_INIT_STATUS,
+     STEP_UPDATE_CATALOG,
+     STEP_SWITCH_NEW_BIN,
+     STEP_UPGRADE_PROCESS,
+     STEP_UPDATE_POST_CATALOG,
+     STEP_PRE_COMMIT,
+     STEP_BEGIN_COMMIT
+     ) = range(0, 7)
+
+
+BACKUP_DIR_LIST = ['global', 'pg_clog', 'pg_xlog', 'pg_multixact',
+                   'pg_replslot', 'pg_notify', 'pg_subtrans', 'pg_cbm',
+                   'pg_twophase']
+
+
+BACKUP_DIR_LIST_BASE = ['global', 'pg_clog', 'pg_csnlog']
+BACKUP_DIR_LIST_64BIT_XID = ['pg_multixact', 'pg_replslot', 'pg_notify',
+                             'pg_subtrans', 'pg_twophase']
+VALUE_OFF = ["off", "false", "0", "no"]
+VALUE_ON = ["on", "true", "1", "yes"]
+DN_GUC = ["upgrade_mode", "enable_stream_replication"]
+FIRST_GREY_UPGRADE_NUM = 92
+
+UPGRADE_PRECOMMIT_NUM = 0.001
+UPGRADE_UNSET_NUM = 0
+
+CMSERVER_GUC_DEFAULT = {"enable_transaction_read_only": "on",
+                        "coordinator_heartbeat_timeout": "1800",
+                        "instance_failover_delay_timeout": 0,
+                        "cmserver_ha_heartbeat_timeout": 8}
+CMSERVER_GUC_CLOSE = {"enable_transaction_read_only": "off",
+                      "coordinator_heartbeat_timeout": "0",
+                      "instance_failover_delay_timeout": 40,
+                      "cmserver_ha_heartbeat_timeout": 20}
+# Script name
+GS_UPGRADECTL = "gs_upgradectl"
+# table schema and table name
+UPGRADE_SCHEMA = "on_upgrade_69954349032535120"
+RECORD_NODE_STEP = "record_node_step"
+READ_STEP_FROM_FILE_FLAG = "read_step_from_file_flag"
+RECORD_UPGRADE_DIR = "record_app_directory"
+XLOG_BACKUP_INFO = "xlog_backup_info.json"
+OLD = "old"
+NEW = "new"
+# upgrade sql sha file and sql file
+UPGRADE_SQL_SHA = "upgrade_sql.sha256"
+UPGRADE_SQL_FILE = "upgrade_sql.tar.gz"
+
+COMBIN_NUM = 30
+ON_INPLACE_UPGRADE = "IsInplaceUpgrade"
+MAX_APP_SIZE = 2000
+UPGRADE_VERSION_64bit_xid = 91.208
+ENABLE_STREAM_REPLICATION_VERSION = "92.149"
+ENABLE_STREAM_REPLICATION_NAME = "enable_stream_replication"
diff --git a/script/impl/upgrade/UpgradeImpl.py b/script/impl/upgrade/UpgradeImpl.py
new file mode 100644
index 0000000..d61e5ca
--- /dev/null
+++ b/script/impl/upgrade/UpgradeImpl.py
@@ -0,0 +1,5842 @@
+# -*- coding:utf-8 -*-
+# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+#
+# openGauss is licensed under Mulan PSL v2.
+# You can use this software according to the terms
+# and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+#
+#          http://license.coscl.org.cn/MulanPSL2
+#
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+# ---------------------------------------------------------------------------- +import os +import sys +import subprocess +import time +import timeit +import json +import re +import csv +import traceback +import copy +import random + +from datetime import datetime, timedelta +from gspylib.common.Common import DefaultValue, ClusterCommand, \ + ClusterInstanceConfig +from gspylib.common.DbClusterInfo import instanceInfo, \ + dbNodeInfo, dbClusterInfo, compareObject +from gspylib.common.OMCommand import OMCommand +from gspylib.common.ErrorCode import ErrorCode +from gspylib.threads.SshTool import SshTool +from gspylib.common.VersionInfo import VersionInfo +from gspylib.common.DbClusterStatus import DbClusterStatus +from gspylib.os.gsplatform import g_Platform +from gspylib.os.gsfile import g_file +from gspylib.os.gsOSlib import g_OSlib +from gspylib.inspection.common import SharedFuncs +from impl.upgrade.UpgradeConst import GreyUpgradeStep +import impl.upgrade.UpgradeConst as Const + + +class OldVersionModules(): + """ + class: old version modules + """ + + def __init__(self): + """ + function: constructor + """ + # old cluster information + self.oldDbClusterInfoModule = None + # old cluster status + self.oldDbClusterStatusModule = None + + +class UpgradeImpl: + """ + Class: The class is used to do perform upgrade + """ + def __init__(self, upgrade): + """ + function: constructor + """ + self.dnInst = None + self.context = upgrade + self.newCommitId = "" + self.oldCommitId = "" + self.isLargeInplaceUpgrade = False + self.__upgrade_across_64bit_xid = False + self.action = upgrade.action + + def exitWithRetCode(self, action, succeed=True, msg=""): + """ + funtion: should be called after cmdline parameter check + input : action, succeed, msg, strategy + output: NA + """ + ######################################### + # doUpgrade + # + # binary-upgrade success failure + # 0 1 + # + # binary-rollback success failure + # 2 3 + + # commit-upgrade success failure + # 5 1 + ######################################### + + ######################################### + # choseStrategy + # success failure + # 4 1 + ######################################### + if not succeed: + if action == Const.ACTION_AUTO_ROLLBACK: + retCode = 3 + else: + retCode = 1 + elif action in [Const.ACTION_SMALL_UPGRADE, + Const.ACTION_LARGE_UPGRADE, + Const.ACTION_INPLACE_UPGRADE]: + retCode = 0 + elif action == Const.ACTION_AUTO_ROLLBACK: + retCode = 2 + elif action == Const.ACTION_CHOSE_STRATEGY: + retCode = 4 + elif action == Const.ACTION_COMMIT_UPGRADE: + retCode = 5 + else: + retCode = 1 + + if msg != "": + if self.context.logger is not None: + if succeed: + self.context.logger.log(msg) + else: + self.context.logger.error(msg) + else: + print(msg) + sys.exit(retCode) + + def initGlobalInfos(self): + """ + function: init global infos + input : NA + output: NA + """ + self.context.logger.debug("Init global infos", "addStep") + self.context.sshTool = SshTool( + self.context.clusterNodes, self.context.localLog, + DefaultValue.TIMEOUT_PSSH_BINARY_UPGRADE) + self.initClusterConfig() + self.context.logger.debug("Successfully init global infos", "constant") + + def setClusterDetailInfo(self): + """ + function: set cluster detail info + input : NA + output : NA + """ + for dbNode in self.context.clusterInfo.dbNodes: + dbNode.setDnDetailNum() + #self.context.clusterInfo.setClusterDnCount() + + def checkExistsProcess(self, greyNodeNames): + """ + function: check exists process + input : greyNodeNames + output : NA + """ + pass + + def 
removeOmRollbackProgressFile(self): + """ + function: remove om rollback process file + input : NA + output : NA + """ + self.context.logger.debug("Remove the om rollback" + " record progress file.") + fileName = os.path.join(self.context.tmpDir, + ".upgrade_task_om_rollback_result") + cmd = "(if [ -f '%s' ];then rm -f '%s';fi)" % (fileName, fileName) + DefaultValue.execCommandWithMode(cmd, + "remove om rollback " + "record progress file", + self.context.sshTool, + self.context.isSingle, + self.context.mpprcFile) + + def initOmRollbackProgressFile(self): + """ + function: init om rollback process file + input : NA + output : NA + """ + filePath = os.path.join(self.context.tmpDir, + ".upgrade_task_om_rollback_result") + cmd = "echo \"OM:RUN\" > %s" % filePath + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + self.context.logger.debug("The cmd is %s " % cmd) + raise Exception(ErrorCode.GAUSS_502["GAUSS_50205"] % filePath + + "Error: \n%s" % str(output)) + + if (not self.context.isSingle): + # send file to remote nodes + self.context.sshTool.scpFiles(filePath, self.context.tmpDir) + self.context.logger.debug("Successfully write file %s." % filePath) + + def run(self): + """ + function: Do upgrade + input : NA + output: NA + """ + # the action may be changed in each step, + # if failed in auto-rollback, + # we will check if we need to rollback + action = self.context.action + # upgrade backup path + self.context.tmpDir = DefaultValue.getTmpDirFromEnv(self.context.user) + if self.context.tmpDir == "": + raise Exception(ErrorCode.GAUSS_518["GAUSS_51800"] % "$PGHOST") + self.context.upgradeBackupPath = \ + "%s/%s" % (self.context.tmpDir, "binary_upgrade") + try: + self.initGlobalInfos() + self.removeOmRollbackProgressFile() + self.commonCheck() + + # 4. get upgrade type + # After choseStrategy, it will assign action to self.context.action + # to do full-upgrade or binary-upgrade + if self.context.action == Const.ACTION_AUTO_UPGRADE: + self.context.action = self.choseStrategy() + self.context.logger.debug( + "%s execution takes %s steps in total" % ( + Const.GS_UPGRADECTL, ClusterCommand.countTotalSteps( + Const.GS_UPGRADECTL, self.context.action))) + # If get upgrade strategy failed, + # then try to get rollback strategy. 
+ # Set strategyFlag as True to check + # upgrade parameter is correct or not + if self.context.action in [Const.ACTION_LARGE_UPGRADE, + Const.ACTION_SMALL_UPGRADE]: + self.doGreyBinaryUpgrade() + else: + self.doInplaceBinaryUpgrade() + # After choseStrategy, it will assign action to self.context.action + elif self.context.action == Const.ACTION_AUTO_ROLLBACK: + # because if we rollback with auto rollback, + # we will rollback all the nodes, + # but if we rollback under upgrade, + # we will only rollback specified nodes + self.context.action = self.choseStrategy() + self.context.rollback = True + if self.context.action == Const.ACTION_INPLACE_UPGRADE: + self.exitWithRetCode(Const.ACTION_AUTO_ROLLBACK, + self.doInplaceBinaryRollback()) + else: + self.exitWithRetCode(Const.ACTION_AUTO_ROLLBACK, + self.doGreyBinaryRollback( + Const.ACTION_AUTO_ROLLBACK)) + elif self.context.action == Const.ACTION_COMMIT_UPGRADE: + self.context.action = self.choseStrategy() + if self.context.action == Const.ACTION_INPLACE_UPGRADE: + self.doInplaceCommitUpgrade() + else: + self.doGreyCommitUpgrade() + else: + self.doChoseStrategy() + except Exception as e: + self.context.logger.debug(traceback.format_exc() + str(e)) + if not self.context.sshTool: + self.context.sshTool = SshTool( + self.context.clusterNodes, self.context.logger, + DefaultValue.TIMEOUT_PSSH_BINARY_UPGRADE) + if action == Const.ACTION_AUTO_ROLLBACK and \ + self.checkBakPathNotExists(): + self.context.logger.log("No need to rollback.") + self.exitWithRetCode(action, True) + else: + self.context.logger.error(str(e)) + self.exitWithRetCode(action, False, str(e)) + + def commonCheck(self): + """ + Check in the common process. + :return: + """ + self.checkReadOnly() + if self.context.is_grey_upgrade: + self.checkUpgradeMode() + + def checkReadOnly(self): + """ + check if in read only mode under grey upgrade, grey upgrade commit or + grey upgrade rollback if not in read only, then record the value of + enable_transaction_read_only and set it to off + """ + try: + self.context.logger.debug("Check if in read only mode.") + greyUpgradeFlagFile = os.path.join(self.context.upgradeBackupPath, + Const.GREY_UPGRADE_STEP_FILE) + # only used under grey upgrade, grey upgrade commit or grey upgrade + # rollback if under grey upgrade, the flag file + # greyUpgradeFlagFile has not been created + # so we use is_inplace_upgrade to judge the mode + if (self.context.action == Const.ACTION_AUTO_UPGRADE and + not self.context.is_inplace_upgrade or + (os.path.isfile(greyUpgradeFlagFile) and + self.context.action in [Const.ACTION_AUTO_ROLLBACK, + Const.ACTION_COMMIT_UPGRADE])): + if self.unSetClusterReadOnlyMode() != 0: + raise Exception("NOTICE: " + + ErrorCode.GAUSS_529["GAUSS_52907"]) + except Exception as e: + raise Exception(str(e)) + + def checkUpgradeMode(self): + """ + used to check if upgrade_mode is 0 under before upgrade + if not, we set it to 0 + """ + tempPath = self.context.upgradeBackupPath + filePath = os.path.join(tempPath, Const.INPLACE_UPGRADE_STEP_FILE) + if self.context.action == Const.ACTION_AUTO_UPGRADE \ + and not os.path.exists(filePath): + try: + self.setUpgradeMode(0) + self.context.logger.log( + "Successfully set upgrade_mode to 0.") + except Exception as e: + self.context.logger.log("Failed to set upgrade_mode to 0, " + "please set it manually, " + "or rollback first.") + raise Exception(str(e)) + + def checkBakPathNotExists(self): + """ + check binary_upgrade exists on all nodes, + :return: True if not exists on all nodes + """ + try: + cmd = 
"if [ -d '%s' ]; then echo 'GetDir'; else echo 'NoDir'; fi" \ + % self.context.upgradeBackupPath + self.context.logger.debug("Command for checking if upgrade bak " + "path exists: %s" % cmd) + outputCollect = self.context.sshTool.getSshStatusOutput(cmd)[1] + if outputCollect.find('GetDir') >= 0: + self.context.logger.debug("Checking result: %s" + % outputCollect) + return False + self.context.logger.debug("Path %s does not exists on all node." + % self.context.upgradeBackupPath) + return True + except Exception: + self.context.logger.debug("Failed to check upgrade bak path.") + return False + + def doChoseStrategy(self): + """ + function: chose the strategy for upgrade + input : NA + output: NA + """ + self.context.logger.debug("Choosing strategy.") + try: + self.context.action = self.choseStrategy() + # we only support binary-upgrade. + if self.context.action in [Const.ACTION_SMALL_UPGRADE, + Const.ACTION_LARGE_UPGRADE]: + self.exitWithRetCode(Const.ACTION_CHOSE_STRATEGY, + True, + "Upgrade strategy: %s." + % self.context.action) + # Use inplace upgrade under special case + else: + self.exitWithRetCode(Const.ACTION_CHOSE_STRATEGY, + True, + "Upgrade strategy: %s." + % self.context.action) + except Exception as e: + self.exitWithRetCode(Const.ACTION_CHOSE_STRATEGY, False, str(e)) + self.context.logger.debug("Successfully got the upgrade strategy.") + + def choseStrategy(self): + """ + function: chose upgrade strategy + input : NA + output: NA + """ + upgradeAction = None + try: + # get new cluster info + newVersionFile = VersionInfo.get_version_file() + newClusterVersion, newClusterNumber, newCommitId = \ + VersionInfo.get_version_info(newVersionFile) + gaussHome = DefaultValue.getInstallDir(self.context.user) + if gaussHome == "": + raise Exception(ErrorCode.GAUSS_518["GAUSS_51800"] + % "$GAUSSHOME") + if not os.path.islink(gaussHome): + raise Exception(ErrorCode.GAUSS_529["GAUSS_52915"]) + newPath = gaussHome + "_%s" % newCommitId + # new app dir should exist after preinstall, + # then we can use chose strategy + if not os.path.exists(newPath): + if self.context.action != Const.ACTION_AUTO_ROLLBACK: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] + % newPath) + self.context.logger.debug( + "Successfully obtained version information" + " of new clusters by %s." % newVersionFile) + + # get the old cluster info, if binary_upgrade does not exists, + # try to copy from other nodes + oldPath = self.getClusterAppPath(Const.OLD) + if oldPath == "": + self.context.logger.debug("Cannot get the old install " + "path from table and file.") + oldPath = os.path.realpath(gaussHome) + self.context.logger.debug("Old cluster app path is %s" % oldPath) + + oldVersionFile = "%s/bin/upgrade_version" % oldPath + try: + (oldClusterVersion, oldClusterNumber, oldCommitId) = \ + VersionInfo.get_version_info(oldVersionFile) + self.context.logger.debug("Successfully obtained version" + " information of old clusters by %s." + % oldVersionFile) + except Exception as e: + if os.path.exists(self.context.upgradeBackupPath): + # if upgradeBackupPath exist, + # it means that we do rollback first. + # and we get cluster version from the backup file + possibOldVersionFile = "%s/old_upgrade_version" \ + % self.context.upgradeBackupPath + self.context.logger.debug(str(e)) + self.context.logger.debug( + "Try to get the version information from %s." 
+                        % possibOldVersionFile)
+                    (oldClusterVersion, oldClusterNumber, oldCommitId) = \
+                        VersionInfo.get_version_info(possibOldVersionFile)
+                else:
+                    raise Exception(str(e))
+
+            # if the last successfully committed upgrade was a grey upgrade,
+            # the symbolic link should point to the
+            # old app path with the old commit id
+            if oldCommitId == newCommitId:
+                raise Exception(ErrorCode.GAUSS_529["GAUSS_52901"])
+            self.context.logger.debug(
+                "Successfully obtained version information of new and old "
+                "clusters.\n The old cluster number:%s, the new "
+                "cluster number:%s." % (oldClusterNumber, newClusterNumber))
+
+            self.canDoRollbackOrCommit()
+
+            if oldClusterVersion > newClusterVersion:
+                raise Exception(ErrorCode.GAUSS_529["GAUSS_52902"]
+                                % (oldClusterVersion, newClusterVersion))
+
+            self.checkLastUpgrade(newCommitId)
+
+            if float(newClusterNumber) < float(oldClusterNumber):
+                raise Exception(ErrorCode.GAUSS_516["GAUSS_51629"]
+                                % newClusterNumber)
+            elif float(newClusterNumber) == float(oldClusterNumber):
+                if self.context.is_inplace_upgrade:
+                    upgradeAction = Const.ACTION_INPLACE_UPGRADE
+                else:
+                    upgradeAction = Const.ACTION_SMALL_UPGRADE
+            else:
+                if int(float(newClusterNumber)) > int(float(oldClusterNumber)):
+                    raise Exception(ErrorCode.GAUSS_529["GAUSS_52904"]
+                                    + "Upgrading this cluster version is "
+                                      "not supported.")
+                elif ((float(newClusterNumber) - int(float(newClusterNumber)))
+                      > (float(oldClusterNumber) -
+                         int(float(oldClusterNumber)))):
+                    if self.context.is_inplace_upgrade:
+                        upgradeAction = Const.ACTION_INPLACE_UPGRADE
+                        self.isLargeInplaceUpgrade = True
+                    else:
+                        upgradeAction = Const.ACTION_LARGE_UPGRADE
+                else:
+                    raise Exception(ErrorCode.GAUSS_516["GAUSS_51629"]
+                                    % newClusterNumber)
+            self.context.logger.debug("The matched upgrade strategy is: %s."
+                                      % upgradeAction)
+            self.context.newClusterVersion = newClusterVersion
+            self.context.newClusterNumber = newClusterNumber
+            self.context.oldClusterVersion = oldClusterVersion
+            self.context.oldClusterNumber = oldClusterNumber
+            self.context.newClusterAppPath = newPath
+            self.context.oldClusterAppPath = oldPath
+            self.newCommitId = newCommitId
+            self.oldCommitId = oldCommitId
+            return upgradeAction
+        except Exception as e:
+            raise Exception(ErrorCode.GAUSS_529["GAUSS_52900"] % str(e)
+                            + " Do nothing this time.")
+
+    def canDoRollbackOrCommit(self):
+        """
+        Check whether rollback or commit is required.
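+        The decision is driven by the flag files left in the upgrade
+        backup path: Const.INPLACE_UPGRADE_FLAG_FILE selects the inplace
+        path and Const.GREY_UPGRADE_STEP_FILE selects the grey path; if
+        neither file exists, there is nothing to roll back or commit.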
+        :return:
+        """
+        try:
+            if self.context.action == Const.ACTION_AUTO_ROLLBACK or \
+                    self.context.action == Const.ACTION_COMMIT_UPGRADE:
+                inplaceUpgradeFlagFile = os.path.join(
+                    self.context.upgradeBackupPath,
+                    Const.INPLACE_UPGRADE_FLAG_FILE)
+                grayUpgradeFlagFile = os.path.join(
+                    self.context.upgradeBackupPath,
+                    Const.GREY_UPGRADE_STEP_FILE)
+                self.context.is_inplace_upgrade = False
+                # we do rollback by the backup directory
+                if os.path.isfile(inplaceUpgradeFlagFile):
+                    self.context.logger.debug("inplace upgrade flag exists, "
+                                              "use inplace rollback or commit.")
+                    self.context.is_inplace_upgrade = True
+                if os.path.isfile(grayUpgradeFlagFile):
+                    self.context.logger.debug("grey upgrade flag exists, "
+                                              "use grey rollback or commit.")
+                    self.context.isGreyUpgrade = True
+                if not (self.context.is_inplace_upgrade or
+                        self.context.isGreyUpgrade):
+                    if self.context.action == Const.ACTION_AUTO_ROLLBACK \
+                            and not self.checkBakPathNotExists():
+                        self.cleanBinaryUpgradeBakFiles(True)
+                    exitMsg = "No need to {0}".format(self.context.action)
+                    self.exitWithRetCode(self.context.action, True, exitMsg)
+        except Exception as e:
+            raise Exception("Failed to check whether rollback or commit is"
+                            " required. Error: {0}".format(str(e)))
+
+    def checkLastUpgrade(self, newCommitId):
+        """
+        check that the type of the last failed upgrade is the same as this
+        run, and that the last upgrade version is the same as this run.
+        If the last run was an inplace upgrade, we roll back first; if it
+        was a grey upgrade, we upgrade again.
+        """
+        if self.context.action == Const.ACTION_AUTO_UPGRADE:
+            stepFile = os.path.join(self.context.upgradeBackupPath,
+                                    Const.GREY_UPGRADE_STEP_FILE)
+            cmd = "if [ -f '%s' ]; then echo 'True';" \
+                  " else echo 'False'; fi" % stepFile
+            (resultMap, outputCollect) = \
+                self.context.sshTool.getSshStatusOutput(cmd)
+            self.context.logger.debug(
+                "The result of checking grey upgrade step flag"
+                " file on all nodes is:\n%s" % outputCollect)
+            if self.context.is_inplace_upgrade:
+                # if the grey upgrade rollback failed, the step file
+                # still exists, so we cannot do an inplace upgrade now
+                if outputCollect.find('True') >= 0:
+                    ermsg = ErrorCode.GAUSS_502["GAUSS_50200"] \
+                            % Const.GREY_UPGRADE_STEP_FILE \
+                            + "In grey upgrade process, " \
+                              "cannot do inplace upgrade!"
+                    raise Exception(str(ermsg))
+            else:
+                inplace_upgrade_flag_file =\
+                    "%s/inplace_upgrade_flag" % self.context.upgradeBackupPath
+                if os.path.isfile(inplace_upgrade_flag_file):
+                    ermsg = ErrorCode.GAUSS_502["GAUSS_50200"] % \
+                            inplace_upgrade_flag_file + \
+                            "In inplace upgrade process, " \
+                            "cannot do grey upgrade!"
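+                    # a leftover inplace_upgrade_flag suggests the previous
+                    # inplace run was not committed or rolled back, so
+                    # mixing in a grey upgrade now is refused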
+                    raise Exception(ermsg)
+            # there may be leftovers when the last upgrade
+            # used --force to forceRollback
+            self.checkBakPathAndTable(outputCollect)
+            self.checkNewCommitid(newCommitId)
+        elif self.context.action == Const.ACTION_AUTO_ROLLBACK or \
+                self.context.action == Const.ACTION_COMMIT_UPGRADE:
+            self.checkNewCommitid(newCommitId)
+
+    def checkBakPathAndTable(self, outputCollect):
+        """
+        if the record step file does not exist on any node but the table
+        exists, the table is left over from the last upgrade;
+        if both the table and the step file exist, check whether the
+        content is correct
+        :param outputCollect:
+        :return:
+        """
+        # no need to check and drop schema under force upgrade
+        if not self.existTable(Const.RECORD_NODE_STEP):
+            return
+        output = outputCollect.split('\n')
+        output = output[:-1]
+        findBakPath = False
+        for record in output:
+            # if 'True' is found, the step file exists on this node
+            if record.find('True') >= 0:
+                findBakPath = True
+                break
+        if not findBakPath:
+            self.dropSupportSchema()
+            return
+
+    def checkNewCommitid(self, newCommitId):
+        """
+        the commit id comes from version.cfg; it should match the commit
+        id recorded in the record app directory file
+        :param newCommitId: version.cfg line 3
+        :return: NA
+        """
+        newPath = self.getClusterAppPath(Const.NEW)
+        if newPath != "":
+            LastNewCommitId = newPath[-8:]
+            # when the gs_upgradectl script is run repeatedly,
+            # this run's upgrade version should match
+            # the last recorded upgrade version
+            if newCommitId != LastNewCommitId:
+                raise Exception(ErrorCode.GAUSS_529["GAUSS_52935"])
+
+    def checkOldClusterVersion(self, gaussdbPath, oldClusterVersionFile):
+        """
+        check old cluster version
+        input : gaussdbPath, oldClusterVersionFile
+        output:
+        1. (0,"V100R00XCXX")
+        2. (999,"NAC00Version")
+        3. (1, errorMsg)
+        otherwise raise exception
+        """
+        if os.path.isfile(oldClusterVersionFile):
+            cmd = "cat %s" % oldClusterVersionFile
+        else:
+            gaussdbFile = "%s/gaussdb" % gaussdbPath
+            if not os.path.exists(gaussdbFile):
+                self.context.logger.debug("The %s does not exist."
+                                          " Cannot obtain old cluster"
+                                          " version." % gaussdbFile)
+                return 1, " The %s does not exist. Cannot " \
+                          "obtain old cluster version" % gaussdbFile
+            if not os.path.isfile(gaussdbFile):
+                self.context.logger.debug("The %s is not a file. "
+                                          "Cannot obtain old cluster"
+                                          " version." % gaussdbFile)
+                return 1, " The %s is not a file. Cannot " \
+                          "obtain old cluster version" % gaussdbFile
+            # get old cluster version by gaussdb
+            # the output of gaussdb looks like this:
+            # gaussdb Gauss200 V100R00XCXX build xxxx
+            # compiled at xxxx-xx-xx xx:xx:xx
+            cmd = "export LD_LIBRARY_PATH=%s/lib:$LD_LIBRARY_PATH;%s " \
+                  "--version" % (os.path.dirname(gaussdbPath), gaussdbFile)
+
+        self.context.logger.debug("Command for getting old"
+                                  " cluster version:%s" % cmd)
+        (status, output) = subprocess.getstatusoutput(cmd)
+        if status == 0 and re.compile(r'V[0-9]{3}R[0-9]{3}C[0-9]{2}'
+                                      ).search(str(output)) is not None:
+            return 0, re.compile(
+                r'V[0-9]{3}R[0-9]{3}C[0-9]{2}').search(str(output)).group()
+        else:
+            self.context.logger.debug("Failed to obtain old cluster"
+                                      " version. Error: \n%s" % str(output))
+            return 999, "NAC00Version"
+
+    def setGUCValue(self, gucKey, gucValue, actionType="reload"):
+        """
+        function: do gs_guc
+        input : gucKey - parameter name
+                gucValue - parameter value
+                actionType - guc action type(set/reload).
+                             default is 'reload'
+        """
+        userProfile = DefaultValue.getMpprcFile()
+        if gucValue != "":
+            gucStr = "%s='%s'" % (gucKey, gucValue)
+        else:
+            gucStr = "%s" % gucKey
+
+        cmd = "source %s ;" % userProfile
+        cmd += "gs_guc %s -N all -I all -c \"%s\"" % (actionType, gucStr)
+        self.context.logger.debug("Command for setting "
+                                  "GUC parameter %s: %s" % (gucKey, cmd))
+        (status, output) = DefaultValue.retryGetstatusoutput(cmd)
+        return status, output
+
+    def setClusterReadOnlyMode(self):
+        """
+        function: set cluster read-only mode
+        input : NA
+        output : int
+        """
+        self.context.logger.debug("Setting up the cluster read-only mode.")
+        (status, output) = self.setGUCValue("default_transaction_read_only",
+                                            "true")
+        if status == 0:
+            self.context.logger.debug("Successfully set the "
                                      "cluster read-only mode.")
+            return 0
+        else:
+            self.context.logger.debug(
+                "Failed to set default_transaction_read_only parameter."
+                + " Error: \n%s" % str(output))
+            return 1
+
+    def unSetClusterReadOnlyMode(self):
+        """
+        function: Canceling the cluster read-only mode
+        input : NA
+        output: 0 successfully
+                1 failed
+        """
+        self.context.logger.debug("Canceling the cluster read-only mode.")
+        # unset cluster read-only mode
+        (status, output) = self.setGUCValue("default_transaction_read_only",
+                                            "false")
+        if status == 0:
+            self.context.logger.debug("Successfully cancelled the"
+                                      " cluster read-only mode.")
+            return 0
+        else:
+            self.context.logger.debug(
+                "Failed to set default_transaction_read_only parameter."
+                + " Error: \n%s" % str(output))
+            return 1
+
+    def stopCluster(self):
+        """
+        function: Stop the cluster
+        input : NA
+        output: NA
+        """
+        self.context.logger.debug("Stopping the cluster.", "addStep")
+        # Stop cluster applications
+        cmd = "%s -U %s -R %s -t %s" % (
+            OMCommand.getLocalScript("Local_StopInstance"),
+            self.context.user, self.context.clusterInfo.appPath,
+            Const.UPGRADE_TIMEOUT_CLUSTER_STOP)
+        self.context.logger.debug("Command for stopping cluster: %s" % cmd)
+        DefaultValue.execCommandWithMode(
+            cmd, "Stop cluster", self.context.sshTool,
+            self.context.isSingle or self.context.localMode,
+            self.context.mpprcFile)
+        self.context.logger.debug("Successfully stopped cluster.")
+
+    def startCluster(self):
+        """
+        function: start cluster
+        input : NA
+        output: NA
+        """
+        versionFile = os.path.join(
+            self.context.oldClusterAppPath, "bin/upgrade_version")
+        if os.path.exists(versionFile):
+            _, number, _ = VersionInfo.get_version_info(versionFile)
+            cmd = "%s -U %s -R %s -t %s --cluster_number=%s" % (
+                OMCommand.getLocalScript("Local_StartInstance"),
+                self.context.user, self.context.clusterInfo.appPath,
+                Const.UPGRADE_TIMEOUT_CLUSTER_START, number)
+        else:
+            cmd = "%s -U %s -R %s -t %s" % (
+                OMCommand.getLocalScript("Local_StartInstance"),
+                self.context.user, self.context.clusterInfo.appPath,
+                Const.UPGRADE_TIMEOUT_CLUSTER_START)
+        DefaultValue.execCommandWithMode(
+            cmd, "Start cluster", self.context.sshTool,
+            self.context.isSingle or self.context.localMode,
+            self.context.mpprcFile)
+        self.context.logger.log("Successfully started cluster.")
+
+    def createCommitFlagFile(self):
+        """
+        function: create a flag file; if this file exists, it means that
+                  the user has called the commit interface but it has not
+                  finished yet. If creating the file fails, the script
+                  should exit.
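+                  checkCommitFlagFile() below tests for this file, so a
+                  re-run can detect an interrupted commit.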
+ input : NA + output: NA + """ + commitFlagFile = "%s/commitFlagFile" % self.context.upgradeBackupPath + self.context.logger.debug("Start to create the commit flag file.") + try: + cmd = "(if [ -d '%s' ]; then touch '%s'; fi) " % ( + self.context.upgradeBackupPath, commitFlagFile) + DefaultValue.execCommandWithMode(cmd, + "create commit flag file", + self.context.sshTool, + self.context.isSingle, + self.context.mpprcFile) + except Exception as e: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50206"] + % ("commit flag file: %s" % str(e))) + self.context.logger.debug("Successfully created the commit flag file.") + + def checkCommitFlagFile(self): + """ + function: check if commit flag file exists. + input : NA + output: return 0, If there is the file commitFlagFile. + else, return 1 + """ + commitFlagFile = "%s/commitFlagFile" % self.context.upgradeBackupPath + if (os.path.isfile(commitFlagFile)): + return 0 + else: + return 1 + + def createInplaceUpgradeFlagFile(self): + """ + function: create inplace upgrade flag file on + all nodes if is doing inplace upgrade + 1.check if is inplace upgrade + 2.get new and old cluster version number + 3.write file + Input: NA + output : NA + """ + self.context.logger.debug("Start to create inplace upgrade flag file.") + try: + newClusterNumber = self.context.newClusterNumber + oldClusterNumber = self.context.oldClusterNumber + + inplace_upgrade_flag_file = "%s/inplace_upgrade_flag" % \ + self.context.upgradeBackupPath + g_file.createFile(inplace_upgrade_flag_file) + g_file.writeFile(inplace_upgrade_flag_file, + ["newClusterNumber:%s" % newClusterNumber], 'a') + g_file.writeFile(inplace_upgrade_flag_file, + ["oldClusterNumber:%s" % oldClusterNumber], 'a') + if (not self.context.isSingle): + self.context.sshTool.scpFiles(inplace_upgrade_flag_file, + self.context.upgradeBackupPath) + if float(self.context.oldClusterNumber) <= float( + Const.UPGRADE_VERSION_64bit_xid) < \ + float(self.context.newClusterNumber): + self.__upgrade_across_64bit_xid = True + + self.context.logger.debug("Successfully created inplace" + " upgrade flag file.") + except Exception as e: + raise Exception(str(e)) + + def setUpgradeMode(self, mode): + """ + function: set upgrade_mode parameter + Input : mode + output : NA + """ + try: + self.setUpgradeModeGuc(mode) + except Exception as e: + if self.context.action == Const.ACTION_INPLACE_UPGRADE or \ + not self.context.forceRollback: + raise Exception(str(e)) + try: + self.setUpgradeModeGuc(mode, "set") + except Exception as e: + self.context.logger.log("Failed to set upgrade_mode," + " please set it manually.") + + def setUpgradeModeGuc(self, mode, setType="reload"): + """ + function: set upgrade mode guc + input : mode, setType + output : NA + """ + self.context.logger.debug("Set upgrade_mode guc parameter.") + cmd = "gs_guc %s -N all -I all -c 'upgrade_mode=%d'" % ( + setType, mode) + self.context.logger.debug("Command for setting database" + " node parameter: %s." % cmd) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + self.context.logger.debug("Set upgrade_mode parameter " + "failed. cmd:%s\nOutput:%s" + % (cmd, str(output))) + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + "Error: \n%s" % str(output)) + gucStr = "upgrade_mode:%d" % mode + self.checkParam(gucStr) + self.context.logger.debug("Successfully set " + "upgrade_mode to %d." 
% mode) + + def checkParam(self, gucStr): + """ + function: check the cmagent guc value + Input : gucStr the guc key:value string + output : NA + """ + self.context.logger.debug("Start to check GUC value %s." % gucStr) + try: + # send cmd to that node and exec + cmd = "%s -t %s -U %s --upgrade_bak_path=%s" \ + " --guc_string=%s -l %s" % \ + (OMCommand.getLocalScript("Local_Upgrade_Utility"), + Const.ACTION_CHECK_GUC, + self.context.user, + self.context.upgradeBackupPath, + gucStr, + self.context.localLog) + self.context.logger.debug("Command for checking" + " parameter: %s." % cmd) + DefaultValue.execCommandWithMode(cmd, + "check GUC value", + self.context.sshTool, + self.context.isSingle, + self.context.mpprcFile) + self.context.logger.debug("Successfully checked guc value.") + except Exception as e: + raise Exception(str(e)) + + def floatMoreThan(self, numOne, numTwo): + """ + function: float more than + input : numOne, numTwo + output : True/False + """ + if float(numOne) - float(numTwo) > float(Const.DELTA_NUM): + return True + return False + + def floatLessThan(self, numOne, numTwo): + """ + function: float less than + input: numOne, numTwo + output: True/False + """ + if float(numOne) - float(numTwo) < float(-Const.DELTA_NUM): + return True + return False + + def floatEqualTo(self, numOne, numTwo): + """ + function: float equal to + input: numOne, numTwo + output: True/False + """ + if float(-Const.DELTA_NUM) < (float(numOne) - float(numTwo)) \ + < float(Const.DELTA_NUM): + return True + return False + + def floatGreaterOrEqualTo(self, numOne, numTwo): + """ + function: float greater or equal to + input: numOne, numTwo + output: True/False + """ + if self.floatMoreThan(numOne, numTwo) or \ + self.floatEqualTo(numOne, numTwo): + return True + return False + + def reloadVacuumDeferCleanupAge(self): + """ + function: reload the guc paramter vacuum_defer_cleanup_age value on + inplace upgrade or grey large upgrade + input : NA + """ + (status, output) = self.setGUCValue("vacuum_defer_cleanup_age", + "100000", "reload") + if status != 0: + raise Exception(ErrorCode.GAUSS_500["GAUSS_50007"] % "GUC" + + " Error: \n%s" % str(output)) + + def doGreyBinaryUpgrade(self): + """ + function: do grey binary upgrade, which essentially replace the binary + files, for the new version than 91.255, support this strategy to + change binary upgrade(Inplace), use the symbolic links to change the + binary file directory instead of installing the new bin in the same + directory.choose minority nodes to upgrade first, observe to decide + whether upgrade remaining nodes or rollback grey nodes + input : NA + output: NA + """ + upgradeAgain = False + try: + # 1. distribute xml configure file to every nodes. + self.distributeXml() + # 2. check if the app path is ready and sha256 is right and others + self.checkUpgrade() + # 4. check the cluster pressure + self.HASyncReplayCheck() + # 5. before do grey binary upgrade, we must make sure the + # cluster is Normal and the database could be + # connected, if not, exit. 
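+            # roughly, this precheck amounts to the following manual
+            # commands (illustrative only; the real checks live in
+            # doHealthCheck):
+            #   gs_om -t status --detail   # cluster state must be Normal
+            #   gsql -d postgres -p <dn_port> -c "SELECT 1;"  # connectivity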
+ (status, output) = self.doHealthCheck(Const.OPTION_PRECHECK) + if status != 0: + raise Exception(ErrorCode.GAUSS_516["GAUSS_51601"] % + "cluster" + "Detail: " + output) + # 6.chose the node name list that satisfy the condition as + # upgrade nodes + self.chooseUpgradeNodes() + # check if it satisfy upgrade again, if it is the second loop to + # upgrade, it can go go upgrade again branch + upgradeAgain = self.canUpgradeAgain() + except Exception as e: + # before this step, the upgrade process do nothing to the cluster, + # this time has no remaining + self.context.logger.debug(traceback.format_exc()) + self.context.logger.log(ErrorCode.GAUSS_529["GAUSS_52934"] + + "Nodes are the old version.\n" + + "Error: %s." % str(e) + + " Do nothing this time.") + self.exitWithRetCode(self.action, False, str(e)) + + if not upgradeAgain: + try: + if not self.doGreyBinaryRollback(): + self.exitWithRetCode(Const.ACTION_AUTO_ROLLBACK, False) + self.removeOmRollbackProgressFile() + self.context.logger.log( + "The directory %s will be deleted after commit-upgrade, " + "please make sure there is no personal data." % + self.context.oldClusterAppPath) + # 7. prepare upgrade function for sync and table + # RECORD_NODE_STEP, init the step of all nodes as 0 + self.prepareGreyUpgrade() + + # 8. install the new bin in the appPath which has been + # prepared in the preinstall + self.installNewBin() + #self.createGrpcCA() + #self.prepareServerKey() + #self.prepareRoachServerKey() + # decompress the catalog upgrade_sql.tar.gz to temp dir, + # include upgrade sql file and guc set + self.prepareUpgradeSqlFolder() + + self.recordNodeStep(GreyUpgradeStep.STEP_UPDATE_CATALOG) + # 9. if we update catalog after switch to the new bin, + # the system will raise error cannot find + # catalog or column until finish the updateCatalog function + # we can not recognize if it really cannot + # find the column, or just because the old version. So we + # will update the catalog in the old version + if self.context.action == Const.ACTION_LARGE_UPGRADE: + self.updateCatalog() + self.recordNodeStep(GreyUpgradeStep.STEP_SWITCH_NEW_BIN) + + self.upgradeAgain() + except Exception as e: + errmsg = ErrorCode.GAUSS_529["GAUSS_52934"] + \ + "You can use --grey to upgrade or manually rollback." + self.context.logger.log(errmsg + str(e)) + self.exitWithRetCode(self.context.action, False) + else: + self.upgradeAgain() + self.exitWithRetCode(self.context.action, True) + + def upgradeAgain(self): + try: + self.context.logger.debug( + "From this step, you can use -h to upgrade again if failed.") + # we have guarantee specified nodes have same step, + # so we only need to get one node step + currentStep = self.getOneNodeStep(self.context.nodeNames[0]) + self.context.logger.debug("Current node step is %d" % currentStep) + # first time execute grey upgrade, we will record the step for + # all the nodes, if we upgrade remain nodes, + # reenter the upgrade process, we will not rollback autonomously, + # just upgrade again + if currentStep < GreyUpgradeStep.STEP_UPGRADE_PROCESS: + self.backupHotpatch() + # 10. sync Cgroup configure and etc. + # use the symbolic link to change the bin dir + # sync old config to new bin path, the pg_plugin save the + # C function .so file(but not end with .so), + # so if it create in the old appPath after copy to the + # newAppPath but not switch to new bin + # the new version may not recognize the C function + self.greySyncGuc() + self.greyUpgradeSyncOldConfigToNew() + # 11. 
switch the cluster version to new version + self.switchBin(Const.NEW) + self.setNewVersionGuc() + self.recordNodeStep(GreyUpgradeStep.STEP_UPGRADE_PROCESS) + if currentStep < GreyUpgradeStep.STEP_UPDATE_POST_CATALOG: + # 12. kill the old existing process, will judge whether + # each process is the required version + self.switchExistsProcess() + self.recordNodeStep(GreyUpgradeStep.STEP_UPDATE_POST_CATALOG) + + except Exception as e: + self.context.logger.log("Failed to upgrade, can use --grey to " + "upgrade again after rollback. Error: " + "%s" % str(e)) + self.context.logger.debug(traceback.format_exc()) + self.exitWithRetCode(self.context.action, False, str(e)) + self.context.logger.log( + "The nodes %s have been successfully upgraded to new version. " + "Then do health check." % self.context.nodeNames) + + try: + # 13. check the cluster status, the cluster status can be degraded + (status, output) = self.doHealthCheck(Const.OPTION_POSTCHECK) + if status != 0: + raise Exception(ErrorCode.GAUSS_516["GAUSS_51601"] % + "cluster" + output) + if self.isNodeSpecifyStep(GreyUpgradeStep.STEP_UPDATE_POST_CATALOG): + # 14. exec post upgrade script + if self.context.action == Const.ACTION_LARGE_UPGRADE: + self.waitClusterForNormal() + self.prepareSql("rollback-post") + self.execRollbackUpgradedCatalog(scriptType="rollback-post") + self.prepareSql("upgrade-post") + self.execRollbackUpgradedCatalog(scriptType="upgrade-post") + self.getLsnInfo() + hosts = copy.deepcopy(self.context.clusterNodes) + self.recordNodeStep( + GreyUpgradeStep.STEP_PRE_COMMIT, nodes=hosts) + self.printPrecommitBanner() + except Exception as e: + hintInfo = "Nodes are new version. " \ + "Please check the cluster status. ERROR: \n" + self.context.logger.log(hintInfo + str(e)) + self.context.logger.debug(traceback.format_exc()) + self.exitWithRetCode(self.context.action, False, hintInfo + str(e)) + self.context.logger.log("Successfully upgrade nodes.") + self.exitWithRetCode(self.context.action, True) + + def getOneNodeStep(self, nodeName): + """ + get the node's step + """ + currentStep = self.getOneNodeStepInFile(nodeName) + return currentStep + + def getOneNodeStepInFile(self, nodeName): + """ + get the node's step from step file + """ + try: + stepFile = os.path.join(self.context.upgradeBackupPath, + Const.GREY_UPGRADE_STEP_FILE) + self.context.logger.debug( + "trying to get one node step in file %s" % stepFile) + with open(stepFile, 'r') as csvfile: + reader = csv.DictReader(csvfile) + for row in reader: + if row['node_host'] == nodeName: + step = int(row['step']) + break + self.context.logger.debug("successfully got one node step {0} " + "in file {1}".format(step, stepFile)) + return step + except Exception as e: + exitMsg = "Failed to get node step in step file. 
ERROR {0}".format( + str(e)) + self.exitWithRetCode(self.action, False, exitMsg) + + def greySyncGuc(self): + """ + delete the old version guc + """ + cmd = "%s -t %s -U %s --upgrade_bak_path=%s -l %s" % \ + (OMCommand.getLocalScript("Local_Upgrade_Utility"), + Const.ACTION_GREY_SYNC_GUC, + self.context.user, + self.context.upgradeBackupPath, + self.context.localLog) + self.context.logger.debug("Command for sync GUC in upgrade: %s" % cmd) + hostList = copy.deepcopy(self.context.nodeNames) + self.context.sshTool.executeCommand(cmd, "", hostList=hostList) + self.context.logger.debug("Successfully sync guc.") + + def greyUpgradeSyncOldConfigToNew(self): + """ + function: sync old cluster config to the new cluster install path + input : NA + output: NA + """ + # restore list: + # etc/gscgroup_xxx.cfg + # lib/postgresql/pg_plugin + # initdb_param + # server.key.cipher + # server.key.rand + # /share/sslsert/ca.key + # /share/sslsert/etcdca.crt + self.context.logger.log("Sync cluster configuration.") + try: + # backup DS libs and gds file + cmd = "%s -t %s -U %s -V %d --old_cluster_app_path=%s " \ + "--new_cluster_app_path=%s -l %s" % \ + (OMCommand.getLocalScript("Local_Upgrade_Utility"), + Const.ACTION_GREY_UPGRADE_CONFIG_SYNC, + self.context.user, + int(float(self.context.oldClusterNumber) * 1000), + self.context.oldClusterAppPath, + self.context.newClusterAppPath, + self.context.localLog) + self.context.logger.debug("Command for syncing config files: %s" + % cmd) + hostList = copy.deepcopy(self.context.nodeNames) + self.context.sshTool.executeCommand(cmd, "", hostList=hostList) + + # change the owner of application + cmd = "chown -R %s:%s '%s'" % \ + (self.context.user, self.context.group, + self.context.newClusterAppPath) + hostList = copy.deepcopy(self.context.nodeNames) + self.context.sshTool.executeCommand(cmd, "", hostList=hostList) + except Exception as e: + raise Exception(str(e) + " Failed to sync configuration.") + self.context.logger.log("Successfully synced cluster configuration.") + + def switchExistsProcess(self, isRollback=False): + """ + switch all the process + :param isRollback: + :return: + """ + self.context.logger.log("Switching all db processes.", "addStep") + self.createCheckpoint() + self.switchDn(isRollback) + try: + self.waitClusterNormalDegrade() + except Exception as e: + # can't promise normal status in force upgrade or forceRollback + if self.context.forceRollback: + self.context.logger.log("WARNING: Failed to wait " + "cluster normal or degrade.") + else: + raise Exception(str(e)) + self.context.logger.log("Successfully switch all process version", + "constant") + + def createCheckpoint(self): + try: + self.context.logger.debug("Create checkpoint before switching.") + start_time = timeit.default_timer() + # create checkpoint + sql = "CHECKPOINT;" + for i in range(10): + (status, output) = self.execSqlCommandInPrimaryDN(sql) + # no need to retry under force upgrade + if status == 0: + break + self.context.logger.debug("Waring: checkpoint creation fails " + "for the %s time. Fail message:%s." 
+ "try again at one second intervals" % + (str(i), str(output))) + time.sleep(1) + if status != 0: + raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % sql + + " Error: \n%s" % str(output)) + + elapsed = timeit.default_timer() - start_time + self.context.logger.debug("Time to create checkpoint: %s" % + self.getTimeFormat(elapsed)) + except Exception as e: + if self.context.forceRollback: + self.context.logger.log( + "WARNING: Failed to create checkpoint, " + "the switch process may use more time.") + else: + raise Exception(str(e)) + + def switchDn(self, isRollback): + self.context.logger.debug("Switching DN processes.") + start_time = timeit.default_timer() + # under upgrade, kill the process from old cluster app path, + # rollback: kill from new cluster app path + cmd = "%s -t %s -U %s -V %d --old_cluster_app_path=%s " \ + "--new_cluster_app_path=%s -X '%s' -l %s" % \ + (OMCommand.getLocalScript("Local_Upgrade_Utility"), + Const.ACTION_SWITCH_DN, + self.context.user, + int(float(self.context.oldClusterNumber) * 1000), + self.context.oldClusterAppPath, + self.context.newClusterAppPath, + self.context.xmlFile, + self.context.localLog) + + if isRollback: + cmd += " --rollback" + if self.context.forceRollback: + cmd += " --force" + self.context.logger.debug( + "Command for switching DN processes: %s" % cmd) + hostList = copy.deepcopy(self.context.nodeNames) + self.context.sshTool.executeCommand(cmd, "", hostList=hostList) + start_cluster_time = timeit.default_timer() + self.greyStartCluster() + end_cluster_time = timeit.default_timer() - start_cluster_time + self.context.logger.debug("Time to start cluster is %s" % + self.getTimeFormat(end_cluster_time)) + elapsed = timeit.default_timer() - start_time + self.context.logger.debug("Time to switch DN process version: %s" + % self.getTimeFormat(elapsed)) + + def greyStartCluster(self): + """ + start cluster in grey upgrade + :return: + """ + cmd = "gs_om -t start" + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % + "Command:%s. Error:\n%s" % (cmd, output)) + + def isNodeSpecifyStep(self, step, nodes=None): + """ + check if all the specfied nodes is the step + """ + return self.isNodeSpecifyStepInFile(step, nodes) + + def isNodeSpecifyStepInFile(self, step=-1, nodes=None): + """ + step = -1 means we just check if step in all the specfied nodes is the + same otherwise, we check if all the specfied nodes is the given step + """ + try: + if nodes: + self.context.logger.debug( + "check if the nodes %s step is %s" % (nodes, step)) + else: + self.context.logger.debug( + "check if all the nodes step is %s" % step) + nodes = copy.deepcopy(self.context.clusterNodes) + stepFile = os.path.join(self.context.upgradeBackupPath, + Const.GREY_UPGRADE_STEP_FILE) + if not os.path.isfile(stepFile): + self.context.logger.debug( + "no step file, which means nodes %s step is same" % nodes) + return True + + with open(stepFile, 'r') as csvfile: + reader = csv.DictReader(csvfile) + for row in reader: + if row['node_host'] in nodes: + if step == -1: + step = int(row['step']) + else: + if step == int(row['step']): + continue + else: + self.context.logger.debug( + "the nodes %s step is not all %s" % ( + nodes, step)) + return False + self.context.logger.debug( + "the nodes %s step is all %s" % (nodes, step)) + return True + except Exception as e: + exitMsg = \ + "Failed to check node step in file. 
ERROR {0}".format(str(e)) + self.exitWithRetCode(self.action, False, exitMsg) + + def getLsnInfo(self): + """ + Obtain the maximum LSN of each DN instance. + """ + self.context.logger.debug("Start to get lsn info.") + try: + # prepare dynamic cluster info file in every node + self.getOneDNInst(checkNormal=True) + execHosts = [self.dnInst.hostname] + cmd = "%s -t %s -U %s --upgrade_bak_path=%s -l %s" % \ + (OMCommand.getLocalScript("Local_Upgrade_Utility"), + Const.ACTION_GET_LSN_INFO, + self.context.user, + self.context.upgradeBackupPath, + self.context.localLog) + self.context.logger.debug("Command for geting lsn info: %s." % cmd) + self.context.sshTool.executeCommand(cmd, "", hostList=execHosts) + self.context.logger.debug( + "Successfully get lsn info in instanse node.") + except Exception as e: + if self.context.forceRollback: + self.context.logger.debug( + "Failed to get lsn info in force Scenario.") + return + raise Exception( + "Failed to get lsn info in instanse node. " + "Error:{0}".format(str(e))) + + def chooseUpgradeNodes(self): + # Already set the self.context.nodesNum = 1 + # when number and node names is empty + self.context.logger.debug("Choose the nodes to be upgraded.") + self.setClusterDetailInfo() + self.context.nodeNames = self.context.clusterNodes + self.context.logger.log("Upgrade all nodes.") + + def getUpgradedNodeNames(self, step=GreyUpgradeStep.STEP_INIT_STATUS): + """ + by default, return upgraded nodes + otherwise, return the nodes that step is more than given step + under force upgrade, we only get step from file + """ + return self.getUpgradedNodeNamesInFile(step) + + def getUpgradedNodeNamesInFile(self, step=GreyUpgradeStep.STEP_INIT_STATUS): + """ + get upgraded nodes from step file + by default, return upgraded nodes + otherwise, return the nodes that step is more than given step + """ + try: + stepFile = os.path.join(self.context.upgradeBackupPath, + Const.GREY_UPGRADE_STEP_FILE) + self.context.logger.debug( + "trying to get upgraded nodes from %s" % (stepFile)) + if not os.path.isfile(stepFile): + return [] + greyNodeNames = [] + with open(stepFile, 'r') as csvfile: + reader = csv.DictReader(csvfile) + for row in reader: + if int(row['step']) > step: + greyNodeNames.append(row['node_host']) + self.context.logger.debug("upgraded nodes are {0}".format( + greyNodeNames)) + return greyNodeNames + except Exception as e: + exitMsg = "Failed to get upgraded nodes from step file. 
" \ + "ERROR {0}".format(str(e)) + self.exitWithRetCode(self.action, False, exitMsg) + + def existTable(self, relname): + """ + funcation: if the table exist in pg_class + input : NA + output: NA + """ + try: + sql = "select count(*) from pg_catalog.pg_class c, " \ + "pg_catalog.pg_namespace n " \ + "where n.nspname = '%s' AND relname = '%s' " \ + "AND c.relnamespace = n.oid;" % ( + Const.UPGRADE_SCHEMA, relname) + self.context.logger.debug("Sql to query if has the table: %s" % sql) + (status, output) = self.execSqlCommandInPrimaryDN(sql) + if status != 0 or ClusterCommand.findErrorInSql(output): + raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % + sql + " Error: \n%s" % str(output)) + if output == '0': + self.context.logger.debug("Table does not exist.") + return False + self.context.logger.debug("Table exists.") + return True + except Exception as e: + raise Exception(str(e)) + + def findOneMatchedCombin(self, clusterNodes): + """ + function: if the node number is less than const.COMBIN_NUM, we will + try all possiblity combination to get one + matched combination, otherwise, we will use a strategy to find the + node with less instance(cms, gtm, etc.) + input : check the score or return the first match combination + output: one match best node + """ + combinNodes = clusterNodes + # combin is node name list + randomNodes = random.sample(combinNodes, self.context.nodesNum) + self.context.logger.log("Not match the condition, " + "choose nodes %s" % randomNodes) + return randomNodes + + def canUpgradeAgain(self): + """ + judge if we should rollback or can upgrade again, + if has the nodes whose step is more than switch bin + """ + self.context.logger.debug("Check if we can upgrade again.") + greyNodeNames = self.getUpgradedNodeNames( + GreyUpgradeStep.STEP_SWITCH_NEW_BIN) + if len(greyNodeNames) > 0: + self.context.logger.debug( + "Has nodes step greater or equal than %d. Can upgrade again." + % GreyUpgradeStep.STEP_SWITCH_NEW_BIN) + return True + self.context.logger.debug( + "There is no node step greater or equal than %d. " + "Can not do upgrade again." % GreyUpgradeStep.STEP_SWITCH_NEW_BIN) + return False + + def prepareGreyUpgrade(self): + """ + function: do pre-upgrade stuffs for primary and standby HA + sync check, and create table to record step + input : NA + output: NA + """ + if self.context.upgrade_remain: + self.context.logger.debug("No need to create pre-upgrade stuffs") + return + self.context.logger.debug("Start to create pre-upgrade stuffs") + # under force upgrade, we only prepare the files + self.prepareGreyUpgradeFiles() + # all stuffs done successfully, return 0 + self.context.logger.debug("Successfully created pre-upgrade stuffs.") + + def prepareGreyUpgradeFiles(self): + # the bakpath is created in checkUpgrade, + # but may deleted when rollback, so need to check + try: + self.context.logger.debug("start to prepare grey upgrade files") + self.createBakPath() + self.initNodeStepInCsv() + self.initUpgradeProcessStatus() + self.recordDirFile() + self.copyBakVersion() + self.context.logger.debug( + "successfully prepared grey upgrade files") + except Exception as e: + self.context.logger.debug("failed to prepare grey upgrade files") + raise Exception(str(e)) + + def initNodeStepInCsv(self): + bakStepFile = os.path.join(self.context.upgradeBackupPath, + Const.GREY_UPGRADE_STEP_FILE + "_bak") + self.context.logger.debug("Create and init the file %s." 
+    def initNodeStepInCsv(self):
+        bakStepFile = os.path.join(self.context.upgradeBackupPath,
+                                   Const.GREY_UPGRADE_STEP_FILE + "_bak")
+        self.context.logger.debug("Create and init the file %s."
+                                  % bakStepFile)
+        g_file.createFile(bakStepFile, True, DefaultValue.KEY_FILE_MODE)
+        header = ["node_host", "upgrade_action", "step"]
+        g_file.createFileInSafeMode(bakStepFile)
+        writeInfo = []
+        for dbNode in self.context.clusterInfo.dbNodes:
+            writeInfo.append([('%s' % dbNode.name),
+                              ('%s' % self.context.action),
+                              ('%s' % GreyUpgradeStep.STEP_INIT_STATUS)])
+        with open(bakStepFile, "w") as csvfile:
+            writer = csv.writer(csvfile)
+            writer.writerow(header)
+            writer.writerows(writeInfo)
+        finalStepFile = os.path.join(self.context.upgradeBackupPath,
+                                     Const.GREY_UPGRADE_STEP_FILE)
+        g_file.rename(bakStepFile, finalStepFile)
+        # so if we can get the step file, we can get the step information
+        self.context.logger.debug("Rename the file %s to %s." % (
+            bakStepFile, finalStepFile))
+        self.distributeFile(finalStepFile)
+        self.context.logger.debug("Successfully initialized the file %s and "
+                                  "sent it to each node." % finalStepFile)
+
+    def initUpgradeProcessStatus(self):
+        stepFile = os.path.join(self.context.upgradeBackupPath,
+                                Const.INPLACE_UPGRADE_STEP_FILE)
+        self.context.logger.debug("Create and init the file %s" % stepFile)
+        g_file.removeFile(stepFile, "python")
+        g_file.createFile(stepFile, True, DefaultValue.KEY_FILE_MODE)
+        self.recordNodeStepInplace(self.context.action,
+                                   GreyUpgradeStep.STEP_INIT_STATUS)
+        self.context.logger.debug("Successfully initialized the file %s "
+                                  "and sent it to each node" % stepFile)
+
+    def recordNodeStep(self, step, nodes=None):
+        """
+        Under normal rollback, if the binary_upgrade dir does not exist,
+        recordNodeStepInplace would create a file named binary_upgrade,
+        so we should raise an error and use the force rollback mode.
+        For commit upgrade, we should create the dir to record the
+        cannot-rollback flag to avoid node inconsistency.
+        :param step: upgrade or rollback step
+        :param nodes: the nodes to be set to the given step
+        :return: NA
+        """
+        cmd = "if [ -d '%s' ]; then echo 'True'; else echo 'False'; fi" %\
+              self.context.upgradeBackupPath
+        hostList = copy.deepcopy(self.context.clusterNodes)
+        (resultMap, outputCollect) = self.context.sshTool.getSshStatusOutput(
+            cmd, hostList)
+        self.context.logger.debug(
+            "The result of checking the distributed directory is:\n%s" %
+            outputCollect)
+        if outputCollect.find('False') >= 0:
+            if step != GreyUpgradeStep.STEP_BEGIN_COMMIT:
+                raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] %
+                                self.context.upgradeBackupPath)
+            self.createBakPath()
+        self.recordNodeStepInplace(self.context.action, step)
+        # under force upgrade, we only record the step to file
+        self.recordNodeStepInCsv(step, nodes)
+        self.context.logger.debug(
+            "Successfully recorded node step %s." 
% str(step)) + + def recordNodeStepInCsv(self, step, nodes=None): + if nodes is None: + nodes = [] + self.context.logger.debug("Record node step %s in file" % str(step)) + stepFile = os.path.join(self.context.upgradeBackupPath, + Const.GREY_UPGRADE_STEP_FILE) + stepTempFile = os.path.join(self.context.upgradeBackupPath, + "upgrade_step_temp.csv") + g_file.createFileInSafeMode(stepTempFile) + with open(stepFile, 'r') as csvfile, \ + open(stepTempFile, 'w') as tempfile: + header = ["node_host", "upgrade_action", "step"] + reader = csv.DictReader(csvfile) + writer = csv.writer(tempfile) + writer.writerow(header) + writeInfo = [] + if not nodes: + nodes = self.context.nodeNames + if nodes: + for row in reader: + if row['node_host'] in nodes: + writeInfo.append([row['node_host'], row[ + 'upgrade_action'], str(step)]) + else: + writeInfo.append([row['node_host'], row[ + 'upgrade_action'], row['step']]) + else: + for row in reader: + writeInfo.append([row['node_host'], + row['upgrade_action'], str(step)]) + writer.writerows(writeInfo) + + g_file.removeFile(stepFile) + g_file.rename(stepTempFile, stepFile) + g_file.changeMode(DefaultValue.KEY_FILE_MODE, stepFile) + # distribute the node step file to each node + self.distributeFile(stepFile) + + def doInplaceBinaryUpgrade(self): + """ + function: do binary upgrade, which essentially replace the binary files + input : NA + output: NA + """ + # 1. distribute new package to every nodes. + self.distributeXml() + # 2. check whether we should do rollback or not. + if not self.doInplaceBinaryRollback(): + self.exitWithRetCode(Const.ACTION_AUTO_ROLLBACK, False) + try: + self.checkUpgrade() + + # 3. before do binary upgrade, we must make sure the cluster is + # Normal and the database could be connected + # if not, exit. + self.startCluster() + + # uninstall kerberos if has already installed + pghost_path = DefaultValue.getEnvironmentParameterValue( + 'PGHOST', self.context.user) + kerberosflagfile = "%s/kerberos_upgrade_flag" % pghost_path + if os.path.exists(kerberosflagfile): + self.stopCluster() + self.context.logger.log("Starting uninstall Kerberos.", + "addStep") + cmd = "source %s && " % self.context.userProfile + cmd += "%s -m uninstall -U %s" % (OMCommand.getLocalScript( + "Local_Kerberos"), self.context.user) + self.context.sshTool.executeCommand(cmd, "") + self.context.logger.log("Successfully uninstall Kerberos.") + self.startCluster() + if self.unSetClusterReadOnlyMode() != 0: + raise Exception("NOTICE: " + + ErrorCode.GAUSS_529["GAUSS_52907"]) + self.recordNodeStepInplace(Const.ACTION_INPLACE_UPGRADE, + Const.BINARY_UPGRADE_STEP_INIT_STATUS) + + (status, output) = self.doHealthCheck(Const.OPTION_PRECHECK) + if status != 0: + self.exitWithRetCode(Const.ACTION_INPLACE_UPGRADE, False, + ErrorCode.GAUSS_516["GAUSS_51601"] + % "cluster" + output) + self.getOneDNInst() + # 4.record the old and new app dir in file + self.recordDirFile() + if self.isLargeInplaceUpgrade: + self.recordLogicalClusterName() + # 6. reload vacuum_defer_cleanup_age to new value + if self.isLargeInplaceUpgrade: + if self.__upgrade_across_64bit_xid: + self.reloadVacuumDeferCleanupAge() + + if self.setClusterReadOnlyMode() != 0: + raise Exception(ErrorCode.GAUSS_529["GAUSS_52908"]) + + # after checkUpgrade, the bak path is ready, we can use it now + # create inplace upgrade flag file if is doing inplace upgrade + self.createInplaceUpgradeFlagFile() + # 7. backup current application and configuration. + # The function only be used by binary upgrade. 
+ # to ensure the transaction atomicity, + # it will be used with checkUpgrade(). + self.backupNodeVersion() + # For inplace upgrade, we have to perform additional checks + # and then backup catalog files. + if self.isLargeInplaceUpgrade: + self.prepareUpgradeSqlFolder() + self.HASyncReplayCheck() + self.backupOldClusterDBAndRelInfo() + # 8. stop old cluster + self.recordNodeStepInplace(Const.ACTION_INPLACE_UPGRADE, + Const.BINARY_UPGRADE_STEP_STOP_NODE) + self.context.logger.debug("Start to stop all instances" + " on the node.", "addStep") + self.stopCluster() + self.context.logger.debug("Successfully stop all" + " instances on the node.", "constant") + # 9. back cluster config. including this: + # cluster_static_config + # cluster_dynamic_config + # etc/gscgroup_xxx.cfg + # lib/postgresql/pg_plugin + # server.key.cipher + # server.key.rand + # Data Studio lib files + # gds files + # physical catalog files if performing inplace upgrade + self.recordNodeStepInplace( + Const.ACTION_INPLACE_UPGRADE, + Const.BINARY_UPGRADE_STEP_BACKUP_VERSION) + self.backupClusterConfig() + + # 10. Upgrade application on node + # install new bin file + self.recordNodeStepInplace(Const.ACTION_INPLACE_UPGRADE, + Const.BINARY_UPGRADE_STEP_UPGRADE_APP) + self.installNewBin() + + # 11. restore the cluster config. including this: + # cluster_static_config + # cluster_dynamic_config + # etc/gscgroup_xxx.cfg + # lib/postgresql/pg_plugin + # server.key.cipher + # server.key.rand + # Data Studio lib files + # gds files + # cn cert files + # At the same time, sync newly added guc for instances + self.restoreClusterConfig() + self.syncNewGUC() + # unset cluster readonly + self.startCluster() + if self.unSetClusterReadOnlyMode() != 0: + raise Exception("NOTICE: " + + ErrorCode.GAUSS_529["GAUSS_52907"]) + # flush new app dynamic configuration + dynamicConfigFile = "%s/bin/cluster_dynamic_config" % \ + self.context.newClusterAppPath + if os.path.exists(dynamicConfigFile) \ + and self.isLargeInplaceUpgrade: + self.refresh_dynamic_config_file() + self.context.logger.debug( + "Successfully refresh dynamic config file") + self.stopCluster() + if os.path.exists(dynamicConfigFile) \ + and self.isLargeInplaceUpgrade: + self.restore_dynamic_config_file() + # 12. modify GUC parameter unix_socket_directory + self.modifySocketDir() + # 13. start new cluster + self.recordNodeStepInplace(Const.ACTION_INPLACE_UPGRADE, + Const.BINARY_UPGRADE_STEP_START_NODE) + self.context.logger.debug("Start to start all instances" + " on the node.", "addStep") + + # update catalog + # start cluster in normal mode + if self.isLargeInplaceUpgrade: + self.touchRollbackCatalogFlag() + self.updateCatalog() + self.CopyCerts() + self.context.createGrpcCa() + self.context.logger.debug("Successfully createGrpcCa.") + + self.switchBin(Const.NEW) + self.startCluster() + if self.isLargeInplaceUpgrade: + self.modifyPgProcIndex() + self.context.logger.debug("Start to exec post upgrade script") + self.doUpgradeCatalog(self.context.oldClusterNumber, + postUpgrade=True) + self.context.logger.debug( + "Successfully exec post upgrade script") + self.context.logger.debug("Successfully start all " + "instances on the node.", "constant") + if self.setClusterReadOnlyMode() != 0: + raise Exception(ErrorCode.GAUSS_529["GAUSS_52908"]) + # 14. check the cluster status + (status, output) = self.doHealthCheck(Const.OPTION_POSTCHECK) + if status != 0: + raise Exception(ErrorCode.GAUSS_516["GAUSS_51601"] + % "cluster" + output) + + # 15. 
record precommit step status + self.recordNodeStepInplace(Const.ACTION_INPLACE_UPGRADE, + Const.BINARY_UPGRADE_STEP_PRE_COMMIT) + self.printPrecommitBanner() + except Exception as e: + self.context.logger.error(str(e)) + self.context.logger.log("Binary upgrade failed. Rollback" + " to the original cluster.") + # do rollback + self.exitWithRetCode(Const.ACTION_AUTO_ROLLBACK, + self.doInplaceBinaryRollback()) + self.exitWithRetCode(Const.ACTION_INPLACE_UPGRADE, True) + + def doInplaceCommitUpgrade(self): + """ + function: commit binary upgrade and clean up backup files + 1. unset read-only + 2. drop old PMK schema + 3. restore UDF + 4. clean backup catalog physical + files if doing inplace upgrade + 5. clean up other upgrade tmp files + input : NA + output: NA + """ + if self.getNodeStepInplace() != Const.BINARY_UPGRADE_STEP_PRE_COMMIT: + raise Exception(ErrorCode.GAUSS_529["GAUSS_52916"] + + " Please check if previous upgrade" + " operation was successful or if" + " upgrade has already been committed.") + # create commit flag file + self.createCommitFlagFile() + + # variable to indicate whether we should keep step file + # and cleanup list file for re-entry + cleanUpSuccess = True + + # drop table and index after large upgrade + if self.isLargeInplaceUpgrade: + if self.check_upgrade_mode(): + self.drop_table_or_index() + # 1.unset read-only + if self.isLargeInplaceUpgrade: + self.setUpgradeMode(0) + if self.unSetClusterReadOnlyMode() != 0: + self.context.logger.log("NOTICE: " + + ErrorCode.GAUSS_529["GAUSS_52907"]) + cleanUpSuccess = False + if self.isLargeInplaceUpgrade: + self.cleanCsvFile() + # 2. drop old PMK schema + # we sleep 10 seconds first because DB might be updating + # ha status after unsetting read-only + time.sleep(10) + # 3. clean backup catalog physical files if doing inplace upgrade + if self.cleanBackupedCatalogPhysicalFiles() != 0: + self.context.logger.debug( + "Failed to clean backup files in directory %s. " + % self.context.upgradeBackupPath) + + if not cleanUpSuccess: + self.context.logger.log("NOTICE: Cleanup is incomplete during" + " commit. Please re-commit upgrade once" + " again or cleanup manually") + self.exitWithRetCode(Const.ACTION_INPLACE_UPGRADE, False) + else: + # 8. clean up other upgrade tmp files + # and uninstall inplace upgrade support functions + self.cleanInstallPath(Const.OLD) + self.cleanBinaryUpgradeBakFiles() + if self.isLargeInplaceUpgrade: + self.stopCluster() + self.startCluster() + + # install Kerberos + self.install_kerberos() + self.context.logger.log("Commit binary upgrade succeeded.") + self.exitWithRetCode(Const.ACTION_INPLACE_UPGRADE, True) + + def install_kerberos(self): + """ + install kerberos after upgrade + :return:NA + """ + pghost_path = DefaultValue.getEnvironmentParameterValue( + 'PGHOST', self.context.user) + kerberosflagfile = "%s/kerberos_upgrade_flag" % pghost_path + if os.path.exists(kerberosflagfile): + # install kerberos + cmd = "source %s &&" % self.context.userProfile + cmd += "gs_om -t stop && " + cmd += "%s -m install -U %s --krb-server" % ( + OMCommand.getLocalScript("Local_Kerberos"), + self.context.user) + (status, output) = DefaultValue.retryGetstatusoutput(cmd, 3, 5) + if status != 0: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % + "Command:%s. 
Error:\n%s" % (cmd, output)) + cmd = "source %s && " % self.context.userProfile + cmd += "%s -m install -U %s --krb-client " % ( + OMCommand.getLocalScript("Local_Kerberos"), self.context.user) + self.context.sshTool.executeCommand( + cmd, "", hostList=self.context.clusterNodes) + self.context.logger.log("Successfully install Kerberos.") + cmd = "source %s && gs_om -t start" % self.context.userProfile + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0 and not self.context.ignoreInstance: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % + "Command:%s. Error:\n%s" % (cmd, output)) + os.remove(kerberosflagfile) + + def refresh_dynamic_config_file(self): + """ + refresh dynamic config file + :return: + """ + cmd = "source %s ;gs_om -t refreshconf" % self.context.userProfile + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % + "Command:%s. Error:\n%s" % (cmd, output)) + + def restore_dynamic_config_file(self): + """ + restore dynamic config file + :return: + """ + cmd = "%s -t %s -U %s -V %d --upgrade_bak_path=%s " \ + "--old_cluster_app_path=%s --new_cluster_app_path=%s " \ + "-l %s" % ( + OMCommand.getLocalScript("Local_Upgrade_Utility"), + Const.ACTION_RESTORE_DYNAMIC_CONFIG_FILE, + self.context.user, + int(float(self.context.oldClusterNumber) * 1000), + self.context.upgradeBackupPath, + self.context.oldClusterAppPath, + self.context.newClusterAppPath, + self.context.localLog) + + self.context.logger.debug("Command for restoring " + "config files: %s" % cmd) + DefaultValue.execCommandWithMode(cmd, + "restore config files", + self.context.sshTool, + self.context.isSingle, + self.context.mpprcFile) + + def cleanCsvFile(self): + """ + clean csv file + :return: + """ + clusterNodes = self.context.clusterInfo.dbNodes + for dbNode in clusterNodes: + if len(dbNode.datanodes) == 0: + continue + dnInst = dbNode.datanodes[0] + dndir = dnInst.datadir + pg_proc_csv_path = \ + '%s/pg_copydir/tbl_pg_proc_oids.csv' % dndir + new_pg_proc_csv_path = \ + '%s/pg_copydir/new_tbl_pg_proc_oids.csv' % dndir + if os.path.exists(pg_proc_csv_path): + g_file.removeFile(pg_proc_csv_path) + if os.path.exists(new_pg_proc_csv_path): + g_file.removeFile(new_pg_proc_csv_path) + + def check_upgrade_mode(self): + """ + check upgrade_mode value + :return: + """ + cmd = "source %s ; gs_guc check -N all -I all -c 'upgrade_mode'" % \ + self.context.userProfile + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_500[ + "GAUSS_50010"] % 'upgrade_mode' + + "Error: \n%s" % str(output)) + if output.find("upgrade_mode=0") >= 0: + return False + else: + return True + + def cleanBackupedCatalogPhysicalFiles(self, isRollBack=False): + """ + function : clean backuped catalog physical files + input : isRollBack, default is False + output: return 0, if the operation is done successfully. + return 1, if the operation failed. + """ + try: + if self.isLargeInplaceUpgrade: + self.context.logger.log("Clean up backup catalog files.") + # send cmd to all node and exec + cmd = "%s -t %s -U %s --upgrade_bak_path=%s -l %s" % \ + (OMCommand.getLocalScript("Local_Upgrade_Utility"), + Const.ACTION_CLEAN_OLD_CLUSTER_CATALOG_PHYSICAL_FILES, + self.context.user, + self.context.upgradeBackupPath, + self.context.localLog) + if isRollBack: + cmd += " --rollback --oldcluster_num='%s'" % \ + self.context.oldClusterNumber + self.context.logger.debug( + "Command for cleaning up physical catalog files: %s." 
% cmd) + DefaultValue.execCommandWithMode( + cmd, + "clean backuped physical files of catalog objects", + self.context.sshTool, + self.context.isSingle, + self.context.userProfile) + self.context.logger.debug( + "Successfully cleaned up backup catalog files.") + return 0 + except Exception as e: + if isRollBack: + raise Exception( + "Fail to clean up backup catalog files: %s" % str(e)) + else: + self.context.logger.debug( + "Fail to clean up backup catalog files. " + + "Please re-commit upgrade once again or clean up manually.") + return 1 + + def recordLogicalClusterName(self): + """ + function: record the logical node group name in bakpath, + so that we can restore specfic name in bakpath, + used in restoreCgroup, and refresh the CgroupConfigure + input : NA + output: NA + """ + lcgroupfile = "%s/oldclusterinfo.json" % self.context.tmpDir + try: + self.context.logger.debug( + "Write and send logical cluster info file.") + # check whether file is exists + if os.path.isfile(lcgroupfile): + return 0 + # check whether it is lc cluster + sql = """SELECT true AS group_kind + FROM pg_class c, pg_namespace n, pg_attribute attr + WHERE c.relname = 'pgxc_group' AND n.nspname = 'pg_catalog' + AND attr.attname = 'group_kind' AND c.relnamespace = + n.oid AND attr.attrelid = c.oid; """ + self.context.logger.debug( + "Check if the cluster type is a logical cluster.") + (status, output) = ClusterCommand.remoteSQLCommand( + sql, + self.context.user, + self.dnInst.hostname, + self.dnInst.port, + False, + DefaultValue.DEFAULT_DB_NAME, + IsInplaceUpgrade=True) + if status != 0: + raise Exception(ErrorCode.GAUSS_513[ + "GAUSS_51300"] % sql + " Error: \n%s" % str( + output)) + if not output or output.strip() != 't': + self.context.logger.debug( + "The old cluster is not logical cluster.") + return 0 + self.context.logger.debug("The old cluster is logical cluster.") + # get lc group name lists + sql = "SELECT group_name FROM pgxc_group WHERE group_kind = 'v';" + self.context.logger.debug( + "Getting the list of logical cluster names.") + (status, output) = ClusterCommand.remoteSQLCommand( + sql, + self.context.user, + self.dnInst.hostname, + self.dnInst.port, + False, + DefaultValue.DEFAULT_DB_NAME, + IsInplaceUpgrade=True) + if status != 0: + raise Exception(ErrorCode.GAUSS_513[ + "GAUSS_51300"] % sql + " Error: \n%s" % str( + output)) + lcgroupnames = output.split("\n") + self.context.logger.debug( + "The list of logical cluster names: %s." 
% lcgroupnames) + # create the file + g_file.createFile(lcgroupfile) + g_file.changeOwner(self.context.user, lcgroupfile) + g_file.changeMode(DefaultValue.KEY_FILE_MODE, lcgroupfile) + # write result to file + with open(lcgroupfile, "w") as fp_json: + json.dump({"lcgroupnamelist": lcgroupnames}, fp_json) + # send file to remote nodes + self.context.sshTool.scpFiles(lcgroupfile, self.context.tmpDir) + self.context.logger.debug( + "Successfully to write and send logical cluster info file.") + return 0 + except Exception as e: + cmd = "(if [ -f '%s' ]; then rm -f '%s'; fi)" % ( + lcgroupfile, lcgroupfile) + DefaultValue.execCommandWithMode(cmd, + "clean lcgroup name list file", + self.context.sshTool, + self.context.isSingle, + self.context.userProfile) + raise Exception(str(e)) + + def prepareUpgradeSqlFolder(self): + """ + function: verify upgrade_sql.tar.gz and extract it to binary backup + path, because all node need set_guc, so + we will decompress on all nodes + input : NA + output: NA + """ + self.context.logger.debug("Preparing upgrade sql folder.") + if self.context.action == Const.ACTION_INPLACE_UPGRADE: + hostName = DefaultValue.GetHostIpOrName() + hosts = [hostName] + else: + hosts = self.context.clusterNodes + cmd = "%s -t %s -U %s --upgrade_bak_path=%s -X %s -l %s" % \ + (OMCommand.getLocalScript("Local_Upgrade_Utility"), + Const.ACTION_UPGRADE_SQL_FOLDER, + self.context.user, + self.context.upgradeBackupPath, + self.context.xmlFile, + self.context.localLog) + DefaultValue.execCommandWithMode(cmd, + "prepare upgrade_sql", + self.context.sshTool, + self.context.isSingle, + self.context.userProfile, + hosts) + + def HASyncReplayCheck(self): + """ + function: Wait and check if all standbys have replayed upto flushed + xlog positions of primaries.We record primary xlog flush + position at start of the check and wait until standby replay + upto that point. + Attention: If autovacuum is turned on, primary xlog flush + position may increase during the check.We do not check such + newly added xlog because they will not change catalog + physical file position. + Input: NA + output : NA + """ + self.context.logger.debug("Start to wait and check if all the standby" + " instances have replayed all xlogs.") + self.doReplay() + self.context.logger.debug("Successfully performed the replay check " + "of the standby instance.") + + def doReplay(self): + refreshTimeout = 180 + waitTimeout = 300 + RefreshTime = datetime.now() + timedelta(seconds=refreshTimeout) + EndTime = datetime.now() + timedelta(seconds=waitTimeout) + # wait and check sync status between primary and standby + + NeedReplay = True + PosList = [] + while NeedReplay: + sql = "SELECT sender_flush_location,receiver_replay_location " \ + "from pg_catalog.pg_stat_get_wal_senders() " \ + "where peer_role != 'Secondary';" + (status, output) = ClusterCommand.remoteSQLCommand( + sql, + self.context.user, + self.dnInst.hostname, + self.dnInst.port, + False, + DefaultValue.DEFAULT_DB_NAME, + IsInplaceUpgrade=True) + if status != 0: + self.context.logger.debug( + "Primary and Standby may be not in sync.") + self.context.logger.debug( + "Sync status: %s. Output: %s" % (str(status), output)) + elif output != "": + self.context.logger.debug( + "Sync status: %s. 
Output: %s" % (str(status), output)) + tmpPosList = self.getXlogPosition(output) + if len(PosList) == 0: + PosList = copy.deepcopy(tmpPosList) + self.context.logger.debug( + "Primary and Standby may be not in sync.") + else: + NeedReplay = False + for eachRec in PosList: + for eachTmpRec in tmpPosList: + if self.needReplay(eachRec, eachTmpRec): + NeedReplay = True + self.context.logger.debug( + "Primary and Standby may be not in sync.") + break + if NeedReplay: + break + else: + NeedReplay = False + + # Standby replay postion may keep falling behind primary + # flush position if it is at the end of one xlog page and the + # free space is less than xlog record header size. + # We do a checkpoint to avoid such situation. + if datetime.now() > RefreshTime and NeedReplay: + self.context.logger.debug( + "Execute CHECKPOINT to refresh xlog position.") + refreshsql = "set statement_timeout=300000;CHECKPOINT;" + (status, output) = ClusterCommand.remoteSQLCommand( + refreshsql, + self.context.user, + self.dnInst.hostname, + self.dnInst.port, + False, + DefaultValue.DEFAULT_DB_NAME, + IsInplaceUpgrade=True) + if status != 0: + raise Exception( + ErrorCode.GAUSS_513["GAUSS_51300"] % refreshsql + + "Error: \n%s" % str(output)) + + if datetime.now() > EndTime and NeedReplay: + self.context.logger.log("WARNING: " + ErrorCode.GAUSS_513[ + "GAUSS_51300"] % sql + " Timeout while waiting for " + "standby replay.") + return + time.sleep(5) + + def getXlogPosition(self, output): + """ + get xlog position from output + """ + tmpPosList = [] + resList = output.split('\n') + for eachLine in resList: + tmpRec = {} + (flushPos, replayPos) = eachLine.split('|') + (flushPosId, flushPosOff) = (flushPos.strip()).split('/') + (replayPosId, replayPosOff) = (replayPos.strip()).split('/') + tmpRec['nodeName'] = self.getHAShardingName() + tmpRec['flushPosId'] = flushPosId.strip() + tmpRec['flushPosOff'] = flushPosOff.strip() + tmpRec['replayPosId'] = replayPosId.strip() + tmpRec['replayPosOff'] = replayPosOff.strip() + tmpPosList.append(tmpRec) + return tmpPosList + + def getHAShardingName(self): + """ + in centralized cluster, used to get the only one sharding name + """ + peerInsts = self.context.clusterInfo.getPeerInstance(self.dnInst) + (instance_name, _, _) = ClusterInstanceConfig.\ + getInstanceInfoForSinglePrimaryMultiStandbyCluster( + self.dnInst, peerInsts) + return instance_name + + def needReplay(self, eachRec, eachTmpRec): + """ + judeg if need replay by xlog position + """ + if eachRec['nodeName'] == eachTmpRec['nodeName'] \ + and (int(eachRec['flushPosId'], 16) > int( + eachTmpRec['replayPosId'], 16) or ( + int(eachRec['flushPosId'], 16) == int( + eachTmpRec['replayPosId'], 16) and int( + eachRec['flushPosOff'], 16) > int(eachTmpRec['replayPosOff'], 16))): + return True + else: + return False + + def backupOldClusterDBAndRelInfo(self): + + """ + function: backup old cluster db and rel info + send cmd to that node + input : NA + output: NA + """ + tmpFile = os.path.join(DefaultValue.getTmpDirFromEnv( + self.context.user), Const.TMP_DYNAMIC_DN_INFO) + try: + self.context.logger.debug("Start to backup old cluster database" + " and relation information.") + # prepare backup path + backup_path = os.path.join( + self.context.upgradeBackupPath, "oldClusterDBAndRel") + cmd = "rm -rf '%s' && mkdir '%s' -m '%s' " % \ + (backup_path, backup_path, DefaultValue.KEY_DIRECTORY_MODE) + hostList = copy.deepcopy(self.context.clusterNodes) + self.context.sshTool.executeCommand(cmd, "", hostList=hostList) + # prepare dynamic 
+    def backupOldClusterDBAndRelInfo(self):
+        """
+        function: back up old cluster db and rel info
+                  and send the cmd to that node
+        input : NA
+        output: NA
+        """
+        tmpFile = os.path.join(DefaultValue.getTmpDirFromEnv(
+            self.context.user), Const.TMP_DYNAMIC_DN_INFO)
+        try:
+            self.context.logger.debug("Start to backup old cluster database"
+                                      " and relation information.")
+            # prepare backup path
+            backup_path = os.path.join(
+                self.context.upgradeBackupPath, "oldClusterDBAndRel")
+            cmd = "rm -rf '%s' && mkdir '%s' -m '%s' " % \
+                  (backup_path, backup_path, DefaultValue.KEY_DIRECTORY_MODE)
+            hostList = copy.deepcopy(self.context.clusterNodes)
+            self.context.sshTool.executeCommand(cmd, "", hostList=hostList)
+            # prepare dynamic cluster info file on every node
+            self.generateDynamicInfoFile(tmpFile)
+            # get dn primary hosts
+            dnPrimaryNodes = self.getPrimaryDnListFromDynamicFile()
+            execHosts = list(set(dnPrimaryNodes))
+
+            # send cmd to all nodes and execute
+            cmd = "%s -t %s -U %s --upgrade_bak_path=%s -l %s" % \
+                  (OMCommand.getLocalScript("Local_Upgrade_Utility"),
+                   Const.ACTION_BACKUP_OLD_CLUSTER_DB_AND_REL,
+                   self.context.user,
+                   self.context.upgradeBackupPath,
+                   self.context.localLog)
+            self.context.logger.debug(
+                "Command for backing up old cluster database and "
+                "relation information: %s." % cmd)
+            self.context.sshTool.executeCommand(cmd, "", hostList=execHosts)
+            self.context.logger.debug("Backing up information of all nodes.")
+            self.context.logger.debug("Successfully backed up old cluster "
+                                      "database and relation information")
+        except Exception as e:
+            raise Exception(str(e))
+        finally:
+            if os.path.exists(tmpFile):
+                deleteCmd = "(if [ -f '%s' ]; then rm -f '%s'; fi) " % \
+                            (tmpFile, tmpFile)
+                hostList = copy.deepcopy(self.context.clusterNodes)
+                self.context.sshTool.executeCommand(
+                    deleteCmd, "", hostList=hostList)
+
+    def generateDynamicInfoFile(self, tmpFile):
+        """
+        generate the dynamic info file and send it to every node
+        :return:
+        """
+        self.context.logger.debug(
+            "Start to generate dynamic info file and send to every node.")
+        try:
+            cmd = ClusterCommand.getQueryStatusCmd(
+                self.context.user, outFile=tmpFile)
+            SharedFuncs.runShellCmd(cmd, self.context.user,
+                                    self.context.userProfile)
+            if not os.path.exists(tmpFile):
+                raise Exception("Cannot generate dynamic info file")
+            self.context.distributeFileToSpecialNode(tmpFile,
+                                                     os.path.dirname(tmpFile),
+                                                     self.context.clusterNodes)
+            self.context.logger.debug(
+                "Successfully generated dynamic info file and sent it "
+                "to every node.")
+        except Exception as er:
+            raise Exception("Failed to generate dynamic info file on "
+                            "these nodes: {0}, error: {1}".format(
+                                self.context.clusterNodes, str(er)))
+
+    def getPrimaryDnListFromDynamicFile(self):
+        """
+        get the primary dn list from the dynamic file
+        :return: primary dn list
+        """
+        try:
+            self.context.logger.debug(
+                "Start to get primary dn list from dynamic file.")
+            tmpFile = os.path.join(DefaultValue.getTmpDirFromEnv(
+                self.context.user), Const.TMP_DYNAMIC_DN_INFO)
+            if not os.path.exists(tmpFile):
+                raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % tmpFile)
+            dynamicClusterStatus = DbClusterStatus()
+            dynamicClusterStatus.initFromFile(tmpFile)
+            cnAndPrimaryDnNodes = []
+            # find the primary DN instances
+            for dbNode in dynamicClusterStatus.dbNodes:
+                for instance in dbNode.datanodes:
+                    if instance.status == 'Primary':
+                        for staticDBNode in self.context.clusterInfo.dbNodes:
+                            if staticDBNode.id == instance.nodeId:
+                                cnAndPrimaryDnNodes.append(staticDBNode.name)
+            result = list(set(cnAndPrimaryDnNodes))
+            self.context.logger.debug("Successfully got primary dn list from "
+                                      "dynamic file: {0}.".format(result))
+            return result
+        except Exception as er:
+            raise Exception("Failed to get primary dn list from dynamic file. "
+                            "Error:{0}".format(str(er)))
+
+    def touchRollbackCatalogFlag(self):
+        """
+        Before updating the system catalog, touch a flag file.
+        """
+        # touch the init flag file
+        # during rollback, if the init flag file has not been touched,
+        # we do not need to do catalog rollback.
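+        # Illustrative counterpart (the rollback side is outside this hunk):
+        # a rollback routine can skip catalog rollback when the flag is
+        # absent, e.g.
+        #     flagFile = "%s/touch_init_flag" % self.context.upgradeBackupPath
+        #     if not os.path.exists(flagFile):
+        #         return  # catalog was never touched, nothing to roll back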
+ cmd = "touch '%s/touch_init_flag'" % self.context.upgradeBackupPath + DefaultValue.execCommandWithMode(cmd, + "create init flag file", + self.context.sshTool, + self.context.isSingle, + self.context.userProfile) + + def updateCatalog(self): + """ + function: update catalog to new version + steps: + 1.prepare update sql file and check sql file + 2.do update catalog + Input: NA + output : NA + """ + try: + self.prepareSql("upgrade-post") + self.prepareSql("upgrade") + self.prepareSql("rollback-post") + self.prepareSql("rollback") + self.doUpgradeCatalog(self.context.oldClusterNumber) + except Exception as e: + raise Exception( + "Failed to execute update sql file. Error: %s" % str(e)) + + def doUpgradeCatalog(self, oldClusterNumber, postUpgrade=False): + """ + function: update catalog to new version + 1.set upgrade_from param + 2.start cluster + 3.touch init files and do pre-upgrade staffs + 4.connect database and update catalog one by one + 5.stop cluster + 6.unset upgrade_from param + 7.start cluster + Input: oldClusterNumber + output : NA + """ + try: + if self.context.action == Const.ACTION_INPLACE_UPGRADE: + if not postUpgrade: + self.startCluster() + self.setUpgradeMode(1) + self.touchInitFile() + elif not postUpgrade: + # the guc parameter upgrade_from need to restart + # cmagent to take effect + self.setUpgradeMode(2) + # kill snapshot thread in kernel + self.context.killKernalSnapshotThread(self.dnInst) + # if we use --force to forceRollback last time, + # it may has remaining last catalog + if postUpgrade: + self.waitClusterForNormal() + self.execRollbackUpgradedCatalog(scriptType="rollback-post") + self.execRollbackUpgradedCatalog(scriptType="upgrade-post") + else: + self.execRollbackUpgradedCatalog(scriptType="rollback") + self.execRollbackUpgradedCatalog(scriptType="upgrade") + self.pgxcNodeUpdateLocalhost("upgrade") + self.getLsnInfo() + if self.context.action == \ + Const.ACTION_INPLACE_UPGRADE and not postUpgrade: + self.updatePgproc() + except Exception as e: + raise Exception("update catalog failed.ERROR: %s" % str(e)) + + def updatePgproc(self): + """ + function: update pg_proc during large upgrade + :return: + """ + self.context.logger.debug( + "Start to update pg_proc in inplace large upgrade ") + # generate new csv file + execHosts = [self.dnInst.hostname] + # send cmd to all node and exec + cmd = "%s -t %s -U %s -R '%s' -l %s" % ( + OMCommand.getLocalScript("Local_Upgrade_Utility"), + Const.ACTION_CREATE_NEW_CSV_FILE, + self.context.user, + self.context.tmpDir, + self.context.localLog) + self.context.logger.debug( + "Command for create new csv file: %s." 
% cmd) + self.context.sshTool.executeCommand(cmd, "", hostList=execHosts) + self.context.logger.debug( + "Successfully created new csv file.") + # select all databases + database_list = self.getDatabaseList() + # create pg_proc_temp_oids + new_pg_proc_csv_path = '%s/pg_copydir/new_tbl_pg_proc_oids.csv' % \ + self.dnInst.datadir + self.createPgprocTempOids(new_pg_proc_csv_path, database_list) + # create pg_proc_temp_oids index + self.createPgprocTempOidsIndex(database_list) + # make checkpoint + self.replyXlog(database_list) + # create pg_proc_mapping.txt to save the mapping between pg_proc + # file path and pg_proc_temp_oids file path + cmd = "%s -t %s -U %s -R '%s' -l %s" % ( + OMCommand.getLocalScript("Local_Upgrade_Utility"), + Const.ACTION_CREATE_PG_PROC_MAPPING_FILE, + self.context.user, + self.context.tmpDir, + self.context.localLog) + DefaultValue.execCommandWithMode( + cmd, + "create file to save mapping between pg_proc file path and " + "pg_proc_temp_oids file path", + self.context.sshTool, + self.context.isSingle, + self.context.userProfile) + self.context.logger.debug( + "Successfully created file to save mapping between pg_proc file " + "path and pg_proc_temp_oids file path.") + # stop cluster + self.stopCluster() + # replace pg_proc data file by pg_proc_temp data file + # send cmd to all node and exec + cmd = "%s -t %s -U %s -R '%s' -l %s" % ( + OMCommand.getLocalScript("Local_Upgrade_Utility"), + Const.ACTION_REPLACE_PG_PROC_FILES, + self.context.user, + self.context.tmpDir, + self.context.localLog) + DefaultValue.execCommandWithMode( + cmd, + "replace pg_proc data file by pg_proc_temp data files", + self.context.sshTool, + self.context.isSingle, + self.context.userProfile) + self.context.logger.debug( + "Successfully replaced pg_proc data files.") + + def copy_and_modify_tableinfo_to_csv(self, old_csv_path, new_csv_path): + """ + 1. copy pg_proc info to csv file + 2. modify csv file + 3. 
create new table and get info by csv file + :return: + """ + sql =\ + """copy pg_proc( proname, pronamespace, proowner, prolang, + procost, prorows, provariadic, protransform, prosecdef, + proleakproof, proisstrict, proretset, provolatile, pronargs, + pronargdefaults, prorettype, proargtypes, proallargtypes, + proargmodes, proargnames, proargdefaults, prosrc, probin, + proconfig, proacl, prodefaultargpos, fencedmode, proshippable, + propackage,prokind) WITH OIDS to '%s' delimiter ',' + csv header;""" % old_csv_path + (status, output) = ClusterCommand.remoteSQLCommand( + sql, self.context.user, + self.dnInst.hostname, self.dnInst.port, False, + DefaultValue.DEFAULT_DB_NAME, IsInplaceUpgrade=True) + if status != 0: + raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % sql + + " Error: \n%s" % str(output)) + pg_proc_csv_reader = csv.reader(open(old_csv_path, 'r')) + pg_proc_csv_data = list(pg_proc_csv_reader) + header = pg_proc_csv_data[0] + header.insert(header.index('protransform') + 1, 'proisagg') + header.insert(header.index('protransform') + 2, 'proiswindow') + new_pg_proc_csv_data = [] + new_pg_proc_csv_data.append(header) + pg_proc_data_info = pg_proc_csv_data[1:] + for i in range(2): + for info in pg_proc_data_info: + info.insert(header.index('protransform') + 2, 'True') + for info in pg_proc_data_info: + new_pg_proc_csv_data.append(info) + f = open(new_csv_path, 'w') + new_pg_proc_csv_writer = csv.writer(f) + for info in new_pg_proc_csv_data: + new_pg_proc_csv_writer.writerow(info) + f.close() + + def createPgprocTempOids(self, new_pg_proc_csv_path, database_list): + """ + create pg_proc_temp_oids + :return: + """ + sql = \ + """START TRANSACTION; SET IsInplaceUpgrade = on; + CREATE TABLE pg_proc_temp_oids (proname name NOT NULL, + pronamespace oid NOT NULL, proowner oid NOT NULL, prolang oid + NOT NULL, procost real NOT NULL, prorows real NOT NULL, + provariadic oid NOT NULL, protransform regproc NOT NULL, + proisagg boolean NOT NULL, proiswindow boolean NOT NULL, + prosecdef boolean NOT NULL, proleakproof boolean NOT NULL, + proisstrict boolean NOT NULL, proretset boolean NOT NULL, + provolatile "char" NOT NULL, pronargs smallint NOT NULL, + pronargdefaults smallint NOT NULL, prorettype oid NOT NULL, + proargtypes oidvector NOT NULL, proallargtypes oid[], + proargmodes "char"[], proargnames text[], proargdefaults + pg_node_tree, prosrc text, probin text, proconfig text[], + proacl aclitem[], prodefaultargpos int2vector,fencedmode boolean, + proshippable boolean, propackage boolean, prokind "char" NOT + NULL) with oids;""" + sql += "copy pg_proc_temp_oids WITH OIDS from '%s' with " \ + "delimiter ',' csv header FORCE NOT NULL proargtypes;" % \ + new_pg_proc_csv_path + sql += "COMMIT;" + # update proisagg and proiswindow message sql + sql += \ + "update pg_proc_temp_oids set proisagg = CASE WHEN prokind = 'a' " \ + "THEN True ELSE False END, proiswindow = CASE WHEN prokind = 'w' " \ + "THEN True ELSE False END;" + self.context.logger.debug("pg_proc_temp_oids sql is %s" % sql) + # creat table + for eachdb in database_list: + (status, output) = ClusterCommand.remoteSQLCommand( + sql, self.context.user, + self.dnInst.hostname, self.dnInst.port, False, + eachdb, IsInplaceUpgrade=True) + if status != 0: + raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % sql + + " Error: \n%s" % str(output)) + + def createPgprocTempOidsIndex(self, database_list): + """ + create index pg_proc_oid_index_temp and + pg_proc_proname_args_nsp_index_temp + :return: + """ + sql = "CREATE UNIQUE INDEX 
pg_proc_oid_index_temp ON " \ + "pg_proc_temp_oids USING btree (oid) TABLESPACE pg_default;" + sql += "CREATE UNIQUE INDEX pg_proc_proname_args_nsp_index_temp ON" \ + " pg_proc_temp_oids USING btree (proname, proargtypes," \ + " pronamespace) TABLESPACE pg_default;" + # creat index + for eachdb in database_list: + (status, output) = ClusterCommand.remoteSQLCommand( + sql, self.context.user, + self.dnInst.hostname, self.dnInst.port, False, + eachdb, IsInplaceUpgrade=True) + if status != 0: + raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % sql + + " Error: \n%s" % str(output)) + + def getDatabaseList(self): + """ + check database list in cluster + :return: + """ + self.context.logger.debug("Get database list in cluster.") + sql = "select datname from pg_database;" + (status, output) = ClusterCommand.remoteSQLCommand( + sql, self.context.user, + self.dnInst.hostname, self.dnInst.port, False, + DefaultValue.DEFAULT_DB_NAME, IsInplaceUpgrade=True) + if status != 0: + raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % sql + + " Error: \n%s" % str(output)) + if "" == output: + raise Exception("No database objects were found in the cluster!") + reslines = (output.strip()).split('\n') + if (len(reslines) < 3 + or "template1" not in reslines + or "template0" not in reslines + or "postgres" not in reslines): + raise Exception("The database list is invalid:%s." % str(reslines)) + self.context.logger.debug("Database list in cluster is %s." % reslines) + return reslines + + def replyXlog(self, database_list): + """ + make checkpoint + :return: + """ + sql = 'CHECKPOINT;' + for eachdb in database_list: + (status, output) = ClusterCommand.remoteSQLCommand( + sql, self.context.user, + self.dnInst.hostname, self.dnInst.port, False, + eachdb, IsInplaceUpgrade=True) + if status != 0: + raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % sql + + " Error: \n%s" % str(output)) + + def execRollbackUpgradedCatalog(self, scriptType="rollback"): + """ + function : connect database and rollback/upgrade catalog one by one + 1.find a node that has dn instance + 2.scp sql files to that node + 3.send cmd to that node and exec + input : NA + output: NA + """ + self.context.logger.debug("Start to {0} catalog.".format(scriptType)) + try: + dnNodeName = self.dnInst.hostname + if dnNodeName == "": + raise Exception(ErrorCode.GAUSS_526["GAUSS_52602"]) + self.context.logger.debug("dn nodes is {0}".format(dnNodeName)) + # scp sql files to that node + maindb_sql = "%s/%s_catalog_maindb_tmp.sql" \ + % (self.context.upgradeBackupPath, scriptType) + otherdb_sql = "%s/%s_catalog_otherdb_tmp.sql" \ + % (self.context.upgradeBackupPath, scriptType) + if "upgrade" == scriptType: + check_upgrade_sql = \ + "%s/check_upgrade_tmp.sql" % self.context.upgradeBackupPath + if not os.path.isfile(check_upgrade_sql): + raise Exception( + ErrorCode.GAUSS_502["GAUSS_50210"] % check_upgrade_sql) + self.context.logger.debug("Scp {0} file to nodes {1}".format( + check_upgrade_sql, dnNodeName)) + g_OSlib.scpFile(dnNodeName, check_upgrade_sql, + self.context.upgradeBackupPath) + if not os.path.isfile(maindb_sql): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50210"] % maindb_sql) + if not os.path.isfile(otherdb_sql): + raise Exception( + ErrorCode.GAUSS_502["GAUSS_50210"] % otherdb_sql) + g_OSlib.scpFile(dnNodeName, maindb_sql, + self.context.upgradeBackupPath) + g_OSlib.scpFile(dnNodeName, otherdb_sql, + self.context.upgradeBackupPath) + self.context.logger.debug( + "Scp {0} file and {1} file to nodes {2}".format( + maindb_sql, otherdb_sql, 
dnNodeName)) + # send cmd to that node and exec + cmd = "%s -t %s -U %s --upgrade_bak_path=%s --script_type=%s -l " \ + "%s" % (OMCommand.getLocalScript("Local_Upgrade_Utility"), + Const.ACTION_UPDATE_CATALOG, + self.context.user, + self.context.upgradeBackupPath, + scriptType, + self.context.localLog) + self.context.logger.debug( + "Command for executing {0} catalog.".format(scriptType)) + DefaultValue.execCommandWithMode(cmd, + "{0} catalog".format(scriptType), + self.context.sshTool, + self.context.isSingle, + self.context.userProfile, + [dnNodeName]) + self.context.logger.debug( + "Successfully {0} catalog.".format(scriptType)) + except Exception as e: + self.context.logger.log("Failed to {0} catalog.".format(scriptType)) + if not self.context.forceRollback: + raise Exception(str(e)) + + def pgxcNodeUpdateLocalhost(self, mode): + """ + This function is used to modify the localhost of the system table + which pgxc_node + :param mode: + :return: + """ + try: + if int(float(self.context.newClusterNumber) * 1000) < 92069 or \ + int(float(self.context.oldClusterNumber) * 1000) >= 92069: + return + if mode == "upgrade": + self.context.logger.debug("Update localhost in pgxc_node.") + else: + self.context.logger.debug("Rollback localhost in pgxc_node.") + for dbNode in self.context.clusterInfo.dbNodes: + for dn in dbNode.datanodes: + sql = "START TRANSACTION;" + sql += "SET %s = on;" % Const.ON_INPLACE_UPGRADE + if mode == "upgrade": + sql += "UPDATE PGXC_NODE SET node_host = '%s', " \ + "node_host1 = '%s' WHERE node_host = " \ + "'localhost'; " % (dn.listenIps[0], + dn.listenIps[0]) + else: + sql += "UPDATE PGXC_NODE SET node_host = " \ + "'localhost', node_host1 = 'localhost' WHERE" \ + " node_type = 'C' and node_host = '%s';" %\ + (dn.listenIps[0]) + sql += "COMMIT;" + self.context.logger.debug("Current sql %s." % sql) + (status, output) = ClusterCommand.remoteSQLCommand( + sql, self.context.user, dn.hostname, dn.port, + False, DefaultValue.DEFAULT_DB_NAME, + IsInplaceUpgrade=True) + if status != 0: + if self.context.forceRollback: + self.context.logger.debug("In forceRollback, " + "roll back pgxc_node. 
" + "%s " % str(output)) + else: + raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] + % sql + " Error: \n%s" % + str(output)) + if mode == "upgrade": + self.context.logger.debug( + "Success update localhost in pgxc_node.") + else: + self.context.logger.debug( + "Success rollback localhost in pgxc_node.") + except Exception as e: + raise Exception(str(e)) + + def touchInitFile(self): + """ + function: touch upgrade init file for every primary/standby and + do pre-upgrade staffs + input : NA + output: NA + """ + try: + if self.isLargeInplaceUpgrade: + self.context.logger.debug("Start to create upgrade init file.") + # send cmd to all node and exec + cmd = "%s -t %s -U %s --upgrade_bak_path=%s -l %s" % \ + (OMCommand.getLocalScript("Local_Upgrade_Utility"), + Const.ACTION_TOUCH_INIT_FILE, + self.context.user, + self.context.upgradeBackupPath, + self.context.localLog) + DefaultValue.execCommandWithMode(cmd, + "create upgrade init file", + self.context.sshTool, + self.context.isSingle, + self.context.userProfile) + self.context.logger.debug( + "Successfully created upgrade init file.") + except Exception as e: + raise Exception(str(e)) + + def prepareSql(self, mode="rollback"): + """ + function : prepare 4 files: rollback_catalog_maindb_tmp.sql, + rollback_catalog_otherdb_tmp.sql and upgrade file + 2.for each result file: filter all files and merge + into the *_tmp.sql file + + :param rollback: can be rollback or upgrade + """ + try: + self.prepareSqlForDb(mode) + self.prepareSqlForDb(mode, "otherdb") + if mode == "upgrade": + self.prepareCheckSql() + except Exception as e: + raise Exception("Failed to prepare %s sql file failed. ERROR: %s" + % (mode, str(e))) + + def prepareSqlForDb(self, mode, dbType="maindb"): + self.context.logger.debug( + "Start to prepare {0} sql files for {1}.".format(mode, dbType)) + header = self.getSqlHeader() + if "upgrade" in mode: + listName = "upgrade" + else: + listName = "rollback" + fileNameList = self.getFileNameList("{0}_catalog_{1}".format( + listName, dbType), mode) + if "rollback" in mode: + fileNameList.sort(reverse=True) + else: + fileNameList.sort() + fileName = "{0}_catalog_{1}_tmp.sql".format(mode, dbType) + self.context.logger.debug("The real file list for %s: %s" % ( + dbType, fileNameList)) + self.togetherFile(header, "{0}_catalog_{1}".format(listName, dbType), + fileNameList, fileName) + self.context.logger.debug("Successfully prepared sql files for %s." + % dbType) + + def prepareCheckSql(self): + header = ["START TRANSACTION;"] + fileNameList = self.getFileNameList("check_upgrade") + fileNameList.sort() + self.context.logger.debug("The real file list for checking upgrade: " + "%s" % fileNameList) + self.togetherFile(header, "check_upgrade", fileNameList, + "check_upgrade_tmp.sql") + + def togetherFile(self, header, filePathName, fileNameList, executeFileName): + writeFile = "" + try: + filePath = "%s/upgrade_sql/%s" % (self.context.upgradeBackupPath, + filePathName) + self.context.logger.debug("Preparing [%s]." 
+    def togetherFile(self, header, filePathName, fileNameList, executeFileName):
+        writeFile = ""
+        try:
+            filePath = "%s/upgrade_sql/%s" % (self.context.upgradeBackupPath,
+                                              filePathName)
+            self.context.logger.debug("Preparing [%s]." % filePath)
+            writeFile = "%s/%s" % (self.context.upgradeBackupPath,
+                                   executeFileName)
+            g_file.createFile(writeFile)
+            g_file.writeFile(writeFile, header, 'w')
+
+            with open(writeFile, 'a') as sqlFile:
+                for each_file in fileNameList:
+                    each_file_with_path = "%s/%s" % (filePath, each_file)
+                    self.context.logger.debug("Handling file: %s" %
+                                              each_file_with_path)
+                    with open(each_file_with_path, 'r') as fp:
+                        for line in fp:
+                            sqlFile.write(line)
+                    sqlFile.write(os.linesep)
+            g_file.writeFile(writeFile, ["COMMIT;"], 'a')
+            self.context.logger.debug(
+                "Successfully merged the {0} file".format(writeFile))
+            if not os.path.isfile(writeFile):
+                raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % writeFile)
+        except Exception as e:
+            raise Exception("Failed to write {0} sql file. ERROR: {1}".format(
+                writeFile, str(e)))
+
+    def modifyPgProcIndex(self):
+        """
+        1. execute the sql to rebuild the pg_proc indexes
+        2. make checkpoint
+        3. stop cluster
+        4. start cluster
+        :return:
+        """
+        self.context.logger.debug("Begin to modify pg_proc index.")
+        time.sleep(3)
+        database_list = self.getDatabaseList()
+        # execute the sql to rebuild the pg_proc indexes
+        sql = """START TRANSACTION;SET IsInplaceUpgrade = on;
+            drop index pg_proc_oid_index;SET LOCAL
+            inplace_upgrade_next_system_object_oids=IUO_CATALOG,false,
+            true,0,0,0,2690;CREATE UNIQUE INDEX pg_proc_oid_index ON pg_proc
+            USING btree (oid);SET LOCAL
+            inplace_upgrade_next_system_object_oids=IUO_CATALOG,false,
+            true,0,0,0,0;commit;CHECKPOINT;"""
+        for eachdb in database_list:
+            (status, output) = ClusterCommand.remoteSQLCommand(
+                sql, self.context.user,
+                self.dnInst.hostname, self.dnInst.port, False,
+                eachdb, IsInplaceUpgrade=True)
+            if status != 0:
+                raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % sql +
+                                " Error: \n%s" % str(output))
+        sql = """START TRANSACTION;SET IsInplaceUpgrade = on;
+            drop index pg_proc_proname_args_nsp_index;SET LOCAL
+            inplace_upgrade_next_system_object_oids=IUO_CATALOG,false,
+            true,0,0,0,2691;create UNIQUE INDEX pg_proc_proname_args_nsp_index
+            ON pg_proc USING btree (proname, proargtypes, pronamespace);SET
+            LOCAL inplace_upgrade_next_system_object_oids=IUO_CATALOG,false,
+            true,0,0,0,0;commit;CHECKPOINT;"""
+        for eachdb in database_list:
+            (status, output) = ClusterCommand.remoteSQLCommand(
+                sql, self.context.user,
+                self.dnInst.hostname, self.dnInst.port, False,
+                eachdb, IsInplaceUpgrade=True)
+            if status != 0:
+                raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % sql +
+                                " Error: \n%s" % str(output))
+        # stop cluster
+        self.stopCluster()
+        # start cluster
+        self.startCluster()
+        self.context.logger.debug("Successfully modified pg_proc index.")
+
+    def setNewVersionGuc(self):
+        """
+        function: set the new version guc
+        input : NA
+        output : NA
+        """
+        pass
+
+    def setActionFile(self):
+        """
+        set the action from the step file; if it cannot be found,
+        set it to large upgrade.
+        If the upgrade type is actually small upgrade but we set it to
+        large upgrade, we only kill the cm agent as an extra expense,
+        which has no effect on transactions. But if the action should be
+        large upgrade and we do not set upgrade_mode, some new features
+        will not be enabled.
+        :return: NA
+        """
+        stepFile = os.path.join(self.context.upgradeBackupPath,
+                                Const.GREY_UPGRADE_STEP_FILE)
+        self.context.logger.debug("Get the action from file %s." % stepFile)
+        if not (os.path.exists(stepFile) and os.path.isfile(stepFile)):
+            self.context.logger.debug("Step file does not exist or is not a"
+                                      " file, cannot get action from it."
+                                      " Set it to large upgrade.")
+            self.context.action = Const.ACTION_LARGE_UPGRADE
+            return
+        with open(stepFile, 'r') as csvfile:
+            reader = csv.DictReader(csvfile)
+            for row in reader:
+                self.context.action = row['upgrade_action']
+                break
+        self.context.logger.debug("Set the action to %s"
+                                  % self.context.action)
+
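+    # Illustrative note (format assumed from the parsing logic in
+    # getClusterAppPathFromFile below): the RECORD_UPGRADE_DIR file holds
+    # exactly two lines, e.g.
+    #
+    #     /path/to/old/app
+    #     /path/to/new/app
+    #
+    # where line 1 is the old cluster app path and line 2 is the new one
+    # (the paths here are placeholders).
+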
" + "Set it to large upgrade.") + self.context.action = Const.ACTION_LARGE_UPGRADE + return + with open(stepFile, 'r') as csvfile: + reader = csv.DictReader(csvfile) + for row in reader: + self.context.action = row['upgrade_action'] + break + self.context.logger.debug("Set the action to %s" + % self.context.action) + + def getClusterAppPath(self, mode=Const.OLD): + """ + if cannot get path from table, try to get from the backup file + :param mode: + :return: + """ + self.context.logger.debug("Get the install path from table or file.") + path = self.getClusterAppPathFromFile(mode) + return path + + def getClusterAppPathFromFile(self, mode=Const.OLD): + """ + get the app path from backup dir, mode is new or old, + :param mode: 'old', 'new' + :return: the real path of appPath + """ + dirFile = "%s/%s" % (self.context.upgradeBackupPath, + Const.RECORD_UPGRADE_DIR) + self.context.logger.debug("Get the %s app path from file %s" + % (mode, dirFile)) + if mode not in [Const.OLD, Const.NEW]: + raise Exception(traceback.format_exc()) + if not os.path.exists(dirFile): + self.context.logger.debug(ErrorCode.GAUSS_502["GAUSS_50201"] + % dirFile) + if self.checkBakPathNotExists(): + return "" + # copy the binary_upgrade dir from other node, + # if one node is damaged while binary_upgrade may disappear, + # user repair one node before commit, and send the commit + # command to the repair node, we need to copy the + # dir from remote node + cmd = "if [ -f '%s' ]; then echo 'GetFile';" \ + " else echo 'NoThisFile'; fi" % dirFile + self.context.logger.debug("Command for checking file: %s" % cmd) + (status, output) = self.context.sshTool.getSshStatusOutput( + cmd, self.context.clusterNodes, self.context.mpprcFile) + outputMap = self.context.sshTool.parseSshOutput( + self.context.clusterNodes) + self.context.logger.debug("Output: %s" % output) + copyNode = "" + for node in self.context.clusterNodes: + if status[node] == DefaultValue.SUCCESS: + if 'GetFile' in outputMap[node]: + copyNode = node + break + if copyNode: + if not os.path.exists(self.context.upgradeBackupPath): + self.context.logger.debug("Create directory %s." + % self.context.tmpDir) + g_file.createDirectory( + self.context.upgradeBackupPath, True, + DefaultValue.KEY_DIRECTORY_MODE) + self.context.logger.debug("Copy the directory %s from node %s." 
+ % (self.context.upgradeBackupPath, + copyNode)) + cmd = g_Platform.getRemoteCopyCmd( + self.context.upgradeBackupPath, self.context.tmpDir, + str(copyNode), False, 'directory') + self.context.logger.debug("Command for copying " + "directory: %s" % cmd) + DefaultValue.execCommandLocally(cmd) + else: + # binary_upgrade exists, but no step file + return "" + if not os.path.isfile(dirFile): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50210"] % dirFile) + with open(dirFile, 'r') as fp: + retLines = fp.readlines() + if len(retLines) != 2: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50222"] % dirFile) + if mode == Const.OLD: + path = retLines[0].strip() + else: + path = retLines[1].strip() + # if can get the path from file, the path must be valid, + # otherwise the file is damaged accidentally + DefaultValue.checkPathVaild(path) + if not os.path.exists(path): + if mode == Const.NEW and \ + self.context.action == Const.ACTION_AUTO_ROLLBACK: + self.context.logger.debug("Under rollback, the new " + "cluster app path does not exists.") + elif mode == Const.OLD and \ + self.context.action == Const.ACTION_COMMIT_UPGRADE: + self.context.logger.debug("Under commit, no need to " + "check the old path exists.") + else: + self.context.logger.debug(traceback.format_exc()) + raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % path) + self.context.logger.debug("Successfully Get the app" + " path [%s] from file" % path) + return path + + def printPrecommitBanner(self): + """ + funcation: if in pre-commit status, and do not execute + the commit cmd, then can print this message + input : NA + output: NA + """ + self.context.logger.log("Upgrade main process has been finished," + " user can do some check now.") + self.context.logger.log("Once the check done, please execute " + "following command to commit upgrade:") + xmlFile = self.context.xmlFile \ + if len(self.context.xmlFile) else "XMLFILE" + self.context.logger.log("\n gs_upgradectl -t " + "commit-upgrade -X %s \n" % xmlFile) + + def doGreyCommitUpgrade(self): + """ + function: commit binary upgrade and clean up backup files + 1. unset read-only + 2. drop old PMK schema + 3. 
clean up other upgrade tmp files
+        input : NA
+        output: NA
+        """
+        try:
+            (status, output) = self.doHealthCheck(Const.OPTION_POSTCHECK)
+            if status != 0:
+                raise Exception(
+                    "NOTICE: " + ErrorCode.GAUSS_516[
+                        "GAUSS_51601"] % "cluster" + output)
+            if self.unSetClusterReadOnlyMode() != 0:
+                raise Exception("NOTICE: " + ErrorCode.GAUSS_529["GAUSS_52907"])
+
+            if not (self.isNodeSpecifyStep(GreyUpgradeStep.STEP_PRE_COMMIT)
+                    or self.isNodeSpecifyStep(
+                        GreyUpgradeStep.STEP_BEGIN_COMMIT)):
+                raise Exception(ErrorCode.GAUSS_529["GAUSS_52916"])
+            # for the re-entered commit, the schema may have been deleted
+            if self.existTable(Const.RECORD_NODE_STEP):
+                self.recordNodeStep(GreyUpgradeStep.STEP_BEGIN_COMMIT)
+            self.setActionFile()
+            # self.restoreOriginalState()
+            if self.context.action == Const.ACTION_LARGE_UPGRADE:
+                self.setUpgradeMode(0)
+                time.sleep(10)
+            if self.dropPMKSchema() != 0:
+                raise Exception(ErrorCode.GAUSS_529["GAUSS_52917"])
+
+            self.clearOtherToolPackage()
+            self.cleanInstallPath(Const.OLD)
+            self.dropSupportSchema()
+            self.cleanBinaryUpgradeBakFiles()
+            self.cleanConfBakOld()
+            self.context.logger.log("Commit upgrade succeeded.")
+        except Exception as e:
+            self.exitWithRetCode(Const.ACTION_COMMIT_UPGRADE, False, str(e))
+        self.exitWithRetCode(Const.ACTION_COMMIT_UPGRADE, True)
+
+    def dropPMKSchema(self):
+        """
+        function: drop the old PMK schema
+        Notice: the pmk schema is on database postgres
+        input : NA
+        output: return 0, if the operation is done successfully.
+                return 1, if the operation failed.
+        """
+        try:
+            self.context.logger.debug("Start to drop schema PMK.")
+            # execute the drop commands on the CN instance
+            sql = "DROP SCHEMA IF EXISTS pmk CASCADE; "
+            retry_times = 0
+            while True:
+                (status, output) = self.execSqlCommandInPrimaryDN(sql)
+                if status != 0 or ClusterCommand.findErrorInSql(output):
+                    if retry_times < 12:
+                        self.context.logger.debug(
+                            "ERROR: Failed to DROP SCHEMA pmk for the %d time."
+                            " Error: \n%s" % (retry_times + 1, str(output)))
+                    else:
+                        raise Exception(
+                            ErrorCode.GAUSS_513["GAUSS_51300"] % sql +
+                            " Error: \n%s" % str(output))
+                else:
+                    break
+
+                time.sleep(5)
+                retry_times += 1
+            self.context.logger.debug("Successfully deleted schema PMK.")
+            return 0
+        except Exception as e:
+            self.context.logger.log(
+                "NOTICE: Failed to execute SQL command on CN instance, " +
+                "please re-commit upgrade once again or " +
+                "re-execute SQL command 'DROP SCHEMA "
+                "IF EXISTS pmk CASCADE' manually.")
+            self.context.logger.debug(str(e))
+            return 1
+
+    def cleanConfBakOld(self):
+        """
+        clean the conf.bak.old files in all instances
+        input : NA
+        output : NA
+        """
+        try:
+            cmd = "%s -t %s -U %s -l %s" % \
+                  (OMCommand.getLocalScript("Local_Upgrade_Utility"),
+                   Const.ACTION_CLEAN_CONF_BAK_OLD,
+                   self.context.user,
+                   self.context.localLog)
+            hostList = copy.deepcopy(self.context.nodeNames)
+            self.context.sshTool.executeCommand(cmd, "", hostList=hostList)
+        except Exception as e:
+            raise Exception(str(e))
+        self.context.logger.debug(
+            "Successfully cleaned conf.bak.old in all instances.")
+
+    def doGreyBinaryRollback(self, action=""):
+        """
+        function: roll back the binary upgrade
+        input : NA
+        output: return True, if the operation is done successfully.
+                return False, if the operation failed.
+ """ + self.context.logger.log("Performing grey rollback.") + # before prepare upgrade function and table or after commit, + # table does not exist means not rollback + # if we read the step for file, means we have force to rollback, + # the record in table is not same with file + # we can only read the step from file + try: + self.distributeXml() + if action == Const.ACTION_AUTO_ROLLBACK: + self.clearOtherToolPackage(action) + try: + self.getOneDNInst(True) + except Exception as e: + # don't promise DN is available in force rollback + if self.context.forceRollback: + self.context.logger.debug("Error: %s" % str(e)) + else: + raise Exception(str(e)) + # if the cluster is degrade and cn is down, + # the set command will be False, ignore the error + if self.unSetClusterReadOnlyMode() != 0: + self.context.logger.log( + "WARNING: Failed to unset cluster read only mode.") + if self.context.forceRollback: + # if one node is uninstalled, + # there will be no binary_upgrade dir + self.createBakPath() + self.setReadStepFromFile() + self.createGphomePack() + # first time user may use forcerollback, but next time user may + # not use force rollback, so the step file and step + # table is not same, so we can only read step from file, + # consider if need to sync them, not important + # under force upgrade, only read step from file + maxStep = self.getNodeStep() + # if -2, it means there is no need to exec rollback + # if under upgrade continue mode, it will do upgrade not rollback, + # it can enter the upgrade process + # when the binary_upgrade bak dir has some files + if maxStep == Const.BINARY_UPGRADE_NO_NEED_ROLLBACK: + self.cleanBinaryUpgradeBakFiles(True) + self.context.logger.log("No need to rollback.") + return True + + elif maxStep == GreyUpgradeStep.STEP_BEGIN_COMMIT: + self.context.logger.log( + ErrorCode.GAUSS_529["GAUSS_52919"] + + " Please commit again! Can not rollback any more.") + return False + + # Mark that we leave pre commit status, + # so that if we fail at the first few steps, + # we won't be allowed to commit upgrade any more. + elif maxStep == GreyUpgradeStep.STEP_PRE_COMMIT: + nodes = self.getNodesWithStep(maxStep) + self.recordNodeStep( + GreyUpgradeStep.STEP_UPDATE_POST_CATALOG, nodes) + maxStep = self.getNodeStep() + if maxStep == GreyUpgradeStep.STEP_UPDATE_POST_CATALOG: + self.context.logger.debug( + "Record the step %d to mark it has leaved pre-commit" + " status." 
+                    % GreyUpgradeStep.STEP_UPDATE_POST_CATALOG)
+                try:
+                    if self.context.action == Const.ACTION_LARGE_UPGRADE \
+                            and self.isNodeSpecifyStep(
+                                GreyUpgradeStep.STEP_UPDATE_POST_CATALOG):
+                        self.prepareUpgradeSqlFolder()
+                        self.prepareSql("rollback-post")
+                        self.setUpgradeMode(2)
+                        self.execRollbackUpgradedCatalog(
+                            scriptType="rollback-post")
+                except Exception as e:
+                    if self.context.forceRollback:
+                        self.context.logger.debug("Error: %s" % str(e))
+                    else:
+                        raise Exception(str(e))
+                nodes = self.getNodesWithStep(maxStep)
+                self.recordNodeStep(GreyUpgradeStep.STEP_UPGRADE_PROCESS,
+                                    nodes)
+            # roll back the nodes from maxStep; each node does its own
+            # rollback
+            needSwitchProcess = False
+            if maxStep >= GreyUpgradeStep.STEP_UPGRADE_PROCESS:
+                needSwitchProcess = True
+
+            if maxStep >= GreyUpgradeStep.STEP_SWITCH_NEW_BIN:
+                self.greyRestoreConfig()
+                self.switchBin(Const.OLD)
+                self.greyRestoreGuc()
+                if needSwitchProcess:
+                    self.rollbackHotpatch()
+                    self.switchExistsProcess(True)
+                self.recordNodeStep(GreyUpgradeStep.STEP_UPDATE_CATALOG)
+            if maxStep >= GreyUpgradeStep.STEP_UPDATE_CATALOG and \
+                    self.context.action == Const.ACTION_LARGE_UPGRADE:
+                self.rollbackCatalog()
+                self.recordNodeStep(GreyUpgradeStep.STEP_INIT_STATUS)
+
+            if maxStep >= GreyUpgradeStep.STEP_INIT_STATUS:
+                # clean up on every node, because the binary_upgrade temp
+                # dir is created on every node
+                self.cleanInstallPath(Const.NEW)
+                self.dropSupportSchema()
+                self.initOmRollbackProgressFile()
+                self.cleanBinaryUpgradeBakFiles(True)
+        except Exception as e:
+            self.context.logger.debug(str(e))
+            self.context.logger.debug(traceback.format_exc())
+            self.context.logger.log("Rollback failed. Error: %s" % str(e))
+            return False
+        self.context.logger.log("Rollback succeeded.")
+        return True
+
+    def setReadStepFromFile(self):
+        readFromFileFlag = os.path.join(self.context.upgradeBackupPath,
+                                        Const.READ_STEP_FROM_FILE_FLAG)
+        self.context.logger.debug("Under force rollback mode.")
+        g_file.createFile(readFromFileFlag, True, DefaultValue.KEY_FILE_MODE)
+        self.distributeFile(readFromFileFlag)
+        self.context.logger.debug("Create file %s. " % readFromFileFlag +
+                                  "Only read step from file.")
+
+    def getNodeStep(self):
+        """
+        get the node step from file or table
+        """
+        maxStep = self.getNodeStepFile()
+        return maxStep
+
+    def getNodeStepFile(self):
+        if not os.path.exists(self.context.upgradeBackupPath):
+            self.context.logger.debug("Directory %s does not exist. "
+                                      "Only clean remaining files and schema."
+                                      % self.context.upgradeBackupPath)
+            return Const.BINARY_UPGRADE_NO_NEED_ROLLBACK
+        if not os.path.isdir(self.context.upgradeBackupPath):
+            raise Exception(ErrorCode.GAUSS_513["GAUSS_50211"] %
+                            self.context.upgradeBackupPath)
+        # because the binary_upgrade dir is used to block expansion,
+        # we should clean the dir when rolling back
+        fileList = os.listdir(self.context.upgradeBackupPath)
+        if not fileList:
+            return GreyUpgradeStep.STEP_INIT_STATUS
+        stepFile = os.path.join(self.context.upgradeBackupPath,
+                                Const.GREY_UPGRADE_STEP_FILE)
+        if not os.path.exists(stepFile):
+            self.context.logger.debug(
+                "No need to rollback. File %s does not exist." % stepFile)
+            return Const.BINARY_UPGRADE_NO_NEED_ROLLBACK
+
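+        # For reference, upgrade_step.csv is a small CSV with a header row.
+        # The column order shown here is illustrative; the reader below only
+        # relies on the column names node_host, upgrade_action and step:
+        #   node_host,upgrade_action,step
+        #   node1,large-upgrade,4
+        #   node2,large-upgrade,3
+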
+        self.context.logger.debug("Get the node step from file %s."
+                                  % stepFile)
+        with open(stepFile, 'r') as csvfile:
+            reader = csv.DictReader(csvfile)
+            maxStep = Const.INVALID_UPRADE_STEP
+            for row in reader:
+                self.checkStep(row['step'])
+                maxStep = max(int(row['step']), maxStep)
+                if row['upgrade_action'] != self.context.action:
+                    raise Exception(ErrorCode.GAUSS_502["GAUSS_50222"] %
+                                    stepFile +
+                                    "\nIncorrect upgrade strategy, input "
+                                    "upgrade type: %s; record upgrade type: %s"
+                                    % (self.context.action,
+                                       row['upgrade_action']))
+        self.context.logger.debug("Get the max step [%d] from file." % maxStep)
+        self.context.logger.debug(
+            "Successfully got the node step from file %s." % stepFile)
+        return maxStep
+
+    def checkActionInTableOrFile(self):
+        """
+        Under force upgrade, the step file and the step table may not be
+        consistent, so we only use the step file.
+        """
+        self.checkActionInFile()
+
+    def execSqlCommandInPrimaryDN(self, sql, retryTime=3):
+        self.context.logger.debug("Start to exec sql {0}.".format(sql))
+        count = 0
+        status, output = 1, ""
+        while count < retryTime:
+            self.getOneDNInst(checkNormal=True)
+            self.context.logger.debug(
+                "Exec sql in dn node {0}".format(self.dnInst.hostname))
+            (status, output) = ClusterCommand.remoteSQLCommand(
+                sql, self.context.user,
+                self.dnInst.hostname, self.dnInst.port, False,
+                DefaultValue.DEFAULT_DB_NAME, IsInplaceUpgrade=True)
+            self.context.logger.debug(
+                "Exec sql result is, status:{0}, output is {1}".format(
+                    status, output))
+            if status != 0 or ClusterCommand.findErrorInSql(output):
+                count += 1
+                continue
+            else:
+                break
+        return status, output
+
+    def checkActionInFile(self):
+        """
+        function: check whether the current action is the same as the
+                  action recorded in the step file
+        input : NA
+        output: NA
+        """
+        try:
+            self.context.logger.debug("Check the action in file.")
+            stepFile = os.path.join(self.context.upgradeBackupPath,
+                                    Const.GREY_UPGRADE_STEP_FILE)
+            if not os.path.isfile(stepFile):
+                self.context.logger.debug(
+                    ErrorCode.GAUSS_502["GAUSS_50201"] % (stepFile))
+                return
+
+            with open(stepFile, 'r') as csvfile:
+                reader = csv.DictReader(csvfile)
+                for row in reader:
+                    upgrade_action = row['upgrade_action']
+                    if self.context.action != upgrade_action:
+                        raise Exception(ErrorCode.GAUSS_529["GAUSS_52925"] % (
+                            self.context.action, upgrade_action))
+            self.context.logger.debug(
+                "Successfully checked the action in file.")
+            return
+        except Exception as e:
+            self.context.logger.debug("Failed to check the action in file.")
+            raise Exception(str(e))
+
+    def getNodesWithStep(self, step):
+        """
+        get nodes with the given step from the step file or table
+        """
+        nodes = self.getNodesWithStepFile(step)
+        return nodes
+
+    def getNodesWithStepFile(self, step):
+        """
+        get nodes with the given step from the file upgrade_step.csv
+        """
+        stepFile = os.path.join(self.context.upgradeBackupPath,
+                                Const.GREY_UPGRADE_STEP_FILE)
+        self.context.logger.debug("Get the node step from file %s."
+                                  % stepFile)
+        nodes = []
+        with open(stepFile, 'r') as csvfile:
+            reader = csv.DictReader(csvfile)
+            for row in reader:
+                if not row['step'].isdigit():
+                    raise Exception(ErrorCode.GAUSS_529["GAUSS_52926"])
+                if int(row['step']) == step:
+                    nodes.append(row['node_host'])
+        self.context.logger.debug("Nodes %s are at step %d" % (nodes, step))
+        return nodes
+
+    def greyRestoreConfig(self):
+        """
+        deal with lib/postgresql/pg_plugin
+        Under rollback we use the new pg_plugin dir as the base: files in
+        the new dir but not in the old dir are moved to the old dir (to
+        cover added C functions), and files removed from the new dir are
+        removed from the old dir (to cover dropped C functions).
+        Also copy the config from the new dir to the old dir if user
+        actions may have changed it.
+        """
+        cmd = "%s -t %s -U %s --old_cluster_app_path=%s " \
+              "--new_cluster_app_path=%s -l %s" % (
+                  OMCommand.getLocalScript("Local_Upgrade_Utility"),
+                  Const.ACTION_GREY_RESTORE_CONFIG,
+                  self.context.user,
+                  self.context.oldClusterAppPath,
+                  self.context.newClusterAppPath,
+                  self.context.localLog)
+        if self.context.forceRollback:
+            cmd += " --force"
+        self.context.logger.debug("Command for restoring config: %s" % cmd)
+        rollbackList = copy.deepcopy(self.context.clusterNodes)
+        self.context.sshTool.executeCommand(cmd, "", hostList=rollbackList)
+        self.context.logger.debug("Successfully restored config.")
+
+    def greyRestoreGuc(self):
+        """
+        restore the old GUC values during rollback
+        :return: NA
+        """
+        cmd = "%s -t %s -U %s --old_cluster_app_path=%s -X %s -l %s" % \
+              (OMCommand.getLocalScript("Local_Upgrade_Utility"),
+               Const.ACTION_GREY_RESTORE_GUC,
+               self.context.user,
+               self.context.oldClusterAppPath,
+               self.context.xmlFile,
+               self.context.localLog)
+        if self.context.forceRollback:
+            cmd += " --force"
+        self.context.logger.debug("Command for restoring GUC: %s" % cmd)
+        rollbackList = copy.deepcopy(self.context.clusterNodes)
+        self.context.sshTool.executeCommand(cmd, "", hostList=rollbackList)
+        self.context.logger.debug("Successfully restored GUC.")
+
+    def dropSupportSchema(self):
+        self.context.logger.debug("Drop schema.")
+        sql = "DROP SCHEMA IF EXISTS %s CASCADE;" % Const.UPGRADE_SCHEMA
+        retryTime = 0
+        try:
+            while retryTime < 5:
+                (status, output) = self.execSqlCommandInPrimaryDN(sql)
+                if status != 0 or ClusterCommand.findErrorInSql(output):
+                    retryTime += 1
+                    self.context.logger.debug(
+                        "Failed to execute SQL: %s. Error: \n%s. Retrying." % (
+                            sql, str(output)))
+                else:
+                    break
+            if status != 0 or ClusterCommand.findErrorInSql(output):
+                self.context.logger.debug(
+                    "Failed to execute SQL: %s. Error: \n%s" % (
+                        sql, str(output)))
+                raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % sql +
+                                " Please drop manually with this command.")
+            self.context.logger.debug(
+                "Successfully dropped schema %s cascade." %
+                Const.UPGRADE_SCHEMA)
+        except Exception as e:
+            if self.context.forceRollback:
+                self.context.logger.log(
+                    "Failed to drop schema. Please drop manually "
+                    "with this command: \n %s" % sql)
+            else:
+                raise Exception(str(e))
+
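+    # Inplace rollback below unwinds the steps recorded via
+    # recordNodeStepInplace() in reverse order (the step constants are
+    # listed at the top of the function). A recorded step of N means
+    # everything up to and including step N may have happened and must
+    # be undone.
+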
+ """ + self.context.logger.log("Performing inplace rollback.") + # step flag + # Const.BINARY_UPGRADE_NO_NEED_ROLLBACK value is -2 + # Const.INVALID_UPRADE_STEP value is -1 + # Const.BINARY_UPGRADE_STEP_INIT_STATUS value is 0 + # Const.BINARY_UPGRADE_STEP_BACKUP_STATUS value is 1 + # Const.BINARY_UPGRADE_STEP_STOP_NODE value is 2 + # Const.BINARY_UPGRADE_STEP_BACKUP_VERSION value is 3 + # Const.BINARY_UPGRADE_STEP_UPGRADE_APP value is 4 + # Const.BINARY_UPGRADE_STEP_START_NODE value is 5 + # Const.BINARY_UPGRADE_STEP_PRE_COMMIT value is 6 + self.distributeXml() + step = self.getNodeStepInplace() + if step == Const.BINARY_UPGRADE_NO_NEED_ROLLBACK: + self.context.logger.log("Rollback succeeded.") + return True + + # if step <= -1, it means the step file is broken, exit. + if step <= Const.INVALID_UPRADE_STEP: + self.context.logger.debug("Invalid upgrade step: %s." % str(step)) + return False + + # if step value is Const.BINARY_UPGRADE_STEP_PRE_COMMIT + # and find commit flag file, + # means user has commit upgrade, then can not do rollback + if step == Const.BINARY_UPGRADE_STEP_PRE_COMMIT: + if not self.checkCommitFlagFile(): + self.context.logger.log( + "Upgrade has already been committed, " + "can not execute rollback command any more.") + return False + + try: + self.checkStaticConfig() + self.startCluster() + # Mark that we leave pre commit status, + # so that if we fail at the first few steps, + # we won't be allowed to commit upgrade any more. + if step == Const.BINARY_UPGRADE_STEP_PRE_COMMIT: + self.recordNodeStepInplace( + Const.ACTION_INPLACE_UPGRADE, + Const.BINARY_UPGRADE_STEP_START_NODE) + + if step >= Const.BINARY_UPGRADE_STEP_START_NODE: + # drop table and index after large upgrade + if self.isLargeInplaceUpgrade: + if self.check_upgrade_mode(): + self.drop_table_or_index() + self.restoreClusterConfig(True) + self.switchBin(Const.OLD) + if self.isLargeInplaceUpgrade: + touchInitFlagFile = os.path.join( + self.context.upgradeBackupPath, "touch_init_flag") + if os.path.exists(touchInitFlagFile): + self.rollbackCatalog() + self.cleanCsvFile() + else: + self.setUpgradeMode(0) + else: + self.stopCluster() + self.recordNodeStepInplace( + Const.ACTION_INPLACE_UPGRADE, + Const.BINARY_UPGRADE_STEP_UPGRADE_APP) + + if step >= Const.BINARY_UPGRADE_STEP_UPGRADE_APP: + self.restoreNodeVersion() + self.restoreClusterConfig(True) + self.recordNodeStepInplace( + Const.ACTION_INPLACE_UPGRADE, + Const.BINARY_UPGRADE_STEP_BACKUP_VERSION) + + if step >= Const.BINARY_UPGRADE_STEP_BACKUP_VERSION: + self.cleanBackupedCatalogPhysicalFiles(True) + self.recordNodeStepInplace( + Const.ACTION_INPLACE_UPGRADE, + Const.BINARY_UPGRADE_STEP_STOP_NODE) + + if step >= Const.BINARY_UPGRADE_STEP_STOP_NODE: + self.startCluster() + self.recordNodeStepInplace( + Const.ACTION_INPLACE_UPGRADE, + Const.BINARY_UPGRADE_STEP_INIT_STATUS) + + if step >= Const.BINARY_UPGRADE_STEP_INIT_STATUS: + if self.unSetClusterReadOnlyMode() != 0: + raise Exception("NOTICE: " + + ErrorCode.GAUSS_529["GAUSS_52907"]) + self.cleanBinaryUpgradeBakFiles(True) + self.cleanInstallPath(Const.NEW) + # install kerberos + self.install_kerberos() + except Exception as e: + self.context.logger.error(str(e)) + self.context.logger.log("Rollback failed.") + return False + + self.context.logger.log("Rollback succeeded.") + return True + + def check_table_or_index_exist(self, name, eachdb): + """ + check a table exist + :return: + """ + sql = "select count(*) from pg_class where relname = '%s';" % name + (status, output) = 
+    def check_table_or_index_exist(self, name, eachdb):
+        """
+        check whether a table or index exists
+        :return: True if it exists
+        """
+        sql = "select count(*) from pg_class where relname = '%s';" % name
+        (status, output) = ClusterCommand.remoteSQLCommand(
+            sql, self.context.user,
+            self.dnInst.hostname, self.dnInst.port, False,
+            eachdb, IsInplaceUpgrade=True)
+        if status != 0 or ClusterCommand.findErrorInSql(output):
+            raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % sql +
+                            " Error: \n%s" % str(output))
+        if output == '0':
+            self.context.logger.debug("Table does not exist.")
+            return False
+        self.context.logger.debug("Table exists.")
+        return True
+
+    def drop_table_or_index(self):
+        """
+        drop the upgrade temp table and indexes
+        :return:
+        """
+        self.context.logger.debug("Start to drop table or index")
+        database_list = self.getDatabaseList()
+        # drop table and index
+        maindb = "postgres"
+        otherdbs = database_list
+        otherdbs.remove("postgres")
+        # check whether the table exists in postgres
+        table_name = 'pg_proc_temp_oids'
+        if self.check_table_or_index_exist(table_name, maindb):
+            self.drop_one_database_table_or_index([maindb])
+        else:
+            return
+        # drop the table and indexes in the other databases
+        self.drop_one_database_table_or_index(otherdbs)
+        self.context.logger.debug(
+            "Successfully dropped table or index.")
+
+    def drop_one_database_table_or_index(self,
+                                         database_list):
+        """
+        drop the temp table and indexes in the given databases
+        :return:
+        """
+        table_name = 'pg_proc_temp_oids'
+        delete_table_sql = "START TRANSACTION;SET IsInplaceUpgrade = on;" \
+                           "drop table %s;commit;" % table_name
+        index_name_list = ['pg_proc_oid_index_temp',
+                           'pg_proc_proname_args_nsp_index_temp']
+        for eachdb in database_list:
+            if self.check_table_or_index_exist(table_name, eachdb):
+                (status, output) = ClusterCommand.remoteSQLCommand(
+                    delete_table_sql, self.context.user,
+                    self.dnInst.hostname, self.dnInst.port, False,
+                    eachdb, IsInplaceUpgrade=True)
+                if status != 0:
+                    raise Exception(
+                        ErrorCode.GAUSS_513["GAUSS_51300"] % delete_table_sql
+                        + " Error: \n%s" % str(output))
+            for index in index_name_list:
+                if self.check_table_or_index_exist(index, eachdb):
+                    sql = "START TRANSACTION;SET IsInplaceUpgrade = on;" \
+                          "drop index %s;commit;" % index
+                    (status, output) = ClusterCommand.remoteSQLCommand(
+                        sql, self.context.user,
+                        self.dnInst.hostname, self.dnInst.port, False,
+                        eachdb, IsInplaceUpgrade=True)
+                    if status != 0:
+                        raise Exception(
+                            ErrorCode.GAUSS_513[
+                                "GAUSS_51300"] % sql + " Error: \n%s" % str(
+                                output))
+
+    def rollbackCatalog(self):
+        """
+        function: roll back catalog changes
+        steps:
+        1. prepare the upgrade SQL files and check them
+        2. do rollback catalog
+        input : NA
+        output: NA
+        """
+        try:
+            if self.context.action == Const.ACTION_INPLACE_UPGRADE and int(
+                    float(self.context.oldClusterNumber) * 1000) <= 93000:
+                raise Exception("For this old version %s, we only support "
+                                "physical rollback." % str(
+                                    self.context.oldClusterNumber))
+            self.context.logger.log("Rolling back catalog.")
+            self.prepareUpgradeSqlFolder()
+            self.prepareSql()
+            self.doRollbackCatalog()
+            self.context.logger.log("Successfully rolled back catalog.")
+        except Exception as e:
+            if self.context.action == Const.ACTION_INPLACE_UPGRADE:
+                self.context.logger.debug(
+                    "Failed to perform rollback operation by rolling "
+                    "back SQL files:\n%s" % str(e))
+                try:
+                    self.context.logger.debug("Try to recover again using "
+                                              "catalog physical files")
+                    self.doPhysicalRollbackCatalog()
+                except Exception as e:
+                    raise Exception(
+                        "Failed to rollback catalog. ERROR: %s" % str(e))
+            else:
+                raise Exception(
+                    "Failed to rollback catalog. ERROR: %s" % str(e))
+
ERROR: %s" % str(e)) + + + def doRollbackCatalog(self): + """ + function : rollback catalog change + steps: + stop cluster + set upgrade_from param + start cluster + connect database and rollback catalog changes one by one + stop cluster + unset upgrade_from param + input : NA + output: NA + """ + if self.context.action == Const.ACTION_INPLACE_UPGRADE: + self.startCluster() + self.setUpgradeMode(1) + else: + self.setUpgradeMode(2) + self.execRollbackUpgradedCatalog(scriptType="rollback") + self.pgxcNodeUpdateLocalhost("rollback") + if self.context.action == Const.ACTION_INPLACE_UPGRADE: + self.stopCluster() + self.setUpgradeMode(0) + + def doPhysicalRollbackCatalog(self): + """ + function : rollback catalog by restore physical files + stop cluster + unset upgrade_from param + restore physical files + input : NA + output: NA + """ + try: + self.startCluster() + self.setUpgradeMode(0) + self.stopCluster() + self.execPhysicalRollbackUpgradedCatalog() + except Exception as e: + raise Exception(str(e)) + + def execPhysicalRollbackUpgradedCatalog(self): + """ + function : rollback catalog by restore physical files + send cmd to all node + input : NA + output: NA + """ + try: + if self.isLargeInplaceUpgrade: + self.context.logger.debug( + "Start to restore physical catalog files.") + # send cmd to all node and exec + cmd = "%s -t %s -U %s --upgrade_bak_path=%s " \ + "--oldcluster_num='%s' -l %s" % \ + (OMCommand.getLocalScript("Local_Upgrade_Utility"), + Const.ACTION_RESTORE_OLD_CLUSTER_CATALOG_PHYSICAL_FILES, + self.context.user, + self.context.upgradeBackupPath, + self.context.oldClusterNumber, + self.context.localLog) + self.context.logger.debug( + "Command for restoring physical catalog files: %s." % cmd) + DefaultValue.execCommandWithMode( + cmd, + "restore physical files of catalog objects", + self.context.sshTool, + self.context.isSingle, + self.context.userProfile) + self.context.logger.debug( + "Successfully restored physical catalog files.") + except Exception as e: + raise Exception(str(e)) + + def getSqlHeader(self): + """ + function: get sql header + input : NA + output : NA + """ + header = ["START TRANSACTION;"] + header.append("SET %s = on;" % Const.ON_INPLACE_UPGRADE) + header.append("SET search_path = 'pg_catalog';") + header.append("SET local client_min_messages = NOTICE;") + header.append("SET local log_min_messages = NOTICE;") + return header + + def getFileNameList(self, filePathName, scriptType="_"): + """ + function: get file name list + input : filePathName + output : [] + """ + filePath = "%s/upgrade_sql/%s" % (self.context.upgradeBackupPath, + filePathName) + allFileList = os.listdir(filePath) + upgradeFileList = [] + if len(allFileList) == 0: + return [] + for each_sql_file in allFileList: + if not os.path.isfile("%s/%s" % (filePath, each_sql_file)): + continue + prefix = each_sql_file.split('.')[0] + resList = prefix.split('_') + if len(resList) != 5 or scriptType not in resList: + continue + file_num = "%s.%s" % (resList[3], resList[4]) + if self.floatMoreThan(float(file_num), + self.context.oldClusterNumber) and \ + self.floatGreaterOrEqualTo(self.context.newClusterNumber, + float(file_num)): + upgradeFileList.append(each_sql_file) + return upgradeFileList + + def initClusterInfo(self, dbClusterInfoPath): + """ + function: init the cluster + input : dbClusterInfoPath + output: dbClusterInfo + """ + clusterInfoModules = OldVersionModules() + fileDir = os.path.dirname(os.path.realpath(dbClusterInfoPath)) + sys.path.insert(0, fileDir) + # init cluster information + 
+    def initClusterInfo(self, dbClusterInfoPath):
+        """
+        function: init the cluster info
+        input : dbClusterInfoPath
+        output: dbClusterInfo
+        """
+        clusterInfoModules = OldVersionModules()
+        fileDir = os.path.dirname(os.path.realpath(dbClusterInfoPath))
+        sys.path.insert(0, fileDir)
+        # init cluster information
+        clusterInfoModules.oldDbClusterInfoModule = __import__('DbClusterInfo')
+        sys.path.remove(fileDir)
+        return clusterInfoModules.oldDbClusterInfoModule.dbClusterInfo()
+
+    def initOldClusterInfo(self, dbClusterInfoPath):
+        """
+        function: init old cluster information
+        input : dbClusterInfoPath
+        output: clusterInfoModules.oldDbClusterInfoModule.dbClusterInfo()
+        """
+        clusterInfoModules = OldVersionModules()
+        fileDir = os.path.dirname(os.path.realpath(dbClusterInfoPath))
+        # script and OldDbClusterInfo.py are in the same PGHOST directory
+        sys.path.insert(0, fileDir)
+        # the V1R8 DbClusterInfo.py uses "from gspylib.common.ErrorCode
+        # import ErrorCode"
+        sys.path.insert(0, os.path.join(fileDir, "script"))
+        # init old cluster information
+        clusterInfoModules.oldDbClusterInfoModule = \
+            __import__('OldDbClusterInfo')
+        return clusterInfoModules.oldDbClusterInfoModule.dbClusterInfo()
+
+    def initClusterConfig(self):
+        """
+        function: init cluster info
+        input : NA
+        output: NA
+        """
+        gaussHome = \
+            DefaultValue.getEnvironmentParameterValue("GAUSSHOME",
+                                                      self.context.user)
+        # $GAUSSHOME must have an available value.
+        if gaussHome == "":
+            raise Exception(ErrorCode.GAUSS_518["GAUSS_51800"] % "$GAUSSHOME")
+        (appPath, appPathName) = os.path.split(gaussHome)
+        commonDbClusterInfoModule = \
+            "%s/bin/script/gspylib/common/DbClusterInfo.py" % gaussHome
+        commonStaticConfigFile = "%s/bin/cluster_static_config" % gaussHome
+        try:
+            if self.context.action == Const.ACTION_INPLACE_UPGRADE:
+
+                # get DbClusterInfo.py and cluster_static_config from both
+                # the backup path and the install path
+                # get oldClusterInfo:
+                # if the backup files exist, use them;
+                # else if the install files exist, use them;
+                # else we can not get oldClusterInfo, exit.
+                # backup path exists
+                commonDbClusterInfoModuleBak = "%s/../OldDbClusterInfo.py" % \
+                                               self.context.upgradeBackupPath
+                commonStaticConfigFileBak = "%s/../cluster_static_config" % \
+                                            self.context.upgradeBackupPath
+
+                # if binary.tar exists, decompress it
+                if os.path.isfile("%s/%s" % (self.context.upgradeBackupPath,
+                                             self.context.binTarName)):
+                    cmd = "cd '%s'&&tar xfp '%s'" % \
+                          (self.context.upgradeBackupPath,
+                           self.context.binTarName)
+                    (status, output) = subprocess.getstatusoutput(cmd)
+                    if status != 0:
+                        raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] %
+                                        cmd + "Error: \n%s" % str(output))
+
+                if (os.path.isfile(commonDbClusterInfoModuleBak)
+                        and os.path.isfile(commonStaticConfigFileBak)):
+                    try:
+                        # import the old module
+                        # init old cluster config
+                        self.context.oldClusterInfo = \
+                            self.initOldClusterInfo(
+                                commonDbClusterInfoModuleBak)
+                        self.context.oldClusterInfo.initFromStaticConfig(
+                            self.context.user, commonStaticConfigFileBak)
+                    except Exception as e:
+                        # maybe the old cluster is a V1R5C00 TR5 version,
+                        # which does not support a specified static config
+                        # file path for initFromStaticConfig,
+                        # so retry with the new cluster format
+                        self.context.oldClusterInfo = dbClusterInfo()
+                        self.context.oldClusterInfo.initFromStaticConfig(
+                            self.context.user, commonStaticConfigFileBak)
+                # if the backup path does not exist, use the install path
+                elif (os.path.isfile(commonDbClusterInfoModule)
+                      and os.path.isfile(commonStaticConfigFile)):
+                    # import the old module
+                    # init old cluster config
+                    self.context.oldClusterInfo = \
+                        self.initClusterInfo(commonDbClusterInfoModule)
+                    self.context.oldClusterInfo.initFromStaticConfig(
+                        self.context.user, commonStaticConfigFile)
+                else:
+                    raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] %
+                                    "static config file")
+
+                # get the accurate logPath
+                logPathWithUser = DefaultValue.getEnv("GAUSSLOG")
+                DefaultValue.checkPathVaild(logPathWithUser)
+                splitMark = "/%s" % self.context.user
+                self.context.oldClusterInfo.logPath = \
+                    logPathWithUser[0:(logPathWithUser.rfind(splitMark))]
+
+                # init the new cluster config
+                # if xmlFile != "", init it by initFromXml();
+                # else, use oldClusterInfo
+                if self.context.xmlFile != "":
+                    # get clusterInfo
+                    # if initializing dbClusterInfo fails, the
+                    # DbClusterInfo.py is not correct and
+                    # we use the backup file instead
+                    self.context.clusterInfo = dbClusterInfo()
+                    try:
+                        self.context.clusterInfo.initFromXml(
+                            self.context.xmlFile)
+                    except Exception as e:
+                        self.context.logger.error(str(e))
+                        try:
+                            # init clusterInfo from the backup dbClusterInfo
+                            self.context.clusterInfo = \
+                                self.initOldClusterInfo(
+                                    commonDbClusterInfoModuleBak)
+                            self.context.clusterInfo.initFromXml(
+                                self.context.xmlFile)
+                        except Exception as e:
+                            try:
+                                self.context.clusterInfo = \
+                                    self.initClusterInfo(
+                                        commonDbClusterInfoModule)
+                                self.context.clusterInfo.initFromXml(
+                                    self.context.xmlFile)
+                            except Exception as e:
+                                raise Exception(str(e))
+                    # verify the cluster config info between the old and
+                    # new cluster
+                    self.verifyClusterConfigInfo(self.context.clusterInfo,
+                                                 self.context.oldClusterInfo)
+                    # after verifyClusterConfigInfo(), clusterInfo and
+                    # oldClusterInfo have been changed,
+                    # so we should init them again
+                    self.context.clusterInfo = dbClusterInfo()
+                    try:
+                        self.context.clusterInfo.initFromXml(
+                            self.context.xmlFile)
+                    except Exception as e:
+                        self.context.logger.debug(str(e))
+                        try:
+                            # init clusterInfo from the backup dbClusterInfo
+                            self.context.clusterInfo = \
+                                self.initOldClusterInfo(
+                                    commonDbClusterInfoModuleBak)
+                            self.context.clusterInfo.initFromXml(
+                                self.context.xmlFile)
+                        except Exception as e:
+                            try:
+                                self.context.clusterInfo = \
+                                    self.initClusterInfo(
+                                        commonDbClusterInfoModule)
+                                self.context.clusterInfo.initFromXml(
+                                    self.context.xmlFile)
+                            except Exception as e:
+                                raise Exception(str(e))
+                else:
+                    self.context.clusterInfo = self.context.oldClusterInfo
+            elif (self.context.action == Const.ACTION_CHOSE_STRATEGY
+                  or self.context.action == Const.ACTION_COMMIT_UPGRADE):
+                # after switching to the new bin, gausshome points to the
+                # new version, so oldClusterNumber is the same as
+                # newClusterNumber, and oldClusterInfo is the same as the
+                # new one
+                try:
+                    self.context.oldClusterInfo = self.context.clusterInfo
+                    self.getOneDNInst(True)
+                    if os.path.isfile(commonDbClusterInfoModule) and \
+                            os.path.isfile(commonStaticConfigFile):
+                        # import the old module
+                        # init old cluster config
+                        self.context.oldClusterInfo = \
+                            self.initClusterInfo(commonDbClusterInfoModule)
+                        self.context.oldClusterInfo.initFromStaticConfig(
+                            self.context.user, commonStaticConfigFile)
+                    else:
+                        raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] %
+                                        "static config file")
+                except Exception as e:
+                    # upgrade backup path
+                    if (os.path.exists(
+                            "%s/%s/bin/script/util/DbClusterInfo.py" % (
+                                self.context.upgradeBackupPath, appPathName))):
+                        binaryModuleBak = \
+                            "%s/%s/bin/script/util/DbClusterInfo.py" % \
+                            (self.context.upgradeBackupPath, appPathName)
+                    else:
+                        binaryModuleBak = \
+                            "%s/%s/bin/script/gspylib/common/" \
+                            "DbClusterInfo.py" % \
+                            (self.context.upgradeBackupPath, appPathName)
+                    binaryStaticConfigFileBak = \
+                        "%s/%s/bin/cluster_static_config" % \
+                        (self.context.upgradeBackupPath, appPathName)
+
+                    if os.path.isfile(binaryModuleBak) and \
+                            os.path.isfile(binaryStaticConfigFileBak):
+                        # import the old module
+                        # init old cluster config
+                        commonDbClusterInfoModuleBak = \
+                            "%s/../OldDbClusterInfo.py" % \
+                            self.context.upgradeBackupPath
+                        self.context.oldClusterInfo = \
+                            self.initOldClusterInfo(
+                                commonDbClusterInfoModuleBak)
+                        self.context.oldClusterInfo.initFromStaticConfig(
+                            self.context.user, binaryStaticConfigFileBak)
+                    else:
+                        raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] %
+                                        "static config file")
+            elif (self.context.action in
+                  [Const.ACTION_SMALL_UPGRADE, Const.ACTION_AUTO_UPGRADE,
+                   Const.ACTION_LARGE_UPGRADE, Const.ACTION_AUTO_ROLLBACK]):
+                # 1. get the new cluster info
+                self.context.clusterInfo = dbClusterInfo()
+                self.context.clusterInfo.initFromXml(self.context.xmlFile)
+                # 2. get oldClusterInfo
+                # under rollback, gausshome may point to the old or the
+                # new clusterAppPath, so we must choose from the record
+                # table; when upgrading abnormal nodes, gausshome points
+                # to newClusterAppPath
+                oldPath = self.getClusterAppPath()
+                if oldPath != "" and os.path.exists(oldPath):
+                    self.context.logger.debug("The old install path is %s" %
+                                              oldPath)
+                    commonDbClusterInfoModule = \
+                        "%s/bin/script/gspylib/common/DbClusterInfo.py" % \
+                        oldPath
+                    commonStaticConfigFile = \
+                        "%s/bin/cluster_static_config" % oldPath
+                else:
+                    self.context.logger.debug("The old install path is %s"
+                                              % os.path.realpath(gaussHome))
+                if (os.path.isfile(commonDbClusterInfoModule)
+                        and os.path.isfile(commonStaticConfigFile)):
+                    # import the old module
+                    # init old cluster config
+                    self.context.oldClusterInfo = \
+                        self.initClusterInfo(commonDbClusterInfoModule)
+                    self.context.oldClusterInfo.initFromStaticConfig(
+                        self.context.user, commonStaticConfigFile)
+                else:
+                    raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] %
+                                    "static config file")
+
+                staticClusterInfo = dbClusterInfo()
+                config = os.path.join(gaussHome, "bin/cluster_static_config")
+                if not os.path.isfile(config):
+                    raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] %
+                                    os.path.realpath(config))
+                staticClusterInfo.initFromStaticConfig(self.context.user,
+                                                       config)
+
+                # verify the cluster config info between the old and new
+                # cluster
+                self.verifyClusterConfigInfo(self.context.clusterInfo,
+                                             staticClusterInfo)
+                # after verifyClusterConfigInfo(), clusterInfo and
+                # oldClusterInfo have been changed,
+                # so we should init them again
+                self.context.clusterInfo = dbClusterInfo()
+                # we will get self.context.newClusterAppPath in
+                # choseStrategy
+                self.context.clusterInfo.initFromXml(self.context.xmlFile)
+                if self.context.is_inplace_upgrade or \
+                        self.context.action == Const.ACTION_AUTO_ROLLBACK:
+                    self.getOneDNInst()
+                self.context.logger.debug("Successfully init cluster config.")
+            else:
+                raise Exception(ErrorCode.GAUSS_500["GAUSS_50004"] % 't' +
+                                " Value: %s" % self.context.action)
+
+            # check whether kerberos has been installed before the
+            # inplace upgrade action
+            self.context.logger.debug(
+                "Check whether kerberos has been installed "
+                "before inplace upgrade.")
+            xmlfile = os.path.join(os.path.dirname(self.context.userProfile),
+                                   DefaultValue.FI_KRB_XML)
+            if os.path.exists(xmlfile) and \
+                    self.context.action == Const.ACTION_AUTO_UPGRADE \
+                    and self.context.is_grey_upgrade:
+                raise Exception(ErrorCode.GAUSS_502["GAUSS_50200"] % "kerberos")
+            if os.path.exists(xmlfile) and self.context.is_inplace_upgrade:
+                pghost_path = DefaultValue.getEnvironmentParameterValue(
+                    'PGHOST', self.context.user)
+                destfile = "%s/krb5.conf" % os.path.dirname(
+                    self.context.userProfile)
+                kerberosflagfile = "%s/kerberos_upgrade_flag" % pghost_path
= "cp -rf %s %s " % (destfile, kerberosflagfile) + (status, output) = DefaultValue.retryGetstatusoutput(cmd, 3, 5) + if status != 0: + raise Exception( + ErrorCode.GAUSS_502["GAUSS_50206"] % kerberosflagfile + + " Error: \n%s" % output) + self.context.logger.debug( + "Successful back up kerberos config file.") + except Exception as e: + self.context.logger.debug(traceback.format_exc()) + self.exitWithRetCode(self.context.action, False, str(e)) + + def getOneDNInst(self, checkNormal=False): + """ + function: find a dn instance by dbNodes, + which we can execute SQL commands + input : NA + output: DN instance + """ + try: + self.context.logger.debug( + "Get one DN. CheckNormal is %s" % checkNormal) + dnInst = None + clusterNodes = self.context.oldClusterInfo.dbNodes + primaryDnNode, output = DefaultValue.getPrimaryNode( + self.context.userProfile) + self.context.logger.debug( + "Cluster status information is %s;The primaryDnNode is %s" % ( + output, primaryDnNode)) + for dbNode in clusterNodes: + if len(dbNode.datanodes) == 0: + continue + dnInst = dbNode.datanodes[0] + if dnInst.hostname not in primaryDnNode: + continue + break + + if checkNormal: + (checkStatus, checkResult) = OMCommand.doCheckStaus( + self.context.user, 0) + if checkStatus == 0: + self.context.logger.debug("The cluster status is normal," + " no need to check dn status.") + else: + clusterStatus = \ + OMCommand.getClusterStatus(self.context.user) + if clusterStatus is None: + raise Exception(ErrorCode.GAUSS_516["GAUSS_51600"]) + clusterInfo = dbClusterInfo() + clusterInfo.initFromXml(self.context.xmlFile) + clusterInfo.dbNodes.extend(clusterNodes) + for dbNode in clusterInfo.dbNodes: + if len(dbNode.datanodes) == 0: + continue + dn = dbNode.datanodes[0] + if dn.hostname not in primaryDnNode: + continue + dbInst = clusterStatus.getInstanceStatusById( + dn.instanceId) + if dbInst is None: + continue + if dbInst.status == "Normal": + self.context.logger.debug( + "DN from %s is healthy." % dn.hostname) + dnInst = dn + break + self.context.logger.debug( + "DN from %s is unhealthy." % dn.hostname) + + # check if contain DN on nodes + if not dnInst or dnInst == []: + raise Exception(ErrorCode.GAUSS_526["GAUSS_52602"]) + else: + self.context.logger.debug("Successfully get one DN from %s." + % dnInst.hostname) + self.dnInst = dnInst + + except Exception as e: + self.context.logger.log("Failed to get one DN. Error: %s" % str(e)) + raise Exception(ErrorCode.GAUSS_516["GAUSS_51624"]) + + def verifyClusterConfigInfo(self, clusterInfo, oldClusterInfo, + ignoreFlag="upgradectl"): + """ + function: verify cluster config info between xml and static config + input : clusterInfo, oldClusterInfo + output: NA + """ + try: + # should put self.context.clusterInfo before + # self.context.oldClusterInfo, + # because self.context.oldClusterInfo is not the istance of + # dbCluster + # covert new cluster information to compare cluster + compnew = self.covertToCompCluster(clusterInfo) + # covert old cluster information to compare cluster + compold = self.covertToCompCluster(oldClusterInfo) + # do compare + # if it is not same, print it. 
+    def verifyClusterConfigInfo(self, clusterInfo, oldClusterInfo,
+                                ignoreFlag="upgradectl"):
+        """
+        function: verify cluster config info between xml and static config
+        input : clusterInfo, oldClusterInfo
+        output: NA
+        """
+        try:
+            # self.context.clusterInfo must be converted before
+            # self.context.oldClusterInfo,
+            # because self.context.oldClusterInfo is not an instance of
+            # dbCluster
+            # convert the new cluster information to a comparable cluster
+            compnew = self.covertToCompCluster(clusterInfo)
+            # convert the old cluster information to a comparable cluster
+            compold = self.covertToCompCluster(oldClusterInfo)
+            # do the comparison; if they are not the same, report it
+            theSame, tempbuffer = compareObject(compnew, compold,
+                                                "clusterInfo", [], ignoreFlag)
+            if theSame:
+                self.context.logger.log("Static configuration matched with "
+                                        "old static configuration files.")
+            else:
+                msg = "Instance [%s] is not the same.\nXml cluster " \
+                      "information: %s\nStatic cluster information: %s\n" % \
+                      (tempbuffer[0], tempbuffer[1], tempbuffer[2])
+                self.context.logger.debug("The old cluster information is "
+                                          "from the cluster_static_config.")
+                raise Exception(ErrorCode.GAUSS_512["GAUSS_51217"] +
+                                "Error: \n%s" % msg.strip("\n"))
+        except Exception as e:
+            raise Exception(str(e))
+
+    def covertToCompCluster(self, dbclusterInfo):
+        """
+        function: convert cluster info to a comparable cluster
+        input : clusterInfo, oldClusterInfo
+        output: compClusterInfo
+        """
+        # init the dbCluster class
+        compClusterInfo = dbClusterInfo()
+        # get name
+        compClusterInfo.name = dbclusterInfo.name
+        # get appPath
+        compClusterInfo.appPath = dbclusterInfo.appPath
+        # get logPath
+        compClusterInfo.logPath = dbclusterInfo.logPath
+
+        for dbnode in dbclusterInfo.dbNodes:
+            compNodeInfo = dbNodeInfo()
+            # get the datanode instance information
+            for datanode in dbnode.datanodes:
+                compNodeInfo.datanodes.append(
+                    self.coverToCompInstance(datanode))
+            # get the node information
+            compClusterInfo.dbNodes.append(compNodeInfo)
+        return compClusterInfo
+
+    def coverToCompInstance(self, compinstance):
+        """
+        function: convert to a comparable instance
+                  1. get instanceId
+                  2. get mirrorId
+                  3. get port
+                  4. get datadir
+                  5. get instanceType
+                  6. get listenIps
+                  7. get haIps
+        input : compinstance
+        output: covertedInstanceInfo
+        """
+        covertedInstanceInfo = instanceInfo()
+        # get instanceId
+        covertedInstanceInfo.instanceId = compinstance.instanceId
+        # get mirrorId
+        covertedInstanceInfo.mirrorId = compinstance.mirrorId
+        # get port
+        covertedInstanceInfo.port = compinstance.port
+        # get datadir
+        covertedInstanceInfo.datadir = compinstance.datadir
+        # get instanceType
+        covertedInstanceInfo.instanceType = compinstance.instanceType
+        # get listenIps
+        covertedInstanceInfo.listenIps = compinstance.listenIps
+        # get haIps
+        covertedInstanceInfo.haIps = compinstance.haIps
+        return covertedInstanceInfo
+
+    def distributeXml(self):
+        """
+        function: distribute the xml configuration file to every host
+        input : NA
+        output: NA
+        """
+        self.context.logger.debug("Distributing xml configure file.",
+                                  "addStep")
+        try:
+            hosts = self.context.clusterInfo.getClusterNodeNames()
+            hosts.remove(DefaultValue.GetHostIpOrName())
+
+            # send the xml file to every host
+            DefaultValue.distributeXmlConfFile(self.context.sshTool,
+                                               self.context.xmlFile,
+                                               hosts,
+                                               self.context.mpprcFile,
+                                               self.context.isSingle)
+        except Exception as e:
+            raise Exception(str(e))
+
+        self.context.logger.debug("Successfully distributed xml "
+                                  "configure file.", "constant")
+
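+    # The inplace step file written below holds a single "action:step"
+    # record, for example (the action spelling is illustrative):
+    #   inplace-binary-upgrade:3
+    # getNodeStepInplace() parses and validates this record.
+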
+    def recordNodeStepInplace(self, action, step):
+        """
+        function: record step info on all nodes
+        input : action, step
+        output: NA
+        """
+        try:
+            # record the step info on the local node
+            tempPath = self.context.upgradeBackupPath
+            filePath = os.path.join(tempPath, Const.INPLACE_UPGRADE_STEP_FILE)
+            cmd = "echo \"%s:%d\" > %s" % (action, step, filePath)
+            (status, output) = subprocess.getstatusoutput(cmd)
+            if status != 0:
+                raise Exception(ErrorCode.GAUSS_502["GAUSS_50205"] %
+                                filePath + "Error: \n%s" % str(output))
+
+            if not self.context.isSingle:
+                # send the file to the remote nodes
+                self.context.sshTool.scpFiles(filePath, tempPath)
+            self.context.logger.debug("Successfully wrote step file[%s:%d]."
+                                      % (action, step))
+        except Exception as e:
+            raise Exception(str(e))
+
+    def distributeFile(self, step_file):
+        """
+        function: distribute a file to all other nodes
+        input : step_file
+        output : NA
+        """
+        self.context.logger.debug("Distribute the file %s" % step_file)
+        # send the file to each node
+        hosts = self.context.clusterInfo.getClusterNodeNames()
+        hosts.remove(DefaultValue.GetHostIpOrName())
+        if not self.context.isSingle:
+            stepDir = os.path.normpath(os.path.dirname(step_file))
+            self.context.sshTool.scpFiles(step_file, stepDir, hosts)
+        self.context.logger.debug("Successfully distributed the file %s"
+                                  % step_file)
+
+    def getNodeStepInplace(self):
+        """
+        function: get the upgrade step info for inplace upgrade
+        input : action
+        output: the upgrade step info
+        """
+        try:
+            tempPath = self.context.upgradeBackupPath
+            # get the file path and check whether the file exists
+            filePath = os.path.join(tempPath, Const.INPLACE_UPGRADE_STEP_FILE)
+            if not os.path.exists(filePath):
+                self.context.logger.debug("The cluster status is Normal. "
+                                          "No need to rollback.")
+                return Const.BINARY_UPGRADE_NO_NEED_ROLLBACK
+
+            # read and check the record format
+            stepInfo = g_file.readFile(filePath)[0]
+            stepList = stepInfo.split(":")
+            if len(stepList) != 2:
+                raise Exception(ErrorCode.GAUSS_500["GAUSS_50004"] % filePath)
+
+            recordType = stepList[0].strip()
+            recordStep = stepList[1].strip()
+            # check the upgrade type:
+            # the recorded value must be consistent with the upgrade type
+            if self.context.action != recordType:
+                raise Exception(ErrorCode.GAUSS_500["GAUSS_50004"] % "t" +
+                                "Input upgrade type: %s record upgrade type: "
+                                "%s\nMaybe you chose the wrong interface." %
+                                (self.context.action, recordType))
+            # if the recorded value is not a digit, exit.
+            if not recordStep.isdigit() or int(recordStep) > \
+                    Const.BINARY_UPGRADE_STEP_PRE_COMMIT or \
+                    int(recordStep) < Const.INVALID_UPRADE_STEP:
+                raise Exception(ErrorCode.GAUSS_516["GAUSS_51633"] %
+                                recordStep)
+        except Exception as e:
+            self.context.logger.error(str(e))
+            return Const.INVALID_UPRADE_STEP
+        self.context.logger.debug("The rollback step is %s" % recordStep)
+        return int(recordStep)
+
+    def checkStep(self, step):
+        """
+        function: check the step
+        input : step
+        output : NA
+        """
+        if not step.isdigit() or \
+                int(step) > GreyUpgradeStep.STEP_BEGIN_COMMIT or \
+                int(step) < Const.INVALID_UPRADE_STEP:
+            raise Exception(ErrorCode.GAUSS_516["GAUSS_51633"] % str(step))
+
+    ##########################################################################
+    # Offline upgrade functions
+    ##########################################################################
+    def checkUpgrade(self):
+        """
+        function: check the environment for upgrade
+        input : action
+        output: NA
+        """
+        self.context.logger.log("Checking upgrade environment.", "addStep")
+        try:
+            # check the environment for upgrade
+            cmd = "%s -t %s -R '%s' -l '%s' -N '%s' -X '%s'" % \
+                  (OMCommand.getLocalScript("Local_Check_Upgrade"),
+                   self.context.action,
+                   self.context.oldClusterAppPath,
+                   self.context.localLog,
+                   self.context.newClusterAppPath,
+                   self.context.xmlFile)
+            self.context.logger.debug("Command for checking upgrade "
+                                      "environment: %s."
+                                      % cmd)
+            DefaultValue.execCommandWithMode(cmd,
+                                             "check upgrade environment",
+                                             self.context.sshTool,
+                                             self.context.isSingle,
+                                             self.context.mpprcFile)
+        except Exception as e:
+            self.context.logger.log("Failed to check upgrade environment.",
+                                    "constant")
+            raise Exception(str(e))
+        if not self.context.forceRollback:
+            if self.context.oldClusterNumber >= \
+                    Const.ENABLE_STREAM_REPLICATION_VERSION:
+                self.check_gucval_is_inval_given(
+                    Const.ENABLE_STREAM_REPLICATION_NAME, Const.VALUE_ON)
+        try:
+            if self.context.action == Const.ACTION_INPLACE_UPGRADE:
+                self.context.logger.log(
+                    "Successfully checked upgrade environment.", "constant")
+                return
+            self.checkActionInTableOrFile()
+            self.checkDifferentVersion()
+            self.checkOption()
+        except Exception as e:
+            self.context.logger.log(
+                "Failed to check upgrade environment.", "constant")
+            raise Exception(str(e))
+
+        self.context.logger.log(
+            "Successfully checked upgrade environment.", "constant")
+
+    def check_gucval_is_inval_given(self, guc_name, val_list):
+        """
+        Check whether the given parameter is within the given value list
+        on all instances.
+        """
+        self.context.logger.debug("Check whether the parameter:{0} is "
+                                  "in the value list:{1}.".format(guc_name,
+                                                                  val_list))
+        guc_str = "{0}:{1}".format(guc_name, ",".join(val_list))
+        self.checkParam(guc_str)
+        self.context.logger.debug("Successfully checked that the "
+                                  "parameter:{0} value is in the value "
+                                  "list:{1}.".format(guc_name, val_list))
+
+    def checkDifferentVersion(self):
+        """
+        If the cluster has only one version, there is no need to check.
+        If the cluster has two versions, every node must run either the
+        new version or the old version.
+        :return:
+        """
+        self.context.logger.debug("Check the number of cluster versions.")
+        failedHost = []
+        failMsg = ""
+        gaussHome = DefaultValue.getInstallDir(self.context.user)
+        # $GAUSSHOME must have an available value.
+        if gaussHome == "":
+            raise Exception(ErrorCode.GAUSS_518["GAUSS_51800"] % "$GAUSSHOME")
+        versionFile = os.path.join(gaussHome, "bin/upgrade_version")
+        cmd = "sed -n \'3,1p\' %s" % versionFile
+        hostList = copy.deepcopy(self.context.clusterNodes)
+        (resultMap, outputCollect) = \
+            self.context.sshTool.getSshStatusOutput(cmd, hostList)
+        for key, val in resultMap.items():
+            if DefaultValue.FAILURE in val:
+                failedHost.append(key)
+                failMsg += val
+        if failedHost:
+            self.context.recordIgnoreOrFailedNodeInEveryNode(
+                self.context.failedNodeRecordFile, failedHost)
+            raise Exception(ErrorCode.GAUSS_529["GAUSS_52929"] + failMsg)
+        for result in outputCollect:
+            # a version record matches if it contains either commit id
+            if self.newCommitId in result or self.oldCommitId in result:
+                continue
+            self.context.logger.debug(
+                "Find the gaussdb version %s is not same with"
+                " current upgrade version" % str(result))
+            raise Exception(ErrorCode.GAUSS_529["GAUSS_52935"])
+        self.context.logger.debug(
+            "Successfully checked the number of cluster versions.")
+
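+    # Relationship of the grey upgrade options handled below (the flag
+    # spellings used by gs_upgradectl are assumed here):
+    #   -h <nodes>   upgrade only the named nodes first
+    #   --continue   upgrade the remaining nodes afterwards
+    #   -g           only allowed while no node has been upgraded yet
+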
+    def checkOption(self):
+        """
+        If the user uses -h first and then --continue,
+        we can upgrade again.
+        :return:
+        """
+        if self.context.is_grey_upgrade:
+            self.check_option_grey()
+        if len(self.context.nodeNames) != 0:
+            self.checkOptionH()
+        elif self.context.upgrade_remain:
+            self.checkOptionContinue()
+        else:
+            self.checkOptionG()
+
+    def check_option_grey(self):
+        """
+        If all nodes have been upgraded, there is no need to use --grey
+        to upgrade again.
+        :return:
+        """
+        stepFile = os.path.join(
+            self.context.upgradeBackupPath, Const.GREY_UPGRADE_STEP_FILE)
+        if not os.path.isfile(stepFile):
+            self.context.logger.debug(
+                "File %s does not exist. No need to check."
+                % Const.GREY_UPGRADE_STEP_FILE)
+            return
+        grey_node_names = self.getUpgradedNodeNames()
+        if grey_node_names:
+            self.context.logger.log(
+                "All nodes have been upgraded, no need to upgrade again.")
+            self.exitWithRetCode(self.action, True)
+
+    def checkOptionH(self):
+        self.checkNodeNames()
+        stepFile = os.path.join(
+            self.context.upgradeBackupPath, Const.GREY_UPGRADE_STEP_FILE)
+        if not os.path.isfile(stepFile):
+            self.context.logger.debug(
+                "File %s does not exist. No need to check." %
+                Const.GREY_UPGRADE_STEP_FILE)
+            return
+        if not self.isNodesSameStep(self.context.nodeNames):
+            raise Exception(ErrorCode.GAUSS_529["GAUSS_52909"])
+        if self.isNodeSpecifyStep(
+                GreyUpgradeStep.STEP_UPDATE_POST_CATALOG,
+                self.context.nodeNames):
+            raise Exception(
+                ErrorCode.GAUSS_529["GAUSS_52910"] % self.context.nodeNames)
+        nodes = self.getNodeLessThan(GreyUpgradeStep.STEP_UPDATE_POST_CATALOG)
+        # compare whether the current upgrade nodes are the same as the
+        # unfinished node names from last time
+        if nodes:
+            a = [i for i in self.context.nodeNames if i not in nodes]
+            b = [i for i in nodes if i not in self.context.nodeNames]
+            if len(a) != 0 or len(b) != 0:
+                raise Exception(
+                    ErrorCode.GAUSS_529["GAUSS_52911"] % nodes +
+                    " Please upgrade them first.")
+
+    def checkNodeNames(self):
+        self.context.logger.debug(
+            "Check if the node name is invalid or duplicated.")
+        clusterNodes = self.context.clusterInfo.getClusterNodeNames()
+        for nodeName in self.context.nodeNames:
+            if nodeName not in clusterNodes:
+                raise Exception(
+                    ErrorCode.GAUSS_500["GAUSS_50011"] % ("-h", nodeName))
+
+        undupNodes = set(self.context.nodeNames)
+        if len(self.context.nodeNames) != len(undupNodes):
+            self.context.logger.log(
+                ErrorCode.GAUSS_500["GAUSS_50004"] % (
+                    "h" + "Duplicate node names"))
+            nodeDict = {}.fromkeys(self.context.nodeNames, 0)
+            for name in self.context.nodeNames:
+                nodeDict[name] = nodeDict[name] + 1
+            for key, value in nodeDict.items():
+                if value > 1:
+                    self.context.logger.log(
+                        "Duplicate node name %s, "
+                        "only keep one in grey upgrade!" % key)
+            self.context.nodeNames = list(undupNodes)
+
+    def isNodesSameStep(self, nodes):
+        """
+        judge whether the given nodes are at the same step
+        """
+        return self.isNodeSpecifyStepInFile(nodes=nodes)
+
+    def getNodeLessThan(self, step):
+        """
+        get the nodes whose step is less than the specified step and not 0
+        """
+        nodes = self.getNodeLessThanInFile(step)
+        return nodes
+
+    def getNodeLessThanInFile(self, step):
+        """
+        get the nodes whose step is less than the specified step and not 0
+        """
+        try:
+            stepFile = os.path.join(
+                self.context.upgradeBackupPath, Const.GREY_UPGRADE_STEP_FILE)
+            self.context.logger.debug("Trying to get nodes whose step is "
+                                      "less than %s from %s"
+                                      % (step, stepFile))
+            if not os.path.isfile(stepFile):
+                return []
+            nodes = []
+            with open(stepFile, 'r') as csvfile:
+                reader = csv.DictReader(csvfile)
+                for row in reader:
+                    if int(row['step']) != 0 and int(row['step']) < step:
+                        nodes.append(row['node_host'])
+            self.context.logger.debug("Successfully got nodes whose step is "
+                                      "less than %s from %s"
+                                      % (step, stepFile))
+            return nodes
+        except Exception as e:
+            exitMsg = "Failed to get nodes whose step is less than {0} " \
+                      "from {1}. ERROR {2}".format(step, stepFile, str(e))
+            self.exitWithRetCode(self.action, False, exitMsg)
+
ERROR {2}".format(step, stepFile, str(e)) + self.exitWithRetCode(self.action, False, exitMsg) + + def checkOptionContinue(self): + stepFile = os.path.join( + self.context.upgradeBackupPath, Const.GREY_UPGRADE_STEP_FILE) + if not os.path.isfile(stepFile): + raise Exception(ErrorCode.GAUSS_529["GAUSS_52920"] + + "Need to upgrade some nodes first.") + greyNodeNames = self.getUpgradedNodeNames() + # the nodes that have upgraded that should reached to precommit + if not self.isNodeSpecifyStep(GreyUpgradeStep.STEP_UPDATE_POST_CATALOG, + greyNodeNames): + raise Exception(ErrorCode.GAUSS_529["GAUSS_52912"]) + if len(greyNodeNames) == len(self.context.clusterInfo.dbNodes): + self.printPrecommitBanner() + self.context.logger.debug( + "The node host in table %s.%s is equal to cluster nodes." + % (Const.UPGRADE_SCHEMA, Const.RECORD_NODE_STEP)) + raise Exception(ErrorCode.GAUSS_529["GAUSS_52913"]) + if not self.checkVersion(self.newCommitId, greyNodeNames): + raise Exception( + ErrorCode.GAUSS_529["GAUSS_52914"] + + "Please use the same version to upgrade remain nodes.") + + def checkOptionG(self): + stepFile = os.path.join( + self.context.upgradeBackupPath, Const.GREY_UPGRADE_STEP_FILE) + if not os.path.isfile(stepFile): + self.context.logger.debug( + "File %s does not exists. No need to check." % + Const.GREY_UPGRADE_STEP_FILE) + return + # -g only support 2 loops to upgrade, if has node upgrade, + # cannot use -g to upgrade other nodes + greyNodeNames = self.getUpgradedNodeNames() + if not greyNodeNames: + self.context.logger.debug("No node has ever been upgraded.") + return + else: + raise Exception("-g only support if no node has ever been upgraded" + " ,nodes %s have been upgraded, " + "so can use --continue instead of -g to upgrade" + " other nodes" % greyNodeNames) + + def backupClusterConfig(self): + """ + function: Backup the cluster config + input : NA + output: NA + """ + # backup list: + # cluster_static_config + # cluster_dynamic_config + # etc/gscgroup_xxx.cfg + # lib/postgresql/pg_plugin + # server.key.cipher + # server.key.rand + # datasource.key.cipher + # datasource.key.rand + # utilslib + # /share/sslsert/ca.key + # /share/sslsert/etcdca.crt + # catalog physical files + # Data Studio lib files + # gds files + # javaUDF + # postGIS + # hadoop_odbc_connector extension files + # libsimsearch etc files and lib files + self.context.logger.log("Backing up cluster configuration.", "addStep") + try: + # send cmd to all node and exec + cmd = "%s -t %s -U %s -V %d --upgrade_bak_path=%s -l %s" % \ + (OMCommand.getLocalScript("Local_Upgrade_Utility"), + Const.ACTION_BACKUP_CONFIG, + self.context.user, + int(float(self.context.oldClusterNumber) * 1000), + self.context.upgradeBackupPath, + self.context.localLog) + self.context.logger.debug("Command for backing up cluster " + "configuration: %s" % cmd) + DefaultValue.execCommandWithMode(cmd, + "backup config files", + self.context.sshTool, + self.context.isSingle, + self.context.mpprcFile) + + # backup hotpatch info file + self.backupHotpatch() + # backup version file. 
+            self.backup_version_file()
+
+            if not self.isLargeInplaceUpgrade:
+                return
+            # back up catalog data files if needed
+            self.backupCatalogFiles()
+
+            # back up DS libs and gds file
+            cmd = "%s -t %s -U %s --upgrade_bak_path=%s -l %s" % \
+                  (OMCommand.getLocalScript("Local_Upgrade_Utility"),
+                   Const.ACTION_INPLACE_BACKUP,
+                   self.context.user,
+                   self.context.upgradeBackupPath,
+                   self.context.localLog)
+            self.context.logger.debug(
+                "Command for backing up gds file: %s" % cmd)
+            DefaultValue.execCommandWithMode(cmd,
+                                             "backup DS libs and gds file",
+                                             self.context.sshTool,
+                                             self.context.isSingle,
+                                             self.context.userProfile)
+        except Exception as e:
+            raise Exception(str(e))
+
+        self.context.logger.log("Successfully backed up cluster "
+                                "configuration.", "constant")
+
+    def backupCatalogFiles(self):
+        """
+        function: back up the physical files of catalog objects
+                  1. check whether it is an inplace upgrade
+                  2. get the database list
+                  3. get the catalog objects list
+                  4. back up the physical files for each database
+                  5. back up the global folder
+        input : NA
+        output: NA
+        """
+        try:
+            # send the command to all nodes and execute it
+            cmd = "%s -t %s -U %s --upgrade_bak_path=%s " \
+                  "--oldcluster_num='%s' -l %s" % \
+                  (OMCommand.getLocalScript("Local_Upgrade_Utility"),
+                   Const.ACTION_BACKUP_OLD_CLUSTER_CATALOG_PHYSICAL_FILES,
+                   self.context.user,
+                   self.context.upgradeBackupPath,
+                   self.context.oldClusterNumber,
+                   self.context.localLog)
+            self.context.logger.debug("Command for backing up physical files "
+                                      "of catalog objects: %s" % cmd)
+            DefaultValue.execCommandWithMode(
+                cmd,
+                "backup physical files of catalog objects",
+                self.context.sshTool,
+                self.context.isSingle,
+                self.context.userProfile)
+            self.context.logger.debug("Successfully backed up catalog "
+                                      "physical files for the old cluster.")
+        except Exception as e:
+            raise Exception(str(e))
+
+    def syncNewGUC(self):
+        """
+        function: sync newly added GUCs during inplace upgrade.
+                  For now, we only sync the GUCs of cm_agent and cm_server
+        input : NA
+        output: NA
+        """
+        self.context.logger.debug("Start to sync new guc.", "addStep")
+        try:
+            # send the command to all nodes and execute it
+            cmd = "%s -t %s -U %s --upgrade_bak_path=%s " \
+                  "--new_cluster_app_path=%s -l %s" % \
+                  (OMCommand.getLocalScript("Local_Upgrade_Utility"),
+                   Const.ACTION_SYNC_CONFIG,
+                   self.context.user,
+                   self.context.upgradeBackupPath,
+                   self.context.newClusterAppPath,
+                   self.context.localLog,)
+            self.context.logger.debug(
+                "Command for synchronizing new guc: %s" % cmd)
+            DefaultValue.execCommandWithMode(cmd,
+                                             "sync new guc",
+                                             self.context.sshTool,
+                                             self.context.isSingle,
+                                             self.context.mpprcFile)
+        except Exception as e:
+            self.context.logger.debug("Failed to synchronize new guc.",
+                                      "constant")
+            raise Exception(str(e))
+        self.context.logger.debug("Successfully synchronized new guc.",
+                                  "constant")
+
+    def cleanExtensionFiles(self):
+        """
+        function: clean extension library and config files
+        input: NA
+        output: 0 / 1
+        """
+        try:
+            # clean extension library and config files
+            hadoop_odbc_connector = "%s/lib/postgresql/" \
+                                    "hadoop_odbc_connector.so" % \
+                                    self.context.oldClusterInfo.appPath
+            extension_config01 = "%s/share/postgresql/extension/" \
+                                 "hadoop_odbc_connector--1.0.sql" % \
+                                 self.context.oldClusterInfo.appPath
+            extension_config02 = "%s/share/postgresql/extension/" \
+                                 "hadoop_odbc_connector.control" % \
+                                 self.context.oldClusterInfo.appPath
+            extension_config03 = "%s/share/postgresql/extension/hadoop_odbc_" \
+                                 "connector--unpackaged--1.0.sql" % \
+                                 self.context.oldClusterInfo.appPath
+
+            cmd = "(if [ -f '%s' ];then rm -f '%s';fi)" % \
+                  (hadoop_odbc_connector, hadoop_odbc_connector)
+            cmd += " && (if [ -f '%s' ];then rm -f '%s';fi)" % \
+                   (extension_config01, extension_config01)
+            cmd += " && (if [ -f '%s' ];then rm -f '%s';fi)" % \
+                   (extension_config02, extension_config02)
+            cmd += " && (if [ -f '%s' ];then rm -f '%s';fi)" % \
+                   (extension_config03, extension_config03)
+            self.context.logger.debug("Command for cleaning extension "
+                                      "library and config files: %s" % cmd)
+            DefaultValue.execCommandWithMode(
+                cmd, "clean extension library and config files",
+                self.context.sshTool, self.context.isSingle,
+                self.context.mpprcFile)
+            self.context.logger.debug("Successfully cleaned the extension "
+                                      "library and config files.")
+            return 0
+        except Exception as e:
+            self.context.logger.debug("Failed to clean extension library and "
+                                      "config files. Output: %s" % str(e))
+            return 1
+
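+    # waitClusterForNormal below polls the cluster status every 5 seconds
+    # (printing a dot per poll, with a line break every 12 dots) until the
+    # status becomes Normal or waitTimeOut (300 seconds by default)
+    # elapses.
+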
+ "\n" + + ErrorCode.GAUSS_516["GAUSS_51602"]) + + if checkStatus != 0: + self.context.logger.debug(checkResult) + raise Exception(ErrorCode.GAUSS_516["GAUSS_51607"] % "cluster") + + def getLcgroupnameList(self, jsonFile): + """ + function: get Lc group name list + input: jsonFile + output: [] + """ + para = {} + lcgroupnamelist = [] + try: + with open(jsonFile, "r") as fp_json: + para = json.load(fp_json) + except Exception as e: + raise Exception(str(e)) + if (para): + lcgroupnamelist = para['lcgroupnamelist'] + while '' in lcgroupnamelist: + lcgroupnamelist.remove('') + return lcgroupnamelist + + def restoreClusterConfig(self, isRollBack=False): + """ + function: Restore the cluster config + input : isRollBack + output: NA + """ + # restore list: + # cluster_dynamic_config + # etc/gscgroup_xxx.cfg + # lib/postgresql/pg_plugin + # server.key.cipher + # server.key.rand + # datasource.key.cipher + # datasource.key.rand + # utilslib + # /share/sslsert/ca.key + # /share/sslsert/etcdca.crt + # Data Studio lib files + # gds files + # javaUDF + # postGIS + # hadoop_odbc_connector extension files + # libsimsearch etc files and lib files + if isRollBack: + self.context.logger.log("Restoring cluster configuration.") + else: + self.context.logger.log("Restoring cluster configuration.", + "addStep") + try: + if isRollBack: + self.rollbackHotpatch() + else: + # restore static configuration + cmd = "%s -t %s -U %s -V %d --upgrade_bak_path=%s " \ + "--old_cluster_app_path=%s --new_cluster_app_path=%s " \ + "-l %s" % ( + OMCommand.getLocalScript("Local_Upgrade_Utility"), + Const.ACTION_RESTORE_CONFIG, + self.context.user, + int(float(self.context.oldClusterNumber) * 1000), + self.context.upgradeBackupPath, + self.context.oldClusterAppPath, + self.context.newClusterAppPath, + self.context.localLog) + + self.context.logger.debug("Command for restoring " + "config files: %s" % cmd) + DefaultValue.execCommandWithMode(cmd, + "restore config files", + self.context.sshTool, + self.context.isSingle, + self.context.mpprcFile) + if self.isLargeInplaceUpgrade: + # backup DS libs and gds file + cmd = "%s -t %s -U %s --upgrade_bak_path=%s -l %s" % \ + (OMCommand.getLocalScript("Local_Upgrade_Utility"), + Const.ACTION_INPLACE_BACKUP, + self.context.user, + self.context.upgradeBackupPath, + self.context.localLog) + self.context.logger.debug( + "Command for restoreing DS libs and gds file: %s" % cmd) + DefaultValue.execCommandWithMode( + cmd, + "restore DS libs and gds file", + self.context.sshTool, + self.context.isSingle, + self.context.userProfile) + # change the owner of application + cmd = "chown -R %s:%s '%s'" % \ + (self.context.user, self.context.group, + self.context.newClusterAppPath) + DefaultValue.execCommandWithMode( + cmd, "change the owner of application", + self.context.sshTool, self.context.isSingle, + self.context.mpprcFile) + except Exception as e: + raise Exception(str(e)) + if isRollBack: + self.context.logger.log("Successfully restored " + "cluster configuration.") + else: + self.context.logger.log("Successfully restored cluster " + "configuration.", "constant") + + def checkStaticConfig(self): + """ + function: Check if static config file exists in bin dir, + if not exists, restore it from backup dir + input : NA + output: NA + """ + self.context.logger.log("Checking static configuration files.") + try: + # check static configuration path + staticConfigPath = "%s/bin" % self.context.oldClusterAppPath + # restore static configuration + cmd = "(if [ ! 
+            cmd = "(if [ ! -f '%s/cluster_static_config' ];then cp " \
+                  "%s/cluster_static_config %s/bin;fi)" % \
+                  (staticConfigPath, self.context.upgradeBackupPath,
+                   self.context.oldClusterAppPath)
+            DefaultValue.execCommandWithMode(cmd,
+                                             "restore static configuration",
+                                             self.context.sshTool,
+                                             self.context.isSingle,
+                                             self.context.mpprcFile)
+        except Exception as e:
+            raise Exception(str(e))
+        self.context.logger.log("Successfully checked static "
+                                "configuration files.")
+
+    def backupNodeVersion(self):
+        """
+        function: Back up the current application and configuration.
+                  This function is only used by binary upgrade.
+                  To ensure transaction atomicity,
+                  it is used together with checkUpgrade().
+        input : NA
+        output: NA
+        """
+        self.context.logger.log("Backing up current application "
+                                "and configurations.", "addStep")
+        try:
+            # back up environment variables
+            cmd = "cp '%s' '%s'_gauss" % (self.context.userProfile,
+                                          self.context.userProfile)
+            self.context.logger.debug(
+                "Command for backing up environment file: %s" % cmd)
+            DefaultValue.execCommandWithMode(cmd,
+                                             "back up environment variables",
+                                             self.context.sshTool,
+                                             self.context.isSingle,
+                                             self.context.mpprcFile)
+
+            # back up application and configuration
+            cmd = "%s -U %s -P %s -p -b -l %s" % \
+                  (OMCommand.getLocalScript("Local_Backup"), self.context.user,
+                   self.context.upgradeBackupPath, self.context.localLog)
+            self.context.logger.debug(
+                "Command for backing up application: %s" % cmd)
+            DefaultValue.execCommandWithMode(
+                cmd, "back up application and configuration",
+                self.context.sshTool, self.context.isSingle,
+                self.context.mpprcFile)
+
+        except Exception as e:
+            # delete binary backup directory
+            delCmd = g_file.SHELL_CMD_DICT["deleteDir"] % \
+                     (self.context.tmpDir, os.path.join(self.context.tmpDir,
+                                                        'backupTemp_*'))
+            DefaultValue.execCommandWithMode(delCmd,
+                                             "delete binary backup directory",
+                                             self.context.sshTool,
+                                             self.context.isSingle,
+                                             self.context.mpprcFile)
+            raise Exception(str(e))
+
+        self.context.logger.log("Successfully backed up current "
+                                "application and configurations.", "constant")
+
+    def restoreNodeVersion(self):
+        """
+        function: Restore the application and configuration
+                  1. restore old version
+                  2.
restore environment variables + input : NA + output: NA + """ + self.context.logger.log("Restoring application and configurations.") + + try: + # restore old version + cmd = "%s -U %s -P %s -p -b -l %s" % \ + (OMCommand.getLocalScript("Local_Restore"), + self.context.user, self.context.upgradeBackupPath, + self.context.localLog) + self.context.logger.debug("Command for restoring " + "old version: %s" % cmd) + DefaultValue.execCommandWithMode(cmd, + "restore old version", + self.context.sshTool, + self.context.isSingle, + self.context.mpprcFile) + + # restore environment variables + cmd = "(if [ -f '%s'_gauss ];then mv '%s'_gauss '%s';fi)" % \ + (self.context.userProfile, self.context.userProfile, + self.context.userProfile) + self.context.logger.debug("Command for restoring environment file:" + " %s" % cmd) + DefaultValue.execCommandWithMode(cmd, + "restore environment variables", + self.context.sshTool, + self.context.isSingle, + self.context.mpprcFile) + except Exception as e: + raise Exception(str(e)) + + self.context.logger.log("Successfully restored application and " + "configuration.") + + def modifySocketDir(self): + """ + function: modify unix socket directory + input : NA + output: NA + """ + self.context.logger.log("Modifying the socket path.", "addStep") + try: + # modifying the socket path for all CN/DN instance + (status, output) = self.setGUCValue( + "unix_socket_directory", + DefaultValue.getTmpDirAppendMppdb(self.context.user), "set") + if (status != 0): + raise Exception(ErrorCode.GAUSS_500["GAUSS_50007"] % "GUC" + + " Error: \n%s" % str(output)) + + userProfile = DefaultValue.getMpprcFile() + except Exception as e: + raise Exception(str(e)) + + self.context.logger.log("Successfully modified socket path.", + "constant") + + ########################################################################### + # Rollback upgrade functions + ########################################################################### + def cleanBackupFiles(self): + """ + function: Clean backup files. 
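+                  Removes the leftovers staged in the temporary directory
+                  by the upgrade: old cluster info and scripts, GUC and
+                  static-config backups, crontab backups, the upgrade
+                  backup directory, and so on.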
+        input : NA
+        output : NA
+        """
+        try:
+            # clean backup files
+            cmd = "(if [ -f '%s/OldDbClusterInfo.py' ]; then rm -f " \
+                  "'%s/OldDbClusterInfo.py'; fi) &&" % \
+                  (self.context.tmpDir, self.context.tmpDir)
+            cmd += "(if [ -f '%s/OldDbClusterInfo.pyc' ]; then rm -f " \
+                   "'%s/OldDbClusterInfo.pyc'; fi) &&" % \
+                   (self.context.tmpDir, self.context.tmpDir)
+            cmd += "(if [ -d '%s/script' ]; then rm -rf '%s/script'; " \
+                   "fi) &&" % (self.context.tmpDir, self.context.tmpDir)
+            cmd += "(if [ -f '%s/oldclusterinfo' ]; then rm -f " \
+                   "'%s/oldclusterinfo'; fi) &&" % \
+                   (self.context.tmpDir, self.context.tmpDir)
+            cmd += "(if [ -f '%s/oldclusterGUC' ]; then rm -f " \
+                   "'%s/oldclusterGUC'; fi) &&" % \
+                   (self.context.tmpDir, self.context.tmpDir)
+            cmd += "(if [ -f '%s/cluster_static_config' ]; then rm -f " \
+                   "'%s/cluster_static_config'; fi) &&" % \
+                   (self.context.tmpDir, self.context.tmpDir)
+            cmd += "(if [ -f '%s/c_functionfilelist.dat' ]; then rm -f " \
+                   "'%s/c_functionfilelist.dat'; fi) &&" % \
+                   (self.context.tmpDir, self.context.tmpDir)
+            cmd += "(if [ -f '%s'_gauss ]; then rm -f '%s'_gauss ; fi) &&" % \
+                   (self.context.userProfile, self.context.userProfile)
+            cmd += "(if [ -f '%s/oldclusterinfo.json' ]; then rm -f " \
+                   "'%s/oldclusterinfo.json'; fi) &&" % \
+                   (self.context.tmpDir, self.context.tmpDir)
+            cmd += "(if [ -f '%s/%s' ]; then rm -f '%s/%s'; fi) &&" % \
+                   (self.context.tmpDir, Const.CLUSTER_CNSCONF_FILE,
+                    self.context.tmpDir, Const.CLUSTER_CNSCONF_FILE)
+            cmd += "(rm -f '%s'/gauss_crontab_file_*) &&" % self.context.tmpDir
+            cmd += "(if [ -d '%s' ]; then rm -rf '%s'; fi) &&" % \
+                   (self.context.upgradeBackupPath,
+                    self.context.upgradeBackupPath)
+            cmd += "(if [ -f '%s/pg_proc_mapping.txt' ]; then rm -f" \
+                   " '%s/pg_proc_mapping.txt'; fi)" % \
+                   (self.context.tmpDir, self.context.tmpDir)
+            self.context.logger.debug("Command for cleaning "
+                                      "backup files: %s" % cmd)
+            DefaultValue.execCommandWithMode(cmd,
+                                             "clean backup files",
+                                             self.context.sshTool,
+                                             self.context.isSingle,
+                                             self.context.mpprcFile)
+
+        except Exception as e:
+            raise Exception(str(e))
+
+    def cleanBinaryUpgradeBakFiles(self, isRollBack=False):
+        """
+        function: Clean backup files, including cluster_static_config,
+                  cluster_dynamic_config, binary.tar and parameter.tar.
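+                  Delegates the actual removal to cleanBackupFiles();
+                  isRollBack only selects how the surrounding messages
+                  are step-logged.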
+        input : isRollBack
+        output: NA
+        """
+        if (isRollBack):
+            self.context.logger.debug("Cleaning backup files.")
+        else:
+            self.context.logger.debug("Cleaning backup files.", "addStep")
+
+        try:
+            # clean backup files
+            self.cleanBackupFiles()
+        except Exception as e:
+            raise Exception(str(e))
+        if (isRollBack):
+            self.context.logger.debug("Successfully cleaned backup files.")
+        else:
+            self.context.logger.debug("Successfully cleaned backup files.",
+                                      "constant")
+
+    ###########################################################################
+    # Health check functions
+    ###########################################################################
+
+    def doHealthCheck(self, checkPosition):
+        """
+        function: Do a health check
+        input : checkPosition
+        output: (status, output); status is 0 when healthy, 1 otherwise
+        """
+        #######################################################################
+        # When doing a binary upgrade:
+        #     Const.OPTION_PRECHECK  -> cluster Normal
+        #                            -> database can be connected
+        #     Const.OPTION_POSTCHECK -> cluster Normal
+        #                            -> package version Normal
+        #                            -> database can be connected
+        #######################################################################
+        self.context.logger.log("Start to do health check.", "addStep")
+
+        status = 0
+        output = ""
+
+        if (checkPosition == Const.OPTION_PRECHECK):
+            if (self.checkClusterStatus(checkPosition, True) != 0):
+                output += "\n Cluster status does not match condition."
+            if (self.checkConnection() != 0):
+                output += "\n Database could not be connected."
+        elif (checkPosition == Const.OPTION_POSTCHECK):
+            if (self.checkClusterStatus(checkPosition) != 0):
+                output += "\n Cluster status is Abnormal."
+            if not self.checkVersion(
+                    self.context.newClusterVersion,
+                    self.context.clusterInfo.getClusterNodeNames()):
+                output += "\n The gaussdb version is inconsistent."
+            if (self.checkConnection() != 0):
+                output += "\n Database could not be connected."
+        else:
+            # Invalid check position
+            output += "\n Invalid check position."
+        if (output != ""):
+            status = 1
+        # all checks have passed, return 0
+        self.context.logger.log("Successfully checked cluster status.",
+                                "constant")
+        return (status, output)
+
+    def checkVersion(self, checkinfo, checknodes):
+        """
+        function: Check if the nodes have been upgraded: if the gaussdb bin
+                  file version is the same on all hosts, return True,
+                  else return False
+        input : checkinfo, checknodes
+        output: True   consistent
+                False  inconsistent
+        """
+        self.context.logger.debug(
+            "Start to check gaussdb version consistency.")
+        if self.context.isSingle:
+            self.context.logger.debug("This is a single cluster; "
+                                      "no need to check it.")
+            return True
+
+        try:
+            # checking gaussdb bin file version VxxxRxxxCxx or commitid
+            cmd = "source %s;%s -t %s -v %s -U %s -l %s" % \
+                  (self.context.userProfile,
+                   OMCommand.getLocalScript("Local_Check_Upgrade"),
+                   Const.ACTION_CHECK_VERSION,
+                   checkinfo,
+                   self.context.user,
+                   self.context.localLog)
+            self.context.logger.debug("Command for checking gaussdb version "
+                                      "consistency: %s." % cmd)
+            (status, output) = \
+                self.context.sshTool.getSshStatusOutput(cmd, checknodes)
+            for node in status.keys():
+                failFlag = "Failed to check version information"
+                if status[node] != DefaultValue.SUCCESS or \
+                        output.find(failFlag) >= 0:
+                    raise Exception(ErrorCode.GAUSS_529["GAUSS_52929"] +
+                                    "Error: \n%s" % str(output))
+            # the gaussdb bin file version is the same on all hosts
+            self.context.logger.debug("Successfully checked gaussdb"
+                                      " version consistency.")
+            return True
+        except Exception as e:
+            self.context.logger.debug(str(e))
+            return False
+
+    def checkClusterStatus(self, checkPosition=Const.OPTION_PRECHECK,
+                           doDetailCheck=False):
+        """
+        function: Check the cluster status; if NORMAL, return 0, else
+                  return 1. For grey upgrade, if we have switched to the
+                  new bin, we will remove abnormal nodes and then return 0,
+                  else return 1
+        input : checkPosition, doDetailCheck
+        output: 0  successfully
+                1  failed
+        """
+        self.context.logger.debug("Start to check cluster status.")
+        # build the query cmd and determine success
+        # from the execution result
+        cmd = "source %s;gs_om -t query" % self.context.userProfile
+        (status, output) = subprocess.getstatusoutput(cmd)
+        if status != 0:
+            self.context.logger.debug(
+                "Failed to execute command %s.\nStatus:%s\nOutput:%s" %
+                (cmd, status, output))
+            return 1
+        self.context.logger.debug(
+            "Successfully obtained cluster status information. "
+            "Cluster status information:\n%s" % output)
+        if output.find("Normal") < 0:
+            self.context.logger.debug("The cluster_state is Abnormal.")
+            if checkPosition == Const.OPTION_POSTCHECK:
+                if output.find("Degraded") < 0:
+                    self.context.logger.debug("The cluster_state is not "
+                                              "Degraded under postcheck.")
+                    return 1
+            else:
+                return 1
+
+        # do more checks if required
+        if doDetailCheck:
+            cluster_state_check = False
+            redistributing_check = False
+            for line in output.split('\n'):
+                if len(line.split(":")) != 2:
+                    continue
+                (key, value) = line.split(":")
+                if key.strip() == "cluster_state" and \
+                        value.strip() == "Normal":
+                    cluster_state_check = True
+                elif key.strip() == "redistributing" and value.strip() == "No":
+                    redistributing_check = True
+            if cluster_state_check and redistributing_check:
+                self.context.logger.debug("cluster_state is Normal and "
+                                          "redistributing is No.")
+                return 0
+            else:
+                self.context.logger.debug(
+                    "Cluster status information does not meet the upgrade "
+                    "condition constraints. When upgrading, cluster_state must"
+                    " be Normal and redistributing must be No.")
+                return 1
+
+        # cluster is NORMAL, return 0
+        return 0
+
+    def waitClusterNormalDegrade(self, waitTimeOut=300):
+        """
+        function: Check if the cluster status is Normal or Degraded for
+                  each main step of online upgrade
+        input : waitTimeOut, default is 300 seconds.
+        output : NA
+        """
+        # get the end time
+        self.context.logger.log("Waiting for the cluster status to become "
+                                "normal or degraded.")
+        endTime = datetime.now() + timedelta(seconds=int(waitTimeOut))
+        while True:
+            cmd = "source %s;gs_om -t status --detail" % \
+                  self.context.userProfile
+            (status, output) = subprocess.getstatusoutput(cmd)
+            if status == 0 and (output.find("Normal") >= 0 or
+                                output.find("Degraded") >= 0):
+                self.context.logger.debug(
+                    "The cluster status is normal or degraded now.")
+                break
+
+            if datetime.now() >= endTime:
+                self.context.logger.debug("The cmd is %s " % cmd)
+                raise Exception("Timeout." + "\n" +
+                                ErrorCode.GAUSS_516["GAUSS_51602"])
+ "\n" + + ErrorCode.GAUSS_516["GAUSS_51602"]) + else: + self.context.logger.debug( + "Cluster status has not reach normal. Wait for another 3" + " seconds.\n%s" % output) + time.sleep(3) # sleep 3 seconds + + def checkConnection(self): + """ + function: Check if cluster accept connecitons, + upder inplace upgrade, all DB should be connected + under grey upgrade, makesure all CN in nodes that does not + under upgrade process or extracted abnormal nodes can be + connected if accpet connection, return 0, else return 1 + 1. find a cn instance + 2. connect this cn and exec sql cmd + input : NA + output: 0 successfully + 1 failed + """ + self.context.logger.debug("Start to check database connection.") + for dbNode in self.context.clusterInfo.dbNodes: + if len(dbNode.datanodes) == 0 or dbNode.name: + continue + for dnInst in dbNode.datanodes: + # connect this DB and exec sql cmd + sql = "SELECT 1;" + (status, output) = \ + ClusterCommand.remoteSQLCommand( + sql, self.context.user, dnInst.hostname, dnInst.port, + False, DefaultValue.DEFAULT_DB_NAME, + IsInplaceUpgrade=True) + if status != 0 or not output.isdigit(): + self.context.logger.debug( + "Failed to execute SQL on [%s]: %s. Error: \n%s" % + (dnInst.hostname, sql, str(output))) + return 1 + + # all DB accept connection, return 0 + self.context.logger.debug("Successfully checked database connection.") + return 0 + + def createBakPath(self): + """ + function: create bak path + input : NA + output : NA + """ + cmd = "(if [ ! -d '%s' ]; then mkdir -p '%s'; fi)" % \ + (self.context.upgradeBackupPath, self.context.upgradeBackupPath) + cmd += " && (chmod %d -R %s)" % (DefaultValue.KEY_DIRECTORY_MODE, + self.context.upgradeBackupPath) + self.context.logger.debug("Command for creating directory: %s" % cmd) + DefaultValue.execCommandWithMode(cmd, + "create binary_upgrade path", + self.context.sshTool, + self.context.isSingle, + self.context.mpprcFile) + + def recordDirFile(self): + """ + function: record dir file + input: NA + output: NA + """ + self.context.logger.debug("Create the file to record " + "old and new app directory.") + # write the old cluster number and new cluster number into backup dir + appDirRecord = os.path.join(self.context.upgradeBackupPath, + Const.RECORD_UPGRADE_DIR) + g_file.createFile(appDirRecord, True, DefaultValue.KEY_FILE_MODE) + g_file.writeFile(appDirRecord, [self.context.oldClusterAppPath, + self.context.newClusterAppPath], 'w') + self.distributeFile(appDirRecord) + self.context.logger.debug("Successfully created the file to " + "record old and new app directory.") + + def copyBakVersion(self): + """ + under commit, if we have cleaned old install path, then node disabled, + we cannot get old version, + under choseStrategy, we will not pass the check + :return:NA + """ + versionFile = os.path.join(self.context.oldClusterAppPath, + "bin/upgrade_version") + bakVersionFile = os.path.join(self.context.upgradeBackupPath, + "old_upgrade_version") + cmd = "(if [ -f '%s' ]; then cp -f -p '%s' '%s';fi)" % \ + (versionFile, versionFile, bakVersionFile) + cmd += " && (chmod %d %s)" % \ + (DefaultValue.KEY_FILE_MODE, bakVersionFile) + DefaultValue.execCommandWithMode(cmd, + "copy upgrade_version file", + self.context.sshTool, + self.context.isSingle, + self.context.mpprcFile) + + def cleanInstallPath(self, cleanNew=Const.NEW): + """ + function: after grey upgrade succeed, clean old install path + input : cleanNew + output: NA + """ + self.context.logger.debug("Cleaning %s install path." 
+        # clean the install path
+        if cleanNew == Const.NEW:
+            installPath = self.context.newClusterAppPath
+        elif cleanNew == Const.OLD:
+            installPath = self.context.oldClusterAppPath
+        else:
+            raise Exception(ErrorCode.GAUSS_529["GAUSS_52937"])
+
+        cmd = "%s -t %s -U %s -R %s -l %s" % \
+              (OMCommand.getLocalScript("Local_Upgrade_Utility"),
+               Const.ACTION_CLEAN_INSTALL_PATH,
+               self.context.user,
+               installPath,
+               self.context.localLog)
+        if self.context.forceRollback:
+            cmd += " --force"
+        self.context.logger.debug("Command for cleaning %s install path: %s" %
+                                  (cleanNew, cmd))
+        DefaultValue.execCommandWithMode(cmd,
+                                         "clean %s install path" % cleanNew,
+                                         self.context.sshTool,
+                                         self.context.isSingle,
+                                         self.context.mpprcFile)
+        self.context.logger.log("Successfully cleaned %s install path." %
+                                cleanNew, "constant")
+
+    def installNewBin(self):
+        """
+        function: install the new binary in a new directory
+                  1. get env GAUSSLOG
+                  2. get env PGHOST
+                  3. install the new bin file
+                  4. sync the old config to the new bin path
+                  5. update env
+        input: none
+        output: none
+        """
+        try:
+            self.context.logger.log("Installing new binary.", "addStep")
+
+            # install new bin file
+            cmd = "%s -t 'install_cluster' -U %s:%s -R '%s' -P %s -c %s" \
+                  " -l '%s' -X '%s' -T -u" % \
+                  (OMCommand.getLocalScript("Local_Install"),
+                   self.context.user,
+                   self.context.group,
+                   self.context.newClusterAppPath,
+                   self.context.tmpDir,
+                   self.context.clusterInfo.name,
+                   self.context.localLog,
+                   self.context.xmlFile)
+            self.context.logger.debug(
+                "Command for installing new binary: %s." % cmd)
+            DefaultValue.execCommandWithMode(cmd,
+                                             "install new application",
+                                             self.context.sshTool,
+                                             self.context.isSingle,
+                                             self.context.mpprcFile)
+            self.context.logger.debug(
+                "Successfully installed new binary files.")
+        except Exception as e:
+            self.context.logger.debug("Failed to install new binary files.")
+            raise Exception(str(e))
+
+    def backupHotpatch(self):
+        """
+        function: back up the hotpatch config file patch.info
+                  in xxx/data/hotpatch
+        input : NA
+        output: NA
+        """
+        self.context.logger.debug("Start to backup hotpatch.")
+        try:
+            cmd = "%s -t %s -U %s --upgrade_bak_path=%s " \
+                  "--new_cluster_app_path=%s -l %s" % \
+                  (OMCommand.getLocalScript("Local_Upgrade_Utility"),
+                   Const.ACTION_BACKUP_HOTPATCH,
+                   self.context.user,
+                   self.context.upgradeBackupPath,
+                   self.context.newClusterAppPath,
+                   self.context.localLog)
+            DefaultValue.execCommandWithMode(cmd,
+                                             "backup hotpatch files",
+                                             self.context.sshTool,
+                                             self.context.isSingle,
+                                             self.context.mpprcFile)
+        except Exception as e:
+            raise Exception(" Failed to backup hotpatch config file." + str(e))
+        self.context.logger.log("Successfully backed up hotpatch "
+                                "config file.")
+
+    def rollbackHotpatch(self):
+        """
+        function: roll back the hotpatch config file patch.info
+                  in xxx/data/hotpatch
+        input : NA
+        output: NA
+        """
+        self.context.logger.debug("Start to rollback hotpatch.")
+        try:
+            cmd = "%s -t %s -U %s --upgrade_bak_path=%s -l %s -X '%s'" % \
+                  (OMCommand.getLocalScript("Local_Upgrade_Utility"),
+                   Const.ACTION_ROLLBACK_HOTPATCH,
+                   self.context.user,
+                   self.context.upgradeBackupPath,
+                   self.context.localLog,
+                   self.context.xmlFile)
+            if self.context.forceRollback:
+                cmd += " --force"
+            DefaultValue.execCommandWithMode(cmd,
+                                             "rollback hotpatch",
+                                             self.context.sshTool,
+                                             self.context.isSingle,
+                                             self.context.mpprcFile)
+        except Exception as e:
+            raise Exception(" Failed to rollback hotpatch config file."
+                            + str(e))
+        self.context.logger.log("Successfully rolled back hotpatch "
+                                "config file.")
+
+    def backup_version_file(self):
+        """
+        Back up the old version files.
+        """
+        oldVersionFile = "%s/bin/%s" % \
+                         (self.context.oldClusterAppPath,
+                          DefaultValue.DEFAULT_DISABLED_FEATURE_FILE_NAME)
+        oldLicenseFile = "%s/bin/%s" % (self.context.oldClusterAppPath,
+                                        DefaultValue.DEFAULT_LICENSE_FILE_NAME)
+
+        cmd = "(if [ -d %s ] && [ -f %s ]; then cp -f %s %s; fi) && " % \
+              (self.context.upgradeBackupPath, oldVersionFile, oldVersionFile,
+               self.context.upgradeBackupPath)
+        cmd += "(if [ -d %s ] && [ -f %s ]; then cp -f %s %s; fi)" % \
+               (self.context.upgradeBackupPath, oldLicenseFile, oldLicenseFile,
+                self.context.upgradeBackupPath)
+
+        self.context.logger.debug(
+            "Execute command to back up the product version file and the "
+            "license control file: %s" % cmd)
+        DefaultValue.execCommandWithMode(cmd,
+                                         "Backup old gaussdb.version file.",
+                                         self.context.sshTool,
+                                         self.context.isSingle,
+                                         self.context.mpprcFile)
+
+    def getTimeFormat(self, seconds):
+        """
+        format seconds as an h-m-s string
+        input: int
+        output: str
+        """
+        seconds = int(seconds)
+        if seconds == 0:
+            return 0
+        # convert the seconds to a standard time string
+        # (integer division keeps hour/minute as ints under Python 3)
+        hour = seconds // 3600
+        minute = (seconds - hour * 3600) // 60
+        s = seconds % 60
+        resultstr = ""
+        if hour != 0:
+            resultstr += "%dh" % hour
+        if minute != 0:
+            resultstr += "%dm" % minute
+        return "%s%ds" % (resultstr, s)
+
+    def CopyCerts(self):
+        """
+        function: copy certs
+        input : NA
+        output : NA
+        """
+        self.context.logger.log("Copying certs from %s to %s." % (
+            self.context.oldClusterAppPath, self.context.newClusterAppPath))
+        try:
+            cmd = "%s -t %s -U %s --old_cluster_app_path=%s " \
+                  "--new_cluster_app_path=%s -l %s" % \
+                  (OMCommand.getLocalScript("Local_Upgrade_Utility"),
+                   Const.ACTION_COPY_CERTS,
+                   self.context.user,
+                   self.context.oldClusterAppPath,
+                   self.context.newClusterAppPath,
+                   self.context.localLog)
+            self.context.logger.debug("Command for copying certs: '%s'." % cmd)
+            DefaultValue.execCommandWithMode(cmd,
+                                             "copy certs",
+                                             self.context.sshTool,
+                                             self.context.isSingle,
+                                             self.context.mpprcFile)
+
+        except Exception as e:
+            self.context.logger.log("Failed to copy certs from %s to %s." %
+                                    (self.context.oldClusterAppPath,
+                                     self.context.newClusterAppPath))
+            raise Exception(str(e))
+        time.sleep(10)
+        self.context.logger.log("Successfully copied certs from %s to %s." %
+                                (self.context.oldClusterAppPath,
+                                 self.context.newClusterAppPath),
+                                "constant")
+
+    def switchBin(self, switchTo=Const.OLD):
+        """
+        function: switch bin
+        input : switchTo
+        output : NA
+        """
+        self.context.logger.log("Switch symbolic link to %s binary directory."
+                                % switchTo, "addStep")
+        try:
+            cmd = "%s -t %s -U %s -l %s" % \
+                  (OMCommand.getLocalScript("Local_Upgrade_Utility"),
+                   Const.ACTION_SWITCH_BIN,
+                   self.context.user,
+                   self.context.localLog)
+            if switchTo == Const.NEW:
+                cmd += " -R '%s'" % self.context.newClusterAppPath
+            else:
+                cmd += " -R '%s'" % self.context.oldClusterAppPath
+            if self.context.forceRollback:
+                cmd += " --force"
+            self.context.logger.debug("Command for switching binary "
+                                      "directory: '%s'." % cmd)
+            if self.context.is_grey_upgrade:
+                DefaultValue.execCommandWithMode(cmd,
+                                                 "Switch the binary directory",
+                                                 self.context.sshTool,
+                                                 self.context.isSingle,
+                                                 self.context.mpprcFile,
+                                                 self.context.nodeNames)
+            else:
+                DefaultValue.execCommandWithMode(cmd,
+                                                 "Switch the binary directory",
+                                                 self.context.sshTool,
+                                                 self.context.isSingle,
+                                                 self.context.mpprcFile)
+
+        except Exception as e:
+            self.context.logger.log("Failed to switch symbolic link to %s "
+                                    "binary directory." % switchTo)
+            raise Exception(str(e))
+        time.sleep(10)
+        self.context.logger.log("Successfully switched symbolic link to %s "
+                                "binary directory." % switchTo, "constant")
+
+    def clearOtherToolPackage(self, action=""):
+        """
+        function: clear other tool packages
+        input : action
+        output : NA
+        """
+        if action == Const.ACTION_AUTO_ROLLBACK:
+            self.context.logger.debug("Cleaning other tool package files.")
+        else:
+            self.context.logger.debug(
+                "Cleaning other tool package files.", "addStep")
+        try:
+            commonPart = DefaultValue.get_package_back_name().rsplit("_", 1)[0]
+            gphomePath = \
+                os.listdir(DefaultValue.getClusterToolPath(self.context.user))
+            commitId = self.newCommitId
+            if action == Const.ACTION_AUTO_ROLLBACK:
+                commitId = self.oldCommitId
+            for filePath in gphomePath:
+                if commonPart in filePath and commitId not in filePath:
+                    toDeleteFilePath = os.path.join(
+                        DefaultValue.getClusterToolPath(self.context.user),
+                        filePath)
+                    deleteCmd = "(if [ -f '%s' ]; then rm -rf '%s'; fi) " % \
+                                (toDeleteFilePath, toDeleteFilePath)
+                    DefaultValue.execCommandWithMode(
+                        deleteCmd,
+                        "clean tool package files",
+                        self.context.sshTool,
+                        self.context.isSingle,
+                        self.context.mpprcFile)
+        except Exception as e:
+            self.context.logger.log(
+                "Failed to clean other tool package files.")
+            raise Exception(str(e))
+        if action == Const.ACTION_AUTO_ROLLBACK:
+            self.context.logger.debug(
+                "Successfully cleaned other tool package files.")
+        else:
+            self.context.logger.debug(
+                "Successfully cleaned other tool package files.", "constant")
+
+    def createGphomePack(self):
+        """
+        function: create Gphome pack
+        input : NA
+        output : NA
+        """
+        try:
+            cmd = "(if [ ! -d '%s' ]; then mkdir -p '%s'; fi)" % \
+                  (DefaultValue.getClusterToolPath(self.context.user),
+                   DefaultValue.getClusterToolPath(self.context.user))
+            cmd += " && (chmod %d -R %s)" % \
+                   (DefaultValue.KEY_DIRECTORY_MODE,
+                    DefaultValue.getClusterToolPath(self.context.user))
+            self.context.logger.debug(
+                "Command for creating directory: %s" % cmd)
+            DefaultValue.execCommandWithMode(cmd,
+                                             "create gphome path",
+                                             self.context.sshTool,
+                                             self.context.isSingle,
+                                             self.context.mpprcFile)
+            oldPackName = "%s-Package-bak_%s.tar.gz" % \
+                          (VersionInfo.PRODUCT_NAME_PACKAGE, self.oldCommitId)
+            packFilePath = "%s/%s" % (DefaultValue.getClusterToolPath(
+                self.context.user), oldPackName)
+            copyNode = ""
+            cmd = "if [ -f '%s' ]; then echo 'GetFile'; " \
+                  "else echo 'NoThisFile'; fi" % packFilePath
+            self.context.logger.debug("Command for checking file: %s" % cmd)
+            (status, output) = self.context.sshTool.getSshStatusOutput(
+                cmd, self.context.clusterNodes, self.context.mpprcFile)
+            outputMap = self.context.sshTool.parseSshOutput(
+                self.context.clusterNodes)
+            self.context.logger.debug("Output: %s" % output)
+            for node in self.context.clusterNodes:
+                if status[node] == DefaultValue.SUCCESS:
+                    if 'GetFile' in outputMap[node]:
+                        copyNode = node
+                        break
+            if copyNode:
+                self.context.logger.debug("Copy the file %s from node %s." %
+                                          (packFilePath, copyNode))
+                for node in self.context.clusterNodes:
+                    if status[node] == DefaultValue.SUCCESS:
+                        if 'NoThisFile' in outputMap[node]:
+                            cmd = g_Platform.getRemoteCopyCmd(
+                                packFilePath,
+                                DefaultValue.getClusterToolPath(
+                                    self.context.user),
+                                str(copyNode), False, 'directory', node)
+                            self.context.logger.debug(
+                                "Command for copying directory: %s" % cmd)
+                            DefaultValue.execCommandLocally(cmd)
+            else:
+                raise Exception(ErrorCode.GAUSS_502["GAUSS_50210"] %
+                                packFilePath)
+        except Exception as e:
+            raise Exception(str(e))
diff --git a/script/impl/upgrade/__init__.py b/script/impl/upgrade/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/script/killall b/script/killall
new file mode 100644
index 0000000..b6280e8
--- /dev/null
+++ b/script/killall
@@ -0,0 +1,169 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+#############################################################################
+# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+#
+# openGauss is licensed under Mulan PSL v2.
+# You can use this software according to the terms
+# and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+#
+# http://license.coscl.org.cn/MulanPSL2
+#
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+# ----------------------------------------------------------------------------
+# Description : killall is a program for killing processes.
+#############################################################################
+"""
+killall: find processes by name (optionally filtered by user) and send
+them a signal, mimicking the system killall utility.
+"""
+
+import sys
+import os
+import pwd
+import getpass
+import subprocess
+import optparse
+from gspylib.common.Common import DefaultValue
+from gspylib.common.ErrorCode import ErrorCode
+from gspylib.common.GaussLog import GaussLog
+
+
+class Closelogger():
+    """fallback logger that only prints to stdout"""
+    def __init__(self):
+        pass
+
+    def printMessage(self, msg):
+        sys.stdout.write("%s\n" % msg)
+
+    def debug(self, msg):
+        pass
+
+
+def init_logger():
+    global logger
+    LOG_DEBUG = 1
+    log_path = DefaultValue.getEnv("GAUSSLOG")
+    static_config_file = os.path.realpath(
+        "%s/bin/cluster_static_config" % DefaultValue.getEnv("GAUSSHOME"))
+    if not os.path.exists(str(log_path)) or \
+            not os.path.exists(str(static_config_file)):
+        logger = Closelogger()
+    else:
+        logFile = os.path.realpath("%s/om/%s" % (log_path, "killall_log.log"))
+        logger = GaussLog(logFile, "killall", LOG_DEBUG)
+
+
+def parse_command():
+    """parse the command line"""
+    parser = optparse.OptionParser(conflict_handler='resolve')
+    parser.disable_interspersed_args()
+    parser.add_option('-s', dest='signal', help='Sends a specified signal.')
+    parser.add_option('-u', dest='user', help='Specified user')
+    return parser
+
+
+def exec_kill_process(user="", signal="", processname=""):
+    """find the matching processes and kill them"""
+    logger.debug("start to kill process %s" % processname)
+    if signal == "":
+        signal = 15
+    if user:
+        user_uid = pwd.getpwnam("%s" % user).pw_uid
+        cmd = "ps -eo uid,pid,comm | awk '{if ($1==\"%s\")print}' | awk" \
+              " '{if ($3==\"%s\")print}'" % (user_uid, processname)
+    else:
+        cmd = "ps -eo uid,pid,comm | awk '{if ($3==\"%s\")print}'" \
+              % processname
+    (status, output) = subprocess.getstatusoutput(cmd)
+    if status != 0:
+        logger.printMessage("%s: no process found" % processname)
+        sys.exit(1)
+    if output:
+        result_list = output.strip().split('\n')
+    else:
+        logger.printMessage("%s: no process found" % processname)
+        sys.exit(1)
+    for result in result_list:
+        result = [i.strip() for i in result.strip().split(" ") if i]
+        if result[2].strip() == processname:
+            cmd = "kill -%s %s" % (signal, result[1])
+            (status, output) = subprocess.getstatusoutput(cmd)
+            if status != 0:
+                cmd = "ps -eo pid | awk '{print $1}' | grep %s" % result[1]
+                (status, output) = subprocess.getstatusoutput(cmd)
+                if status == 0 and output:
+                    logger.debug("Error: %s(%s): Operation not permitted"
+                                 % (processname, result[1]))
+                    logger.printMessage("%s(%s): Operation not permitted"
+                                        % (processname, result[1]))
+                else:
+                    logger.debug("Error: %s: no process found" % processname)
+                    logger.printMessage("%s: no process found" % processname)
+                sys.exit(1)
+    logger.debug("kill process %s success" % processname)
+    sys.exit(0)
+
+
+def check_signal_value(signal, user, processname):
+    """validate the signal number; exit if it is out of range"""
+    signal_list = [i for i in range(1, 65)]
+    if int(signal) not in signal_list:
+        if user:
+            user_uid = pwd.getpwnam("%s" % user).pw_uid
+            cmd = "ps -eo uid,pid,comm | awk '{if ($1==\"%s\")print}' | awk " \
+                  "'{if ($3==\"%s\")print}'" % (user_uid, processname)
+        else:
+            cmd = "ps -eo uid,pid,comm | awk '{if ($3==\"%s\")print}'"\
+                  % processname
+        (status, output) = subprocess.getstatusoutput(cmd)
+        # if the query result does not exist, the status is 1
+        if status != 0:
+            logger.printMessage("%s: no process found" % processname)
+            sys.exit(1)
+        if output:
+            result_list = output.strip().split('\n')
+        else:
+            logger.printMessage("%s: no process found" % processname)
+            sys.exit(1)
+        for result in result_list:
+            result = [i.strip() for i in result.strip().split(" ") if i]
+            if result[2].strip() == processname:
+                logger.debug("Error: %s(%s): Invalid argument"
+                             % (processname, result[1]))
+                logger.printMessage("%s(%s): Invalid argument"
+                                    % (processname, result[1]))
+        logger.printMessage("%s: no process found" % processname)
+        sys.exit(1)
+    else:
+        return int(signal)
+
+
+if __name__ == '__main__':
+    """main"""
+    init_logger()
+    signal_type = ""
+    for value in sys.argv[1:]:
+        if value.strip().startswith("-") and value.count("-") == 1 and \
+                value.strip().split("-")[1].isdigit():
+            signal_type = value.strip().split("-")[1]
+            sys.argv.remove(value)
+            break
+    parser = parse_command()
+    opts, args = parser.parse_args()
+    signal = ""
+    user = ""
+    process = ""
+    if opts.user:
+        user = opts.user.strip()
+    if args:
+        process = args[0]
+    if opts.signal:
+        signal = opts.signal.strip()
+    elif signal_type:
+        signal_type = check_signal_value(signal_type, user, process)
+        signal = signal_type
+    exec_kill_process(user=user, signal=signal, processname=process)
diff --git a/script/local/Backup.py b/script/local/Backup.py
new file mode 100644
index 0000000..c82f196
--- /dev/null
+++ b/script/local/Backup.py
@@ -0,0 +1,544 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+#############################################################################
+# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+#
+# openGauss is licensed under Mulan PSL v2.
+# You can use this software according to the terms
+# and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+#
+# http://license.coscl.org.cn/MulanPSL2
+#
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+# ----------------------------------------------------------------------------
+# Description : Backup.py is a local utility to back up binary files
+#               and parameter files
+#############################################################################
+import getopt
+import os
+import sys
+
+sys.path.append(sys.path[0] + "/../")
+from gspylib.common.DbClusterInfo import dbClusterInfo
+from gspylib.common.GaussLog import GaussLog
+from gspylib.common.Common import DefaultValue
+from gspylib.common.ParameterParsecheck import Parameter
+from gspylib.common.ErrorCode import ErrorCode
+from gspylib.common.LocalBaseOM import LocalBaseOM
+from gspylib.os.gsOSlib import g_OSlib
+from gspylib.os.gsfile import g_file
+
+#######################################################################
+# GLOBAL VARIABLES
+#######################################################################
+GTM_CONF = "gtm.conf"
+
+POSTGRESQL_CONF = "postgresql.conf"
+POSTGRESQL_HBA_CONF = "pg_hba.conf"
+CM_SERVER_CONF = "cm_server.conf"
+CM_AGENT_CONF = "cm_agent.conf"
+HOSTNAME = DefaultValue.GetHostIpOrName()
+
+g_clusterUser = ""
+g_ignoreMiss = False
+
+
+class OldVersionModules():
+    def __init__(self):
+        """
+        function: constructor
+        """
+        self.oldDbClusterInfoModule = None
+        self.oldDbClusterStatusModule = None
+
+
+class LocalBackup(LocalBaseOM):
+    """
+    function: back up binary files and parameter files on the local node
+    input : NA
+    output: NA
+    """
+
+    def __init__(self, logFile="", user="", tmpBackupDir="", backupDir="",
+                 backupPara=False, backupBin=False, nodeName=""):
+        """
+        function: initialize variables
+        input : user, tmpBackupDir, backupDir, backupPara,
+                backupBin, logFile, nodeName
+        output: parameter
+        """
+        LocalBaseOM.__init__(self, logFile, user)
+        self.tmpBackupDir = tmpBackupDir
+        self.backupDir = backupDir
+        self.backupPara = backupPara
+        self.backupBin = backupBin
+        self.nodeName = nodeName
+        self.installPath = ""
+        self.__hostnameFile = None
+        self.dbNodeInfo = None
+        self.clusterInfo = None
+
+        # static parameters
+        self.binTarName = "binary_%s.tar" % HOSTNAME
+        self.paraTarName = "parameter_%s.tar" % HOSTNAME
+        self.hostnameFileName = "HOSTNAME"
+
+    ########################################################################
+    # This is the main backup flow.
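+    # A hypothetical invocation, for illustration only (this script is
+    # normally dispatched on each node as the "Local_Backup" OM command,
+    # not run by hand; the user and paths below are placeholders):
+    #     python3 Backup.py -U omm -P /tmp/backupTemp -b -p -l /tmp/om.log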
+ ######################################################################## + + def run(self): + """ + function: 1.parse config file + 2.check the backup directory + 3.do the backup + 4.close log file + input : NA + output: NA + """ + try: + # parse config file + self.parseConfigFile() + # Checking backup directory + self.checkBackupDir() + # back up binary files and parameter file + self.doBackup() + except Exception as e: + self.logger.logExit(str(e)) + # close log file + self.logger.closeLog() + + def parseClusterInfoFromStaticFile(self): + """ + function: 1.init the clusterInfo + 2.get clusterInfo from static config file + input : NA + output: NA + """ + try: + self.readConfigInfo() + except Exception as e: + self.logger.debug(str(e)) + gaussHome = DefaultValue.getInstallDir(self.user) + try: + g_oldVersionModules = OldVersionModules() + if (os.path.exists( + "%s/bin/script/util/DbClusterInfo.py" % gaussHome)): + sys.path.append( + os.path.dirname("%s/bin/script/util/" % gaussHome)) + else: + sys.path.append(os.path.dirname( + "%s/bin/script/gspylib/common/" % gaussHome)) + g_oldVersionModules.oldDbClusterInfoModule = __import__( + 'DbClusterInfo') + self.clusterInfo = \ + g_oldVersionModules.oldDbClusterInfoModule.dbClusterInfo() + self.clusterInfo.initFromStaticConfig(self.user) + except Exception as e: + self.logger.debug(str(e)) + try: + self.clusterInfo = dbClusterInfo() + self.clusterInfo.initFromStaticConfig(self.user) + except Exception as e: + self.logger.logExit(str(e)) + + def parseConfigFile(self): + """ + function: 1.init the clusterInfo + 2.get clusterInfo from static config file + 3.obtain local installation path for backup + 4.obtain user and group for backup + 5.obtain local node information for backup + input : NA + output: NA + """ + self.logger.log("Parsing the configuration file.") + self.parseClusterInfoFromStaticFile() + try: + self.logger.log("Obtaining local installation path for backup.") + self.installPath = os.path.realpath(self.clusterInfo.appPath) + if (not os.path.exists(self.installPath)): + raise Exception( + ErrorCode.GAUSS_502["GAUSS_50201"] % self.installPath) + + self.logger.debug( + "Local installation path: %s." % self.installPath) + if (self.dbNodeInfo is None): + self.logger.log("Obtaining local node information for backup.") + self.dbNodeInfo = self.clusterInfo.getDbNodeByName(HOSTNAME) + self.logger.debug( + "Local node information: \n%s." 
% str(self.dbNodeInfo)) + except Exception as e: + raise Exception(str(e)) + + self.logger.log("Successfully parsed the configuration file.") + + def checkBackupDir(self): + """ + function: 1.mkdir the tmp backup directory + 2.check the tmp backup directory size + 3.mkdir the backup directory + input : NA + output: NA + """ + self.logger.log("Checking backup directory.") + + try: + if (not os.path.exists(self.tmpBackupDir)): + os.makedirs(self.tmpBackupDir, + DefaultValue.KEY_DIRECTORY_PERMISSION) + needSize = DefaultValue.APP_DISK_SIZE + vfs = os.statvfs(self.tmpBackupDir) + availableSize = vfs.f_bavail * vfs.f_bsize // (1024 * 1024) + # 100M for binary files and parameter files + if (availableSize < needSize): + raise Exception(ErrorCode.GAUSS_504["GAUSS_50400"] % ( + self.tmpBackupDir, str(needSize))) + except Exception as e: + + raise Exception(str(e)) + + if (self.backupDir != ""): + try: + if (not os.path.exists(self.backupDir)): + os.makedirs(self.backupDir, + DefaultValue.KEY_DIRECTORY_PERMISSION) + except Exception as e: + raise Exception( + ErrorCode.GAUSS_502["GAUSS_50208"] % self.backupDir + + " Error:\n%s" % e) + + self.logger.log("Successfully checked backup directory.") + + def doBackup(self): + """ + function: 1.back up binary files + 2.back up parameter files + input : NA + output: NA + """ + self.logger.log("Backing up files.") + + if self.backupBin: + self.logger.log("Backing up binary files.") + + try: + self.logger.debug( + "Installation path is %s." % self.installPath) + if (len(os.listdir(self.installPath)) == 0): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50203"] % ( + "installation path [%s]" % self.installPath)) + self.__tarDir(self.installPath, self.binTarName) + except Exception as e: + raise Exception(str(e)) + + self.logger.log("Successfully backed up binary files.") + + if self.backupPara: + self.logger.log("Backing up parameter files.") + + try: + self.logger.debug( + "Creating temporary directory for all parameter files.") + temp_dir = os.path.join(self.tmpBackupDir, + "parameter_%s" % HOSTNAME) + self.logger.debug("Temporary directory path: %s." % temp_dir) + if (os.path.exists(temp_dir)): + file_list = os.listdir(temp_dir) + if (len(file_list) != 0): + self.logger.debug( + "The temporary directory " + "is not empty.\n%s\nRemove all files silently." + % file_list) + g_file.cleanDirectoryContent(temp_dir) + else: + os.makedirs(temp_dir, + DefaultValue.KEY_DIRECTORY_PERMISSION) + + self.logger.debug("Creating hostname file.") + hostnameFile = os.path.join(temp_dir, self.hostnameFileName) + self.logger.debug( + "Register hostname file path: %s." 
+                    % hostnameFile)
+                g_file.createFileInSafeMode(hostnameFile)
+                with open(hostnameFile, "w") as self.__hostnameFile:
+                    hostName = DefaultValue.GetHostIpOrName()
+                    self.__hostnameFile.write("%s" % hostName)
+                    self.logger.debug("Flush hostname file.")
+                    self.__hostnameFile.flush()
+                self.__hostnameFile = None
+
+                os.chmod(hostnameFile, DefaultValue.KEY_FILE_PERMISSION)
+
+                self.logger.debug("Collecting parameter files.")
+                for inst in self.dbNodeInfo.datanodes:
+                    self.__collectParaFilesToTempDir(inst, temp_dir)
+
+                self.logger.debug(
+                    "Generating parameter files to be compressed.")
+                self.__tarDir(temp_dir, self.paraTarName, True)
+
+                self.logger.debug("Removing temporary directory.")
+                g_file.removeDirectory(temp_dir)
+            except Exception as e:
+                g_file.removeDirectory(temp_dir)
+                raise Exception(str(e))
+
+            self.logger.log("Successfully backed up parameter files.")
+
+        self.logger.log("Successfully backed up files.")
+
+    def __collectParaFilesToTempDir(self, inst, temp_dir):
+        """
+        function: 1.check the instance directory
+                  2.get the parameter files of the instance
+                  3.copy the parameter files to the backup directory
+        input : inst, temp_dir
+        output: NA
+        """
+        if (not os.path.exists(inst.datadir) or len(
+                os.listdir(inst.datadir)) == 0):
+            if (g_ignoreMiss):
+                self.logger.log(
+                    "Data directory (%s) of instance (%s) "
+                    "does not exist or is empty." %
+                    (inst.datadir, str(inst)))
+                return
+            else:
+                raise Exception(ErrorCode.GAUSS_502["GAUSS_50228"] %
+                                (("data directory [%s] of instance [%s]")
+                                 % (inst.datadir, str(inst))))
+
+        paraFileList = {}
+        if (inst.instanceRole == DefaultValue.INSTANCE_ROLE_CMSERVER):
+            paraFileList[CM_SERVER_CONF] = os.path.join(inst.datadir,
+                                                        CM_SERVER_CONF)
+        elif (inst.instanceRole == DefaultValue.INSTANCE_ROLE_CMAGENT):
+            paraFileList[CM_AGENT_CONF] = os.path.join(inst.datadir,
+                                                       CM_AGENT_CONF)
+        elif (inst.instanceRole == DefaultValue.INSTANCE_ROLE_GTM):
+            paraFileList[GTM_CONF] = os.path.join(inst.datadir, GTM_CONF)
+        elif (inst.instanceRole == DefaultValue.INSTANCE_ROLE_COODINATOR):
+            paraFileList[POSTGRESQL_CONF] = os.path.join(inst.datadir,
+                                                         POSTGRESQL_CONF)
+            paraFileList[POSTGRESQL_HBA_CONF] = os.path.join(
+                inst.datadir, POSTGRESQL_HBA_CONF)
+        elif (inst.instanceRole == DefaultValue.INSTANCE_ROLE_DATANODE):
+            paraFileList[POSTGRESQL_CONF] = os.path.join(
+                inst.datadir, POSTGRESQL_CONF)
+            paraFileList[POSTGRESQL_HBA_CONF] = os.path.join(
+                inst.datadir, POSTGRESQL_HBA_CONF)
+        else:
+            raise Exception(ErrorCode.GAUSS_512["GAUSS_51204"] % (
+                "specified", inst.instanceRole))
+
+        for key in paraFileList:
+            if (not os.path.exists(paraFileList[key])):
+                self.logger.debug(
+                    "The parameter path is: %s." % paraFileList[key])
+                if (g_ignoreMiss):
+                    self.logger.log(
+                        "Parameter file of instance [%s] does not exist."
+                        % str(inst))
+                    return
+                else:
+                    raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % (
+                        'parameter file of instance [%s]' % str(inst)))
+
+        for key in paraFileList:
+            backupFileName = "%d_%s" % (inst.instanceId, key)
+            g_file.cpFile(paraFileList[key],
+                          os.path.join(temp_dir, backupFileName))
+
+    def __tarDir(self, targetDir, tarFileName, backParameter=False):
+        """
+        function: 1.use the tar command to compress the backup files
+                  2.copy the tar file to the node that is currently
+                    performing the backup
+        input : targetDir, tarFileName, backParameter
+        output: NA
+        """
+        tarName = os.path.join(self.tmpBackupDir, tarFileName)
+        tarDir = targetDir.split("/")[-1]
+        path = os.path.realpath(os.path.join(targetDir, ".."))
+        cmd = g_file.SHELL_CMD_DICT["compressTarFile"] % (
+            path, tarName, tarDir, DefaultValue.KEY_FILE_MODE, tarName)
+        (status, output) = DefaultValue.retryGetstatusoutput(cmd)
+        if (status != 0):
+            raise Exception(ErrorCode.GAUSS_502["GAUSS_50227"] % (
+                "directory [%s] to [%s]" %
+                (targetDir, tarName)) + " Error: \n%s" % output)
+
+        if self.nodeName != "":
+            # Only parameter backup:
+            # send the compressed backup file to the node
+            # that is currently performing the backup
+            if backParameter and self.nodeName != g_OSlib.getHostName():
+                g_OSlib.scpFile(self.nodeName, tarName, self.tmpBackupDir)
+
+
+##############################################################################
+# Help context. U:R:oC:v:
+##############################################################################
+def usage():
+    """
+    function: usage
+    input : NA
+    output : NA
+    """
+    print(
+        "Backup.py is a local utility to back up binary files "
+        "and parameter files.")
+    print(" ")
+    print("Usage:")
+    print("python3 Backup.py --help")
+    print(" ")
+    print("Common options:")
+    print("  -U                             the user of the cluster.")
+    print("  -P, --position=TEMPBACKUPPATH  the temp backup directory.")
+    print("  -B, --backupdir=BACKUPPATH     the backup directory.")
+    print("  -p, --parameter                backup parameter files.")
+    print("  -b, --binary_file              backup binary files.")
+    print("  -i, --ingore_miss              ignore missing backup entities.")
+    print(
+        "  --nodeName=HOSTNAME            the node that is "
+        "currently performing the backup.")
+    print("  -l, --logpath=LOGPATH          the log directory.")
+    print("  -h, --help                     show this help, then exit.")
+    print(" ")
+
+
+def checkUserParameter():
+    """
+    function: check the user parameter
+    input : NA
+    output: NA
+    """
+    if (g_clusterUser == ""):
+        GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50001"] % 'U' + ".")
+
+
+def checkLogFile(logFile):
+    """
+    function: check the log file
+    input : NA
+    output: NA
+    """
+    if (logFile == ""):
+        logFile = DefaultValue.getOMLogPath(DefaultValue.LOCAL_LOG_FILE,
+                                            g_clusterUser, "", "")
+    if (not os.path.isabs(logFile)):
+        GaussLog.exitWithError(ErrorCode.GAUSS_502["GAUSS_50213"] % "log")
+
+
+def checkBackupPara(backupPara, backupBin):
+    """
+    function: check the -p and -b parameters
+    input : NA
+    output: NA
+    """
+    if not backupPara and not backupBin:
+        GaussLog.exitWithError(
+            ErrorCode.GAUSS_500["GAUSS_50001"] % 'p or -b' + ".")
+
+
+def checkTmpBackupDir(tmpBackupDir):
+    """
+    function: check the temp backup directory
+    input : NA
+    output: NA
+    """
+    if (tmpBackupDir == ""):
+        GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50001"] % 'P' + ".")
+
+
+def main():
+    """
+    function: main function
+              1.parse the command line
+              2.check if the user exists and is the right user
+              3.check the log file
+              4.check backupPara and backupBin
+              5.check tmpBackupDir
+              6.do the backup
+    input : NA
+    output: NA
+    """
+    try:
+        opts, args
= getopt.getopt(sys.argv[1:], "U:P:B:l:pbhi", + ["position=", "backupdir=", \ + "nodeName=", "parameter", "binary_file", + "logpath=", "help", "ingore_miss"]) + except getopt.GetoptError as e: + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50000"] % str(e)) + if (len(args) > 0): + GaussLog.exitWithError( + ErrorCode.GAUSS_500["GAUSS_50000"] % str(args[0])) + + global g_clusterUser + global g_ignoreMiss + tmpBackupDir = "" + backupDir = "" + backupPara = False + backupBin = False + logFile = "" + nodeName = "" + for key, value in opts: + if (key == "-h" or key == "--help"): + usage() + sys.exit(0) + elif (key == "-U"): + g_clusterUser = value.strip() + elif (key == "-P" or key == "--position"): + tmpBackupDir = value.strip() + elif (key == "-B" or key == "--backupdir"): + backupDir = value.strip() + elif (key == "-p" or key == "--parameter"): + backupPara = True + elif (key == "-b" or key == "--binary_file"): + backupBin = True + elif (key == "-i" or key == "--ingore_miss"): + g_ignoreMiss = True + elif (key == "-l" or key == "--logpath"): + logFile = value.strip() + elif (key == "--nodeName"): + nodeName = value.strip() + else: + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50000"] % value) + + Parameter.checkParaVaild(key, value) + + if (g_ignoreMiss): + gaussHome = DefaultValue.getEnv("GAUSSHOME") + if not gaussHome: + return + + # check if user exist and is the right user + checkUserParameter() + DefaultValue.checkUser(g_clusterUser, False) + # check log file + checkLogFile(logFile) + # check backupPara and backupBin + checkBackupPara(backupPara, backupBin) + # check tmpBackupDir + checkTmpBackupDir(tmpBackupDir) + try: + LocalBackuper = LocalBackup(logFile, g_clusterUser, tmpBackupDir, + backupDir, backupPara, backupBin, nodeName) + LocalBackuper.run() + except Exception as e: + GaussLog.exitWithError(str(e)) + + +if __name__ == '__main__': + """ + function: main function + input : NA + output: NA + """ + main() + sys.exit(0) diff --git a/script/local/CheckConfig.py b/script/local/CheckConfig.py new file mode 100644 index 0000000..ad2a478 --- /dev/null +++ b/script/local/CheckConfig.py @@ -0,0 +1,397 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +############################################################################# +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. 
+# ----------------------------------------------------------------------------
+# Description : CheckConfig.py is a local utility to
+#               execute some functions about instance initialization
+#############################################################################
+import subprocess
+import getopt
+import sys
+import os
+
+sys.path.append(sys.path[0] + "/../")
+from gspylib.common.GaussLog import GaussLog
+from gspylib.common.ParameterParsecheck import Parameter
+from gspylib.common.Common import DefaultValue, ClusterCommand
+from gspylib.common.LocalBaseOM import LocalBaseOM
+from gspylib.common.ErrorCode import ErrorCode
+from gspylib.os.gsfile import g_file
+from gspylib.common.VersionInfo import VersionInfo
+
+#############################################################################
+# Global variables
+#   TIME_OUT: the time-out value
+#   self.logger: the global logger
+#   g_clusterUser: the global user information
+#############################################################################
+TIME_OUT = 2
+
+
+class CheckNodeEnv(LocalBaseOM):
+    """
+    function: Initialize all instances on the local node
+    input : NA
+    output: NA
+    """
+
+    def __init__(self, logFile, clusterUser, dataParams, instIds):
+        """
+        function: init function
+        input : logFile, clusterUser, dataParams, instIds
+        output: NA
+        """
+        LocalBaseOM.__init__(self, logFile, clusterUser)
+        self.__dataGucParams = dataParams[:]
+        self.__instanceIds = instIds[:]  # if empty, check all instances
+        self.clusterInfo = None
+        self.dbNodeInfo = None
+        self.__diskSizeInfo = {}
+        self.__pgsqlFiles = []
+
+    def run(self):
+        """
+        function: Init instances on the local node:
+                  1.Check the GaussDB log directory
+                  2.Check the pgsql directory
+                  3.Check the instance config on the local node
+                  4.Set manual start
+                  5.Set linux cron
+        input : NA
+        output: NA
+        """
+        self.__checkParameters()
+        self.readConfigInfo()
+        self.logger.debug("Instance information on local node:\n%s."
+                          % str(self.dbNodeInfo))
+        self.initComponent()
+        # Check the GaussDB log directory
+        self.__checkGaussLogDir()
+        # Check the pgsql directory
+        self.__checkPgsqlDir()
+        # Check the instance config on the local node
+        self.__checkNodeConfig()
+        self.logger.log("Checked the configuration file on node [%s]"
+                        " successfully." % DefaultValue.GetHostIpOrName())
+
+    def __checkParameters(self):
+        """
+        function: Check the parameters for instance config
+                  (the GUC parameters used to configure DNs)
+        input : NA
+        output: NA
+        """
+        self.logger.log("Checking parameters for configuring DNs.")
+        for param in self.__dataGucParams:
+            if self.__checkconfigParams(param.strip()) != 0:
+                self.logger.logExit(ErrorCode.GAUSS_500["GAUSS_50000"]
+                                    % param)
+
+    def __checkconfigParams(self, param):
+        """
+        function: Check a parameter for postgresql.conf;
+                  port is calculated automatically
+        input : param
+        output: 0/1
+        """
+        configInvalidArgs = ["port"]
+
+        argList = param.split("=")
+        for arg in configInvalidArgs:
+            if (arg in argList):
+                return 1
+
+        return 0
+
+    def __checkGaussLogDir(self):
+        """
+        function: Check the GaussDB log directory:
+                  1.check the user base log directory
+                  2.create the instance log directories
+                  3.change directory modes
+        input : NA
+        output: NA
+        """
+        # check user base log dir
+        user_dir = DefaultValue.getUserLogDirWithUser(self.user)
+        self.logger.log("Checking %s log directory[%s]."
+ % (VersionInfo.PRODUCT_NAME, user_dir)) + if (not os.path.exists(user_dir)): + self.logger.logExit(ErrorCode.GAUSS_502["GAUSS_50201"] + % ('user base log directory [%s]' % user_dir)) + ##make gs_profile dir + user_profile_dir = os.path.join(user_dir, "gs_profile") + self.__makeDirForDBUser(user_profile_dir, "user_profile_dir") + + ##make pg_log dir and pg_audit dir + user_pg_log_dir = os.path.join(user_dir, "pg_log") + self.__makeDirForDBUser(user_pg_log_dir, "user_pg_log_dir") + + user_pg_audit_dir = os.path.join(user_dir, "pg_audit") + self.__makeDirForDBUser(user_pg_audit_dir, "user_pg_audit_dir") + + ##make bin log dir + user_bin_dir = os.path.join(user_dir, "bin") + self.__makeDirForDBUser(user_bin_dir, "user_bin_dir") + + for inst in self.dbNodeInfo.datanodes: + log_dir_name = "dn_%d" % (inst.instanceId) + log_dir = os.path.join(user_pg_log_dir, log_dir_name) + audit_dir = os.path.join(user_pg_audit_dir, log_dir_name) + self.__makeDirForDBUser(log_dir, "user_pg_log_%s_dir" + % log_dir_name) + self.__makeDirForDBUser(audit_dir, "user_pg_audit_%s_dir" + % log_dir_name) + + try: + self.logger.debug("Command to find directory in directory[%s] " + % user_dir) + # change directory mode + ClusterCommand.getchangeDirModeCmd(user_dir) + self.logger.debug("Command to find file in directory[%s] " + % user_dir) + # change log file mode + ClusterCommand.getchangeFileModeCmd(user_dir) + self.logger.debug("Command to change the obs log setting.") + # change the obs log setting file distribute package + self.changeObsLogSetting() + except Exception as e: + self.logger.logExit(str(e)) + + def changeObsLogSetting(self): + """ + function: change the obs log setting file distribute package + input : NA + output: NA + """ + obspathNum = self.clusterInfo.appPath.count("/") + """ + obs path is the relative path between log path and app path. + if app path is /test/app and log path is /test/log + then the relative path from app to log is '..'+'/..'*(num-1)+logpath + the relative path from obs to log is '../../..'+'/..'*(num-1)+logpath + """ + username = DefaultValue.getEnv("LOGNAME") + DefaultValue.checkPathVaild(username) + obspath = "LogPath=../.." + "/.." * obspathNum + "%s/" \ + % self.clusterInfo.logPath + "%s" % username + "/bin/gs_obs" + cmd = "mkdir -p '%s/%s/bin/gs_obs' -m %s" \ + % (self.clusterInfo.logPath, username, + DefaultValue.KEY_DIRECTORY_MODE) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + self.logger.debug("The cmd is %s " % cmd) + raise Exception(ErrorCode.GAUSS_502["GAUSS_50208"] % "obs log" + + " Error: \n%s " % output) + obsLogName = "gs_obs" + obsinifile = "%s/lib/OBS.ini" % self.clusterInfo.appPath + + if not os.path.exists(obsinifile): + self.logger.logExit(ErrorCode.GAUSS_502["GAUSS_50201"] + % obsinifile) + try: + with open(obsinifile, 'r') as fp: + lines = fp.readlines() + flen = len(lines) - 1 + for i in range(flen): + if "sdkname=eSDK-OBS-API-Linux-C" in lines[i]: + lines[i] = lines[i].replace("sdkname=eSDK-OBS-API-Linux-C", + "sdkname=gs_obs") + if "LogPath=../logs" in lines[i]: + lines[i] = lines[i].replace("LogPath=../logs", obspath) + with open(obsinifile, 'w') as fpw: + fpw.writelines(lines) + except Exception as e: + self.logger.logExit(str(e)) + + def __makeDirForDBUser(self, path, desc): + """ + function: Create a dir for DBUser: + 1.create a dir for DB user + 2.Check if target directory is writeable for user + input : path, desc + output: NA + """ + self.logger.debug("Making %s directory[%s] for database node user." 
+ % (desc, path)) + g_file.createDirectory(path) + g_file.changeMode(DefaultValue.KEY_DIRECTORY_MODE, path) + if (not g_file.checkDirWriteable(path)): + self.logger.logExit(ErrorCode.GAUSS_501["GAUSS_50102"] + % (path, self.user)) + + def __checkPgsqlDir(self): + """ + function: 1.Check pgsql directory + 2.change permission + 3.Check if target directory is writeable for user + input : NA + output: NA + """ + tmpDir = DefaultValue.getTmpDirFromEnv() + self.logger.log("Checking directory [%s]." % tmpDir) + if (not os.path.exists(tmpDir)): + self.logger.logExit(ErrorCode.GAUSS_502["GAUSS_50201"] + % tmpDir + " Please create it first.") + + self.__pgsqlFiles = os.listdir(tmpDir) + + g_file.changeMode(DefaultValue.KEY_DIRECTORY_MODE, tmpDir) + if (not g_file.checkDirWriteable(tmpDir)): + self.logger.logExit(ErrorCode.GAUSS_501["GAUSS_50102"] + % (tmpDir, self.user)) + + def checkDNConfig(self): + """ + function: Check DN configuration + input : NA + output: NA + """ + for dnInst in self.dbNodeInfo.datanodes: + if (len(self.__instanceIds) != 0 and + dnInst.instanceId not in self.__instanceIds): + continue + self.__checkDataDir(dnInst.datadir) + if (len(dnInst.ssdDir) != 0): + self.__checkDataDir(dnInst.ssdDir) + + def __checkNodeConfig(self): + """ + function: Check instances config on local node + input : NA + output: NA + """ + + self.logger.log("Checking database node configuration.") + self.checkDNConfig() + + def __checkDataDir(self, datadir, checkEmpty=True, checkSize=True): + """ + function: Check if directory exists and disk size lefted + input : datadir, checkEmpty, checkSize + output: NA + """ + self.logger.log("Checking directory [%s]." % datadir) + + # Check and create directory + ownerPath = datadir + if (os.path.exists(datadir)): + if (checkEmpty): + fileList = os.listdir(datadir) + # full_upgrade_bak is backup path for datapath and install path + # we should skip it + if ("full_upgrade_bak" in fileList): + fileList.remove("full_upgrade_bak") + if ("pg_location" in fileList): + fileList.remove("pg_location") + if (len(fileList) != 0): + self.logger.logExit(ErrorCode.GAUSS_502["GAUSS_50202"] + % datadir) + else: + while True: + (ownerPath, dirName) = os.path.split(ownerPath) + if (os.path.exists(ownerPath) or dirName == ""): + ownerPath = os.path.join(ownerPath, dirName) + os.makedirs(datadir, + DefaultValue.KEY_DIRECTORY_PERMISSION) + break + + # Check if data directory is writeable + if (not g_file.checkDirWriteable(datadir)): + self.logger.logExit(ErrorCode.GAUSS_501["GAUSS_50102"] + % (datadir, self.user)) + + if (checkSize): + self.__diskSizeInfo = DefaultValue.checkDirSize( + datadir, DefaultValue.INSTANCE_DISK_SIZE, self.logger) + + +def usage(): + """ +Usage: + python3 CheckConfig.py -h | --help + python3 CheckConfig.py -U user + [-i instId [...]] + [-C "PARAMETER=VALUE" [...]] + [-D "PARAMETER=VALUE" [...]] + [-l logfile] + """ + + print(usage.__doc__) + + +def main(): + """ + function: main function + input : NA + output: NA + """ + try: + opts, args = getopt.getopt(sys.argv[1:], "U:C:D:i:l:h", ["help"]) + except Exception as e: + usage() + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50000"] + % str(e)) + + if (len(args) > 0): + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50000"] + % str(args[0])) + + logFile = "" + dataParams = [] + instanceIds = [] + + for (key, value) in opts: + if (key == "-h" or key == "--help"): + usage() + sys.exit(0) + elif (key == "-U"): + clusterUser = value + elif (key == "-D"): + dataParams.append(value) + elif (key == "-l"): + 
logFile = os.path.realpath(value) + elif (key == "-i"): + if (value.isdigit()): + instanceIds.append(int(value)) + else: + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50004"] + % "i") + + Parameter.checkParaVaild(key, value) + + # check if user exist and is the right user + DefaultValue.checkUser(clusterUser) + + # check log dir + if (logFile == ""): + logFile = DefaultValue.getOMLogPath(DefaultValue.LOCAL_LOG_FILE, + clusterUser, "", "") + + try: + checker = CheckNodeEnv(logFile, clusterUser, + dataParams, instanceIds) + checker.run() + + sys.exit(0) + except Exception as e: + GaussLog.exitWithError(str(e)) + + +if __name__ == '__main__': + main() diff --git a/script/local/CheckInstall.py b/script/local/CheckInstall.py new file mode 100644 index 0000000..4beed19 --- /dev/null +++ b/script/local/CheckInstall.py @@ -0,0 +1,640 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +############################################################################# +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +# Description : CheckInstall.py is a utility to install Gauss MPP Database. +############################################################################# +import getopt +import os +import sys +import platform +import math +import pwd + +sys.path.append(sys.path[0] + "/../") +from gspylib.common.GaussLog import GaussLog +from gspylib.common.Common import DefaultValue +from gspylib.common.VersionInfo import VersionInfo +from gspylib.common.ParameterParsecheck import Parameter +from gspylib.common.DbClusterInfo import dbClusterInfo +from gspylib.common.ErrorCode import ErrorCode +from gspylib.common.LocalBaseOM import LocalBaseOM +from gspylib.os.gsfile import g_file +from gspylib.os.gsOSlib import g_OSlib +from gspylib.threads.parallelTool import parallelTool + +OTHER_FLAG = "0" +PREINSTALL_FLAG = "1" +INSTALL_FLAG = "2" +g_clusterInfo = None +TIME_OUT = 2 +RETRY_TIMES = 3 + +######################################################################## +# Global variables define +######################################################################## +g_opts = None + + +######################################################################## +class CmdOptions(): + """ + Class: cmdOptions + """ + + def __init__(self): + """ + function: Constructor + input : NA + output: NA + """ + self.installPath = "" + self.user = "" + self.group = "" + self.userProfile = "" + self.mpprcFile = "" + self.clusterConfig = "" + self.logFile = "" + self.userInfo = "" + + # DB config parameters + self.confParameters = [] + self.platformString = platform.system() + self.logger = None + + +####################################################################### +# Help context. U:R:oC:v: +######################################################################## +def usage(): + """ +python3 checkInstall.py is a utility to check Gauss MPP Database install +env. 
+Usage: + python3 checkInstall.py --help + python3 checkInstall.py -U user:group -R installpath [--replace] + +Common options: + -U the database program and cluster owner + -R the database program path + -C configure the database configuration file, for more detail + information see postgresql.conf + --replace do check install for replace + --help show this help, then exit + """ + print(usage.__doc__) + + +def parseCommandLine(): + """ + parse command line + input : NA + output: NA + """ + try: + opts, args = getopt.getopt(sys.argv[1:], "U:R:C:l:X:", ["help"]) + except getopt.GetoptError as e: + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50000"] % str(e)) + if (len(args) > 0): + usage() + GaussLog.exitWithError( + ErrorCode.GAUSS_500["GAUSS_50000"] % str(args[0])) + + global g_opts + g_opts = CmdOptions() + + parameter_map = {"-U": g_opts.userInfo, "-R": g_opts.installPath, + "-l": g_opts.logFile, "-X": g_opts.clusterConfig} + parameter_keys = parameter_map.keys() + for key, value in opts: + if (key == "--help"): + usage() + sys.exit(0) + elif (key in parameter_keys): + parameter_map[key] = value + elif (key == "-C"): + g_opts.confParameters.append(value) + else: + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50000"] % value) + Parameter.checkParaVaild(key, value) + + g_opts.userInfo = parameter_map["-U"] + g_opts.installPath = parameter_map["-R"] + if os.path.islink(g_opts.installPath) or not os.path.exists( + g_opts.installPath): + versionFile = VersionInfo.get_version_file() + commitid = VersionInfo.get_version_info(versionFile)[2] + g_opts.installPath = g_opts.installPath + "_" + commitid + g_opts.logFile = parameter_map["-l"] + g_opts.clusterConfig = parameter_map["-X"] + + +def checkParameter(): + """ + function: 1.check input parameters + 2.check user info + 3.check os user + 4.check log file info + 5.check mpprc file path + 6.check configFile + 7.check install path + input : NA + output: NA + """ + # check user info + checkUser(g_opts.userInfo) + + # check mpprc file path + g_opts.mpprcFile = DefaultValue.getMpprcFile() + checkOSUser() + + # check log file info + checkLogFile(g_opts.logFile) + + # check configFile + checkXMLFile() + + # check install path + checkInstallPath() + + +def checkUser(userInfo): + """ + function: check user + input : userInfo + output: NA + """ + if (userInfo == ""): + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50001"] % 'U' + ".") + + strList = userInfo.split(":") + if (len(strList) != 2): + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50009"]) + g_opts.user = strList[0].strip() + g_opts.group = strList[1].strip() + if (g_opts.user == "" or g_opts.group == ""): + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50004"] % "U") + + +def checkOSUser(): + """ + function: 1.use linux commands 'id -gn' get the user's group + 2.check the user's group match with the input group + 3.get user's env file + input : NA + output: NA + """ + try: + group = g_OSlib.getGroupByUser(g_opts.user) + except Exception as e: + GaussLog.exitWithError(str(e)) + if (group != g_opts.group): + GaussLog.exitWithError(ErrorCode.GAUSS_503["GAUSS_50305"]) + + # get user env file + g_opts.userProfile = g_opts.mpprcFile + + +def checkLogFile(logFile): + """ + function: check log file + input : logFile + output: NA + """ + if (logFile == ""): + logFile = DefaultValue.getOMLogPath(DefaultValue.LOCAL_LOG_FILE, + g_opts.user, "", "") + g_opts.logger = GaussLog(logFile, "CheckInstall") + + +def checkXMLFile(): + """ + function: check configuration file + 1.check -X 
parameter + 2.check configuration file exists + 3.check configuration file an absolute path + input : NA + output: NA + """ + if (g_opts.clusterConfig != ""): + if (not os.path.exists(g_opts.clusterConfig)): + g_opts.logger.logExit( + ErrorCode.GAUSS_502["GAUSS_50201"] % g_opts.clusterConfig) + if (not os.path.isabs(g_opts.clusterConfig)): + g_opts.logger.logExit( + ErrorCode.GAUSS_512["GAUSS_51206"] % g_opts.clusterConfig) + + +def checkPath(path_type_in): + """ + function: Check the path: + the path must be composed of letters, numbers, + underscores, slashes, hyphen, and spaces + input : path_type_in + output: NA + """ + pathLen = len(path_type_in) + i = 0 + a_ascii = ord('a') + z_ascii = ord('z') + A_ascii = ord('A') + Z_ascii = ord('Z') + num0_ascii = ord('0') + num9_ascii = ord('9') + blank_ascii = ord(' ') + sep1_ascii = ord('/') + sep2_ascii = ord('_') + sep3_ascii = ord('-') + sep4_ascii = ord('.') + for i in range(0, pathLen): + char_check = ord(path_type_in[i]) + if (not ( + a_ascii <= char_check <= z_ascii + or A_ascii <= char_check <= Z_ascii + or num0_ascii <= char_check <= num9_ascii + or char_check == blank_ascii + or char_check == sep1_ascii + or char_check == sep2_ascii + or char_check == sep3_ascii + or char_check == sep4_ascii)): + return False + return True + + +def checkInstallPath(): + """ + function: check installation path + input : NA + output: NA + """ + if (g_opts.installPath == ""): + g_opts.logger.logExit(ErrorCode.GAUSS_500["GAUSS_50001"] % 'R' + ".") + g_opts.installPath = os.path.normpath(g_opts.installPath) + g_opts.installPath = os.path.realpath(g_opts.installPath) + if (not os.path.isdir(os.path.realpath(g_opts.installPath))): + g_opts.logger.logExit( + ErrorCode.GAUSS_502["GAUSS_50201"] % g_opts.installPath) + if (not checkPath(g_opts.installPath)): + g_opts.logger.logExit(ErrorCode.GAUSS_512["GAUSS_51235"] % + g_opts.installPath + " The path must be " + "composed of" + "letters, numbers," + "underscores," + "slashes, hyphen, " + "and spaces." + ) + + g_opts.logger.debug( + "Using installation program path: %s." % g_opts.installPath) + g_opts.logger.debug("Using set configuration file parameters: %s." % str( + g_opts.confParameters)) + + +def checkOldInstallStatus(): + """ + function: Check old database install. + If this user have old install, report error and exit. + input : NA + output: NA + """ + g_opts.logger.log("Checking old installation.") + # Check $GAUSS_ENV. + try: + gauss_ENV = DefaultValue.getEnvironmentParameterValue("GAUSS_ENV", + g_opts.user) + if (str(gauss_ENV) == str(INSTALL_FLAG)): + g_opts.logger.logExit(ErrorCode.GAUSS_518["GAUSS_51806"]) + except Exception as ex: + g_opts.logger.logExit(str(ex)) + g_opts.logger.log("Successfully checked old installation.") + + +def checkSHA256(): + """ + function: Check the sha256 number for database install binary file. 
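+              The verification itself is delegated to
+              DefaultValue.checkPackageOS(); conceptually it compares a
+              package digest against a recorded checksum. A sketch of that
+              kind of comparison (file names are hypothetical; this is not
+              the actual implementation):
+
+                  import hashlib
+
+                  def sha256_of(path):
+                      h = hashlib.sha256()
+                      with open(path, "rb") as f:
+                          for chunk in iter(lambda: f.read(8192), b""):
+                              h.update(chunk)
+                      return h.hexdigest()
+
+                  # ok = (sha256_of("pkg.tar.gz") ==
+                  #       open("pkg.tar.gz.sha256").read().split()[0])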
+ input : NA + output: NA + """ + g_opts.logger.log("Checking SHA256.") + try: + DefaultValue.checkPackageOS() + except Exception as e: + g_opts.logger.logExit(str(e)) + g_opts.logger.log("Successfully checked SHA256.") + + +def getFileInfo(fileName): + """ + function: + input : filename + output: file context + """ + res = g_file.readFile(fileName) + if (len(res) != 1): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50204"] % fileName) + return res[0].strip() + + +def checkOSKernel(): + """ + function: Check OS kernel parameters: share memory size and semaphore.( + postgresql.conf/gtm.conf) + 1.check shared_buffers + 2.check sem + input : NA + output: NA + """ + g_opts.logger.log("Checking kernel parameters.") + # GB MB kB + GB = 1 * 1024 * 1024 * 1024 + MB = 1 * 1024 * 1024 + kB = 1 * 1024 + shared_buffers = 1 * GB + max_connections = 800 + + for item in g_opts.confParameters: + tmp = item.strip() + listname = tmp.split("=") + try: + if (((listname[0].lower() > "shared_buffers") - ( + listname[0].lower() < "shared_buffers")) == 0): + if listname[1][0:-2].isdigit() and ( + (listname[1][-2:] > "GB") - ( + listname[1][-2:] < "GB")) == 0: + shared_buffers = int(listname[1][0:-2]) * GB + if listname[1][0:-2].isdigit() and ( + (listname[1][-2:] > "MB") - ( + listname[1][-2:] < "MB")) == 0: + shared_buffers = int(listname[1][0:-2]) * MB + if listname[1][0:-2].isdigit() and ( + (listname[1][-2:] > "kB") - ( + listname[1][-2:] < "kB")) == 0: + shared_buffers = int(listname[1][0:-2]) * kB + if listname[1][0:-1].isdigit() and ( + (listname[1][-2:] > "B") - ( + listname[1][-2:] < "B")) == 0: + shared_buffers = int(listname[1][0:-1]) + if (((listname[0].lower() > "max_connections") - ( + listname[0].lower() < "max_connections")) == 0): + if listname[1].isdigit(): + max_connections = int(listname[1]) + except ValueError as ex: + g_opts.logger.logExit(ErrorCode.GAUSS_500[ + "GAUSS_50010"] % "kernel" + + "Error:\n%s" % str( + ex)) + + # check shared_buffers + if (shared_buffers < 128 * kB): + g_opts.logger.logExit( + ErrorCode.GAUSS_504["GAUSS_50400"] % ("Shared_buffers", "128KB")) + + try: + shmaxFile = "/proc/sys/kernel/shmmax" + shmallFile = "/proc/sys/kernel/shmall" + shmmax = getFileInfo(shmaxFile) + shmall = getFileInfo(shmallFile) + PAGESIZE = g_OSlib.getSysConfiguration() + if (shared_buffers > int(shmmax)): + g_opts.logger.logExit(ErrorCode.GAUSS_505["GAUSS_50501"]) + if (shared_buffers > int(shmall) * int(PAGESIZE)): + g_opts.logger.logExit(ErrorCode.GAUSS_504["GAUSS_50401"] % ( + "Shared_buffers", "shmall*PAGESIZE")) + except ValueError as ex: + g_opts.logger.logExit(str(ex)) + + try: + semFile = "/proc/sys/kernel/sem" + semList = getFileInfo(semFile) + paramList = semList.split("\t") + if (int(paramList[0]) < 17): + g_opts.logger.logExit( + ErrorCode.GAUSS_524["GAUSS_52401"] % ("SEMMSL", "SEMMSL") + + paramList[0] + ". Please check it.") + if (int(paramList[3]) < math.ceil((max_connections + 150) // 16)): + g_opts.logger.logExit( + ErrorCode.GAUSS_524["GAUSS_52401"] % ("SEMMNI", "SEMMNI") + + paramList[3] + ". Please check it.") + if (int(paramList[1]) < math.ceil((max_connections + 150) // 16) * 17): + g_opts.logger.logExit( + ErrorCode.GAUSS_524["GAUSS_52401"] % ("SEMMNS", "SEMMNS") + + paramList[1] + ". Please check it.") + except ValueError as ex: + g_opts.logger.logExit(str(ex)) + g_opts.logger.log("Successfully checked kernel parameters.") + + +def checkInstallDir(): + """ + function: Check database program file install directory size. + The free space size should not smaller than 100M. 
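+              How the free space is computed below: os.statvfs() reports
+              the available block count and block size, so free megabytes
+              are f_bavail * f_bsize // (1024 * 1024). Quick sketch
+              (illustrative; any mounted path works):
+
+                  import os
+                  vfs = os.statvfs("/tmp")
+                  free_mb = vfs.f_bavail * vfs.f_bsize // (1024 * 1024)
+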
+ 1.check if install path exists + 2.check install path is empty or not + 3.check install path uasge + input : NA + output: NA + """ + g_opts.logger.log("Checking directory.") + + # check if install path exists + if (not os.path.exists(g_opts.installPath)): + g_opts.logger.logExit(ErrorCode.GAUSS_502[ + "GAUSS_50201"] % g_opts.installPath + + "\nPlease create it first.") + + # check install path is empty or not. + installFileList = os.listdir(g_opts.installPath) + for oneFile in installFileList: + if (oneFile == "full_upgrade_bak" and os.path.isdir( + "%s/%s" % (g_opts.installPath, oneFile))): + continue + elif (oneFile == "lib" and os.path.isdir( + "%s/%s" % (g_opts.installPath, oneFile))): + libList = os.listdir("%s/%s" % (g_opts.installPath, oneFile)) + if (len(libList) == 1 and libList[ + 0] == "libsimsearch" and os.path.isdir( + "%s/%s/%s" % (g_opts.installPath, oneFile, libList[0]))): + continue + else: + g_opts.logger.logExit( + ErrorCode.GAUSS_502["GAUSS_50202"] % g_opts.installPath) + elif (oneFile == "bin" and os.path.isdir( + "%s/%s" % (g_opts.installPath, oneFile))): + binFieList = os.listdir("%s/%s" % (g_opts.installPath, oneFile)) + for binFie in binFieList: + if (binFie.find("cluster_static_config") < 0): + g_opts.logger.logExit(ErrorCode.GAUSS_502[ + "GAUSS_50202"] % + g_opts.installPath) + elif (oneFile == "secbox" and os.path.isdir( + "%s/%s" % (g_opts.installPath, oneFile))): + continue + else: + g_opts.logger.logExit( + ErrorCode.GAUSS_502["GAUSS_50202"] % g_opts.installPath) + + # check install path uasge + vfs = os.statvfs(g_opts.installPath) + availableSize = vfs.f_bavail * vfs.f_bsize // (1024 * 1024) + + g_opts.logger.log( + "Database program installation path available size %sM." % str( + availableSize)) + if (availableSize < 100): + g_opts.logger.logExit( + ErrorCode.GAUSS_504["GAUSS_50400"] % (g_opts.installPath, "100M")) + + g_opts.logger.log("Successfully checked directory.") + + +class CheckInstall(LocalBaseOM): + """ + Class: check install + """ + + def __init__(self, logFile, user, clusterConf, dwsMode=False): + """ + function: Constructor + input : logFile, user, clusterConf, dwsMode + output: NA + """ + LocalBaseOM.__init__(self, logFile, user, clusterConf, dwsMode) + if (self.clusterConfig == ""): + # Read config from static config file + self.readConfigInfo() + else: + self.clusterInfo = dbClusterInfo() + self.clusterInfo.initFromXml(self.clusterConfig) + hostName = DefaultValue.GetHostIpOrName() + self.dbNodeInfo = self.clusterInfo.getDbNodeByName(hostName) + if (self.dbNodeInfo is None): + self.logger.logExit( + ErrorCode.GAUSS_516["GAUSS_51619"] % hostName) + # get user info + self.getUserInfo() + if (user != "" and self.user != user.strip()): + self.logger.debug("User parameter : %s." 
% user) + self.logger.logExit(ErrorCode.GAUSS_503["GAUSS_50315"] % ( + self.user, self.clusterInfo.appPath)) + # init every component + self.initComponent() + + def checkPortAndIp(self): + """ + function: Check instance port and IP + input : NA + output: NA + """ + self.logger.log("Checking instance port and IP.") + components = self.etcdCons + self.cmCons + self.gtmCons \ + + self.cnCons + self.dnCons + try: + # config instance in paralle + parallelTool.parallelExecute(self.checkIpAndPort, components) + except Exception as e: + self.logger.logExit(str(e)) + self.logger.log("Successfully checked instance port and IP.") + + def checkIpAndPort(self, component): + """ + function: Check instance port and IP for per component + input : component + output: NA + """ + component.perCheck() + + def checkPreEnv(self): + """ + function: Check if LD path and path in preinstall had been changed. + input : NA + output: NA + """ + g_opts.logger.log("Checking preinstall enviroment value.") + # Check $GAUSS_ENV. + try: + # get mpp file by env parameter MPPDB_ENV_SEPARATE_PATH + mpprcFile = DefaultValue.getEnv(DefaultValue.MPPRC_FILE_ENV) + if (mpprcFile != "" and mpprcFile is not None): + userProfile = mpprcFile + if (not os.path.isabs(userProfile)): + raise Exception( + ErrorCode.GAUSS_512["GAUSS_51206"] % userProfile) + if (not os.path.exists(userProfile)): + raise Exception( + ErrorCode.GAUSS_502["GAUSS_50201"] % userProfile) + else: + userpath = pwd.getpwnam(self.user).pw_dir + userProfile = os.path.join(userpath, ".bashrc") + reEnvList = g_file.readFile(userProfile) + checkList = [ + "export PATH=$GPHOME/script/gspylib/pssh/bin:$GPHOME/script" + ":$PATH", + "export LD_LIBRARY_PATH=$GPHOME/lib:$LD_LIBRARY_PATH"] + for check in checkList: + if (check not in reEnvList and ( + check + '\n') not in reEnvList): + self.logger.logExit( + ErrorCode.GAUSS_518["GAUSS_51802"] % check) + except Exception as e: + g_opts.logger.logExit(str(e)) + g_opts.logger.log("Successfully checked preinstall enviroment value.") + + +if __name__ == '__main__': + ################################################################### + # check install + ################################################################### + """ + function: Check all kinds of environment. It includes: + 1. Input parameters. + 2. OS version . + 3. If it has a old install. + 4. OS kernel parameters. + 5. Install directory size and stauts. + 6. Security. + 7. Binary file integrity verify. 
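+       The per-instance port/IP checks above are fanned out with
+       parallelTool.parallelExecute. A rough stdlib equivalent, assuming
+       only that func is the callable mapped over items (sketch, not the
+       project's parallelTool):
+
+           from concurrent.futures import ThreadPoolExecutor
+
+           def parallel_execute(func, items):
+               with ThreadPoolExecutor(max_workers=max(len(items), 1)) as pool:
+                   # list() drains the iterator so a worker exception
+                   # re-raises here, matching the try/except usage above
+                   return list(pool.map(func, items))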
+    input : NA
+    output: NA
+    """
+    try:
+        # parse and check input parameters
+        parseCommandLine()
+        checkParameter()
+        # Check whether an old installation already exists
+        checkOldInstallStatus()
+        # Check the sha256 checksum of the database install binary file
+        checkSHA256()
+
+        if (g_opts.platformString == "Linux"):
+            # Check OS kernel parameters: shared memory size
+            # and semaphores (postgresql.conf/gtm.conf)
+            checkOSKernel()
+            # Check database program file install directory size
+            checkInstallDir()
+
+        if (g_opts.clusterConfig != ""):
+            # Check instance port and IP
+            checker = CheckInstall(g_opts.logFile, g_opts.user,
+                                   g_opts.clusterConfig)
+            checker.checkPortAndIp()
+            checker.checkPreEnv()
+
+        g_opts.logger.closeLog()
+    except Exception as e:
+        GaussLog.exitWithError(str(e))
+
+    sys.exit(0)
diff --git a/script/local/CheckPreInstall.py b/script/local/CheckPreInstall.py
new file mode 100644
index 0000000..f75bd35
--- /dev/null
+++ b/script/local/CheckPreInstall.py
@@ -0,0 +1,115 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+#############################################################################
+# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+#
+# openGauss is licensed under Mulan PSL v2.
+# You can use this software according to the terms
+# and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+#
+#          http://license.coscl.org.cn/MulanPSL2
+#
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+# ----------------------------------------------------------------------------
+# Description : CheckPreInstall.py is a utility to check whether the
+#               PreInstall has been done or not.
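+#               For reference: in these scripts $GAUSS_ENV is "1" after a
+#               successful preinstall and "2" after a completed install.
+#               The check below simply reads it from the user's
+#               environment; a minimal sketch of the same probe:
+#
+#                   import subprocess
+#                   status, output = subprocess.getstatusoutput(
+#                       "echo $GAUSS_ENV 2>/dev/null")
+#                   preinstall_done = output.strip() in ("1", "2")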
+############################################################################# +import subprocess +import getopt +import sys +import os +import pwd + +sys.path.append(sys.path[0] + "/../") +from gspylib.common.GaussLog import GaussLog +from gspylib.common.ParameterParsecheck import Parameter +from gspylib.common.Common import DefaultValue +from gspylib.common.ErrorCode import ErrorCode + +PREINSTALL_FLAG = "1" +INSTALL_FLAG = "2" + + +def usage(): + """ +Usage: + python3 CheckPreInstall.py -h|--help + python3 CheckPreInstall.py -U user + """ + print(usage.__doc__) + + +def main(): + """ + function: main function: + 1.parse parameter + 2.check $GAUSS_ENV + input : NA + output: NA + """ + try: + (opts, args) = getopt.getopt(sys.argv[1:], "U:h:t:", ["help"]) + except Exception as e: + usage() + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50000"] % str(e)) + + if (len(args) > 0): + GaussLog.exitWithError( + ErrorCode.GAUSS_500["GAUSS_50000"] % str(args[0])) + + DBUser = "" + checkInstall = "preinstall" + for (key, value) in opts: + if (key == "-h" or key == "--help"): + usage() + sys.exit(0) + elif (key == "-U"): + DBUser = value + elif (key == "-t"): + checkInstall = value + else: + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50000"] % key) + # check para vaild + Parameter.checkParaVaild(key, value) + + # check user + if (DBUser == ""): + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50001"] % 'U' + ".") + + try: + execUser = pwd.getpwuid(os.getuid()).pw_name + if (execUser != DBUser): + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50004"] % "U") + # Check if user exists and if is the right user + DefaultValue.checkUser(DBUser, False) + except Exception as e: + GaussLog.exitWithError(str(e)) + # check if have done preinstall for this user + cmd = "echo $GAUSS_ENV 2>/dev/null" + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + GaussLog.exitWithError( + ErrorCode.GAUSS_518["GAUSS_51802"] % "GAUSS_ENV") + if checkInstall == "preinstall": + if ( + output.strip() == PREINSTALL_FLAG or output.strip() == + INSTALL_FLAG): + GaussLog.printMessage("Successfully checked GAUSS_ENV.") + sys.exit(0) + else: + GaussLog.exitWithError( + ErrorCode.GAUSS_518["GAUSS_51805"] % "GAUSS_ENV") + elif checkInstall == "install" and output.strip() == INSTALL_FLAG: + GaussLog.exitWithError(ErrorCode.GAUSS_518["GAUSS_51806"]) + + +if __name__ == '__main__': + try: + main() + except Exception as e: + GaussLog.exitWithError(str(e)) diff --git a/script/local/CheckUninstall.py b/script/local/CheckUninstall.py new file mode 100644 index 0000000..97033ec --- /dev/null +++ b/script/local/CheckUninstall.py @@ -0,0 +1,248 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +############################################################################# +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +# Description : CheckUninstall.py is a utility to check the +# instance status on local node. 
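+#               Flag semantics used below: -d marks the data path for
+#               cleaning and -u marks the OS user; the script itself only
+#               validates the environment (OS version, user, install path
+#               ownership) before the real uninstall runs.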
+############################################################################# + +import getopt +import os +import sys +import platform +import subprocess + +sys.path.append(sys.path[0] + "/../") +from gspylib.common.GaussLog import GaussLog +from gspylib.common.Common import DefaultValue +from gspylib.common.ParameterParsecheck import Parameter +from gspylib.common.ErrorCode import ErrorCode + + +class CheckUninstall: + ''' + classdocs + ''' + + def __init__(self): + ''' + Constructor + ''' + self.installPath = "" + self.user = "" + self.cleanUser = False + self.cleanData = False + self.logger = None + + ########################################################################## + # Help context. U:R:oC:v: + ########################################################################## + def usage(self): + """ + function: usage + input : NA + output : NA + """ + print("CheckUninstall.py is a utility to check Gauss MPP Database" + " status .") + print(" ") + print("Usage:") + print(" python3 CheckUninstall.py --help") + print(" python3 CheckUninstall.py -R installpath -U user [-d] [-u]" + " [-l log]") + print(" ") + print("Common options:") + print(" -U the database program and cluster owner") + print(" -R the database program path") + print(" -d clean data path") + print(" -u clean user") + print(" -l log directory") + print(" --help show this help, then exit") + print(" ") + + ########################################################################## + # check uninstall + ########################################################################## + def checkUninstall(self): + """ + function: + Check all kinds of environment. It includes: + 1. Input parameters. + 2. OS version. + 3. User Info + 4. If it has a old install. + input : NA + output: NA + """ + self.__checkParameters() + self.__checkOSVersion() + self.__checkOsUser() + self.__checkInstanllPath() + self.logger.closeLog() + + def __checkParameters(self): + """ + function: check input parameters + input : NA + output: NA + """ + try: + opts, args = getopt.getopt(sys.argv[1:], "U:R:l:du", ["help"]) + except getopt.GetoptError as e: + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50000"] + % str(e)) + + if (len(args) > 0): + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50000"] + % str(args[0])) + + logFile = "" + for key, value in opts: + if (key == "-U"): + self.user = value + elif (key == "-R"): + self.installPath = value + elif (key == "-l"): + logFile = value + elif (key == "-d"): + self.cleanData = True + elif (key == "-u"): + self.cleanUser = True + elif (key == "--help"): + self.usage() + sys.exit(0) + else: + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50000"] + % key) + + Parameter.checkParaVaild(key, value) + + if (self.user == ""): + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50001"] + % 'U' + ".") + + if (self.installPath == ""): + raise Exception(ErrorCode.GAUSS_500["GAUSS_50001"] % 'R' + ".") + + if (logFile == ""): + logFile = DefaultValue.getOMLogPath(DefaultValue.LOCAL_LOG_FILE, + "", self.installPath, "") + + self.logger = GaussLog(logFile, "CheckUninstall") + self.logger.debug("The installation path of program: " + + self.installPath) + self.logger.debug("The parameter of clean user is: %s." + % self.cleanUser) + self.logger.debug("The parameter of clean data is: %s." + % self.cleanData) + + def __checkOSVersion(self): + """ + function: Check operator system version, install binary file version. 
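+            The real gate is DefaultValue.checkOsVersion(); a rough
+            stand-in for the kind of test it performs (the accepted
+            platform list here is invented purely for illustration):
+
+                import platform
+
+                def os_supported():
+                    banner = platform.platform().lower()
+                    return any(k in banner
+                               for k in ("euleros", "centos", "openeuler"))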
+ input : NA + output: NA + """ + self.logger.log("Checking OS version.") + try: + if (not DefaultValue.checkOsVersion()): + raise Exception(ErrorCode.GAUSS_519["GAUSS_51900"] + + "The current system is: %s." + % platform.platform()) + except Exception as e: + raise Exception(str(e)) + + self.logger.log("Successfully checked OS version.") + + def __checkOsUser(self): + """ + function: Check if user exists and get $GAUSSHOME + input : NA + output: NA + """ + if not self.cleanUser: + self.logger.log("Skipping user check. ") + return + + self.logger.log("Checking OS user.") + try: + DefaultValue.checkUser(self.user, False) + except Exception as e: + raise Exception(str(e)) + + # Get GAUSSHOME + cmd = "echo $GAUSSHOME 2>/dev/null" + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + self.logger.debug("The cmd is %s " % cmd) + raise Exception(ErrorCode.GAUSS_518["GAUSS_51802"] + % "$GAUSSHOME" + " Error:\n%s" % output) + + gaussHome = output.strip() + if (gaussHome == ""): + raise Exception(ErrorCode.GAUSS_518["GAUSS_51800"] % "$GAUSSHOME") + + if (gaussHome != self.installPath): + self.logger.debug("$GAUSSHOME: %s." % gaussHome) + self.logger.debug("Installation path parameter: %s." + % self.installPath) + raise Exception(ErrorCode.GAUSS_518["GAUSS_51807"]) + self.logger.log("Successfully checked OS user.") + + def __checkInstanllPath(self): + """ + function: Check if path exists and get owner + input : NA + output: NA + """ + self.logger.log("Checking installation path.") + if (not os.path.exists(self.installPath)): + self.logger.log("Installation path does not exist: %s." + % self.installPath) + if (not self.cleanData and not self.cleanUser): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50219"] + % "installation path") + else: + # Get owner + cmd = "stat -c '%%U:%%G' %s" % self.installPath + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + self.logger.debug("The cmd is %s " % cmd) + raise Exception(ErrorCode.GAUSS_503["GAUSS_50308"] + + " Error: \n%s" % str(output)) + + owerInfo = output.strip() + (user, group) = owerInfo.split(':') + if (self.user != user.strip()): + self.logger.debug("The owner information of installation" + " path: %s." % owerInfo) + self.logger.debug("User parameter : %s." % self.user) + raise Exception(ErrorCode.GAUSS_503["GAUSS_50315"] + % (self.user, self.installPath)) + self.logger.log("Successfully checked installation path.") + + +if __name__ == '__main__': + """ + main function + """ + try: + checker = CheckUninstall() + checker.checkUninstall() + except Exception as e: + GaussLog.exitWithError(str(e)) + + sys.exit(0) diff --git a/script/local/CheckUpgrade.py b/script/local/CheckUpgrade.py new file mode 100644 index 0000000..91a777d --- /dev/null +++ b/script/local/CheckUpgrade.py @@ -0,0 +1,549 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +############################################################################# +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. 
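+#
+# (Aside: the `stat -c '%U:%G'` owner probe used a few lines above by
+# CheckUninstall has a pure-Python equivalent via os.stat plus pwd/grp;
+# sketch only:
+#
+#     import grp
+#     import os
+#     import pwd
+#
+#     def owner_of(path):
+#         st = os.stat(path)
+#         return "%s:%s" % (pwd.getpwuid(st.st_uid).pw_name,
+#                           grp.getgrgid(st.st_gid).gr_name)
+# )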
+# ---------------------------------------------------------------------------- +# Description : CheckUpgrade.py is a utility to check the env before upgrade. +############################################################################# +import getopt +import sys +import os +import subprocess +import pwd +import re +import traceback + +sys.path.append(sys.path[0] + "/../") +from gspylib.common.GaussLog import GaussLog +from gspylib.common.Common import DefaultValue +from gspylib.common.DbClusterInfo import dbClusterInfo +from gspylib.common.ParameterParsecheck import Parameter +from gspylib.common.ErrorCode import ErrorCode +from gspylib.os.gsfile import g_file +import impl.upgrade.UpgradeConst as Const + +INSTANCE_TYPE_UNDEFINED = -1 +MASTER_INSTANCE = 0 +STANDBY_INSTANCE = 1 +DUMMY_STANDBY_INSTANCE = 2 + +############################################################################# +# Global variables +############################################################################# +g_logger = None +g_clusterInfo = None + + +class CmdOptions(): + """ + Class for defining some cmd options + """ + + def __init__(self): + self.action = "" + # the current old appPath + self.appPath = "" + self.user = "" + self.logFile = "" + self.xmlFile = "" + self.upgrade_version = "" + self.newAppPath = "" + + +class CheckUpgrade(): + """ + Class to Check application setting for upgrade + """ + + def __init__(self, appPath, action, newAppPath): + ''' + Constructor + ''' + self.appPath = appPath + self.action = action + self.newAppPath = newAppPath + + def run(self): + """ + function: Check upgrade environment + input: NA + output: NA + """ + self.__checkSHA256() + self.__checkAppPath() + self.__checkDataDir() + if self.action == Const.ACTION_INPLACE_UPGRADE: + self.__checkBackupDir() + self.__checkAppVersion() + self.__backupDbClusterInfo() + + def __checkAppPath(self): + """ + function: check app path + input: NA + output: NA + """ + if not os.path.isdir(self.appPath): + g_logger.logExit(ErrorCode.GAUSS_502["GAUSS_50201"] + % self.appPath) + + static_config = "%s/bin/cluster_static_config" % self.appPath + if not os.path.exists(static_config): + g_logger.logExit(ErrorCode.GAUSS_502["GAUSS_50201"] + % static_config) + if not os.path.isfile(static_config): + g_logger.logExit(ErrorCode.GAUSS_502["GAUSS_50210"] + % static_config) + + if not os.path.isdir(self.newAppPath): + g_logger.logExit(ErrorCode.GAUSS_502["GAUSS_50201"] + % self.newAppPath) + if os.path.samefile(self.newAppPath, self.appPath): + g_logger.logExit(ErrorCode.GAUSS_502["GAUSS_50233"] + % ("install path", "$GAUSSHOME")) + + # check if the current app path is correct size, + # there should be no personal data + cmd = "du -hms %s | awk '{print $1}'" % os.path.realpath(self.appPath) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + g_logger.logExit(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + " ERROR: %s" % str(output)) + appSize = output + + cmd = "du -hms %s/lib/postgresql/pg_plugin | awk '{print $1}'" \ + % os.path.realpath(self.appPath) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + g_logger.logExit(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + " ERROR: %s" % str(output)) + pluginSize = output + if int(appSize) - int(pluginSize) > Const.MAX_APP_SIZE: + g_logger.logExit(ErrorCode.GAUSS_504["GAUSS_50401"] + % (self.appPath, "%dM" % Const.MAX_APP_SIZE) + + "\nThere may be personal data in path %s," + " please move your data to other directory" + % self.appPath) + + def __checkAppVersion(self): 
+ """ + function: Check version + input: NA + output: NA + """ + # grey upgrade no need do this check + curVer = DefaultValue.getAppVersion(self.appPath) + if (curVer == ""): + g_logger.logExit(ErrorCode.GAUSS_516["GAUSS_51623"]) + + gaussHome = DefaultValue.getEnvironmentParameterValue("GAUSSHOME", + g_opts.user) + if not gaussHome: + g_logger.logExit(ErrorCode.GAUSS_518["GAUSS_51800"] + % "$GAUSSHOME") + + gaussdbFile = "%s/bin/gaussdb" % gaussHome + cmd = "%s --version 2>/dev/null" % (gaussdbFile) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + g_logger.logExit(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + "Error:\n %s" % output) + + def __checkSHA256(self): + ''' + function: Check the sha256 of new version + input: NA + output: NA + ''' + try: + DefaultValue.checkPackageOS() + except Exception as e: + g_logger.logExit(str(e)) + + def __getTmpDir(self): + """ + """ + return DefaultValue.getTmpDirFromEnv() + + def __getBackupDir(self): + """ + """ + return "%s/binary_upgrade" % DefaultValue.getTmpDirFromEnv() + + def __getGaussdbVersion(self, gaussdbFile): + """ + """ + # backup gaussdb version + # get old cluster version by gaussdb + # the information of gaussdb like this: + # gaussdb Gauss200 V100R00XCXX build xxxx + # compiled at xxxx-xx-xx xx:xx:xx + if (not os.path.isfile(gaussdbFile)): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50210"] % gaussdbFile) + + oldClusterVersion = "" + cmd = "%s --version" % (gaussdbFile) + (status, output) = subprocess.getstatusoutput(cmd) + if (status == 0 and None is not re.compile( + r'V[0-9]{3}R[0-9]{3}C[0-9]{2}').search(str(output))): + oldClusterVersion = re.compile( + r'V[0-9]{3}R[0-9]{3}C[0-9]{2}').search(str(output)).group() + else: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + "\nOutput:%s" % output) + return oldClusterVersion + + def __backupDbClusterInfo(self): + """ + function: backup DbClusterInfo.py and cluster_static_config to temp + path + input: NA + output: NA + """ + commonStaticConfigFile = "%s/bin/cluster_static_config" \ + % g_opts.appPath + commonUpgradeVersionFile = "%s/bin/upgrade_version" % g_opts.appPath + commonDbClusterInfoModule = \ + "%s/bin/script/gspylib/common/DbClusterInfo.py" % g_opts.appPath + + bakPath = self.__getTmpDir() + + # backup DbClusterInfo.py + oldDbClusterInfoModule = "%s/OldDbClusterInfo.py" % bakPath + cmd = "cp -p '%s' '%s'" % (commonDbClusterInfoModule, + oldDbClusterInfoModule) + cmd += " && cp -rp '%s/bin/script/' '%s'" % (g_opts.appPath, bakPath) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + g_logger.logExit(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + "\nOutput:%s" % output) + + # backup cluster_static_config + cmd = "cp -p '%s' '%s'/" % (commonStaticConfigFile, bakPath) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + g_logger.logExit(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + "\nOutput:%s" % output) + + upgradeBakPath = self.__getBackupDir() + try: + # backup upgrade_version + oldUpgradeVersionFile = "%s/old_upgrade_version" % upgradeBakPath + cmd = "cp -p %s %s" % (commonUpgradeVersionFile, + oldUpgradeVersionFile) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + "\nOutput:%s" % output) + + except Exception as e: + g_logger.debug("Backup failed.ERROR:%s\nClean backup path." 
+ % str(e)) + if (os.path.isdir(upgradeBakPath)): + g_file.removeDirectory(upgradeBakPath) + + def __checkBackupDir(self): + """ + for binary upgrade, Check if backup dir exists, it may be not empty, + because in the second time, we may have + file record app dir and record node app + INPUT:NA + OUTPUT:NA + HIDEN: + 1.paths need to be baked + 2.100M + PRECONDITION: + POSTCONDITION: + 2.for binary upgrade, bak dir has been ready + TEST: + Pseudocode: + """ + binaryBakDir = "%s/binary_upgrade" % DefaultValue.getTmpDirFromEnv() + if not os.path.isdir(binaryBakDir): + os.makedirs(binaryBakDir, DefaultValue.KEY_DIRECTORY_PERMISSION) + + vfs = os.statvfs(binaryBakDir) + availableSize = vfs.f_bavail * vfs.f_bsize / (1024 * 1024) + g_logger.debug("The available size of backup directory: %d M." + % availableSize) + if(availableSize < Const.MAX_APP_SIZE): + g_logger.logExit(ErrorCode.GAUSS_504["GAUSS_50400"] + % ("BakDir", "%dM" % Const.MAX_APP_SIZE)) + + def __checkDataDir(self): + """ + function: check data directory access rights. + input: NA + output:NA + """ + g_logger.debug("Checking data directory access rights.") + + instDirs = self.getNodeDirs() + for path in instDirs: + if (not os.path.exists(path)): + g_logger.logExit(ErrorCode.GAUSS_502["GAUSS_50201"] % path) + if (not g_file.checkDirWriteable(path)): + g_logger.logExit(ErrorCode.GAUSS_502["GAUSS_50205"] % path) + + g_logger.debug("Successfully to check data directory access rights.") + + def getNodeDirs(self, pathType=""): + """ + function: get the install path and data path of cluster + 1. collect install path + 2. collect data path + 3. collect tablespc path + 4. remove the same items + input : pathType + output : tempPaths + """ + localHost = DefaultValue.GetHostIpOrName() + dbNode = g_clusterInfo.getDbNodeByName(localHost) + if not dbNode: + g_logger.logExit(ErrorCode.GAUSS_512["GAUSS_51209"] + % ("NODE", localHost)) + newNodePaths = [] + DnInfos = [] + tablespacePaths = [] + + # collect install path + newNodePaths.append(g_clusterInfo.appPath) + + for instance in dbNode.datanodes: + newNodePaths.append(instance.datadir) + if (instance.instanceType != DUMMY_STANDBY_INSTANCE): + DnInfos.append(instance.datadir) + if ('ssdDir' in dir(instance)): + if (len(instance.ssdDir) != 0): + newNodePaths.append(instance.ssdDir) + + # collect tablespc path + for instanceDir in DnInfos: + if (not os.path.exists("%s/pg_tblspc" % instanceDir)): + g_logger.debug("%s/pg_tblspc does not exist." % instanceDir) + continue + fileList = os.listdir("%s/pg_tblspc" % instanceDir) + if (len(fileList)): + for filename in fileList: + if (os.path.islink("%s/pg_tblspc/%s" + % (instanceDir, filename))): + linkDir = os.readlink("%s/pg_tblspc/%s" + % (instanceDir, filename)) + if (os.path.isdir(linkDir)): + tablespacePaths.append(linkDir) + else: + g_logger.debug("%s is not a link directory." + % filename) + else: + g_logger.debug("%s is not a link file." % filename) + else: + g_logger.debug("%s/pg_tblspc is empty." 
% instanceDir) + + if (pathType == "tablespace"): + tempPaths = tablespacePaths + else: + tempPaths = newNodePaths + tablespacePaths + + # remove the same items + tempPaths = list(set(tempPaths)) + + return tempPaths + + +def usage(): + """ +Usage: + python3 CheckUpgrade.py -t action -R installpath -N newClusterInstallPath + [-U user] [-l log] +Common options: + -t the type of action + -l the path of log file + --help show this help, then exit +Options for big version upgrade check + -U the user of old cluster + -X path of the XML configuration file +Options for upgrade check + -R the install path of old cluster + -N the install path of new cluster + """ + print(usage.__doc__) + + +def parseCommandLine(): + """ + function: Parse command line and save to global variable + input: NA + output: NA + """ + try: + opts, args = getopt.getopt(sys.argv[1:], "t:R:U:v:l:X:N:", ["help"]) + except Exception as e: + usage() + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50000"] % str(e)) + + if(len(args) > 0): + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50000"] + % str(args[0])) + + for (key, value) in opts: + if (key == "--help"): + usage() + sys.exit(0) + elif (key == "-t"): + g_opts.action = value + elif (key == "-R"): + g_opts.appPath = value + elif (key == "-U"): + g_opts.user = value + elif (key == "-l"): + g_opts.logFile = os.path.realpath(value) + elif (key == "-X"): + g_opts.xmlFile = os.path.realpath(value) + elif (key == "-v"): + g_opts.upgrade_version = value + elif (key == "-N"): + g_opts.newAppPath = value + else: + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50000"] % key) + + Parameter.checkParaVaild(key, value) + + +def checkParameter(): + """ + function: Parse command line and save to global variable + input: NA + output: NA + """ + # only check need parameter, just ignore no need parameter + if (g_opts.user == ""): + g_opts.user = pwd.getpwuid(os.getuid()).pw_name + + if (g_opts.logFile == ""): + g_opts.logFile = DefaultValue.getOMLogPath( + DefaultValue.LOCAL_LOG_FILE, "", g_opts.appPath, "") + + if g_opts.action in [Const.ACTION_LARGE_UPGRADE, + Const.ACTION_SMALL_UPGRADE, + Const.ACTION_INPLACE_UPGRADE]: + if (g_opts.appPath == ""): + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50001"] + % "R" + ".") + + elif(g_opts.action == Const.ACTION_CHECK_VERSION): + if(g_opts.upgrade_version == ""): + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50001"] + % "v" + ".") + else: + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50004"] % 't' + + " Value: %s." 
% g_opts.action) + + +def checkVersion(): + """ + function: check version information + input: NA + output: NA + """ + g_logger.debug("Checking version information.") + gaussHome = DefaultValue.getInstallDir(g_opts.user) + if gaussHome == "": + raise Exception(ErrorCode.GAUSS_518["GAUSS_51800"] % "$GAUSSHOME") + localPostgresVersion = \ + DefaultValue.getAppBVersion(os.path.realpath(gaussHome)) + if (localPostgresVersion.find(g_opts.upgrade_version) > 0): + g_logger.debug("Successfully checked version information.") + else: + raise Exception(ErrorCode.GAUSS_529["GAUSS_52935"]) + + +class OldVersionModules(): + """ + Class for providing some functions to apply old version cluster + """ + + def __init__(self): + ''' + Constructor + ''' + self.oldDbClusterInfoModule = None + self.oldDbClusterStatusModule = None + + +def initGlobalInfos(): + """ + function: init global infos + input: NA + output: NA + """ + global g_logger + global g_clusterInfo + g_logger = GaussLog(g_opts.logFile, "CheckUpgrade") + try: + if g_opts.action in [Const.ACTION_CHECK_VERSION]: + g_logger.log("No need to init cluster info under action %s" + % g_opts.action) + return + g_clusterInfo = dbClusterInfo() + if g_opts.xmlFile == "" or not os.path.exists(g_opts.xmlFile): + if g_opts.appPath == "" or not os.path.exists(g_opts.appPath): + raise Exception(ErrorCode.GAUSS_500["GAUSS_50001"] % "R") + staticConfigFile = "%s/bin/cluster_static_config" % g_opts.appPath + g_clusterInfo.initFromStaticConfig(g_opts.user, staticConfigFile) + else: + g_clusterInfo.initFromXml(g_opts.xmlFile) + except Exception as e: + g_logger.log(traceback.format_exc()) + g_logger.logExit(str(e)) + + +if __name__ == '__main__': + """ + main function + 1. parse command + 2. check other parameter + 3. init global infos + 4. check version information + 5. Check application setting for upgrade + """ + try: + g_opts = CmdOptions() + # 1. parse command + parseCommandLine() + # 2. check other parameter + checkParameter() + # 3. init global infos + initGlobalInfos() + except Exception as e: + GaussLog.exitWithError(str(e) + traceback.format_exc()) + + try: + # 4. check version information + if(g_opts.action == Const.ACTION_CHECK_VERSION): + checkVersion() + else: + # 5. Check application setting for upgrade + g_logger.log("Checking upgraded environment.") + checker = CheckUpgrade(g_opts.appPath, g_opts.action, + g_opts.newAppPath) + checker.run() + g_logger.log("Successfully checked upgraded environment.") + g_logger.closeLog() + except Exception as e: + g_logger.log(traceback.format_exc()) + g_logger.logExit(str(e)) + + sys.exit(0) diff --git a/script/local/CleanInstance.py b/script/local/CleanInstance.py new file mode 100644 index 0000000..754a4dd --- /dev/null +++ b/script/local/CleanInstance.py @@ -0,0 +1,290 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +############################################################################# +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. 
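+#
+# (Aside: the VxxxRxxxCxx token that CheckUpgrade extracts from the
+# `gaussdb --version` banner can be isolated with a single regex; sketch:
+#
+#     import re
+#
+#     def old_cluster_version(banner):
+#         # e.g. "gaussdb Gauss200 V100R003C10 build 1234" -> "V100R003C10"
+#         m = re.search(r'V[0-9]{3}R[0-9]{3}C[0-9]{2}', banner)
+#         if m is None:
+#             raise ValueError("no VxxxRxxxCxx version token found")
+#         return m.group()
+# )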
+# ----------------------------------------------------------------------------
+# Description : CleanInstance.py is a utility to clean Gauss MPP
+#               Database instances.
+#############################################################################
+import getopt
+import os
+import time
+import sys
+
+sys.path.append(sys.path[0] + "/../")
+from gspylib.common.GaussLog import GaussLog
+from gspylib.common.Common import DefaultValue, ClusterInstanceConfig
+from gspylib.common.ParameterParsecheck import Parameter
+from gspylib.common.ErrorCode import ErrorCode
+from gspylib.common.LocalBaseOM import LocalBaseOM
+from gspylib.threads.parallelTool import parallelTool
+from gspylib.os.gsOSlib import g_OSlib
+
+########################################################################
+INSTANCE_TYPE_UNDEFINED = -1
+MASTER_INSTANCE = 0
+STANDBY_INSTANCE = 1
+DUMMY_STANDBY_INSTANCE = 2
+TYPE_DATADIR = "data-dir"
+TYPE_LOCKFILE = "lock-file"
+# interval in seconds between checks for a lingering gs_initdb process
+TIME_INTERVAL = 3
+
+########################################################################
+# Global variables define
+########################################################################
+g_opts = None
+
+
+########################################################################
+class CmdOptions():
+    """
+    class: CmdOptions
+    """
+
+    def __init__(self):
+        """
+        function: Constructor
+        input : NA
+        output: NA
+        """
+        self.clusterInfo = None
+        self.dbNodeInfo = None
+        self.logger = None
+
+        self.cleanType = []
+        self.Instancedirs = []
+        self.tblspcdirs = []
+        self.user = ""
+        self.group = ""
+        self.clusterConfig = ""
+        self.failedDir = ""
+        self.nodedirCount = 0
+        self.inputDir = False
+        self.logFile = ""
+
+
+##############################################################################
+# Help context.
+##############################################################################
+def usage():
+    """
+CleanInstance.py is a utility to clean Gauss MPP Database instances.
+
+Usage:
+    python3 CleanInstance.py --help
+    python3 CleanInstance.py -U user
+        [-t cleanType...]
+        [-D datadir...]
+        [-l logfile]
+        [-X clusterConfig]
+
+Common options:
+    --help    show this help, then exit
+    -U        the user of Gauss MPP Database
+    -t        the content to be cleaned; valid values: data-dir, lock-file
+    -D        the directory of the instance to be cleaned
+    -l        the log file path
+    -X        the path of the XML configuration file
+    """
+    print(usage.__doc__)
+
+
+def parseCommandLine():
+    """
+    function: check input parameters
+    input : NA
+    output: NA
+    """
+    try:
+        opts, args = getopt.getopt(sys.argv[1:], "U:D:l:t:X:", ["help"])
+    except getopt.GetoptError as e:
+        GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50000"] % str(e))
+
+    if (len(args) > 0):
+        GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50000"]
+                               % str(args[0]))
+
+    global g_opts
+    g_opts = CmdOptions()
+
+    for key, value in opts:
+        if (key == "-U"):
+            g_opts.user = value
+        elif (key == "-D"):
+            g_opts.inputDir = True
+            g_opts.Instancedirs.append(os.path.normpath(value))
+        elif (key == "-t"):
+            g_opts.cleanType.append(value)
+        elif (key == "-l"):
+            g_opts.logFile = value
+        elif (key == "-X"):
+            g_opts.clusterConfig = value
+        elif (key == "--help"):
+            usage()
+            sys.exit(0)
+        else:
+            GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50000"] % key)
+        Parameter.checkParaVaild(key, value)
+
+
+def checkParameter():
+    """
+    function: check input parameters
+    """
+    # check if the user exists and is the right user
+    if (g_opts.user == ""):
+        GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50001"] % 'U' + ".")
+    try:
+        DefaultValue.checkUser(g_opts.user, False)
+    except Exception as e:
+        GaussLog.exitWithError(str(e))
+
+    if (os.getuid() == 0 and g_opts.clusterConfig is None):
+        GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50001"] % 'X' + ".")
+
+    if (len(g_opts.cleanType) == 0):
+        g_opts.cleanType = [TYPE_DATADIR, TYPE_LOCKFILE]
+
+    if (g_opts.logFile == ""):
+        g_opts.logFile = DefaultValue.getOMLogPath(
+            DefaultValue.LOCAL_LOG_FILE, g_opts.user, "", "")
+
+
+class CleanInstance(LocalBaseOM):
+    """
+    class: CleanInstance
+    """
+
+    def __init__(self, logFile, user, clusterConf, dwsMode=False):
+        """
+        function: Constructor
+        input : logFile, user, clusterConf, dwsMode
+        output: NA
+        """
+        LocalBaseOM.__init__(self, logFile, user, clusterConf, dwsMode)
+        if (self.clusterConfig == ""):
+            # Read config from static config file
+            self.readConfigInfo()
+        elif self.clusterConfig.endswith("json"):
+            self.readConfigInfoByJson()
+        else:
+            self.readConfigInfoByXML()
+        # get user info
+        self.getUserInfo()
+        if (user != "" and self.user != user.strip()):
+            self.logger.debug("User parameter : %s." % user)
+            raise Exception(ErrorCode.GAUSS_503["GAUSS_50315"]
+                            % (self.user, self.clusterInfo.appPath))
+        # init every component
+        self.initComponent()
+
+    ##########################################################################
+    # This is the main clean instance flow.
+    ##########################################################################
+    def cleanInstance(self):
+        """
+        function: Clean node instances.
+                  1. get the data directories, tablespaces and socket files
+                  2. use threads to delete the directories or files
+        input : NA
+        output: NA
+        """
+        self.logger.log("Cleaning instance.")
+        compentsList = []
+
+        for compent in self.dnCons:
+            if ((g_opts.inputDir) and
+                    (compent.instInfo.datadir not in g_opts.Instancedirs) and
+                    (compent.instInfo.ssdDir not in g_opts.Instancedirs)):
+                continue
+            peerInsts = self.clusterInfo.getPeerInstance(compent.instInfo)
+            nodename = ClusterInstanceConfig. \
+                setReplConninfoForSinglePrimaryMultiStandbyCluster(
+                    compent.instInfo, peerInsts, self.clusterInfo)[1]
+            comList = []
+            comList.append(compent)
+            comList.append(nodename)
+            compentsList.append(comList)
+
+        if (len(compentsList) != 0):
+            try:
+                self.logger.debug("Deleting instances.")
+                parallelTool.parallelExecute(self.uninstallCompent,
+                                             compentsList)
+            except Exception as e:
+                raise Exception(str(e))
+            self.logger.log("Successfully cleaned instances.")
+
+        self.logger.log("Successfully cleaned instance information.")
+
+    def uninstallCompent(self, compentEle=None):
+        """
+        function: uninstall one component
+        input : compentEle - a [component] or [component, nodename] list
+        output: NA
+        """
+        if compentEle is None:
+            compentEle = []
+        if len(compentEle) == 1:
+            compentEle[0].uninstall()
+        if len(compentEle) == 2:
+            compentEle[0].uninstall(compentEle[1])
+
+    def killProcess(self):
+        """
+        function: kill process for cleaning instance data.
+        input : NA
+        output: NA
+        """
+        pidList = g_OSlib.getProcess("gs_initdb")
+        if len(pidList) == 0:
+            return
+        self.logger.debug("Initdb process exists.")
+        g_OSlib.killProcessByProcName("gs_initdb")
+        while (True):
+            pidList = g_OSlib.getProcess("gs_initdb")
+            if len(pidList) != 0:
+                time.sleep(TIME_INTERVAL)
+            else:
+                self.logger.debug("Initdb process has exited.")
+                break
+
+
+if __name__ == '__main__':
+    ##########################################################################
+    # clean instance
+    ##########################################################################
+    """
+    function: Do clean instance
+              1. get the clusterInfo and dbNodeInfo
+              2. check the user and group
+              3. clean the instance
+    input : NA
+    output: NA
+    """
+    try:
+        # parse and check input parameters
+        parseCommandLine()
+        checkParameter()
+
+        # Initialize global parameters
+        cleanInst = CleanInstance(g_opts.logFile, g_opts.user,
+                                  g_opts.clusterConfig)
+        cleanInst.cleanInstance()
+    except Exception as e:
+        GaussLog.exitWithError(str(e))
+
+    sys.exit(0)
diff --git a/script/local/CleanOsUser.py b/script/local/CleanOsUser.py
new file mode 100644
index 0000000..6579d9c
--- /dev/null
+++ b/script/local/CleanOsUser.py
@@ -0,0 +1,210 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+#############################################################################
+# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+#
+# openGauss is licensed under Mulan PSL v2.
+# You can use this software according to the terms
+# and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+#
+# http://license.coscl.org.cn/MulanPSL2
+#
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+# ----------------------------------------------------------------------------
+# Description : CleanOsUser.py is a utility to clean the OS user.
+#############################################################################
+import getopt
+import sys
+import subprocess
+
+sys.path.append(sys.path[0] + "/../")
+from gspylib.common.GaussLog import GaussLog
+from gspylib.common.ParameterParsecheck import Parameter
+from gspylib.common.Common import DefaultValue
+from gspylib.common.ErrorCode import ErrorCode
+from gspylib.common.LocalBaseOM import LocalBaseOM
+
+
+class CleanOsUser(LocalBaseOM):
+    '''
+    This class cleans the OS user; it does not clean the group.
+    '''
+
+    def __init__(self):
+        '''
+        Constructor
+        '''
+        self.userProfile = ""
+        self.user = ""
+        self.logger = None
+
+    ##########################################################################
+    # Help context.
+    ##########################################################################
+    def usage(self):
+        """
+        function: usage
+        input : NA
+        output : NA
+        """
+        print("CleanOsUser.py is a utility to clean the OS user.")
+        print(" ")
+        print("Usage:")
+        print("  python3 CleanOsUser.py --help")
+        print("  python3 CleanOsUser.py -U user")
+        print(" ")
+        print("Common options:")
+        print("  -U        the database program and cluster owner")
+        print("  --help    show this help, then exit")
+        print(" ")
+
+    def __checkParameters(self):
+        """
+        function: Check parameters from the command line
+        input : NA
+        output: NA
+        """
+        try:
+            opts, args = getopt.getopt(sys.argv[1:], "U:l:", ["help"])
+        except getopt.GetoptError as e:
+            GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50000"]
+                                   % str(e))
+
+        if (len(args) > 0):
+            GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50000"]
+                                   % str(args[0]))
+
+        logFile = ""
+        for key, value in opts:
+            if (key == "-U"):
+                self.user = value
+            elif (key == "-l"):
+                logFile = value
+            elif (key == "--help"):
+                self.usage()
+                sys.exit(0)
+            else:
+                GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50000"]
+                                       % key)
+
+            Parameter.checkParaVaild(key, value)
+
+        if (self.user == ""):
+            GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50001"] % 'U'
+                                   + ".")
+        try:
+            DefaultValue.checkUser(self.user, False)
+        except Exception as e:
+            GaussLog.exitWithError(str(e))
+
+        if (logFile == ""):
+            logFile = DefaultValue.getOMLogPath(DefaultValue.LOCAL_LOG_FILE,
+                                                self.user, "")
+
+        self.logger = GaussLog(logFile, "CleanOsUser")
+        self.logger.ignoreErr = True
+
+    ##########################################################################
+    # This is the main clean OS user flow.
+    ##########################################################################
+    def cleanOsUser(self):
+        """
+        function: Clean the OS user
+        input : NA
+        output: NA
+        """
+        self.__checkParameters()
+        self.logger.log("Cleaning crashed OS user.")
+        try:
+            # clean semaphores owned by the user
+            subprocess.getstatusoutput("ipcs -s|awk '/ %s /{print $2}'|"
+                                       "xargs -n1 ipcrm -s" % self.user)
+
+            # get install path
+            cmd = "su - %s -c 'echo $GAUSSHOME' 2>/dev/null" % self.user
+            (status, output) = subprocess.getstatusoutput(cmd)
+            if (status != 0):
+                self.logger.logExit(ErrorCode.GAUSS_518["GAUSS_51802"]
+                                    % "$GAUSSHOME" + " Error:\n%s" % output)
+            gaussHome = output.strip()
+            if (gaussHome == ""):
+                self.logger.debug("$GAUSSHOME is null. You may have to clean"
+                                  " the crashed installation path manually.")
+            self.logger.debug("The installation path is %s." % gaussHome)
+
+            # delete user
+            status, output = subprocess.getstatusoutput("userdel -f %s"
+                                                        % self.user)
+            if (status != 0):
+                self.logger.logExit(ErrorCode.GAUSS_503["GAUSS_50314"]
+                                    % self.user + " Error: \n%s" % output)
+
+            # delete path
+            status, output = subprocess.getstatusoutput("rm -rf '%s'"
+                                                        % gaussHome)
+            if (status != 0):
+                self.logger.logExit(ErrorCode.GAUSS_502["GAUSS_50209"]
+                                    % gaussHome + " Error: \n%s" % output)
+
+        except Exception as e:
+            self.logger.logExit(str(e))
+        self.logger.log("Successfully cleaned OS user.")
+
+    def removeAllowUsers(self):
+        """
+        function: Remove the specific user from 'AllowUsers'
+                  in /etc/ssh/sshd_config
+        input : NA
+        output: NA
+        """
+        sshd_config = "/etc/ssh/sshd_config"
+        try:
+            cmd = "cat %s | grep -E '\\<AllowUsers\\>'" % sshd_config
+            (status, output) = subprocess.getstatusoutput(cmd)
+            # Not found, or there is an error.
+            if status != 0:
+                if output is None or len(output.lstrip()) == 0:
+                    self.logger.debug("No 'AllowUsers' configuration found"
+                                      " in %s" % sshd_config)
+                else:
+                    # Error occurred, but there is no need to report.
+                    self.logger.debug("Failed to get 'AllowUsers' from %s"
                                      % sshd_config)
+                return
+
+            allowUsersLineBefore = output.lstrip()
+            userList = allowUsersLineBefore.split()
+            userList.remove(self.user)
+            allowUsersLineRemoved = ' '.join(userList)
+            cmd = "sed -i 's/%s/%s/g' %s" % (allowUsersLineBefore,
+                                             allowUsersLineRemoved,
+                                             sshd_config)
+            (status, output) = subprocess.getstatusoutput(cmd)
+            # The sed command failed; log it and continue.
+            if status != 0:
+                self.logger.debug("Failed to remove user '%s' from "
+                                  "'AllowUsers' in %s. Command: %s, Error: %s"
+                                  % (self.user, sshd_config, cmd, output))
+        except Exception as e:
+            self.logger.debug("Failed to remove user '%s' from 'AllowUsers'"
+                              " in %s. Error: %s" % (self.user, sshd_config,
+                                                     str(e)))
+
+
+if __name__ == '__main__':
+    """
+    main function
+    """
+    try:
+        cleaner = CleanOsUser()
+        cleaner.cleanOsUser()
+        cleaner.removeAllowUsers()
+    except Exception as e:
+        GaussLog.exitWithError(str(e))
+
+    sys.exit(0)
diff --git a/script/local/ConfigHba.py b/script/local/ConfigHba.py
new file mode 100644
index 0000000..af412ed
--- /dev/null
+++ b/script/local/ConfigHba.py
@@ -0,0 +1,271 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+#############################################################################
+# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+#
+# openGauss is licensed under Mulan PSL v2.
+# You can use this software according to the terms
+# and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+#
+# http://license.coscl.org.cn/MulanPSL2
+#
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+# ----------------------------------------------------------------------------
+# Description : ConfigHba.py is a utility to configure pg_hba.conf.
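+# A minimal invocation sketch (illustrative only; the user name, XML path
+# and log path below are hypothetical):
+#   python3 ConfigHba.py -U omm -X /opt/software/cluster_config.xml \
+#       -l /var/log/gaussdb/om/confighba.log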
+#############################################################################
+
+import getopt
+import os
+import sys
+
+sys.path.append(sys.path[0] + "/../")
+from gspylib.common.GaussLog import GaussLog
+from gspylib.common.ParameterParsecheck import Parameter
+from gspylib.common.Common import DefaultValue
+from gspylib.common.ErrorCode import ErrorCode
+from gspylib.common.LocalBaseOM import LocalBaseOM
+from gspylib.threads.parallelTool import parallelTool
+
+########################################################################
+# Global variables define
+########################################################################
+g_opts = None
+
+
+########################################################################
+class CmdOptions():
+    """
+    class: CmdOptions
+    """
+
+    def __init__(self):
+        """
+        function: Constructor
+        """
+        self.clusterUser = ""
+        self.ignorepgHbaMiss = False
+        self.clusterConf = ""
+        self.logFile = ""
+        self.removeIps = []
+        self.addIps = []
+        self.dws_mode = False
+
+
+def usage():
+    """
+ConfigHba.py is a utility to configure the pg_hba.conf file on all nodes.
+
+Usage:
+    python3 ConfigHba.py --help
+    python3 ConfigHba.py -U USER
+        [-X XMLFILE] [-l LOGFILE]
+        [-r] [--remove-ip IPADDRESS [...]] [--add-ip=IPADDRESS]
+
+General options:
+    -U             Cluster user.
+    -X             Path of the XML configuration file.
+    -l             Path of log file.
+    -r             Ignore a missing pg_hba.conf instead of
+                   reporting an error.
+    --remove-ip    Remove an IP address from pg_hba.conf.
+    --add-ip       Add a comma-separated list of IP addresses
+                   to pg_hba.conf.
+    --help         Show help information for this utility,
+                   and exit the command line mode.
+    """
+    print(usage.__doc__)
+
+
+def parseCommandLine():
+    """
+    function: parse command line
+    """
+    try:
+        opts, args = getopt.getopt(sys.argv[1:], "U:X:l:r",
+                                   ["remove-ip=", "help", "dws-mode",
+                                    "add-ip="])
+    except Exception as e:
+        usage()
+        GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50000"] % str(e))
+
+    if (len(args) > 0):
+        GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50000"]
+                               % str(args[0]))
+
+    global g_opts
+    g_opts = CmdOptions()
+
+    for (key, value) in opts:
+        if (key == "-h" or key == "--help"):
+            usage()
+            sys.exit(0)
+        elif (key == "-U"):
+            g_opts.clusterUser = value
+        elif (key == "-X"):
+            g_opts.clusterConf = value
+        elif (key == "-l"):
+            g_opts.logFile = value
+        elif (key == "-r"):
+            g_opts.ignorepgHbaMiss = True
+        elif (key == "--remove-ip"):
+            g_opts.removeIps.append(value)
+        elif (key == "--dws-mode"):
+            g_opts.dws_mode = True
+        elif (key == "--add-ip"):
+            g_opts.addIps = value.split(',')
+        Parameter.checkParaVaild(key, value)
+
+
+def checkParameter():
+    """
+    function: check input parameters
+    """
+    if (g_opts.clusterUser == ""):
+        GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50001"] % 'U' + ".")
+
+    if g_opts.ignorepgHbaMiss:
+        gaussHome = DefaultValue.getEnv("GAUSSHOME")
+        if not gaussHome:
+            GaussLog.exitWithError(ErrorCode.GAUSS_518["GAUSS_51802"]
+                                   % "GAUSSHOME")
+        staticConfigfile = "%s/bin/cluster_static_config" % gaussHome
+        if (not os.path.isfile(staticConfigfile)):
+            GaussLog.exitWithError(ErrorCode.GAUSS_502["GAUSS_50210"]
+                                   % staticConfigfile)
+
+    if (g_opts.clusterConf != ""):
+        if (not os.path.exists(g_opts.clusterConf)):
+            GaussLog.exitWithError(ErrorCode.GAUSS_502["GAUSS_50201"]
+                                   % g_opts.clusterConf)
+
+    if (g_opts.logFile == ""):
+        g_opts.logFile = DefaultValue.getOMLogPath(
+            DefaultValue.LOCAL_LOG_FILE, g_opts.clusterUser, "")
+
+
+class ConfigHba(LocalBaseOM):
+    """
+    class: ConfigHba
+    """
+
+    def __init__(self, logFile, user, clusterConf, dwsMode=False,
+                 ignorepgHbaMiss=False, removeIps=None):
+        """
+        function: configure all instances on the local node
+        """
+        LocalBaseOM.__init__(self, logFile, user, clusterConf, dwsMode)
+        if (self.clusterConfig == ""):
+            # Read config from static config file
+            self.readConfigInfo()
+        else:
+            self.readConfigInfoByXML()
+        # get user info
+        self.getUserInfo()
+        if (user != "" and self.user != user.strip()):
+            self.logger.debug("User parameter : %s." % user)
+            self.logger.logExit(ErrorCode.GAUSS_503["GAUSS_50315"]
+                                % (self.user, self.clusterInfo.appPath))
+        # init every component
+        self.initComponent()
+
+        self.ignorepgHbaMiss = ignorepgHbaMiss
+        self.allIps = []
+        if removeIps is None:
+            removeIps = []
+        self.removeIps = removeIps
+
+    def getAllIps(self):
+        """
+        function: get all IP info from the static configuration file
+        input : NA
+        output: NA
+        """
+        if (g_opts.addIps):
+            self.allIps = g_opts.addIps
+            return
+
+        # get all node names
+        nodenames = self.clusterInfo.getClusterNodeNames()
+        for nodename in nodenames:
+            nodeinfo = self.clusterInfo.getDbNodeByName(nodename)
+            self.allIps += nodeinfo.backIps
+            self.allIps += nodeinfo.sshIps
+            for inst in nodeinfo.datanodes:
+                self.allIps += inst.haIps
+                self.allIps += inst.listenIps
+        # remove duplicate IPs
+        self.allIps = DefaultValue.Deduplication(self.allIps)
+
+    def configHba(self):
+        """
+        function: set hba config
+        input : NA
+        output: NA
+        """
+        self.getAllIps()
+        componentList = self.dnCons
+        # determine whether this node contains a DN instance
+        if (len(componentList) == 0):
+            return
+        try:
+            parallelTool.parallelExecute(self.__configAnInstance,
+                                         componentList)
+            self.logger.log("Successfully configured all instances"
+                            " on node[%s]." % DefaultValue.GetHostIpOrName())
+        except Exception as e:
+            raise Exception(str(e))
+
+    def __configAnInstance(self, component):
+        """
+        function: set hba config for a single component
+        input : component
+        output: NA
+        """
+        # check instance data directory
+        if (component.instInfo.datadir == "" or
+                not os.path.exists(component.instInfo.datadir)):
+            if self.ignorepgHbaMiss:
+                self.logger.debug("Failed to obtain data directory of"
+                                  " the instance[%s]."
+                                  % str(component.instInfo))
+                return
+            else:
+                raise Exception(ErrorCode.GAUSS_502["GAUSS_50219"]
+                                % ("data directory of the instance[%s]"
+                                   % str(component.instInfo)))
+
+        # check pg_hba.conf
+        hbaFile = "%s/pg_hba.conf" % component.instInfo.datadir
+        if self.ignorepgHbaMiss and not os.path.exists(hbaFile):
+            self.logger.debug("The %s does not exist." % hbaFile)
+            return
+
+        component.setPghbaConfig(self.allIps)
+        if len(self.removeIps) != 0:
+            component.removeIpInfoOnPghbaConfig(self.removeIps)
+
+
+if __name__ == '__main__':
+    """
+    function: config database node instance pg_hba.conf
+              1. check input parameters
+              2. config instance pg_hba.conf
+    input : NA
+    output: NA
+    """
+    try:
+        # parse and check input parameters
+        parseCommandLine()
+        checkParameter()
+
+        # modify instance
+        configer = ConfigHba(g_opts.logFile, g_opts.clusterUser,
+                             g_opts.clusterConf, g_opts.dws_mode,
+                             g_opts.ignorepgHbaMiss, g_opts.removeIps)
+        configer.configHba()
+
+    except Exception as e:
+        GaussLog.exitWithError(str(e))
+
+    sys.exit(0)
diff --git a/script/local/ConfigInstance.py b/script/local/ConfigInstance.py
new file mode 100644
index 0000000..3a68135
--- /dev/null
+++ b/script/local/ConfigInstance.py
@@ -0,0 +1,334 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+#############################################################################
+# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+#
+# openGauss is licensed under Mulan PSL v2.
+# You can use this software according to the terms
+# and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+#
+# http://license.coscl.org.cn/MulanPSL2
+#
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+# ----------------------------------------------------------------------------
+# Description : ConfigInstance.py is a utility to configure
+#               CN/DN/gtm/cm_agent/cm_server instances.
+#############################################################################
+
+import getopt
+import sys
+import os
+
+sys.path.append(sys.path[0] + "/../")
+from gspylib.common.GaussLog import GaussLog
+from gspylib.common.Common import DefaultValue, ClusterInstanceConfig
+from gspylib.common.ParameterParsecheck import Parameter
+from gspylib.common.LocalBaseOM import LocalBaseOM
+from gspylib.common.ErrorCode import ErrorCode
+from gspylib.threads.parallelTool import parallelTool
+
+#############################################################################
+# Global variables
+#   INSTANCE_TYPE_UNDEFINED: the instance type is not defined
+#   MASTER_INSTANCE: the instance is a master instance
+#   STANDBY_INSTANCE: the instance is a standby instance
+#   DUMMY_STANDBY_INSTANCE: the instance is a dummy standby instance
+#############################################################################
+INSTANCE_TYPE_UNDEFINED = -1
+MASTER_INSTANCE = 0
+STANDBY_INSTANCE = 1
+DUMMY_STANDBY_INSTANCE = 2
+CONFIG_ITEM_TYPE = "ConfigInstance"
+
+CONFIG_PG_FILE = "pg_config"
+CONFIG_GS_FILE = "gs_config"
+CONFIG_ALL_FILE = "all"
+########################################################################
+# Global variables define
+########################################################################
+g_opts = None
+
+
+########################################################################
+class CmdOptions():
+    """
+    class: CmdOptions
+    """
+
+    def __init__(self):
+        """
+        function: constructor
+        """
+        self.clusterUser = ""
+        self.dataGucParams = []
+        self.configType = CONFIG_ALL_FILE
+        self.clusterStaticConfigFile = ""
+        self.logFile = ""
+        self.alarmComponent = ""
+        self.gucXml = False
+        self.vcMode = False
+        self.dws_mode = False
+        self.clusterConf = ""
+
+
+def usage():
+    """
+Usage:
+    python3 ConfigInstance.py -h | --help
+    python3 ConfigInstance.py -U user
+        [-T config_type]
+        [-P cluster_static_config]
+        [-C "PARAMETER=VALUE" [...]]
+        [-D "PARAMETER=VALUE" [...]]
+        [-l log]
+    config_type: pg_config, gs_config, all
+    """
+    print(usage.__doc__)
+
+
+def parseCommandLine():
+    """
+    function: parseCommandLine
+    input: NA
+    output: NA
+    """
+    try:
+        paraLine = sys.argv[1]
+        paraLine = DefaultValue.encodeParaline(paraLine,
+                                               DefaultValue.BASE_DECODE)
+        paraLine = paraLine.strip()
+        paraList = paraLine.split("*==SYMBOL==*")
+        opts, args = getopt.getopt(paraList[1:], "U:C:D:T:P:l:hX:",
+                                   ["help", "alarm=", "gucXml",
+                                    "vc_mode", "dws-mode"])
+    except Exception as e:
+        usage()
+        GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50000"]
+                               % str(e))
+
+    if (len(args) > 0):
+        GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50000"]
+                               % str(args[0]))
+
+    global g_opts
+    g_opts = CmdOptions()
+
+    for (key, value) in opts:
+        if (key == "-h" or key == "--help"):
+            usage()
+            sys.exit(0)
+        elif (key == "-U"):
+            g_opts.clusterUser = value
+        elif (key == "-D"):
+            g_opts.dataGucParams.append(value)
+        elif (key == "-T"):
+            g_opts.configType = value
+        elif (key == "-P"):
+            g_opts.clusterStaticConfigFile = value
+        elif (key == "-l"):
+            g_opts.logFile = os.path.realpath(value)
+        elif (key == "--alarm"):
+            g_opts.alarmComponent = value
+        elif (key == "--gucXml"):
+            g_opts.gucXml = True
+        elif (key == "--vc_mode"):
+            g_opts.vcMode = True
+        elif (key == "--dws-mode"):
+            g_opts.dws_mode = True
+        elif key == "-X":
+            g_opts.clusterConf = os.path.realpath(value)
+        Parameter.checkParaVaild(key, value)
+
+
+def checkParameter():
+    """
+    function: checkParameter
+    input: NA
+    output: NA
+    """
+    # check if the user exists and is the right user
+    if (g_opts.clusterUser == ""):
+        GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50001"] % 'U' + ".")
+    DefaultValue.checkUser(g_opts.clusterUser)
+
+    if (g_opts.configType not in [CONFIG_ALL_FILE,
+                                  CONFIG_GS_FILE, CONFIG_PG_FILE]):
+        GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50004"]
+                               % 'T' + " Value: %s." % g_opts.configType)
+
+    if (g_opts.logFile == ""):
+        g_opts.logFile = DefaultValue.getOMLogPath(
+            DefaultValue.LOCAL_LOG_FILE, g_opts.clusterUser, "")
+
+    if (g_opts.alarmComponent == ""):
+        g_opts.alarmComponent = DefaultValue.ALARM_COMPONENT_PATH
+
+
+def getAlarmDict(configItemType=None, alarmComponent=None):
+    """
+    function: Get alarm configuration for om_monitor
+    input : configItemType, alarmComponent
+    output: tmpAlarmDict
+    """
+    tmpAlarmDict = {}
+    if (configItemType == "ConfigInstance"):
+        tmpAlarmDict["alarm_component"] = "%s" % alarmComponent
+    return tmpAlarmDict
+
+
+class ConfigInstance(LocalBaseOM):
+    """
+    Class: ConfigInstance
+    """
+
+    def __init__(self, logFile, user, clusterConf, dwsMode=False,
+                 dataParams=None, confType="",
+                 clusterStaticConfigFile="", alarmComponent=""):
+        """
+        function: configure all instances on the local node
+        """
+        if dataParams is None:
+            dataParams = []
+        LocalBaseOM.__init__(self, logFile, user, clusterConf, dwsMode)
+        if (self.clusterConfig == ""):
+            # Read config from static config file
+            self.readConfigInfo()
+        else:
+            self.readConfigInfoByXML()
+        # get user info
+        self.getUserInfo()
+        if (user != "" and self.user != user.strip()):
+            self.logger.debug("User parameter : %s." % user)
+            self.logger.logExit(ErrorCode.GAUSS_503["GAUSS_50315"]
+                                % (self.user, self.clusterInfo.appPath))
+        # get log file info
+        # init every component
+        self.initComponent()
+
+        self.dataGucParams = dataParams
+        self.configType = confType
+        self.clusterStaticConfigFile = clusterStaticConfigFile
+        self.alarmComponent = alarmComponent
+        self.__dataConfig = {}
+
+    def __checkconfigParams(self, param):
+        """
+        function: Check a "key=value" parameter for postgresql.conf.
+                  port: this is calculated automatically
+        input : param
+        output: int
+        """
+        configInvalidArgs = ["port", "alarm_component"]
+        # get key name and key value
+        # split by '='
+        keyValue = param.split("=")
+        if (len(keyValue) != 2):
+            return 1
+        # the type like this: "key = value"
+        key = keyValue[0].strip()
+        value = keyValue[1].strip()
+        if key in configInvalidArgs:
+            return 1
+
+        self.__dataConfig[key] = value
+        return 0
+
+    def __checkDNInstParameters(self):
+        """
+        function: Check parameters for instance configuration
+        input : NA
+        output: NA
+        """
+        # checking parameters for configuring the database node
+ self.logger.log("Checking parameters for configuration database node.") + + for param in self.dataGucParams: + if self.__checkconfigParams(param.strip()) != 0: + self.logger.logExit(ErrorCode.GAUSS_500["GAUSS_50000"] + % param) + + def __modifyConfig(self): + """ + function: Modify all instances on loacl node + input : NA + output: NA + """ + self.logger.log("Modifying Alarm configuration.") + tmpAlarmDict = getAlarmDict(self.configType, self.alarmComponent) + # init alarmItem.conf file + configFile = "%s/bin/alarmItem.conf" % self.clusterInfo.appPath + ClusterInstanceConfig.setConfigItem( + DefaultValue.INSTANCE_ROLE_CMAGENT, "", configFile, tmpAlarmDict) + + componentList = self.dnCons + if len(componentList) == 0: + return + try: + # config instance in paralle + parallelTool.parallelExecute(self.configInst, componentList) + except Exception as e: + self.logger.logExit(str(e)) + + def configInst(self, dbCon): + """ + function: Config the instance + input : dbCon + output: NA + """ + if dbCon.instInfo.instanceRole == DefaultValue.INSTANCE_ROLE_DATANODE: + # modifying database node configuration. + self.logger.log("Modifying database node configuration.") + peerInsts = self.clusterInfo.getPeerInstance(dbCon.instInfo) + azNames = self.clusterInfo.getazNames() + allConfig = {} + allConfig.update(self.__dataConfig) + dbCon.configInstance(self.user, allConfig, + peerInsts, CONFIG_ITEM_TYPE, + self.alarmComponent, azNames, + g_opts.gucXml, self.clusterInfo) + + def modifyInstance(self): + """ + Class: modifyInstance + """ + self.__checkDNInstParameters() + # modify all instances on loacl node + if self.configType in [CONFIG_PG_FILE, CONFIG_ALL_FILE]: + self.__modifyConfig() + + +if __name__ == '__main__': + ########################################################################## + # config instance + ########################################################################## + """ + function: config instance + 1.check dbInitParams + 2.modify instance + 3.genarate cert files + input : NA + output: NA + """ + try: + # parse and check input parameters + parseCommandLine() + checkParameter() + + # modify Instance + configer = ConfigInstance(g_opts.logFile, g_opts.clusterUser, + g_opts.clusterConf, + False, + g_opts.dataGucParams, g_opts.configType, + g_opts.clusterStaticConfigFile, + g_opts.alarmComponent) + configer.modifyInstance() + except Exception as e: + GaussLog.exitWithError(str(e)) + + sys.exit(0) diff --git a/script/local/CreatePath.py b/script/local/CreatePath.py new file mode 100644 index 0000000..cca9e02 --- /dev/null +++ b/script/local/CreatePath.py @@ -0,0 +1,175 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +############################################################################# +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +# Description : CreatePath.py is a utility to create new path. 
+############################################################################# +import getopt +import sys +import os +import subprocess +import pwd +import grp + +sys.path.append(sys.path[0] + "/../") +from gspylib.common.ErrorCode import ErrorCode +from gspylib.common.ParameterParsecheck import Parameter +from gspylib.common.Common import DefaultValue + +g_user = "" +g_newPath = "" + + +def exitWithRetCode(retCode, msg=""): + """ + exit with retcode message + """ + if (msg != ""): + print(msg) + sys.exit(retCode) + + +def usage(): + """ +Usage: + python3 CreatePath.py -U user -P newpath +Common options: + -U the user of old cluster + -P the new path need to be created + --help show this help, then exit + """ + print(usage.__doc__) + + +def parseCommandLine(): + """ + function: Check parameter from command line + input : NA + output: NA + """ + try: + opts, args = getopt.getopt(sys.argv[1:], "U:P:h", ["help"]) + except Exception as e: + usage() + exitWithRetCode(1, ErrorCode.GAUSS_500["GAUSS_50000"] % str(e)) + + if (len(args) > 0): + exitWithRetCode(1, ErrorCode.GAUSS_500["GAUSS_50000"] % str(args[0])) + + global g_user + global g_newPath + + for (key, value) in opts: + if (key == "--help" or key == "-h"): + usage() + exitWithRetCode(0) + elif (key == "-U"): + g_user = value + elif (key == "-P"): + g_newPath = value + else: + exitWithRetCode(1, ErrorCode.GAUSS_500["GAUSS_50000"] % key) + Parameter.checkParaVaild(key, value) + + if (g_user == ""): + exitWithRetCode(1, ErrorCode.GAUSS_500["GAUSS_50001"] % 'U' + ".") + if (g_newPath == ""): + exitWithRetCode(1, ErrorCode.GAUSS_500["GAUSS_50001"] % 'P' + ".") + if (not os.path.isabs(g_newPath)): + exitWithRetCode(1, ErrorCode.GAUSS_502["GAUSS_50213"] % g_newPath) + g_newPath = os.path.normpath(g_newPath) + + +def getTopPathNotExist(topDirPath): + """ + function: find the top path to be created + output: tmpDir + """ + tmpDir = topDirPath + while True: + # find the top path to be created + (tmpDir, topDirName) = os.path.split(tmpDir) + if (os.path.exists(tmpDir) or topDirName == ""): + tmpDir = os.path.join(tmpDir, topDirName) + break + return tmpDir + + +def createPathUnderRoot(newPath, user): + """ + create path using root user + this function only can be called by root, and user should be exist + input : newPath, user + output: NA + """ + # get group information + try: + DefaultValue.getUserId(user) + except Exception as e: + exitWithRetCode(1, str(e)) + groupInfo = grp.getgrgid(pwd.getpwnam(user).pw_gid).gr_name + + # check and create new path + ownerPath = newPath + newPathExistAlready = True + if (not os.path.exists(ownerPath)): + newPathExistAlready = False + ownerPath = getTopPathNotExist(ownerPath) + # create newPath + cmd = "(if [ ! 
-d '%s' ]; then mkdir -p '%s' -m %s;fi)" \
+          % (newPath, newPath, DefaultValue.KEY_DIRECTORY_MODE)
+    (status, output) = subprocess.getstatusoutput(cmd)
+    if (status != 0):
+        msg = "Cmd:%s\noutput:%s" % (cmd, output)
+        exitWithRetCode(1, msg)
+    # give permissions to the directory ownerPath
+    if not newPathExistAlready:
+        cmd = "chown -R %s:%s '%s' && chmod -R %s '%s'" \
+              % (user, groupInfo, ownerPath,
+                 DefaultValue.KEY_DIRECTORY_MODE, ownerPath)
+    else:
+        cmd = "chown %s:%s '%s' && chmod %s '%s'" \
+              % (user, groupInfo, ownerPath,
+                 DefaultValue.KEY_DIRECTORY_MODE, ownerPath)
+    (status, output) = subprocess.getstatusoutput(cmd)
+    if (status != 0):
+        msg = "Cmd:%s\noutput:%s" % (cmd, output)
+        exitWithRetCode(1, msg)
+
+    # check enter permission
+    cmd = "su - %s -c 'cd %s'" % (user, newPath)
+    (status, output) = subprocess.getstatusoutput(cmd)
+    if (status != 0):
+        msg = "Cmd:%s\noutput:%s" % (cmd, output)
+        exitWithRetCode(1, msg)
+
+    # creating the new path succeeded, return 0
+    exitWithRetCode(0, "Successfully created new path.")
+
+
+if __name__ == '__main__':
+    """
+    main function
+    """
+    # check precondition
+    if (os.getuid() != 0):
+        exitWithRetCode(1, ErrorCode.GAUSS_501["GAUSS_50104"])
+    try:
+        parseCommandLine()
+        createPathUnderRoot(g_newPath, g_user)
+    except Exception as e:
+        exitWithRetCode(1, str(e))
diff --git a/script/local/ExecuteSql.py b/script/local/ExecuteSql.py
new file mode 100644
index 0000000..4c3e17e
--- /dev/null
+++ b/script/local/ExecuteSql.py
@@ -0,0 +1,138 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+#############################################################################
+# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+#
+# openGauss is licensed under Mulan PSL v2.
+# You can use this software according to the terms
+# and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+#
+# http://license.coscl.org.cn/MulanPSL2
+#
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+# ----------------------------------------------------------------------------
+# Description : ExecuteSql.py is a utility to execute sql by using libpq.
+#############################################################################
+
+import getopt
+import sys
+import os
+import json
+import subprocess
+
+sys.path.append(sys.path[0] + "/../")
+from gspylib.common.GaussLog import GaussLog
+from gspylib.common.ParameterParsecheck import Parameter
+from gspylib.common.Common import DefaultValue, ClusterCommand
+
+libpath = os.path.join(DefaultValue.getEnv("GAUSSHOME"), "lib")
+sys.path.append(libpath)
+from gspylib.common.ErrorCode import ErrorCode
+from gspylib.os.gsfile import g_file
+
+
+def usage():
+    """
+Usage:
+    python3 ExecuteSql.py -h|--help
+    python3 ExecuteSql.py -p port -S sqlfile -f outputfile -s snapid
+        -d database
+
+General options:
+    -p          database port
+    -S          SQL file to be executed
+    -f          result output file
+    -s          snapid for special use
+    -d          database to execute the sql
+    -h, --help  Show help information for this utility,
+                and exit the command line mode.
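+
+Example (illustrative; the port, paths and database name are hypothetical):
+    python3 ExecuteSql.py -p 5432 -S /tmp/query.sql -f /tmp/result.json \
+        -d postgres
+Note: the SQL file passed via -S is removed after execution.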
+ """ + print(usage.__doc__) + + +def main(): + """ + main function + """ + try: + (opts, args) = getopt.getopt(sys.argv[1:], "p:S:f:s:d:h", ["help"]) + except Exception as e: + usage() + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50000"] % str(e)) + + if (len(args) > 0): + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50000"] + % str(args[0])) + + port = "" + sqlfile = "" + outputfile = "" + database = "" + for (key, value) in opts: + if (key == "-h" or key == "--help"): + usage() + sys.exit(0) + elif (key == "-p"): + port = value + elif (key == "-S"): + sqlfile = value + elif (key == "-f"): + outputfile = value + elif (key == "-s"): + snapid = value + elif (key == "-d"): + database = value + else: + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50000"] % key) + + Parameter.checkParaVaild(key, value) + + # check parameter + if (port == ""): + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50001"] + % 'p' + ".") + if (sqlfile == ""): + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50001"] + % 'S' + ".") + if (outputfile == ""): + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50001"] + % 'f' + ".") + if (database == ""): + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50001"] + % 'd' + ".") + try: + output = {} + exesql = "" + if os.path.exists(sqlfile): + with open(sqlfile, "r") as fp: + lines = fp.readlines() + for line in lines: + exesql += line + "\n" + (status, result, err_output) = \ + ClusterCommand.excuteSqlOnLocalhost(port, exesql, database) + cmd = "rm -rf %s" % sqlfile + if (err_output != ""): + output["status"] = status + output["error_output"] = err_output + GaussLog.exitWithError(ErrorCode.GAUSS_513["GAUSS_51300"] % exesql + + "Errors:%s" % err_output) + output["status"] = status + output["result"] = result + output["error_output"] = err_output + g_file.createFileInSafeMode(outputfile) + with open(outputfile, "w") as fp_json: + json.dump(output, fp_json) + (status, outpout) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + "Error:\n%s" % output) + except Exception as e: + GaussLog.exitWithError("Errors:%s" % str(e)) + + +if __name__ == '__main__': + main() diff --git a/script/local/InitInstance.py b/script/local/InitInstance.py new file mode 100644 index 0000000..8bf5e4f --- /dev/null +++ b/script/local/InitInstance.py @@ -0,0 +1,243 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +############################################################################# +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +# Description : InitInstance.py is a utility to init instance. 
+#############################################################################
+
+import getopt
+import sys
+import os
+
+sys.path.append(sys.path[0] + "/../")
+from gspylib.common.GaussLog import GaussLog
+from gspylib.common.Common import DefaultValue
+from gspylib.common.ParameterParsecheck import Parameter
+from gspylib.common.ErrorCode import ErrorCode
+from gspylib.common.LocalBaseOM import LocalBaseOM
+from gspylib.threads.parallelTool import parallelTool
+
+########################################################################
+# Global variables define
+########################################################################
+g_opts = None
+
+########################################################################
+INSTANCE_TYPE_UNDEFINED = -1
+MASTER_INSTANCE = 0
+STANDBY_INSTANCE = 1
+DUMMY_STANDBY_INSTANCE = 2
+
+
+########################################################################
+class CmdOptions():
+    """
+    class: CmdOptions
+    """
+
+    def __init__(self):
+        """
+        constructor
+        """
+        self.clusterUser = ""
+        self.dbInitParams = []
+        self.logFile = ""
+        self.dws_mode = False
+        self.vc_mode = False
+
+
+def usage():
+    """
+Usage:
+    python3 InitInstance.py -U user [-P "-PARAMETER VALUE" [...]]
+        [-G "-PARAMETER VALUE" [...]] [-l logfile]
+    """
+    print(usage.__doc__)
+
+
+def parseCommandLine():
+    """
+    function: parse input parameters
+    """
+    try:
+        opts, args = getopt.getopt(sys.argv[1:], "U:P:G:l:?",
+                                   ["help", "dws_mode", "vc_mode"])
+    except Exception as e:
+        usage()
+        GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50000"] % str(e))
+
+    if (len(args) > 0):
+        GaussLog.exitWithError(
+            ErrorCode.GAUSS_500["GAUSS_50000"] % str(args[0]))
+
+    global g_opts
+    g_opts = CmdOptions()
+
+    for (key, value) in opts:
+        if (key == "-?" or key == "--help"):
+            usage()
+            sys.exit(0)
+        elif (key == "-U"):
+            g_opts.clusterUser = value
+        elif (key == "-P"):
+            g_opts.dbInitParams.append(value)
+        elif (key == "-l"):
+            g_opts.logFile = os.path.realpath(value)
+        elif (key == "--dws_mode"):
+            g_opts.dws_mode = True
+        elif (key == "--vc_mode"):
+            g_opts.vc_mode = True
+        Parameter.checkParaVaild(key, value)
+
+
+def __checkInitdbParams(param):
+    """
+    function : Check parameter for initdb
+        -D, --pgdata: this has been specified in configuration file
+        -W, --pwprompt: this will block the script
+        --pwfile: it is not safe to read a password from a file
+        -A, --auth, --auth-local, --auth-host: they will be used
+            with '--pwfile'
+        -C, --enpasswd: this will confuse the default password in script
+            with the password user specified
+        -Z: this has been designated internal
+        -U, --username: use the user specified during the install step
+    input : String
+    output : Number
+    """
+    shortInvalidArgs = ("-D", "-W", "-C", "-A", "-Z", "-U", "-X", "-s")
+    longInvalidArgs = (
+        "--pgdata", "--pwprompt", "--enpasswd", "--pwfile", "--auth",
+        "--auth-host", "--auth-local", "--username", "--xlogdir", "--show")
+    argList = param.split()
+    for arg in shortInvalidArgs:
+        if (arg in argList):
+            return 1
+
+    argList = param.split("=")
+    for arg in longInvalidArgs:
+        if (arg in argList):
+            return 1
+
+    return 0
+
+
+def checkParameter():
+    """
+    function: check input parameters
+    """
+    # check if the user exists and is the right user
+    if (g_opts.clusterUser == ""):
+        GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50001"] % 'U' + ".")
+    try:
+        DefaultValue.checkUser(g_opts.clusterUser, False)
+    except Exception as e:
+        GaussLog.exitWithError(str(e))
+
+    if (g_opts.logFile == ""):
+        g_opts.logFile = DefaultValue.getOMLogPath(
+            DefaultValue.LOCAL_LOG_FILE, g_opts.clusterUser, "")
+
+    for param in g_opts.dbInitParams:
+        if (__checkInitdbParams(param.strip()) != 0):
+            GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50000"] % param)
+
+
+class initDbNode(LocalBaseOM):
+    '''
+    class: initDbNode
+    '''
+
+    def __init__(self, logFile, user, dwsMode=False, dbInitParams=None):
+        """
+        function: init instance
+        input : logFile, user, dwsMode, dbInitParams
+        output: NA
+        """
+        if dbInitParams is None:
+            dbInitParams = []
+        LocalBaseOM.__init__(self, logFile, user, "", dwsMode, dbInitParams)
+        if self.clusterConfig == "":
+            # Read config from static config file
+            self.readConfigInfo()
+        else:
+            self.readConfigInfoByXML()
+        # get user info
+        self.getUserInfo()
+        if user != "" and self.user != user.strip():
+            self.logger.debug("User parameter : %s." % user)
+            self.logger.logExit(ErrorCode.GAUSS_503["GAUSS_50315"] % (
+                self.user, self.clusterInfo.appPath))
+
+        # init every component
+        self.initComponent()
+
+    def initNodeInst(self, vc_mode=False):
+        """
+        function : Init all instances on the local node
+        input : vc_mode
+        output : NA
+        """
+        self.logger.log("Initializing instance.")
+
+        if not vc_mode:
+            components = self.etcdCons + self.cmCons + self.gtmCons \
+                         + self.cnCons + self.dnCons
+        else:
+            # just init dn instances
+            components = self.dnCons
+        try:
+            # init instances in parallel
+            parallelTool.parallelExecute(self.initInstance, components)
+        except Exception as e:
+            self.logger.logExit(str(e))
+
+        self.logger.log("Successfully initialized instance information.")
+
+    def initInstance(self, component):
+        """
+        function: init a single component
+        input : component
+        output: NA
+        """
+        component.initInstance()
+
+
+if __name__ == '__main__':
+    ##########################################################################
+    # init instance
+    ##########################################################################
+    """
+    function: init instance
+              1. check dbInitParams
+              2. init instance
+              3. save initdb parameters into initdbParamFile
+    input : NA
+    output: NA
+    """
+    try:
+        # parse and check input parameters
+        parseCommandLine()
+        checkParameter()
+
+        # Initialize global parameters
+        # the g_opts.vc_mode parameter indicates whether it is a
+        # virtual cluster mode
+        dbInit = initDbNode(g_opts.logFile, g_opts.clusterUser,
+                            g_opts.dws_mode, g_opts.dbInitParams)
+        dbInit.initNodeInst(g_opts.vc_mode)
+
+    except Exception as e:
+        GaussLog.exitWithError(str(e))
+
+    sys.exit(0)
diff --git a/script/local/Install.py b/script/local/Install.py
new file mode 100644
index 0000000..b0f733f
--- /dev/null
+++ b/script/local/Install.py
@@ -0,0 +1,693 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+#############################################################################
+# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+#
+# openGauss is licensed under Mulan PSL v2.
+# You can use this software according to the terms
+# and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+#
+# http://license.coscl.org.cn/MulanPSL2
+#
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+# ----------------------------------------------------------------------------
+# Description : Install.py is a utility to do gs_install.
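+# A minimal invocation sketch (illustrative; the user, group and paths are
+# hypothetical):
+#   python3 Install.py -t install_cluster -U omm:dbgrp \
+#       -X /opt/software/cluster_config.xml -R /opt/gaussdb/app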
+#############################################################################
+
+import getopt
+import os
+import sys
+import subprocess
+import traceback
+
+sys.path.append(sys.path[0] + "/../")
+from gspylib.common.GaussLog import GaussLog
+from gspylib.common.Common import DefaultValue
+from gspylib.common.ParameterParsecheck import Parameter
+from gspylib.common.ErrorCode import ErrorCode
+from gspylib.common.LocalBaseOM import LocalBaseOM
+from gspylib.os.gsfile import g_file
+from gspylib.os.gsOSlib import g_OSlib
+from gspylib.common.DbClusterInfo import dbClusterInfo
+from gspylib.common.VersionInfo import VersionInfo
+
+#################################################################
+ACTION_INSTALL_CLUSTER = "install_cluster"
+ACTION_INIT_INSTNACE = "init_instance"
+ACTION_CONFIG_CLUSTER = "config_cluster"
+ACTION_START_CLUSTER = "start_cluster"
+ACTION_CLEAN_TEMP_FILE = "clean_temp_file"
+ACTION_PREPARE_CONFIG_CLUSTER = "prepare_config_cluster"
+ACTION_BUILD_STANDBY = "build_standby"
+ACTION_BUILD_CASCADESTANDBY = "build_cascadestandby"
+#################################################################
+g_opts = None
+g_timer = None
+
+
+#################################################################
+
+class CmdOptions():
+    """
+    class: CmdOptions
+    """
+
+    def __init__(self):
+        """
+        Constructor
+        """
+        self.action = ""
+        self.installPath = ""
+        self.logPath = ""
+        self.tmpPath = ""
+        self.user = ""
+        self.group = ""
+        self.clusterName = ""
+        self.clusterConfig = ""
+        self.mpprcFile = ""
+        self.static_config_file = ""
+        self.installflag = False
+        self.logFile = ""
+        self.alarmComponent = ""
+        self.dws_mode = False
+        self.upgrade = False
+        self.productVersion = None
+        # License mode
+        self.licenseMode = None
+        self.time_out = None
+        self.logger = None
+
+
+def usage():
+    """
+Usage:
+    python3 Install.py --help | -?
+    python3 Install.py -t action -U username:groupname -X xmlfile
+        [--alarm=ALARMCOMPONENT]
+        [-l logfile]
+        [--dws-mode]
+        [-R installPath]
+        [-c clusterName]
+        [-M logPath]
+        [-P tmpPath]
+        [-f staticConfigFile]
+Common options:
+    -t                        The type of action.
+    -U                        The user and group name.
+    -X --xmlfile=xmlfile      Cluster config file.
+    --alarm=ALARMCOMPONENT    Alarm component path.
+    --dws-mode                DWS mode.
+    -l --log-file=logfile     The path of log file.
+    -R                        Install path.
+    -c                        Cluster name.
+    -M                        The directory of log file.
+    -P                        The tmp path.
+    -f                        The static_config_file.
+    -? --help                 Show this help screen.
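+    The -t action must be one of: install_cluster, init_instance,
+    config_cluster, start_cluster, clean_temp_file,
+    prepare_config_cluster, build_standby, build_cascadestandby.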
+ """ + print(usage.__doc__) + + +def parseCommandLine(): + """ + function: parse input parameters + input : NA + output: NA + """ + try: + # option '-M' specify the environment parameter GAUSSLOG + # option '-P' specify the environment parameter PGHOST|GAUSSTMP + # option '-u' install new binary for upgrade + opts, args = getopt.getopt(sys.argv[1:], "t:U:X:R:M:P:i:l:c:f:Tu", + ["alarm=", "dws-mode", "time_out=", + "product=", "licensemode="]) + except getopt.GetoptError as e: + usage() + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50000"] + % str(e)) + + if len(args) > 0: + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50000"] + % str(args[0])) + + global g_opts + g_opts = CmdOptions() + + parameter_map = {"-X": g_opts.clusterConfig, "-R": g_opts.installPath, + "-l": g_opts.logFile, "-c": g_opts.clusterName, + "-M": g_opts.logPath, "-P": g_opts.tmpPath, + "-f": g_opts.static_config_file, + "--alarm": g_opts.alarmComponent, + "--licensemode": g_opts.licenseMode, + "--time_out": g_opts.time_out} + parameter_keys = parameter_map.keys() + for key, value in opts: + if key == "-U": + strTemp = value + strList = strTemp.split(":") + if len(strList) != 2: + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50004"] + % "U") + if strList[0] == "" or strList[1] == "": + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50004"] + % "U") + g_opts.user = strList[0] + g_opts.group = strList[1] + elif key in parameter_keys: + parameter_map[key] = value + elif key == "-t": + g_opts.action = value + elif key == "--dws-mode": + g_opts.dws_mode = True + elif key == "-u": + g_opts.upgrade = True + elif key == "-T": + g_opts.installflag = True + else: + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50000"] % value) + Parameter.checkParaVaild(key, value) + + g_opts.clusterConfig = parameter_map["-X"] + g_opts.installPath = parameter_map["-R"] + g_opts.logFile = parameter_map["-l"] + g_opts.clusterName = parameter_map["-c"] + g_opts.logPath = parameter_map["-M"] + g_opts.tmpPath = parameter_map["-P"] + g_opts.static_config_file = parameter_map["-f"] + g_opts.alarmComponent = parameter_map["--alarm"] + g_opts.licenseMode = parameter_map["--licensemode"] + g_opts.time_out = parameter_map["--time_out"] + + +def checkParameterEmpty(parameter, parameterName): + """ + function: check parameter empty + input : parameter, parameterName + output: NA + """ + if parameter == "": + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50001"] + % parameterName + ".") + + +def checkParameter(): + """ + function: check install parameter + input : NA + output: NA + """ + if g_opts.action == "": + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50001"] % 't' + '.') + + if (g_opts.action != ACTION_INSTALL_CLUSTER + and g_opts.action != ACTION_PREPARE_CONFIG_CLUSTER + and g_opts.action != ACTION_INIT_INSTNACE + and g_opts.action != ACTION_CONFIG_CLUSTER + and g_opts.action != ACTION_START_CLUSTER + and g_opts.action != ACTION_CLEAN_TEMP_FILE + and g_opts.action != ACTION_BUILD_STANDBY + and g_opts.action != ACTION_BUILD_CASCADESTANDBY): + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50004"] % "t") + + if (g_opts.clusterConfig != "" and + not os.path.exists(g_opts.clusterConfig)): + GaussLog.exitWithError(ErrorCode.GAUSS_502["GAUSS_50201"] + % g_opts.clusterConfig) + + if (g_opts.logPath != "" and not os.path.exists(g_opts.logPath) + and not os.path.isabs(g_opts.logPath)): + GaussLog.exitWithError(ErrorCode.GAUSS_502["GAUSS_50219"] + % g_opts.logPath) + + if (g_opts.static_config_file != "" and + not 
os.path.isfile(g_opts.static_config_file)): + GaussLog.exitWithError(ErrorCode.GAUSS_502["GAUSS_50219"] + % g_opts.static_config_file) + + # check mpprc file path + g_opts.mpprcFile = DefaultValue.getMpprcFile() + g_opts.logger = GaussLog(g_opts.logFile) + checkParameterEmpty(g_opts.user, "U") + g_opts.installPath = os.path.normpath(g_opts.installPath) + g_opts.installPath = os.path.realpath(g_opts.installPath) + g_opts.logger.log("Using " + g_opts.user + ":" + g_opts.group + + " to install database.") + g_opts.logger.log("Using installation program path : " + + g_opts.installPath) + + if g_opts.logFile == "": + g_opts.logFile = DefaultValue.getOMLogPath( + DefaultValue.LOCAL_LOG_FILE, g_opts.user, "", + g_opts.clusterConfig) + + if g_opts.alarmComponent == "": + g_opts.alarmComponent = DefaultValue.ALARM_COMPONENT_PATH + + +def createLinkToApp(): + """ + function: create link to app + input : NA + output : NA + """ + if g_opts.upgrade: + g_opts.logger.log("Under upgrade process," + " no need to create symbolic link.") + return + g_opts.logger.debug("Created symbolic link to $GAUSSHOME with commitid.") + gaussHome = DefaultValue.getInstallDir(g_opts.user) + if gaussHome == "": + raise Exception(ErrorCode.GAUSS_518["GAUSS_51800"] % "$GAUSSHOME") + versionFile = VersionInfo.get_version_file() + commitid = VersionInfo.get_version_info(versionFile)[2] + actualPath = gaussHome + "_" + commitid + if os.path.exists(gaussHome): + if not os.path.islink(gaussHome): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50200"] % gaussHome + + " Cannot create symbolic link," + " please rename or delete it.") + else: + if os.path.realpath(gaussHome) == actualPath: + g_opts.logger.log("$GAUSSHOME points to %s, no need to create" + " symbolic link." % actualPath) + return + + cmd = "ln -snf %s %s" % (actualPath, gaussHome) + g_opts.logger.log("Command for creating symbolic link: %s." % cmd) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + g_opts.logger.log(output) + g_opts.logger.logExit(ErrorCode.GAUSS_501["GAUSS_50107"] % "app.") + g_opts.logger.debug("Successfully created symbolic link to" + " $GAUSSHOME with commitid.") + + +class Install(LocalBaseOM): + """ + class: install + """ + + def __init__(self, logFile, user, clusterConf, dwsMode=False, + mpprcFile="", installPath="", alarmComponent="", + upgrade=False): + """ + function: Constructor + input : logFile, user, clusterConf, dwsMode, mpprcFile, installPath + alarmComponent, upgrade + output: NA + """ + LocalBaseOM.__init__(self, logFile, user, clusterConf, dwsMode) + + if self.clusterConfig == "": + # Read config from static config file + self.readConfigInfo() + else: + self.clusterInfo = dbClusterInfo() + self.clusterInfo.initFromXml(self.clusterConfig, + g_opts.static_config_file) + hostName = DefaultValue.GetHostIpOrName() + self.dbNodeInfo = self.clusterInfo.getDbNodeByName(hostName) + if self.dbNodeInfo is None: + self.logger.logExit(ErrorCode.GAUSS_516["GAUSS_51619"] + % hostName) + # get user info + self.getUserInfo() + if user != "" and self.user != user.strip(): + self.logger.debug("User parameter : %s." % user) + self.logger.logExit(ErrorCode.GAUSS_503["GAUSS_50315"] + % (self.user, self.clusterInfo.appPath)) + # init every component + self.initComponent() + + self.mpprcFile = mpprcFile + self.installPath = installPath + self.alarmComponent = alarmComponent + self.upgrade = upgrade + # This script will be not validating the parameters. + # Because this should be detected by which instance call + # this local script. 
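+        # productVersion and time_out are filled in by the caller after
+        # construction (the main flow below copies them from g_opts).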
+ self.productVersion = None + self.time_out = None + + def __decompressBinPackage(self): + """ + function: Install database binary file. + input : NA + output: NA + """ + if self.dws_mode: + self.logger.log("Copying bin file.") + bin_image_path = DefaultValue.DWS_APP_PAHT + srcPath = "'%s'/*" % bin_image_path + destPath = "'%s'/" % self.installPath + cmd = g_file.SHELL_CMD_DICT["copyFile"] % (srcPath, destPath) + self.logger.debug("Copy command: " + cmd) + status, output = subprocess.getstatusoutput(cmd) + if status != 0: + self.logger.logExit(ErrorCode.GAUSS_502["GAUSS_50214"] + % srcPath + " Error: " + output) + else: + self.logger.log("Decompressing bin file.") + tarFile = g_OSlib.getBz2FilePath() + # let bin executable + g_file.changeMode(DefaultValue.KEY_DIRECTORY_MODE, tarFile) + + cmd = "export LD_LIBRARY_PATH=$GPHOME/script/gspylib/clib:" \ + "$LD_LIBRARY_PATH && " + # decompress tar file. + strCmd = cmd + "tar -xpf \"" + tarFile + "\" -C \"" + \ + self.installPath + "\"" + self.logger.log("Decompress command: " + strCmd) + status, output = subprocess.getstatusoutput(strCmd) + if status != 0: + self.logger.logExit(ErrorCode.GAUSS_502["GAUSS_50217"] + % tarFile + " Error: \n%s" % str(output)) + + # mv $GPHOME/script/transfer.py to $GAUSSHOME/bin/ + dirName = os.path.dirname(os.path.realpath(__file__)) + transferFile = dirName + "/../../script/transfer.py" + if os.path.exists(transferFile): + g_file.cpFile(transferFile, self.installPath + "/bin/") + g_file.removeFile(transferFile) + # cp $GPHOME/script to $GAUSSHOME/bin/ + g_file.cpFile(dirName + "/../../script", + self.installPath + "/bin/") + + # cp $GAUSSHOME/bin/script/gspylib/etc/sql/pmk to /share/postgresql + destPath = self.installPath + "/share/postgresql/" + pmkPath = self.installPath + "/bin/script/gspylib/etc/sql/" + pmkFile = pmkPath + "pmk_schema.sql" + if os.path.exists(pmkFile): + g_file.cpFile(pmkFile, destPath) + + pmkSingeInstFile = pmkPath + "pmk_schema_single_inst.sql" + if os.path.exists(pmkSingeInstFile): + g_file.cpFile(pmkSingeInstFile, destPath) + + # change owner for tar file. 
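+            # (the call below changes ownership of the whole install path,
+            # not only the tar file, so everything extracted above ends up
+            # owned by the cluster user)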
+ g_file.changeOwner(self.user, self.installPath, True) + self.logger.log("Successfully decompressed bin file.") + + def __saveUpgradeVerionInfo(self): + """ + function: save upgrade version info + input: NA + output: NA + """ + if self.dws_mode: + versionCfgFile = "%s/version.cfg" % DefaultValue.DWS_PACKAGE_PATH + upgradeVersionFile = "%s/bin/upgrade_version" % self.installPath + else: + dirName = os.path.dirname(os.path.realpath(__file__)) + versionCfgFile = "%s/../../version.cfg" % dirName + upgradeVersionFile = "%s/bin/upgrade_version" % self.installPath + + if not os.path.exists(versionCfgFile): + self.logger.logExit(ErrorCode.GAUSS_502["GAUSS_50201"] + % versionCfgFile) + if not os.path.isfile(versionCfgFile): + self.logger.logExit(ErrorCode.GAUSS_502["GAUSS_50210"] + % versionCfgFile) + + try: + # read version info from version.cfg file + (newClusterVersion, newClusterNumber, commitId) = \ + VersionInfo.get_version_info(versionCfgFile) + # save version info to upgrade_version file + if os.path.isfile(upgradeVersionFile): + os.remove(upgradeVersionFile) + + g_file.createFile(upgradeVersionFile) + g_file.writeFile(upgradeVersionFile, + [newClusterVersion, newClusterNumber, commitId]) + g_file.changeMode(DefaultValue.KEY_FILE_MODE, upgradeVersionFile) + except Exception as e: + self.logger.logExit(str(e)) + + def __modifyAlarmItemConfFile(self): + """ + function: modify alarm item conf file + input: NA + output: NA + """ + # modify alarmItem.conf file + alarmItemConfigFile = "%s/bin/alarmItem.conf" % self.installPath + if not os.path.exists(alarmItemConfigFile): + self.logger.log("Alarm's configuration file %s does not exist." + % alarmItemConfigFile) + return + + self.logger.log("Modifying Alarm configuration.") + g_file.replaceFileLineContent("^.*\(alarm_component.*=.*\)", "#\\1", + alarmItemConfigFile) + g_file.writeFile(alarmItemConfigFile, [' ']) + g_file.writeFile(alarmItemConfigFile, ['alarm_component = %s' + % self.alarmComponent]) + + def __createStaticConfig(self): + """ + function: Save cluster info to static config + input : NA + output: NA + """ + staticConfigPath = "%s/bin/cluster_static_config" % self.installPath + # save static config + nodeId = self.dbNodeInfo.id + self.clusterInfo.saveToStaticConfig(staticConfigPath, nodeId) + g_file.changeMode(DefaultValue.KEY_FILE_MODE, staticConfigPath) + g_file.changeOwner(self.user, staticConfigPath, False) + + def __bakInstallPackage(self): + """ + function: backup install package for replace + input : NA + output: NA + """ + dirName = os.path.dirname(os.path.realpath(__file__)) + packageFile = "%s/%s" % (os.path.join(dirName, "./../../"), + DefaultValue.get_package_back_name()) + # Check if MPPDB package exist + if not os.path.exists(packageFile): + self.logger.logExit(ErrorCode.GAUSS_502["GAUSS_50201"] + % 'MPPDB package' + " Can not back up.") + # Save MPPDB package to bin path + destPath = "'%s'/bin/" % self.installPath + g_file.cpFile(packageFile, destPath) + + def __fixInstallPathPermission(self): + """ + function: fix the whole install path's permission + input : NA + output: NA + """ + installPathFileTypeDict = {} + try: + # get files type + installPathFileTypeDict = g_file.getFilesType(self.installPath) + except Exception as e: + self.logger.logExit(str(e)) + + for key in installPathFileTypeDict: + if not os.path.exists(key): + self.logger.debug("[%s] does not exist. Please skip it." + % key) + continue + if os.path.islink(key): + self.logger.debug("[%s] is a link file. Please skip it." 
+ % key) + continue + # skip DbClusterInfo.pyc + if os.path.basename(key) == "DbClusterInfo.pyc": + continue + if (installPathFileTypeDict[key].find("executable") >= 0 or + installPathFileTypeDict[key].find("ELF") >= 0 or + installPathFileTypeDict[key].find("directory") >= 0): + g_file.changeMode(DefaultValue.KEY_DIRECTORY_MODE, key, True) + else: + g_file.changeMode(DefaultValue.KEY_FILE_MODE, key) + + def __changeEnv(self): + """ + function: Change GAUSS_ENV + input : NA + output: NA + """ + # modified user's environmental variable $GAUSS_ENV + self.logger.log("Modifying user's environmental variable $GAUSS_ENV.") + DefaultValue.updateUserEnvVariable(self.mpprcFile, "GAUSS_ENV", "2") + DefaultValue.updateUserEnvVariable(self.mpprcFile, "GS_CLUSTER_NAME", + g_opts.clusterName) + self.logger.log("Successfully modified user's environmental" + " variable $GAUSS_ENV.") + + def __fixFilePermission(self): + """ + function: modify permission for app path + input: NA + ouput: NA + """ + self.logger.log("Fixing file permission.") + binPath = "'%s'/bin" % self.installPath + g_file.changeMode(DefaultValue.KEY_DIRECTORY_MODE, binPath, True) + libPath = "'%s'/lib" % self.installPath + g_file.changeMode(DefaultValue.KEY_DIRECTORY_MODE, libPath, True) + sharePath = "'%s'/share" % self.installPath + g_file.changeMode(DefaultValue.KEY_DIRECTORY_MODE, sharePath, True) + etcPath = "'%s'/etc" % self.installPath + g_file.changeMode(DefaultValue.KEY_DIRECTORY_MODE, etcPath, True) + includePath = "'%s'/include" % self.installPath + g_file.changeMode(DefaultValue.KEY_DIRECTORY_MODE, includePath, True) + + tarFile = "'%s'/bin/'%s'" % (self.installPath, + DefaultValue.get_package_back_name()) + if (os.path.isfile(tarFile)): + g_file.changeMode(DefaultValue.KEY_FILE_MODE, tarFile) + + # ./script/util/*.conf *.service + g_file.changeMode(DefaultValue.KEY_FILE_MODE, + "'%s'/bin/script/gspylib/etc/conf/check_list.conf" + % self.installPath) + g_file.changeMode(DefaultValue.KEY_FILE_MODE, + "'%s'/bin/script/gspylib/etc/conf/" + "check_list_dws.conf" % self.installPath) + g_file.changeMode(DefaultValue.KEY_FILE_MODE, + "'%s'/bin/script/gspylib/etc/conf/gs-OS-set.service" + % self.installPath) + # bin config file + g_file.changeMode(DefaultValue.KEY_FILE_MODE, + "'%s'/bin/alarmItem.conf" % self.installPath) + g_file.changeMode(DefaultValue.KEY_FILE_MODE, + "'%s'/bin/cluster_guc.conf" % self.installPath) + g_file.changeMode(DefaultValue.KEY_FILE_MODE, + "'%s'/bin/upgrade_version" % self.installPath) + g_file.changeMode(DefaultValue.KEY_FILE_MODE, + "'%s'/bin/retry_errcodes.conf" % self.installPath) + g_file.changeMode(DefaultValue.KEY_FILE_MODE, + "'%s'/bin/cluster_static_config" % self.installPath) + + # ./script/local/*.sql + cmd = "find '%s'/bin/script -type f -name \"*.sql\" -exec" \ + " chmod 600 {} \\;" % self.installPath + # ./lib files + cmd += " && find '%s'/lib/ -type f -exec chmod 600 {} \\;" \ + % self.installPath + # ./share files + cmd += " && find '%s'/share/ -type f -exec chmod 600 {} \\;" \ + % self.installPath + self.logger.debug("Command: %s" % cmd) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + self.logger.log(output) + self.logger.logExit(ErrorCode.GAUSS_501["GAUSS_50107"] % "app.") + + def installCluster(self): + """ + function: install application + input : NA + output: NA + """ + self.__decompressBinPackage() + self.__saveUpgradeVerionInfo() + self.__modifyAlarmItemConfFile() + self.__createStaticConfig() + if not self.dws_mode: + self.__bakInstallPackage() + 
self.__fixInstallPathPermission()
+        self.__changeEnv()
+        self.__fixFilePermission()
+
+    def startCluster(self):
+        """
+        function: start cluster
+        input: NA
+        output: NA
+        """
+        for dn in self.dnCons:
+            dn.start(self.time_out)
+
+    def buildStandby(self):
+        """
+        function: build standby instances
+        input: NA
+        output: NA
+        """
+        for dn in self.dnCons:
+            if dn.instInfo.instanceType == DefaultValue.STANDBY_INSTANCE:
+                dn.build()
+
+    def buildCascadeStandby(self):
+        """
+        function: build cascade standby instances
+        input: NA
+        output: NA
+        """
+        for dn in self.dnCons:
+            if dn.instInfo.instanceType == DefaultValue.CASCADE_STANDBY:
+                dn.build_cascade()
+
+    def cleanTempFile(self):
+        """
+        function: clean temp file
+        input: NA
+        output: NA
+        """
+        filename = "/tmp/temp.%s" % self.user
+        try:
+            if os.path.isfile(filename):
+                g_file.removeFile(filename)
+        except Exception as e:
+            raise Exception(ErrorCode.GAUSS_502["GAUSS_50207"]
+                            % ("file [%s]" % filename))
+
+
+if __name__ == '__main__':
+    ##########################################################################
+    # This is the main install flow.
+    ##########################################################################
+    """
+    function: install the cluster
+    input : NA
+    output: NA
+    """
+    try:
+        # Parse the command line and save the options to the global variable
+        parseCommandLine()
+        # check that the parameters are valid
+        checkParameter()
+        createLinkToApp()
+        # Initialize the installer with the global parameters
+        installer = Install(g_opts.logFile, g_opts.user, g_opts.clusterConfig,
+                            g_opts.dws_mode, g_opts.mpprcFile,
+                            g_opts.installPath, g_opts.alarmComponent,
+                            g_opts.upgrade)
+        installer.productVersion = g_opts.productVersion
+        installer.time_out = g_opts.time_out
+        try:
+            functionDict = {ACTION_INSTALL_CLUSTER: installer.installCluster,
+                            ACTION_START_CLUSTER: installer.startCluster,
+                            ACTION_CLEAN_TEMP_FILE: installer.cleanTempFile,
+                            ACTION_BUILD_STANDBY: installer.buildStandby,
+                            ACTION_BUILD_CASCADESTANDBY:
+                                installer.buildCascadeStandby}
+            functionKeys = functionDict.keys()
+
+            if g_opts.action in functionKeys:
+                functionDict[g_opts.action]()
+            else:
+                g_opts.logger.logExit(ErrorCode.GAUSS_500["GAUSS_50004"] % 't'
+                                      + " Value: %s." % g_opts.action)
+        except Exception as e:
+            g_opts.logger.log(traceback.format_exc())
+            g_opts.logger.logExit(str(e))
+
+        # close the log file
+        g_opts.logger.closeLog()
+    except Exception as e:
+        GaussLog.exitWithError(str(e))
+
+    sys.exit(0)
diff --git a/script/local/KerberosUtility.py b/script/local/KerberosUtility.py
new file mode 100644
index 0000000..87ceee7
--- /dev/null
+++ b/script/local/KerberosUtility.py
@@ -0,0 +1,1310 @@
+#!/usr/bin/env python3
+#-*- coding:utf-8 -*-
+# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+#
+# openGauss is licensed under Mulan PSL v2.
+# You can use this software according to the terms
+# and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+#
+#          http://license.coscl.org.cn/MulanPSL2
+#
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+#-------------------------------------------------------------------------
+#
+# KerberosUtility.py
+#     KerberosUtility.py is a utility to handle Kerberos operations
+#
+# IDENTIFICATION
+#     src/manager/om/script/local/KerberosUtility.py
+#
+#-------------------------------------------------------------------------
+
+import sys
+import os
+import getopt
+import subprocess
+import shutil
+import pwd
+
+sys.path.append(sys.path[0] + "/../")
+from gspylib.common.VersionInfo import VersionInfo
+from gspylib.common.DbClusterInfo import initParserXMLFile, dbClusterInfo
+from gspylib.common.Common import DefaultValue
+from gspylib.common.GaussLog import GaussLog
+from gspylib.common.ErrorCode import ErrorCode
+from gspylib.threads.SshTool import SshTool
+from gspylib.os.gsfile import g_file
+from multiprocessing.dummy import Pool as ThreadPool
+
+METHOD_TRUST = "trust"
+BIGDATA_HOME = "$BIGDATA_HOME"
+DUMMY_STANDBY_INSTANCE = 2
+INSTANCE_ROLE_COODINATOR = 3
+g_ignorepgHbaMiss = True
+CONFIG_ITEM_TYPE = "ConfigInstance"
+g_clusterInfo = None
+# flag written to /etc/hosts for the product IP hosts mapping
+GAUSS_HOSTS_MAPPING_FLAG = "#%s IP Hosts Mapping" % VersionInfo.PRODUCT_NAME
+# flag written to /etc/hosts for the Kerberos IP hosts mapping
+KERBEROS_HOSTS_MAPPING_FLAG = "#Kerberos IP Hosts Mapping"
+VALUE_LIST = ["PGKRBSRVNAME", "KRBHOSTNAME", "MPPDB_KRB5_FILE_PATH",
+              "KRB5RCACHETYPE"]
+SERVER_ENV_LIST = ["KRB_HOME", "KRB5_CONFIG", "KRB5_KDC_PROFILE"]
+
+g_logger = None
+g_opts = None
+g_sshTool = None
+
+
+class CmdOptions():
+    """
+    command line options of this utility
+    """
+    def __init__(self):
+        self.action = ""
+        self.user = ""
+        self.mpprcFile = ""
+        self.clusterInfo = None
+        self.principal = ""
+        self.keytab = ""
+        self.dbNodeInfo = None
+        self.krbHomePath = ""
+        self.krbConfigPath = ""
+        self.server = False
+        self.client = False
+        self.gausshome = ""
+        self.gausshome_kerberso = ""
+
+
+def initGlobals():
+    """
+    init global variables
+    input : NA
+    output: NA
+    """
+    global g_opts
+    logFile = DefaultValue.getOMLogPath(DefaultValue.LOCAL_LOG_FILE,
+                                        g_opts.user, "")
+
+    # Init logger
+    global g_logger
+    g_logger = GaussLog(logFile, "KerberosUtility")
+
+    global g_clusterInfo
+    # init for clusterInfo
+    g_clusterInfo = dbClusterInfo()
+    g_clusterInfo.initFromStaticConfig(g_opts.user)
+    g_logger.debug("Cluster information: \n%s."
% str(g_clusterInfo)) + + global g_sshTool + nodenames = g_clusterInfo.getClusterNodeNames() + g_sshTool = SshTool(nodenames) + + try: + # init for __clusterInfo and __dbNodeInfo + g_opts.clusterInfo = g_clusterInfo + hostName = DefaultValue.GetHostIpOrName() + g_opts.dbNodeInfo = g_clusterInfo.getDbNodeByName(hostName) + + #get env variable file + g_opts.mpprcFile = DefaultValue.getMpprcFile() + + # create kerberso directory under GAUSSHOME + gausshome = DefaultValue.getInstallDir(g_opts.user) + if not gausshome: + raise Exception(ErrorCode.GAUSS_518["GAUSS_51802"] % "GAUSSHOME") + g_opts.gausshome = gausshome + g_opts.gausshome_kerberso = os.path.join(gausshome, "kerberos") + if not os.path.isdir(g_opts.gausshome_kerberso): + dir_permission = 0o700 + os.makedirs(g_opts.gausshome_kerberso, mode=dir_permission) + + if g_opts.action == "install" and g_opts.server: + g_logger.debug("%s the kerberos server.", g_opts.action) + + else: + if g_opts.action == "uninstall": + g_logger.debug("%s the kerberos tool.", g_opts.action) + else: + g_logger.debug("%s the kerberos client.", g_opts.action) + tablespace = DefaultValue.getEnv("ELK_SYSTEM_TABLESPACE") + if tablespace is not None and tablespace != "": + xmlfile = os.path.join(os.path.dirname(g_opts.mpprcFile), + DefaultValue.FI_ELK_KRB_XML) + else: + xmlfile = os.path.join(os.path.dirname(g_opts.mpprcFile), + DefaultValue.FI_KRB_XML) + xmlfile = os.path.realpath(xmlfile) + if not os.path.isfile(xmlfile): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % xmlfile) + + rootNode = initParserXMLFile(xmlfile) + elementArray = rootNode.findall("property") + + for element in elementArray: + if (element.find('name').text == "mppdb.kerberos.principal" + or element.find('name').text == "elk.kerberos.principal"): + g_opts.principal = element.find('value').text + if (element.find('name').text == "mppdb.kerberos.keytab" + or element.find('name').text == "elk.kerberos.keytab"): + g_opts.keytab = element.find('value').text + if (element.find('name').text == 'KRB_HOME'): + g_opts.krbHomePath = element.find('value').text + if (element.find('name').text == 'KRB_CONFIG'): + g_opts.krbConfigPath = element.find('value').text + + if(g_opts.principal == "" or g_opts.keytab == "" + or g_opts.krbHomePath == "" or g_opts.krbConfigPath == ""): + raise Exception(ErrorCode.GAUSS_512["GAUSS_51200"] % + "mppdb.kerberos.principal or " + "mppdb.kerberos.keytab" + " or KRB_HOME or KRB_CONFIG" + + " The xml file is %s." % xmlfile) + + except Exception as e: + g_logger.logExit(str(e)) + g_logger.debug("Instance information on local node:\n%s." 
+ % str(g_opts.dbNodeInfo)) + + +class Kerberos(): + def __init__(self): + self.__dbNodeInfo = None + self.__allIps = [] + self.__cooConfig = {} + self.__dataConfig = {} + self.__gtmConfig = {} + self.__cmsConfig = {} + self.__IpStringList = [] + self.__DNStringList = [] + + def __rollback(self, isServer): + g_logger.log("An error happened in executing the command, " + "begin rollback work...") + if isServer: + self.__rollbackServerInstall() + else: + self.__uninstall(True) + g_logger.log("rollback work complete.") + + def __rollbackServerInstall(self): + if os.path.isdir(g_opts.gausshome_kerberso): + shutil.rmtree(g_opts.gausshome_kerberso) + self.__clearEnvironmentVariableValue(True) + self.__cleanAuthConfig() + self.__cleanServer() + + + def __triggerJob(self, isUninstall, isServer=False): + ''' + function: triggerJob for call kinit + ''' + if(not isUninstall): + g_logger.log("start triggerJob.") + if isServer: + self.__initUser() + self.__startServer() + self.__distributeKeyAndSite() + self.__setServiceCron() + else: + self.__executeJob() + g_logger.log("successfully start triggerJob.") + else: + g_logger.log("stop triggerJob.") + self.__cancelCron() + g_logger.log("successfully stop triggerJob.") + + def __clearEnvironmentVariableValue(self, isServer=False): + """ + function: clear kerberos EnvironmentVariable + input: isServer + output: NA + """ + for value in VALUE_LIST: + cmd = "sed -i -e '/^.*%s=/d' '%s'" % (value, g_opts.mpprcFile) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + "Error:\n%s" % output) + if isServer: + for value in SERVER_ENV_LIST: + cmd = "sed -i -e '/^.*%s=/d' '%s' && " \ + "sed -i -e '/^.*PATH=\$%s/d' '%s' && " \ + "sed -i -e '/^.*LD_LIBRARY_PATH=\$%s/d' '%s'" % \ + (value, g_opts.mpprcFile, value, g_opts.mpprcFile, + value, g_opts.mpprcFile) + (status, output) = subprocess.getstatusoutput(cmd) + if(status != 0): + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + "Error:\n%s" % output) + + g_logger.log("successfully clear kerberos env Variables.") + + def __setUserEnvVariable(self, isUninstall, isServer=False, + isRollBack=False): + ''' + function: set user env Variable + ''' + g_logger.log("start set user env Variable.") + if(not isUninstall): + try: + if isServer: + self.__clearEnvironmentVariableValue(True) + + # SET variable KRB_HOME + cmd = "echo \"export KRB_HOME=%s\" >> %s" % \ + (g_opts.gausshome, g_opts.mpprcFile) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + raise Exception(ErrorCode.GAUSS_518["GAUSS_51804"] % + "KRB_HOME" + output + + "\nThe cmd is %s " % cmd) + g_logger.log("Config environment variable KRB_HOME " + "successfully.") + + # SET variable KRB5_CONFIG + cmd = "echo \"export KRB5_CONFIG=%s/krb5.conf\" >> %s" % \ + (g_opts.gausshome_kerberso, g_opts.mpprcFile) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + raise Exception(ErrorCode.GAUSS_518["GAUSS_51804"] % + "KRB5_CONFIG" + output + + "\nThe cmd is %s " % cmd) + g_logger.log("Config environment variable KRB5_CONFIG " + "successfully.") + + # SET variable KRB5_KDC_PROFILE + cmd = "echo \"export KRB5_KDC_PROFILE=%s/kdc.conf\" " \ + ">> %s" \ + % (g_opts.gausshome_kerberso, g_opts.mpprcFile) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + raise Exception(ErrorCode.GAUSS_518["GAUSS_51804"] % + "KRB5_KDC_PROFILE" + output + + "\nThe cmd is %s " % cmd) + g_logger.log("Config environment " + "variable 
KRB5_KDC_PROFILE successfully.") + + # SET variable PATH + cmd = "echo \"export PATH=\$KRB_HOME/bin:\$PATH\" " \ + ">> %s" % (g_opts.mpprcFile) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + raise Exception(ErrorCode.GAUSS_518["GAUSS_51804"] % + "PATH" + output + + "\nThe cmd is %s " % cmd) + g_logger.log("Config environment variable PATH " + "successfully.") + + # SET variable LD_LIBRARY_PATH + cmd = "echo \"export LD_LIBRARY_PATH=\$KRB_HOME/lib:" \ + "\$LD_LIBRARY_PATH\" >> %s" % (g_opts.mpprcFile) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + raise Exception(ErrorCode.GAUSS_518["GAUSS_51804"] % + "LD_LIBRARY_PATH" + output + + "\nThe cmd is %s " % cmd) + g_logger.log("Config environment variable LD_LIBRARY_PATH " + "successfully.") + + else: + # get principal + principals = g_opts.principal.split("/") + if (len(g_opts.principal.split('/')) < 2): + raise Exception(ErrorCode.GAUSS_500["GAUSS_50009"] + + "principal: %s" % g_opts.principal) + address = g_opts.principal.split('/')[1].split('@')[0] + + self.__clearEnvironmentVariableValue() + # SET variable KRB5_CONFIG + cmd = "echo \"export MPPDB_KRB5_FILE_PATH=%s/krb5.conf\"" \ + " >> %s" % (os.path.dirname(g_opts.mpprcFile), + g_opts.mpprcFile) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] + % cmd + "Error:\n%s." % output) + g_logger.log("Config environment variable KRB5_CONFIG " + "successfully.") + # SET variable PGKRBSRVNAME + + cmd = "echo \"export PGKRBSRVNAME=%s\" >>%s" % \ + (principals[0], g_opts.mpprcFile) + + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] + % cmd + "Error:\n%s." % output) + g_logger.log("Config environment variable PGKRBSRVNAME " + "successfully.") + # SET variable KRBHOSTNAME + + cmd = "echo \"export KRBHOSTNAME=%s\" >>%s" % \ + (address, g_opts.mpprcFile) + + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] + % cmd + "Error:\n%s." % output) + g_logger.log("Config environment variable KRBHOSTNAME " + "successfully.") + # SET variable KRB5RCACHETYPE + cmd = "echo \"export KRB5RCACHETYPE=none\" >>%s" % \ + (g_opts.mpprcFile) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] + % cmd + "Error:\n%s." % output) + g_logger.log("Config environment variable KRB5RCACHETYPE " + "successfully.") + except Exception as e: + raise Exception(ErrorCode.GAUSS_518["GAUSS_51804"] + % "" + "Error:%s." 
% str(e)) + else: + if isRollBack: + self.__clearEnvironmentVariableValue(False) + else: + self.__clearEnvironmentVariableValue(True) + + def __configPostgresql(self, isUninstall): + ''' + function: set config postgresql file + ''' + g_logger.log("start set config postgresql file") + + instanceList = [] + if(not isUninstall): + self.__cooConfig["krb_server_keyfile"] = "'" + g_opts.keytab + "'" + self.__dataConfig["krb_server_keyfile"] = "'" + g_opts.keytab + "'" + self.__gtmConfig["gtm_authentication_type"] = "gss" + self.__gtmConfig["gtm_krb_server_keyfile"] = "'" + g_opts.keytab \ + + "'" + self.__cmsConfig["cm_auth_method"] = "gss" + self.__cmsConfig["cm_krb_server_keyfile"] = "'" + g_opts.keytab + \ + "'" + else: + self.__cooConfig["krb_server_keyfile"] = "" + self.__dataConfig["krb_server_keyfile"] = "" + self.__gtmConfig["gtm_authentication_type"] = "trust" + self.__gtmConfig["gtm_krb_server_keyfile"] = "''" + self.__cmsConfig["cm_auth_method"] = "trust" + self.__cmsConfig["cm_krb_server_keyfile"] = "''" + + # get coordinators instance + for cooInst in g_opts.dbNodeInfo.coordinators: + instanceList.append(cooInst) + # get datanode instance + for dnInst in g_opts.dbNodeInfo.datanodes: + instanceList.append(dnInst) + # get gtm instance + for gtmInst in g_opts.dbNodeInfo.gtms: + instanceList.append(gtmInst) + # get cms instance + for cmsInst in g_opts.dbNodeInfo.cmservers: + instanceList.append(cmsInst) + + if(len(instanceList) == 0): + return + try: + #config instance in paralle + pool = ThreadPool(DefaultValue.getCpuSet()) + pool.map(self.__configInst, instanceList) + pool.close() + pool.join() + except Exception as e: + raise Exception(str(e)) + g_logger.log("successfully set config postgresql file") + + def __configPgHba(self, isUninstall): + ''' + set pg_hba.conf file + ''' + g_logger.log("start config pg_hba file") + try: + # get current node information + hostName = DefaultValue.GetHostIpOrName() + self.__dbNodeInfo = g_clusterInfo.getDbNodeByName(hostName) + if (self.__dbNodeInfo is None): + raise Exception(ErrorCode.GAUSS_516["GAUSS_51620"] % "local" + + " There is no host named %s." % hostName) + #getall node names + nodenames = g_clusterInfo.getClusterNodeNames() + for nodename in nodenames: + nodeinfo = g_clusterInfo.getDbNodeByName(nodename) + self.__allIps += nodeinfo.backIps + self.__allIps += nodeinfo.sshIps + for inst in nodeinfo.cmservers: + self.__allIps += inst.haIps + self.__allIps += inst.listenIps + for inst in nodeinfo.coordinators: + self.__allIps += inst.haIps + self.__allIps += inst.listenIps + for inst in nodeinfo.datanodes: + self.__allIps += inst.haIps + self.__allIps += inst.listenIps + for inst in nodeinfo.gtms: + self.__allIps += inst.haIps + self.__allIps += inst.listenIps + # set local ip 127.0.0.1 + self.__allIps += ['127.0.0.1'] + # get all ips. 
Remove the duplicates ips + self.__allIps = DefaultValue.Deduplication(self.__allIps) + # build ip string list + #set Kerberos ip + principals = g_opts.principal.split("/") + principals = principals[1].split("@") + # Every 1000 records merged into one" + ipstring = "" + j = 0 + for ip in self.__allIps: + j += 1 + if not isUninstall: + ipstring += " -h 'host all all " \ + " %s/32 gss " \ + "include_realm=1 krb_realm=%s'" % \ + (ip, principals[1]) + else: + ipstring += " -h 'host all all " \ + " %s/32 %s'" % (ip, METHOD_TRUST) + if ipstring != "": + self.__IpStringList.append(ipstring) + #write config hba + self.__writeConfigHba() + except Exception as e: + raise Exception(ErrorCode.GAUSS_530["GAUSS_53024"] + + "Error:%s." % str(e)) + + g_logger.debug("Instance information about local node:\n%s." % + str(self.__dbNodeInfo)) + g_logger.log("successfully config pg_hba file") + + def __configDNPgHba(self, isUninstall): + ''' + set DN pg_hba.conf file for replication channel + ''' + g_logger.log("start config pg_hba file for database node replication " + "channel") + try: + principals = g_opts.principal.split("/") + principals = principals[1].split("@") + ipstring = "" + if (not isUninstall): + ipstring += " -h 'host replication %s " \ + " ::1/128 gss include_realm=1" \ + " krb_realm=%s'" % \ + (g_opts.user, principals[1]) + else: + ipstring += " -h 'host replication %s " \ + " ::1/128 %s'" % \ + (g_opts.user, METHOD_TRUST) + if (ipstring != ""): + self.__DNStringList.append(ipstring) + self.__writeDNConfigHba() + except Exception as e: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50205"] + % ("database node config for pg_hba.conf. %s" + % str(e))) + g_logger.log("successfully config pg_hba file for database node " + "replication channel") + + def __configInst(self, dbInst): + """ + function: Modify a parameter of postgresql.conf + input : typename, datadir, configFile, parmeterDict + output: NA + """ + configFile = os.path.join(dbInst.datadir, "postgresql.conf") + if (dbInst.instanceRole != DefaultValue.INSTANCE_ROLE_GTM and + dbInst.instanceRole != DefaultValue.INSTANCE_ROLE_CMSERVER and + not os.path.isfile(configFile)): + return + if dbInst.instanceRole == DefaultValue.INSTANCE_ROLE_COODINATOR: + # modifying CN configuration. + g_logger.log("Modify CN %s configuration." % dbInst.instanceId) + configFile = os.path.join(dbInst.datadir, "postgresql.conf") + # Set default value for each inst + tempCommonDict = self.__cooConfig + self.__setConfigItem(DefaultValue.INSTANCE_ROLE_COODINATOR, + dbInst.datadir, tempCommonDict) + + if dbInst.instanceRole == DefaultValue.INSTANCE_ROLE_DATANODE: + # modifying database node configuration. + g_logger.log("Modify database node %s configuration." + % dbInst.instanceId) + # Set default value for each inst + tempCommonDict = self.__dataConfig + try: + self.__setConfigItem(DefaultValue.INSTANCE_ROLE_DATANODE, + dbInst.datadir, tempCommonDict) + except Exception as e: + raise Exception(str(e)) + + if dbInst.instanceRole == DefaultValue.INSTANCE_ROLE_GTM: + # modifying GTM configuration. + g_logger.log("Modify GTM %s configuration." % dbInst.instanceId) + # Set default value for each inst + tempCommonDict = self.__gtmConfig + try: + self.__setConfigItem(DefaultValue.INSTANCE_ROLE_GTM, + dbInst.datadir, tempCommonDict) + except Exception as e: + raise Exception(str(e)) + + if dbInst.instanceRole == DefaultValue.INSTANCE_ROLE_CMSERVER: + # modifying CMSERVER configuration. + g_logger.log("Modify CMserver %s configuration." 
+ % dbInst.instanceId) + # Set default value for each inst + tempCommonDict = self.__cmsConfig + try: + self.__setConfigItem(DefaultValue.INSTANCE_ROLE_CMSERVER, + dbInst.datadir, tempCommonDict) + except Exception as e: + raise Exception(str(e)) + + def __setConfigItem(self, typename, datadir, parmeterDict): + """ + function: Modify a parameter + input : typename, datadir, parmeterDict + output: NA + """ + # build GUC parameter string + gucstr = "" + for entry in list(parmeterDict.items()): + if entry[1] == "": + gucstr += " -c \"%s=\'\'\"" % (entry[0]) + else: + gucstr += " -c \"%s=%s\"" % (entry[0], entry[1]) + # check the GUC parameter string + if gucstr == "": + return + if typename == DefaultValue.INSTANCE_ROLE_DATANODE or \ + typename == DefaultValue.INSTANCE_ROLE_COODINATOR: + cmd = "source '%s'; gs_guc set -D %s %s" % \ + (g_opts.mpprcFile, datadir, gucstr) + DefaultValue.retry_gs_guc(cmd) + if self.__gsdbStatus(): + cmd = "source '%s'; gs_guc reload -D %s %s" % \ + (g_opts.mpprcFile, datadir, gucstr) + try: + DefaultValue.retry_gs_guc(cmd) + except Exception as e: + raise Exception(str(e)) + else: + cmd = "source '%s'; gs_guc set -N all -I all %s" % \ + (g_opts.mpprcFile, gucstr) + DefaultValue.retry_gs_guc(cmd) + + def __gsdbStatus(self): + """ + function: get gaussdb process + input: NA + output: True/False + """ + cmd = "ps ux | grep -v '\' | grep '%s/bin/gaussdb'" % \ + g_clusterInfo.appPath + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0 and output: + raise Exception("Get gaussdb process failed." + + "The cmd is %s " % cmd) + if output: + return True + return False + + def __writeConfigHba(self): + """ + function: set hba config + input : NA + output: NA + """ + instances = self.__dbNodeInfo.datanodes + \ + self.__dbNodeInfo.coordinators + #Determine whether this node containing CN, DN instance + if(len(instances) == 0): + g_logger.debug("The count number of coordinator and " + "datanode on local node is zero.") + return + try: + pool = ThreadPool(DefaultValue.getCpuSet()) + pool.map(self.__configAnInstance, instances) + pool.close() + pool.join() + except Exception as e: + raise Exception(str(e)) + + def __writeDNConfigHba(self): + """ + function: set hba config for dn replication channel + input : NA + output: NA + """ + instances = self.__dbNodeInfo.datanodes + if (len(instances) == 0): + g_logger.debug("The count number of datanode " + "on local node is zero.") + return + try: + pool = ThreadPool(DefaultValue.getCpuSet()) + pool.map(self.__configAnInstanceHA, instances) + pool.close() + pool.join() + except Exception as e: + raise Exception(str(e)) + + def __configAnInstance(self, instance): + # check instance data directory + if (instance.datadir == "" or not os.path.isdir(instance.datadir)): + if(g_ignorepgHbaMiss): + g_logger.debug("Failed to obtain data directory of " + "the instance[%s]." % str(instance)) + return + else: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50219"] % + ("data directory of the instance[%s]" % + str(instance))) + # check pg_hba.conf + hbaFile = "%s/pg_hba.conf" % instance.datadir + if(g_ignorepgHbaMiss and not os.path.isfile(hbaFile)): + g_logger.debug("The %s does not exist." 
% hbaFile)
+            return
+        # do gs_guc to add host into pg_hba.conf
+        self.__addHostToFile(instance.datadir)
+
+    def __configAnInstanceHA(self, instance):
+        """
+        function: add the replication entry into the pg_hba.conf of a
+                  DN instance
+        input : instance
+        output: NA
+        """
+        instanceRole = "datanode"
+        # check instance data directory
+        if (instance.datadir == "" or not os.path.isdir(instance.datadir)):
+            if(g_ignorepgHbaMiss):
+                g_logger.debug("Failed to obtain data directory "
+                               "of the instance[%s]." % str(instance))
+                return
+            else:
+                raise Exception(ErrorCode.GAUSS_502["GAUSS_50219"] %
+                                ("data directory of the instance[%s]"
+                                 % str(instance)))
+        # check pg_hba.conf
+        hbaFile = "%s/pg_hba.conf" % instance.datadir
+        if(g_ignorepgHbaMiss and not os.path.isfile(hbaFile)):
+            g_logger.debug("The %s does not exist." % hbaFile)
+            return
+        # do gs_guc to add host into pg_hba.conf
+        self.__addDNhostToFile(instanceRole, instance.datadir)
+
+    def __addHostToFile(self, instanceDataPath):
+        """
+        function: add the host entries into pg_hba.conf through gs_guc
+        input : instanceDataPath
+        output: NA
+        """
+        for IpString in self.__IpStringList:
+            cmd = "source '%s';gs_guc set -D %s %s" % (g_opts.mpprcFile,
+                                                       instanceDataPath,
+                                                       IpString)
+            DefaultValue.retry_gs_guc(cmd)
+
+    def __addDNhostToFile(self, dbInstanceRole, instanceDataPath):
+        """
+        function: add the replication entry into the DN pg_hba.conf
+                  for the replication channel
+        input : dbInstanceRole, instanceDataPath
+        output: NA
+        """
+        if (dbInstanceRole == "datanode"):
+            for IpDNString in self.__DNStringList:
+                cmd = "source '%s';gs_guc set -D %s %s" % \
+                      (g_opts.mpprcFile, instanceDataPath, IpDNString)
+                DefaultValue.retry_gs_guc(cmd)
+
+    def __executeJob(self):
+        """
+        function: obtain a TGT by calling the kinit tool
+        input : NA
+        output: NA
+        """
+        try:
+            kinitPath = "%s/bin/kinit" % g_opts.krbHomePath
+            kcmd = 'export LD_LIBRARY_PATH=%s/lib:$LD_LIBRARY_PATH;' \
+                   'export KRB5_CONFIG=$MPPDB_KRB5_FILE_PATH;%s -k -t %s %s' \
+                   % (g_opts.krbHomePath, kinitPath,
+                      g_opts.keytab, g_opts.principal)
+            cmd = 'source %s; %s' % (g_opts.mpprcFile, kcmd)
+            (status, output) = DefaultValue.retryGetstatusoutput(cmd)
+            if(status != 0):
+                raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd +
+                                " Output: \n%s" % str(output))
+            g_logger.debug("Get ticket successfully.")
+            # set up the cron job that keeps the ticket fresh
+            self.__setCron()
+        except Exception as e:
+            raise Exception("Failed to obtain a TGT with kinit: %s."
+                            % cmd + " Exception: \n%s" % str(e))
+
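+    # The cron entry written by __setCron runs every minute: when klist
+    # finds no valid ticket cache it calls kinit at once; otherwise it
+    # re-runs kinit if the krbtgt ticket is within 300 seconds of expiry,
+    # or if the ticket's start time lies in the future (for example after
+    # a clock adjustment). The heavy backslash escaping has to survive
+    # being echoed into the crontab file and then executed by cron.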
+    def __setCron(self):
+        """
+        function: set a Linux cron entry that periodically renews the
+                  Kerberos ticket
+        input : NA
+        output: NA
+        """
+        g_logger.log("Set CRON.")
+        cronFile = "%s/gauss_cron_%d" % \
+                   (DefaultValue.getTmpDirFromEnv(g_opts.user), os.getpid())
+        setCronCmd = "crontab -l > %s && " % cronFile
+        setCronCmd += "sed -i '/^.*kinit.*$/d' '%s'; " % cronFile
+        setCronCmd += '''echo '*/1 * * * * source '%s';''' \
+                      '''export LD_LIBRARY_PATH='%s'/lib:$LD_LIBRARY_PATH;''' \
+                      '''export KRB5_CONFIG=$MPPDB_KRB5_FILE_PATH ''' % \
+                      (g_opts.mpprcFile, g_opts.krbHomePath)
+        setCronCmd += '''klistcmd="'%s'/bin/klist";''' % (g_opts.krbHomePath)
+        setCronCmd += '''kinitcmd="'%s'/bin/kinit -k -t %s %s ";''' % \
+                      (g_opts.krbHomePath, g_opts.keytab, g_opts.principal)
+        setCronCmd += '''klistresult=`$klistcmd>>/dev/null 2>&1;echo $?`;'''
+        setCronCmd += '''if [ $klistresult -ne 0 ];then `$kinitcmd`;else ''' \
+                      '''expiresTime=`$klistcmd|grep krbtgt|awk -F " " ''' \
+                      '''"{print \\\\\\$2}"`;startTime=`$klistcmd|grep ''' \
+                      '''krbtgt|awk -F " " "{print \\\\\\$1}"`;''' \
+                      '''currentTime=`date +\%%s`;''' \
+                      '''if [ $[`date -d "$expiresTime"''' \
+                      ''' +\%%s`-$currentTime] -le 300 ] || [ $[`date -d''' \
+                      ''' "$startTime" +\%%s`-$currentTime] -ge 0 ];''' \
+                      '''then `$kinitcmd`;fi;fi;>>/dev/null 2>&1''' \
+                      '''& ' >> %s ;''' % (cronFile)
+        setCronCmd += "crontab %s&&" % cronFile
+        setCronCmd += "rm -f '%s'" % cronFile
+
+        g_logger.debug("Command for setting CRON: %s" % setCronCmd)
+        (status, output) = subprocess.getstatusoutput(setCronCmd)
+        if(status != 0):
+            raise Exception(ErrorCode.GAUSS_508["GAUSS_50801"] +
+                            " Error: \n%s." % str(output) +
+                            "The cmd is %s " % setCronCmd)
+
+        # run the same renewal logic once right now, so a valid ticket
+        # exists before the first cron tick
+        cmd = "source %s;export LD_LIBRARY_PATH=%s/lib:$LD_LIBRARY_PATH;" \
+              "export KRB5_CONFIG=$MPPDB_KRB5_FILE_PATH;" % \
+              (g_opts.mpprcFile, g_opts.krbHomePath)
+        cmd += "klistcmd='%s/bin/klist';" % (g_opts.krbHomePath)
+        cmd += "kinitcmd='%s/bin/kinit -k -t %s %s';" % \
+               (g_opts.krbHomePath, g_opts.keytab, g_opts.principal)
+        cmd += "klistresult=`$klistcmd>>/dev/null 2>&1;echo $?`;"
+        cmd += "if [ $klistresult -ne 0 ];then `$kinitcmd`;" \
+               "else expiresTime=`$klistcmd|grep krbtgt|" \
+               "awk -F ' ' '{print \$2}'`;" \
+               "startTime=`$klistcmd|grep krbtgt|awk -F ' ' '{print \$1}'`;" \
+               "currentTime=`date +\%s`;" \
+               "if [ $[`date -d $expiresTime +\%s`-$currentTime] -le 300 ] " \
+               "|| [ $[`date -d $startTime +\%s`-$currentTime] -ge 0 ];" \
+               "then `$kinitcmd`;fi;fi;>>/dev/null 2>&1"
+        (status, output) = subprocess.getstatusoutput(cmd)
+        if(status != 0):
+            raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd +
+                            "Error:\n%s" % str(output))
+
+        g_logger.log("Successfully Set CRON.")
+
+
+    def __setServiceCron(self):
+        """
+        function: set a cron entry that restarts krb5kdc when the
+                  process is not running
+        input : NA
+        output: NA
+        """
+        g_logger.log("Set CRON.")
+        cronFile = "%s/gauss_cron_%d" % \
+                   (DefaultValue.getTmpDirFromEnv(g_opts.user), os.getpid())
+        setCronCmd = "crontab -l > %s && " % cronFile
+        setCronCmd += "sed -i '/^.*krb5kdc.*$/d' '%s'; " % cronFile
+        setCronCmd += '''echo "*/1 * * * * source %s; ''' \
+                      '''kdc_pid_list=\`ps ux | grep -E krb5kdc| ''' \
+                      '''grep -v grep | awk '{print \\\\\\$2}'\` && ''' \
+                      '''(if [ X\"\$kdc_pid_list\" == X\"\" ]; ''' \
+                      '''then krb5kdc; fi) " >> %s; ''' % \
+                      (g_opts.mpprcFile, cronFile)
+        setCronCmd += "crontab %s && " % cronFile
+        setCronCmd += "rm -f '%s'" % cronFile
+
+        g_logger.debug("Command for setting CRON: %s" % setCronCmd)
+        (status, output) = subprocess.getstatusoutput(setCronCmd)
+        if(status != 0):
+            raise
Exception(ErrorCode.GAUSS_508["GAUSS_50801"] + + " Error: \n%s." % str(output) + + "The cmd is %s " % setCronCmd) + + g_logger.log("Successfully Set CRON.") + + def __cancelCron(self): + """ + function: clean kerberos_monitor process and delete cron + input : NA + output: NA + """ + g_logger.log("Deleting kerberos monitor.") + try: + # Remove cron + crontabFile = "%s/gauss_crontab_file_%d" % \ + (DefaultValue.getTmpDirFromEnv(g_opts.user), + os.getpid()) + cmd = "crontab -l > %s; " % crontabFile + cmd += "sed -i '/^.*kinit.*$/d' '%s'; " % crontabFile + cmd += "crontab '%s';" % crontabFile + cmd += "rm -f '%s'" % crontabFile + + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + #no need raise error here, user can do it manually. + g_logger.debug("Failed to delete regular tasks. Error: \n%s" + " You can do it manually." % str(output)) + g_logger.debug("The cmd is %s " % cmd) + cmd = "source '%s';export LD_LIBRARY_PATH=%s/lib:" \ + "$LD_LIBRARY_PATH;export KRB5_CONFIG=" \ + "$MPPDB_KRB5_FILE_PATH;%s/bin/kdestroy" % \ + (g_opts.mpprcFile, g_opts.krbHomePath, g_opts.krbHomePath) + (status, output) = DefaultValue.retryGetstatusoutput(cmd) + if (status != 0): + g_logger.debug("Failed to delete ticket. Error: \n%s" % + str(output)) + g_logger.debug("The cmd is %s " % cmd) + except Exception as e: + raise Exception(str(e)) + g_logger.log("Successfully deleted kerberos OMMonitor.") + + def __copyConf(self, src_dir, dest_dir, file_list): + for config_file in file_list: + src_path = os.path.realpath(os.path.join(src_dir, config_file)) + if (not os.path.isfile(src_path)): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % src_path) + dest_path = os.path.realpath(os.path.join(dest_dir, config_file)) + try: + shutil.copy(src_path, dest_path) + except Exception as e: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50214"] % src_path) + g_logger.log("Copy server config files successfully.") + + def __initKadm5Conf(self, dest_dir): + kadm5_file = os.path.realpath(os.path.join(dest_dir, "kadm5.acl")) + cmd = "sed -i 's/#realms#/HUAWEI.COM/g' %s" % kadm5_file + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + g_logger.debug("The cmd is %s " % cmd) + raise Exception(ErrorCode.GAUSS_502["GAUSS_50205"] % kadm5_file + + output) + g_logger.log("Initialize \"kadm5.acl\" successfully.") + + def __initKrb5Conf(self, dest_dir, dest_file='krb5.conf'): + krb5_file = os.path.realpath(os.path.join(dest_dir, dest_file)) + kdc_ip = g_opts.dbNodeInfo.backIps[0] + kdc_port = 21732 + krb_conf = g_opts.gausshome_kerberso + gausslog = DefaultValue.getEnvironmentParameterValue("GAUSSLOG", "") + if not gausslog: + raise Exception(ErrorCode.GAUSS_518["GAUSS_51802"] % "GAUSSLOG") + cmd = "sed -i 's/#kdc_ip#/%s/g' %s && \ + sed -i 's/#kdc_ports#/%d/g' %s && \ + sed -i 's;#krb_conf#;%s;g' %s && \ + sed -i 's;#GAUSSHOME#;%s;g' %s" % \ + (kdc_ip, krb5_file, + kdc_port, krb5_file, + g_opts.gausshome_kerberso, krb5_file, + gausslog, krb5_file) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + g_logger.debug("The cmd is %s " % cmd) + raise Exception(ErrorCode.GAUSS_502["GAUSS_50205"] % krb5_file + + output) + + kerberoslog = os.path.join(gausslog, "kerberos") + cmd = "if [ ! 
-d '%s' ]; then mkdir -p '%s' -m %s; fi" % (kerberoslog, + kerberoslog, DefaultValue.KEY_DIRECTORY_MODE) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + g_logger.debug("The cmd is %s " % cmd) + raise Exception(ErrorCode.GAUSS_502["GAUSS_50205"] % krb5_file + + output) + + g_logger.log("Initialize \"krb5.conf\" successfully.") + + def __initKdcConf(self, dest_dir): + self.__initKrb5Conf(dest_dir, "kdc.conf") + + kdc_file = os.path.realpath(os.path.join(dest_dir, "kdc.conf")) + cmd = "sed -i 's;#KRB_HOME#;%s;g' %s" % (g_opts.gausshome, kdc_file) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + g_logger.debug("The cmd is %s " % cmd) + raise Exception(ErrorCode.GAUSS_502["GAUSS_50205"] % kdc_file + + output) + + g_logger.log("Initialize \"kdc.conf\" successfully.") + + def __initMppdbSite(self, dest_dir): + mppdb_site_file = os.path.realpath(os.path.join(dest_dir, + "mppdb-site.xml")) + principal = "%s/huawei.huawei.com@HUAWEI.COM " % g_opts.user + cmd = "sed -i 's;#mppdb.kerberos.principal#;%s;g' %s" % \ + (principal, mppdb_site_file) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + g_logger.debug("The cmd is %s " % cmd) + raise Exception(ErrorCode.GAUSS_502["GAUSS_50205"] % + mppdb_site_file + output) + + cmd = "sed -i 's;#KRB_HOME#;%s;g' %s" % (g_opts.gausshome, + mppdb_site_file) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + g_logger.debug("The cmd is %s " % cmd) + raise Exception(ErrorCode.GAUSS_502["GAUSS_50205"] % + mppdb_site_file + output) + + kdc_conf = os.path.realpath(os.path.join(g_opts.gausshome_kerberso, + "kdc.conf")) + cmd = "sed -i 's;#KRB_CONFIG#;%s;g' %s" % (kdc_conf, mppdb_site_file) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + g_logger.debug("The cmd is %s " % cmd) + raise Exception(ErrorCode.GAUSS_502["GAUSS_50205"] % + mppdb_site_file + output) + + keytab = os.path.realpath(os.path.join(g_opts.gausshome_kerberso, + "%s.keytab" % g_opts.user)) + cmd = "sed -i 's;#mppdb.kerberos.keytab#;%s;g' %s" % (keytab, + mppdb_site_file) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + g_logger.debug("The cmd is %s " % cmd) + raise Exception(ErrorCode.GAUSS_502["GAUSS_50205"] % + mppdb_site_file + output) + + g_logger.log("Initialize \"mppdb-site.xml\" successfully.") + + def __configKrb5(self, isUninstall, isServer=False): + """ + function: config specify krb5.conf + input: isUninstall, isServer + output: NA + """ + destfile = "%s/krb5.conf" % os.path.dirname(g_opts.mpprcFile) + if not isUninstall: + if isServer: + # 1.copy conf files to GAUSSHOME/kerberos + CONFIG_LIST = ["kadm5.acl", "kdc.conf", "krb5.conf"] + src_path = os.path.realpath(os.path.join(g_opts.gausshome, + "etc", "kerberos")) + self.__copyConf(src_path, g_opts.gausshome_kerberso, + CONFIG_LIST) + # 2.initialize conf files + self.__initKadm5Conf(g_opts.gausshome_kerberso) + self.__initKrb5Conf(g_opts.gausshome_kerberso) + self.__initKdcConf(g_opts.gausshome_kerberso) + + else: + #1. copy "krb5.conf" + if (os.path.isfile(g_opts.krbConfigPath)): + shutil.copy(g_opts.krbConfigPath, destfile) + else: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50228"] % + g_opts.krbConfigPath) + #2. 
change cache file path of kerberos + if(not os.path.isdir("%s/auth_config" % + os.path.dirname(g_opts.mpprcFile))): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % + ("%s/auth_config" % + os.path.dirname(g_opts.mpprcFile))) + cmd = "sed -i '/default_realm.*/i default_ccache_name = " \ + "FILE:%s/auth_config/krb5cc_%s' '%s'" % \ + (os.path.dirname(g_opts.mpprcFile), + pwd.getpwnam(g_opts.user).pw_uid, destfile) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + g_logger.debug("The cmd is %s " % cmd) + raise Exception("Config 'krb5.conf' failed.cmd: %s" % cmd) + g_logger.log("Client Config \"krb5.conf\" successfully.") + + else: + if os.path.isfile(destfile): + os.remove(destfile) + + g_logger.log("Clear \"krb5.conf\" successfully.") + + def __initUser(self): + + # create kerberos database + kerberos_database_file = \ + os.path.realpath(os.path.join(g_opts.gausshome, + "var", "krb5kdc", "principal")) + if os.path.isfile(kerberos_database_file): + g_logger.debug("kerberos database has existed.") + else: + dir_permission = 0o700 + os.makedirs(os.path.dirname(kerberos_database_file), + mode=dir_permission) + with open("/dev/random", 'rb') as fp: + srp = fp.read(16) + passwd = int(srp.hex(), 16) + cmd = "source %s && kdb5_util create -r HUAWEI.COM -s -P %s" % \ + (g_opts.mpprcFile, str(passwd)) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + g_logger.debug("The cmd is %s " % cmd) + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % + "kdb5_util") + g_logger.debug("Create kerberos database successfully.") + + # create kerberos database user + cmd = "source %s && kadmin.local -q \"addprinc " \ + "-randkey %s/huawei.huawei.com\"" % \ + (g_opts.mpprcFile, g_opts.user) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + output) + g_logger.debug("Create kerberos database user successfully.") + + # create kerberos keytab + cmd = "source %s && kadmin.local -q \"ktadd -k %s/%s.keytab " \ + "%s/huawei.huawei.com@HUAWEI.COM\"" % \ + (g_opts.mpprcFile, g_opts.gausshome_kerberso, + g_opts.user, g_opts.user) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + output) + g_logger.debug("Create kerberos keytab successfully.") + + g_logger.log("Initialize kerberos user successfully.") + + def __startServer(self): + # start kdc + cmd = "source %s && krb5kdc" % g_opts.mpprcFile + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + output) + g_logger.debug("Start kerberos kdc successfully.") + + g_logger.log("Start kerberos server successfully.") + + def __distributeKeyAndSite(self): + hostlist = [] + for hostName in g_sshTool.hostNames: + if hostName != g_opts.dbNodeInfo.name: + hostlist.append(hostName) + g_logger.debug("Distribute nodes: %s" % ",".join(hostlist)) + # distribute keytab + dest_kerberos_dir = os.path.dirname(g_opts.gausshome_kerberso) + '/' + g_sshTool.scpFiles(g_opts.gausshome_kerberso, + dest_kerberos_dir, hostlist) + + # create auth_config + mppdb_site_dir = os.path.join(os.path.dirname(g_opts.mpprcFile), + "auth_config") + cmd = "if [ ! 
-d '%s' ]; then mkdir %s; fi" % (mppdb_site_dir, + mppdb_site_dir) + g_sshTool.executeCommand(cmd, "create auth_config directory", + DefaultValue.SUCCESS, g_sshTool.hostNames, + g_opts.mpprcFile) + # copy mppdb-site.xml + src_path = os.path.realpath(os.path.join(g_opts.gausshome, "etc", + "kerberos", "mppdb-site.xml")) + if not os.path.isfile(src_path): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % src_path) + dest_path = os.path.realpath(os.path.join(mppdb_site_dir, + "mppdb-site.xml")) + try: + shutil.copy(src_path, dest_path) + except Exception as e: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50214"] % src_path) + # init mppdb-site.xml + self.__initMppdbSite(mppdb_site_dir) + # distribute mppdb-site.xml + g_sshTool.scpFiles(dest_path, dest_path, hostlist) + + def __restartOMmonitor(self): + """ + function: restart OM_monitor for new environment variable + input: NA + output: NA + """ + #1. find om_monitor process + DefaultValue.KillAllProcess(g_opts.user, "om_monitor") + g_logger.log("Kill om_monitor successfully.") + cmd = "source /etc/profile;source '%s';%s/bin/om_monitor " \ + "-L %s/%s/cm/om_monitor >> /dev/null 2>&1 &" % \ + (g_opts.mpprcFile, g_clusterInfo.appPath, + g_clusterInfo.logPath, g_opts.user) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + g_logger.debug("The cmd is %s " % cmd) + g_logger.debug("Start om_monitor process failed.") + g_logger.debug("Error:%s\n" % output) + + g_logger.log("Restart om_monitor succeed.") + + def __cleanAuthConfig(self): + auth_config = os.path.join(os.path.dirname(g_opts.mpprcFile), + "auth_config") + if os.path.isdir(auth_config): + try: + shutil.rmtree(auth_config) + except Exception as e: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50207"] % + auth_config) + logPath = DefaultValue.getUserLogDirWithUser(g_opts.user) + kerberosLog = "%s/kerberos" % logPath + if os.path.exists(kerberosLog): + g_file.removeDirectory(kerberosLog) + g_logger.log("Clean auth config directory succeed.") + + def __cleanServer(self): + if os.path.isdir(g_opts.gausshome_kerberso): + try: + shutil.rmtree(g_opts.gausshome_kerberso) + except Exception as e: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50207"] % + g_opts.gausshome_kerberso) + + krb_data = "%s/var/krb5kdc" % g_opts.gausshome + if os.path.isdir(krb_data): + try: + shutil.rmtree(krb_data) + except Exception as e: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50207"] % krb_data) + + # Remove cron + crontabFile = "%s/gauss_crontab_file_%d" % \ + (DefaultValue.getTmpDirFromEnv(g_opts.user), os.getpid()) + cmd = "crontab -l > %s; " % crontabFile + cmd += "sed -i '/^.*krb5kdc.*$/d' '%s'; " % crontabFile + cmd += "crontab '%s';" % crontabFile + cmd += "rm -f '%s'" % crontabFile + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + #no need raise error here, user can do it manually. + g_logger.debug("The cmd is %s " % cmd) + g_logger.debug("Failed to delete regular tasks. Error: \n%s " + "You can do it manually." 
% str(output))
+
+        cmd = "source /etc/profile; source '%s' && \
+            proc_pid_list=`ps ux | grep -E 'krb5kdc'| \
+            grep -v 'grep'|awk '{print \$2}'` && \
+            (if [ X\"$proc_pid_list\" != X\"\" ]; \
+            then echo \"$proc_pid_list\" | xargs -r -n 100 kill -9 ; fi)" \
+            % (g_opts.mpprcFile)
+        (status, output) = subprocess.getstatusoutput(cmd)
+        if (status != 0):
+            raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + output)
+
+        g_logger.log("Cleaned server files and processes successfully.")
+
+    def __install(self):
+        try:
+            self.__configKrb5(False, g_opts.server)
+            self.__setUserEnvVariable(False, g_opts.server)
+            self.__triggerJob(False, g_opts.server)
+            if not g_opts.server:
+                self.__configPostgresql(False)
+                self.__configPgHba(False)
+                self.__configDNPgHba(False)
+                self.__restartOMmonitor()
+            g_logger.log("Successfully started Kerberos Authentication.")
+        except Exception as e:
+            self.__rollback(g_opts.server)
+            raise e
+
+
+    def __uninstall(self, isRollBack=False):
+        self.__configKrb5(True)
+        self.__setUserEnvVariable(True, False, isRollBack)
+        self.__triggerJob(True)
+        self.__configPostgresql(True)
+        self.__configPgHba(True)
+        self.__configDNPgHba(True)
+        if not isRollBack:
+            self.__cleanAuthConfig()
+            self.__cleanServer()
+        g_logger.log("Successfully closed Kerberos Authentication.")
+
+    def run(self):
+        '''
+        function: dispatch to install or uninstall according to -m
+        '''
+        if(g_opts.action == "install"):
+            self.__install()
+        elif(g_opts.action == "uninstall"):
+            self.__uninstall(False)
+        else:
+            raise Exception(ErrorCode.GAUSS_500["GAUSS_50000"] % g_opts.action)
+
+
+def parseCommandLine():
+    """
+    function: parse and check parameters from the command line
+    input : NA
+    output: NA
+    """
+    try:
+        opts, args = getopt.getopt(sys.argv[1:], "m:U:",
+                                   ["help", "krb-server", "krb-client"])
+    except Exception as e:
+        usage()
+        GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50000"] % str(e))
+
+    if(len(args) > 0):
+        GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50000"] %
+                               str(args[0]))
+
+    global g_opts
+    g_opts = CmdOptions()
+
+    for (key, value) in opts:
+        if (key == "--help"):
+            usage()
+            sys.exit(0)
+        elif(key == "-m"):
+            g_opts.action = value
+        elif (key == "-U"):
+            g_opts.user = value
+        elif (key == "--krb-server"):
+            g_opts.server = True
+        elif (key == "--krb-client"):
+            g_opts.client = True
+
+    if g_opts.action == 'install':
+        if not g_opts.server and not g_opts.client:
+            GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50001"] %
+                                   "-krb-server' or '--krb-client")
+        if g_opts.server and g_opts.client:
+            GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50005"] %
+                                   ("-krb-server", "-krb-client"))
+
+
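+# Note: --krb-server/--krb-client are validated only for "-m install".
+# "-m uninstall" takes no role flag because it removes both the server-side
+# and the client-side configuration (see Kerberos.__uninstall above).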
+def usage():
+    '''
+python3 KerberosUtility.py is a utility to configure Kerberos for a {0}
+cluster.
+Usage:
+    KerberosUtility.py -m install -U USER --krb-server
+    KerberosUtility.py -m install -U USER --krb-client
+    KerberosUtility.py -m uninstall -U USER
+General options:
+    -m          "install" sets up the Kerberos configuration for a {0}
+                cluster, "uninstall" removes it.
+    -U          Cluster user of the {0} cluster.
+Install options:
+    --krb-server    Install the Kerberos server.
+                    This option only works with "-m install".
+    --krb-client    Install the Kerberos client.
+                    This option only works with "-m install".
+Notes:
+    --krb-server and --krb-client are mutually exclusive; choose exactly one.
+    '''
+    print(usage.__doc__.format(VersionInfo.PRODUCT_NAME))
+
+if __name__ == '__main__':
+    """
+    main function
+    """
+    try:
+        parseCommandLine()
+        initGlobals()
+    except Exception as e:
+        GaussLog.exitWithError(str(e))
+
+    try:
+        kbs = Kerberos()
+        kbs.run()
+        sys.exit(0)
+    except Exception as e:
+        g_logger.logExit(str(e))
diff --git a/script/local/LocalCheck.py b/script/local/LocalCheck.py
new file mode 100644
index 0000000..82a9efb
--- /dev/null
+++ b/script/local/LocalCheck.py
@@ -0,0 +1,804 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+#############################################################################
+# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+#
+# openGauss is licensed under Mulan PSL v2.
+# You can use this software according to the terms
+# and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+#
+#          http://license.coscl.org.cn/MulanPSL2
+#
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+# ----------------------------------------------------------------------------
+# Description  :
+#############################################################################
+import os
+import sys
+import getopt
+import subprocess
+import glob
+import xml.etree.cElementTree as ETree
+
+sys.path.append(sys.path[0] + "/../")
+sys.path.append(os.path.realpath(os.path.dirname(__file__)) + "/../../lib")
+from gspylib.os.gsfile import g_file
+from gspylib.common.GaussLog import GaussLog
+from gspylib.common.DbClusterInfo import dbClusterInfo
+from gspylib.common.Common import DefaultValue
+from gspylib.common.VersionInfo import VersionInfo
+from gspylib.common.ErrorCode import ErrorCode
+from gspylib.common.ParameterParsecheck import Parameter
+from gspylib.os.gsOSlib import g_OSlib
+from gspylib.os.gsfile import g_Platform
+
+actioItemMap = {
+    "Check_SysCtl_Parameter": ['/etc/sysctl.conf', False],
+    "Check_FileSystem_Configure": ['/etc/security/limits.conf', False],
+    "osKernelParameterCheck": ['/etc/sysctl.conf', False],
+    "Set_SysCtl_Parameter": ['/etc/sysctl.conf', True],
+    "Set_FileSystem_Configure": ['/etc/security/limits.conf', True]
+}
+
+docker_no_need_check = ["net.core.wmem_max", "net.core.rmem_max",
+                        "net.core.wmem_default", "net.core.rmem_default",
+                        "net.sctp.sctp_mem", "net.sctp.sctp_rmem",
+                        "net.sctp.sctp_wmem", "net.core.netdev_max_backlog",
+                        "net.ipv4.tcp_max_tw_buckets",
+                        "net.ipv4.tcp_tw_reuse",
+                        "net.ipv4.tcp_tw_recycle", "net.ipv4.tcp_retries2",
+                        "net.ipv4.ip_local_reserved_ports",
+                        "net.ipv4.tcp_rmem",
+                        "net.ipv4.tcp_wmem", "net.ipv4.tcp_max_syn_backlog",
+                        "net.ipv4.tcp_syncookies",
+                        "net.ipv4.tcp_fin_timeout",
+                        "net.ipv4.tcp_sack", "net.ipv4.tcp_timestamps",
+                        "net.ipv4.tcp_retries1", "net.ipv4.tcp_syn_retries",
+                        "net.ipv4.tcp_synack_retries"]
+
+paraList = {}
+
+#############################################################################
+# Global variables
+# g_opts: global options
+# g_logger: global logger
+# g_clusterInfo: global cluster information
+#############################################################################
+g_logger = None
+g_opts = None
+g_clusterInfo = None
+g_check_os = False
+configFile = ''
+resultList = list()
+netWorkBondInfo = None
+netWorkLevel = 10000
+
+
+class CmdOptions():
+    def __init__(self):
+        """
+        function:
constructor + """ + # initialize variable + self.action = "" + self.user = "" + self.extrachecklist = [] + self.logFile = "" + self.confFile = "" + self.mtuValue = "" + self.hostname = "" + + +class netWork: + """ + class: netWork + """ + + def __init__(self): + """ + function : Init class netWork + input : NA + output : NA + """ + self.netLevel = "" + self.netNum = "" + self.variables = dict() + self.modeType = False + self.nums = 0 + + +############################################################################# +# Parse and check parameters +############################################################################# +def usage(): + """ +Usage: + python3 --help | -? + python3 LocalCheck -t action [-l logfile] [-U user] [--check-os] [-V] +Common options: + -t The type of action. + -l --log-file=logfile The path of log file. + -? --help Show this help screen. + -U Cluster user with root permissions. + --check-os Whether or not gs_checkos + -V --version + """ + print(usage.__doc__) + + +def checkSpecifiedItems(key, paralist, isSet=False): + """ + function: check specified item name + input : key, paralist, isSet + output: NA + """ + # checkItemMap[key][0] is the check function about the key + func = checkItemMap[key][0] + try: + if (hasattr(func, "__name__")): + func(paralist, isSet) + else: + g_logger.logExit(ErrorCode.GAUSS_530["GAUSS_53010"] + % (func, "LocalCheck.py")) + except Exception as e: + g_logger.logExit(str(e)) + + +def checkNetWorkMTU(): + """ + function: gs_check check NetWork card MTU parameters + input: NA + output: int + """ + try: + # Init cluster info + DbClusterInfo = dbClusterInfo() + DbClusterInfo.initFromStaticConfig(g_opts.user) + localHost = DefaultValue.GetHostIpOrName() + nodeIp = None + for dbnode in DbClusterInfo.dbNodes: + if (dbnode.name == localHost): + nodeIp = dbnode.backIps[0] + break + networkCardNum = DefaultValue.CheckNetWorkBonding(nodeIp, False) + # check NetWork card MTU parameters + valueStr = DefaultValue.checkNetWorkMTU(nodeIp, False) + if (not str(valueStr).isdigit()): + g_logger.log("Abnormal reason: Failed to obtain network" + " card MTU value." + " Error: \n%s" % valueStr) + return 1 + netParameter = DefaultValue.getConfigFilePara(configFile, + '/sbin/ifconfig') + if (int(valueStr) != int(g_opts.mtuValue)): + g_logger.log(" Abnormal: network '%s' 'mtu' value[%s:%s]" + " is different from the other node [%s:%s]" + % (networkCardNum, localHost, valueStr, + g_opts.hostname, g_opts.mtuValue)) + return 1 + elif (int(valueStr) != int(netParameter["mtu"])): + g_logger.log(" Warning reason: variable 'MTU' RealValue " + "'%s' ExpectedValue '%s'." % (valueStr, + netParameter["mtu"])) + return 2 + else: + return 0 + + except Exception as e: + g_logger.log(" Abnormal reason: Failed to obtain the" + " networkCard parameter [MTU]. 
Error: \n %s" + % str(e)) + return 1 + + +def checkSysctlParameter(kernelParameter, isSet): + """ + function: check and set the OS parameters + input: kernelParameter: OS parameters list will be check and set + isSet: the flag, when it is only True then will set OS parameters + output: NA + """ + setParameterList = {} + patchlevel = "" + + # get the suggest parameters and updata kernelParameter + suggestParameterList = DefaultValue.getConfigFilePara( + configFile, 'SUGGEST:%s' % actioItemMap["Check_SysCtl_Parameter"][0]) + kernelParameter.update(suggestParameterList) + + # check the OS parameters + if ("fs.aio-max-nr" in kernelParameter): + g_logger.log(" Warning reason: Checking or setting the" + " parameter 'fs.aio-max-nr' should be use " + "'gs_checkos -i A10' or 'gs_checkos -i B4'.") + kernelParameter.pop("fs.aio-max-nr") + # Get OS version + distname, version = g_Platform.dist()[0:2] + if (distname == "SuSE" and version == "11"): + cmd = "grep -i 'PATCHLEVEL' /etc/SuSE-release |" \ + " awk -F '=' '{print $2}'" + (status, output) = subprocess.getstatusoutput(cmd) + if (status == 0 and output != ""): + patchlevel = output.strip() + # Gs_check get NetWork card MTU + if (g_opts.action == "osKernelParameterCheck"): + checkrestult = checkNetWorkMTU() + if checkrestult != 0: + resultList.append(checkrestult) + + for key in kernelParameter: + # The SuSE 11 SP1 operating system + # does not have vm.extfrag_threshold parameter, skip check + if (patchlevel == "1" and key == "vm.extfrag_threshold"): + continue + sysFile = "/proc/sys/%s" % key.replace('.', '/') + # High version of linux no longer supports tcp_tw_recycle + if not os.path.exists( + sysFile) and key == "net.ipv4.tcp_tw_recycle": + continue + if (DefaultValue.checkDockerEnv() and key in docker_no_need_check): + continue + # The parameter sctpchecksumerrors check method is independent + if (key == "sctpchecksumerrors"): + cmd = "cat /proc/net/sctp/snmp | grep SctpChecksumErrors" \ + " | awk '{print $2}'" + else: + cmd = "cat %s" % ("/proc/sys/%s" % key.replace('.', '/')) + (status, output) = subprocess.getstatusoutput(cmd) + if (status == 0): + if (key == "vm.min_free_kbytes" + and output.split() != kernelParameter[key].split()): + expected_min = float(kernelParameter[key].split()[0]) * 0.9 + expected_max = float(kernelParameter[key].split()[0]) * 1.1 + if (int(output.split()[0]) > expected_max + or int(output.split()[0]) < expected_min): + resultList.append(2) + g_logger.log(" Warning reason: variable '%s'" + " RealValue '%s' ExpectedValue '%s'." + % (key, output, kernelParameter[key])) + setParameterList[key] = kernelParameter[key] + elif (key == "net.ipv4.ip_local_port_range" + and output.split() != kernelParameter[key].split()): + expected_min = int(kernelParameter[key].split()[0]) + expected_max = int(kernelParameter[key].split()[1]) + if (int(output.split()[0]) < expected_min + or int(output.split()[1]) > expected_max): + resultList.append(2) + g_logger.log(" Warning reason: variable '%s'" + " RealValue '%s' ExpectedValue '%s'." + % (key, output, kernelParameter[key])) + elif (output.split() != kernelParameter[key].split() and + key not in list(suggestParameterList.keys())): + resultList.append(1) + g_logger.log(" Abnormal reason: variable '%s'" + " RealValue '%s' ExpectedValue '%s'." 
+ % (key, output, kernelParameter[key])) + setParameterList[key] = kernelParameter[key] + elif output.split() != kernelParameter[key].split(): + if (key == "vm.overcommit_ratio"): + cmd = "cat /proc/sys/vm/overcommit_memory" + (status, value) = subprocess.getstatusoutput(cmd) + if (status == 0 and value == "0"): + continue + resultList.append(2) + g_logger.log(" Warning reason: variable '%s' RealValue" + " '%s' ExpectedValue '%s'." + % (key, output, kernelParameter[key])) + else: + resultList.append(1) + g_logger.log(" Abnormal reason: Failed to obtain the OS " + "kernel parameter [%s]. Error: \n %s" + % (key, output)) + setParameterList[key] = kernelParameter[key] + + if (1 in resultList and 'Check' in g_opts.action): + g_logger.log(" %s failed." % g_opts.action) + elif (2 in resultList and 'Check' in g_opts.action): + g_logger.log(" %s warning." % g_opts.action) + else: + g_logger.log(" All values about system control" + " parameters are correct: Normal") + + # set the OS parameters + if isSet: + setOSParameter(setParameterList, patchlevel) + + +def setOSParameter(setParameterList, patchlevel): + """ + function: set os parameter + input : setParameterList, patchlevel + output : NA + """ + # The SuSE 11 SP1 operating system does not have + # vm.extfrag_threshold parameter, skip set + if ("vm.extfrag_threshold" in setParameterList and patchlevel == "1"): + setParameterList.pop("vm.extfrag_threshold") + # The parameter sctpchecksumerrors set method is independent + if ("sctpchecksumerrors" in setParameterList): + cmd = "echo 1 > /sys/module/sctp/parameters/no_checksums" + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + g_logger.debug("The cmd is %s " % cmd) + g_logger.log(" Failed to enforce sysctl kernel variable" + " 'sctpchecksumerrors'. Error: %s" % output) + setParameterList.pop("sctpchecksumerrors") + + if (len(setParameterList) != 0): + g_logger.debug("Setting sysctl parameter.") + for key in setParameterList: + SetSysctlForList(key, setParameterList[key]) + g_logger.log(" Set variable '%s' to '%s'" + % (key, setParameterList[key])) + cmd = "sysctl -p" + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + cmderrorinfo = "sysctl -p | grep 'No such file or directory'" + (status, outputresult) = subprocess.getstatusoutput(cmderrorinfo) + if (status != 0 and outputresult == ""): + g_logger.logExit(ErrorCode.GAUSS_514["GAUSS_51400"] + % cmderrorinfo) + for key in setParameterList: + tmp = "/proc/sys/%s" % key.replace('.', '/') + if (tmp in outputresult or key in outputresult): + # delete the record about key from the /etc/sysctl.conf + delSysctlForList(key, setParameterList[key]) + g_logger.log(" Failed to enforce sysctl kernel" + " variable '%s'. Error: the variable name" + " is incorrect." % key) + + +def SetSysctlForList(key, value): + """ + function: Set sysctl parameter + input : key, value + output: NA + """ + kernelParameterFile = "/etc/sysctl.conf" + cmd = """sed -i '/^\\s*%s *=.*$/d' %s && + echo %s = %s >> %s 2>/dev/null""" % (key, kernelParameterFile, + key, value, + kernelParameterFile) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + g_logger.log(" Failed to set variable '%s %s'." 
% (key, value)) + g_logger.debug("Command:\n %s\nOutput:\n %s" % (cmd, str(output))) + + +def delSysctlForList(key, value): + """ + function: delete the record about key from the /etc/sysctl.conf + input: key, value + output: NA + """ + g_logger.debug("Deleting sysctl parameter.") + kernelParameterFile = "/etc/sysctl.conf" + cmd = """sed -i '/^\\s*%s *=.*$/d' %s """ % (key, kernelParameterFile) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + g_logger.log(" Failed to delete variable" + " '%s %s' from /etc/sysctl.conf." % (key, value)) + g_logger.debug("Command:\n %s\nOutput:\n %s" % (cmd, str(output))) + + +def checkLimitsParameter(limitPara, isSet): + """ + function: check and set the limit parameter + input: limitPara, isSet + output: NA + """ + + # utility class for this function only + class limitsconf_data: + """ + Class: limitsconf_data + """ + + def __init__(self, expected): + """ + function: constructor + """ + self.domain = None + self.value_found = None + self.value_expected = expected + + # check the limit parameter + table = dict() + + for key in list(limitPara.keys()): + cmd = "ulimit -a | grep -F '%s' 2>/dev/null" % key + (status, output) = subprocess.getstatusoutput(cmd) + if (status == 0): + resLines = output.split('\n') + resList = resLines[0].split(' ') + limitValue = resList[-1].strip() + if (limitPara[key] == 'unlimited'): + resultList.append(2) + if limitValue != 'unlimited': + g_logger.log(" Warning reason: variable '%s'" + " RealValue '%s' ExpectedValue '%s'" + % (key, limitValue, limitPara[key])) + if (key == 'virtual memory'): + table[('soft', 'as')] = limitsconf_data(limitPara[key]) + table[('hard', 'as')] = limitsconf_data(limitPara[key]) + if (key == 'max user processes'): + table[('soft', 'nproc')] = limitsconf_data(limitPara[key]) + table[('hard', 'nproc')] = limitsconf_data(limitPara[key]) + + elif (limitPara[key] != 'unlimited'): + if (limitValue == 'unlimited'): + continue + if (int(limitValue) < int(limitPara[key])): + if (key == "stack size"): + resultList.append(1) + g_logger.log(" Abnormal reason: variable '%s'" + " RealValue '%s' ExpectedValue '%s'" + % (key, limitValue, limitPara[key])) + else: + resultList.append(2) + g_logger.log(" Warning reason: variable '%s'" + " RealValue '%s' ExpectedValue '%s'" + % (key, limitValue, limitPara[key])) + if (key == 'stack size'): + table[('soft', + 'stack')] = limitsconf_data(limitPara[key]) + table[('hard', + 'stack')] = limitsconf_data(limitPara[key]) + if (key == 'open files'): + table[('soft', + 'nofile')] = limitsconf_data(limitPara[key]) + table[('hard', + 'nofile')] = limitsconf_data(limitPara[key]) + else: + resultList.append(1) + g_logger.debug("The cmd is %s " % cmd) + g_logger.log(" Failed to obtain '%s'. 
Error: \n%s" + % (key, output)) + + # set the open file numbers + if isSet and len(list(table.keys())): + for key in list(table.keys()): + if (key[1] == "nofile" or key[1] == "nproc"): + limitPath = '/etc/security/limits.d/' + nofiles = glob.glob("/etc/security/limits.d/*.conf") + for conf in nofiles: + g_file.changeMode(DefaultValue.HOSTS_FILE, conf) + SetLimitsConf(key[0], key[1], + table[key].value_expected, conf) + if os.path.isfile(os.path.join(limitPath, '91-nofile.conf')): + limitFile = '91-nofile.conf' + else: + limitFile = '90-nofile.conf' + if (key[1] == "stack" or key[1] == "as" or key[1] == "nproc"): + limitPath = '/etc/security/' + limitFile = 'limits.conf' + if (checkLimitFile(limitPath, limitFile) != 0): + return + + SetLimitsConf(key[0], key[1], table[key].value_expected, + limitPath + limitFile) + g_logger.log(" Set variable '%s %s' to '%s'" + % (key[0], key[1], table[key].value_expected)) + + +def checkLimitFile(limitPath, limitFile): + """ + function: check limits file + input : limitPath, limitFile + output: status + """ + g_logger.debug("check limits configuration file.") + + pathCmd = "if [ ! -d '%s' ]; then mkdir '%s' -m %s;fi; cd '%s';" \ + % (limitPath, limitPath, + DefaultValue.MAX_DIRECTORY_MODE, limitPath) + pathCmd += "if [ ! -f '%s' ]; then touch '%s';chmod %s '%s';fi" \ + % (limitFile, limitFile, + DefaultValue.FILE_MODE, limitFile) + (status, output) = subprocess.getstatusoutput(pathCmd) + if (status != 0): + g_logger.debug("The cmd is %s " % pathCmd) + g_logger.log(" Abnormal reason: Failed to create %s%s." + " Error: \n%s" % (limitPath, limitFile, output)) + return status + + +def SetLimitsConf(typename, item, value, limitfile): + """ + function: write the /etc/security/limits.conf + input: typename, item, value, limitfile + output: NA + """ + g_logger.debug("Setting limits config.") + clusterUser = getClusterUser() + cmd = """sed -i '/^.* %s *%s .*$/d' %s && + echo "root %s %s %s" >> %s && """ \ + % (typename, item, limitfile, typename, item, value, limitfile) + cmd += """echo "%s %s %s %s" >> %s""" \ + % (clusterUser, typename, item, value, limitfile) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + g_logger.debug("The cmd is %s " % cmd) + g_logger.log(" Abnormal reason: Failed to set variable" + " '%s %s'. 
Error: \n%s" % (typename, item, output)) + + +def getGphome(xmlFilePath): + """ + function: Get GPHOME path + input : xmlFilePath + output: str + """ + gphome = "" + if os.path.exists(xmlFilePath): + with open(xmlFilePath, 'r') as fp: + xmlstr = fp.read() + domTree = ETree.fromstring(xmlstr) + rootNode = domTree + if not rootNode.findall('CLUSTER'): + raise Exception(ErrorCode.GAUSS_512["GAUSS_51200"] % 'CLUSTER') + element = rootNode.findall('CLUSTER')[0] + nodeArray = element.findall('PARAM') + for node in nodeArray: + name = node.attrib['name'] + if (name == "gaussdbToolPath"): + gphome = str(node.attrib['value']) + return gphome + + +def getClusterUser(): + """ + function: Check user information + input : NA + output: str + """ + # get user and group + gphome = getGphome(g_opts.confFile) + if not gphome or not os.path.exists(gphome): + user = "*" + return user + user = g_OSlib.getPathOwner(gphome)[0] + return user + + +def CheckSection(section, isSetting=False): + """ + function: check the section parameters status + input: section, isSetting + output: NA + """ + + global configFile + dirName = os.path.dirname(os.path.realpath(__file__)) + configFile = "%s/../gspylib/etc/conf/check_list.conf" % dirName + + # get the parameter and value about section from configuration file + if (section == '/etc/security/limits.conf'): + checkList = ['open files', 'pipe size'] + commParameterList = DefaultValue.getConfigFilePara(configFile, + section, checkList) + else: + commParameterList = DefaultValue.getConfigFilePara(configFile, + section) + + # checking or setting the parameter what in the commParameterList + checkSpecifiedItems(section, commParameterList, isSetting) + + +def parseCommandLine(): + """ + function: Parse command line and save to global variables + input : NA + output: NA + """ + try: + # Resolves the command line + opts, args = getopt.getopt(sys.argv[1:], "t:X:l:U:V?", + ["help", "log-file=", "xmlfile=", + "MTUvalue=", "hostname=", + "check-os", "version"]) + except Exception as e: + usage() + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50000"] % str(e)) + + if (len(args) > 0): + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50000"] + % str(args[0])) + + global g_opts + global g_check_os + g_opts = CmdOptions() + + # Output help information and exit + for (key, value) in opts: + if (key == "-?" 
or key == "--help"): + usage() + sys.exit(0) + elif (key == "-V" or key == "--version"): + print(("%s %s" % (sys.argv[0].split("/")[-1], + VersionInfo.COMMON_VERSION))) + sys.exit(0) + elif (key == "-t"): + g_opts.action = value + elif (key == "-U"): + g_opts.user = value + elif (key == "--check-os"): + g_check_os = True + elif (key == "-l" or key == "--log-file"): + g_opts.logFile = os.path.realpath(value) + elif (key == "--MTUvalue"): + g_opts.mtuValue = value + elif (key == "--hostname"): + g_opts.hostname = value + elif (key == "-X"): + g_opts.confFile = value + # check para vaild + Parameter.checkParaVaild(key, value) + + +def checkParameter(): + """ + function: check parameter for different action + input : NA + output: NA + """ + + # check if user exist and is the right user + if (g_opts.user != ''): + DefaultValue.checkUser(g_opts.user) + tmpDir = DefaultValue.getTmpDirFromEnv(g_opts.user) + if (not os.path.exists(tmpDir)): + GaussLog.exitWithError(ErrorCode.GAUSS_502["GAUSS_50201"] + % ("temporary directory[" + tmpDir + "]")) + + # check the -t parameter + if (g_opts.action == ""): + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50001"] % 't' + '.') + if (g_opts.action not in list(actioItemMap.keys())): + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50004"] % "t") + + if (g_opts.logFile == ""): + dirName = os.path.dirname(os.path.realpath(__file__)) + g_opts.logFile = os.path.join(dirName, "gaussdb_localcheck.log") + + +def initGlobals(): + """ + function: Init global log + input : NA + output: NA + """ + # state global variable + global g_logger + global g_clusterInfo + # Init the log file + g_logger = GaussLog(g_opts.logFile, "gaussdb_localcheck") + if os.path.exists(g_opts.confFile): + g_clusterInfo = dbClusterInfo() + g_clusterInfo.initFromXml(g_opts.confFile) + + +def setLocalReservedPort(): + """ + function: Set local reserved port in check_list.conf + input : NA + output: NA + """ + portList = [] + rportList = [] + portStr = "" + checkListfile = "%s/../gspylib/etc/conf/check_list.conf" \ + % os.path.dirname(os.path.realpath(__file__)) + if g_clusterInfo is not None: + for dbNode in g_clusterInfo.dbNodes: + for cn in dbNode.coordinators: + if cn.port not in portList: + portList.append(cn.port) + if cn.haPort != "" and cn.haPort != (int(cn.port) + 1): + if cn.haPort not in portList: + portList.append(cn.haPort) + for dn in dbNode.datanodes: + if dn.port not in portList: + portList.append(dn.port) + if dn.haPort != "" and dn.haPort != (int(dn.port) + 1): + if dn.haPort not in portList: + portList.append(dn.haPort) + for cm in dbNode.cmservers: + if cm.port not in portList: + portList.append(cm.port) + if cm.haPort != "" and cm.haPort != (int(cm.port) + 1): + if cm.haPort not in portList: + portList.append(cm.haPort) + for gtm in dbNode.gtms: + if gtm.port not in portList: + portList.append(gtm.port) + if gtm.haPort != "" and gtm.haPort != (int(gtm.port) + 1): + if gtm.haPort not in portList: + portList.append(gtm.haPort) + for etcd in dbNode.etcds: + if etcd.port not in portList: + portList.append(etcd.port) + if etcd.haPort != "" and etcd.haPort != (int(etcd.port) + 1): + if etcd.haPort not in portList: + portList.append(etcd.haPort) + if 20050 not in portList: + portList.append(20050) + sortedPortList = sorted(portList) + for port in sortedPortList: + localPortList = [] + nport = port + while nport <= port + 7: + if len(rportList) != 0 and port <= max(rportList[-1]) + 1: + if nport not in rportList[-1]: + rportList[-1].append(nport) + else: + if nport not in 
localPortList: + localPortList.append(nport) + nport += 1 + if len(localPortList) != 0: + rportList.append(localPortList) + for rport in rportList: + if rport == rportList[-1]: + portStr += "%s-%s" % (min(rport), max(rport)) + else: + portStr += "%s-%s," % (min(rport), max(rport)) + cmd = "sed -i '/%s/d' %s && sed -i '/%s/a\%s = %s' %s " % \ + ("ipv4.ip_local_reserved_ports", checkListfile, "tcp_retries2", + "net.ipv4.ip_local_reserved_ports", portStr, checkListfile) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + "Error:\n%s" % str(output)) + + +def doLocalCheck(): + """ + function: check OS item on local node + input : NA + output: NA + """ + + global resultList + global netWorkBondInfo + netWorkBondInfo = netWork() + if (g_opts.action == "osKernelParameterCheck"): + CheckSection(actioItemMap["Check_FileSystem_Configure"][0], + actioItemMap["Check_FileSystem_Configure"][1]) + CheckSection(actioItemMap["Check_SysCtl_Parameter"][0], + actioItemMap["Check_SysCtl_Parameter"][1]) + else: + CheckSection(actioItemMap[g_opts.action][0], + actioItemMap[g_opts.action][1]) + + +# checkItemMap is a dictionary of global variable +# checkItemMap.keys() is configuration file's section +# checkItemMap[key][0] is the check function about the key +# checkItemMap[key][1] is the parameter of the check function +checkItemMap = {'/etc/sysctl.conf': + [checkSysctlParameter, (paraList, g_check_os)], + '/etc/security/limits.conf': + [checkLimitsParameter, (paraList, g_check_os)] + } + +if __name__ == '__main__': + """ + main function + """ + try: + # parse cmd lines + parseCommandLine() + # check Parameter + checkParameter() + # init globals + initGlobals() + setLocalReservedPort() + # check OS item on local node + doLocalCheck() + except Exception as e: + GaussLog.exitWithError(str(e)) + finally: + # close log file + g_logger.closeLog() + sys.exit(0) diff --git a/script/local/LocalCheckOS.py b/script/local/LocalCheckOS.py new file mode 100644 index 0000000..8ace379 --- /dev/null +++ b/script/local/LocalCheckOS.py @@ -0,0 +1,2316 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +############################################################################# +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +# Description : LocalCheckOS.py is a utility to check OS info on local node. 
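+# Note        : Each collect* helper below shells out to a standard tool
+#               (mount, blockdev, uname, locale, ethtool, ...) and wraps
+#               the result in a small data object; failures are carried in
+#               the object's errormsg field or raised as exceptions.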
+############################################################################# +import os +import sys +import subprocess +import glob +import getopt +import subprocess +import platform +import time +from datetime import datetime + +localDirPath = os.path.dirname(os.path.realpath(__file__)) + +sys.path.append(sys.path[0] + "/../") +from gspylib.common.ParameterParsecheck import Parameter +from gspylib.os.gsOSlib import g_OSlib +from gspylib.os.gsplatform import g_Platform, findCmdInPath +from gspylib.common.GaussLog import GaussLog +from gspylib.common.DbClusterInfo import dbClusterInfo +from gspylib.common.Common import DefaultValue +from gspylib.common.VersionInfo import VersionInfo +from gspylib.common.ErrorCode import ErrorCode + +sys.path.insert(0, localDirPath + "/../../lib") +import psutil + +ACTION_CHECK_OS_VERSION = "Check_OS_Version" +ACTION_CHECK_KERNEL_VERSION = "Check_Kernel_Version" +ACTION_CHECK_UNICODE = "Check_Unicode" +ACTION_CHECK_TIMEZONE = "Check_TimeZone" +ACTION_CHECK_DISK_CONFIGURE = "Check_Disk_Configure" +ACTION_CHECK_BLOCKDEV_CONFIGURE = "Check_BlockDev_Configure" +ACTION_CHECK_IO_CONFIGURE = "Check_IO_Configure" +ACTION_CHECK_LOGICAL_BLOCK = "Check_Logical_Block" +ACTION_CHECK_IO_REQUEST = "Check_IO_Request" +ACTION_CHECK_ASYNCHRONOUS_IO_REQUEST = "Check_Asynchronous_IO_Request" +ACTION_CHECK_NETWORK_CONFIGURE = "Check_Network_Configure" +ACTION_CHECK_NETWORK_BOND_MODE = "Check_Network_Bond_Mode" +ACTION_CHECK_SWAP_MEMORY_CONFIGURE = "Check_Swap_Memory_Configure" +ACTION_CHECK_TIME_CONSISTENCY = "Check_Time_Consistency" +ACTION_CHECK_FIREWALL_SERVICE = "Check_Firewall_Service" +ACTION_CHECK_THP_SERVICE = "Check_THP_Service" + +ACTION_SET_BLOCKDEV_CONFIGURE = "Set_BlockDev_Configure" +ACTION_SET_IO_CONFIGURE = "Set_IO_Configure" +ACTION_SET_REMOVEIPC_VALUE = "Set_RemoveIPC_Value" +ACTION_SET_SESSION_PROCESS = "Set_Session_Process" +ACTION_SET_NETWORK_CONFIGURE = "Set_Network_Configure" +ACTION_SET_THP_SERVICE = "Set_THP_Service" +ACTION_SET_LOGICAL_BLOCK = "Set_Logical_Block" +ACTION_SET_IO_REQUEST = "Set_IO_REQUEST" +ACTION_SET_ASYNCHRONOUS_IO_REQUEST = "Set_Asynchronous_IO_Request" + +############################################################################# +# Global variables +############################################################################# +netWorkLevel = 10000 +expectMTUValue = 8192 +MASTER_INSTANCE = 0 +STANDBY_INSTANCE = 1 + +g_logger = None +g_opts = None +g_clusterInfo = None +netWorkBondInfo = None + + +########################################################################### +# mounts +########################################################################### +class mounts: + """ + Class: mounts + """ + + def __init__(self): + """ + function : Init class mounts + input : NA + output : NA + """ + self.entries = dict() # dictionary key=partition value=mount object + self.errormsg = None + + +class GSMount: + """ + Class: GSMount + """ + + def __init__(self): + """ + function : Init class GSMount + input : NA + output : NA + """ + self.partition = None + self.dir = None + self.type = None + self.options = set() # mount options + + def __str__(self): + """ + function : Convert to a string + input : NA + output : string + """ + optionstring = '' + first = True + for k in self.options: + if not first: + optionstring = "%s," % optionstring + thisoption = k + optionstring = "%s%s" % (optionstring, thisoption) + first = False + return "%s on %s type %s (%s)" % (self.partition, self.dir, + self.type, optionstring) + + +def collectMounts(): + 
""" + function : Collector mounts + input : NA + output : Instantion + """ + data = mounts() + p = subprocess.Popen(["mount"], shell=False, stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + result = p.communicate() + data.errormsg = result[1].decode().strip() + if p.returncode: + return data + + for line in result[0].decode().splitlines(): + mdata = GSMount() + words = line.strip().split() + mdata.partition = words[0] + mdata.dir = words[2] + mdata.type = words[4] + # get the options string + tmpa = words[5] + tmpb = tmpa.strip().strip("()") + tmpc = tmpb.split(",") + for op in tmpc: + mdata.options.add(op) + data.entries[mdata.partition] = mdata + return data + + +########################################################################### +# blockdev: +########################################################################### +class blockdev: + """ + Class: blockdev + """ + + def __init__(self): + """ + function : Init class blockdev + input : NA + output : NA + """ + self.ra = dict() # key is device name value is getra value + self.errormsg = '' + + +def collectBlockdev(): + """ + function : Collector blockdev + input : NA + output : Instantion + """ + data = blockdev() + devices = list() + try: + # If the directory of '/' is disk array, all disk prereads will be set + devlist = DefaultValue.getDevices() + cmd = "mount | awk '{if( $3==\"/\" ) print $1}' |" \ + " sed 's/\/dev\///' | sed 's/[0-9]//'" + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + g_logger.logExit(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + " Error: \n%s" % output) + for dev in devlist: + if (dev.strip() == output.strip()): + continue + devices.append("/dev/%s" % dev) + except Exception as e: + data.errormsg = e.__str__() + + for d in devices: + p = subprocess.Popen(["/sbin/blockdev", "--getra", "%s" % d], + shell=False, stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + result = p.communicate() + data.errormsg += result[1].decode().strip() + if p.returncode: + continue + data.ra[d] = result[0].decode().strip() + + return data + + +########################################################################### +# platform: uname +########################################################################### +class uname: + """ + Class: uname + """ + + def __init__(self): + """ + function : Init class uname + input : NA + output : NA + """ + self.output = None + self.errormsg = None + + +def collectUname(): + """ + function : Collector uname + input : NA + output : Instantion + """ + data = uname() + p = subprocess.Popen(["uname", "-r"], shell=False, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + result = p.communicate() + data.errormsg = result[1].decode().strip() + if p.returncode: + return data + data.output = result[0].decode().strip() + return data + + +########################################################################### +# unicode +########################################################################### +class codename: + """ + Class: codename + """ + + def __init__(self): + """ + function : Init class codename + input : NA + output : NA + """ + self.output = None + self.errormsg = None + + +def collectUnicode(): + """ + function : Collector unicode + input : NA + output : Instantion + """ + data = codename() + cmd = "locale | grep '^LANG='" + status, output = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception((ErrorCode.GAUSS_505["GAUSS_50502"] % "Unicode") + + ("The cmd is : %s" % cmd)) + data.output = output + return data + + 
+########################################################################### +# timezone +########################################################################### + +class timezone: + """ + Class: timezone + """ + + def __init__(self): + """ + function : Init class timezone + input : NA + output : NA + """ + self.output = None + self.errormsg = None + + +def collectTimeZone(): + """ + function : Collector timezone + input : NA + output : Instantion + """ + data = timezone() + cmd = "date -R | awk -F ' ' '{print $NF}'" + status, output = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception((ErrorCode.GAUSS_505["GAUSS_50502"] % "TimeZone") + + ("The cmd is : %s" % cmd)) + data.output = output + return data + + +########################################################################### +# platform: version +########################################################################### +class platformInfo: + """ + Class: platformInfo + """ + + def __init__(self): + """ + function : Init class platforminfo + input : NA + output : NA + """ + self.distname = "" + self.version = "" + self.id = "" + self.bits = "" + self.linkage = "" + self.patchlevel = "" + + +def collectplatformInfo(): + """ + function : Collector platforminfo + input : NA + output : Instantion + """ + data = platformInfo() + distname, version, idnum = g_Platform.dist() + bits, linkage = platform.architecture() + + data.distname = distname + data.version = version + data.id = idnum + data.bits = bits + data.linkage = linkage + + # os-release is added since SLE 12; + # SuSE-release will be removed in a future service pack or release + if (distname == "SuSE" and version in ("11", "12")): + if os.path.exists('/etc/SuSE-release'): + cmd = "grep -i 'PATCHLEVEL' /etc/SuSE-release |" \ + " awk -F '=' '{print $2}'" + else: + cmd = "grep -i 'VERSION_ID' /etc/os-release |" \ + " awk -F '.' 
'{print $2}' | sed 's/\"//'" + (status, output) = subprocess.getstatusoutput(cmd) + if (status == 0 and output != ""): + data.patchlevel = output.strip() + else: + g_logger.logExit(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + " Error: \n%s " % output) + + return data + + +########################################################################### +# I/O schedulers +########################################################################### +class ioschedulers: + """ + Class: ioschedulers + """ + + def __init__(self): + """ + function : Init class ioschedulers + input : NA + output : NA + """ + # key is device name, value is scheduler name + self.devices = dict() + self.errormsg = '' + # key is device name, value is optional configuration list + self.allItem = {} + + +def collectIOschedulers(): + """ + function : Collector IOschedulers + input : NA + output : Instantion + """ + data = ioschedulers() + devices = set() + try: + files = DefaultValue.getDevices() + for f in files: + fname = "/sys/block/%s/queue/scheduler" % f + words = fname.split("/") + if len(words) != 6: + continue + devices.add(words[3].strip()) + except Exception as e: + data.errormsg = e.__str__() + + for d in devices: + try: + with open("/sys/block/%s/queue/scheduler" % d, 'r') as fd: + scheduler = fd.read() + words = scheduler.split("[") + if len(words) != 2: + continue + words = words[1].split("]") + if len(words) != 2: + continue + data.devices[d] = words[0].strip() + data.allItem[d] = scheduler.replace("[", "").replace("]", + "").split() + except Exception as e: + data.errormsg += e.__str__() + + return data + + +########################################################################### +# I/O REQUEST #device_name +########################################################################### +class ioRequest(): + """ + Class: ioRequest + """ + + def __init__(self): + """ + function : Init class ioRequest + input : NA + output : NA + """ + self.devices = dict() + self.errormsg = '' + + +def collectIORequest(): + """ + function : Collector ioRequest + input : NA + output : Dict + """ + data = ioRequest() + devices = [] + + try: + files = glob.glob("/sys/block/*/queue/nr_requests") + for f in files: + words = f.split("/") + if len(words) != 6: + continue + devices.append(words[3].strip()) + except Exception as e: + data.errormsg = e.__str__() + + result = {} + for d in devices: + try: + with open("/sys/block/%s/queue/nr_requests" % d, 'r') as fd: + request = fd.read() + result[d] = request + except Exception as e: + data.errormsg += e.__str__() + + return result + + +########################################################################### +# Asynchronous I/O REQUEST #device_name +########################################################################### +class AsynchronousIoRequest(): + """ + Class: AsynchronousIoRequest + """ + + def __init__(self): + """ + function : Init class AsynchronousIoRequest + input : NA + output : NA + """ + self.devices = dict() + self.errormsg = '' + + +def collectAsynchronousIORequest(): + """ + function : Collector AsynchronousIORequest + input : NA + output : List + """ + data = AsynchronousIoRequest() + result = [] + try: + with open("/proc/sys/fs/aio-max-nr", 'r') as fd: + request = fd.read() + result.append(request) + except Exception as e: + data.errormsg += e.__str__() + + return result + + +########################################################################### +# LogicalBlock +########################################################################### +class LogicalBlock(): 
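+    # Note: collectLogicalBlock() below returns a plain dict of
+    # device name -> logical_block_size and uses this object only for
+    # its errormsg field; self.devices stays empty.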
+ """ + class: LogicalBlock + """ + + def __init__(self): + """ + function : Init class LogicalBlock + input : NA + output : NA + """ + self.devices = dict() + self.errormsg = '' + + +def collectLogicalBlock(): + """ + function : Collector LogicalBlock + input : NA + output : Dict + """ + data = LogicalBlock() + devices = set() + + try: + files = glob.glob("/sys/block/*/queue/logical_block_size") + for f in files: + words = f.split("/") + if len(words) != 6: + continue + devices.add(words[3].strip()) + except Exception as e: + data.errormsg = e.__str__() + + result = {} + for d in devices: + try: + with open("/sys/block/%s/queue/logical_block_size" % d, 'r') as fd: + request = fd.read() + result[d] = request + except Exception as e: + data.errormsg += e.__str__() + + return result + + +########################################################################### +# removeComments : delete the line which start with "#" +########################################################################### +def removeComments(line): + """ + function : Remove Comments + input : String + output : String + """ + words = line.split("#") + if len(words) < 2: + return line + return words[0] + + +########################################################################### +# sysctl parameter +########################################################################### +class sysctl: + """ + Class: sysctl + """ + + def __init__(self): + """ + function : Init class sysctl + input : NA + output : NA + """ + # dictionary of values + self.variables = dict() + self.errormsg = None + + +def collectSysctl(): + """ + function : Collector Sysctl + input : NA + output : instantion + """ + data = sysctl() + try: + # enforce sysctl kernel value + cmd = "sysctl -p" + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + g_logger.debug("Warning: Failed to enforce sysctl kernel value" + " before checking/setting sysctl " + "parameter.Commands: %s. Error:\n%s." 
+ % (cmd, output)) + + with open("/etc/sysctl.conf", "r") as f: + for line in f: + line = removeComments(line) + words = line.split("=") + if len(words) != 2: + continue + + key = words[0].strip() + value = words[1].strip() + data.variables[key] = ' '.join(value.split()) + + except Exception as e: + data.errormsg = e.__str__() + + return data + + +########################################################################### +# limits configure: +########################################################################### +class limitsconf: + """ + Class: limitsconf + """ + + def __init__(self): + """ + function : Init class limitsconf + input : NA + output : NA + """ + self.lines = list() + self.errormsg = None + + def __str__(self): + """ + function : Convert to a string + input : NA + output : String + """ + output = "" + for line in self.lines: + output = "%s\n%s" % (output, line) + return output + + +class limitsconf_entry: + """ + Class: limitsconf_entry + """ + + def __init__(self, domain, typename, item, value): + """ + function : Init class limitsconf_entry + input : String, String, String, String + output : NA + """ + self.domain = domain + self.type = typename + self.item = item + self.value = value + + def __str__(self): + """ + function : Merged into a string + input : NA + output : String + """ + return "%s %s %s %s" % (self.domain, self.type, self.item, self.value) + + +def collectLimits(): + """ + function : collect Limits + input : NA + output : instantion + """ + data = limitsconf() + try: + with open("/etc/security/limits.conf", "r") as f: + for line in f: + line = removeComments(line) + words = line.split() + if len(words) != 4: + continue + domain = words[0].strip() + typename = words[1].strip() + item = words[2].strip() + value = words[3].strip() + data.lines.append(limitsconf_entry( + domain, typename, item, value)) + except Exception as e: + data.errormsg = e.__str__() + + return data + + +########################################################################### +# getTHPandOSInitFile: +########################################################################### +def getTHPandOSInitFile(): + """ + function : We know that the centos have same init file and THP file + as RedHat. + input : NA + output : String, String + """ + THPFile = "/sys/kernel/mm/transparent_hugepage/enabled" + initFile = DefaultValue.getOSInitFile() + if (initFile == ""): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] + % "startup file of current OS" + + " The startup file for SUSE OS is" + " /etc/init.d/boot.local.The startup file for Redhat" + " OS is /etc/rc.d/rc.local.") + return (THPFile, initFile) + + +########################################################################### +# THP Server: +########################################################################### +class THPServer: + """ + Class: THPServer + """ + + def __init__(self): + """ + function : Init class THPServer + input : NA + output : NA + """ + self.status = "" + + +def collectTHPServer(): + """ + function : collect THPServer + input : NA + output : instantion + """ + data = THPServer() + THPFile = getTHPandOSInitFile()[0] + if (os.path.exists(THPFile)): + cmd = "grep '\[never\]' %s | wc -l" % THPFile + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + g_logger.debug("Failed to obtain THP service status. Commands for" + " obtaining THP server status: %s." 
% cmd)
+            g_logger.logExit(ErrorCode.GAUSS_510["GAUSS_51001"]
+                             + " Error: \n%s" % output)
+        if (output.strip().isdigit()):
+            num = int(output.strip())
+        else:
+            num = 1
+        if (num > 0):
+            data.status = "disabled"
+        else:
+            data.status = "enabled"
+    else:
+        data.status = "disabled"
+    return data
+
+
+def disRemoveIPC():
+    """
+    function : disable RemoveIPC
+    input : NA
+    output : NA
+    """
+    g_logger.debug("disable RemoveIPC.")
+    distName = g_Platform.getCurrentPlatForm()[0]
+    if distName.upper() in ("OPENEULER", "KYLIN"):
+        cmd = "setenforce 0"
+        subprocess.getstatusoutput(cmd)
+    initFile = "/usr/lib/systemd/system/systemd-logind.service"
+    if os.path.exists(initFile):
+        close_cmd = "if [ `systemctl show systemd-logind | " \
+                    "grep RemoveIPC` != \"RemoveIPC=no\" ]; " \
+                    "then echo 'RemoveIPC=no' >> " \
+                    "/usr/lib/systemd/system/systemd-logind.service; " \
+                    "sed -i '/RemoveIPC=yes/'d " \
+                    "/usr/lib/systemd/system/systemd-logind.service; fi;"
+        disableRemoveIPCLog(close_cmd)
+    initFile = "/etc/systemd/logind.conf"
+    if os.path.exists(initFile):
+        close_cmd = "if [ `loginctl show-session | " \
+                    "grep RemoveIPC` != \"RemoveIPC=no\" ]; " \
+                    "then echo 'RemoveIPC=no' >> " \
+                    "/etc/systemd/logind.conf; " \
+                    "sed -i '/RemoveIPC=yes/'d " \
+                    "/etc/systemd/logind.conf; fi;"
+        disableRemoveIPCLog(close_cmd)
+    cmd = "systemctl daemon-reload"
+    disableRemoveIPCLog(cmd)
+
+    cmd = "systemctl restart systemd-logind"
+    disableRemoveIPCLog(cmd)
+
+    cmd = "systemctl show systemd-logind | grep RemoveIPC && " \
+          "loginctl show-session | grep RemoveIPC"
+    output = disableRemoveIPCLog(cmd)
+    ipcCheckNum = 0
+    for result in output.split("\n"):
+        if result == "RemoveIPC=no":
+            ipcCheckNum = ipcCheckNum + 1
+    if ipcCheckNum < 1:
+        g_logger.logExit(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd +
+                         " Error: \n the output of \"systemctl show"
+                         " systemd-logind | grep RemoveIPC\" and"
+                         " \"loginctl show-session | grep RemoveIPC\""
+                         " must contain RemoveIPC=no")
+    g_logger.debug("Successfully changed RemoveIPC to no.")
+
+
+def disableRemoveIPCLog(cmd):
+    """
+    function : run a RemoveIPC-related command and log any failure
+    input : cmd
+    output : command output
+    """
+    (status, output) = subprocess.getstatusoutput(cmd)
+    if status != 0:
+        g_logger.debug("Failed to disable RemoveIPC. Commands"
+                       " for disabling RemoveIPC: %s." % cmd)
+        g_logger.logExit(ErrorCode.GAUSS_510["GAUSS_51002"]
+                         + " Error: \n%s" % output)
+    return output
+
+
+def CheckSessionProcess():
+    """
+    function : Set User Session Process Control
+    input : NA
+    output : NA
+    """
+    g_logger.debug("Setting User Session Process Control.")
+    etcFile = "/etc/pam.d/sshd"
+    if os.path.exists(etcFile):
+        set_cmd = "sed -i '/.*session\+.*pam_limits\.so/d' /etc/pam.d/sshd;" \
+                  "echo 'session required pam_limits.so' >> " \
+                  "/etc/pam.d/sshd; "
+        setSeesionProcess(set_cmd)
+    g_logger.debug("Successfully set session process control.")
+
+
+def setSeesionProcess(cmd):
+    """
+    function : execute the session process setting command
+    input : cmd
+    output : command output
+    """
+    (status, output) = subprocess.getstatusoutput(cmd)
+    if status != 0:
+        g_logger.debug("Failed to set session process. Commands"
+                       " for setting session process: %s." 
% cmd) + g_logger.logExit(ErrorCode.GAUSS_510["GAUSS_51003"] + + " Error: \n%s" % output) + return output + + + +def disTHPServer(): + """ + function : close THP Server + input : NA + output : NA + """ + g_logger.debug("Closing the THP service.") + (THPFile, initFile) = getTHPandOSInitFile() + if (os.path.exists(initFile)): + # 1.close thp + close_cmd = "(if test -f '%s'; then echo never > %s;fi)" \ + % (THPFile, THPFile) + (status, output) = subprocess.getstatusoutput(close_cmd) + if (status != 0): + g_logger.debug("Failed to close THP service. Commands" + " for closing THP server: %s." % close_cmd) + g_logger.logExit(ErrorCode.GAUSS_510["GAUSS_51002"] + + " Error: \n%s" % output) + # 2.add close cmd to init file + cmd = "sed -i '/^.*transparent_hugepage.*enabled.*echo" \ + " never.*$/d' %s &&" % initFile + cmd += "echo \"%s\" >> %s" % (close_cmd, initFile) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + g_logger.logExit(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + " Error: \n%s" % output) + g_logger.debug("Successfully closed the THP service.") + + +########################################################################### +# network card parameter: +########################################################################### +class netWork: + """ + Class: netWork + """ + + def __init__(self): + """ + function : Init class netWork + input : NA + output : NA + """ + self.netLevel = "" + self.netNum = "" + self.variables = dict() + self.modeType = False + self.nums = 0 + + +def CheckNetWorkBonding(serviceIP, bondMode=False): + """ + function : Check NetWork ConfFile + input : String, bool + output : List + """ + networkCardNum = DefaultValue.getNICNum(serviceIP) + NetWorkConfFile = DefaultValue.getNetWorkConfFile(networkCardNum) + if (NetWorkConfFile.find("No such file or directory") >= 0 + and DefaultValue.checkDockerEnv()): + return + networkCardNumList = [] + networkCardNumList.append(networkCardNum) + bondingConfFile = "/proc/net/bonding/%s" % networkCardNum + if os.path.exists(NetWorkConfFile): + cmd = "grep -i 'BONDING_OPTS\|BONDING_MODULE_OPTS' %s" \ + % NetWorkConfFile + (status, output) = subprocess.getstatusoutput(cmd) + if ((status == 0) and (output.strip() != "")): + if ((output.find("mode") > 0) + and os.path.exists(bondingConfFile)): + networkCardNumList = networkCardNumList + \ + checkBondMode(bondingConfFile, bondMode) + else: + g_logger.logExit(ErrorCode.GAUSS_506["GAUSS_50611"] + + "The cmd is " + cmd) + else: + g_logger.log("BondMode Null") + else: + flag = DefaultValue.getNetWorkBondFlag(networkCardNum)[0] + if flag: + if os.path.exists(bondingConfFile): + networkCardNumList = networkCardNumList + \ + checkBondMode(bondingConfFile, bondMode) + else: + g_logger.logExit(ErrorCode.GAUSS_506["GAUSS_50611"] + + "Without NetWorkConfFile mode.") + else: + g_logger.log("BondMode Null") + if (len(networkCardNumList) != 1): + del networkCardNumList[0] + return networkCardNumList + + +def checkBondMode(bondingConfFile, isCheck): + """ + function : Check Bond mode + input : String, bool + output : List + """ + netNameList = [] + + cmd = "grep -w 'Bonding Mode' %s | awk -F ':' '{print $NF}'" \ + % bondingConfFile + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0 or output.strip() == ""): + g_logger.debug("Failed to obtain network card bonding information." + " Commands for getting: %s." 
% cmd) + g_logger.logExit(ErrorCode.GAUSS_506["GAUSS_50611"] + + " Error: \n%s" % output) + + if ("active-backup" in output): + netWorkBondInfo.modeType = 1 + netWorkBondInfo.nums = 0 + if isCheck: + g_logger.log("BondMode %s" % output.strip()) + else: + cmd = "grep -w 'Slave Interface' %s | awk -F ':' '{print $NF}'" \ + % bondingConfFile + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + g_logger.debug("Failed to obtain network card bonding " + "information. Commands for getting: %s." % cmd) + g_logger.logExit(ErrorCode.GAUSS_506["GAUSS_50611"] + + " Error: \n%s" % output) + for networkname in output.split('\n'): + netNameList.append(networkname.strip()) + netWorkBondInfo.nums = netWorkBondInfo.nums + 1 + return netNameList + + +def getNetWorkTXRXValue(networkCardNum, valueType): + """ + function : Check Bond mode + input : int, String + output : int + """ + cmd = "/sbin/ethtool -g %s | grep '%s:' | tail -n 2" % (networkCardNum, + valueType) + (status, output) = subprocess.getstatusoutput(cmd) + if (output.find("Operation not supported") >= 0 + and DefaultValue.checkDockerEnv()): + g_logger.log(" Warning reason: Failed to obtain the" + " network card TXRX value in docker container. Commands " + "for obtain the network card TXRX: %s. Error: \n%s" + % (cmd, output)) + return (0, 0) + if (status != 0 or len(output.splitlines()) != 2): + g_logger.debug("Failed to obtain network card %s value. Commands" + " for getting information: %s." % (valueType, cmd)) + g_logger.logExit(ErrorCode.GAUSS_506["GAUSS_50612"] + % valueType + " Error: \n%s" % output) + + # redhat2.0 here means EulerOS, because we get the os version 2.0 + valueMax = output.splitlines()[0].split(':')[1].split(' ')[0].strip() + valueStr = output.splitlines()[1].split(':')[1].split(' ')[0].strip() + if (not str(valueStr).isdigit() or not str(valueMax).isdigit()): + g_logger.logExit(ErrorCode.GAUSS_506["GAUSS_50612"] % valueType + + " Error: \ncmd:%s\noutput:%s" % (cmd, output)) + if (int(valueMax) < int(valueStr)): + valueTmp = valueMax + valueMax = valueStr + valueStr = valueTmp + return (int(valueStr), int(valueMax)) + + +def GetNetWorkCardInfo(networkCardNum): + """ + function : Get NetWorkCard Info + input : int + output : instantion + """ + # set network card mtu and queue length + g_logger.debug("Obtaining the value about mtu and queue length" + " from network card configuration.") + data = netWork() + data.netNum = networkCardNum + # check the network card format. + # if Speed >= 10000Mb/s, do the setting; else, nothing + cmdGetSpeedStr = "/sbin/ethtool %s | grep 'Speed:'" % networkCardNum + (status, output) = subprocess.getstatusoutput(cmdGetSpeedStr) + if (status == 0 and output.find("Speed:") >= 0 + and output.find("Mb/s") >= 0): + data.netLevel = int(output.split(':')[1].strip()[:-4]) + # get default mtu value + valueMTU = psutil.net_if_stats()[networkCardNum].mtu + data.variables["mtu"] = valueMTU + if (data.netLevel >= int(netWorkLevel)): + # get default rx value + (valueRX, valueRXMax) = getNetWorkTXRXValue(networkCardNum, "RX") + data.variables["rx"] = valueRX + data.variables["rx_max"] = valueRXMax + # get default tx value + (valueTX, valueTXMax) = getNetWorkTXRXValue(networkCardNum, "TX") + data.variables["tx"] = valueTX + data.variables["tx_max"] = valueTXMax + else: + g_logger.debug("Warning: The speed of current card \"%s\"" + " is less than %s Mb/s." 
% (networkCardNum,
+                              netWorkLevel))
+    elif (netWorkBondInfo.modeType == 1):
+        data.netLevel = int(0)
+        netWorkBondInfo.nums = netWorkBondInfo.nums - 1
+        if (output.find("Speed:") >= 0):
+            g_logger.log(" Warning reason: Failed to obtain the network"
+                         " card speed value. Maybe the network card "
+                         "\"%s\" is not working." % networkCardNum)
+        else:
+            g_logger.log(" Warning reason: Failed to obtain the network"
+                         " card speed value. Commands for obtaining the "
+                         "network card speed: %s. Error: \n%s"
+                         % (cmdGetSpeedStr, output))
+
+        if (netWorkBondInfo.nums == 0):
+            g_logger.log(" Warning reason: Failed to obtain the speed"
+                         " value for all bonded network cards.")
+    else:
+        data.netLevel = int(0)
+        if (output.find("Speed:") >= 0):
+            g_logger.log(" Warning reason: Failed to obtain the "
+                         "network card speed value. Maybe the network card"
+                         " \"%s\" is not working." % networkCardNum)
+        else:
+            g_logger.log(" Warning reason: Failed to obtain the"
+                         " network card speed value. Commands for obtaining"
+                         " the network card speed: %s. Error: \n%s"
+                         % (cmdGetSpeedStr, output))
+    g_logger.debug("Successfully obtained the mtu and queue length value"
+                   " from network card.")
+    return data
+
+
+def setNetWorkMTUOrTXRXValue(networkCardNum, valueType,
+                             expectValue, initFileName):
+    """
+    function : Set NetWork MTU Or TXRX Value
+    input : int, String, String, String
+    output : NA
+    """
+    # note: this helper is only ever called with valueType "tx" or "rx"
+    if (valueType == "tx" or valueType == "rx"):
+        cmd = "/sbin/ethtool -G %s %s %s" % (networkCardNum,
+                                             valueType, expectValue)
+
+    (status, output) = subprocess.getstatusoutput(cmd)
+    if (status != 0):
+        if (valueType == 'tx' or valueType == 'rx'):
+            if (output.find("no ring parameters changed, aborting") < 0):
+                isPrint = True
+            else:
+                isPrint = False
+        else:
+            isPrint = True
+        if isPrint:
+            g_logger.debug("Failed to set network card %s value."
+                           " Commands for setting: %s." % (valueType, cmd))
+            g_logger.logExit(ErrorCode.GAUSS_506["GAUSS_50613"]
+                             % valueType + " Error: \n%s" % output)
+
+    # write setting cmds into init file
+    if (valueType == "tx" or valueType == "rx"):
+        cmdWrite = "sed -i \"/^.*\\/sbin\\/ethtool -G %s %s %s$/d\" %s" \
+                   % (networkCardNum, valueType, expectValue, initFileName)
+
+    cmdInit = """%s && echo "%s">>%s""" % (cmdWrite, cmd, initFileName)
+    (status, output) = subprocess.getstatusoutput(cmdInit)
+    if (status != 0):
+        g_logger.debug("Failed to write %s setting commands into init file."
+                       " Commands for setting: %s." % (valueType, cmdInit))
+        g_logger.logExit(ErrorCode.GAUSS_502["GAUSS_50205"]
+                         % initFileName + " Error: \n%s" % output)
+
+
+def SetNetWorkCardInfo(networkCardNum, data):
+    """
+    function : Set NetWorkCard Info
+    input : int, instance
+    output : NA
+    """
+    g_logger.debug("Setting the network card configuration value.")
+
+    if (int(data.netLevel) >= int(netWorkLevel)):
+        initFile = getTHPandOSInitFile()[1]
+        for k in list(data.variables.keys()):
+            if ((k == "rx") and int(data.variables[k].__str__())
+                    < int(data.variables["rx_max"].__str__())):
+                setNetWorkMTUOrTXRXValue(
+                    data.netNum, k,
+                    int(data.variables["rx_max"].__str__()),
+                    initFile)
+                g_logger.debug(
+                    "Set the \"%s\" '%s' value from \"%s\" to \"%s\"." 
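+                    # the rx ring size is raised to the device maximum
+                    # (rx_max) reported earlier by "ethtool -g"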
+                    % (networkCardNum, k, int(data.variables[k].__str__()),
+                       int(data.variables["rx_max"].__str__())))
+            elif ((k == "tx") and int(data.variables[k].__str__()) <
+                  int(data.variables["tx_max"].__str__())):
+                setNetWorkMTUOrTXRXValue(
+                    data.netNum, k,
+                    int(data.variables["tx_max"].__str__()),
+                    initFile)
+                g_logger.debug(
+                    "Set the \"%s\" '%s' value from \"%s\" to \"%s\"."
+                    % (networkCardNum, k, int(data.variables[k].__str__()),
+                       int(data.variables["tx_max"].__str__())))
+        # after setting the values, wait a moment so that the real
+        # network card information can be read back.
+        time.sleep(2)
+
+    g_logger.debug("Successfully set the network card value.")
+
+
+def CheckNetWorkCardInfo(data):
+    """
+    function : Check NetWorkCard Info
+    input : instance
+    output : NA
+    """
+    g_logger.debug("Checking the network card configuration value.")
+    for k in list(data.variables.keys()):
+        value = int(data.variables[k].__str__())
+        if (k == "mtu"):
+            if g_opts.mtuValue != "" and int(value) != int(g_opts.mtuValue):
+                g_logger.log(" Abnormal:"
+                             " network '%s' '%s' value[%s:%s]"
+                             " is different from the other node [%s:%s]"
+                             % (data.netNum, k,
+                                DefaultValue.GetHostIpOrName(),
+                                value, g_opts.hostname, g_opts.mtuValue))
+            elif (int(value) != int(expectMTUValue)):
+                g_logger.log(" Warning reason: network '%s' '%s'"
+                             " RealValue '%s' ExpectedValue '%s'"
+                             % (data.netNum, k, value, expectMTUValue))
+
+        elif ((k == "rx") and
+              int(value) < int(data.variables["rx_max"].__str__())):
+            if (int(data.netLevel) >= int(netWorkLevel)):
+                g_logger.log(" Warning reason: network '%s' '%s'"
+                             " RealValue '%s' ExpectValue '%s'."
+                             % (data.netNum, k, value,
+                                data.variables["rx_max"].__str__()))
+        elif ((k == "tx") and
+              int(value) < int(data.variables["tx_max"].__str__())):
+            if (int(data.netLevel) >= int(netWorkLevel)):
+                g_logger.log(" Warning reason: network '%s' '%s' "
+                             "RealValue '%s' ExpectValue '%s'."
+                             % (data.netNum, k, value,
+                                data.variables["tx_max"].__str__()))
+            else:
+                g_logger.log(" network '%s' '%s' RealValue '%s'"
+                             " ExpectValue '%s'. [Normal]"
+                             % (data.netNum, k, value,
+                                data.variables["tx_max"].__str__()))
+
+    g_logger.debug("Successfully checked the network card value.")
+
+
+def GetInterruptCountNum(cardname):
+    """
+    function : Count the card's interrupt entries; every queue name in
+               /proc/interrupts starts with '<cardname>-'
+    input : String
+    output : Int
+    """
+    cmd = "cat /proc/interrupts | grep '%s-' | wc -l" % cardname
+    (status, output) = subprocess.getstatusoutput(cmd)
+    if (status != 0):
+        g_logger.debug("Failed to obtain network card interrupt"
+                       " count numbers. Commands for getting interrupt"
+                       " count numbers: %s." % cmd)
+        g_logger.logExit(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd +
+                         " Error: \n%s" % output)
+
+    if (not str(output.strip()).isdigit()):
+        return 0
+    return int(output.strip())
+
+
+def CheckNetWorkCardInterrupt(data, isSetting=False):
+    """
+    function : Check NetWorkCard Interrupt
+    input : instance, Bool
+    output : NA
+    """
+    g_logger.debug("Setting the network card interrupt value.")
+    if (int(data.netLevel) >= int(netWorkLevel)):
+        cmd = "for i in `cat /proc/interrupts | grep '%s-' |" \
+              " awk -F ' ' '{print $1}' | awk -F ':' '{print $1}'`;" \
+              " do cat /proc/irq/$i/smp_affinity ; done" % data.netNum
+        (status, output) = subprocess.getstatusoutput(cmd)
+        if (status != 0):
+            g_logger.debug("Failed to obtain network card interrupt value."
+                           " Commands for getting interrupt value: %s." 
% cmd) + g_logger.logExit(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + " Error: \n%s" % output) + + # cpu core number followed by 1 2 4 8,every 4 left shift one + Mapping = {0: "1", 1: "2", 2: "4", 3: "8"} + flag = "Normal" + for index, eachLine in enumerate(output.split()): + # Remove the ',' + eachLine = eachLine.replace(",", "") + # Replace 0000,00001000 to 1,Remove invalid content + validValue = eachLine.replace("0", "") + # Convert the row index to the expected value + expandNum = Mapping[index % 4] + # Convert line index to expected position + expandBit = index // 4 * -1 - 1 + # value and position is correct + if (len(eachLine) * -1) > expandBit: + g_logger.debug("Network card [%s] multi-queue support is" + " not enabled.\n" % data.netNum) + flag = "Error" + break + if (eachLine[expandBit] == expandNum and validValue == expandNum): + continue + else: + g_logger.debug("Network card [%s] multi-queue support is not" + " enabled.\n" % data.netNum) + flag = "Error" + break + if (flag == "Normal"): + pass + else: + if (isSetting): + g_logger.debug("The network card '%s' interrupt is not" + " be setted." % data.netNum) + cmd = "ps ax | grep -v grep | grep -q irqbalance; echo $?" + (status, output) = subprocess.getstatusoutput(cmd) + if (output.strip() == "0"): + g_logger.log(" Warning: irqbalance is running and" + " will likely override this script's" + " affinitization. Please stop the irqbalance" + " service and/or execute 'killall" + " irqbalance'.") + killcmd = "%s irqbalance" % findCmdInPath("killall") + (status, output) = subprocess.getstatusoutput(killcmd) + if status != 0: + g_logger.log("Failed to execute killall irqbalance") + count = int(GetInterruptCountNum(data.netNum)) + i = 0 + while (i < count): + # the dev name type like this: eth1-1, + # eth1-rx-1, eth1-tx-1, eth1-TxRx-1 + cmd_IRQ = "cat /proc/interrupts | grep '%s.*-' | " \ + "awk -F ' ' '{print $1}' | awk -F ':' " \ + "'{print $1}'| awk 'NR==%s'" \ + % (data.netNum, str(i + 1)) + (status, output) = subprocess.getstatusoutput(cmd_IRQ) + if status != 0 or output.strip() == "": + g_logger.debug( + "Failed to obtain network card interrupt value. " + "Commands for getting interrupt value: %s." + % cmd_IRQ) + else: + IRQ = output.strip() + g_logger.log("The network '%s' interrupt " + "configuration path:" + " /proc/irq/%s/smp_affinity." + % (data.netNum, IRQ)) + num = 2 ** i + # Under SuSE platform, when the length is + # greater than 8, the ',' must be used. + value = str(hex(num))[2:] + if (len(value) > 16 and value[-1] == 'L'): + value = value[:-1] + result_value = '' + while (len(value) > 8): + result_value = ",%s%s" \ + % (value[-8:], result_value) + value = value[:-8] + result_value = "%s%s" % (value, result_value) + cmd_set = "echo '%s'> /proc/irq/%s/smp_affinity" \ + % (result_value, IRQ) + (status, output) = subprocess.getstatusoutput(cmd_set) + if (status != 0): + g_logger.log( + "Failed to set network '%s' IRQ. Commands for" + " setting: %s." % (data.netNum, cmd_set)) + else: + g_logger.log( + "Set network card '%s' IRQ to \"%s\"." 
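+                            # result_value is the hex CPU-affinity mask
+                            # (2**i for queue i, comma-split every 8 hex
+                            # digits) written to /proc/irq/<IRQ>/smp_affinity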
+ % (data.netNum, result_value)) + i = i + 1 + + g_logger.debug("Successfully setted the network card interrupt value.") + + +def CheckNetWorkCardPara(serviceIP, isSetting=False): + """ + function : Check NetWorkCard Para + input : String, Bool + output : NA + """ + + global expectMTUValue + + # get the network parameter values from the configuration file + dirName = os.path.dirname(os.path.realpath(__file__)) + configFile = "%s/../gspylib/etc/conf/check_list.conf" % dirName + checkList = ['mtu', 'rx', 'tx'] + netParameterList = DefaultValue.getConfigFilePara(configFile, + '/sbin/ifconfig', + checkList) + if (('mtu' in list(netParameterList.keys())) and + (netParameterList['mtu'].strip() != '')): + expectMTUValue = netParameterList['mtu'].strip() + + # set network card mtu and queue length + networkCardNumList = DefaultValue.CheckNetWorkBonding(serviceIP) + + # if len=1, it means that there is no bonding + if (len(networkCardNumList) == 1): + data = GetNetWorkCardInfo(networkCardNumList[0].strip()) + if not isSetting: + CheckNetWorkCardInfo(data) + CheckNetWorkCardInterrupt(data) + else: + SetNetWorkCardInfo(networkCardNumList[0].strip(), data) + CheckNetWorkCardInterrupt(data, True) + else: + for networkCardNum in networkCardNumList: + data = GetNetWorkCardInfo(networkCardNum) + if not isSetting: + CheckNetWorkCardInfo(data) + CheckNetWorkCardInterrupt(data) + else: + SetNetWorkCardInfo(networkCardNum, data) + CheckNetWorkCardInterrupt(data, True) + + +########################################################################### +# meminfo: +########################################################################### +class meminfo: + """ + Class: meminfo + """ + + def __init__(self): + """ + function : Init class meminfo + input : NA + output : NA + """ + self.memvalue = 0 + self.errormsg = None + + +class swapinfo: + """ + class: swapinfo + """ + + def __init__(self): + """ + function : Init class swapinfo + input : NA + output : NA + """ + self.swapvalue = 0 + self.errormsg = None + + +def collectSwapInfo(): + """ + function : Collect Swap Info + input : NA + output : Instantion + """ + data = swapinfo() + cmd = "cat /proc/meminfo | grep SwapTotal" + status, output = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception((ErrorCode.GAUSS_505["GAUSS_50502"] % "SwapTotal") + + ("The cmd is:%s" % cmd)) + try: + listname = output.strip().split(' ') + val = int(listname[len(listname) - 2]) + factor = listname[len(listname) - 1] + if factor == 'kB': + data.swapvalue = val * 1024 + elif factor == '': + data.swapvalue = val + + except Exception as e: + raise Exception(ErrorCode.GAUSS_505["GAUSS_50502"] % "SwapTotal" + + " Error: \n%s" % str(e)) + return data + + +def collectMemInfo(): + """ + function : Collect Memory information + input : NA + output : Instantion + """ + data = meminfo() + cmd = "cat /proc/meminfo | grep MemTotal" + status, output = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception((ErrorCode.GAUSS_505["GAUSS_50502"] % "MemTotal") + + ("The cmd is %s " % cmd)) + try: + listname = output.strip().split(' ') + val = int(listname[len(listname) - 2]) + factor = listname[len(listname) - 1] + if factor == 'kB': + data.memvalue = val * 1024 + elif factor == '': + data.memvalue = val + + except Exception as e: + raise Exception(ErrorCode.GAUSS_505["GAUSS_50502"] % "MemTotal" + + " Error: \n%s" % str(e)) + return data + + +########################################################################### +# firewall: 
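+#   status detection is distribution specific: systemctl/service on the
+#   RHEL family (redhat/centos/euleros/openEuler), SuSEfirewall2 on SUSE;
+#   "enabled"/"disabled" reflects only the service state, not the rules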
+###########################################################################
+# firewall:
+###########################################################################
+class firewall:
+    """
+    class: firewall
+    """
+
+    def __init__(self):
+        """
+        function : Init class firewall
+        input : NA
+        output : NA
+        """
+        self.status = ""
+        self.distname = ""
+        self.errormsg = ""
+
+
+def collectfirewall():
+    """
+    function : Collect firewall status
+    input : NA
+    output : Instance of firewall
+    """
+    data = firewall()
+    distname = g_Platform.dist()[0]
+    if distname in ("redhat", "centos", "euleros", "openEuler"):
+        data.distname = distname.upper()
+        if g_Platform.isPlatFormEulerOSOrRHEL7X():
+            cmd = "systemctl status firewalld.service"
+        else:
+            cmd = "service iptables status"
+    else:
+        data.distname = "SUSE"
+        cmd = "SuSEfirewall2 status"
+
+    status, output = subprocess.getstatusoutput(cmd)
+    if status != 0:
+        data.errormsg = output
+        return data
+
+    if distname in ("redhat", "centos", "euleros", "openEuler"):
+        if g_Platform.isPlatFormEulerOSOrRHEL7X():
+            if (output.strip()).find("Active: "
+                                     "active (running)") > 0:
+                data.status = "enabled"
+            else:
+                data.status = "disabled"
+        else:
+            if (output.strip()).find("Firewall is not"
+                                     " running") > 0:
+                data.status = "disabled"
+            else:
+                data.status = "enabled"
+    else:
+        if (output.strip()).find("SuSEfirewall2 not"
+                                 " active") > 0:
+            data.status = "disabled"
+        else:
+            data.status = "enabled"
+
+    return data
+
+
+###########################################################################
+# ntp: time consistency
+###########################################################################
+class ntp:
+    """
+    class: ntp
+    """
+
+    def __init__(self):
+        """
+        function : Init class ntp
+        input : NA
+        output : NA
+        """
+        self.running = False
+        self.hosts = set()
+        self.currenttime = ""
+        self.errormsg = None
+
+
+def collectNtpd():
+    """
+    function : Collect Ntpd process status
+    input : NA
+    output : Instance of ntp
+    """
+    data = ntp()
+    p = subprocess.Popen(["pgrep", "ntpd"], shell=False,
+                         stdout=subprocess.PIPE,
+                         stderr=subprocess.PIPE)
+    result = p.communicate()
+
+    if data.errormsg:
+        data.errormsg = "%s\n%s" % (data.errormsg, result[1].strip())
+    else:
+        data.errormsg = result[1].strip()
+
+    if not p.returncode:
+        for line in result[0].splitlines():
+            if line.strip().isdigit():
+                data.running = True
+
+    data.currenttime = str(datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
+
+    return data
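+
+
+# --- Illustrative sketch; not called by the code above ---------------------
+# collectfirewall() above probes three different service front ends. The
+# distro-to-command mapping it hard-codes can be restated declaratively;
+# this helper is hypothetical and only restates the branches above:
+def _firewall_probe_cmd(distname, is_rhel7_family):
+    """Pick the firewall status command for the detected platform."""
+    if distname in ("redhat", "centos", "euleros", "openEuler"):
+        if is_rhel7_family:
+            return "systemctl status firewalld.service"
+        return "service iptables status"
+    return "SuSEfirewall2 status"   # SuSE is the remaining case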
+
+
+#############################################################################
+# CheckLinuxMounts:
+#############################################################################
+def CheckLinuxMounts():
+    """
+    function : Check Linux Mounts
+    input : NA
+    output : NA
+    """
+    xfs_mounts = list()
+    expectedOption = "inode64"
+    data = collectMounts()
+    for k in list(data.entries.keys()):
+        entry = data.entries[k]
+        if entry.type == "xfs":
+            xfs_mounts.append(entry)
+
+    for mnt in xfs_mounts:
+        if mnt.type != "xfs":
+            g_logger.log("The device '%s' is expected to be an XFS"
+                         " filesystem but is not." % mnt.partition)
+            continue
+
+        is_find = "failed"
+        for opt in mnt.options:
+            if (opt == expectedOption):
+                is_find = "success"
+                break
+        if (is_find == "failed"):
+            g_logger.log("XFS filesystem on device %s is missing the "
+                         "recommended mount option '%s'."
+                         % (mnt.partition, expectedOption))
+
+
+#############################################################################
+def CheckBlockdev(isSetting=False):
+    """
+    function : Check block device readahead
+    input : Bool
+    output : NA
+    """
+    expectedReadAhead = "16384"
+    data = collectBlockdev()
+    for dev in list(data.ra.keys()):
+        ra = data.ra[dev]
+        if int(ra) < int(expectedReadAhead):
+            if not isSetting:
+                g_logger.log("On device (%s) 'blockdev readahead' RealValue"
+                             " '%s' ExpectedValue '%s'."
+                             % (dev, ra, expectedReadAhead))
+            else:
+                SetBlockdev(expectedReadAhead, dev)
+                g_logger.log("On device (%s) set 'blockdev readahead' from"
+                             " '%s' to '%s'." % (dev, ra, expectedReadAhead))
+
+
+def SetBlockdev(expectedReadAhead, devname):
+    """
+    function : Set block device readahead
+    input : String, String
+    output : NA
+    """
+    g_logger.debug("Setting block dev value.")
+    initFile = getTHPandOSInitFile()[1]
+    cmd = "/sbin/blockdev --setra %s %s " % (expectedReadAhead, devname)
+    cmd += " && echo \"/sbin/blockdev --setra %s %s\" >> %s" \
+           % (expectedReadAhead, devname, initFile)
+    (status, output) = subprocess.getstatusoutput(cmd)
+    if status != 0:
+        g_logger.log("Failed to set block dev '%s'. Error:\n%s"
+                     % (devname, output))
+
+
+#############################################################################
+def CheckIOSchedulers(isSetting=False):
+    """
+    function : Check IO Schedulers
+    input : Bool
+    output : NA
+    """
+
+    data = collectIOschedulers()
+    for dev in list(data.devices.keys()):
+        expectedScheduler = "deadline"
+        # Virtio disks (vda) only support mq-deadline
+        if (expectedScheduler not in data.allItem[dev]
+                and "mq-deadline" in data.allItem[dev]):
+            expectedScheduler = "mq-deadline"
+        scheduler = data.devices[dev]
+        if scheduler != expectedScheduler:
+            if not isSetting:
+                g_logger.log("On device (%s) 'IO scheduler' RealValue '%s' "
+                             "ExpectedValue '%s'." % (dev, scheduler,
+                                                      expectedScheduler))
+            else:
+                SetIOSchedulers(dev, expectedScheduler)
+                g_logger.log("On device (%s) set 'IO scheduler' from"
+                             " '%s' to '%s'." % (dev, scheduler,
+                                                 expectedScheduler))
+
+
+def SetIOSchedulers(devname, expectedScheduler):
+    """
+    function : Set IO Schedulers
+    input : String, String
+    output : NA
+    """
+    g_logger.debug("Set IO Schedulers value.")
+    initFile = getTHPandOSInitFile()[1]
+    cmd = " echo %s >> /sys/block/%s/queue/scheduler" \
+          % (expectedScheduler, devname)
+    cmd += " && echo \"echo %s >> /sys/block/%s/queue/scheduler\" >> %s" \
+           % (expectedScheduler, devname, initFile)
+    (status, output) = subprocess.getstatusoutput(cmd)
+    if (status != 0):
+        g_logger.log("Failed to set dev '%s' IO Schedulers. Error:\n%s"
+                     % (devname, output))
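+
+
+# --- Illustrative sketch; not called by the code above ---------------------
+# collectIOschedulers(), used by CheckIOSchedulers() above, is defined
+# elsewhere in gspylib. The kernel marks the active scheduler with brackets
+# in /sys/block/<dev>/queue/scheduler, e.g. "noop [deadline] cfq"; a minimal
+# reader under that assumption (the name is hypothetical) would be:
+def _read_io_scheduler(devname):
+    """Return (active, available) schedulers for one block device."""
+    with open("/sys/block/%s/queue/scheduler" % devname) as f:
+        items = f.read().split()
+    active, available = "", []
+    for item in items:
+        if item.startswith("[") and item.endswith("]"):
+            active = item[1:-1]         # the bracketed entry is active
+            available.append(active)
+        else:
+            available.append(item)
+    return active, available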
Error:\n%s" + % (devname, output)) + + +def CheckIORequest(isSetting=False): + """ + function : Check IO Request + input : Bool + output : NA + """ + expectedScheduler = "32768" + data = collectIORequest() + if len(data) == 0: + g_logger.log(" WARNING:Not find IO Request file.") + for i in list(data.keys()): + reuqest = data[i] + if int(reuqest) != int(expectedScheduler): + g_logger.log(" WARNING:On device (%s) 'IO Request' " + "RealValue '%s' ExpectedValue '%s'" + % (i, reuqest.strip(), expectedScheduler)) + if isSetting: + SetIORequest(expectedScheduler, i) + + +def SetIORequest(expectedScheduler, dev): + """ + function : Set IO Request + input : String, String + output : NA + """ + g_logger.debug("Set IO Request value!") + initFile = getTHPandOSInitFile()[1] + cmd = " echo %s >> /sys/block/%s/queue/nr_requests" \ + % (expectedScheduler, dev) + cmd += " && echo \"echo %s >> /sys/block/%s/queue/nr_requests\" >> %s" \ + % (expectedScheduler, dev, initFile) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + g_logger.log(" WARNING:Failed to set dev '%s' IO Request." + " Error:\n%s" % (dev, output)) + + +############################################################################# +def CheckAsyIOrequests(isSetting=False): + """ + function : Check Asy IO requests + input : Bool + output : NA + """ + expectedScheduler = "104857600" + + cnnum = 0 + dnnum = 0 + instancenum = 0 + + hostname = DefaultValue.GetHostIpOrName() + dbnode = g_clusterInfo.getDbNodeByName(hostname) + for i in dbnode.coordinators: + if i.datadir != "": + cnnum += 1 + + for i in dbnode.datanodes: + if (i.instanceType == MASTER_INSTANCE): + dnnum += 1 + if (i.instanceType == STANDBY_INSTANCE): + dnnum += 1 + + instancenum = (dnnum + cnnum) * 1048576 + data = collectAsynchronousIORequest() + if len(data) == 0: + g_logger.log(" WARNING:Not find AsynchronousIORequest file.") + if int(instancenum) > int(expectedScheduler): + SetAsyIOrequests(instancenum) + else: + SetAsyIOrequests(expectedScheduler) + else: + for i in iter(data): + request = i + if (int(request) < int(instancenum) and + int(expectedScheduler) < int(instancenum)): + if isSetting: + SetAsyIOrequests(instancenum) + elif (int(request) < int(expectedScheduler) and + int(instancenum) < int(expectedScheduler)): + if isSetting: + SetAsyIOrequests(expectedScheduler) + elif (int(expectedScheduler) < int(request) and + int(instancenum) < int(request)): + if isSetting: + SetAsyIOrequests(request) + + +def SetAsyIOrequests(expectedScheduler): + """ + function : Set Asy IO requests + input : String + output : NA + """ + g_logger.debug("Set Asynchronous IO Maximum requests value!") + initFile = getTHPandOSInitFile()[1] + cmd = " echo %s >> /proc/sys/fs/aio-max-nr" % expectedScheduler + cmd += " && echo \"echo %s >> /proc/sys/fs/aio-max-nr\" >> %s" \ + % (expectedScheduler, initFile) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + g_logger.log("Failed to set Asynchronous IO Maximum Request." 
+ " Error:\n%s" % (output)) + + +############################################################################# +def CheckClogicalBlock(isSetting=True): + """ + function : Check Clogical Block + input : Bool + output : NA + """ + expectedScheduler = "512" + data = collectLogicalBlock() + if len(data) == 0: + g_logger.log(" Warning:Not find clogical block file," + "please check it.") + for i in list(data.keys()): + reuqest = data[i] + if int(reuqest) < int(expectedScheduler): + g_logger.log(" Warning:On device (%s) ' ClogicalBlock" + " Request' RealValue '%d' ExpectedValue '%d'" + % (i, int(reuqest), int(expectedScheduler))) + if isSetting: + SetClogicalBlock(expectedScheduler, i) + + +def SetClogicalBlock(expectedScheduler, dev): + """ + function : Set Clogical Block + input : String, String + output : NA + """ + g_logger.debug("Set logicalBlock value!") + initFile = getTHPandOSInitFile()[1] + cmd = " echo %s >> /sys/block/%s/queue/logical_block_size" \ + % (expectedScheduler, dev) + cmd += " && echo \"echo %s >> /sys/block/%s/queue/logical_block_size\"" \ + " >> %s" % (expectedScheduler, dev, initFile) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + g_logger.error("Failed to set dev '%s' logicalBlock by excuting" + " command:\n%s\nOutput:\n%s" % (dev, cmd, str(output))) + + +############################################################################# +def CheckPlatformInfo(): + """ + function : Check Platform Info + input : NA + output : NA + """ + data = collectplatformInfo() + if (data.distname == "SuSE"): + if (data.version == "11" and data.patchlevel == "1"): + mixedType = "%s%sSP%s" % (data.distname, data.version, + data.patchlevel) + platformStr = "%s_%s_SP%s_%s" % (data.distname, data.version, + data.patchlevel, data.bits) + elif (data.version == "11" and data.patchlevel in ("2", "3", "4")): + mixedType = "%s%s" % (data.distname, data.version) + platformStr = "%s_%s_SP%s_%s" % (data.distname, data.version, + data.patchlevel, data.bits) + elif (data.version == "12" and + data.patchlevel in ("0", "1", "2", "3")): + mixedType = "%s%s" % (data.distname, data.version) + platformStr = "%s_%s_SP%s_%s" % (data.distname, data.version, + data.patchlevel, data.bits) + else: + platformStr = "%s_%s_SP%s_%s" % (data.distname, data.version, + data.patchlevel, data.bits) + g_logger.log("False %s %s" % (data.distname, platformStr)) + return + elif (data.distname in ("redhat", "centos", "asianux")): + if (data.version in ("6.4", "6.5", "6.6", "6.7", "6.8", "6.9")): + mixedType = "%s6" % data.distname + platformStr = "%s_%s_%s" % (data.distname, + data.version, data.bits) + elif (data.version[0:3] + in ("7.0", "7.1", "7.2", "7.3", "7.4", "7.5", "7.6")): + mixedType = "%s7" % data.distname + platformStr = "%s_%s_%s" % (data.distname, data.version, + data.bits) + else: + platformStr = "%s_%s_%s" % (data.distname, data.version, + data.bits) + g_logger.log("False %s %s" % (data.distname, platformStr)) + return + elif (data.distname == "euleros" or data.distname == "openEuler" or data.distname == "kylin"): + mixedType = "%s" % data.distname + platformStr = "%s_%s_%s" % (data.distname, data.version, data.bits) + else: + platformStr = "%s_%s_%s" % (data.distname, data.version, data.bits) + g_logger.log("False unknown %s" % platformStr) + return + + g_logger.log("True %s %s" % (mixedType, platformStr)) + return + + +############################################################################# +def CheckUname(): + """ + function : Check Uname + input : NA + output : NA + """ + data 
+
+
+#############################################################################
+def CheckUname():
+    """
+    function : Check Uname
+    input : NA
+    output : NA
+    """
+    data = collectUname()
+    g_logger.log("KernelVersion %s" % data.output)
+
+
+#############################################################################
+def CheckUnicode():
+    """
+    function : Check Unicode
+    input : NA
+    output : NA
+    """
+    data = collectUnicode()
+    g_logger.log("Unicode %s" % data.output)
+
+
+#############################################################################
+def CheckTimeZone():
+    """
+    function : Check Time Zone
+    input : NA
+    output : NA
+    """
+    data = collectTimeZone()
+    g_logger.log("TimeZone %s" % data.output)
+
+
+#############################################################################
+def CheckNtp():
+    """
+    function : Check Ntp
+    input : NA
+    output : NA
+    """
+    data = collectNtpd()
+    if not data.running:
+        g_logger.log("False, %s" % data.currenttime)
+    else:
+        g_logger.log("True, %s" % data.currenttime)
+
+
+#############################################################################
+
+
+def CheckTHPServer():
+    """
+    function : Check THP Server
+    input : NA
+    output : NA
+    """
+    expectedValues = "disabled"
+    data = collectTHPServer()
+    if data.status != expectedValues:
+        g_logger.log("The THP service status RealValue '%s'"
+                     " ExpectedValue '%s'." % (data.status, expectedValues))
+
+
+#############################################################################
+def CheckFirewallServer():
+    """
+    function : Check Firewall Server
+    input : NA
+    output : NA
+    """
+    expectedValues = "disabled"
+    data = collectfirewall()
+    if data.status == "":
+        return
+    elif (data.status != expectedValues):
+        g_logger.log("The firewall service status RealValue '%s'"
+                     " ExpectedValue '%s'" % (data.status, expectedValues))
+
+
+#############################################################################
+def CheckMemInfo():
+    """
+    function : Check Mem Info
+    input : NA
+    output : NA
+    """
+    memdata = collectMemInfo()
+    swapdata = collectSwapInfo()
+    if (swapdata.swapvalue > memdata.memvalue):
+        g_logger.log("SwapMemory %s TotalMemory %s" % (swapdata.swapvalue,
+                                                       memdata.memvalue))
+
+
+#############################################################################
+def getClusterUser():
+    """
+    function: Get the cluster user (the owner of $GPHOME)
+    input : NA
+    output: NA
+    """
+    # get user and group
+    gphome = DefaultValue.getPathFileOfENV("GPHOME")
+    if not os.path.exists(gphome):
+        raise Exception(ErrorCode.GAUSS_518["GAUSS_51805"] % "GPHOME")
+    user = g_OSlib.getPathOwner(gphome)[0]
+    return user
+
+
+#############################################################################
+def getFactorsFromDB(cmd):
+    """
+    function: get factors from db
+    input : cmd
+    output: USE_LARGE_PAGES, TEMP_BUFFER_SIZE, DATA_BUFFER_SIZE,
+            SHARED_POOL_SIZE
+    """
+    (status, output) = subprocess.getstatusoutput(cmd)
+
+    if (status != 0):
+        raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % cmd +
+                        " Error: \n%s" % str(output))
+    elif cmd.find("zsql") > -1:
+        result = output.split(os.linesep)[7].split()[1].strip()
+    else:
+        result = output.split('\n')[2].split('|')[1].strip()
+    # Just get the value of TEMP_BUFFER_SIZE, DATA_BUFFER_SIZE,
+    # SHARED_POOL_SIZE and USE_LARGE_PAGES
+    if (result not in ('TRUE', 'ONLY', 'FALSE')):
+        if (str(result[len(result) - 1]) in ('G', 'g')):
+            result = int(result[:-1]) * 1024
+        else:
+            result = int(result[:-1])
+
+    return result
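+
+
+# --- Illustrative sketch; not called by the code above ---------------------
+# A note on the size parsing in getFactorsFromDB(): the upstream expression
+# `in ('G' or 'g')` evaluates to `in 'G'` and silently misses lowercase
+# suffixes, which is why the tuple ('G', 'g') is used above. The conversion
+# itself (values such as '512M' or '2G', normalized to MB) can be isolated
+# as a helper (the name is hypothetical):
+def _factor_to_mb(text):
+    """Convert a '<number><M|G>' string to an integer number of MB."""
+    if text[-1] in ('G', 'g'):
+        return int(text[:-1]) * 1024
+    return int(text[:-1])       # otherwise assume a trailing 'M'/'m'
+# _factor_to_mb('2G') == 2048; _factor_to_mb('512M') == 512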
= "" + self.user = "" + self.extrachecklist = [] + self.logFile = "" + self.confFile = "" + self.mtuValue = "" + self.hostname = "" + self.mppdbfile = "" + + +######################################################### +# Init global log +######################################################### +def initGlobals(): + """ + function : init Globals + input : NA + output : NA + """ + global g_logger + global g_clusterInfo + + g_logger = GaussLog(g_opts.logFile, "LocalCheckOS") + + g_clusterInfo = dbClusterInfo() + if (g_opts.confFile != "" and g_opts.confFile is not None): + g_clusterInfo.initFromXml(g_opts.confFile) + + +def usage(): + """ +Usage: + python3 --help | -? + python3 LocalCheckOS -t action [-l logfile] [-X xmlfile] [-V] +Common options: + -t The type of action. + -s the path of MPPDB file + -l --log-file=logfile The path of log file. + -? --help Show this help screen. + -X --xmlfile = xmlfile Cluster config file + --ntp-server NTP server node's IP. + -V --version + """ + print(usage.__doc__) + + +def parseCommandLine(): + """ + function : Parse command line and save to global variables + input : NA + output : NA + """ + try: + opts, args = getopt.getopt(sys.argv[1:], "t:s:l:X:V?", + ["help", "log-file=", "xmlfile=", + "MTUvalue=", "hostname=", + "ntp-server=", "version"]) + except Exception as e: + usage() + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50000"] + % str(e)) + + if (len(args) > 0): + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50000"] + % str(args[0])) + + global g_opts + g_opts = CmdOptions() + + for (key, value) in opts: + if (key == "-?" or key == "--help"): + usage() + sys.exit(0) + elif (key == "-V" or key == "--version"): + print("%s %s" % (sys.argv[0].split("/")[-1], + VersionInfo.COMMON_VERSION)) + sys.exit(0) + elif (key == "-t"): + g_opts.action = value + elif (key == "-s"): + g_opts.mppdbfile = value + elif (key == "-X" or key == "--xmlfile"): + g_opts.confFile = value + elif (key == "-l" or key == "--log-file"): + g_opts.logFile = os.path.realpath(value) + elif (key == "--MTUvalue"): + g_opts.mtuValue = value + elif (key == "--hostname"): + g_opts.hostname = value + Parameter.checkParaVaild(key, value) + + +def checkParameter(): + """ + function : check parameter + input : NA + output : NA + """ + if (g_opts.action == ""): + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50001"] % 't' + '.') + if (g_opts.action != ACTION_CHECK_OS_VERSION + and g_opts.action != ACTION_CHECK_KERNEL_VERSION + and g_opts.action != ACTION_CHECK_UNICODE + and g_opts.action != ACTION_CHECK_TIMEZONE + and g_opts.action != ACTION_CHECK_DISK_CONFIGURE + and g_opts.action != ACTION_CHECK_BLOCKDEV_CONFIGURE + and g_opts.action != ACTION_CHECK_IO_CONFIGURE + and g_opts.action != ACTION_CHECK_IO_REQUEST + and g_opts.action != ACTION_CHECK_ASYNCHRONOUS_IO_REQUEST + and g_opts.action != ACTION_CHECK_LOGICAL_BLOCK + and g_opts.action != ACTION_CHECK_NETWORK_CONFIGURE + and g_opts.action != ACTION_CHECK_NETWORK_BOND_MODE + and g_opts.action != ACTION_CHECK_SWAP_MEMORY_CONFIGURE + and g_opts.action != ACTION_CHECK_TIME_CONSISTENCY + and g_opts.action != ACTION_CHECK_FIREWALL_SERVICE + and g_opts.action != ACTION_CHECK_THP_SERVICE + and g_opts.action != ACTION_SET_BLOCKDEV_CONFIGURE + and g_opts.action != ACTION_SET_NETWORK_CONFIGURE + and g_opts.action != ACTION_SET_IO_CONFIGURE + and g_opts.action != ACTION_SET_REMOVEIPC_VALUE + and g_opts.action != ACTION_SET_SESSION_PROCESS + and g_opts.action != ACTION_SET_THP_SERVICE + and g_opts.action != ACTION_SET_LOGICAL_BLOCK + and 
+
+
+def checkParameter():
+    """
+    function : check parameter
+    input : NA
+    output : NA
+    """
+    if (g_opts.action == ""):
+        GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50001"] % 't' + '.')
+    validActions = (ACTION_CHECK_OS_VERSION,
+                    ACTION_CHECK_KERNEL_VERSION,
+                    ACTION_CHECK_UNICODE,
+                    ACTION_CHECK_TIMEZONE,
+                    ACTION_CHECK_DISK_CONFIGURE,
+                    ACTION_CHECK_BLOCKDEV_CONFIGURE,
+                    ACTION_CHECK_IO_CONFIGURE,
+                    ACTION_CHECK_IO_REQUEST,
+                    ACTION_CHECK_ASYNCHRONOUS_IO_REQUEST,
+                    ACTION_CHECK_LOGICAL_BLOCK,
+                    ACTION_CHECK_NETWORK_CONFIGURE,
+                    ACTION_CHECK_NETWORK_BOND_MODE,
+                    ACTION_CHECK_SWAP_MEMORY_CONFIGURE,
+                    ACTION_CHECK_TIME_CONSISTENCY,
+                    ACTION_CHECK_FIREWALL_SERVICE,
+                    ACTION_CHECK_THP_SERVICE,
+                    ACTION_SET_BLOCKDEV_CONFIGURE,
+                    ACTION_SET_NETWORK_CONFIGURE,
+                    ACTION_SET_IO_CONFIGURE,
+                    ACTION_SET_REMOVEIPC_VALUE,
+                    ACTION_SET_SESSION_PROCESS,
+                    ACTION_SET_THP_SERVICE,
+                    ACTION_SET_LOGICAL_BLOCK,
+                    ACTION_SET_IO_REQUEST,
+                    ACTION_SET_ASYNCHRONOUS_IO_REQUEST)
+    if g_opts.action not in validActions:
+        GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50004"] % "t")
+
+    if (g_opts.logFile == ""):
+        dirName = os.path.dirname(os.path.realpath(__file__))
+        g_opts.logFile = os.path.join(dirName, DefaultValue.LOCAL_LOG_FILE)
+
+
+def getLocalIPAddr():
+    """
+    function: get local ip
+    input : NA
+    output: Ips
+    """
+    Ips = []
+
+    if g_opts.confFile == "":
+        Ips.append(DefaultValue.getIpByHostName())
+        return Ips
+
+    for node in g_clusterInfo.dbNodes:
+        if (node.name == DefaultValue.GetHostIpOrName()):
+            Ips.append(node.backIps[0])
+
+    return Ips
+
+
+def doLocalCheck():
+    """
+    function: check OS item on local node
+    input : NA
+    output: NA
+    """
+
+    global netWorkBondInfo
+    netWorkBondInfo = netWork()
+
+    function_dict = {ACTION_CHECK_OS_VERSION: CheckPlatformInfo,
+                     ACTION_CHECK_KERNEL_VERSION: CheckUname,
+                     ACTION_CHECK_UNICODE: CheckUnicode,
+                     ACTION_CHECK_TIMEZONE: CheckTimeZone,
+                     ACTION_CHECK_DISK_CONFIGURE: CheckLinuxMounts,
+                     ACTION_CHECK_SWAP_MEMORY_CONFIGURE: CheckMemInfo,
+                     ACTION_CHECK_TIME_CONSISTENCY: CheckNtp,
+                     ACTION_CHECK_FIREWALL_SERVICE: CheckFirewallServer,
+                     ACTION_SET_REMOVEIPC_VALUE: disRemoveIPC,
+                     ACTION_SET_SESSION_PROCESS: CheckSessionProcess,
+                     ACTION_CHECK_THP_SERVICE: CheckTHPServer,
+                     ACTION_SET_THP_SERVICE: disTHPServer}
+    function_keys = list(function_dict.keys())
+
+    function_dict_false = {ACTION_CHECK_BLOCKDEV_CONFIGURE: CheckBlockdev,
+                           ACTION_CHECK_IO_CONFIGURE: CheckIOSchedulers,
+                           ACTION_CHECK_IO_REQUEST: CheckIORequest,
+                           ACTION_CHECK_LOGICAL_BLOCK: CheckClogicalBlock}
+    function_keys_false = list(function_dict_false.keys())
+
+    function_dict_true = {ACTION_SET_BLOCKDEV_CONFIGURE: CheckBlockdev,
+                          ACTION_SET_IO_CONFIGURE: CheckIOSchedulers,
+                          ACTION_SET_IO_REQUEST: CheckIORequest,
+                          ACTION_SET_LOGICAL_BLOCK: CheckClogicalBlock}
+    function_keys_true = list(function_dict_true.keys())
+
+    if (g_opts.action in function_keys):
+        function_dict[g_opts.action]()
+    elif (g_opts.action in function_keys_false):
+        function_dict_false[g_opts.action](False)
+    elif (g_opts.action in function_keys_true):
+        function_dict_true[g_opts.action](True)
+    elif (g_opts.action == ACTION_CHECK_ASYNCHRONOUS_IO_REQUEST):
+        if (g_opts.confFile != "" and g_opts.confFile is not None):
+            CheckAsyIOrequests(False)
+    elif (g_opts.action == ACTION_CHECK_NETWORK_CONFIGURE):
+        for localAddres in nodeIps:
+            CheckNetWorkCardPara(localAddres, False)
+    elif (g_opts.action == ACTION_CHECK_NETWORK_BOND_MODE):
+        CheckNetWorkBonding(DefaultValue.getIpByHostName(), True)
+    elif (g_opts.action == ACTION_SET_NETWORK_CONFIGURE):
+        for localAddres in nodeIps:
+            CheckNetWorkCardPara(localAddres, True)
+    elif (g_opts.action == ACTION_SET_ASYNCHRONOUS_IO_REQUEST):
+        if (g_opts.confFile != "" and g_opts.confFile is not None):
+            CheckAsyIOrequests(True)
+    else:
+        g_logger.logExit(ErrorCode.GAUSS_500["GAUSS_50004"] % 't' +
+                         " Value: %s." % g_opts.action)
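+
+
+# --- Illustrative sketch; not called by the code above ---------------------
+# doLocalCheck() keeps three dispatch tables: no-argument checks, plus
+# CHECK_*/SET_* actions that call the same function with isSetting False or
+# True. A single table of (function, args) pairs would fold the three
+# lookups into one; sketched here with a hypothetical helper:
+def _dispatch_action(action, table):
+    """Run one entry of an {action: (function, args)} dispatch table."""
+    func, args = table[action]
+    return func(*args)
+# e.g. table = {ACTION_CHECK_IO_CONFIGURE: (CheckIOSchedulers, (False,)),
+#               ACTION_SET_IO_CONFIGURE: (CheckIOSchedulers, (True,))}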
+
+
+if __name__ == '__main__':
+    """
+    main function
+    """
+    try:
+        parseCommandLine()
+        checkParameter()
+        initGlobals()
+    except Exception as e:
+        GaussLog.exitWithError(str(e))
+
+    try:
+        nodeIps = []
+        nodeIps = getLocalIPAddr()
+        doLocalCheck()
+        g_logger.closeLog()
+    except Exception as e:
+        g_logger.logExit(str(e))
+
+    sys.exit(0)
diff --git a/script/local/LocalCollect.py b/script/local/LocalCollect.py
new file mode 100644
index 0000000..01a901f
--- /dev/null
+++ b/script/local/LocalCollect.py
@@ -0,0 +1,1674 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+#############################################################################
+# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+#
+# openGauss is licensed under Mulan PSL v2.
+# You can use this software according to the terms
+# and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+#
+#          http://license.coscl.org.cn/MulanPSL2
+#
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+# ----------------------------------------------------------------------------
+# Description : LocalCollect.py is a local utility used to
+#               collect log files and parameter files
+#############################################################################
+
+import os
+import sys
+import subprocess
+import getopt
+import time
+import re
+import base64
+import json
+import datetime
+
+sys.path.append(sys.path[0] + "/../")
+from gspylib.common.DbClusterInfo import dbClusterInfo
+from gspylib.common.ParameterParsecheck import Parameter
+from gspylib.common.GaussLog import GaussLog
+from gspylib.common.Common import ClusterCommand, DefaultValue
+from gspylib.os.gsfile import g_file
+from multiprocessing.dummy import Pool as ThreadPool
+from gspylib.common.ErrorCode import ErrorCode
+
+###########################
+# instance type. only for CN/DN
+###########################
+INSTANCE_TYPE_UNDEFINED = -1
+# master
+MASTER_INSTANCE = 0
+# standby
+STANDBY_INSTANCE = 1
+# dummy standby
+DUMMY_STANDBY_INSTANCE = 2
+
+#######################################################################
+# GLOBAL VARIABLES
+# g_opts: global options
+# g_logger: global logger
+# g_clusterInfo: global cluster information
+# g_resultdir: global result dir
+# g_localnodeinfo: global local node information
+#######################################################################
+HOSTNAME = DefaultValue.GetHostIpOrName()
+g_opts = None
+g_logger = None
+g_clusterInfo = None
+g_resultdir = None
+g_localnodeinfo = None
+g_jobInfo = None
+g_tmpdir = None
+g_current_time = ""
+g_need_gstack = 0
+g_core_pattern = 'core-%e-%p-%t'
+
+
+class CmdOptions():
+    '''
+    classdocs
+    '''
+
+    def __init__(self):
+        """
+        function: Constructor
+        """
+        # initialize variable
+        self.action = ""
+        self.outputDir = ""
+        self.logFile = ""
+        self.nodeName = ""
+        self.appPath = ""
+        self.user = ""
+        self.begin = ""
+        self.end = ""
+        self.key = ""
+        # Speed limit for copy/remote copy of files, in KB/s.
+        # We use KB/s so that a small bandwidth limit does not
+        # round down to zero during the calculation.
+ self.speedLimitKBs = 0 + self.speedLimitFlag = 0 + self.config = "" + self.content = [] + + +class JobInfo(): + """ + class: JobInfo + """ + + def __init__(self): + ''' + Constructor + ''' + # initialize variable + self.jobName = "" + self.successTask = [] + self.failedTask = {} + + +def checkEmpty(path): + """ + function: check the path is empty + input : path + output : int + """ + isEmpty = 1 + for root, dirs, files in os.walk(path, topdown=False): + if files: + isEmpty = 0 + break + return isEmpty + + +def replaceInvalidStr(outputStr): + """ + function: replace invalid str + input : outputStr + output : str + """ + return outputStr.replace("\'", "").replace("\"", "").replace("`", + "").replace( + "echo", "e c h o").replace("\n", " ") + + +def sendLogFiles(): + """ + function: package and send log files back to the command node. + :return: + """ + g_logger.debug("Begin to remote copy log files.") + g_logger.debug( + "Speed limit to copy log files is %d KB/s." % g_opts.speedLimitKBs) + # Compress the copied log file and modify the permissions in the + # temporary directory + tarName = "%s.tar.gz" % HOSTNAME + + path = g_tmpdir + "/%s" % HOSTNAME + if not os.path.exists(path): + g_logger.logExit("Result Dir is not exists.") + + isEmpty = checkEmpty(path) + if isEmpty == 1: + # Delete the result temporary directory if the result temporary + # directory exists + cmd = "(if [ -d '%s' ];then rm -rf '%s';fi)" % ( + g_resultdir, g_resultdir) + # Delete the archive if the archive is present in the temporary + # directory + cmd = "%s && (if [ -f '%s'/'%s' ];then rm -rf '%s'/'%s';fi)" % \ + (cmd, g_tmpdir, tarName, g_tmpdir, tarName) + (status, output) = DefaultValue.retryGetstatusoutput(cmd) + if status != 0: + g_logger.logExit("Failed to delete %s." % "%s and %s" % ( + g_resultdir, tarName) + " Error:\n%s" % output) + g_logger.logExit("All collection tasks failed") + + cmd = "cd '%s' && tar -zcf '%s' '%s' && chmod %s '%s'" % \ + (g_tmpdir, tarName, HOSTNAME, DefaultValue.FILE_MODE, tarName) + (status, output) = DefaultValue.retryGetstatusoutput(cmd) + if status != 0: + g_logger.logExit("Failed to compress %s." % ("directory %s/%s" % \ + (g_tmpdir, + HOSTNAME)) + + " Error: \n%s" % output) + + if g_opts.nodeName != "": + # send backup file which is compressed to the node that is + # currently performing the backup + if g_opts.nodeName == DefaultValue.GetHostIpOrName(): + if int(g_opts.speedLimitFlag) == 1: + cmd = "rsync --bwlimit=%d '%s'/'%s' '%s'/" % \ + (g_opts.speedLimitKBs, g_tmpdir, tarName, + g_opts.outputDir) + else: + cmd = "cp '%s'/'%s' '%s'/" % ( + g_tmpdir, tarName, g_opts.outputDir) + else: + # scp's limit parameter is specified in Kbit/s. 1KB/s = 8Kbit/s + cmd = "pscp -x '-l %d' -H %s '%s'/'%s' '%s'/" % \ + ( + g_opts.speedLimitKBs * 8, g_opts.nodeName, g_tmpdir, tarName, + g_opts.outputDir) + (status, output) = DefaultValue.retryGetstatusoutput(cmd) + if status != 0: + g_logger.logExit( + "Failed to copy %s." % tarName + " Error:\n%s" % output) + + # Delete the temporary directory if the temporary directory exists + cmd = "(if [ -d '%s' ];then rm -rf '%s';fi)" % (g_resultdir, g_resultdir) + # Delete the archive if the archive is present in the temporary directory + cmd = "%s && (if [ -f '%s'/'%s' ];then rm -rf '%s'/'%s';fi)" % \ + (cmd, g_tmpdir, tarName, g_tmpdir, tarName) + (status, output) = DefaultValue.retryGetstatusoutput(cmd) + if status != 0: + g_logger.logExit("Failed to delete %s. 
%s" % ( + "%s and %s" % (g_resultdir, tarName), " Error:\n%s" % output)) + + +def checkParameterEmpty(parameter, parameterName): + """ + function: check parameter whether is or not empty + input : parameter, parameterName + output : NA + """ + if parameter == "": + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50001"] + % parameterName) + + +def parseCommandLine(): + """ + function: do parse command line + input : cmdCommand + output: help/version information + """ + global g_opts + g_opts = CmdOptions() + try: + # Parse command + opts, args = getopt.getopt(sys.argv[1:], "t:U:o:h:b:e:k:l:s:S:C:", + [""]) + except getopt.GetoptError as e: + # Error exit if an illegal parameter exists + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50000"] % str(e)) + if len(args) > 0: + # Error exit if an illegal parameter exists + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50000"] % + str(args[0])) + # Save parameter + parameter_map = {"-t": g_opts.action, "-U": g_opts.user, + "-o": g_opts.outputDir, "-h": g_opts.nodeName, \ + "-l": g_opts.logFile, "-b": g_opts.begin, + "-e": g_opts.end, "-k": g_opts.key, + "-s": g_opts.speedLimitKBs, "-S": g_opts.speedLimitFlag, + "-C": g_opts.config} + parameter_keys = parameter_map.keys() + + for key, value in opts: + if key in parameter_keys: + if key == "-C": + value = value.replace("#", "\"") + parameter_map[key] = value.strip() + else: + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50000"] % value) + + Parameter.checkParaVaild(key, value) + g_opts.action = parameter_map["-t"] + g_opts.user = parameter_map["-U"] + g_opts.outputDir = parameter_map["-o"] + g_opts.nodeName = parameter_map["-h"] + g_opts.logFile = parameter_map["-l"] + g_opts.begin = parameter_map["-b"] + g_opts.end = parameter_map["-e"] + g_opts.key = parameter_map["-k"] + g_opts.speedLimitKBs = parameter_map["-s"] + g_opts.speedLimitFlag = parameter_map["-S"] + g_opts.config = parameter_map["-C"] + # The -t parameter is required + checkParameterEmpty(g_opts.action, "t") + # check if user exist and is the right user + checkParameterEmpty(g_opts.user, "U") + DefaultValue.checkUser(g_opts.user, False) + # check log file + if g_opts.logFile == "": + g_opts.logFile = DefaultValue.getOMLogPath(DefaultValue.LOCAL_LOG_FILE, + g_opts.user, "", "") + if not os.path.isabs(g_opts.logFile): + GaussLog.exitWithError(ErrorCode.GAUSS_502["GAUSS_50213"] % "log") + if int(g_opts.speedLimitKBs) < 0: + GaussLog.exitWithError(ErrorCode.GAUSS_526["GAUSS_53032"]) + + g_opts.speedLimitKBs = int(g_opts.speedLimitKBs) + + # 1048576 KB/s = 1GB/s, which means unlimited. 
+ if g_opts.speedLimitKBs == 0: + g_opts.speedLimitKBs = 1048576 + + +def initGlobal(): + """ + function: Init logger g_clusterInfo g_sshTool g_nodes + input : NA + output: [] + """ + global g_logger + global g_clusterInfo + global g_resultdir + global g_localnodeinfo + global g_tmpdir + global g_current_time + global g_core_pattern + + try: + # The -t parameter is required + g_logger = GaussLog(g_opts.logFile, "LocalCollect") + # Init the cluster information from static configuration file + g_clusterInfo = dbClusterInfo() + g_clusterInfo.initFromStaticConfig(g_opts.user) + g_tmpdir = DefaultValue.getTmpDirFromEnv() + + # Obtain the cluster installation directory + g_opts.appPath = g_clusterInfo.appPath + # Gets the current node information + g_localnodeinfo = g_clusterInfo.getDbNodeByName(HOSTNAME) + # Gets a temporary directory + g_resultdir = "%s/%s" % (g_tmpdir, HOSTNAME) + + g_current_time = datetime.datetime.now().strftime("%Y%m%d_%H%M%S%f") + except Exception as e: + g_logger.logExit(str(e)) + + +def check_command(): + """ + function: check command + input : NA + output : NA + """ + g_logger.debug("check Command for rsync") + g_logger.debug(g_opts.speedLimitFlag) + cmd = "command -v rsync" + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + g_logger.logExit(("The cmd is %s." % cmd) + output) + + +def create_temp_result_folder(): + """ + function: create_temp_result_folder + output: Successfully create temp result folder + """ + # Delete the temporary folder if a temporary folder with the same name + # exists + cmd = "(if [ -d '%s' ];then rm -rf '%s';fi)" % (g_resultdir, g_resultdir) + # Create temporary folders and subfolders + cmd = "%s && mkdir -p -m %s '%s'" % ( + cmd, DefaultValue.KEY_DIRECTORY_MODE, g_resultdir) + cmd = "%s && mkdir -p -m %s '%s/systemfiles'" % ( + cmd, DefaultValue.KEY_DIRECTORY_MODE, g_resultdir) + cmd = "%s && mkdir -p -m %s '%s/catalogfiles'" % ( + cmd, DefaultValue.KEY_DIRECTORY_MODE, g_resultdir) + cmd = "%s && mkdir -p -m %s '%s/xlogfiles'" % ( + cmd, DefaultValue.KEY_DIRECTORY_MODE, g_resultdir) + cmd = "%s && mkdir -p -m %s '%s/gstackfiles'" % ( + cmd, DefaultValue.KEY_DIRECTORY_MODE, g_resultdir) + cmd = "%s && mkdir -p -m %s '%s/coreDumpfiles'" % ( + cmd, DefaultValue.KEY_DIRECTORY_MODE, g_resultdir) + cmd = "%s && mkdir -p -m %s '%s/planSimulatorfiles'" % ( + cmd, DefaultValue.KEY_DIRECTORY_MODE, g_resultdir) + cmd = "%s && mkdir -p -m %s '%s'/logfiles && mkdir -p -m %s " \ + "'%s/configfiles'" % \ + (cmd, DefaultValue.KEY_DIRECTORY_MODE, g_resultdir, + DefaultValue.KEY_DIRECTORY_MODE, g_resultdir) + g_logger.debug("Command for creating output directory: %s" % cmd) + (status, output) = DefaultValue.retryGetstatusoutput(cmd) + if status != 0: + g_logger.logExit("Failed to create the %s directory." 
% \
+                         ("%s/logfiles and %s/configfiles" % (
+                             g_resultdir, g_resultdir)) + " Error:\n%s" % output)
+
+
+def itemTitleCommand(cmds, info, dataFileName):
+    """
+    function: append the commands that print an item title banner
+    input : cmds, info, dataFileName
+    output : NA
+    """
+    itemTitle = "'###########################################################'"
+    cmds.append("echo '\n%s' >> %s 2>&1" % (itemTitle, dataFileName))
+    cmds.append("echo '#' >> %s 2>&1" % dataFileName)
+    cmds.append("echo '#' %s >> %s 2>&1" % (info, dataFileName))
+    cmds.append("echo '#' >> %s 2>&1" % dataFileName)
+    cmds.append("echo %s >> %s 2>&1" % (itemTitle, dataFileName))
+
+
+def basic_info_check():
+    """
+    function: collect basic information
+    output: Successfully collected basic information
+    """
+    g_logger.debug("Starting collect basic info.")
+    dataFileName = "%s/systemfiles/database_system_info_%s.txt" % (
+        g_resultdir, datetime.datetime.now().strftime("%Y%m%d_%H%M%S%f"))
+    cmds = []
+    itemTitleCommand(cmds, "C L U S T E R' 'I N F O", dataFileName)
+    cmds.append("gs_om -t status --detail >> %s 2>&1" % dataFileName)
+
+    itemTitleCommand(cmds, "V E R S I O N' 'I N F O", dataFileName)
+    cmds.append("gaussdb --version >> %s 2>&1" % dataFileName)
+    cmds.append("cm_agent --version >> %s 2>&1" % dataFileName)
+    cmds.append("cm_server --version >> %s 2>&1" % dataFileName)
+    cmds.append("gs_gtm --version >> %s 2>&1" % dataFileName)
+    cmds.append("cat /proc/version >> %s 2>&1" % dataFileName)
+
+    cmd = "cat /proc/sys/kernel/core_pattern"
+    (status, output) = subprocess.getstatusoutput(cmd)
+    if status != 0:
+        g_logger.debug(
+            "Failed to collect core dump files. Command: %s.\n Error:\n%s" % (
+                cmd, output))
+    core_config = str(output)
+    core_pattern = core_config.split('/')[-1]
+    itemTitleCommand(cmds, "C O R E' 'F I L E' 'I N F O", dataFileName)
+    if core_pattern != g_core_pattern:
+        cmds.append(
+            "echo Failed to collect core dump files, core pattern "
+            "is not core-e-p-t. >> %s 2>&1" % dataFileName)
+    else:
+        core_path = "/".join(core_config.split("/")[:-1])
+        cmds.append("ls -lrt %s >> %s 2>&1" % (core_path, dataFileName))
+
+    itemTitleCommand(cmds, "X L O G' 'F I L E' 'I N F O", dataFileName)
+    for Inst in g_localnodeinfo.datanodes:
+        cmds.append(
+            "echo '\n********' dn_%d xlog file info '*******' >> %s 2>&1" % (
+                Inst.instanceId, dataFileName))
+        pg_xlog = Inst.datadir + "/pg_xlog"
+        cmds.append("ls -lrt %s >> %s 2>&1" % (pg_xlog, dataFileName))
+
+    for Inst in g_localnodeinfo.coordinators:
+        cmds.append(
+            "echo '\n********' cn_%d xlog file info '*******' >> %s 2>&1" % (
+                Inst.instanceId, dataFileName))
+        pg_xlog = Inst.datadir + "/pg_xlog"
+        cmds.append("ls -lrt %s >> %s 2>&1" % (pg_xlog, dataFileName))
+
+    cmd = "echo $GAUSSLOG"
+    (status, output) = subprocess.getstatusoutput(cmd)
+    gausslog = str(output)
+    pg_log = "%s/pg_log" % gausslog
+
+    itemTitleCommand(cmds, "P G_L O G' 'F I L E' 'I N F O", dataFileName)
+    for root, dirs, files in os.walk(pg_log):
+        for perDir in dirs:
+            cmds.append(
+                "echo '\n********' %s pg_log file info '*******' >> %s 2>&1"
+                % (perDir, dataFileName))
+            cmds.append(
+                "ls -lrt %s/%s >> %s 2>&1" % (pg_log, perDir, dataFileName))
+
+    # Execute each query command and redirect the results to the
+    # specified file
+    for cmd in cmds:
+        (status, output) = subprocess.getstatusoutput(cmd)
+        if status != 0:
+            g_logger.debug(
+                ("Failed to collect basic information. Error:\n%s." % output)
+                + ("The cmd is %s " % cmd))
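+
+
+# --- Illustrative sketch; not called by the code above ---------------------
+# basic_info_check() only lists core files when the tail of
+# /proc/sys/kernel/core_pattern matches the expected 'core-%e-%p-%t'
+# template (g_core_pattern). The same test as a small predicate (the name
+# is hypothetical):
+def _core_dump_dir_if_usable(core_config, expected='core-%e-%p-%t'):
+    """Return the core dump directory, or None if the pattern differs."""
+    if core_config.split('/')[-1] != expected:
+        return None             # pattern is not in core-%e-%p-%t form
+    return "/".join(core_config.split("/")[:-1])
+# _core_dump_dir_if_usable('/var/core/core-%e-%p-%t') == '/var/core'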
+
+
+def system_check():
+    """
+    function: collect OS information
+    input : dataFileName
+    output: Successfully collected OS information
+    """
+    g_logger.debug("Collecting OS information.")
+    g_jobInfo.jobName = "Collecting OS information"
+    dataFileName = "%s/systemfiles/OS_information_%s.txt" % (
+        g_resultdir, datetime.datetime.now().strftime("%Y%m%d_%H%M%S%f"))
+    cmds = []
+    # Add information to the document
+    cmds.append(
+        "echo '************************************\n* OS information"
+        " for host' > %s 2>&1" % dataFileName)
+    cmds.append("hostname >> %s 2>&1" % dataFileName)
+    cmds.append("echo '************************************' >> %s 2>&1" %
+                dataFileName)
+    appendCommand(cmds, "ps ux", dataFileName)
+    appendCommand(cmds, "iostat -xm 2 3", dataFileName)
+    appendCommand(cmds, "free -m", dataFileName)
+    # Execute each query command and redirect the results to the
+    # specified file
+    for cmd in cmds:
+        (status, output) = subprocess.getstatusoutput(cmd)
+        if ">>" in cmd:
+            cmd = cmd.split(">>")[0]
+            cmd = cmd.replace("\n", " ")
+        if "echo" in cmd:
+            continue
+        if status != 0:
+            if "Permission denied" in output:
+                output = "can not print info to file: Permission denied"
+            g_jobInfo.failedTask[cmd] = replaceInvalidStr(output)
+            g_logger.debug(
+                "Failed to collect OS information. Error:\n%s" % output)
+        else:
+            g_jobInfo.successTask.append(cmd)
+    basic_info_check()
+    # Modify the file permissions
+    os.chmod(dataFileName, DefaultValue.FILE_MODE_PERMISSION)
+    g_logger.log(json.dumps(g_jobInfo.__dict__))
+    g_logger.debug("Successfully collected OS information.")
+
+
+def appendCommand(cmds, newCommand, dataFileName):
+    """
+    function: append a command plus its banner to the command list
+    input : cmds, newCommand, dataFileName
+    output: NA
+    """
+    # Execute the command and output to the specified file
+    cmds.append("echo '\n************************************\n* "
+                "%s \n"
+                "************************************' >> %s 2>&1" %
+                (newCommand, dataFileName))
+    cmds.append("%s >> %s 2>&1" % (newCommand, dataFileName))
+
+
+def database_check():
+    """
+    function: collect catalog statistics
+    input : dbNode
+    output: Successfully collected catalog statistics.
+    """
+    # Execute SQL to collect catalog statistics
+    g_logger.debug("Collecting catalog statistics.")
+    g_jobInfo.jobName = "Collecting catalog information"
+    isFailed = 0
+    for dnInst in g_localnodeinfo.datanodes:
+        if dnInst.instanceType == STANDBY_INSTANCE:
+            continue
+        sqls = []
+        schema = ""
+        for s in DefaultValue.DATABASE_CHECK_WHITE_LIST:
+            schema += "\'%s\'," % s
+        sql = "SELECT viewname FROM pg_views Where schemaname IN (%s) union " \
+              "SELECT tablename FROM pg_tables Where schemaname IN (%s);" % (
+                  schema[:-1], schema[:-1])
+        g_logger.debug(sql)
+        (status, output) = ClusterCommand.execSQLCommand(sql, g_opts.user, "",
+                                                         dnInst.port)
+        if status != 0:
+            g_logger.debug(
+                "Failed to exec SQL command. please check db status. sql: "
+                "%s.\n Error: %s.\n" % (sql, output))
+            g_jobInfo.failedTask["find views"] = ErrorCode.GAUSS_535[
+                "GAUSS_53502"]
+            g_logger.log(json.dumps(g_jobInfo.__dict__))
+            raise Exception("")
+        g_jobInfo.successTask.append("find views")
+        V_list = output.split("\n")
+        for view in g_opts.content:
+            view = view.replace(" ", "")
+            if len(view) > 0:
+                schema = 'pg_catalog'
+                if "." in view:
+                    s_t = view.split(".")
+                    if len(s_t) != 2:
+                        g_jobInfo.failedTask[view] = ErrorCode.GAUSS_535[
+                            "GAUSS_53515"] % view
+                        continue
+                    else:
+                        schema = s_t[0]
+                        name = s_t[1]
+                        if schema.lower() not in \
+                                DefaultValue.DATABASE_CHECK_WHITE_LIST:
+                            g_jobInfo.failedTask[view] = ErrorCode.GAUSS_535[
+                                "GAUSS_53513"] % schema
+                            continue
+                        if name.lower() not in V_list:
+                            g_jobInfo.failedTask[view] = ErrorCode.GAUSS_535[
+                                "GAUSS_53514"] % (name, schema)
+                            continue
+                elif view.lower() not in V_list:
+                    g_jobInfo.failedTask[view] = ErrorCode.GAUSS_535[
+                        "GAUSS_53514"] % (view, schema)
+                    continue
+                filepath = ("%s/catalogfiles/" % g_resultdir)
+                if not os.path.exists(filepath):
+                    os.makedirs(filepath)
+                filename = ("%s/dn_%s_%s_%s.csv" % (
+                    filepath, dnInst.instanceId, view.replace(".", "_"),
+                    datetime.datetime.now().strftime("%Y%m%d_%H%M%S%f")))
+                sql = "\copy (select * from %s) to %s with csv HEADER;" % (
+                    view, filename)
+                (status, output) = ClusterCommand.execSQLCommand(sql,
+                                                                 g_opts.user,
+                                                                 "",
+                                                                 dnInst.port)
+                if status != 0:
+                    g_logger.debug(
+                        "Failed to exec SQL command. sql %s.\n Error: %s.\n"
+                        % (sql, output))
+                    if "does not exist" in output:
+                        g_jobInfo.failedTask[view] = ErrorCode.GAUSS_535[
+                            "GAUSS_53500"] % view
+                    elif "Connection refused" in output:
+                        g_jobInfo.failedTask[view] = ErrorCode.GAUSS_535[
+                            "GAUSS_53501"]
+                    else:
+                        g_jobInfo.failedTask[view] = ErrorCode.GAUSS_535[
+                            "GAUSS_53502"]
+                else:
+                    g_jobInfo.successTask.append(view)
+                    g_logger.debug(
+                        "Successfully collected %s statistics. %s" % (
+                            view, sql))
+        execute_sqls(sqls, dnInst)
+    g_logger.log(json.dumps(g_jobInfo.__dict__))
+    g_logger.debug("Successfully collected catalog statistics.")
+
+
+def execute_sqls(sqls, dnInst):
+    """
+    function: execute the sql commands
+    input : sqls, dnInst
+    output: NA
+    """
+    # Write the formatted content to the specified file
+    filePath = "%s/catalogfiles/gs_clean_%s.txt" % (
+        g_resultdir, datetime.datetime.now().strftime("%Y%m%d_%H%M%S%f"))
+    g_file.createFileInSafeMode(filePath)
+    with open(filePath, "w") as f:
+        f.write(
+            "************************************\n"
+            "* Catalog statistics for host "
+            "%s \n************************************" % dnInst.hostname)
+        for sql in sqls:
+            # Execute each sql and write the results to a file
+            f.write(
+                "\n\n************************************\n %s "
+                "\n************************************\n" % sql)
+            output = ClusterCommand.execSQLCommand(sql, g_opts.user, "",
+                                                   dnInst.port)[1]
+            f.write(str(output))
+
+        userProfile = DefaultValue.getMpprcFile()
+        cmd = "source %s ; gs_clean -a -N -s -p %s" \
+              % (userProfile, dnInst.port)
+        f.write(
+            "\n\n************************************\n %s "
+            "\n************************************\n" % cmd)
+        output = subprocess.getstatusoutput(cmd)[1]
+        f.write(str(output))
+
+        f.flush()
+    # Modify the file permissions to 640
+    os.chmod(filePath, DefaultValue.FILE_MODE_PERMISSION)
+
+
+def compareTime(time_A, time_B):
+    """
+    input: string, string
+    output: boolean
+    description: compare two timestamps; returns True when time_A >= time_B
+    """
+    return time_A >= time_B
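+
+
+# --- Illustrative check; not wired into the tool ---------------------------
+# matchFile() below treats an empty begin or end bound as open-ended and
+# keeps a file when any of its candidate timestamps falls inside
+# [begin, end]. Because the timestamps are yyyymmddHHMM strings,
+# lexicographic and chronological order coincide, so compareTime() can stay
+# a plain string comparison. A tiny self-test of those semantics:
+def _self_test_matchFile():
+    """Assert the window semantics of matchFile() (illustration only)."""
+    assert matchFile("202104010000", "202104102359", ["202104051200"])
+    assert not matchFile("", "202104102359", ["202105010000"])
+    assert matchFile("", "", [])   # no bounds: everything matches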
+ """ + # both of begin_time and end_time + if begin_t and end_t: + for t in fileTime: + if compareTime(t, begin_t) and compareTime(end_t, t): + return True + # only begin_time + elif begin_t and (not end_t): + for t in fileTime: + if compareTime(t, begin_t): + return True + # only end_time + elif (not begin_t) and end_t: + for t in fileTime: + if compareTime(end_t, t): + return True + # none of begin_time and end_time + else: + return True + + return False + + +def filterFile(filename): + """ + input: string + output: boolean + description: filter the files suffixed in .log/.rlog/.dlog/.aud/.raft + """ + endList = [".log", ".rlog", ".dlog", ".aud", ".raft"] + if os.path.splitext(filename)[-1] in endList: + return True + else: + return False + + +# chieve rule1 or rule2 or rule4 to yyyymmddHHMM +def d_timeToString(dateString): + """ + input: string + output: string + description: format dateString to yyyymmddHHMM + example: 2018-08-26 14:18:40 ->> 201808261418 + """ + return dateString.replace("-", "").replace(" ", "").replace(":", + "").replace( + "/", "").replace("T", "")[:12] + + +# achieve rule3 to yyyymmddHHMM +def e_timeToString(dateString): + """ + input: string + output: string + description: format dateString to yyyymmddHHMM + example: Wed Aug 29 07:23:03 CST 2018 ->> 201808290723 + """ + # define month list for get digital + month = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", + "Oct", "Nov", "Dec"] + year_string = dateString[-4:] + month_string = str(month.index(dateString[4:7]) + 1) + if len(month_string) == 1: + month_string = "0" + month_string + day_string = dateString[8:10] + + # time format HHMM + time_string = dateString[11:16].replace(":", "") + + return year_string + month_string + day_string + time_string + + +def getCtimeOfFile(fileName): + """ + input: string + output: string + description: Get the first line of the file to determine whether there + is a date, + if any, change to the specified date format(yyyymmddHHMM) + and return, + if not, return an empty string + """ + + if not os.path.exists(fileName): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % str(fileName)) + + # 2018-08-26 14:18:40 + rule1 = r'\d{4}-[0-1]\d-[0-3]\d [0-2]\d:[0-6]\d:[0-6]\d' + + # 2018/08/25 20:40:16 + rule2 = r'\d{4}/[0-1]\d/[0-3]\d [0-2]\d:[0-6]\d:[0-6]\d' + + # Wed Aug 29 00:00:03 CST 2018 + rule3 = r'(Mon|Tue|Wed|Thu|Fri|Sat|Sun)\b (' \ + r'Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\b [0-3]\d [' \ + r'0-2]\d:[0-6]\d:[0-6]\d CST \d{4}' + + # 2018-08-25T20:49:05+08:00 + rule4 = r'\d{4}-[0-1]\d-[0-3]\dT[0-2]\d:[0-6]\d:[0-6]\d' + + # defining rules and partitioning method key-value pairs + rule_dict = {rule1: d_timeToString, + rule2: d_timeToString, + rule3: e_timeToString, + rule4: d_timeToString + } + + # open file + with open(fileName, "r") as f: + # get the first line of the file + line = f.readline().strip() + # match according to known rules + for rule in rule_dict.keys(): + result = re.search(rule, line) + if result: + # change to the specified date format and return + return rule_dict[rule](result.group()) + + return "" + + +def log_copy_for_zenith(): + """ + function: collected log files + output: Successfully collected log files + """ + g_logger.debug("Collecting log files.") + g_jobInfo.jobName = "Collecting zenith log information" + + try: + # get envPath $GAUSSLOG + gausslogPath = DefaultValue.getPathFileOfENV("GAUSSLOG") + + # define necessary path + logfilePath = "%s/logfiles/" % g_resultdir + keyword_result = "keyword_result.txt" + + # match 
the log files that meet the time requirements + # and add them to the archive + logfileList = [] + g_logger.debug("Start matching log file.") + for root, dirs, files in os.walk(gausslogPath): + for f in files: + logfile = os.path.join(root, f) + + # get matched files in the list + if filterFile(f): + # get the time of file + statInfo = os.stat(logfile) + + # convert timestamp to format "%Y%m%d%H%M" + mtime = time.strftime("%Y%m%d%H%M", + time.localtime(statInfo.st_mtime)) + ctime = getCtimeOfFile(logfile) + if not ctime: + ctime = mtime + + timeList = [mtime, ctime] + + # compare file time + if matchFile(g_opts.begin, g_opts.end, timeList): + childDir = ''.join(root.split(gausslogPath)[1:]) + childDir = childDir.lstrip("/") + targetDir = os.path.join(logfilePath, childDir) + if not os.path.exists(targetDir): + dir_permission = 0o700 + os.makedirs(targetDir, mode=dir_permission) + g_file.cpFile(logfile, targetDir) + g_logger.debug("Match log file completion.") + g_jobInfo.successTask.append("Match log file") + except Exception as e: + if os.path.exists(logfilePath): + g_file.cleanDirectoryContent(logfilePath) + g_logger.debug("Failed to filter log files. Error:\n%s" % str(e)) + g_jobInfo.failedTask["Failed to filter log files"] = str(e) + g_logger.log(json.dumps(g_jobInfo.__dict__)) + raise Exception("") + + if g_opts.key: + # Look for keyword matching in the dir and write to the specified file + cmd = "echo \"\" > %s/logfiles/%s; for f in `find %s -type f`;" \ + " do grep -ai '%s' $f >> %s/logfiles/%s; done" % ( + g_resultdir, keyword_result, logfilePath, g_opts.key, g_resultdir, + keyword_result) + (status, output) = subprocess.getstatusoutput(cmd) + g_logger.log(json.dumps(g_jobInfo.__dict__)) + g_logger.debug("Successfully collected log files.") + + +def log_check(logFileName): + """ + function: log check + input : logFileName + output: filename includes keywords or not + """ + for c in g_opts.content: + c = c.replace(" ", "").lower() + if len(c) > 0 and c in logFileName.lower(): + return 1 + return 0 + + +def log_copy(): + """ + function: collected log files + input : NA + output: NA + """ + g_logger.debug("Starting collect log.") + g_jobInfo.jobName = "Collecting pg_log information" + logfiletar = "log_%s.tar.gz" % datetime.datetime.now().strftime( + "%Y%m%d_%H%M%S%f") + keyword_result = "keyword_result.txt" + deleteCmd = "cd $GAUSSLOG && if [ -d tmp_gs_collector ];" \ + "then rm -rf tmp_gs_collector; fi" + + if g_opts.key is not None and g_opts.key != "": + g_logger.debug( + "Keyword for collecting log in base64 encode [%s]." % g_opts.key) + g_opts.key = base64.b64decode(g_opts.key) + g_logger.debug( + "Keyword for collecting log in plain text [%s]." % g_opts.key) + + g_logger.debug( + "Speed limit to copy log files is %d KB/s." % g_opts.speedLimitKBs) + + # Filter the log files, if has keyword, do not collect prf file + if g_opts.key is not None and g_opts.key != "": + cmd = "cd $GAUSSLOG && if [ -d tmp_gs_collector ];" \ + "then rm -rf tmp_gs_collector; " \ + "fi && (find . -type f -iname '*.log' -print)" \ + " | xargs ls --time-style='+ %Y%m%d%H%M' -ll" + else: + cmd = "cd $GAUSSLOG && if [ -d tmp_gs_collector ];" \ + "then rm -rf tmp_gs_collector; " \ + "fi && (find . -type f -iname '*.log' -print && " \ + "find . 
-type f -iname '*.prf' -print) " \ + "| xargs ls --time-style='+ %Y%m%d%H%M' -ll" + (status, output) = subprocess.getstatusoutput(cmd) + logFiles = output.split("\n") + logs = [] + Directorys = [] + findFiles = 0 + # If there is a log file filtered by time + if len(logFiles[0].split()) != 2: + for logFile in logFiles: + logFileName = logFile.split()[6] + logStartTime = formatTime(logFileName) + # If the log file name does not meet the format requirements,skip + if not logStartTime.isdigit() or len(logStartTime) != 12: + continue + logStartTime = int(logStartTime) + logEndTime = int(logFile.split()[5]) + # Filter out the log we need + if (logEndTime > int(g_opts.begin) and logStartTime < int( + g_opts.end) and log_check(logFileName)): + logs.append(logFileName) + findFiles = 1 + if findFiles == 1: + g_jobInfo.successTask.append("find log files") + else: + g_jobInfo.failedTask["find log files"] = ErrorCode.GAUSS_535[ + "GAUSS_53504"] % 'log' + g_logger.debug("Successfully find log files.") + + else: + g_jobInfo.failedTask["find log files"] = ErrorCode.GAUSS_535[ + "GAUSS_53505"] + g_logger.debug("There is no log files.") + + # Make temporary directory and copy + cmd = "cd $GAUSSLOG && mkdir -p -m %s tmp_gs_collector" % \ + DefaultValue.DIRECTORY_MODE + (status, output) = subprocess.getstatusoutput(cmd) + for log in logs: + Directorys.append(os.path.dirname(log)) + for directory in Directorys: + cmd = "cd $GAUSSLOG && mkdir -p -m %s tmp_gs_collector/'%s'" % ( + DefaultValue.DIRECTORY_MODE, directory) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + (status1, output1) = subprocess.getstatusoutput(deleteCmd) + g_jobInfo.failedTask["mkdir"] = ErrorCode.GAUSS_535["GAUSS_53506"] + g_logger.log(json.dumps(g_jobInfo.__dict__)) + g_logger.debug("Failed to mkdir. Error:\n%s." % output) + raise Exception("") + for log in logs: + if int(g_opts.speedLimitFlag) == 1: + cmd = "cd $GAUSSLOG && rsync --bwlimit=%d '%s' " \ + "tmp_gs_collector/'%s'" % ( + g_opts.speedLimitKBs, log, log) + else: + cmd = "cd $GAUSSLOG && cp '%s' tmp_gs_collector/'%s'" % (log, log) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + (status1, output1) = subprocess.getstatusoutput(deleteCmd) + g_jobInfo.failedTask["copy log files"] = replaceInvalidStr(output) + g_logger.log(json.dumps(g_jobInfo.__dict__)) + g_logger.debug("Failed to copy logFiles. Error:\n%s." % output) + raise Exception("") + + g_jobInfo.successTask.append("copy log files") + g_logger.debug("Successful to copy logFiles.") + + # Filter zip files + cmd = "cd $GAUSSLOG && find . 
-type f -iname '*.zip' -print" \ + " | xargs ls --time-style='+ %Y%m%d%H%M' -ll" + (status, output) = subprocess.getstatusoutput(cmd) + zipFiles = output.split("\n") + # If there is a zip file filtered by time + if len(zipFiles[0].split()) != 2: + for zipFile in zipFiles: + zipFileName = zipFile.split()[6] + logStartTime = formatTime(zipFileName) + # If the zip file name does not meet the format requirements,skip + if not logStartTime.isdigit() or len(logStartTime) != 12: + continue + logStartTime = int(logStartTime) + logEndTime = int(zipFile.split()[5]) + # Filter out the log we need + if (logEndTime > int(g_opts.begin) and logStartTime < int( + g_opts.end)): + zipdir = os.path.dirname(zipFileName) + g_jobInfo.successTask.append( + "find log zip files: %s" % zipFileName) + cmd = "cd $GAUSSLOG && mkdir -p -m %s tmp_gs_collector/%s " \ + "&& unzip -o %s -d tmp_gs_collector/%s " % \ + (DefaultValue.DIRECTORY_MODE, zipdir, + zipFileName, zipdir) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + g_jobInfo.failedTask[ + "find log zip files"] = replaceInvalidStr(output) + g_logger.log(json.dumps(g_jobInfo.__dict__)) + g_logger.debug(("Failed to filter zip files. Error:\n%s." + % output) + ("The cmd is %s " % cmd)) + raise Exception("") + g_logger.debug("Successfully filter zip files.") + else: + g_logger.debug("There is no zip files.") + + # Filter keywords + if g_opts.key is not None and g_opts.key != "": + if len(logs) != 0: + g_opts.key = g_opts.key.replace('$', '\$') + g_opts.key = g_opts.key.replace('\"', '\\\"') + cmd = "cd $GAUSSLOG/tmp_gs_collector && " + cmd = "%s grep \"%s\" -r * > %s/logfiles/%s" % ( + cmd, g_opts.key, g_resultdir, keyword_result) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0 and output != "": + cmd = "rm -rf $GAUSSLOG/tmp_gs_collector" + (status1, output1) = DefaultValue.retryGetstatusoutput(cmd) + g_jobInfo.failedTask[ + "filter keyword"] = "keywords: %s, Error: %s" % ( + g_opts.key, output) + g_logger.log(json.dumps(g_jobInfo.__dict__)) + g_logger.debug( + "Failed to filter keyword. Error:\n%s." % output) + raise Exception("") + else: + cmd = "rm -rf $GAUSSLOG/tmp_gs_collector" + (status, output) = DefaultValue.retryGetstatusoutput(cmd) + g_logger.debug("Successfully filter keyword.") + g_jobInfo.successTask.append("filter keyword: %s" % g_opts.key) + + else: + cmd = "touch %s/logfiles/%s && " % (g_resultdir, keyword_result) + cmd = "%s rm -rf $GAUSSLOG/tmp_gs_collector" % cmd + (status, output) = DefaultValue.retryGetstatusoutput(cmd) + if status != 0: + g_jobInfo.failedTask["touch keyword file"] = replaceInvalidStr( + output) + g_logger.log(json.dumps(g_jobInfo.__dict__)) + g_logger.debug( + "Failed to touch keyword file. Error:\n%s." % output) + raise Exception("") + g_logger.debug("Successfully filter keyword.") + else: + cmd = "cd $GAUSSLOG/tmp_gs_collector && tar -czf ../'%s' . && "\ + % logfiletar + if int(g_opts.speedLimitFlag) == 1: + cmd = "%s rsync --bwlimit=%d $GAUSSLOG/'%s' '%s'/logfiles/ && " % ( + cmd, g_opts.speedLimitKBs, logfiletar, g_resultdir,) + else: + cmd = "%s cp $GAUSSLOG/'%s' '%s'/logfiles/ && " % ( + cmd, logfiletar, g_resultdir) + cmd = " %s rm -rf $GAUSSLOG/tmp_gs_collector " \ + "&& rm -rf $GAUSSLOG/'%s'" % \ + (cmd, logfiletar) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + g_jobInfo.failedTask[ + "copy result file and delete tmp file"] = replaceInvalidStr( + output) + g_logger.log(json.dumps(g_jobInfo.__dict__)) + g_logger.debug("Failed to delete log files. 
Error:\n%s." % output) + raise Exception("") + + subprocess.getstatusoutput("cd '%s'/logfiles/ && chmod %s *" % ( + g_resultdir, DefaultValue.FILE_MODE)) + g_logger.debug("Successfully collected log files.") + g_logger.log(json.dumps(g_jobInfo.__dict__)) + + +def formatTime(filename): + """ + function: format time + input : filename + output : str + """ + try: + timelist = re.findall(r"\d\d\d\d-\d\d-\d\d_\d\d\d\d\d\d", filename) + time1 = re.findall("\d+", timelist[0]) + time2 = "" + for i in time1: + time2 += i + return time2[:-2] + except Exception: + return "ERROR" + + +def xlog_copy(): + """ + function: collected xlog files + input : NA + output: NA + """ + g_logger.debug("Starting collect xlog.") + if int(g_opts.speedLimitFlag) == 1: + g_logger.debug( + "Speed limit to collect xlog files is %d KB/s." + % g_opts.speedLimitKBs) + g_jobInfo.jobName = "Collecting xlog information" + Instances = [] + try: + for Inst in g_localnodeinfo.datanodes: + if "dn" in ",".join(g_opts.content).lower(): + Instances.append(Inst) + for Inst in g_localnodeinfo.coordinators: + if "cn" in ",".join(g_opts.content).lower(): + Instances.append(Inst) + # parallel copy xlog files + if Instances: + pool = ThreadPool(DefaultValue.getCpuSet()) + pool.map(parallel_xlog, Instances) + pool.close() + pool.join() + path = "%s/xlogfiles" % g_resultdir + if checkEmpty(path) == 0: + cmd = " cd %s/xlogfiles " \ + "&& tar -czf xlogfile_%s.tar.gz xlogfile_%s " \ + "&& rm -rf xlogfile_%s" % \ + (g_resultdir, g_current_time, g_current_time, + g_current_time) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + g_logger.debug( + "Failed to collect xlog. Command %s \n, Error %s \n", + (cmd, output)) + g_jobInfo.failedTask["compress xlog files"] = \ + ErrorCode.GAUSS_535["GAUSS_53507"] % 'tar' + else: + g_jobInfo.successTask.append("compress xlog files") + except Exception as e: + g_logger.debug(str(e)) + g_logger.log(json.dumps(g_jobInfo.__dict__)) + raise Exception(str(e)) + g_logger.debug("Successfully collected xlog.") + g_logger.log(json.dumps(g_jobInfo.__dict__)) + + +def getTargetFile(dir_path, fileList): + """ + function: get target file + input : dir_path, filelist + output: target file + """ + if os.path.isfile(dir_path): + create_time = time.strftime('%Y%m%d%H%M', + time.localtime(os.stat(dir_path).st_ctime)) + if int(g_opts.begin) < int(create_time) < int(g_opts.end): + fileList.append(dir_path) + elif os.path.isdir(dir_path): + for s in os.listdir(dir_path): + if "archive" in s: + continue + newDir = os.path.join(dir_path, s) + getTargetFile(newDir, fileList) + return fileList + + +def getXlogCmd(Inst): + """ + function: get xlog file + input : Inst + output: xlog file + """ + pg_xlog = Inst.datadir + "/pg_xlog" + xlogs = getTargetFile(pg_xlog, []) + cmd = "" + if Inst.instanceRole == DefaultValue.INSTANCE_ROLE_COODINATOR: + if len(xlogs) == 0: + g_jobInfo.failedTask["find cn_%s xlog files" % Inst.instanceId] = \ + ErrorCode.GAUSS_535["GAUSS_53504"] % 'xlog' + else: + g_jobInfo.successTask.append( + "find cn_%s xlog files" % Inst.instanceId) + cmd = "mkdir -p -m %s '%s/xlogfiles/xlogfile_%s/cn_%s'" % \ + ( + DefaultValue.KEY_DIRECTORY_MODE, g_resultdir, g_current_time, + Inst.instanceId) + for xlog in xlogs: + if int(g_opts.speedLimitFlag) == 1: + cmd = \ + "%s && rsync --bwlimit=%d %s" \ + " '%s/xlogfiles/xlogfile_%s/cn_%s'" % \ + (cmd, g_opts.speedLimitKBs, xlog, g_resultdir, + g_current_time, Inst.instanceId) + else: + cmd = "%s && cp -rf %s " \ + "'%s/xlogfiles/xlogfile_%s/cn_%s'" % \ + (cmd, 
+
+
+def getXlogCmd(Inst):
+    """
+    function: build the command that copies an instance's xlog files
+    input : Inst
+    output: cmd string ("" when nothing is to be copied)
+    """
+    pg_xlog = Inst.datadir + "/pg_xlog"
+    xlogs = getTargetFile(pg_xlog, [])
+    cmd = ""
+    if Inst.instanceRole == DefaultValue.INSTANCE_ROLE_COODINATOR:
+        if len(xlogs) == 0:
+            g_jobInfo.failedTask["find cn_%s xlog files" % Inst.instanceId] = \
+                ErrorCode.GAUSS_535["GAUSS_53504"] % 'xlog'
+        else:
+            g_jobInfo.successTask.append(
+                "find cn_%s xlog files" % Inst.instanceId)
+            cmd = "mkdir -p -m %s '%s/xlogfiles/xlogfile_%s/cn_%s'" % \
+                  (DefaultValue.KEY_DIRECTORY_MODE, g_resultdir,
+                   g_current_time, Inst.instanceId)
+            for xlog in xlogs:
+                if int(g_opts.speedLimitFlag) == 1:
+                    cmd = \
+                        "%s && rsync --bwlimit=%d %s" \
+                        " '%s/xlogfiles/xlogfile_%s/cn_%s'" % \
+                        (cmd, g_opts.speedLimitKBs, xlog, g_resultdir,
+                         g_current_time, Inst.instanceId)
+                else:
+                    cmd = "%s && cp -rf %s " \
+                          "'%s/xlogfiles/xlogfile_%s/cn_%s'" % \
+                          (cmd, xlog, g_resultdir, g_current_time,
+                           Inst.instanceId)
+    elif Inst.instanceRole == DefaultValue.INSTANCE_ROLE_DATANODE:
+        if len(xlogs) == 0:
+            g_jobInfo.failedTask["find dn_%s xlog files" % Inst.instanceId] = \
+                ErrorCode.GAUSS_535["GAUSS_53504"] % 'xlog'
+        else:
+            g_jobInfo.successTask.append(
+                "find dn_%s xlog files" % Inst.instanceId)
+            cmd = "mkdir -p -m %s '%s/xlogfiles/xlogfile_%s/dn_%s'" % \
+                  (DefaultValue.KEY_DIRECTORY_MODE, g_resultdir,
+                   g_current_time, Inst.instanceId)
+            for xlog in xlogs:
+                if int(g_opts.speedLimitFlag) == 1:
+                    cmd = "%s && rsync --bwlimit=%d %s" \
+                          " '%s/xlogfiles/xlogfile_%s/dn_%s'" % \
+                          (cmd, g_opts.speedLimitKBs, xlog, g_resultdir,
+                           g_current_time, Inst.instanceId)
+                else:
+                    cmd = "%s && cp -rf %s " \
+                          "'%s/xlogfiles/xlogfile_%s/dn_%s'" % \
+                          (cmd, xlog, g_resultdir, g_current_time,
+                           Inst.instanceId)
+    return cmd
+
+
+def parallel_xlog(Inst):
+    """
+    parallel copy xlog files
+    """
+    cmd = getXlogCmd(Inst)
+    if len(cmd) > 1:
+        (status, output) = subprocess.getstatusoutput(cmd)
+        if status != 0:
+            g_logger.debug(
+                "Failed to collect xlog files. Command: %s.\n Error: %s\n" % (
+                    cmd, output))
+            g_jobInfo.failedTask["collect xlog files"] = replaceInvalidStr(
+                output)
+            raise Exception("")
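+
+# [Editor's note -- illustrative, not part of the original patch]
+# core_copy() below proceeds only when /proc/sys/kernel/core_pattern ends
+# with the expected template (g_core_pattern, a "core-%e-%p-%t" style name),
+# so a dump such as "core-gaussdb-12345-1618063424" can be split on "-" to
+# recover the program name that is then fed to gdb for the backtraces.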
+
+
+def core_copy():
+    """
+    function: collect core dump files
+    input : NA
+    output: NA
+    """
+    g_logger.debug("Starting collect core dump.")
+    if int(g_opts.speedLimitFlag) == 1:
+        g_logger.debug(
+            "Speed limit to collect core dump files is %d KB/s."
+            % g_opts.speedLimitKBs)
+    g_jobInfo.jobName = "Collecting Core information"
+    Instances = []
+    cmd = "cat /proc/sys/kernel/core_pattern"
+    (status, output) = subprocess.getstatusoutput(cmd)
+    if status != 0:
+        g_logger.debug(
+            "Failed to collect core dump files. Command: %s.\n Error:\n%s" % (
+                cmd, output))
+        g_jobInfo.failedTask["read core pattern"] = ErrorCode.GAUSS_535[
+            "GAUSS_53507"] % 'cat'
+        g_logger.log(json.dumps(g_jobInfo.__dict__))
+        raise Exception("")
+    core_config = str(output)
+    core_pattern = core_config.split('/')[-1]
+    core_path = "/".join(core_config.split("/")[:-1])
+    if core_pattern != g_core_pattern:
+        g_logger.debug(
+            "Failed to collect core dump files, core pattern is not '%s'."
+            % g_core_pattern)
+        g_jobInfo.failedTask["check core pattern"] = ErrorCode.GAUSS_535[
+            "GAUSS_53508"]
+        g_logger.log(json.dumps(g_jobInfo.__dict__))
+        raise Exception("")
+
+    g_jobInfo.successTask.append("check core pattern")
+    cmd = "mkdir -p -m %s '%s/coreDumpfiles/corefile_%s'" % \
+          (DefaultValue.KEY_DIRECTORY_MODE, g_resultdir, g_current_time)
+    cmd = "%s && gaussdb --version >>" \
+          " %s/coreDumpfiles/corefile_%s/version.txt" % \
+          (cmd, g_resultdir, g_current_time)
+    (status, output) = subprocess.getstatusoutput(cmd)
+    if status != 0:
+        g_logger.debug(
+            "Failed to collect gaussdb version info."
+            " Command: %s.\n Error:\n%s" % (
+                cmd, output))
+        g_jobInfo.failedTask["check gaussdb version"] = replaceInvalidStr(
+            output)
+    g_jobInfo.successTask.append("check gaussdb version")
+
+    cores = getTargetFile(core_path, [])
+    if len(cores) > 0:
+        g_jobInfo.successTask.append("find core files")
+        isEmpty = 1
+        for core in cores:
+            tempName = str(core.split("/")[-1])
+            if not tempName.startswith("core-"):
+                g_logger.debug(
+                    "WARNING: core file %s does not match core-e-p-t." % (
+                        str(core.split("/")[-1])))
+                continue
+            p = tempName.split("-")[1]
+            if "".join(p).lower() in ",".join(g_opts.content).lower():
+                p_stack = "%s_stack" % p
+                cmdList = []
+                if p_stack in g_opts.content:
+                    cmd = "gdb -q --batch --ex" \
+                          " \"set height 0\" -ex \"thread apply" \
+                          " all bt full\" %s %s >> " \
+                          "%s/coreDumpfiles/corefile_%s/%s-stack1.txt" % (
+                              p, core, g_resultdir, g_current_time,
+                              core.split("/")[-1])
+                    cmd += " && gdb -q --batch --ex \"set height 0\"" \
+                           " -ex \"thread apply all bt\" %s %s >> " \
+                           "%s/coreDumpfiles/corefile_%s/%s-stack2.txt" % (
+                               p, core, g_resultdir, g_current_time,
+                               core.split("/")[-1])
+                    cmdList.append(cmd)
+
+                if p in g_opts.content:
+                    if int(g_opts.speedLimitFlag) == 1:
+                        cmd = \
+                            "rsync --bwlimit=%d %s" \
+                            " '%s/coreDumpfiles/corefile_%s'" % (
+                                g_opts.speedLimitKBs, core, g_resultdir,
+                                g_current_time)
+                    else:
+                        cmd = "cp -rf %s '%s/coreDumpfiles/corefile_%s'" % (
+                            core, g_resultdir, g_current_time)
+                    cmdList.append(cmd)
+                for c in cmdList:
+                    (status, output) = subprocess.getstatusoutput(c)
+                    if status != 0:
+                        g_logger.debug(
+                            "Failed to copy core dump files. Command:"
+                            " %s.\n Error:\n%s" % (
+                                c, output))
+                        g_jobInfo.failedTask[
+                            "copy core file"] = replaceInvalidStr(output)
+                    else:
+                        isEmpty = 0
+
+        if isEmpty == 0:
+            cmd = "cd %s/coreDumpfiles && tar -czf corefile_%s.tar.gz" \
+                  " corefile_%s && rm -rf corefile_%s" % \
+                  (g_resultdir, g_current_time, g_current_time,
+                   g_current_time)
+            (status, output) = subprocess.getstatusoutput(cmd)
+            if status != 0:
+                g_logger.debug(
+                    "Failed to collect core dump files."
+                    " Command: %s.\n Error:\n%s" % (
+                        cmd, output))
+                g_jobInfo.failedTask[
+                    "compress core files"] = replaceInvalidStr(output)
+                g_logger.log(json.dumps(g_jobInfo.__dict__))
+                raise Exception("")
+            else:
+                g_jobInfo.successTask.append("compress core files")
+        else:
+            g_jobInfo.failedTask["copy core file"] = ErrorCode.GAUSS_535[
+                "GAUSS_53509"]
+    else:
+        g_jobInfo.failedTask["find core files"] = ErrorCode.GAUSS_535[
+            "GAUSS_53504"] % 'core'
+
+    g_logger.debug("Successfully collected core dump. %s" % cores)
+    g_logger.log(json.dumps(g_jobInfo.__dict__))
+
+
+def conf_gstack(jobName):
+    """
+    function: collect configuration files and process stack information
+    output: Successfully collected configuration files
+            and processed stack information.
+    """
+    g_logger.debug("Collecting %s information." % jobName)
+    g_jobInfo.jobName = "Collecting %s information" % jobName
+    try:
+        # Get all instances of the cluster
+        Instances = []
+        for Inst in g_localnodeinfo.datanodes:
+            if "dn" in ",".join(g_opts.content).lower():
+                Instances.append(Inst)
+        # parallel copy configuration files, and get gstack
+        if Instances:
+            pool = ThreadPool(DefaultValue.getCpuSet())
+            pool.map(parallel_conf_gstack, Instances)
+            pool.close()
+            pool.join()
+        g_jobInfo.successTask.append("collect %s information" % jobName)
+        g_logger.log(json.dumps(g_jobInfo.__dict__))
+    except Exception as e:
+        g_logger.debug(str(e))
+        g_logger.log(json.dumps(g_jobInfo.__dict__))
+        raise Exception("")
+
+    g_logger.debug(
+        "Successfully collected configuration files "
+        "and processed stack information.")
+
+
+def plan_simulator_check():
+    """
+    function: collect plan simulator files
+    output: Successfully collected files.
+ """ + g_logger.debug("Collecting plan simulator.") + g_jobInfo.jobName = "Collecting plan simulator information" + haveCnInst = 0 + for cnInst in g_localnodeinfo.coordinators: + haveCnInst = 1 + if "*" in g_opts.content: + sql = "SELECT datname FROM pg_database" \ + " Where datname NOT IN ('template1', 'template0');" + (status, output) = ClusterCommand.execSQLCommand(sql, g_opts.user, + "", cnInst.port) + if status != 0: + g_logger.debug( + "Failed to exec SQL command. please " + "check db status. sql: %s.\n Error: %s.\n" % ( + sql, output)) + g_jobInfo.failedTask["find database"] = ErrorCode.GAUSS_535[ + "GAUSS_53502"] + g_logger.log(json.dumps(g_jobInfo.__dict__)) + raise Exception("") + g_jobInfo.successTask.append("find database") + dbList = output.split("\n") + else: + dbList = g_opts.content + for db in dbList: + cmd = "mkdir -p -m %s '%s/planSimulatorfiles/%s'" % \ + (DefaultValue.KEY_DIRECTORY_MODE, g_resultdir, db) + cmd = "%s && gs_plan_simulator.sh -m dump -d %s " \ + "-p %d -D %s/planSimulatorfiles/%s" % \ + (cmd, db, cnInst.port, g_resultdir, db) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + g_logger.debug( + "Failed to Collect plan simulator. " + "Command %s.\n Error: %s.\n" % ( + cmd, output)) + g_jobInfo.failedTask["dump %s plan info" % db] = \ + ErrorCode.GAUSS_535["GAUSS_53510"] + else: + g_jobInfo.successTask.append("dump %s plan info" % db) + if haveCnInst == 0: + g_jobInfo.failedTask["dump database plan info"] = ErrorCode.GAUSS_535[ + "GAUSS_53503"] + g_logger.log(json.dumps(g_jobInfo.__dict__)) + + +def getBakConfCmd(Inst): + """ + function: get bak conf cmd + input : Inst + output : NA + """ + cmd = "" + pidfile = "" + if Inst.instanceRole == DefaultValue.INSTANCE_ROLE_GTM: + if g_need_gstack == 0: + cmd = "mkdir -p -m %s '%s/configfiles/config_%s/gtm_%s'" % \ + ( + DefaultValue.KEY_DIRECTORY_MODE, g_resultdir, g_current_time, + Inst.instanceId) + cmd = "%s && cp '%s'/gtm.conf '%s'/gtm.control " \ + "'%s'/configfiles/config_%s/gtm_%s/" % \ + ( + cmd, Inst.datadir, Inst.datadir, g_resultdir, g_current_time, + Inst.instanceId) + if Inst.instanceType == DefaultValue.MASTER_INSTANCE: + cmd = "%s && cp '%s'/gtm.sequence" \ + " '%s'/configfiles/config_%s/gtm_%s/" % \ + (cmd, Inst.datadir, g_resultdir, g_current_time, + Inst.instanceId) + else: + cmd = "mkdir -p -m %s '%s/gstackfiles/gstack_%s/gtm_%s'" % \ + ( + DefaultValue.KEY_DIRECTORY_MODE, g_resultdir, g_current_time, + Inst.instanceId) + pidfile = Inst.datadir + "/gtm.pid" + try: + with open(pidfile, 'r') as f: + pid = int(f.readline()) + if pid != 0: + cmd += " && gstack '%d' >" \ + " '%s'/gtm.stack && mv " \ + "'%s'/gtm.stack '%s'" \ + "/gstackfiles/gstack_%s/gtm_%s/gtm_%s.stack" % \ + (pid, Inst.datadir, Inst.datadir, g_resultdir, + g_current_time, Inst.instanceId, + Inst.instanceId) + except Exception: + g_jobInfo.failedTask[ + "collect gtm_%s process stack info" % Inst.instanceId] = \ + ErrorCode.GAUSS_535["GAUSS_53511"] % 'GTM' + + elif Inst.instanceRole == DefaultValue.INSTANCE_ROLE_COODINATOR: + if g_need_gstack == 0: + cmd = "mkdir -p -m %s '%s/configfiles/config_%s/cn_%s'" % \ + ( + DefaultValue.KEY_DIRECTORY_MODE, g_resultdir, g_current_time, + Inst.instanceId) + cmd = "%s && cp -rf '%s'/postgresql.conf '%s'" \ + "/pg_hba.conf '%s'/global/pg_control" \ + " '%s'/gaussdb.state %s/pg_replslot/ %s/pg_ident.conf" \ + " '%s'/configfiles/config_%s/cn_%s/" % \ + (cmd, Inst.datadir, Inst.datadir, Inst.datadir, Inst.datadir, + Inst.datadir, Inst.datadir, + g_resultdir, g_current_time, 
Inst.instanceId) + else: + cmd = "mkdir -p -m %s '%s/gstackfiles/gstack_%s/cn_%s'" % \ + ( + DefaultValue.KEY_DIRECTORY_MODE, g_resultdir, g_current_time, + Inst.instanceId) + pidfile = Inst.datadir + "/postmaster.pid" + try: + with open(pidfile, 'r') as f: + pid = int(f.readline()) + if pid != 0: + cmd = "%s && gstack '%d' > '%s'" \ + "/cn.stack && mv '%s'/cn.stack '%s'" \ + "/gstackfiles/gstack_%s/cn_%s/cn_%s.stack" % \ + (cmd, pid, Inst.datadir, Inst.datadir, + g_resultdir, g_current_time, Inst.instanceId, + Inst.instanceId) + except Exception: + g_jobInfo.failedTask[ + "collect cn_%s process stack info" % Inst.instanceId] = \ + ErrorCode.GAUSS_535["GAUSS_53511"] % 'CN' + + elif Inst.instanceRole == DefaultValue.INSTANCE_ROLE_DATANODE: + if g_need_gstack == 0: + cmd = "mkdir -p -m %s '%s/configfiles/config_%s/dn_%s'" % \ + ( + DefaultValue.KEY_DIRECTORY_MODE, g_resultdir, g_current_time, + Inst.instanceId) + cmd = "%s && cp -rf '%s'/postgresql.conf '%s'/pg_hba." \ + "conf '%s'/global/pg_control" \ + " '%s'/gaussdb.state %s/pg_replslot/ %s/pg_ident.conf" \ + " '%s'/configfiles/config_%s/dn_%s/" % \ + (cmd, Inst.datadir, Inst.datadir, Inst.datadir, Inst.datadir, + Inst.datadir, Inst.datadir, + g_resultdir, g_current_time, Inst.instanceId) + else: + cmd = "mkdir -p -m %s '%s/gstackfiles/gstack_%s/dn_%s'" % \ + ( + DefaultValue.KEY_DIRECTORY_MODE, g_resultdir, g_current_time, + Inst.instanceId) + pidfile = Inst.datadir + "/postmaster.pid" + try: + with open(pidfile, 'r') as f: + pid = int(f.readline()) + if pid != 0: + cmd = "%s && gstack '%d' > '%s'/dn.stack && mv" \ + " '%s'/dn.stack '%s'" \ + "/gstackfiles/gstack_%s/dn_%s/dn_%s.stack" % \ + (cmd, pid, Inst.datadir, Inst.datadir, + g_resultdir, g_current_time, Inst.instanceId, + Inst.instanceId) + except Exception: + g_jobInfo.failedTask[ + "collect dn_%s process stack info" % Inst.instanceId] = \ + ErrorCode.GAUSS_535["GAUSS_53511"] % 'DN' + return (cmd, pidfile) + + +def parallel_conf_gstack(Inst): + """ + parallel copy configuration files, and get gstack + """ + (cmd, pidfile) = getBakConfCmd(Inst) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + if "command not found" in output: + g_jobInfo.failedTask["collect process stack info"] = \ + ErrorCode.GAUSS_535["GAUSS_53512"] + g_logger.debug( + "Failed to collect gstack files. " + "Command: %s \n Error: %s.\n" % ( + cmd, output)) + raise Exception("") + elif "gstack" in output: + g_jobInfo.failedTask[ + "collect process stack info"] = replaceInvalidStr(output) + g_logger.debug( + "Failed to collect gstack files." + " Command: %s \n Error: %s.\n" % ( + cmd, output)) + raise Exception("") + elif "Process" in output: + g_jobInfo.failedTask[ + "collect process stack info"] = replaceInvalidStr(output) + g_logger.debug( + "Failed to collect gstack files. " + "Command: %s \n Error: %s.\n" % ( + cmd, output)) + raise Exception("") + else: + g_jobInfo.failedTask[ + "collect configuration files"] = replaceInvalidStr(output) + g_logger.debug( + "Failed to collect configuration files." 
+ " Command: %s \n Error: %s.\n" % ( + cmd, output)) + raise Exception("") + + +def parseConfig(): + """ + function: parse Config parameter + input : NA + output: NA + """ + if g_opts.config != "": + d = json.loads(g_opts.config) + g_opts.content = list(filter(None, d['Content'].split(","))) + + +def main(): + """ + main function + """ + try: + parseCommandLine() + initGlobal() + parseConfig() + global g_jobInfo + g_jobInfo = JobInfo() + if g_opts.action == "check_command": + check_command() + elif g_opts.action == "create_dir": + create_temp_result_folder() + # Get system information + elif g_opts.action == "system_check": + system_check() + # Gets the database information + elif g_opts.action == "database_check": + database_check() + # Make a copy of the log file + elif g_opts.action == "log_copy": + log_copy() + # Copy configuration files, and get g stack + elif g_opts.action == "Config": + conf_gstack("Config") + elif g_opts.action == "Gstack": + global g_need_gstack + g_need_gstack = 1 + conf_gstack("Gstack") + g_need_gstack = 0 + # Send all log files we collected to the command node. + elif g_opts.action == "copy_file": + sendLogFiles() + elif g_opts.action == "xlog_copy": + xlog_copy() + elif g_opts.action == "plan_simulator_check": + plan_simulator_check() + elif g_opts.action == "core_copy": + core_copy() + else: + g_logger.logExit("Unrecognized parameter: %s." % g_opts.action) + except Exception as e: + GaussLog.exitWithError(str(e)) + + +if __name__ == '__main__': + main() + sys.exit(0) diff --git a/script/local/LocalPerformanceCheck.py b/script/local/LocalPerformanceCheck.py new file mode 100644 index 0000000..012b129 --- /dev/null +++ b/script/local/LocalPerformanceCheck.py @@ -0,0 +1,275 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +############################################################################# +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +# Description : LocalPerformanceCheck.py is a utility to +# check if GaussDB performance. 
+import subprocess
+import getopt
+import os
+import sys
+import time
+
+sys.path.append(sys.path[0] + "/../")
+from gspylib.common.GaussLog import GaussLog
+from gspylib.common.ParameterParsecheck import Parameter
+from gspylib.common.Common import DefaultValue
+from gspylib.common.ErrorCode import ErrorCode
+from multiprocessing.dummy import Pool as ThreadPool
+
+ACTION_SSDPerfCheck = "SSDPerfCheck"
+INDENTATION_VALUE = 37
+
+
+class CmdOptions():
+    def __init__(self):
+        """
+        function: initialize variables
+        input : NA
+        output: NA
+        """
+        pass
+
+    action = ""
+    logFile = ""
+    user = ""
+
+
+g_opts = CmdOptions()
+g_logger = None
+g_perfChecker = None
+
+
+class LocalPerformanceCheck():
+    def __init__(self):
+        """
+        function: initialize variables
+        input : NA
+        output: NA
+        """
+        self.user = ""
+        self.logFile = ""
+        self.action = ""
+
+    def CheckSSDPerf(self):
+        """
+        function: check SSD performance
+        input : NA
+        output: NA
+        """
+        diskDevList = []
+        # Obtain the SSD devices
+        devList = DefaultValue.obtainSSDDevice()
+        # traverse the devices
+        for dev in devList:
+            cmd = "df -P -h | grep %s | awk '{print $6}'" % dev
+            (status, output) = subprocess.getstatusoutput(cmd)
+            if (status == 0):
+                if (output == ""):
+                    continue
+                diskDirInfo = output.split('\n')
+                for diskDir in diskDirInfo:
+                    diskDevList.append("%s:%s" % (dev, diskDir))
+        # check if an SSD disk exists on the current node
+        if diskDevList == []:
+            raise Exception(ErrorCode.GAUSS_530["GAUSS_53005"])
+        # Concurrent execution
+        pool = ThreadPool(DefaultValue.getCpuSet())
+        results = pool.map(self.CheckSingleSSDPerf, diskDevList)
+        pool.close()
+        pool.join()
+
+    def CheckSingleSSDPerf(self, diskDev):
+        """
+        function: check a single SSD's performance
+        input : diskDev
+        output: NA
+        """
+        # initialize tmpFile so the cleanup in the except branch cannot
+        # hit an unbound name if we fail before the temp file is created
+        tmpFile = ""
+        try:
+            devlist = diskDev.split(':')
+            dev = devlist[0]
+            diskDir = devlist[1]
+            # get the current time
+            currentTime = time.strftime("%Y-%m-%d_%H%M%S")
+            # get the tmp file
+            tmpFile = os.path.join(diskDir, "%s-%s-%d" % ("tmpfile_SSDperf",
+                                                          currentTime,
+                                                          os.getpid()))
+            cmd = "dd if=/dev/zero of=%s bs=8M count=2560 oflag=direct &&" \
+                  % tmpFile
+            cmd += "dd if=%s of=/dev/null bs=8M count=2560 iflag=direct" \
+                   % tmpFile
+            (status, output) = subprocess.getstatusoutput(cmd)
+            if (status == 0):
+                output = output.split("\n")
+                writeInfolist = output[2].strip().split(",")
+                readInfolist = output[5].strip().split(",")
+                result = "    %s (%s) Path (%s)\n" \
+                         "    %s: %s\n" \
+                         "    %s: %s\n    %s: %s\n" \
+                         "    %s: %s\n    %s: %s" \
+                         % (dev.split('/')[2], dev, diskDir,
+                            "Data size".ljust(INDENTATION_VALUE),
+                            writeInfolist[0][:-7],
+                            "Write time".ljust(INDENTATION_VALUE),
+                            (writeInfolist[1]).strip(),
+                            "Write speed".ljust(INDENTATION_VALUE),
+                            (writeInfolist[2]).strip(),
+                            "Read time".ljust(INDENTATION_VALUE),
+                            (readInfolist[1]).strip(),
+                            "Read speed".ljust(INDENTATION_VALUE),
+                            (readInfolist[2]).strip())
+                g_logger.log(result)
+            else:
+                raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd
+                                + " Error:\n%s" % output)
+            os.remove(tmpFile)
+            g_logger.debug("Successfully checked SSD performance.")
+        except Exception as e:
+            if os.path.isfile(tmpFile):
+                os.remove(tmpFile)
+            g_logger.log("%s failed." % g_opts.action)
+            g_logger.debug(str(e))
+
+
+def usage():
+    """
+LocalPerformanceCheck.py is a utility to check GaussDB performance.
+Internal use only.
+Usage:
+  python3 --help | -?
+  python3 LocalPerformanceCheck.py -t action [-l logfile] [-U username]
+Common options:
+  -t         The type of action.
+  -U         The user name.
+ -l The path of log file. + -? --help Show this help screen. + """ + print(usage.__doc__) + + +def initGlobal(): + """ + function: Init global variables + input : NA + output: NA + """ + global g_logger + global g_perfChecker + + try: + g_logger = GaussLog(g_opts.logFile, g_opts.action) + # Modify log File Permissions + DefaultValue.modifyFileOwner(g_opts.user, g_logger.logFile) + g_perfChecker = LocalPerformanceCheck() + except Exception as e: + g_logger.logExit(str(e)) + + +def parseCommandLine(): + """ + function: Parse command line and save to global variable + input : NA + output: NA + """ + try: + (opts, args) = getopt.getopt(sys.argv[1:], "t:U:l:", ["help"]) + except Exception as e: + # print help information + usage() + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50000"] + % str(e)) + + if (len(args) > 0): + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50000"] + % str(args[0])) + + # parse parameter + for (key, value) in opts: + if (key == "--help"): + usage() + sys.exit(0) + elif (key == "-t"): + g_opts.action = value + elif (key == "-l"): + g_opts.logFile = value + elif (key == "-U"): + g_opts.user = value + + Parameter.checkParaVaild(key, value) + + +def checkParameter(): + """ + function: Check parameter from command line + input : NA + output: NA + """ + # check if user exist and is the right user + DefaultValue.checkUser(g_opts.user) + # check log file + if (g_opts.logFile == ""): + g_opts.logFile = DefaultValue.getOMLogPath( + DefaultValue.LOCAL_LOG_FILE, g_opts.user, "") + # check if absolute path + if (not os.path.isabs(g_opts.logFile)): + GaussLog.exitWithError(ErrorCode.GAUSS_502["GAUSS_50213"] % "log") + # check if installed SSD + if not DefaultValue.checkSSDInstalled(): + GaussLog.exitWithError(ErrorCode.GAUSS_530["GAUSS_53008"]) + + +def docheck(): + """ + function: check SSD performance + input : NA + output: NA + """ + if (g_opts.action == ACTION_SSDPerfCheck): + # check SSD performance + g_perfChecker.CheckSSDPerf() + else: + g_logger.logExit(ErrorCode.GAUSS_500["GAUSS_50000"] % g_opts.action) + + +if __name__ == '__main__': + """ + main function + """ + try: + # arse command line and save to global variable + parseCommandLine() + # Check parameter from command line + checkParameter() + # Init global variables + initGlobal() + except Exception as e: + # Modify the file's owner + DefaultValue.modifyFileOwner(g_opts.user, g_opts.logFile) + GaussLog.exitWithError(str(e)) + + try: + # check SSD performance + docheck() + # close log file + g_logger.closeLog() + except Exception as e: + # Modify the file's owner + DefaultValue.modifyFileOwner(g_opts.user, g_logger.logFile) + g_logger.logExit(str(e)) + + sys.exit(0) diff --git a/script/local/PreInstallUtility.py b/script/local/PreInstallUtility.py new file mode 100644 index 0000000..cbe2a59 --- /dev/null +++ b/script/local/PreInstallUtility.py @@ -0,0 +1,2979 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +############################################################################# +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. 
+# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +# Description : PreInstallUtility.py is a utility to +# install the cluster on local node. +############################################################################# + +import getopt +import sys +import os +import shutil +import subprocess +import time +import pwd +import grp +import configparser + +sys.path.append(sys.path[0] + "/../") +from gspylib.common.GaussLog import GaussLog +from gspylib.common.ParameterParsecheck import Parameter +from gspylib.common.Common import DefaultValue, ClusterCommand +from gspylib.common.OMCommand import OMCommand +from gspylib.common.LocalBaseOM import LocalBaseOM +from gspylib.common.ErrorCode import ErrorCode +from gspylib.os.gsfile import g_file +from gspylib.os.gsOSlib import g_OSlib +from gspylib.common.VersionInfo import VersionInfo +from gspylib.os.gsplatform import g_Platform +from gspylib.os.gsservice import g_service +from gspylib.os.gsnetwork import g_network + +ACTION_PREPARE_PATH = "prepare_path" +ACTION_CHECK_OS_VERSION = "check_os_Version" +ACTION_CREATE_OS_USER = "create_os_user" +ACTION_CHECK_OS_USER = "check_os_user" +ACTION_CREATE_CLUSTER_PATHS = "create_cluster_paths" +ACTION_SET_FINISH_FLAG = "set_finish_flag" +ACTION_SET_USER_ENV = "set_user_env" +ACTION_SET_TOOL_ENV = "set_tool_env" +ACTION_PREPARE_USER_CRON_SERVICE = "prepare_user_cron_service" +ACTION_PREPARE_USER_SSHD_SERVICE = "prepare_user_sshd_service" +ACTION_SET_LIBRARY = "set_library" +ACTION_SET_SCTP = "set_sctp" +ACTION_SET_VIRTUALIP = "set_virtualIp" +ACTION_CHECK_HOSTNAME_MAPPING = "check_hostname_mapping" +ACTION_INIT_GAUSSLOG = "init_gausslog" +ACTION_CHECK_ENVFILE = "check_envfile" +ACTION_SET_ARM_OPTIMIZATION = "set_arm_optimization" +ACTION_CHECK_DISK_SPACE = "check_disk_space" +ACTION_SET_WHITELIST = "set_white_list" +ACTION_CHECK_OS_SOFTWARE = "check_os_software" +ACTION_FIX_SERVER_PACKAGE_OWNER = "fix_server_package_owner" +ACTION_CHANGE_TOOL_ENV = "change_tool_env" + +g_nodeInfo = None +envConfig = {} +configuredIps = [] +checkOSUser = False +g_component_list = [] +instance_type_set = () +software_list = ["bzip2"] + +##################################################### +# syslog variables +##################################################### +RSYSLOG = "rsyslog" +SYSLOG_NG = "syslog-ng" +RSYSLOG_CONFIG_FILE = "/etc/rsyslog.conf" +SYSLOG_NG_CONFIG_FILE = "/etc/syslog-ng/syslog-ng.conf" +SYSLOG_NG_CONFIG_FILE_SERVER = "/etc/sysconfig/syslog" +SYSTEMD_JOURNALD_CONF = "/etc/systemd/journald.conf" +RSYSLOG_FACILITY_LEVEL = "local3.*" +AP_RSYSLOG_FACILITY_LEVEL = ":msg,contains,\"MPPDB\"" +SYSLOG_NG_FACILITY = "local3" +SYSLOG_NG_LEVEL = "debug..emerg" +AP_SERVER_SYSLOG_FILE = "/var/log/syslog_MPPDB" +IMJOURNAL_RATELIMIT_INTERVAL = 1 +IMJOURNAL_RATELIMIT_BURST = 50000 +SYSTEMLOG_RATELIMIT_INTERVAL = 1 +SYSTEMLOG_RATELIMIT_BURST = 50000 +ARM_PLATE = False + + +def get_package_path(): + """ + get package path + :return: + :return: + """ + dir_name = os.path.dirname(os.path.realpath(__file__)) + package_path = os.path.join(dir_name, "./../../") + package_path = os.path.realpath(package_path) + return package_path + + +class PreInstall(LocalBaseOM): + """ + install the cluster on local node + """ + + def __init__(self): + """ + function: constructor + """ + self.action = "" + self.userInfo = "" + self.user = "" + self.group = "" + self.clusterConfig = "" + self.preparePath = "" + self.checkEmpty = False + self.envParams = [] + self.logFile = 
"" + self.mpprcFile = "" + self.clusterToolPath = "" + self.tmpFile = "" + self.clusterAppPath = "" + self.white_list = {} + self.logger = None + + def initGlobals(self): + """ + init global variables + input : NA + output: NA + """ + global instance_type_set + + self.logger = GaussLog(self.logFile, self.action) + if self.clusterConfig != "": + self.readConfigInfoByXML() + + def initNodeInfo(self): + """ + function: + init node info + precondition: + self.clusterInfo has been initialized + input : NA + output: NA + """ + global g_nodeInfo + + hostName = DefaultValue.GetHostIpOrName() + g_nodeInfo = self.clusterInfo.getDbNodeByName(hostName) + if g_nodeInfo is None: + self.logger.logExit(ErrorCode.GAUSS_516["GAUSS_51620"] + % "local" + " It is not a host named %s." + % hostName) + + def usage(self): + """ +Usage: + python3 PreInstallUtility.py -t action -u user -T warning_type + [-g group] [-X xmlfile] [-P path] [-Q clusterToolPath] [-D mount_path] + [-e "envpara=value" [...]] [-w warningserverip] [-h nodename] + [-s mpprc_file] [--check_empty] [-l log] +Common options: + -t The type of action. + -u The OS user of cluster. + -g The OS user's group of cluster. + -X The XML file path. + -P The path to be check. + -Q The path of cluster tool. + -e "envpara=value" The OS user environment variable. + --check_empty Check path empty. + -s The path of MPP environment file. + -l The path of log file. + -R The path of cluster install path. + --help Show this help, then exit. + """ + print(self.usage.__doc__) + + def parseCommandLine(self): + """ + function: Check parameter from command line + input : NA + output: NA + """ + try: + opts, args = getopt.getopt(sys.argv[1:], "t:u:g:X:P:Q:e:s:l:f:R:", + ["check_empty", "help"]) + except Exception as e: + self.usage() + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50000"] % str(e)) + + if len(args) > 0: + GaussLog.exitWithError( + ErrorCode.GAUSS_500["GAUSS_50000"] % str(args[0])) + + parameter_map = {"-t": self.action, "-u": self.user, "-g": self.group, + "-X": self.clusterConfig, + "-P": self.preparePath, "-Q": self.clusterToolPath, + "-s": self.mpprcFile, "-f": self.tmpFile, + "-R": self.clusterAppPath} + parameter_keys = parameter_map.keys() + + for (key, value) in opts: + if key == "--help": + self.usage() + sys.exit(0) + elif key in parameter_keys: + parameter_map[key] = value + elif key == "-e": + self.envParams.append(value) + elif key == "--check_empty": + self.checkEmpty = True + elif key == "-l": + self.logFile = os.path.realpath(value) + self.tmpFile = value + + Parameter.checkParaVaild(key, value) + self.action = parameter_map["-t"] + self.user = parameter_map["-u"] + self.group = parameter_map["-g"] + self.clusterConfig = parameter_map["-X"] + self.preparePath = parameter_map["-P"] + self.clusterToolPath = parameter_map["-Q"] + self.mpprcFile = parameter_map["-s"] + self.tmpFile = parameter_map["-f"] + self.clusterAppPath = parameter_map["-R"] + + def checkParameter(self): + """ + function: Check parameter from command line + input : NA + output: NA + """ + if (self.user == "" and self.action not in [ACTION_SET_VIRTUALIP, + ACTION_SET_WHITELIST]): + GaussLog.exitWithError( + ErrorCode.GAUSS_500["GAUSS_50001"] % 'u' + ".") + + try: + if (self.action == ACTION_PREPARE_PATH + or self.action == ACTION_CREATE_CLUSTER_PATHS + or self.action == ACTION_SET_FINISH_FLAG + or self.action == ACTION_SET_USER_ENV): + DefaultValue.checkUser(self.user, False) + except Exception as e: + GaussLog.exitWithError(str(e)) + parameter_list = 
[ACTION_CHECK_OS_VERSION, ACTION_SET_FINISH_FLAG, + ACTION_SET_USER_ENV, ACTION_SET_LIBRARY, \ + ACTION_SET_SCTP, ACTION_PREPARE_USER_CRON_SERVICE, + ACTION_PREPARE_USER_SSHD_SERVICE, \ + ACTION_SET_VIRTUALIP, ACTION_INIT_GAUSSLOG, + ACTION_CHECK_ENVFILE, ACTION_CHECK_OS_SOFTWARE, \ + ACTION_SET_ARM_OPTIMIZATION, + ACTION_CHECK_DISK_SPACE, ACTION_SET_WHITELIST, + ACTION_FIX_SERVER_PACKAGE_OWNER, + ACTION_CHANGE_TOOL_ENV] + if self.action == "": + GaussLog.exitWithError( + ErrorCode.GAUSS_500["GAUSS_50001"] % 't' + ".") + function_map = {ACTION_PREPARE_PATH: self.checkPreparePathParameter, + ACTION_CREATE_OS_USER: self.checkCreateOSUserParameter, + ACTION_CHECK_OS_USER: self.checkCreateOSUserParameter, + ACTION_CREATE_CLUSTER_PATHS: \ + self.checkCreateClusterPathsParameter, + ACTION_SET_TOOL_ENV: self.checkSetToolEnvParameter, + ACTION_CHECK_HOSTNAME_MAPPING: \ + self.checkHostnameMappingParameter} + function_map_keys = function_map.keys() + if self.action in function_map_keys: + function_map[self.action]() + elif self.action in parameter_list: + pass + else: + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50004"] % "t") + + if self.mpprcFile != "": + if not os.path.isabs(self.mpprcFile): + GaussLog.exitWithError( + ErrorCode.GAUSS_502["GAUSS_50213"] % "mpprc file") + # 1.set tool env is the first time we use this mpprc file, + # so we can check and create it. + # 2.in other scene, the mpprc file should have exist, + # so we just check its exists + if self.action == ACTION_SET_TOOL_ENV: + self.prepareMpprcFile() + elif self.action == ACTION_CHECK_ENVFILE: + pass + else: + if not os.path.exists(self.mpprcFile): + GaussLog.exitWithError( + ErrorCode.GAUSS_502["GAUSS_50201"] % self.mpprcFile) + + if self.logFile == "": + self.logFile = DefaultValue.getOMLogPath( + DefaultValue.LOCAL_LOG_FILE, self.user, "") + + def prepareMpprcFile(self): + """ + function: prepare MPPRC file, include path and permission + input : NA + output: NA + """ + mpprcFilePath, mpprcFileName = os.path.split(self.mpprcFile) + ownerPath = self.mpprcFile + if not os.path.exists(self.mpprcFile): + while True: + # find the top path to be created + (ownerPath, dirName) = os.path.split(ownerPath) + if os.path.exists(ownerPath) or dirName == "": + ownerPath = os.path.join(ownerPath, dirName) + break + + try: + # for internal useage, we should set + # mpprc file permission to 644 here, and change to 640 later. + g_file.createDirectory(mpprcFilePath, True) + if os.path.exists(self.mpprcFile): + pass + else: + g_file.createFile(self.mpprcFile, False) + g_file.changeMode(DefaultValue.KEY_DIRECTORY_MODE, ownerPath, True, + "shell") + g_file.changeMode(DefaultValue.HOSTS_FILE, self.mpprcFile, False, + "shell") + + # if given group info in cmdline, + # we will change the mpprc file owner, otherwise, + # will not change the mpprc file owner. 
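+            # [Editor's note -- illustrative, not part of the original patch]
+            # The while-loop above walks upward with os.path.split() to find
+            # the highest directory that does not exist yet; e.g. for an
+            # mpprc file /a/b/c/mpprcfile with only /a present, ownerPath
+            # ends up as /a/b, so the recursive chmod/chown below starts at
+            # the topmost directory this tool created and touches nothing
+            # above it.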
+ if self.group != "": + g_file.changeOwner(self.user, ownerPath, True, "shell") + except Exception as e: + raise Exception(str(e)) + + def checkPreparePathParameter(self): + """ + function: check whether PreparePath parameter is right + input : NA + output: NA + """ + if self.preparePath == "": + GaussLog.exitWithError( + ErrorCode.GAUSS_500["GAUSS_50001"] % 'P' + ".") + if not os.path.isabs(self.preparePath): + GaussLog.exitWithError( + ErrorCode.GAUSS_502["GAUSS_50213"] % self.preparePath) + if self.group == "": + GaussLog.exitWithError( + ErrorCode.GAUSS_500["GAUSS_50001"] % 'g' + ".") + + def checkCreateOSUserParameter(self): + """ + function: check whether CreateOSUser parameter is right + input : NA + output: NA + """ + if self.group == "": + GaussLog.exitWithError( + ErrorCode.GAUSS_500["GAUSS_50001"] % 'g' + ".") + + def checkCreateClusterPathsParameter(self): + """ + function: check whether CreateClusterPaths parameter is right + input : NA + output: NA + """ + if self.group == "": + GaussLog.exitWithError( + ErrorCode.GAUSS_500["GAUSS_50001"] % 'g' + ".") + + if self.clusterConfig == "": + GaussLog.exitWithError( + ErrorCode.GAUSS_500["GAUSS_50001"] % 'X' + ".") + if not os.path.exists(self.clusterConfig): + GaussLog.exitWithError( + ErrorCode.GAUSS_502["GAUSS_50201"] % self.clusterConfig) + if not os.path.isabs(self.clusterConfig): + GaussLog.exitWithError( + ErrorCode.GAUSS_502["GAUSS_50213"] % "configuration file") + + def checkSetToolEnvParameter(self): + """ + function: check whether SetToolEnv parameter is right + input : NA + output: NA + """ + if self.clusterToolPath == "": + GaussLog.exitWithError( + ErrorCode.GAUSS_500["GAUSS_50001"] % 'Q' + ".") + + def checkSetCgroupParameter(self): + """ + function: check whether SetCgroup parameter is right + input : NA + output: NA + """ + if self.clusterToolPath == "": + GaussLog.exitWithError( + ErrorCode.GAUSS_500["GAUSS_50001"] % 'Q' + ".") + if self.cgroupMountDir != "": + if not os.path.isabs(self.cgroupMountDir): + GaussLog.exitWithError(ErrorCode.GAUSS_502["GAUSS_50213"] + % "Cgroup mount directory") + + def checkHostnameMappingParameter(self): + """ + function: check whether HostnameMapping parameter is right + input : NA + output: NA + """ + if self.clusterConfig == "": + GaussLog.exitWithError( + ErrorCode.GAUSS_500["GAUSS_50001"] % 'X' + ".") + if not os.path.exists(self.clusterConfig): + GaussLog.exitWithError( + ErrorCode.GAUSS_502["GAUSS_50201"] % self.clusterConfig) + if not os.path.isabs(self.clusterConfig): + GaussLog.exitWithError( + ErrorCode.GAUSS_502["GAUSS_50213"] % "configuration file") + + def checkOSVersion(self): + """ + function: + check if OS version is supported + input : NA + output: NA + """ + self.logger.log("Checking OS version.") + try: + if not DefaultValue.checkOsVersion(): + self.logger.logExit(ErrorCode.GAUSS_519["GAUSS_51900"]) + except Exception as e: + self.logger.logExit(str(e)) + + self.logger.log("Successfully checked OS version.") + + def prepareGivenPath(self, onePath, checkEmpty=True, checkSize=True): + """ + function: + make sure the path exist and user has private to access this path + precondition: + 1.checkEmpty is True or False + 2.checkSize is True or False + 3.user and group has been initialized + 4.path list has been initialized + 5.path in path list is absolute path + postcondition: + 1. 
+ input: + 1.path list + 2.checkEmpty + 3.checkSize + 4.path owner + output: + paths in os + hiden info:na + ppp: + for each path in the path list + save the path + if path exist + if need check empty + check empty + else + find the top path to be created + create the path + chown owner + check permission + check path size + """ + self.logger.debug("Preparing path [%s]." % onePath) + ownerPath = onePath + if os.path.exists(onePath): + if checkEmpty: + fileList = os.listdir(onePath) + if "pg_location" in fileList: + fileList.remove("pg_location") + if len(fileList) != 0: + self.logger.logExit( + ErrorCode.GAUSS_502["GAUSS_50202"] % onePath) + # check the owner of 'onepath' whether it is exist; if not, + # change it's owner to the cluster user + DefaultValue.checkPathandChangeOwner( + onePath, self.user, + self.group, DefaultValue.KEY_DIRECTORY_MODE) + else: + while True: + # find the top path to be created + (ownerPath, dirName) = os.path.split(ownerPath) + if os.path.exists(ownerPath) or dirName == "": + ownerPath = os.path.join(ownerPath, dirName) + break + # create the given path + self.logger.debug( + "Path [%s] does not exist. Please create it." % onePath) + self.makeDirsInRetryMode(onePath, DefaultValue.KEY_DIRECTORY_MODE) + + # if the path already exist, just change the top path mode, + # else change mode with -R + ##do not change the file mode in path if exist + # found error: given path is /a/b/c, script path is /a/b/c/d, + # then change mode with -R + # will cause an error + try: + if ownerPath != onePath: + g_file.changeOwner(self.user, ownerPath, True, "shell") + g_file.changeMode(DefaultValue.KEY_DIRECTORY_MODE, ownerPath, + True, "shell") + else: + g_file.changeOwner(self.user, ownerPath, False, "shell") + except Exception as e: + raise Exception(str(e)) + # check permission + if self.action == ACTION_PREPARE_PATH: + # for tool path, we only need check enter permission + if not self.checkPermission(self.user, onePath, True): + self.logger.logExit( + ErrorCode.GAUSS_501["GAUSS_50100"] % (onePath, self.user)) + else: + if not self.checkPermission(self.user, onePath): + self.logger.logExit( + ErrorCode.GAUSS_501["GAUSS_50102"] % (onePath, self.user)) + # check path size + if checkSize: + diskSizeInfo = DefaultValue.checkDirSize( + onePath, + DefaultValue.INSTANCE_DISK_SIZE, self.logger) + + self.logger.debug("Successfully prepared path.") + + def makeDirsInRetryMode(self, onePath, dirMode, retryTimes=3): + """ + function: command for creating path, + if failed then retry.Retry for 3 times + input : onePath,dirMode,retryTimes + output: NA + """ + retry = 1 + try_flag = False + while True: + try_flag = g_file.createDirectory(onePath, True, dirMode) + if try_flag: + break + if retry >= retryTimes: + self.logger.logExit( + ErrorCode.GAUSS_502["GAUSS_50206"] % onePath) + retry += 1 + + def checkPermission(self, username, originalPath, check_enter_only=False): + """ + function: + check if given user has operation permission for given path + precondition: + 1.user should be exist + 2.originalPath should be an absolute path + 3.caller should has root privilege + postcondition: + 1.return True or False + input : username,originalPath,check_enter_only + output: True/False + """ + # action: check and modify the permission of path before do check + # For the scene: After delete the user + # when execute gs_postuninstall --delete-user, + # the owner of GPHOME path becomes no owner; + # when execute gs_preinstall secondly, + # report permisson error about GPHOME + 
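+        # [Editor's note -- illustrative, not part of the original patch]
+        # The probe below runs, as the target user:
+        #     su - <user> -c "cd '<path>'"             # enter permission
+        #     su - <user> -c 'touch ... && chmod ...'  # create permission
+        #     su - <user> -c 'echo aaa > ...'          # write permission
+        # and removes the test file afterwards; any non-zero exit makes the
+        # check return False so the caller can report the missing privilege.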
DefaultValue.checkPathandChangeOwner(originalPath, self.user, + self.group, + DefaultValue.KEY_DIRECTORY_MODE) + cmd = "su - %s -c \"cd '%s'\"" % (username, originalPath) + status = subprocess.getstatusoutput(cmd)[0] + if status != 0: + return False + + if check_enter_only: + return True + + testFile = os.path.join(originalPath, "touch.tst") + cmd = "su - %s -c 'touch %s && chmod %s %s' >/dev/null 2>&1" % ( + username, testFile, DefaultValue.KEY_FILE_MODE, testFile) + status = subprocess.getstatusoutput(cmd)[0] + if status != 0: + return False + + cmd = "su - %s -c 'echo aaa > %s' >/dev/null 2>&1" \ + % (username, testFile) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + cmd = "rm -f '%s' >/dev/null 2>&1" % testFile + subprocess.getstatusoutput(cmd) + return False + + cmd = "rm -f '%s' >/dev/null 2>&1" % testFile + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + return False + + return True + + def checkMappingForHostName(self): + """ + function: Checking hostname mapping + input : NA + output: NA + """ + self.logger.debug("Checking hostname mapping.") + try: + self.logger.debug("Change file[/etc/hosts] mode.") + g_file.changeMode(DefaultValue.HOSTS_FILE, "/etc/hosts") + OMCommand.checkHostnameMapping(self.clusterInfo, self.logFile) + except Exception as e: + self.logger.logExit(str(e)) + + self.logger.debug("Successfully checked hostname mapping.") + + def checkPasswdIsExpires(self): + """ + function: Check if user password is expires + input : NA + output: False or True + """ + cmd = g_file.SHELL_CMD_DICT["checkPassword"] % ( + self.user, "'^Password expires'") + (timestatus, output) = subprocess.getstatusoutput(cmd) + if timestatus != 0: + self.logger.logExit(ErrorCode.GAUSS_514[ + "GAUSS_51400"] % cmd + + " Error:\n%s" % output) + result = output.split(":")[1].strip() + try: + passwd_expiretime = time.strptime(result, "%b %d, %Y") + except Exception: + return False + local_time_string = time.strftime("%b %d, %Y") + local_time = time.strptime(local_time_string, "%b %d, %Y") + expire_seconds = int(time.mktime(passwd_expiretime)) + lcoalTime_seconds = int(time.mktime(local_time)) + if expire_seconds < lcoalTime_seconds: + return True + else: + return False + + def delTempFile(self, filename): + """ + function: delete temp file + input : filename + output: NA + """ + try: + if os.path.isfile(filename): + g_file.removeFile(filename, "shell") + except Exception as e: + raise Exception(str(e)) + + def addAllowUser(self, user): + """ + function: Add "user" to AllowUsers in /etc/ssh/sshd_config + if necessary. + input: + user: the user name in string. + output: + 1: Successfully added. + 0: Already added, or "AllowUsers" is disabled, nothing to do. + """ + # If "AllowUsers" in sshd is enabled, only specified users + # can be authenticated. + # So we need to add the newly created user to white list. + sshd_config = "/etc/ssh/sshd_config" + allowUsersCmd = "cat " + sshd_config + " | grep '\\'" + (status, output) = subprocess.getstatusoutput(allowUsersCmd) + + allowUsersRes = output + # No results found. "grep" returns non-zero if nothing grepped. + # AllowUsers in sshd_config is disabled. + if (status != 0) and (output is None or len(output) == 0): + self.logger.debug("'AllowUers' of sshd_config is disabled.") + return 0 + elif status != 0: + # It really failed. + self.logger.logExit( + ErrorCode.GAUSS_503["GAUSS_50321"] % "AllowUsers" + + " Command: %s. 
Error: \n%s" % ( + allowUsersCmd, output)) + else: + allowUsersRes = str(output).lstrip().lstrip("\t") + if allowUsersRes.find('#') == 0: + return 0 + elif allowUsersRes.find('#') > 0: + allowUsersRes = allowUsersRes[0:allowUsersRes.find('#')] + + if self.user not in allowUsersRes.split(' '): + setAllowUsersCmd = "sed -i '/\\/d' %s" % sshd_config + (status, output) = subprocess.getstatusoutput(setAllowUsersCmd) + if status != 0: + self.logger.logExit(ErrorCode.GAUSS_514[ + "GAUSS_51400"] % setAllowUsersCmd + + " Error:\n%s" % output) + g_Platform.setKeyValueInSshd(allowUsersRes, user) + # Attention: here we will not restart sshd service, + # as it will be done in "prepareUserSshdService". + self.logger.debug( + "User '%s' added to 'AllowUsers' of %s successfully." % ( + user, sshd_config)) + return 1 + + def createOSUser(self): + """ + function: Create OS user and group + input : NA + output: NA + """ + self.logger.debug("Creating OS user on local host.") + + tempFile = "/tmp/temp.%s" % self.user + + userstatus = 0 + # Check if user exists + try: + DefaultValue.getUserId(self.user) + # check user passwd is Expires + if self.checkPasswdIsExpires(): + self.logger.logExit(ErrorCode.GAUSS_503["GAUSS_50307"]) + except Exception: + self.logger.debug("User[%s] not exists" % self.user) + userstatus = 1 + + # check if the user is correct in /home/user path + needChgOwner = False + if userstatus == 1: + userHomePath = "/home/%s" % self.user + if os.path.exists(userHomePath): + try: + homePathUser = g_file.getfileUser(userHomePath)[0] + if homePathUser != self.user: + needChgOwner = True + except Exception: + needChgOwner = True + + # Check if group exists + cmd = "cat /etc/group | awk -F [:] '{print $1}' | grep '^%s$'" \ + % self.group + (groupstatus, groupoutput) = subprocess.getstatusoutput(cmd) + if groupstatus != 0: + self.logger.debug( + "Command for checking group exists: %s." % cmd + + " Error:\n%s" % groupoutput) + + # user exists and input group not exists + if userstatus == 0 and groupstatus != 0: + self.logger.logExit(ErrorCode.GAUSS_503["GAUSS_50305"]) + + # user exists and group exists + if userstatus == 0 and groupstatus == 0: + # UID is 0 + if pwd.getpwnam(self.user).pw_uid == 0: + self.logger.logExit(ErrorCode.GAUSS_503["GAUSS_50302"]) + + # user's group != input group + groupInfo = grp.getgrgid(pwd.getpwnam(self.user).pw_gid).gr_name + if self.group != groupInfo: + self.logger.logExit(ErrorCode.GAUSS_503["GAUSS_50305"]) + + self.delTempFile(tempFile) + + return + + if checkOSUser: + self.logger.logExit(ErrorCode.GAUSS_503["GAUSS_50305"]) + + # user does not exist and group does not exist + if userstatus != 0 and groupstatus != 0: + self.logger.debug( + "Creating OS user [%s:%s]." % (self.user, self.group)) + cmd = "groupadd %s && useradd -m -g %s %s" % ( + self.group, self.group, self.user) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + self.logger.logExit( + ErrorCode.GAUSS_502["GAUSS_50206"] % 'OS user' + + " Command: %s. Error: \n%s" % (cmd, output)) + + # user does not exist and group exists + if userstatus != 0 and groupstatus == 0: + self.logger.debug( + "Creating OS user [%s:%s]." % (self.user, self.group)) + cmd = "useradd -m -g %s %s" % (self.group, self.user) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + self.logger.logExit( + ErrorCode.GAUSS_502["GAUSS_50206"] % 'OS user' + + " Command: %s. 
Error: \n%s" % (cmd, output)) + if needChgOwner: + userProfile = "/home/%s/.bashrc" % self.user + if not os.path.exists(userProfile): + cmd = g_file.SHELL_CMD_DICT["copyFile"] % \ + ("/etc/skel/.bash*", "/home/%s/" % self.user) + status, output = subprocess.getstatusoutput(cmd) + if status != 0: + self.logger.logExit(ErrorCode.GAUSS_502["GAUSS_50214"] + % userProfile + " Error: " + output) + g_file.changeOwner(self.user, "/home/%s/" % self.user, True) + + self.logger.debug("Changing user password.") + try: + # check if the file is a link + g_OSlib.checkLink(tempFile) + with open(tempFile, "r") as fp: + password = fp.read() + self.delTempFile(tempFile) + except Exception: + self.delTempFile(tempFile) + self.logger.logExit(ErrorCode.GAUSS_503["GAUSS_50311"] % "user") + + cmd = "echo '%s:%s' | chpasswd" % (self.user, password) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + self.logger.logExit( + ErrorCode.GAUSS_503["GAUSS_50311"] % self.user + + " Error: \n%s" % output) + + def createClusterPaths(self): + """ + function: + create all paths for cluster + install path + tmp path + data path + log path + precondition: + 1.self.clusterInfo has been initialized + postcondition: + 1.all path exist and have proper authority + input:NA + output:na + hiden info: + current info of each path + """ + self.logger.debug("Creating paths for cluster.") + if self.checkFinishFlag(): + needCheckEmpty = False + else: + needCheckEmpty = True + + self.initNodeInfo() + self.prepareGaussLogPath() + self.prepareInstallPath(needCheckEmpty) + self.prepareTmpPath(needCheckEmpty) + self.prepareDataPath(needCheckEmpty) + + self.logger.debug("Successfully created paths for cluster.") + + def prepareGsdbHomePath(self, needCheckEmpty): + """ + function: Prepare GsdbHome Path + input : NA + output: NA + """ + self.logger.debug("Creating gsdb_home path.") + gsdbHomePath = "/home/%s/gsdb_home/protect" % self.user + self.prepareGivenPath(gsdbHomePath, needCheckEmpty) + self.logger.debug("Successfully create gsdb_home path.") + + def prepareGaussLogPath(self): + """ + function: Prepare Gausslog Path + input : NA + output: NA + """ + self.logger.debug("Creating log path.") + gaussdb_dir = self.clusterInfo.logPath + + self.logger.debug("Checking %s directory [%s]." % ( + VersionInfo.PRODUCT_NAME, gaussdb_dir)) + if not os.path.exists(gaussdb_dir): + self.makeDirsInRetryMode(gaussdb_dir, + DefaultValue.KEY_DIRECTORY_MODE) + + try: + # change gaussdb dir mode + g_file.changeMode(DefaultValue.DIRECTORY_MODE, gaussdb_dir, False, + "shell") + g_file.changeOwner(self.user, gaussdb_dir, False, "shell") + except Exception as e: + raise Exception(str(e)) + + # make user log dir + user_dir = "%s/%s" % (self.clusterInfo.logPath, self.user) + self.prepareGivenPath(user_dir, False) + + # change the directory permission. Remove hidden folders + cmdDir = "find '%s' -type d ! 
-name '.*' -exec chmod '%s' {} \;" % ( + user_dir, DefaultValue.DIRECTORY_MODE) + (status, diroutput) = subprocess.getstatusoutput(cmdDir) + self.logger.debug( + "Command to chmod the directory in directory[%s] " % user_dir) + if status != 0: + self.logger.logExit(ErrorCode.GAUSS_502[ + "GAUSS_50201"] % user_dir + + " Error:\n%s" % diroutput) + # change the file permission + ClusterCommand.getchangeFileModeCmd(user_dir) + + # change user log dir owner + try: + g_file.changeOwner(self.user, user_dir, True, "shell", + retryFlag=True, retryTime=15, waiteTime=1) + except Exception as e: + raise Exception(str(e)) + self.logger.debug("Successfully created log path.") + + def prepareTmpPath(self, needCheckEmpty): + """ + function: Prepare temporary path + input : needCheckEmpty + output: NA + """ + self.logger.debug("Creating temporary path.") + tmpDir = DefaultValue.getTmpDir(self.user, self.clusterConfig) + self.prepareGivenPath(tmpDir, needCheckEmpty) + self.logger.debug("Successfully created temporary path.") + + def prepareDataPath(self, needCheckEmpty): + """ + function: Prepare data path + input : needCheckEmpty + output: NA + """ + self.logger.debug("Creating data path.") + + self.logger.debug("Checking database node configuration.") + for dnInst in g_nodeInfo.datanodes: + self.prepareGivenPath(dnInst.datadir, needCheckEmpty) + if len(dnInst.ssdDir) != 0: + self.prepareGivenPath(dnInst.ssdDir, needCheckEmpty) + + self.logger.debug("Checking database node XLOG PATH configuration.") + for dnInst in g_nodeInfo.datanodes: + if dnInst.xlogdir != '': + self.prepareGivenPath(dnInst.xlogdir, needCheckEmpty) + + self.logger.debug("Successfully created data path.") + + def prepareInstallPath(self, needCheckEmpty): + """ + function: Prepare installation path + input : needCheckEmpty + output: NA + """ + self.logger.debug("Creating installation path.") + + installPath = self.clusterInfo.appPath + if os.path.exists(installPath) and not os.path.islink(installPath): + self.logger.logExit(ErrorCode.GAUSS_502[ + "GAUSS_50200"] % installPath + + " Please remove it." + " It should be a symbolic link to " + "$GAUSSHOME if it exists") + versionFile = VersionInfo.get_version_file() + commitid = VersionInfo.get_version_info(versionFile)[2] + installPath = installPath + "_" + commitid + if not needCheckEmpty: + # check the upgrade app directory, if we set up a new + # directory for upgrade, we must check empty + gaussHome = DefaultValue.getInstallDir(self.user) + if os.path.islink(gaussHome): + actualPath = os.path.realpath(gaussHome) + oldCommitId = actualPath[-8:] + if oldCommitId != commitid and os.path.isdir(installPath): + fileList = os.listdir(installPath) + # mat have upgrade some node, so the dir should + # have the binary info + # if use other version to preinstall when upgrade + # is not finished, then we need to preinstall + # current upgrade version + if "bin" in fileList and "etc" in fileList and \ + "include" in fileList: + pass + else: + needCheckEmpty = True + + self.logger.debug("Install path %s." 
% installPath) + self.prepareGivenPath(installPath, needCheckEmpty) + self.checkUpperPath(needCheckEmpty, installPath) + + self.logger.debug("Successfully created installation path.") + + def checkUpperPath(self, needCheckEmpty, installPath): + """ + if first prepare the path, we should have the permission to + write file with self.user, so we can create + symbolic link in install process + :param needCheckEmpty: get GAUSS_ENV is 2, we have successfully + install the cluster, so needCheckEmpty is False + :param installPath: is same with $GAUSSHOME + :return: NA + """ + if not needCheckEmpty: + return + upperDir = os.path.dirname(installPath) + cmd = "su - %s -c \"python3 -c \\\"import os;" \ + "print(os.access('%s',os.W_OK))\\\"\"" % ( + self.user, upperDir) + self.logger.debug( + "Command to check if we have write permission for upper path:" + " %s" % cmd) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + self.logger.logExit( + ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + "Error: \n%s" % str( + output)) + if output == "True": + return + fileList = os.listdir(upperDir) + if installPath in fileList: + fileList.remove(installPath) + if len(fileList) != 0: + self.logger.logExit(ErrorCode.GAUSS_502["GAUSS_50202"] % upperDir + + " Or user [%s] has write" + " permission to directory %s." + " Because it will create " + "symbolic link [%s] to install path [%s] " + "in gs_install process with this user." + % (self.user, upperDir, + self.clusterInfo.appPath, installPath)) + self.logger.log("The path [%s] is empty, change the owner to %s." % ( + upperDir, self.user)) + g_file.changeOwner(self.user, upperDir, False, "shell") + self.logger.log("Successfully change the owner.") + + def prepareUserCronService(self): + """ + function: + 1.set cron bin permission + 2.check and make sure user have pemission to use cron + 3.restart cron service + input : NA + output: NA + """ + self.logger.debug("Preparing user cron service.") + ##1.set crontab file permission + crontabFile = "/usr/bin/crontab" + if not os.path.exists(crontabFile): + self.logger.logExit( + ErrorCode.GAUSS_502["GAUSS_50201"] % crontabFile) + if not os.path.isfile(crontabFile): + self.logger.logExit( + ErrorCode.GAUSS_502["GAUSS_50210"] % crontabFile) + + # attention:crontab file permission should be 755 + g_file.changeOwner("root", crontabFile) + g_file.changeMode(DefaultValue.MAX_DIRECTORY_MODE, crontabFile) + cmd = "chmod u+s '%s'" % crontabFile + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + self.logger.logExit(ErrorCode.GAUSS_501[ + "GAUSS_50107"] % crontabFile + + " Command:%s. Error:\n%s" % ( + cmd, output)) + + ##2.make sure user have permission to use cron + cron_allow_file = "/etc/cron.allow" + if not os.path.isfile(cron_allow_file): + g_file.createFile(cron_allow_file) + g_file.changeMode(DefaultValue.KEY_FILE_MODE, cron_allow_file) + g_file.changeOwner("root", cron_allow_file) + + g_file.deleteLine(cron_allow_file, "^\\s*%s\\s*$" % self.user) + g_file.writeFile(cron_allow_file, [self.user]) + + ##3.restart cron service + self.logger.debug("Restarting CRON service.") + retryTimes = 0 + while True: + (status, output) = g_service.manageOSService("crond", "restart") + if status == 0: + break + if retryTimes > 1: + self.logger.logExit(ErrorCode.GAUSS_508[ + "GAUSS_50802"] + % "restart crond" + " Error:\n%s" % output) + else: + self.logger.debug( + "Failed to restart CRON service." + " Retrying.\nOutput: \n%s." 
% str( + output)) + retryTimes = retryTimes + 1 + time.sleep(1) + + self.logger.debug("Successfully prepared user CRON service.") + + def prepareUserSshdService(self): + """ + function: set MaxStartups to 1000. + input : NA + output: NA + """ + self.logger.debug("Preparing user SSHD service.") + sshd_config_file = "/etc/ssh/sshd_config" + paramName = "MaxStartups" + sshdNeedReload = False + + # 1.change the MaxStartups + cmd = "grep -E '^[ ]*MaxStartups[ ]*1000$' %s" % sshd_config_file + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + cmd = "sed -i '/^.*%s.*$/d' %s" % (paramName, sshd_config_file) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + self.logger.logExit(ErrorCode.GAUSS_514[ + "GAUSS_51400"] % cmd + + " Error:\n%s" % output) + g_Platform.setKeyValueInSshd('MaxStartups', '1000') + self.logger.debug("Wrote MaxStartups value.") + sshdNeedReload = True + + # Check whether this is DWS mode. + OsVersionFlag = False + rdsFlag = False + chrootFlag = False + distname = g_Platform.dist()[0] + if distname in "euleros": + OsVersionFlag = True + if os.path.exists("/rds/"): + rdsFlag = True + if os.path.exists("/var/chroot/"): + chrootFlag = True + if OsVersionFlag and rdsFlag and chrootFlag: + # DWS mode has its own value of ClientAliveInterval, + # which is 43200. + self.logger.debug("In DWS mode, skip setting ClientAliveInterval.") + self.logger.debug("Successfully prepared user SSHD service.") + return + + # 2.change the ClientAliveInterval + cmd = "grep -E '^[ ]*ClientAliveInterval[ ]*0$' %s" % sshd_config_file + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + cmd = "sed -i '/^.*ClientAliveInterval.*$/d' %s" % sshd_config_file + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + self.logger.logExit(ErrorCode.GAUSS_514[ + "GAUSS_51400"] % cmd + + " Error:\n%s" % output) + g_Platform.setKeyValueInSshd('ClientAliveInterval', '0') + self.logger.debug("Wrote ClientAliveInterval value.") + sshdNeedReload = True + + # 3. add cluster owner to 'AllowUsers' in /etc/ssh/sshd_config + # if necessary. + if self.addAllowUser(self.user) > 0: + sshdNeedReload = True + + if sshdNeedReload: + self.logger.debug("Reload sshd service.") + (status, output) = g_service.manageOSService("sshd", "reload") + if status != 0: + self.logger.logExit(ErrorCode.GAUSS_508[ + "GAUSS_50802"] % "reload sshd" + + " Error:\n%s" % output) + + self.logger.debug("Successfully prepared user SSHD service.") + + def setFinishFlag(self): + """ + function: + set env to show that preinstall succeeded + precondition: + 1.user has been created + postcondition: + 1.the value of GAUSS_ENV is 1 + input:NA + output:user's env GAUSS_ENV + hidden: + the env name and value to be set + process: + if the user bashrc file does not exist, create it; + clean GAUSS_ENV in the user bashrc file; + set GAUSS_ENV in the user bashrc file. + After setting env, set the daily alarm. + """ + # get and check the userProfile + userProfile = "" + if self.mpprcFile != "" and self.mpprcFile is not None: + userProfile = self.mpprcFile + else: + cmd = "su - %s -c \"echo ~\" 2>/dev/null" % self.user + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + self.logger.logExit(ErrorCode.GAUSS_514[ + "GAUSS_51400"] % cmd + + " Error:\n%s" % output) + + # check if user profile exist + userProfile = "/home/%s/.bashrc" % self.user + if not os.path.exists(userProfile): + self.logger.logExit(ErrorCode.GAUSS_502[ + "GAUSS_50201"] % 'user profile' + + " Please create %s."
% userProfile) + + # clean user's environmental variable + self.logger.debug("Deleting user's environmental variable.") + DefaultValue.cleanUserEnvVariable(userProfile, + cleanGS_CLUSTER_NAME=False) + self.logger.debug("Successfully deleted user's environmental variable.") + + # set user's environmental variable + self.logger.debug("Setting user's environmental variable.") + installPath = self.clusterInfo.appPath + tmpPath = self.clusterInfo.readClusterTmpMppdbPath(self.user, + self.clusterConfig) + logPath = "%s/%s" % ( + self.clusterInfo.readClusterLogPath(self.clusterConfig), self.user) + agentPath = self.clusterInfo.agentPath + agentLogPath = self.clusterInfo.agentLogPath + DefaultValue.setUserEnvVariable(userProfile, installPath, tmpPath, + logPath, agentPath, agentLogPath) + + if (os.path.exists('/var/chroot/') and os.path.exists( + '/rds/datastore/')): + clusterName = self.clusterInfo.name + DefaultValue.updateUserEnvVariable(userProfile, "GS_CLUSTER_NAME", + clusterName) + + self.logger.debug("Successfully set user's environmental variable.") + + # Set daily alarm. + self.logger.debug("Set daily alarm.") + # Check whether this is DWS mode. + OsVersionFlag = False + rdsFlag = False + chrootFlag = False + distname, version, idnum = g_Platform.dist() + # check if OS version is Euler + if distname in "euleros": + OsVersionFlag = True + if os.path.exists("/rds/"): + rdsFlag = True + if os.path.exists("/var/chroot/"): + chrootFlag = True + # Change the owner of Gausslog + self.logger.debug("Changing the owner of Gausslog.") + user_dir = "%s/%s" % (self.clusterInfo.logPath, self.user) + self.logger.debug("Changing the owner of the user log directory: %s." + % user_dir) + g_file.changeOwner(self.user, user_dir, True, "shell", retryFlag=True, + retryTime=15, waiteTime=1) + omLogPath = os.path.dirname(self.logFile) + self.logger.debug( + "Changing the owner of preinstall log path: %s." % omLogPath) + if os.path.exists(omLogPath): + g_file.changeOwner(self.user, omLogPath, True, "shell", + retryFlag=True, retryTime=15, waiteTime=1) + self.logger.debug("Checking the permission of the user log directory:" + " %s." % user_dir) + cmd = g_file.SHELL_CMD_DICT["checkUserPermission"] % ( + self.user, user_dir) + self.logger.debug("The command to check permission is: %s."
% cmd) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + self.logger.logExit(ErrorCode.GAUSS_514[ + "GAUSS_51400"] % cmd + + " Error:\n%s" % output) + self.logger.debug("Successfully change the owner of Gausslog.") + + # get the value of GAUSS_ENV + self.logger.debug("Setting finish flag.") + cmd = "su - %s -c 'source %s;echo $GAUSS_ENV' 2>/dev/null" % ( + self.user, userProfile) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_514[ + "GAUSS_51400"] % cmd + " Error:\n%s" % output) + ENVNUM = output.split("\n")[0] + # set finish flag + if str(ENVNUM) != "2": + DefaultValue.updateUserEnvVariable(userProfile, "GAUSS_ENV", "1") + + self.logger.debug("Successfully set finish flag.") + + def checkFinishFlag(self): + """ + function: + return True means have execed preinstall script + return False means have not execed preinstall script + input : NA + output: True/False + """ + if self.mpprcFile != "": + cmd = "su - root -c 'source %s;echo $GAUSS_ENV' 2>/dev/null" \ + % self.mpprcFile + else: + cmd = "su - %s -c 'source ~/.bashrc;echo $GAUSS_ENV' 2>/dev/null" \ + % self.user + status, output = subprocess.getstatusoutput(cmd) + if status != 0: + self.logger.debug( + "Failed to obtain the environment variable " + "$GAUSS_ENV. Error:\n%s" % output) + return False + + if output.strip() == str(1) or output.strip() == str(2): + self.logger.debug("Successfully checked GAUSS_ENV.") + return True + else: + self.logger.debug( + "The environmental variable [GAUSS_ENV]'s value " + "is invalid. The value is:%s" % ( + output.strip())) + return False + + def setUserProfile(self, userEnvConfig): + """ + function: + set env into user's .bashrc file + precondition: + 1.env list are valid + 2.user exist + input: + 1.env list + 2.use name + postcondition:na + output:na + hiden: + the file to be set into + """ + self.logger.debug("Setting user profile.") + if self.mpprcFile != "": + # have check its exists when check parameters, + # so it should exist here + userProfile = self.mpprcFile + else: + # check if user home exist + cmd = "su - %s -c \"echo ~\" 2>/dev/null" % self.user + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + self.logger.logExit(ErrorCode.GAUSS_514[ + "GAUSS_51400"] % cmd + + " Error:\n%s" % output) + + # check if user profile exist + userProfile = "/home/%s/.bashrc" % self.user + if not os.path.exists(userProfile): + self.logger.debug( + "User profile does not exist. Please create %s." + % userProfile) + cmd = "su - %s -c 'touch %s && chmod %s %s'" % ( + self.user, userProfile, DefaultValue.DIRECTORY_MODE, + userProfile) + status, output = subprocess.getstatusoutput(cmd) + if status != 0: + self.logger.logExit(ErrorCode.GAUSS_502[ + "GAUSS_50206"] % userProfile + + " Error:\n%s" % output) + + # clean ENV in user bashrc file + self.logger.debug("User profile exist. Deleting crash old ENV.") + for env in userEnvConfig: + g_file.deleteLine(userProfile, "^\\s*export\\s*%s=.*$" % env) + self.logger.debug("Deleting %s in user profile" % env) + + # set ENV in user bashrc file + self.logger.debug( + "Successfully deleted crash old ENV. 
Setting new ENV.") + for env in userEnvConfig: + context = "export %s=%s" % (env, userEnvConfig[env]) + g_file.writeFile(userProfile, [context]) + + self.logger.debug("Successfully set user profile.") + + def getUserProfile(self): + """ + function: set env into /etc/profile + input : OSEnvConfig + output: NA + """ + if self.mpprcFile != "": + # have check its exists when check parameters, + # so it should exist here + userProfile = self.mpprcFile + else: + # check if os profile exist + userProfile = "/etc/profile" + if not os.path.exists(userProfile): + self.logger.debug( + "Profile does not exist. Please create %s." % userProfile) + g_file.createFile(userProfile) + g_file.changeMode(DefaultValue.DIRECTORY_MODE, userProfile) + return userProfile + + def setOSProfile(self, OSEnvConfig): + """ + function: set env into /etc/profile + input : OSEnvConfig + output: NA + """ + self.logger.debug("Setting OS profile.") + + userProfile = self.getUserProfile() + + # clean ENV in os profile + self.logger.debug("OS profile exists. Deleting crash old ENV.") + for env in OSEnvConfig: + g_file.deleteLine(userProfile, "^\\s*export\\s*%s=.*$" % env) + self.logger.debug("Deleting crash [%s] in OS profile." % env) + + # set ENV in os profile + self.logger.debug("Successfully deleted old ENV. Setting new env.") + for env in OSEnvConfig: + context = "export %s=%s" % (env, OSEnvConfig[env]) + g_file.writeFile(userProfile, [context]) + + self.logger.debug("Successfully set OS profile.") + + def setDBUerProfile(self): + """ + function: + set database user's env into user's .bashrc file. + env list are provided by user + input : NA + output: NA + """ + self.logger.debug( + "Setting %s user profile." % VersionInfo.PRODUCT_NAME) + # check if need to set env parameter + if len(self.envParams) == 0: + self.logger.debug("No need to set ENV.") + return + + # parse env user inputed + for param in self.envParams: + keyValue = param.split("=") + if len(keyValue) != 2: + self.logger.logExit(ErrorCode.GAUSS_500["GAUSS_50000"] % param) + envConfig[keyValue[0].strip()] = keyValue[1].strip() + + # set env into user's profile + self.setUserProfile(envConfig) + + self.logger.debug( + "Successfully set %s user profile." % VersionInfo.PRODUCT_NAME) + + def setToolEnv(self): + """ + function: set environment variables + input : NA + output: NA + """ + self.logger.debug("Setting tool ENV.") + + userProfile = self.getUserProfile() + + # clean ENV in os profile + self.logger.debug("OS profile exists. 
Deleting crash old tool ENV.") + # clean MPPRC FILE PATH + if self.mpprcFile != "": + g_file.deleteLine(userProfile, + "^\\s*export\\s*%s=.*$" + % DefaultValue.MPPRC_FILE_ENV) + self.logger.debug( + "Deleting crash MPPRC file path in" + " user environment variables.") + + # clean GPHOME + g_file.deleteLine(userProfile, "^\\s*export\\s*GPHOME=.*$") + self.logger.debug( + "Deleting crash GPHOME in user environment variables.") + + # clean LD_LIBRARY_PATH + g_file.deleteLine(userProfile, + "^\\s*export\\s*LD_LIBRARY_PATH=\\$GPHOME\\/script" + "\\/gspylib\\/clib:\\$LD_LIBRARY_PATH$") + g_file.deleteLine(userProfile, + "^\\s*export\\s*LD_LIBRARY_PATH=\\$GPHOME\\/lib:" + "\\$LD_LIBRARY_PATH$") + self.logger.debug( + "Deleting crash LD_LIBRARY_PATH in user environment variables.") + + # clean PATH + g_file.deleteLine(userProfile, + "^\\s*export\\s*PATH=\\$GPHOME\\/pssh-2.3.1\\/bin:" + "\\$GPHOME\\/script:\\$PATH$") + g_file.deleteLine(userProfile, + "^\\s*export\\s*PATH=\\$GPHOME\\/script\\/gspylib\\" + "/pssh\\/bin:\\$GPHOME\\/script:\\$PATH$") + g_file.deleteLine(userProfile, + "^\\s*export\\s*PATH=\\/root\\/gauss_om\\/%s\\" + "/script:\\$PATH$" % self.user) + self.logger.debug("Deleting crash PATH in user environment variables.") + + # clean PYTHONPATH + g_file.deleteLine(userProfile, + "^\\s*export\\s*PYTHONPATH=\\$GPHOME\\/lib") + self.logger.debug( + "Deleting crash PYTHONPATH in user environment variables.") + + # set ENV in os profile + self.logger.debug( + "Successfully deleted crash old tool ENV. Setting new tool ENV.") + # set env in user profile + try: + # check if the file is a link + g_OSlib.checkLink(userProfile) + # set mpprc file + if self.mpprcFile != "": + context = "export %s=%s" % ( + DefaultValue.MPPRC_FILE_ENV, self.mpprcFile) + g_file.writeFile(userProfile, [context]) + # set GPHOME + g_file.writeFile(userProfile, + ["export GPHOME=%s" % self.clusterToolPath]) + # set PATH + g_file.writeFile(userProfile, [ + "export PATH=$GPHOME/script/gspylib/pssh/bin:" + "$GPHOME/script:$PATH"]) + # set LD_LIBRARY_PATH + g_file.writeFile(userProfile, [ + "export LD_LIBRARY_PATH=" + "$GPHOME/script/gspylib/clib:$LD_LIBRARY_PATH"]) + g_file.writeFile(userProfile, [ + "export LD_LIBRARY_PATH=$GPHOME/lib:$LD_LIBRARY_PATH"]) + # set PYTHONPATH + g_file.writeFile(userProfile, ["export PYTHONPATH=$GPHOME/lib"]) + # set om root script path + om_root_path = "%s/%s/script" % (DefaultValue.ROOT_SCRIPTS_PATH, + self.user) + g_file.writeFile(userProfile, + ["export PATH=%s:$PATH" % om_root_path]) + + except Exception as e: + self.logger.logExit(str(e)) + self.logger.debug("Successfully set tool ENV.") + + def cleanWarningEnv(self): + """ + function: Deleting crash rsyslog or syslog-ng log ENV + input : NA + output: NA + """ + self.logger.debug("Deleting crash system log ENV.") + # judge the syslog type on the local host is rsyslog or syslog-ng + syslogType = self.judgeSyslogType() + if syslogType == SYSLOG_NG: + self.cleanWarningEnvForSyslogng() + elif syslogType == RSYSLOG: + self.cleanWarningEnvForRsyslog() + self.logger.debug("Successfully deleted crash system log ENV.") + + def cleanWarningEnvForSyslogng(self): + """ + function: Deleting crash syslog-ng ENV + input : NA + output: NA + """ + # clean client syslog-ng configure + cmd = "(if [ -s '%s' ]; then " % SYSLOG_NG_CONFIG_FILE + cmd += "sed -i -e '/^filter f_gaussdb.*$/d' %s " \ + % SYSLOG_NG_CONFIG_FILE + cmd += "-e '/^destination d_gaussdb.*$/d' %s " % SYSLOG_NG_CONFIG_FILE + cmd += "-e '/^log { source(src); filter(f_gaussdb);" \ + " 
destination(d_gaussdb); };$/d' %s;fi;) " \ + % SYSLOG_NG_CONFIG_FILE + self.logger.debug("Command for deleting crash client system log: %s." + % cmd) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + self.logger.logExit(ErrorCode.GAUSS_502["GAUSS_50207"] + % 'crash client system log' + + " Error: \n%s" % output) + + # clean server syslog-ng configure + cmd = "(if [ -s '%s' ]; then " % SYSLOG_NG_CONFIG_FILE + cmd += "sed -i -e '/^template t_gaussdb.*$/d' %s " \ + % SYSLOG_NG_CONFIG_FILE + cmd += "-e '/^source s_gaussdb.*$/d' %s " % SYSLOG_NG_CONFIG_FILE + cmd += "-e '/^filter f_gaussdb.*$/d' %s " % SYSLOG_NG_CONFIG_FILE + cmd += "-e '/^destination d_gaussdb.*$/d' %s " % SYSLOG_NG_CONFIG_FILE + cmd += "-e '/^log { source(s_gaussdb); filter(f_gaussdb);" \ + " destination(d_gaussdb); };$/d' %s;fi; " \ + % SYSLOG_NG_CONFIG_FILE + cmd += "if [ -s '%s' ]; then " % SYSLOG_NG_CONFIG_FILE_SERVER + cmd += "sed -i -e '/^SYSLOGD_OPTIONS=\\\"-r -m 0\\\"/d' %s " \ + % SYSLOG_NG_CONFIG_FILE_SERVER + cmd += "-e '/^KLOGD_OPTIONS=\\\"-x\\\"/d' %s; " \ + % SYSLOG_NG_CONFIG_FILE_SERVER + cmd += "fi) " + self.logger.debug("Command for cleaning crash server system log: %s." + % cmd) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + self.logger.logExit(ErrorCode.GAUSS_502["GAUSS_50207"] + % 'crash server system log' + + " Error: \n%s" % output) + + # restart the syslog service + self.logger.debug("Restart syslog service.") + (status, output) = g_service.manageOSService("syslog", "restart") + if status != 0: + self.logger.logExit(ErrorCode.GAUSS_508["GAUSS_50802"] + % "restart syslog" + " Error: \n%s" % output) + + def cleanWarningEnvForRsyslog(self): + """ + function: Deleting crash rsyslog ENV + input : NA + output: NA + """ + # clean rsyslog config on client and server + cmd = "(if [ -s %s ]; then " % RSYSLOG_CONFIG_FILE + cmd += "sed -i -e '/^$ModLoad imjournal.*$/d' %s " \ + % RSYSLOG_CONFIG_FILE + cmd += "-e '/^$ModLoad imudp.*$/d' %s " % RSYSLOG_CONFIG_FILE + cmd += "-e '/^$UDPServerRun 514.*$/d' %s " % RSYSLOG_CONFIG_FILE + cmd += "-e '/^$imjournalRatelimitInterval.*$/d' %s " \ + % RSYSLOG_CONFIG_FILE + cmd += "-e '/^$imjournalRatelimitBurst.*$/d' %s " \ + % RSYSLOG_CONFIG_FILE + cmd += "-e '/^$SystemLogRateLimitInterval.*$/d' %s " \ + % RSYSLOG_CONFIG_FILE + cmd += "-e '/^$SystemLogRateLimitBurst.*$/d' %s " \ + % RSYSLOG_CONFIG_FILE + cmd += "-e '/^%s.*$/d' %s; " % (AP_RSYSLOG_FACILITY_LEVEL, + RSYSLOG_CONFIG_FILE) + cmd += "fi) " + self.logger.debug("Command for cleaning crash rsyslog: %s." 
% cmd) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + self.logger.logExit(ErrorCode.GAUSS_502["GAUSS_50207"] + % 'crash rsyslog' + " Error: \n%s" % output) + + # restart the rsyslog service + self.logger.debug("Restart rsyslog service.") + (status, output) = g_service.manageOSService("rsyslog", "restart") + if status != 0: + self.logger.logExit(ErrorCode.GAUSS_508["GAUSS_50802"] + % "restart rsyslog" + " Error: \n%s" % output) + + def setClientWarningEnv(self): + """ + function: Setting client warning ENV for rsyslog or syslog-ng + input : NA + output: NA + """ + self.logger.debug("Setting client warning ENV.") + # judge the syslog type on the local host is rsyslog or syslog-ng + syslogType = self.judgeSyslogType() + if syslogType == SYSLOG_NG: + self.setClientWarningEnvForSyslogng() + elif syslogType == RSYSLOG: + self.setJournalRateLimiting() + self.setClientWarningEnvForRsyslog() + self.logger.debug("Successfully set client warning ENV.") + + def setJournalRateLimiting(self): + """ + function: Setting Systemd Journal Rate Limiting + input : NA + output: NA + """ + # set SYSTEMD_JOURNALD_CONF configure + if os.path.isfile(SYSTEMD_JOURNALD_CONF): + self.logger.debug("Setting Systemd Journal Rate Limiting.") + # clean old RateLimitInterval and RateLimitBurst + g_file.deleteLine(SYSTEMD_JOURNALD_CONF, + "^\\s*RateLimitInterval\\s*=.*") + g_file.deleteLine(SYSTEMD_JOURNALD_CONF, + "^\\s*RateLimitBurst\\s*=.*") + # set RateLimitInterval and RateLimitBurst + g_file.writeFile(SYSTEMD_JOURNALD_CONF, ["RateLimitInterval=0"]) + g_file.writeFile(SYSTEMD_JOURNALD_CONF, ["RateLimitBurst=0"]) + # restart systemd-journald, make it working + self.logger.debug("Restart systemd-journald service.") + (status, output) = g_service.manageOSService("systemd-journald", + "restart") + if status != 0: + self.logger.logExit(ErrorCode.GAUSS_508["GAUSS_50802"] + % "restart systemd-journald" + + " Error: \n%s" % output) + + def setClientWarningEnvForSyslogng(self): + """ + function: Setting client warning ENV for syslog-ng + input : NA + output: NA + """ + # set client syslog-ng configure + client_filter = "filter f_gaussdb { level(err, crit)" \ + " and match('MPPDB'); };" + client_destination = "destination d_gaussdb" \ + " { udp(\"%s\" port(514) ); };" % self.warningIp + client_log = "log { source(src); filter(f_gaussdb);" \ + " destination(d_gaussdb); };" + + if (os.path.exists(SYSLOG_NG_CONFIG_FILE) and + os.path.getsize(SYSLOG_NG_CONFIG_FILE) > 0): + cmdFileter = "'%s'" % client_filter + self.logger.debug("Setting syslog-ng client configuration: %s" + + client_filter) + g_file.echoLineToFile(cmdFileter, SYSLOG_NG_CONFIG_FILE) + cmdDestination = "'%s'" % client_destination + self.logger.debug("Setting syslog-ng client configuration: %s" + + client_destination) + g_file.echoLineToFile(cmdDestination, SYSLOG_NG_CONFIG_FILE) + cmdLog = "'%s'" % client_log + self.logger.debug("Setting syslog-ng client configuration: %s" + + client_log) + g_file.echoLineToFile(cmdLog, SYSLOG_NG_CONFIG_FILE) + + self.logger.debug("Restart client syslog service.") + (status, output) = g_service.manageOSService("syslog", "restart") + if status != 0: + self.logger.logExit(ErrorCode.GAUSS_508["GAUSS_50802"] + % "restart syslog" + " Error: \n%s" % output) + + def setClientWarningEnvForRsyslog(self): + """ + function: Setting client warning ENV for rsyslog + input : NA + output: NA + """ + # set client rsyslog configure + client_journal = "$ModLoad imjournal" + client_udp = "$ModLoad imudp" + client_port = 
"$UDPServerRun 514" + # to prevent from losing log when there are mass logs, + # set the $imjournalRatelimitInterval and $imjournalRatelimitBurst + client_imjournal_ratelimit_interval = "$imjournalRatelimitInterval " \ + "%d" \ + % IMJOURNAL_RATELIMIT_INTERVAL + client_imjournal_ratelimit_burst = "$imjournalRatelimitBurst %d" \ + % IMJOURNAL_RATELIMIT_BURST + client_systemlog_ratelimit_interval = "$SystemLogRateLimitInterval " \ + "%d" \ + % SYSTEMLOG_RATELIMIT_INTERVAL + client_systemlog_ratelimit_burst = "$SystemLogRateLimitBurst %d" \ + % SYSTEMLOG_RATELIMIT_BURST + client_filter_destination = "%s @%s:514" % \ + (AP_RSYSLOG_FACILITY_LEVEL, + self.warningIp) + + if (os.path.exists(RSYSLOG_CONFIG_FILE) and + os.path.getsize(RSYSLOG_CONFIG_FILE) > 0): + self.logger.debug("Setting rsyslog client configuration.") + cmdJournalUdp = "'%s'" % client_journal + g_file.echoLineToFile(cmdJournalUdp, RSYSLOG_CONFIG_FILE) + cmdCientUdp = "'%s'" % client_udp + g_file.echoLineToFile(cmdCientUdp, RSYSLOG_CONFIG_FILE) + cmdCientPort = "'%s'" % client_port + g_file.echoLineToFile(cmdCientPort, RSYSLOG_CONFIG_FILE) + cmdCientInterval = "'%s'" % client_imjournal_ratelimit_interval + g_file.echoLineToFile(cmdCientInterval, RSYSLOG_CONFIG_FILE) + cmdCientBurst = "'%s'" % client_imjournal_ratelimit_burst + g_file.echoLineToFile(cmdCientBurst, RSYSLOG_CONFIG_FILE) + cmdCientSyslogInterval = "'%s'" \ + % client_systemlog_ratelimit_interval + g_file.echoLineToFile(cmdCientSyslogInterval, RSYSLOG_CONFIG_FILE) + cmdCientSyslogBurst = "'%s'" % client_systemlog_ratelimit_burst + g_file.echoLineToFile(cmdCientSyslogBurst, RSYSLOG_CONFIG_FILE) + cmdCientFilterDest = "'%s'" % client_filter_destination + g_file.echoLineToFile(cmdCientFilterDest, RSYSLOG_CONFIG_FILE) + + # restart the rsyslog service + self.logger.debug("Restart rsyslog service.") + (status, output) = g_service.manageOSService("rsyslog", "restart") + if status != 0: + self.logger.logExit(ErrorCode.GAUSS_508["GAUSS_50802"] + % "restart rsyslog" + " Error: \n%s" % output) + + def setServerWarningEnv(self): + """ + function: Setting server warning ENV for rsyslog or syslog-ng + input : NA + output: NA + """ + self.logger.debug("Setting server warning ENV.") + # judge the syslog type on the local host is rsyslog or syslog-ng + syslogType = self.judgeSyslogType() + if syslogType == SYSLOG_NG: + self.setServerWarningEnvForSyslogng() + elif syslogType == RSYSLOG: + self.setServerWarningEnvForRsyslog() + self.logger.debug("Successfully set server warning ENV.") + + def setServerWarningEnvForSyslogng(self): + """ + function: Setting server warning ENV for syslog-ng + input : NA + output: NA + """ + # set server syslog-ng configure + server_template = "template t_gaussdb" \ + " {template(\"$DATE $SOURCEIP $MSGONLY\\n\");" \ + " template_escape(no); };" + server_source = "source s_gaussdb{ udp(); };" + server_filter = "filter f_gaussdb { level(err, crit) and" \ + " match('MPPDB'); };" + server_destination = "destination d_gaussdb" \ + " { file(\"%s\", template(t_gaussdb)); };" \ + % AP_SERVER_SYSLOG_FILE + server_log = "log { source(s_gaussdb); filter(f_gaussdb);" \ + " destination(d_gaussdb); };" + + if (os.path.exists(SYSLOG_NG_CONFIG_FILE) and + os.path.getsize(SYSLOG_NG_CONFIG_FILE) > 0): + cmdTemplate = "'%s'" % server_template + self.logger.debug("Setting syslog-ng server configuration: %s" + + server_template) + g_file.echoLineToFile(cmdTemplate, SYSLOG_NG_CONFIG_FILE) + cmdSource = "'%s'" % server_source + self.logger.debug("Setting syslog-ng server 
configuration: %s" + + server_source) + g_file.echoLineToFile(cmdSource, SYSLOG_NG_CONFIG_FILE) + cmdFilter = "'%s'" % server_filter + self.logger.debug("Setting syslog-ng server configuration: %s" + + server_filter) + g_file.echoLineToFile(cmdFilter, SYSLOG_NG_CONFIG_FILE) + cmdDestination = "'%s'" % server_destination + self.logger.debug("Setting syslog-ng server configuration: %s" + + server_destination) + g_file.echoLineToFile(cmdDestination, SYSLOG_NG_CONFIG_FILE) + cmdLog = "'%s'" % server_log + self.logger.debug("Setting syslog-ng server configuration: %s" + + server_log) + g_file.echoLineToFile(cmdLog, SYSLOG_NG_CONFIG_FILE) + + # set server sysconfig configure + server_sysconfig_syslogd = "SYSLOGD_OPTIONS=\"-r -m 0\"" + server_sysconfig_klogd = "KLOGD_OPTIONS=\"-x\"" + + if (os.path.exists(SYSLOG_NG_CONFIG_FILE_SERVER) and + os.path.getsize(SYSLOG_NG_CONFIG_FILE_SERVER) > 0): + cmdConfigLog = "'%s'" % server_sysconfig_syslogd + self.logger.debug("Setting sys-config server configuration: %s" + + server_sysconfig_syslogd) + g_file.echoLineToFile(cmdConfigLog, SYSLOG_NG_CONFIG_FILE_SERVER) + cmdConfigKLog = "'%s'" % server_sysconfig_klogd + self.logger.debug("Setting sys-config server configuration: %s" + + server_sysconfig_klogd) + g_file.echoLineToFile(cmdConfigKLog, SYSLOG_NG_CONFIG_FILE_SERVER) + + self.logger.debug("Restart server syslog service.") + (status, output) = g_service.manageOSService("syslog", "restart") + if status != 0: + self.logger.logExit(ErrorCode.GAUSS_508["GAUSS_50802"] + % "restart syslog" + " Error: \n%s" % output) + + def setServerWarningEnvForRsyslog(self): + """ + function: Setting server warning ENV for rsyslog + input : NA + output: NA + """ + # set server rsyslog configure + server_filter_destination = "%s %s" % (AP_RSYSLOG_FACILITY_LEVEL, + AP_SERVER_SYSLOG_FILE) + + if (os.path.exists(RSYSLOG_CONFIG_FILE) and + os.path.getsize(RSYSLOG_CONFIG_FILE) > 0): + # clean RSYSLOG_FACILITY_LEVEL + cmd = "sed -i -e '/^%s.*$/d' %s" % (AP_RSYSLOG_FACILITY_LEVEL, + RSYSLOG_CONFIG_FILE) + self.logger.debug("Command for cleaning crash rsyslog: %s." 
% cmd) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + self.logger.logExit(ErrorCode.GAUSS_502["GAUSS_50207"] % + 'crash rsyslog' + " Error: \n%s" % output) + + self.logger.debug("Setting rsyslog server configuration.") + cmdFilterDest = "'%s'" % server_filter_destination + g_file.echoLineToFile(cmdFilterDest, RSYSLOG_CONFIG_FILE) + + self.logger.debug("Restart server syslog service.") + (status, output) = g_service.manageOSService("syslog", "restart") + if status != 0: + self.logger.logExit(ErrorCode.GAUSS_508["GAUSS_50802"] + % "restart syslog" + " Error: \n%s" % output) + + def judgeSyslogType(self): + """ + function: judge syslog type + input : NA + output: NA + """ + self.logger.debug("judging the syslog type is rsyslog or syslog-ng.") + if os.path.isfile(RSYSLOG_CONFIG_FILE): + return RSYSLOG + elif os.path.isfile(SYSLOG_NG_CONFIG_FILE): + return SYSLOG_NG + else: + self.logger.logExit(ErrorCode.GAUSS_502["GAUSS_50219"] + + " \nError: Failed to judge the syslog type.") + + def setLibrary(self): + """ + function: Setting Library + input : NA + output: NA + """ + self.logger.debug("Setting Library.") + config_file_dir = "/etc/ld.so.conf" + alreadySet = False + # check if the file is a link + g_OSlib.checkLink(config_file_dir) + if os.path.isfile(config_file_dir): + with open(config_file_dir, "r") as fp: + libs = fp.read() + for lib in libs.split("\n"): + if lib.strip() == "/usr/local/lib": + alreadySet = True + if alreadySet: + pass + else: + cmd = "echo '/usr/local/lib' >> '/etc/ld.so.conf' && ldconfig" + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + self.logger.logExit(ErrorCode.GAUSS_514["GAUSS_51400"] + % cmd + " Error: \n%s" % output) + self.logger.debug("Successfully set Library.") + + def checkPlatformArm(self): + """ + function: Setting ARM Optimization + input : NA + output: NA + """ + self.logger.debug("Check if platform is ARM.") + try: + global ARM_PLATE + cmd = "python3 -c 'import platform;print(platform.machine())'" + self.logger.debug("Command for getting querying platform: %s" + % cmd) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + self.logger.logExit(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + " Error: \n%s" % output) + if str(output) == "aarch64": + ARM_PLATE = True + except Exception as e: + self.logger.logExit(str(e)) + self.logger.debug("Successfully check platform ARM.") + + def setArmOptimization(self): + """ + function: Setting ARM Optimization + input : NA + output: NA + """ + self.logger.debug("Set ARM Optimization.") + try: + initFile = DefaultValue.getOSInitFile() + clusterToolPath = self.clusterToolPath + # set_arm_optimization + init_cmd = "sed -i \"/(if test -f \'.*setArmOptimization.sh\';" \ + " then export LC_ALL=C;" \ + " sh .*setArmOptimization.sh;fi)/d\" %s && " \ + % initFile + init_cmd += "echo " \ + "\"(if test -f \'%s/sudo/setArmOptimization.sh\';" \ + " then export LC_ALL=C;" \ + "sh %s/sudo/setArmOptimization.sh;fi)\" >> %s" \ + % (clusterToolPath, clusterToolPath, initFile) + (status, output) = subprocess.getstatusoutput(init_cmd) + if status != 0: + self.logger.logExit(ErrorCode.GAUSS_514["GAUSS_51400"] + % init_cmd + " Error: \n%s" % output) + cmd = "if test -f \'%s/sudo/setArmOptimization.sh\'; then export" \ + " LC_ALL=C;sh %s/sudo/setArmOptimization.sh;fi" \ + % (clusterToolPath, clusterToolPath) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + self.logger.logExit(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + " Error: \n%s" % output) + 
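+            # Editor's note: init_cmd above uses a delete-then-append pattern
+            # so that re-running preinstall never duplicates the entry in the
+            # OS init file. A minimal standalone sketch of the same
+            # idempotent-registration idea (names and paths here are
+            # illustrative assumptions, not part of this patch):
+            #
+            #   def register_startup_line(init_file, line):
+            #       """Drop any old copy of `line`, then append one copy."""
+            #       with open(init_file) as fp:
+            #           kept = [l for l in fp if l.rstrip("\n") != line]
+            #       with open(init_file, "w") as fp:
+            #           fp.writelines(kept)
+            #           fp.write(line + "\n")
+            #
+            #   register_startup_line("/etc/rc.d/rc.local",
+            #                         "sh /opt/om/sudo/setArmOptimization.sh")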
except Exception as e: + self.logger.logExit(str(e)) + self.logger.debug("Successfully set ARM Optimization.") + + def setSctp(self): + """ + function: Setting SCTP + input : NA + output: NA + """ + self.logger.debug("Setting SCTP.") + try: + + key = "install ipv6 \/bin\/true" + confFile = "/etc/modprobe.d/*ipv6.conf" + + initFile = DefaultValue.getOSInitFile() + cmd = "ls %s" % confFile + (status, output) = subprocess.getstatusoutput(cmd) + if status == 0: + cmd = "sed -i 's/^.*\(%s.*\)/#\\1/g' %s" % (key, confFile) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + self.logger.logExit(ErrorCode.GAUSS_502["GAUSS_50223"] + % confFile + " Error: \n%s" % output) + cmd = "modprobe ipv6" + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + self.logger.logExit(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + " Error: \n%s" % output) + cmd = "modprobe sctp" + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + self.logger.logExit(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + " Error: \n%s" % output) + + cmd = "uname -r" + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + self.logger.logExit(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + " Error: \n%s" % output) + + # Since redhat7.4 kernel module files ending in .xz + stcpFile = "/lib/modules/%s/kernel/net/sctp/sctp.ko" \ + % output.strip() + stcpFileXz = "/lib/modules/%s/kernel/net/sctp/sctp.ko.xz" \ + % output.strip() + if (not os.path.exists(stcpFile)) and \ + (not os.path.exists(stcpFileXz)): + output = stcpFile + " and " + stcpFileXz + self.logger.logExit(ErrorCode.GAUSS_502["GAUSS_50201"] + % output) + + cmd_insmod = "insmod %s >/dev/null 2>&1" % stcpFileXz + (status, output) = subprocess.getstatusoutput(cmd_insmod) + + cmd_insmod = "insmod %s >/dev/null 2>&1" % stcpFile + (status, output) = subprocess.getstatusoutput(cmd_insmod) + + cmd = "lsmod | grep 'sctp ' | wc -l" + (status, output) = subprocess.getstatusoutput(cmd) + if not str(output.strip()).isdigit() or int(output.strip()) == 0: + self.logger.logExit(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + " Error: \n%s" % output) + + init_cmd = "sed -i '/^modprobe sctp$/d' %s &&" % initFile + init_cmd += "echo \"modprobe sctp\" >> %s &&" % initFile + init_cmd += "sed -i '/^insmod.*sctp.ko/d' %s &&" % initFile + init_cmd += "echo \"%s\" >> %s" % (cmd_insmod, initFile) + (status, output) = subprocess.getstatusoutput(init_cmd) + if status != 0: + self.logger.logExit(ErrorCode.GAUSS_514["GAUSS_51400"] + % init_cmd + " Error: \n%s" % output) + + cmd = "sed -i \"/^sysctl -p/d\" %s &&" % initFile + cmd += "echo \"sysctl -p\" >> %s" % initFile + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + self.logger.logExit(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + " Error: \n%s" % output) + + except Exception as e: + self.logger.logExit(str(e)) + + self.logger.debug("Successfully set Sctp.") + + def checkVirtualIp(self): + """ + function: Checking virtual IP + input : NA + output: NA + """ + self.logger.debug("Checking virtual IP...") + try: + global configuredIps + configuredIps = DefaultValue.checkIsPing(g_nodeInfo.virtualIp) + + # check the self.hostnameList values are whether or not local IPs + # obtain the all local IPs + localAddrs = DefaultValue.getIpAddressList() + for ip in g_nodeInfo.virtualIp: + if (ip not in configuredIps) and (ip not in localAddrs): + self.logger.logExit(ErrorCode.GAUSS_512["GAUSS_51224"] + % ip) + except Exception as e: + self.logger.logExit(str(e)) + 
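+        # Editor's note: netNum() below derives the network address by
+        # converting each octet to a binary string and AND-ing ip and mask
+        # by hand; setVirtualIp() only compares the results for equality.
+        # A behaviour-equivalent sketch using the standard ipaddress module
+        # (illustrative, not part of this patch; it drops the dots that
+        # netNum() keeps, which does not affect the equality test):
+        #
+        #   import ipaddress
+        #   def net_num(ip, mask):
+        #       iface = ipaddress.IPv4Interface("%s/%s" % (ip, mask))
+        #       return bin(int(iface.network.network_address))[2:].zfill(32)
+        #
+        #   # net_num("192.168.1.10", "255.255.255.0") ==
+        #   # net_num("192.168.1.99", "255.255.255.0")  ->  True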
self.logger.debug("Successfully check virtual IP.") + + # IP do operation with netmask + def netNum(self, ip, mask): + """ + function: net number + input : ip,mask + output: netAddress + """ + ipArr = ip.split(".") + maskArr = mask.split(".") + binaryIpArr = [] + binaryMaskArr = [] + for element in ipArr: + biElement = bin(int(element)).split("b")[1] + binaryIpArr.append("0" * (8 - len(biElement)) + biElement) + for element in maskArr: + biElement = bin(int(element)).split("b")[1] + binaryMaskArr.append("0" * (8 - len(biElement)) + biElement) + binaryIp = ".".join(binaryIpArr) + binaryMask = ".".join(binaryMaskArr) + netAddress = "" + for i in range(len(binaryMask)): + if binaryMask[i] == ".": + netAddress += "." + elif binaryIp[i] == "0" or binaryMask[i] == "0": + netAddress += "0" + else: + netAddress += "1" + return netAddress + + def setVirtualIp(self): + """ + function: creating Virtual Ip + input : NA + output: NA + """ + # The node instance initialization information + self.initNodeInfo() + # Add temporary files, save the virtual IP The actual + # configuration for the failure rollback + if os.path.exists(self.tmpFile): + g_file.removeFile(self.tmpFile) + tmpFileFp = None + # If this node is not configured virtual IP, exit + if g_nodeInfo.virtualIp == []: + return + # Check whether have configured the virtual ip + self.checkVirtualIp() + # If the current node virtual iP are configured, Exit + if configuredIps == []: + self.logger.debug("All virtual IP are configured.") + return + self.logger.debug("Start setting virtual IP...") + try: + # check if the file is a link + g_OSlib.checkLink(self.tmpFile) + tmpFileFp = open(self.tmpFile, "w+") + # Obtain network interface card of backIp, + # get this virtual IP network adapter card through it. + backIpNIC = DefaultValue.getNICNum(g_nodeInfo.backIps[0]) + + # Get this node netcard identifier already existing netcard + cmd = "/sbin/ifconfig -a | grep '%s' | awk '{print $1}'" \ + % backIpNIC + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_506["GAUSS_50604"] + % g_nodeInfo.backIps[0] + + " Error: \n%s" % output) + # Gets the currently available virtual NIC + nicList = output.split('\n') + flagValues = [] + for nic in nicList: + if nic.find(':') >= 0: + flagList = nic.split(':') + flag = flagList[1].strip() + if flag.isdigit(): + flagValues.append(int(flag)) + vipNo = 0 + if flagValues != []: + flagValues.sort() + vipNo = flagValues[-1] + 1 + # Determine whether the same IP network segment. + subnetMasks = [] + for backIp in g_nodeInfo.backIps: + # Get backIP subnet mask + subnetMask = "" + allNetworkInfo = g_network.getAllNetworkInfo() + for network in allNetworkInfo: + if backIp == network.ipAddress: + subnetMask = network.networkMask + # Check whether the same subnet mask backIP + if not len(subnetMasks): + subnetMasks.append(subnetMask) + else: + if subnetMask != subnetMasks[0]: + raise Exception(ErrorCode.GAUSS_506["GAUSS_50606"]) + # start setting virtual IP + backIp = g_nodeInfo.backIps[0] + # get network startup file + # On SuSE12.X there is no /etc/init.d/network. 
so skip it + distname, version = g_Platform.dist()[0:2] + if not (distname == "SuSE" and version == "12"): + network_startupFile = "/etc/init.d/network" + if not os.path.exists(network_startupFile): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % + ("startup file of the node network [%s]" + % network_startupFile)) + if not os.path.isfile(network_startupFile): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50210"] % + ("startup file of the node network [%s]" + % network_startupFile)) + + # Get OS startup file + OS_initFile = DefaultValue.getOSInitFile() + if OS_initFile == "": + raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] + % "startup file of current OS" + + " The startup file for SUSE OS is " + "/etc/init.d/boot.local.The startup file" + " for Redhat OS is /etc/rc.d/rc.local.") + for configuredIp in configuredIps: + # Check with backup virtual IP on the same network segment + if self.netNum(backIp, subnetMasks[0]) != self.netNum( + configuredIp, subnetMasks[0]): + raise Exception(ErrorCode.GAUSS_512["GAUSS_51226"] + + "Invalid Virtual IP: %s. The Back IP:" + " %s. The subnetMasks: %s" + % (configuredIp, backIp, subnetMasks[0])) + # Configuring Virtual Ip + cmd_LABEL = "ifconfig -a | grep '%s:%d'" % (backIpNIC, vipNo) + (status, output) = subprocess.getstatusoutput(cmd_LABEL) + if status == 0 or self.IsLABELconfigured(backIpNIC, vipNo): + vipNo += 1 + cmd = "/sbin/ifconfig %s:%d %s netmask %s up" % (backIpNIC, + vipNo, + configuredIp, + subnetMask) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + " Error: \n%s" % output) + + # Virtual IP configuration write OS startup file + lineInfo = '^\\/sbin\\/ifconfig .* %s netmask .* up$' \ + % configuredIp + g_file.deleteLine(OS_initFile, lineInfo) + if distname == "SuSE": + if version == "11": + # get configure virtual IP line number position + # of network startup file for suse OS + cmd = "grep -rn '[ ]*$FAKE ifup-route noiface -o" \ + " rc $MODE' /etc/init.d/network" + (status, output) = subprocess.getstatusoutput(cmd) + outputlist = output.split("\n") + if status != 0 or len(outputlist) != 1: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] + % cmd + " Error: \n%s" % output) + LineNumber = int(outputlist[0].split(":")[0]) + lineInfo = '[ ]*\\/sbin\\/ifconfig .* %s netmask' \ + ' .* up$' % configuredIp + g_file.deleteLine(network_startupFile, lineInfo) + cmd = "sed -i \"%di\ %s\" %s \ + " % (LineNumber + 1, cmd, network_startupFile) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50223"] + % OS_initFile + " Error: \n%s" + % output) + # if the Ip has been configured, do nothing, + # else if the ip not configured but the LABEL + # has been configured LABEL number +1,else do as privous + if (self.IsIPconfigured(backIpNIC, + configuredIp, vipNo) == 0): + cmd = "" + elif (self.IsIPconfigured(backIpNIC, + configuredIp, vipNo) == 1): + vipNo += 1 + cmd = "sed -i '$a\\\nIPADDR_%d=%s\\nNETMASK_%d=%s\\" \ + "nLABEL_%d=%d' /etc/sysconfig/network/ifcfg-%s \ + " % (vipNo, configuredIp, vipNo, subnetMask, + vipNo, vipNo, backIpNIC) + else: + cmd = "sed -i '$a\\\nIPADDR_%d=%s\\nNETMASK_%d=%s\\" \ + "nLABEL_%d=%d' /etc/sysconfig/network/ifcfg-%s \ + " % (vipNo, configuredIp, vipNo, subnetMask, + vipNo, vipNo, backIpNIC) + else: + vip_nic = "%s:%d" % (backIpNIC, vipNo) + nicFile = "/etc/sysconfig/network-scripts/ifcfg-%s" \ + % vip_nic + networkConfiguredFile = \ + 
DefaultValue.getNetworkConfiguredFile(configuredIp) + if networkConfiguredFile == "": + networkConfiguredFile = nicFile + cmd = "rm -rf '%s' && touch '%s' && chmod %s '%s' \ + && echo -e 'DEVICE=%s\nIPADDR=%s\nNETMASK=%s' >> %s\ + " % (networkConfiguredFile, nicFile, + DefaultValue.FILE_MODE, nicFile, vip_nic, + configuredIp, subnetMask, nicFile) + if cmd != "" and cmd is not None: + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50223"] + % OS_initFile + " Error: \n%s" + % output) + print("%s" % configuredIp, file=tmpFileFp) + vipNo += 1 + tmpFileFp.flush() + tmpFileFp.close() + g_file.changeMode(DefaultValue.KEY_FILE_MODE, self.tmpFile) + except Exception as e: + if tmpFileFp: + tmpFileFp.close() + if os.path.exists(self.tmpFile): + os.remove(self.tmpFile) + self.logger.logExit(str(e)) + self.logger.debug("Successfully set virtual IP.") + + def IsIPconfigured(self, backIpNIC, configuredIp, i): + """ + function: check has the ip or LABEL been configured , + if the ip has been configured return 0 , + else if the LABEL has been configured + return 1, else return 2 + input :backIpNIC, configuredIp, LABEL number + output: 0, 1, 2 + """ + networkfile = '/etc/sysconfig/network/ifcfg-' + backIpNIC + LABEL = "LABEL_" + str(i) + "=" + str(i) + # check if the file is a link + g_OSlib.checkLink(networkfile) + with open(networkfile, "r") as fp: + for line in fp: + if line.split("=")[1].strip() == configuredIp: + return 0 + elif LABEL in line: + return 1 + return 2 + + def IsLABELconfigured(self, backIpNIC, i): + """ + function: check does the label exists already in network file, + if yes, return True, if no, return False + input : backIpNIC ,LABEL number + output: bool + """ + networkfile = '/etc/sysconfig/network/ifcfg-' + backIpNIC + cmd = "cat '%s' | grep LABEL_%d=%d" % (networkfile, i, i) + status = subprocess.getstatusoutput(cmd)[0] + if status == 0: + return True + else: + return False + + def checkRemoveIpc(self): + """ + function: Checking RemoveIpc + input : NA + output: NA + """ + self.logger.debug("Checking RemoveIpc.") + ipcPath = "/etc/systemd/logind.conf" + if not os.path.exists(ipcPath): + return + distname, version = g_Platform.dist()[0:2] + ipcList = g_file.readFile(ipcPath) + ipcFlag = False + noFlag = False + for line in ipcList: + if "RemoveIPC" in line: + ipcFlag = True + self.logger.debug("Find the removeIPC in file" + " /etc/systemd/logind.conf," + " the content is: %s." % line) + if "no" in line.lower() and not line.startswith("#"): + self.logger.debug("The value of removeIPC is no.") + noFlag = True + for line in ipcList: + if "RemoveIPC" in line: + if "yes" in line.lower() and noFlag: + if not line.startswith("#"): + self.logger.debug("The value of removeIPC is yes.") + self.logger.logExit(ErrorCode.GAUSS_523["GAUSS_52301"] + + " The value of removeIPC" + " must be no.") + if "yes" in line.lower() and not noFlag: + if not line.startswith("#"): + self.logger.debug("The value of removeIPC is yes.") + self.logger.logExit(ErrorCode.GAUSS_523["GAUSS_52301"] + + " The value of removeIPC" + " must be no.") + # In Redhat/Centos 7.2, RemoveIPC default value is yes. + elif (distname in ("redhat", "centos") and + version in "7.2"): + self.logger.debug("The value of removeIPC is yes.") + self.logger.logExit(ErrorCode.GAUSS_523["GAUSS_52301"] + + " The value of removeIPC must" + " be no in Redhat/Centos 7.2.") + if not ipcFlag: + # In Redhat/Centos 7.2, RemoveIPC default value is yes. 
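+            # Editor's note: `version in "7.2"` below is a substring test, so
+            # it also matches version strings such as "7" or "2". A sketch of
+            # a stricter prefix check (illustrative, not part of this patch):
+            #
+            #   if distname in ("redhat", "centos") and \
+            #           version.startswith("7.2"):
+            #       ...  # RemoveIPC defaults to yes on RHEL/CentOS 7.2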
+ if distname in ("redhat", "centos") and version in "7.2": + self.logger.debug("The value of removeIPC is yes.") + self.logger.logExit(ErrorCode.GAUSS_523["GAUSS_52301"] + + " The value of removeIPC can not be" + " empty in Redhat/Centos 7.2," + " it must be no.") + else: + self.logger.debug("Do not find RemoveIPC.") + self.logger.debug("Successfully check RemoveIpc.") + + def checkAbrt(self): + """ + function: Checking abrt, make sure abrt-hook-ccpp does not work. + input : NA + output: NA + """ + self.logger.debug("Checking core_pattern.") + sysFile = "/etc/sysctl.conf" + coreFile = "/proc/sys/kernel/core_pattern" + coreFlag = False + + coreList = g_file.readFile(sysFile) + for line in coreList: + if "kernel.core_pattern" in line and not line.startswith("#"): + coreFlag = True + self.logger.debug("Find the kernel.core_pattern in file" + " /etc/sysctl.conf, the content is: %s." + % line) + if "|" in line and "abrt-hook-ccpp" in line: + self.logger.logExit(ErrorCode.GAUSS_523["GAUSS_52301"] + + " The value of kernel.core_pattern " + "can not combine with " + "abrt-hook-ccpp in sysctl file.") + + if not coreFlag: + coreList = g_file.readFile(coreFile) + for line in coreList: + if ("|" in line and "abrt-hook-ccpp" in line and + not line.startswith("#")): + self.logger.debug("Find the abrt-hook-ccpp in file " + "/proc/sys/kernel/core_pattern," + " the content is: %s." % line) + self.logger.logExit(ErrorCode.GAUSS_523["GAUSS_52301"] + + " Core_pattern file can not use " + "abrt-hook-ccpp to dump core.") + self.logger.debug("Successfully check core_pattern.") + + def checkDiskSpace(self): + """ + function: check the disk if it has enough space to decompress + input : NA + output: NA + """ + self.logger.debug("Checking available space.") + versionFile = VersionInfo.get_version_file() + version, number, commitid = VersionInfo.get_version_info(versionFile) + actualPath = self.clusterAppPath + "_" + commitid + if not os.path.exists(actualPath): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % actualPath) + cmd = "df -h %s | awk '{print $4}' | xargs" % actualPath + self.logger.debug("Command to check available disk space is:\n%s" + % cmd) + # output is this format: "Avail 104G" + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + self.logger.logExit(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + "Error: output is '%s'." % output) + self.logger.debug( + "The result of checking available disk space is:\n%s" % output) + size = output.split(" ")[1][:-1] + if not size.isdigit(): + self.logger.logExit(ErrorCode.GAUSS_516["GAUSS_51633"] + % "available disk space") + if int(size) < DefaultValue.GREY_DISK_SIZE: + raise Exception(ErrorCode.GAUSS_504["GAUSS_50411"] + % (DefaultValue.GREY_DISK_SIZE + 'G')) + self.logger.debug("Successfully check host available space.") + + def initGaussLog(self): + """ + function: creating GaussLog path. 
Before we modify the owner + of the path, we must create the path + input : NA + output: NA + """ + sys.exit(0) + + def getWhiteList(self, confFile): + """ + function: get whiteList + input : NA + output : NA + """ + print(confFile) + fp = configparser.RawConfigParser() + fp.read(confFile) + + optionList = fp.options("WHITELIST") + white_dict = {} + for key in optionList: + value = fp.get("WHITELIST", key) + ip_list = value.strip("\"").split(',') + white_dict[key] = ip_list + self.white_list = white_dict + + def setDefaultIptables(self): + """ + function: set default ip tables + input : NA + output : NA + """ + cmd = "iptables -A INPUT -s 0.0.0.0/0 -d 0.0.0.0/0 -p tcp" \ + " --dport 49537 -j ACCEPT && " + cmd += "iptables -A INPUT -s 0.0.0.0/0 -d 0.0.0.0/0 -p udp" \ + " --dport 60129 -j ACCEPT && " + cmd += "iptables -A INPUT -s 0.0.0.0/0 -d 0.0.0.0/0 -p tcp" \ + " --dport 49537 -j ACCEPT" + status, output = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + " Error: \n%s" % output) + self.logger.debug("Set default iptables successfully.") + + def checkWhiteList(self): + """ + function: check whiteList + input : NA + output : NA + """ + if len(self.white_list) != 2: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50205"] + % "White list, please check confFile.") + if ("inner" not in self.white_list.keys() or + "outter" not in self.white_list.keys()): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50205"] + % "White list file with key error.") + if '' in self.white_list['inner']: + self.white_list['inner'].remove("") + if '' in self.white_list['outter']: + self.white_list['outter'].remove("") + if (len(self.white_list['inner']) > 0 or + len(self.white_list['outter'])) > 0: + self.logger.debug("Inner white list is %s" + % self.white_list['inner']) + self.logger.debug("Outter white list is %s" + % self.white_list['outter']) + for ip in self.white_list['inner']: + DefaultValue.isIpValid(ip) + for ip in self.white_list['outter']: + DefaultValue.isIpValid(ip) + compare_list = [ip for ip in self.white_list['inner'] + if ip in self.white_list['outter']] + if len(compare_list) > 0: + raise Exception(ErrorCode.GAUSS_527["GAUSS_52708"] + + "Inner IP and Outter IP have the same node.%s" + % compare_list) + return True + elif len(self.white_list['inner']) == 0 and \ + len(self.white_list['outter']) == 0: + self.clearIptables() + self.setDefaultIptables() + return False + else: + return False + + def clearIptablesItem(self, clear_length, chain_type): + """ + function: clear Iptables item + input : NA + output : NA + """ + exe_cmd = "" + cmd = "iptables -D %s 1" % chain_type + if clear_length.isdigit(): + for i in range(int(clear_length)): + if (i + 1) == int(clear_length): + exe_cmd += cmd + else: + exe_cmd += cmd + " && " + self.logger.debug("Execute command: %s" % exe_cmd) + status, output = subprocess.getstatusoutput(exe_cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % exe_cmd + + " Error: \n%s" % output) + self.logger.debug("Clear %s in iptables success." 
% chain_type) + + def clearIptablesChain(self, tables_str, chain_type): + """ + function: clear ip tables chain + input : NA + output : NA + """ + output_result = tables_str.split("Chain %s (policy ACCEPT)" + % chain_type)[1] + if output_result.find("Chain") >= 0: + output_result = output_result.split("Chain")[0] + self.logger.debug("RESULT IS %s" % output_result) + if len(output_result.strip().split('\n')) == 1: + self.logger.debug("no need to clear iptables.") + return + self.logger.debug(output_result.strip().split('\n')[-1]) + output_length = output_result.strip().split('\n')[-1].split()[0] + self.clearIptablesItem(output_length, chain_type) + + def clearWhiteListChain(self, tables_str): + """ + function: clear white list chain + input : NA + output : NA + """ + if "Chain whitelist" in tables_str: + old_white_str = tables_str.split("Chain whitelist")[1] + if "Chain " in old_white_str: + old_white_str = old_white_str.split("Chain ")[0] + white_length = old_white_str.split('\n')[-1].strip().split()[0] + self.clearIptablesItem(white_length, "whitelist") + cmd = "iptables -X whitelist" + status, output = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + " Error: \n%s" % output) + else: + self.logger.debug("There is no white list chain.") + + def clearIptables(self): + """ + function: clear ip tables + input : NA + output : NA + """ + self.logger.debug("Start clear IP tables chain list.") + cmd = "iptables -nL --line-number" + status, output = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + " Error: \n%s" % output) + # clear INPUT + self.clearIptablesChain(output, "INPUT") + # clear FORWARD + self.clearIptablesChain(output, "FORWARD") + # clear OUTPUT + self.clearIptablesChain(output, "OUTPUT") + # clear whitelist + self.clearWhiteListChain(output) + + self.logger.debug("Clear IP tables successfully.") + + def setWhiteList(self): + """ + function: set white list + input : NA + output : NA + """ + white_list = [] + white_list.extend(self.white_list['inner']) + white_list.extend(self.white_list['outter']) + cmd = "iptables -N whitelist && " + for ip in white_list: + cmd += "iptables -A whitelist -s %s -j ACCEPT && " % ip + + cmd += "iptables -A INPUT -m state --state RELATED," \ + "ESTABLISHED -j ACCEPT && " + cmd += "iptables -A INPUT -p all -j whitelist && " + cmd += "iptables -A INPUT -i lo -j ACCEPT && " + cmd += "iptables -A INPUT -j REJECT --reject-with " \ + "icmp-host-prohibited" + + status, output = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + " Error: \n%s" % output) + self.logger.debug("Set white list success.") + + def checkOSSoftware(self): + """ + function: Check whether software meets the installation requirements + input : NA + output: NA + """ + self.logger.debug("Checking os software.") + no_install_soft_list = [] + for softname in software_list: + if softname.startswith("bzip2"): + cmd = "which bzip2" + else: + cmd = "rpm -qa|grep -c " + softname + self.logger.debug("Command to check %s by %s" \ + % (softname, cmd)) + # output is num of softname + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + self.logger.debug(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + "Error: output is '%s'." 
% output) + no_install_soft_list.append(softname) + if len(no_install_soft_list) > 0: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51405"] % \ + str(no_install_soft_list)) + self.logger.debug("Successfully check OS software.") + + def separate_root_scripts(self): + """ + fix packgae path permission and owner + :return: + """ + package_path = get_package_path() + om_root_path = os.path.dirname(package_path) + if om_root_path == DefaultValue.ROOT_SCRIPTS_PATH: + return + + self.logger.log("Separate om root scripts.") + self.logger.debug("Create om root path.") + # /root/gauss_om/user_name + dest_path = os.path.join(DefaultValue.ROOT_SCRIPTS_PATH, self.user) + if os.path.exists(dest_path): + shutil.rmtree(dest_path) + os.makedirs(dest_path) + g_file.changeOwner("root", dest_path) + + # cp $GPHOME script lib to /root/gauss_om/xxx + cmd = ("cp -rf %s/script %s/lib %s/version.cfg %s" + % (self.clusterToolPath, self.clusterToolPath, + self.clusterToolPath, dest_path)) + DefaultValue.execCommandLocally(cmd) + root_scripts = ["gs_postuninstall", "gs_preinstall", + "gs_checkos"] + common_scripts = ["gs_sshexkey", "killall", "gs_checkperf"] + # the script files are not stored in the env path + not_in_env_scripts = ["gs_expansion"] + root_save_files = root_scripts + common_scripts + self.logger.debug("Delete user scripts in om root path.") + # delete user scripts in om root path + om_root_path = os.path.join(dest_path, "script") + root_om_files = os.listdir(om_root_path) + for root_file in root_om_files: + if root_file.startswith("gs_"): + if root_file not in root_save_files: + g_file.removeFile("%s/%s" % (om_root_path, root_file)) + g_file.changeMode(DefaultValue.KEY_DIRECTORY_MODE, + dest_path, recursive=True) + + self.logger.debug("Delete root scripts in om user path.") + # delete root scripts in GPHOME + om_user_path = os.path.join(self.clusterToolPath, "script") + user_om_files = os.listdir(om_user_path) + for user_file in user_om_files: + if user_file.startswith("gs_"): + if user_file in root_scripts or user_file in not_in_env_scripts: + g_file.removeFile("%s/%s" % (om_user_path, user_file)) + self.logger.debug("Delete cluster decompress package in root path.") + + def fixop_xml_and_mpp_file(self): + """ + fix config file owner + :return: + """ + self.logger.log("change '%s' files permission and owner." + % self.clusterConfig) + g_file.changeOwner(self.user, self.clusterConfig) + g_file.changeMode(DefaultValue.KEY_FILE_MODE, self.clusterConfig) + if self.mpprcFile: + self.logger.log("change '%s' files permission and owner." + % self.mpprcFile) + g_file.changeMode(DefaultValue.KEY_FILE_MODE, self.mpprcFile) + g_file.changeOwner(self.user, self.mpprcFile) + + def fixop_tool_path(self): + """ + fix cluster path owner + :return: + """ + toolPath = self.clusterToolPath + self.logger.log("change '%s' files permission and owner." 
% toolPath) + g_file.changeOwner(self.user, toolPath, recursive=True) + g_file.changeMode(DefaultValue.KEY_DIRECTORY_MODE, + toolPath, recursive=True) + g_file.changeMode(DefaultValue.SPE_FILE_MODE, + "%s/script/gs_*" % toolPath) + g_file.changeMode(DefaultValue.MIN_FILE_MODE, "%s/*.sha256" % toolPath) + g_file.changeMode(DefaultValue.MIN_FILE_MODE, "%s/*.tar.gz" % toolPath) + g_file.changeMode(DefaultValue.MIN_FILE_MODE, "%s/*.tar.bz2" % + toolPath) + g_file.changeMode(DefaultValue.MIN_FILE_MODE, "%s/version.cfg" % + toolPath) + + def fixop_package_path(self): + """ + fix software path permission + root permission + :return: + """ + package_path = get_package_path() + gsom_path = os.path.dirname(package_path) + if gsom_path != DefaultValue.ROOT_SCRIPTS_PATH: + self.logger.log("Change file mode in path %s" % package_path) + g_file.changeOwner("root", package_path, recursive=True) + g_file.changeMode(DefaultValue.MAX_DIRECTORY_MODE, package_path) + g_file.changeMode(DefaultValue.KEY_DIRECTORY_MODE, + "%s/script" % package_path, recursive=True) + g_file.changeMode(DefaultValue.KEY_DIRECTORY_MODE, + "%s/lib" % package_path, recursive=True) + g_file.changeMode(DefaultValue.MIN_FILE_MODE, "%s/*.sha256" % + package_path) + g_file.changeMode(DefaultValue.MIN_FILE_MODE, "%s/*.tar.gz" % + package_path) + g_file.changeMode(DefaultValue.MIN_FILE_MODE, "%s/*.tar.bz2" % + package_path) + g_file.changeMode(DefaultValue.MIN_FILE_MODE, "%s/version.cfg" % + package_path) + + def fix_owner_and_permission(self): + """ + function: fix owner and permission + input: NA + output: NA + """ + self.fixop_package_path() + self.fixop_tool_path() + self.fixop_xml_and_mpp_file() + + def fix_server_pkg_permission(self): + """ + fix server package permission + :return: + """ + self.fix_owner_and_permission() + self.separate_root_scripts() + + def changeToolEnv(self): + """ + function: change software tool env path + :return: + """ + osProfile = "/etc/profile" + self.clean_tool_env(osProfile) + userpath = pwd.getpwnam(self.user).pw_dir + userProfile = os.path.join(userpath, ".bashrc") + if not os.path.exists(userProfile): + self.logger.logExit(ErrorCode.GAUSS_502[ + "GAUSS_50201"] % 'user profile' + + " Please create %s." 
% userProfile) + self.clean_tool_env(userProfile) + # set GPHOME + g_file.writeFile(userProfile, + ["export GPHOME=%s" % self.clusterToolPath]) + # set PATH + g_file.writeFile(userProfile, [ + "export PATH=$GPHOME/script/gspylib/pssh/bin:" + "$GPHOME/script:$PATH"]) + # set LD_LIBRARY_PATH + g_file.writeFile(userProfile, [ + "export LD_LIBRARY_PATH=" + "$GPHOME/script/gspylib/clib:$LD_LIBRARY_PATH"]) + g_file.writeFile(userProfile, [ + "export LD_LIBRARY_PATH=$GPHOME/lib:$LD_LIBRARY_PATH"]) + # set PYTHONPATH + g_file.writeFile(userProfile, ["export PYTHONPATH=$GPHOME/lib"]) + + def clean_tool_env(self, userProfile): + # clean GPHOME + g_file.deleteLine(userProfile, "^\\s*export\\s*GPHOME=.*$") + self.logger.debug( + "Deleting crash GPHOME in user environment variables.") + + # clean LD_LIBRARY_PATH + g_file.deleteLine(userProfile, + "^\\s*export\\s*LD_LIBRARY_PATH=\\$GPHOME\\/script" + "\\/gspylib\\/clib:\\$LD_LIBRARY_PATH$") + g_file.deleteLine(userProfile, + "^\\s*export\\s*LD_LIBRARY_PATH=\\$GPHOME\\/lib:" + "\\$LD_LIBRARY_PATH$") + self.logger.debug( + "Deleting crash LD_LIBRARY_PATH in user environment variables.") + + # clean PATH + g_file.deleteLine(userProfile, + "^\\s*export\\s*PATH=\\$GPHOME\\/pssh-2.3.1\\/bin:" + "\\$GPHOME\\/script:\\$PATH$") + g_file.deleteLine(userProfile, + "^\\s*export\\s*PATH=\\$GPHOME\\/script\\/gspylib\\" + "/pssh\\/bin:\\$GPHOME\\/script:\\$PATH$") + self.logger.debug("Deleting crash PATH in user environment variables.") + + # clean PYTHONPATH + g_file.deleteLine(userProfile, + "^\\s*export\\s*PYTHONPATH=\\$GPHOME\\/lib") + self.logger.debug( + "Deleting crash PYTHONPATH in user environment variables.") + + def run(self): + """ + function: run method + input : NA + output : NA + """ + try: + self.parseCommandLine() + self.checkParameter() + self.initGlobals() + except Exception as e: + GaussLog.exitWithError(str(e)) + + try: + if self.action == ACTION_PREPARE_PATH: + self.prepareGivenPath(self.preparePath, self.checkEmpty) + elif self.action == ACTION_CHECK_OS_VERSION: + self.checkOSVersion() + elif self.action == ACTION_CREATE_OS_USER: + self.createOSUser() + elif self.action == ACTION_CHECK_OS_USER: + global checkOSUser + checkOSUser = True + self.createOSUser() + elif self.action == ACTION_CHECK_HOSTNAME_MAPPING: + self.checkMappingForHostName() + elif self.action == ACTION_CREATE_CLUSTER_PATHS: + self.createClusterPaths() + elif self.action == ACTION_SET_FINISH_FLAG: + self.checkAbrt() + self.checkRemoveIpc() + self.setFinishFlag() + elif self.action == ACTION_SET_TOOL_ENV: + self.setToolEnv() + elif self.action == ACTION_SET_USER_ENV: + self.setDBUerProfile() + elif self.action == ACTION_PREPARE_USER_CRON_SERVICE: + self.prepareUserCronService() + elif self.action == ACTION_PREPARE_USER_SSHD_SERVICE: + self.prepareUserSshdService() + elif self.action == ACTION_SET_LIBRARY: + self.setLibrary() + elif self.action == ACTION_SET_SCTP: + self.setSctp() + elif self.action == ACTION_SET_VIRTUALIP: + DefaultValue.modifyFileOwnerFromGPHOME(self.logger.logFile) + self.setVirtualIp() + elif self.action == ACTION_INIT_GAUSSLOG: + self.initGaussLog() + elif self.action == ACTION_CHECK_DISK_SPACE: + self.checkDiskSpace() + elif self.action == ACTION_SET_ARM_OPTIMIZATION: + self.checkPlatformArm() + if ARM_PLATE: + self.setArmOptimization() + else: + self.logger.debug("The plate is not arm," + " skip set arm options.") + elif self.action == ACTION_CHECK_ENVFILE: + (checkstatus, checkoutput) = \ + DefaultValue.checkEnvFile(self.mpprcFile) + if self.mpprcFile != 
"": + envfile = self.mpprcFile + " and /etc/profile" + else: + envfile = "/etc/profile and ~/.bashrc" + if not checkstatus: + self.logger.logExit(ErrorCode.GAUSS_518["GAUSS_51808"] + % checkoutput + "Please check %s." + % envfile) + elif self.action == ACTION_SET_WHITELIST: + self.logger.debug("Start setting white list.") + confFile = os.path.join( + os.path.dirname(os.path.realpath(__file__)), + "../../agent/om_agent.conf") + if os.path.isfile(confFile): + self.getWhiteList(confFile) + if self.checkWhiteList(): + self.clearIptables() + self.setWhiteList() + else: + self.logger.debug("White list file not exist," + " skip set white list.") + elif self.action == ACTION_CHECK_OS_SOFTWARE: + self.checkOSSoftware() + elif self.action == ACTION_FIX_SERVER_PACKAGE_OWNER: + self.fix_server_pkg_permission() + elif self.action == ACTION_CHANGE_TOOL_ENV: + self.changeToolEnv() + else: + self.logger.logExit(ErrorCode.GAUSS_500["GAUSS_50000"] + % self.action) + except Exception as e: + self.logger.logExit(str(e)) + + +if __name__ == '__main__': + """ + main function + """ + try: + preInstallUtility = PreInstall() + preInstallUtility.run() + except Exception as e: + GaussLog.exitWithError(str(e)) diff --git a/script/local/Resetreplconninfo.py b/script/local/Resetreplconninfo.py new file mode 100644 index 0000000..5d7c6a3 --- /dev/null +++ b/script/local/Resetreplconninfo.py @@ -0,0 +1,199 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +############################################################################# +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +# Description : Resetreplconninfo.py is a utility to reset local replconninfo. +############################################################################# + +import getopt +import os +import sys +import subprocess + +sys.path.append(sys.path[0] + "/../") +from gspylib.common.GaussLog import GaussLog +from gspylib.common.Common import DefaultValue +from gspylib.common.ErrorCode import ErrorCode + +######################################################################## +# Global variables define +######################################################################## +g_opts = None + + +######################################################################## +class CmdOptions(): + """ + """ + + def __init__(self): + """ + """ + self.action = "" + self.clusterUser = "" + + +def usage(): + """ +Resetreplconninfo.py is a utility to reset replconninfos on local node. + +Usage: + python3 Resetreplconninfo.py --help + python3 Resetreplconninfo.py -U omm -t reset + +General options: + -U Cluster user. + -t reset. + --help Show help information for this utility, + and exit the command line mode. 
+ """ + print(usage.__doc__) + + +def parseCommandLine(): + """ + function: parse command line + """ + try: + opts, args = getopt.getopt(sys.argv[1:], "U:t:h", ["help"]) + except Exception as e: + usage() + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50000"] % str(e)) + + if len(args) > 0: + usage() + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50000"] + % str(args[0])) + + global g_opts + g_opts = CmdOptions() + + for (key, value) in opts: + if key == "-h" or key == "--help": + usage() + sys.exit(0) + elif key == "-t": + g_opts.action = value + elif key == "-U": + g_opts.clusterUser = value + + +def checkParameter(): + """ + function: check parameter + """ + if g_opts.clusterUser == "": + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50001"] % 'U' + ".") + if g_opts.action == "": + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50001"] % 't' + ".") + if g_opts.action != "reset": + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50004"] % 't') + + +class Resetreplconninfo(): + """ + class: Resetreplconninfo + """ + + def __init__(self): + """ + function: configure all instance on local node + """ + # get mpprc file + envfile = os.getenv('MPPDB_ENV_SEPARATE_PATH') + if envfile is not None and envfile != "": + self.userProfile = \ + envfile.replace("\\", "\\\\").replace('"', '\\"\\"') + else: + self.userProfile = "~/.bashrc" + + def __getStatusByOM(self): + """ + function :Get the environment parameter. + output : String + """ + cmd = "source %s;gs_om -t status --detail" % self.userProfile + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] + % cmd + " Error: \n%s" % output) + return output.split("\n")[-1] + + def resetRepl(self): + """ + function: reset Repl + input : NA + output: NA + """ + status_list = self.__getStatusByOM().split('|') + repl_list = ['replconninfo' + str(i) for i in + range(1, len(status_list))] + + localhost = DefaultValue.GetHostIpOrName() + remote_ip_dict = {} + for info_all in status_list: + info = info_all.split() + if info[1] == localhost: + local_dndir = info[4] + else: + remote_ip_dict[info[2]] = info[6] + head_cmd = "source %s;" % self.userProfile + for repl in repl_list: + cmd = head_cmd + 'gs_guc check -N %s -D %s -c "%s"' % \ + (localhost, local_dndir, repl) + status, output = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % + cmd + " Error:\n%s" % output) + # get remote ip and check iscascade + replinfo_all = output.split('\n')[-2].strip().split("'") + replinfo_value = replinfo_all[1].split() + for remoteip in remote_ip_dict: + if remoteip in replinfo_all[1]: + if remote_ip_dict[remoteip] == "Cascade" and \ + "iscascade=true" not in replinfo_value: + replinfo_value.append("iscascade=true") + elif remote_ip_dict[remoteip] != "Cascade" and \ + "iscascade=true" in replinfo_value: + replinfo_value.remove("iscascade=true") + else: + break + replinfo_all = \ + replinfo_all[0] + "'" + " ".join(replinfo_value) + "'" + cmd = head_cmd + 'gs_guc reload -N %s -D %s -c "%s"' % \ + (localhost, local_dndir, replinfo_all) + status, output = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % + cmd + " Error:\n%s" % output) + break + + +if __name__ == '__main__': + try: + # parse and check input parameters + parseCommandLine() + checkParameter() + + # reset replconninfos + reseter = Resetreplconninfo() + reseter.resetRepl() + + except Exception as e: + 
GaussLog.exitWithError(str(e)) + + sys.exit(0) diff --git a/script/local/Restore.py b/script/local/Restore.py new file mode 100644 index 0000000..baa560d --- /dev/null +++ b/script/local/Restore.py @@ -0,0 +1,533 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +############################################################################# +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +# Description : Restore.py is a local utility to +# restore binary file and parameter file. +############################################################################# +import subprocess +import getopt +import os +import sys + +sys.path.append(sys.path[0] + "/../") +from gspylib.common.ParameterParsecheck import Parameter +from gspylib.common.GaussLog import GaussLog +from gspylib.common.Common import DefaultValue +from gspylib.common.ErrorCode import ErrorCode +from gspylib.common.LocalBaseOM import LocalBaseOM +from gspylib.common.DbClusterInfo import dbClusterInfo +from gspylib.os.gsfile import g_file + +# init config file parameter +POSTGRESQL_CONF = "postgresql.conf" +POSTGRESQL_HBA_CONF = "pg_hba.conf" +HOSTNAME = DefaultValue.GetHostIpOrName() +# init global paramter +g_clusterUser = "" +g_ignoreMiss = False +g_forceRestore = False +g_staticFile = "" + + +class LocalRestore(LocalBaseOM): + ''' + classdocs + ''' + + def __init__(self, logFile="", user="", restoreDir="", restorePara=False, + restoreBin=False): + """ + function: Constructor + input : logFile, user, restoreDir, restorePara, restoreBin + output: NA + """ + LocalBaseOM.__init__(self, logFile, user) + self.restoreDir = restoreDir + self.restorePara = restorePara + self.restoreBin = restoreBin + + self.installPath = "" + self.binExtractName = "" + self.group = "" + self.dbNodeInfo = None + self.clusterInfo = None + self.__hostNameFile = None + + # #static parameter + # Use binary_$hostname/parameter_$hostname to + # confirm the backup asked that + self.binTarName = "binary_%s.tar" % HOSTNAME + self.paraTarName = "parameter_%s.tar" % HOSTNAME + self.hostnameFileName = "HOSTNAME" + + ########################################################################## + # This is the main restore flow. 
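+    # A minimal driver sketch for this flow (illustrative only: the log
+    # path, user name and backup directory below are assumptions, not
+    # values taken from this patch):
+    #
+    #     restorer = LocalRestore(logFile="/tmp/local_restore.log",
+    #                             user="omm",
+    #                             restoreDir="/opt/gauss_backup",
+    #                             restorePara=True, restoreBin=True)
+    #     restorer.run()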
+ ########################################################################## + + def run(self): + """ + function: 1.parse the configuration file + 2.check restored directory + 3.restore files + input : NA + output: NA + """ + try: + self.logger.log("Executing the local restoration") + self.parseConfigFile() + self.checkRestoreDir() + self.doRestore() + self.logger.log("Successfully execute the local restoration.") + self.logger.closeLog() + sys.exit(0) + except Exception as e: + raise Exception(str(e)) + + def parseConfigFile(self): + """ + function: parse the configuration file: + 1.get local installation path for restoration + 2.Obtain user and group for restoration + 3.Obtain the local node information for restoration + input : NA + output: NA + """ + self.logger.log("Parsing the configuration file.") + + try: + self.clusterInfo = dbClusterInfo() + gaussHome = os.getenv("GAUSSHOME") + if g_forceRestore and self.restoreBin: + self.clusterInfo.appPath = gaussHome + else: + self.clusterInfo.initFromStaticConfig(self.user, g_staticFile) + hostName = DefaultValue.GetHostIpOrName() + self.dbNodeInfo = self.clusterInfo.getDbNodeByName(hostName) + if self.dbNodeInfo is None: + self.logger.logExit( + ErrorCode.GAUSS_516["GAUSS_51619"] % hostName) + # Getting local installation path for restoration. + self.logger.log("Getting local installation path for restoration.") + self.installPath = os.path.realpath(self.clusterInfo.appPath) + self.binExtractName = self.installPath.split("/")[-1] + self.logger.debug( + "Local installation path: %s." % self.installPath) + except Exception as e: + raise Exception(str(e)) + + self.logger.log("Successfully parsed the configuration file.") + + def checkRestoreDir(self): + """ + function: check restored directory + input : NA + output: NA + """ + self.logger.log("Checking restored directory.") + + try: + if (not os.path.exists(self.restoreDir) or len( + os.listdir(self.restoreDir)) == 0): + if (g_ignoreMiss): + self.logger.log( + "Restored directory does not exist or is empty.") + sys.exit(0) + else: + raise Exception(ErrorCode.GAUSS_502[ + "GAUSS_50228"] % "restored directory" + + " Error: \n%s" % self.restoreDir) + except Exception as e: + raise Exception(str(e)) + + self.logger.log("Successfully checked restored directory.") + + def doRestore(self): + """ + function: restore files + Restoring binary files: + 1.decompress tar file + 2.Check binary files + 3.Create installation path + 4.Restore binary files to install path + Restoring parameter files: + 1.decompress tar file + 2.delete temporary directory + 3.extract parameter files to the temporary directory + 4.check hostname and parameter + 5.Restore parameter files + 6.Remove the temporary directory + input : NA + output: NA + """ + self.logger.log("Restoring files.") + + if self.restoreBin: + self.logger.log("Restoring binary files.") + try: + # decompress tar file + self.decompressTarFile("binary") + + # Checking binary files + self.logger.debug("Checking if binary files exist.") + tarName = os.path.join(self.restoreDir, self.binTarName) + if (not os.path.exists(tarName)): + raise Exception( + ErrorCode.GAUSS_502["GAUSS_50201"] % "Binary files") + + # Creating installation path + self.logger.debug( + "Creating installation path if did not exist.") + if (not os.path.exists(self.installPath)): + os.makedirs(self.installPath, + DefaultValue.KEY_DIRECTORY_PERMISSION) + + # Restore binary files to install path. 
+ self.logger.debug("Restore binary files to install path.") + g_file.cleanDirectoryContent(self.installPath) + cmd = g_file.SHELL_CMD_DICT["decompressTarFile"] % ( + self.restoreDir, tarName) + cmd += " && " + cmd += g_file.SHELL_CMD_DICT["copyFile"] % ( + "'%s'/*" % self.binExtractName, self.installPath) + self.logger.debug( + "Command for restoring binary files:%s." % cmd) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50220"] % ( + "binary files to install path[%s]" % \ + self.installPath) + " Error: \n%s" % output) + g_file.removeDirectory( + os.path.join(self.restoreDir, self.binExtractName)) + except Exception as e: + raise Exception(str(e)) + self.logger.log("Successfully restored binary files.") + + if self.restorePara: + self.logger.log("Restoring parameter files.") + # Re-obtaining clusterInfo because the restoreBin succeeded + if self.dbNodeInfo is None: + self.clusterInfo.initFromStaticConfig(self.user, g_staticFile) + hostName = DefaultValue.GetHostIpOrName() + self.dbNodeInfo = self.clusterInfo.getDbNodeByName( + hostName) + if self.dbNodeInfo is None: + self.logger.logExit( + ErrorCode.GAUSS_516["GAUSS_51619"] % hostName) + + # Restoring parameter files. + try: + # decompress tar file + self.decompressTarFile("parameter") + # delete temporary directory + self.logger.debug( + "Delete temporary directory if it has existed.") + temp_dir = os.path.join(self.restoreDir, + "parameter_%s" % HOSTNAME) + if (os.path.exists(temp_dir)): + g_file.removeDirectory(temp_dir) + + # extract parameter files to the temporary directory + self.logger.debug( + "Extract parameter files to the temporary directory.") + tarName = os.path.join(self.restoreDir, self.paraTarName) + if (not os.path.exists(tarName)): + if (g_ignoreMiss): + self.logger.error(ErrorCode.GAUSS_502[ + "GAUSS_50201"] + % "parameter files") + sys.exit(0) + else: + raise Exception(ErrorCode.GAUSS_502[ + "GAUSS_50201"] % "parameter files") + + cmd = g_file.SHELL_CMD_DICT["decompressTarFile"] % ( + self.restoreDir, tarName) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + raise Exception(ErrorCode.GAUSS_514[ + "GAUSS_51400"] % cmd + + " Error: \n%s" % output) + + # check hostname + self.logger.debug("Checking hostname.") + self.__checkHostName( + "%s/%s" % (temp_dir, self.hostnameFileName)) + # check parameter + self.logger.debug("Checking parameter files.") + paraFileList = [] + self.__checkParaFiles(temp_dir, paraFileList) + + self.logger.debug("Restoring parameter files.") + paraFileNum = len(paraFileList) + for i in range(paraFileNum): + tarFileName, paraFilePath = paraFileList[i].split('|') + g_file.cpFile(os.path.join(temp_dir, tarFileName), + paraFilePath) + + self.logger.debug("Remove the temporary directory.") + g_file.removeDirectory(temp_dir) + except Exception as e: + g_file.removeDirectory(temp_dir) + raise Exception(str(e)) + self.logger.log("Successfully restored parameter files.") + + self.logger.log("Successfully restored files.") + + def decompressTarFile(self, flag): + """ + function: Decompress package on restore node + input : flag + output: NA + """ + tarFile = "%s/%s.tar" % (self.restoreDir, flag) + if (not os.path.exists(tarFile)): + return + # Decompress package on restore node + cmd = g_file.SHELL_CMD_DICT["decompressTarFile"] % ( + self.restoreDir, tarFile) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + raise Exception(ErrorCode.GAUSS_502[ + "GAUSS_50217"] % tarFile + + " Error: 
\n%s." % output + + "The cmd is %s " % cmd) + + def __checkHostName(self, hostnameFile): + """ + function: make sure the hostname stored in tar files + input : hostnameFile + output: NA + """ + # make sure the hostname stored in tar files + localHostName = DefaultValue.GetHostIpOrName() + with open(hostnameFile, 'r') as self.__hostNameFile: + storedHostName = self.__hostNameFile.read() + storedHostName.strip('\n') + if (((localHostName > storedHostName) - ( + localHostName < storedHostName)) != 0): + raise Exception(ErrorCode.GAUSS_516["GAUSS_51637"] % \ + (("Local hostname [%s]", + "the hostname [%s] stored in tar files") % ( + localHostName, storedHostName))) + + def __checkParaFiles(self, temp_dir, paraFileList): + """ + function: check parameter file + input : temp_dir, paraFileList + output: NA + """ + storedParaFileNum = len(os.listdir(temp_dir)) - 1 + for inst in self.dbNodeInfo.datanodes: + self.__checkSingleParaFile(inst, temp_dir, paraFileList) + if ((storedParaFileNum > len(paraFileList)) - + (storedParaFileNum < len(paraFileList))) != 0: + raise Exception(ErrorCode.GAUSS_516["GAUSS_51637"] % + ("number of parameter files", + "the number of files requested")) + + def __checkSingleParaFile(self, inst, temp_dir, paraFileList): + """ + function: check single parameter file + input : inst, temp_dir, paraFileList + output: NA + """ + # makesure instance exist + if (not os.path.exists(inst.datadir)): + if (g_ignoreMiss): + self.logger.log( + "Data directory [%s] of instance [%s]does not exist." % ( + inst.datadir, str(inst))) + return + else: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % \ + ("Data directory [%s] of instance [%s]" % ( + inst.datadir, str(inst)))) + # get all parameter file path into paraFileMap + paraFileMap = {} + if inst.instanceRole == DefaultValue.INSTANCE_ROLE_DATANODE: + paraFileMap[POSTGRESQL_CONF] = \ + os.path.join(inst.datadir, POSTGRESQL_CONF) + paraFileMap[POSTGRESQL_HBA_CONF] = \ + os.path.join(inst.datadir, POSTGRESQL_HBA_CONF) + else: + raise Exception(ErrorCode.GAUSS_516["GAUSS_51204"] % ( + "specified", inst.instanceRole)) + + for key in paraFileMap: + backupFileName = "%d_%s" % (inst.instanceId, key) + if (not os.path.exists(os.path.join(temp_dir, backupFileName))): + if (g_ignoreMiss): + self.logger.log( + "The file of %s does not exist." % backupFileName) + return + else: + raise Exception( + ErrorCode.GAUSS_502["GAUSS_50201"] % backupFileName) + newRecord = "%s|%s" % (backupFileName, paraFileMap[key]) + paraFileList.append(newRecord) + + +############################################################################## +# Help context. 
U:R:oC:v: +############################################################################## +def usage(): + """ + function: usage + input : NA + output : NA + """ + print( + "Restore.py is a local utility to restore binary file " + "and parameter file.") + print(" ") + print("Usage:") + print("python3 Restore.py --help") + print(" ") + print("Common options:") + print(" -U the user of cluster.") + print(" -P, --position=RESTOREPATH the restore directory.") + print(" -p, --parameter restore parameter files.") + print(" -b, --binary_file restore binary files.") + print(" -i, --ingore_miss ignore Backup entity miss.") + print(" -s, --static_file static configuration files.") + print(" -l, --logpath=LOGPATH the log directory.") + print(" -h, --help show this help, then exit.") + print(" ") + + +def checkUserExist(): + """ + function: check user exists + input : NA + output: NA + """ + if (g_clusterUser == ""): + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50001"] % "U" + ".") + DefaultValue.checkUser(g_clusterUser, False) + + +def checkLogFile(logFile): + """ + function: check log file + input : NA + output: NA + """ + if (logFile == ""): + logFile = DefaultValue.getOMLogPath(DefaultValue.LOCAL_LOG_FILE, + g_clusterUser, "") + if (not os.path.isabs(logFile)): + GaussLog.exitWithError(ErrorCode.GAUSS_502["GAUSS_50213"] % "log") + + +def checkRestorePara(restorePara, restoreBin): + """ + function: check restore parameter + input : NA + output: NA + """ + if not restorePara and not restoreBin: + GaussLog.exitWithError( + ErrorCode.GAUSS_500["GAUSS_50001"] % "p or -b" + ".") + + +def checkRestoreDir(restoreDir): + """ + function: check restore directory + input : NA + output: NA + """ + if (restoreDir == ""): + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50001"] % "P" + ".") + + +def main(): + """ + function: main function + input : NA + output: NA + """ + + try: + opts, args = getopt.getopt(sys.argv[1:], "U:P:l:pbhifs:", + ["position=", "parameter", "binary_file", + "logpath=", "help", "ingore_miss", + "force", "static_file="]) + except getopt.GetoptError as e: + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50000"] % e.msg) + if (len(args) > 0): + GaussLog.exitWithError( + ErrorCode.GAUSS_500["GAUSS_50000"] % str(args[0])) + + global g_clusterUser + global g_ignoreMiss + global g_staticFile + global g_forceRestore + restoreDir = "" + restorePara = False + restoreBin = False + logFile = "" + + for key, value in opts: + if (key == "-h" or key == "--help"): + usage() + sys.exit(0) + elif (key == "-U"): + g_clusterUser = value.strip() + elif (key == "-P" or key == "--position"): + restoreDir = value.strip() + elif (key == "-p" or key == "--parameter"): + restorePara = True + elif (key == "-b" or key == "--binary_file"): + restoreBin = True + elif (key == "-i" or key == "--ingore_miss"): + g_ignoreMiss = True + elif (key == "-s" or key == "--static_file"): + g_staticFile = value.strip() + elif (key == "-l" or key == "--logpath"): + logFile = value + elif (key == "-f" or key == "--force"): + g_forceRestore = True + else: + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50000"] % value) + + Parameter.checkParaVaild(key, value) + + if (g_ignoreMiss): + gaussHome = DefaultValue.getEnv("GAUSSHOME") + if not gaussHome: + return + + # check if user exist and is the right user + checkUserExist() + # check log file + checkLogFile(logFile) + # check -p and -b + checkRestorePara(restorePara, restoreBin) + # check -P + checkRestoreDir(restoreDir) + + try: + LocalRestorer = LocalRestore(logFile, 
g_clusterUser, restoreDir,
+                                     restorePara, restoreBin)
+        LocalRestorer.run()
+    except Exception as e:
+        GaussLog.exitWithError(str(e))
+
+
+if __name__ == '__main__':
+    main()
diff --git a/script/local/StartInstance.py b/script/local/StartInstance.py
new file mode 100644
index 0000000..bd764b9
--- /dev/null
+++ b/script/local/StartInstance.py
@@ -0,0 +1,168 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+#
+# openGauss is licensed under Mulan PSL v2.
+# You can use this software according to the terms
+# and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+#
+# http://license.coscl.org.cn/MulanPSL2
+#
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+# ----------------------------------------------------------------------------
+
+import sys
+import getopt
+
+sys.path.append(sys.path[0] + "/../")
+from gspylib.common.GaussLog import GaussLog
+from gspylib.common.Common import DefaultValue
+from gspylib.common.ErrorCode import ErrorCode
+from gspylib.common.LocalBaseOM import LocalBaseOM
+from gspylib.common.ParameterParsecheck import Parameter
+
+
+class Start(LocalBaseOM):
+    """
+    The class is used to perform the start operation
+    """
+
+    def __init__(self):
+        """
+        function: initialize the parameters
+        input: NA
+        output: NA
+        """
+        super(Start, self).__init__()
+        self.user = ""
+        self.dataDir = ""
+        self.time_out = 300
+        self.logFile = ""
+        self.logger = None
+        self.installPath = ""
+        self.security_mode = ""
+        self.cluster_number = None
+
+    def usage(self):
+        """
+gs_start is a utility to start the database
+
+Usage:
+    gs_start -? 
| --help + gs_start -U USER [-D DATADIR][-t SECS][-l LOGFILE] + +General options: + -U USER the database program and cluster owner") + -D DATADIR data directory of instance + -t SECS seconds to wait + -l LOGFILE log file + -?, --help show this help, then exit + """ + print(self.usage.__doc__) + + def parseCommandLine(self): + """ + function: Check input parameters + input : NA + output: NA + """ + try: + opts, args = getopt.getopt(sys.argv[1:], "U:D:R:l:t:h?", + ["help", "security-mode=", + "cluster_number="]) + except getopt.GetoptError as e: + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50000"] % str(e)) + + if (len(args) > 0): + GaussLog.exitWithError( + ErrorCode.GAUSS_500["GAUSS_50000"] % str(args[0])) + + for key, value in opts: + if key == "-U": + self.user = value + elif key == "-D": + self.dataDir = value + elif key == "-t": + self.time_out = int(value) + elif key == "-l": + self.logFile = value + elif key == "-R": + self.installPath = value + elif key == "--help" or key == "-h" or key == "-?": + self.usage() + sys.exit(0) + elif key == "--security-mode": + self.security_mode = value + elif key == "--cluster_number": + self.cluster_number = value + else: + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50000"] + % key) + Parameter.checkParaVaild(key, value) + + if self.user == "": + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50001"] + % 'U' + ".") + if self.logFile == "": + self.logFile = DefaultValue.getOMLogPath( + DefaultValue.LOCAL_LOG_FILE, self.user, self.installPath) + + def __initLogger(self): + """ + function: Init logger + input : NA + output: NA + """ + self.logger = GaussLog(self.logFile, "StartInstance") + + def init(self): + """ + function: constructor + """ + self.__initLogger() + self.readConfigInfo() + self.initComponent() + + def doStart(self): + """ + function: do start database + input : NA + output : NA + """ + isDataDirCorrect = False + for dn in self.dnCons: + if self.dataDir != "" and dn.instInfo.datadir != self.dataDir: + continue + if self.cluster_number: + dn.start(self.time_out, self.security_mode, self.cluster_number) + else: + dn.start(self.time_out, self.security_mode) + isDataDirCorrect = True + + if not isDataDirCorrect: + raise Exception(ErrorCode.GAUSS_536["GAUSS_53610"] % self.dataDir) + + +def main(): + """ + main function + """ + try: + start = Start() + start.parseCommandLine() + start.init() + except Exception as e: + GaussLog.exitWithError(ErrorCode.GAUSS_536["GAUSS_53608"] % str(e)) + try: + start.doStart() + except Exception as e: + GaussLog.exitWithError(str(e)) + + +if __name__ == "__main__": + main() diff --git a/script/local/StopInstance.py b/script/local/StopInstance.py new file mode 100644 index 0000000..c9bfac0 --- /dev/null +++ b/script/local/StopInstance.py @@ -0,0 +1,159 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. 
+# ----------------------------------------------------------------------------
+
+import sys
+import getopt
+
+sys.path.append(sys.path[0] + "/../")
+from gspylib.common.GaussLog import GaussLog
+from gspylib.common.Common import DefaultValue
+from gspylib.common.ErrorCode import ErrorCode
+from gspylib.common.LocalBaseOM import LocalBaseOM
+from gspylib.common.ParameterParsecheck import Parameter
+
+
+class Stop(LocalBaseOM):
+    """
+    The class is used to perform the stop operation
+    """
+
+    def __init__(self):
+        """
+        function: initialize the parameters
+        input: NA
+        output: NA
+        """
+        super(Stop, self).__init__()
+        self.user = ""
+        self.dataDir = ""
+        self.time_out = 300
+        self.logFile = ""
+        self.logger = None
+        self.stopMode = ""
+        self.installPath = ""
+
+    def usage(self):
+        """
+gs_stop is a utility to stop the database
+
+Usage:
+    gs_stop -? | --help
+    gs_stop -U USER [-D DATADIR][-t SECS][-l LOGFILE][-m SHUTDOWN-MODE]
+
+General options:
+    -U USER            the database program and cluster owner
+    -D DATADIR         data directory of instance
+    -t SECS            seconds to wait
+    -l LOGFILE         log file
+    -m SHUTDOWN-MODE   the shutdown mode
+    -?, --help         show this help, then exit
+        """
+        print(self.usage.__doc__)
+
+    def parseCommandLine(self):
+        """
+        function: Check input parameters
+        input : NA
+        output: NA
+        """
+        try:
+            opts, args = getopt.getopt(sys.argv[1:], "U:D:l:t:R:m:h?",
+                                       ["help"])
+        except getopt.GetoptError as e:
+            GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50000"] % str(e))
+
+        if (len(args) > 0):
+            GaussLog.exitWithError(
+                ErrorCode.GAUSS_500["GAUSS_50000"] % str(args[0]))
+
+
+        for key, value in opts:
+            if key == "-U":
+                self.user = value
+            elif key == "-D":
+                self.dataDir = value
+            elif key == "-m":
+                self.stopMode = value
+            elif key == "-t":
+                self.time_out = int(value)
+            elif key == "-l":
+                self.logFile = value
+            elif key == "-R":
+                self.installPath = value
+            elif key == "--help" or key == "-h" or key == "-?":
+                self.usage()
+                sys.exit(0)
+            else:
+                GaussLog.exitWithError(
+                    ErrorCode.GAUSS_500["GAUSS_50000"] % key)
+            Parameter.checkParaVaild(key, value)
+
+        if self.user == "":
+            GaussLog.exitWithError(
+                ErrorCode.GAUSS_500["GAUSS_50001"] % 'U' + ".")
+        if self.logFile == "":
+            self.logFile = DefaultValue.getOMLogPath(
+                DefaultValue.LOCAL_LOG_FILE, self.user, self.installPath)
+
+    def __initLogger(self):
+        """
+        function: Init logger
+        input : NA
+        output: NA
+        """
+        self.logger = GaussLog(self.logFile, "StopInstance")
+
+    def init(self):
+        """
+        function: constructor
+        """
+        self.__initLogger()
+        self.readConfigInfo()
+        self.initComponent()
+
+    def doStop(self):
+        """
+        function: do stop database
+        input : NA
+        output : NA
+        """
+        isDataDirCorrect = False
+        for dn in self.dnCons:
+            if self.dataDir != "" and dn.instInfo.datadir != self.dataDir:
+                continue
+            dn.stop(self.stopMode, self.time_out)
+            isDataDirCorrect = True
+        if not isDataDirCorrect:
+            raise Exception(ErrorCode.GAUSS_536["GAUSS_53610"] % self.dataDir)
+
+
+def main():
+    """
+    main function
+    """
+    try:
+        stop = Stop()
+        stop.parseCommandLine()
+        stop.init()
+        stop.doStop()
+    except Exception as e:
+        GaussLog.exitWithError(ErrorCode.GAUSS_536["GAUSS_53609"] % str(e))
+
+
+if __name__ == "__main__":
+    main()
diff --git a/script/local/UnPreInstallUtility.py b/script/local/UnPreInstallUtility.py
new file mode 100644
index 0000000..f99304d
--- /dev/null
+++ b/script/local/UnPreInstallUtility.py
@@ -0,0 +1,732 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+############################################################################# +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +# Description : UnPreInstallUtility.py is a utility to execute unPreInstall. +############################################################################# +import sys +import os +import getopt +import subprocess +import grp + +sys.path.append(sys.path[0] + "/../") +from gspylib.common.GaussLog import GaussLog +from gspylib.common.Common import DefaultValue +from gspylib.common.ParameterParsecheck import Parameter +from gspylib.common.VersionInfo import VersionInfo +from gspylib.common.ErrorCode import ErrorCode +from gspylib.os.gsfile import g_file +from gspylib.os.gsOSlib import g_OSlib +from gspylib.os.gsnetwork import g_network +from gspylib.os.gsservice import g_service +from gspylib.common.LocalBaseOM import LocalBaseOM +from gspylib.os.gsfile import g_Platform +import impl.upgrade.UpgradeConst as Const + +ACTION_CLEAN_SYSLOG_CONFIG = 'clean_syslog_config' +ACTION_CLEAN_TOOL_ENV = 'clean_tool_env' +ACTION_CHECK_UNPREINSTALL = "check_unpreinstall" +ACTION_CLEAN_GAUSS_ENV = "clean_gauss_env" +ACTION_DELETE_GROUP = "delete_group" +# clean instance paths +ACTION_CLEAN_INSTANCE_PATHS = "clean_instance_paths" +# clean $GAUSS_ENV +ACTION_CLEAN_ENV = "clean_env" +# clean dependency directory +ACTION_CLEAN_DEPENDENCY = "clean_dependency" + +PROFILE_FILE = '/etc/profile' +PSSHDIR = 'pssh-2.3.1' +LIBPATH = "lib" +SCRIPTPATH = "script" +##################################################### +# syslog variables +##################################################### +RSYSLOG = "rsyslog" +RSYSLOG_CONFIG_FILE = "/etc/rsyslog.conf" +RSYSLOG_FACILITY_LEVEL = "local3.*" +AP_RSYSLOG_FACILITY_LEVEL = ":msg,contains,\"MPPDB\"" +SYSLOG_NG = "syslog-ng" +SYSLOG_NG_CONFIG_FILE = "/etc/syslog-ng/syslog-ng.conf" +SYSLOG_NG_CONFIG_FILE_SERVER = "/etc/sysconfig/syslog" + +g_nodeInfo = None + + +class Postuninstall(LocalBaseOM): + """ + execute unPreInstall + """ + + def __init__(self): + self.action = "" + self.userInfo = "" + self.user = "" + self.group = "" + self.clusterConfig = "" + self.preparePath = "" + self.checkEmpty = False + self.envParams = [] + self.userProfile = "" + self.logFile = "" + self.clusterToolPath = "" + self.tmpFile = "" + self.component = [] + self.clusterComponent = [] + self.logger = None + self.userHome = "" + + def initGlobals(self): + """ + init global variables + input : NA + output: NA + """ + global g_nodeInfo + self.logger = GaussLog(self.logFile, self.action) + + if self.clusterConfig != "": + if os.path.isfile(self.clusterConfig): + self.clusterToolPath = DefaultValue.getPreClusterToolPath( + self.user, self.clusterConfig) + self.readConfigInfoByXML() + hostName = DefaultValue.GetHostIpOrName() + g_nodeInfo = self.clusterInfo.getDbNodeByName(hostName) + if (g_nodeInfo is None): + self.logger.logExit( + ErrorCode.GAUSS_516["GAUSS_51620"] % "local" + + " There is no host named %s!" 
% hostName) + else: + self.logger.logExit(ErrorCode.GAUSS_502["GAUSS_50210"] % ( + "config file [%s]" % self.clusterConfig)) + + elif self.action != ACTION_CLEAN_DEPENDENCY: + try: + self.clusterToolPath = DefaultValue.getClusterToolPath( + self.user) + except Exception as e: + self.logger.logExit( + ErrorCode.GAUSS_502["GAUSS_50219"] % + "the cluster tool path" + " Error: \n%s" % str(e)) + + if not self.clusterToolPath: + self.logger.logExit( + ErrorCode.GAUSS_502["GAUSS_50219"] % "cluster tool path") + + # make sure if we are using env seperate version, + # and get the right profile + # we can not check mppenvfile exists here + mppenvFile = DefaultValue.getEnv(DefaultValue.MPPRC_FILE_ENV) + if (mppenvFile != "" and mppenvFile is not None): + self.userProfile = mppenvFile + else: + self.userProfile = "/home/%s/.bashrc" % self.user + + def usage(self): + """ + Usage: + python3 UnPreInstallUtility.py -t action -u user [-X xmlfile] [-l log] + Common options: + -t the type of action + -u the os user of cluster + -X the xml file path + -l the path of log file + --help show this help, then exit + """ + print(self.usage.__doc__) + + def parseCommandLine(self): + """ + function: Check parameter from command line + input : NA + output: NA + """ + try: + opts, args = getopt.getopt( + sys.argv[1:], "t:u:X:l:f:Q:P:", ["help"]) + except Exception as e: + self.usage() + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50000"] % str(e)) + + if (len(args) > 0): + GaussLog.exitWithError( + ErrorCode.GAUSS_500["GAUSS_50000"] % str(args[0])) + + for (key, value) in opts: + if (key == "--help"): + self.usage() + sys.exit(0) + elif (key == "-t"): + self.action = value + elif (key == "-u"): + self.user = value + elif (key == "-X"): + self.clusterConfig = value + elif (key == "-l"): + self.logFile = os.path.realpath(value) + elif (key == "-f"): + self.tmpFile = value + elif key == "-Q": + self.clusterToolPath = value + elif key == "-P": + self.userHome = value + else: + GaussLog.exitWithError( + ErrorCode.GAUSS_500["GAUSS_50000"] % key) + Parameter.checkParaVaild(key, value) + + def checkParameter(self): + """ + function: Check parameter from command line + input : NA + output: NA + """ + + if self.action == "": + GaussLog.exitWithError( + ErrorCode.GAUSS_500["GAUSS_50001"] % "t" + ".") + + if self.logFile == "": + self.logFile = DefaultValue.getOMLogPath( + DefaultValue.LOCAL_LOG_FILE, self.user, "") + + if self.user == "" and self.action != ACTION_CLEAN_DEPENDENCY: + GaussLog.exitWithError( + ErrorCode.GAUSS_500["GAUSS_50001"] % "u" + ".") + + def getSyslogType(self): + """ + function: judge syslog type + input : NA + output: str + """ + self.logger.debug("Judging the syslog type is rsyslog or syslog-ng.") + if (os.path.isfile(RSYSLOG_CONFIG_FILE)): + return RSYSLOG + elif (os.path.isfile(SYSLOG_NG_CONFIG_FILE)): + return SYSLOG_NG + else: + self.logger.logExit( + ErrorCode.GAUSS_502["GAUSS_50219"] % "rsyslog or syslog-ng" + + " \nError: Failed to judge the syslog type.") + + def cleanWarningConfig(self): + """ + function: clean syslog-ng/rsyslog config + input : NA + output: NA + """ + self.logger.debug("Cleaning syslog-ng configuration.") + # judge the installed syslog type on the local host is rsyslog + # or syslog-ng + syslogType = self.getSyslogType() + if (syslogType == SYSLOG_NG): + self.cleanWarningConfigForSyslogng() + elif (syslogType == RSYSLOG): + self.cleanWarningConfigForRsyslog() + self.logger.debug("Successfully cleaned system log.") + + def cleanWarningConfigForSyslogng(self): + """ + 
function: clean syslog-ng config + input : NA + output: NA + """ + # clean client syslog-ng configure + cmd = "(if [ -s '%s' ]; then " % SYSLOG_NG_CONFIG_FILE + cmd += \ + "sed -i -e '/^filter f_gaussdb.*$/d' %s " % SYSLOG_NG_CONFIG_FILE + cmd += "-e '/^destination d_gaussdb.*$/d' %s " % SYSLOG_NG_CONFIG_FILE + cmd += \ + "-e '/^log { source(src); filter(f_gaussdb); " \ + "destination(d_gaussdb); };$/d' %s;fi;) " % SYSLOG_NG_CONFIG_FILE + self.logger.debug("Command for cleaning client system log: %s" % cmd) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + self.logger.logExit( + ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + " Error:\n%s" % output) + + # clean server syslog-ng configure + cmd = "(if [ -s '%s' ]; then " % SYSLOG_NG_CONFIG_FILE + cmd += \ + "sed -i -e '/^template t_gaussdb.*$/d' %s " % SYSLOG_NG_CONFIG_FILE + cmd += "-e '/^source s_gaussdb.*$/d' %s " % SYSLOG_NG_CONFIG_FILE + cmd += "-e '/^filter f_gaussdb.*$/d' %s " % SYSLOG_NG_CONFIG_FILE + cmd += "-e '/^destination d_gaussdb.*$/d' %s " % SYSLOG_NG_CONFIG_FILE + cmd += \ + "-e '/^log { source(s_gaussdb); " \ + "filter(f_gaussdb); destination(d_gaussdb); };$/d' %s;" \ + "fi; " % SYSLOG_NG_CONFIG_FILE + cmd += "if [ -s '%s' ]; then " % SYSLOG_NG_CONFIG_FILE_SERVER + cmd += \ + "sed -i -e '/^SYSLOGD_OPTIONS=\\\"-r -m 0\\\"/d' %s " \ + % SYSLOG_NG_CONFIG_FILE_SERVER + cmd += "-e '/^KLOGD_OPTIONS=\\\"-x\\\"/d' %s; " \ + % SYSLOG_NG_CONFIG_FILE_SERVER + cmd += "fi) " + self.logger.debug("Command for cleaning server system log: %s" % cmd) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + self.logger.logExit( + ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + " Error:\n%s" % output) + + # restart the syslog service + (status, output) = g_service.manageOSService("syslog", "restart") + if (status != 0): + self.logger.logExit( + ErrorCode.GAUSS_508["GAUSS_50802"] % "restart syslog" + + " Error: \n%s" % output) + + def cleanWarningConfigForRsyslog(self): + """ + function: clean rsyslog config + input : NA + output: NA + """ + # clean rsyslog config on client and server + cmd = "(if [ -s '%s' ]; then " % RSYSLOG_CONFIG_FILE + cmd += \ + "sed -i -e '/^$ModLoad imjournal.*$/d' %s " % RSYSLOG_CONFIG_FILE + cmd += "-e '/^$ModLoad imudp.*$/d' %s " % RSYSLOG_CONFIG_FILE + cmd += "-e '/^$UDPServerRun 514.*$/d' %s " % RSYSLOG_CONFIG_FILE + cmd += \ + "-e '/^$imjournalRatelimitInterval.*$/d' %s " % RSYSLOG_CONFIG_FILE + cmd += "-e '/^$imjournalRatelimitBurst.*$/d' %s " % RSYSLOG_CONFIG_FILE + cmd += "-e '/^%s.*$/d' %s; " % ( + AP_RSYSLOG_FACILITY_LEVEL, RSYSLOG_CONFIG_FILE) + cmd += "fi) " + self.logger.debug("Command for cleaning crash rsyslog: %s." 
% cmd) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + self.logger.logExit( + ErrorCode.GAUSS_502["GAUSS_50207"] % 'crash rsyslog' + + " Error: \n%s" % output) + + # restart the rsyslog service + (status, output) = g_service.manageOSService("rsyslog", "restart") + if (status != 0): + self.logger.logExit( + ErrorCode.GAUSS_508["GAUSS_50802"] % "restart rsyslog" + + " Error: \n%s" % output) + + def cleanEnvSoftware(self): + """ + function: clean environment software and variable + Gauss-MPPDB* & sctp_patch is came from R5 upgrade R7 + input : NA + output: NA + """ + self.logger.debug("Cleaning the environmental software and variable.") + # clean environment software + path = "%s/%s" % (self.clusterToolPath, PSSHDIR) + g_file.removeDirectory(path) + path = "%s/lib" % self.clusterToolPath + g_file.removeDirectory(path) + path = "%s/script" % self.clusterToolPath + g_file.removeDirectory(path) + path = "%s/sudo" % self.clusterToolPath + g_file.removeDirectory(path) + path = "%s/upgrade.sh" % self.clusterToolPath + g_file.removeFile(path) + path = "%s/version.cfg" % self.clusterToolPath + g_file.removeFile(path) + path = "%s/GaussDB.py" % self.clusterToolPath + g_file.removeFile(path) + path = "%s/libcgroup" % self.clusterToolPath + g_file.removeDirectory(path) + path = "%s/server.key.cipher" % self.clusterToolPath + g_file.removeFile(path) + path = "%s/server.key.rand" % self.clusterToolPath + g_file.removeFile(path) + path = "%s/%s*" % (self.clusterToolPath, VersionInfo.PRODUCT_NAME) + g_file.removeDirectory(path) + path = "%s/Gauss*" % (self.clusterToolPath) + g_file.removeDirectory(path) + path = "%s/sctp_patch" % (self.clusterToolPath) + g_file.removeDirectory(path) + path = "%s/unixodbc" % self.clusterToolPath + g_file.removeDirectory(path) + path = "%s/%s" % (self.clusterToolPath, Const.UPGRADE_SQL_FILE) + g_file.removeFile(path) + path = "%s/%s" % (self.clusterToolPath, Const.UPGRADE_SQL_SHA) + g_file.removeFile(path) + self.logger.debug( + "Successfully cleaned the environmental software and variable.") + + self.logger.debug("Cleaning environmental software.") + # clean environment variable + cmd = "(if [ -s '%s' ]; then " % PROFILE_FILE + cmd += "sed -i -e '/^export GPHOME=%s$/d' %s " % ( + self.clusterToolPath.replace('/', '\/'), PROFILE_FILE) + cmd += \ + "-e '/^export PATH=\$GPHOME\/pssh-2.3.1\/bin:" \ + "\$GPHOME\/script:\$PATH$/d' %s " % PROFILE_FILE + cmd += \ + "-e '/^export PATH=\$GPHOME\/script\/gspylib\/pssh\/bin:" \ + "\$GPHOME\/script:\$PATH$/d' %s " % PROFILE_FILE + cmd += \ + "-e '/^export LD_LIBRARY_PATH=\$GPHOME\/lib:" \ + "\$LD_LIBRARY_PATH$/d' %s " % PROFILE_FILE + cmd += \ + "-e '/^export PYTHONPATH=\$GPHOME\/lib$/d' %s; fi) " % PROFILE_FILE + self.logger.debug( + "Command for cleaning environment variable: %s." % cmd) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + self.logger.logExit( + ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + " Error:\n%s" % output) + + self.logger.debug( + "Successfully cleaned environmental software and variable.") + + def checkUnPreInstall(self): + """ + function: check whether do uninstall before unpreinstall + input : NA + output: NA + """ + self.logger.debug("Checking UnPreInstall.") + # check if user exist + try: + DefaultValue.getUserId(self.user) + except Exception as e: + self.logger.logExit(str(e)) + + # check if user profile exist + if (not os.path.exists(self.userProfile)): + self.logger.debug( + "The %s does not exist." 
% self.userProfile + + " Please skip to check UnPreInstall.") + return + + # check $GAUSSHOME + cmd = "su - %s -c 'source %s && echo $GAUSS_ENV' 2>/dev/null" % ( + self.user, self.userProfile) + self.logger.debug("Command for getting $GAUSSHOME: %s" % cmd) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + self.logger.logExit( + ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + " Error:\n%s" % output) + gaussEnv = output.strip() + if (gaussEnv == "2"): + self.logger.logExit( + ErrorCode.GAUSS_525["GAUSS_52501"] % "gs_uninstall") + + # check $GAUSS_ENV + cmd = "su - %s -c 'source %s && echo $GAUSS_ENV' 2>/dev/null" % ( + self.user, self.userProfile) + self.logger.debug("Command for getting $GAUSS_ENV: %s" % cmd) + (status, output) = subprocess.getstatusoutput(cmd) + if (status != 0): + self.logger.logExit( + ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + " Error:\n%s" % output) + gaussEnv = output.strip() + + if (str(gaussEnv) != "1"): + self.logger.logExit( + ErrorCode.GAUSS_525["GAUSS_52501"] % "gs_preinstall") + + self.logger.debug("Successfully checked UnPreInstall.") + + def cleanGaussEnv(self): + """ + function: clean $GAUSS_ENV + input : NA + output: NA + """ + self.logger.debug("Cleaning $GAUSS_ENV.") + + # check if user profile exist + if (self.userProfile is not None and self.userProfile != ""): + userProfile = self.userProfile + else: + userProfile = "/home/%s/.bashrc" % self.user + + if (not os.path.exists(userProfile)): + self.logger.debug( + "The %s does not exist." % userProfile + + " Please skip to clean $GAUSS_ENV.") + return + # clean user's environmental variable + DefaultValue.cleanUserEnvVariable(userProfile, + cleanGAUSS_WARNING_TYPE=True) + + # clean $GAUSS_ENV + envContent = "^\\s*export\\s*GAUSS_ENV=.*$" + g_file.deleteLine(userProfile, envContent) + + self.logger.debug("Cleaned $GAUSS_ENV.") + + def cleanNetworkfile(self, backIpNIC, virtualIp): + """ + function: clean configured IP in Network file + input : NA + output: NA + """ + self.logger.debug("Cleaning network file.") + try: + # read information from networkfile + networkfile = "/etc/sysconfig/network/ifcfg-" + backIpNIC + networkinfo = [] + # check if the file is a link + g_OSlib.checkLink(networkfile) + with open(networkfile, "r") as fp: + networkinfo = fp.readlines() + LABEL = self.getLABEL(virtualIp, networkfile) + if (LABEL is not None): + # init linenum for delete + del_1 = 0 + del_2 = 0 + linenum = 1 + for line in networkinfo: + if (line.split("=")[1].strip() == virtualIp): + # find if the netmask exist, if exist, delete this line + cmd_g = "grep -n 'NETMASK_%s=' %s" % ( + LABEL, networkfile) + (status, output) = subprocess.getstatusoutput(cmd_g) + if (status == 0): + linenum_net = int(output.split(":")[0]) + if (linenum + 1 == linenum_net): + del_1 = linenum_net + # find if the LABEL number exist, + # if exist, delete this line + cmd_g = "grep -n 'LABEL_%s=' %s " % ( + LABEL, networkfile) + (status, output) = subprocess.getstatusoutput(cmd_g) + if (status == 0): + linenum_net = int(output.split(":")[0]) + if (linenum + 2 == linenum_net): + del_2 = linenum_net + # delete issues which exist + if (del_1 != 0 and del_2 != 0): + cmd = "sed -i '%dd;%dd;%dd' %s" % ( + linenum, del_1, del_2, networkfile) + elif (del_1 != 0 and del_2 == 0): + cmd = "sed -i '%dd;%dd' %s" % ( + linenum, del_1, networkfile) + elif (del_1 == 0 and del_2 != 0): + cmd = "sed -i '%dd;%dd' %s" % ( + linenum, del_2, networkfile) + else: + cmd = "sed -i '%dd' %s" % (linenum, networkfile) + (status, output) = 
subprocess.getstatusoutput(cmd) + if (status != 0): + raise Exception( + ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + "Error:\n%s" % output) + linenum += 1 + self.logger.log( + "Successfully clean virtual Ip from network file") + else: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50204"] % ( + "the LABEL number of %s " % virtualIp)) + self.logger.debug("Successfully cleaned network file.") + except Exception as e: + self.logger.log("Error: Write networkfile failed." + str(e)) + + def IsSuSE12SP0(self): + """ + function:Check is OS SuSE12.0 + input :NA + output :bool + """ + if (os.path.isfile("/etc/SuSE-release")): + cmd = "grep -i 'PATCHLEVEL' /etc/SuSE-release " \ + "| awk -F '=' '{print $2}'" + (status, output) = subprocess.getstatusoutput(cmd) + if (status == 0 and output != ""): + if (output.strip().isdigit() and int(output.strip()) == 0): + return True + else: + raise Exception( + ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + " Error: \n%s " % output) + return False + + def getLABEL(self, virtualIp, networkfile): + """ + function: get LABEL number of virtual ip from network file + input : fp, virtualIp + output: int + """ + # check if the file is a link + g_OSlib.checkLink(networkfile) + with open(networkfile, "r") as fp: + for line in fp: + if line.split("=")[1].strip() == virtualIp: + if line.split("IPADDR_")[1].split("=%s" % virtualIp)[0]: + return line.split("IPADDR_")[1].split( + "=%s" % virtualIp)[0] + else: + return None + return None + + def cleanGroup(self): + """ + function: clean group + input : NA + output: NA + """ + self.logger.debug("Cleaning user group.") + hostName = DefaultValue.GetHostIpOrName() + groupname = self.user + + try: + groupid = grp.getgrnam(groupname).gr_gid + except Exception: + self.logger.debug("group %s has been deleted." % groupname) + sys.exit(0) + + cmd = "cat /etc/passwd | awk -F [:] '{print $1 \" \"$4}'" \ + "|grep ' %s$'" % groupid + (status, output) = subprocess.getstatusoutput(cmd) + if status == 0: + self.logger.logExit( + "Warning: There are other users in the group %s on %s," + " skip to delete group." % (groupname, hostName)) + elif status == 1: + cmd = "groupdel %s" % groupname + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + self.logger.logExit( + "Warning: Failed to delete group " + "%s by cmd:%s. Error: \n%s" % (groupname, cmd, output)) + else: + self.logger.logExit( + "Warning: Failed to delete group " + "%s by cmd:%s. Error: \n%s" % (groupname, cmd, output)) + self.logger.debug("Successfully cleaned user group.") + + def cleanScript(self): + """ + function: clean script + """ + # clean lib + libPath = os.path.join(self.clusterToolPath, LIBPATH) + if os.path.exists(libPath): + g_file.removeDirectory(libPath) + + # clean om script + scriptPath = os.path.join(self.clusterToolPath, SCRIPTPATH) + if os.path.exists(scriptPath): + g_file.removeDirectory(scriptPath) + + # clean root script path + root_script_path = os.path.join(DefaultValue.ROOT_SCRIPTS_PATH, + self.user) + if os.path.exists(root_script_path): + g_file.removeDirectory(root_script_path) + # if /root/gauss_om has no files, delete it. 
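+        # Illustrative layout of what this method removes (assuming the
+        # cluster user is "omm"; the actual roots come from clusterToolPath
+        # and DefaultValue.ROOT_SCRIPTS_PATH):
+        #   <clusterToolPath>/lib     -> libPath, removed above
+        #   <clusterToolPath>/script  -> scriptPath, removed above
+        #   /root/gauss_om/omm        -> root_script_path, removed above
+        #   /root/gauss_om            -> removed below only when empty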
+        if not os.listdir(DefaultValue.ROOT_SCRIPTS_PATH):
+            g_file.removeDirectory(DefaultValue.ROOT_SCRIPTS_PATH)
+
+        # clean others
+        if os.path.exists(self.clusterToolPath):
+            g_file.cleanDirectoryContent(self.clusterToolPath)
+
+        if self.userHome != "":
+            if os.path.exists(self.userHome):
+                g_file.removeDirectory(self.userHome)
+
+    def cleanEnv(self):
+        """
+        function: clean environment variable
+        """
+        self.logger.debug("Begin clean environment variable")
+        if not self.userProfile:
+            self.logger.logExit("Clean Env failed: cannot get user profile.")
+        for comp in self.clusterComponent:
+            comp.cleanEnv(self.userProfile)
+
+        # clean user's environment variable
+        self.logger.debug("Clean user environment variable.")
+        DefaultValue.cleanUserEnvVariable(self.userProfile,
+                                          cleanGAUSS_WARNING_TYPE=True)
+        # clean GAUSS_ENV
+        self.logger.debug("Clean GAUSS_ENV.")
+        g_file.deleteLine(self.userProfile, "^\\s*export\\s*GAUSS_ENV=.*$")
+        self.logger.debug("Clean environment variable successfully.")
+
+    def cleanPath(self):
+        """
+        function: clean path
+        input: NA
+        output: NA
+        """
+        self.logger.debug("Begin clean path")
+        if os.path.exists(self.clusterInfo.appPath):
+            self.logger.debug("Deleting the install directory.")
+            cleanPath = os.path.join(self.clusterInfo.appPath, "./*")
+            g_file.removeDirectory(cleanPath)
+            self.logger.debug("Successfully deleted the install directory.")
+        for i in self.component:
+            i.cleanPath()
+        gsdbHomePath = "/home/%s/gsdb_home" % self.user
+        if os.path.exists(gsdbHomePath):
+            self.logger.debug("Deleting the gsdb home path.")
+            g_file.removeDirectory(gsdbHomePath)
+            self.logger.debug("Successfully deleted the gsdb home path.")
+        self.logger.debug("Clean path successfully.")
+
+    def run(self):
+        try:
+            self.parseCommandLine()
+            self.checkParameter()
+            self.initGlobals()
+        except Exception as e:
+            GaussLog.exitWithError(str(e))
+
+        try:
+            if (self.action == ACTION_CLEAN_SYSLOG_CONFIG):
+                self.cleanWarningConfig()
+            elif (self.action == ACTION_CLEAN_TOOL_ENV):
+                self.cleanEnvSoftware()
+            elif (self.action == ACTION_CHECK_UNPREINSTALL):
+                self.checkUnPreInstall()
+            elif (self.action == ACTION_CLEAN_GAUSS_ENV):
+                self.cleanGaussEnv()
+            elif (self.action == ACTION_DELETE_GROUP):
+                self.cleanGroup()
+            elif (self.action == ACTION_CLEAN_DEPENDENCY):
+                self.cleanScript()
+            elif (self.action == ACTION_CLEAN_ENV):
+                self.cleanEnv()
+            elif (self.action == ACTION_CLEAN_INSTANCE_PATHS):
+                self.cleanPath()
+            else:
+                self.logger.logExit(
+                    ErrorCode.GAUSS_500["GAUSS_50000"] % self.action)
+        except Exception as e:
+            self.logger.logExit(str(e))
+
+
+if __name__ == '__main__':
+    """
+    main function
+    """
+    try:
+        postUninstallUtility = Postuninstall()
+        postUninstallUtility.run()
+    except Exception as e:
+        GaussLog.exitWithError(str(e))
+    sys.exit(0)
diff --git a/script/local/Uninstall.py b/script/local/Uninstall.py
new file mode 100644
index 0000000..3409339
--- /dev/null
+++ b/script/local/Uninstall.py
@@ -0,0 +1,350 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+#############################################################################
+# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+#
+# openGauss is licensed under Mulan PSL v2.
+# You can use this software according to the terms
+# and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +# Description : Uninstall.py is a utility to uninstall Gauss MPP Database. +############################################################################# + +import getopt +import os +import sys +import re + +sys.path.append(sys.path[0] + "/../") +from gspylib.common.GaussLog import GaussLog +from gspylib.common.ParameterParsecheck import Parameter +from gspylib.common.Common import DefaultValue +from gspylib.common.ErrorCode import ErrorCode +from gspylib.common.LocalBaseOM import LocalBaseOM +from gspylib.os.gsfile import g_file +from gspylib.os.gsOSlib import g_OSlib + + +class Uninstall(LocalBaseOM): + """ + uninstall the cluster + """ + + def __init__(self): + """ + Constructor + """ + super(Uninstall, self).__init__() + self.installPath = "" + self.user = "" + self.keepDir = False + self.mpprcFile = "" + self.logFile = "" + self.logger = None + self.installflag = False + self.clusterInfo = None + self.localNode = None + self.keepData = True + self.method = "" + self.action = "" + + ########################################################################## + # Help context. U:R:oC:v: + ########################################################################## + def usage(self): + """ + function: usage + """ + print("Uninstall.py is a utility to uninstall Gauss MPP Database.") + print(" ") + print("Usage:") + print(" python3 Uninstall.py --help") + print(" python3 Uninstall.py -U user -R installpath [-c] [-l log]") + print(" ") + print("Common options:") + print(" -U the database program and cluster owner") + print(" -R the database program install path") + print(" -l the log path") + print(" --help show this help, then exit") + print(" ") + + ########################################################################## + # This is the main uninstall flow. 
+ ########################################################################## + def uninstall(self): + """ + function: Remove install path content, which depend on $GAUSSHOME + input : NA + output: NA + """ + try: + self.logger.debug("OLAP's local uninstall.") + self.__cleanMonitor() + self.__cleanInstallProgram() + self.__changeuserEnv() + self.logger.closeLog() + except Exception as e: + raise Exception(str(e)) + + def __changeuserEnv(self): + """ + function: Change user GAUSS_ENV + input : NA + output: NA + """ + # clean os user environment variable + self.logger.log("Modifying user's environmental variable $GAUSS_ENV.") + userProfile = self.mpprcFile + DefaultValue.updateUserEnvVariable(userProfile, "GAUSS_ENV", "1") + if "HOST_IP" in os.environ.keys(): + g_file.deleteLine(userProfile, "^\\s*export\\s*WHITELIST_ENV=.*$") + self.logger.log("Successfully modified user's environmental" + " variable GAUSS_ENV.") + + self.logger.debug("Deleting symbolic link to $GAUSSHOME if exists.") + gaussHome = DefaultValue.getInstallDir(self.user) + if gaussHome == "": + raise Exception(ErrorCode.GAUSS_518["GAUSS_51800"] % "$GAUSSHOME") + if os.path.islink(gaussHome): + self.installPath = os.path.realpath(gaussHome) + os.remove(gaussHome) + else: + self.logger.debug("symbolic link does not exists.") + self.logger.debug("Deleting bin file in installation path.") + g_file.removeDirectory("%s/bin" % self.installPath) + self.logger.debug("Successfully deleting bin file in" + " installation path.") + + def __cleanMonitor(self): + """ + function: clean om_monitor process and delete cron + input : NA + output: NA + """ + self.logger.log("Deleting monitor.") + try: + # get all content by crontab command + (status, output) = g_OSlib.getAllCrontab() + # overwrit crontabFile, make it empty. 
+            crontabFile = "%s/gauss_crontab_file_%d" \
+                          % (DefaultValue.getTmpDirFromEnv(), os.getpid())
+            g_file.createFile(crontabFile, True)
+            content_CronTabFile = [output]
+            g_file.writeFile(crontabFile, content_CronTabFile)
+            g_file.deleteLine(crontabFile, "\/bin\/om_monitor")
+            g_OSlib.execCrontab(crontabFile)
+            g_file.removeFile(crontabFile)
+
+            # clean om_monitor,cm_agent,cm_server process
+            for progname in ["om_monitor", "cm_agent", "cm_server"]:
+                g_OSlib.killallProcess(self.user, progname, '9')
+        except Exception as e:
+            # crontabFile may still be empty if getAllCrontab() failed
+            if crontabFile and os.path.exists(crontabFile):
+                g_file.removeFile(crontabFile)
+            raise Exception(str(e))
+        self.logger.log("Successfully deleted OMMonitor.")
+
+    def checkParameters(self):
+        """
+        function: Check input parameters
+        input : NA
+        output: NA
+        """
+        try:
+            opts, args = getopt.getopt(sys.argv[1:], "t:U:R:l:X:M:T",
+                                       ["help", "delete-data"])
+        except getopt.GetoptError as e:
+            GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50000"]
+                                   % str(e))
+
+        if (len(args) > 0):
+            GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50000"]
+                                   % str(args[0]))
+
+        for key, value in opts:
+            if (key == "-U"):
+                self.user = value
+            elif (key == "-R"):
+                self.installPath = value
+            elif (key == "-l"):
+                self.logFile = value
+            elif (key == "--help"):
+                self.usage()
+                sys.exit(0)
+            elif (key == "-T"):
+                self.installflag = True
+            elif key == "--delete-data":
+                self.keepData = False
+            elif key == "-M":
+                self.method = value
+            elif key == "-t":
+                self.action = value
+            else:
+                GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50000"]
+                                       % key)
+
+            Parameter.checkParaVaild(key, value)
+
+        if (self.user == ""):
+            GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50001"]
+                                   % 'U' + ".")
+
+        if (self.installPath == ""):
+            GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50001"]
+                                   % 'R' + ".")
+
+        self.mpprcFile = DefaultValue.getMpprcFile()
+        if (self.logFile == ""):
+            self.logFile = DefaultValue.getOMLogPath(
+                DefaultValue.LOCAL_LOG_FILE, self.user, self.installPath)
+
+    def __initLogger(self):
+        """
+        function: Init logger
+        input : NA
+        output: NA
+        """
+        self.logger = GaussLog(self.logFile, "UninstallApp")
+
+    def __cleanInstallProgram(self):
+        """
+        function: Clean install program
+        input : NA
+        output: NA
+        """
+        if (not os.path.exists(self.installPath)):
+            self.logger.log("The installation directory does not exist.")
+            return
+
+        realLink = self.installPath
+        if os.path.islink(self.installPath):
+            realLink = os.readlink(self.installPath)
+
+        # delete upgrade directory
+        self.logger.debug("Starting to delete the other installation"
+                          " directory.")
+        try:
+            recordVersionFile = os.path.realpath(
+                os.path.join(self.installPath, "record_app_directory"))
+            if os.path.isfile(recordVersionFile):
+                with open(recordVersionFile, 'r') as fp:
+                    retLines = fp.readlines()
+                if len(retLines) != 2:
+                    raise Exception(ErrorCode.GAUSS_502["GAUSS_50222"]
+                                    % recordVersionFile)
+                oldPath = retLines[0].strip()
+                newPath = retLines[1].strip()
+                if os.path.normcase(oldPath) == os.path.normcase(realLink):
+                    g_file.removeDirectory(newPath)
+                else:
+                    g_file.removeDirectory(oldPath)
+                self.logger.debug("Successfully deleted the other"
+                                  " installation path.")
+            else:
+                self.logger.debug("No other installation path needs"
+                                  " to be deleted.")
+        except Exception as e:
+            raise Exception(ErrorCode.GAUSS_502["GAUSS_50209"]
+                            % "other installation" +
+                            " Cannot delete the other installation"
+                            " directory: %s."
% str(e)) + + self.logger.log("Removing the installation directory.") + try: + fileList = os.listdir(self.installPath) + for fileName in fileList: + fileName = fileName.replace("/", "").replace("..", "") + filePath = os.path.join(os.path.realpath(self.installPath), + fileName) + if os.path.isfile(filePath): + os.remove(filePath) + elif os.path.isdir(filePath): + if (fileName == "bin"): + binFileList = os.listdir(filePath) + for binFile in binFileList: + fileInBinPath = os.path.join(filePath, binFile) + if os.path.isfile(fileInBinPath) and \ + binFile != "cluster_static_config": + os.remove(fileInBinPath) + elif os.path.islink(fileInBinPath): + os.remove(fileInBinPath) + elif os.path.isdir(fileInBinPath): + g_file.removeDirectory(fileInBinPath) + else: + g_file.removeDirectory(filePath) + + self.logger.debug("Remove path:%s." % filePath) + + self.logger.debug("Successfully deleted bin file" + " in installation path.") + + except Exception as e: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50209"] + % "installation" + + " Can not delete installation directory: %s." + % str(e)) + + # regular match delete empty directory + self.logger.debug("Starting delete empty installation directory.") + try: + removeflag = False + namePrefix = os.path.basename(self.installPath) + gaussPath = os.path.realpath(os.path.dirname(self.installPath)) + curInstallName = os.path.basename(realLink) + fileList = os.listdir(gaussPath) + for fileName in fileList: + if fileName.strip() != curInstallName.strip(): + filePath = os.path.join(os.path.realpath(gaussPath), + fileName) + if os.path.isdir(filePath) \ + and not os.listdir(filePath) and "_" in fileName: + fileNameElement = fileName.split("_", 1) + if namePrefix.strip() == fileNameElement[0].strip(): + res = re.search( + '^(?![0-9]+$)(?![a-zA-Z]+$)[0-9A-Za-z]{8}$', + fileNameElement[1].strip()) + if res: + removeflag = True + g_file.removeDirectory(filePath) + if removeflag: + self.logger.debug("Successfully deleted empty" + " installation path.") + else: + self.logger.debug("No empty installation path need" + " to delete.") + except Exception as e: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50209"] + % "other installation" + + " Can not delete empty installation" + " directory: %s." % str(e)) + + self.logger.log("Successfully deleted installation directory.") + + def init(self): + """ + function: constuctor + """ + self.__initLogger() + + +if __name__ == '__main__': + """ + main function + """ + try: + uninstaller = Uninstall() + uninstaller.checkParameters() + uninstaller.init() + uninstaller.uninstall() + except Exception as e: + GaussLog.exitWithError(str(e)) + + sys.exit(0) diff --git a/script/local/UpgradeUtility.py b/script/local/UpgradeUtility.py new file mode 100644 index 0000000..ba646db --- /dev/null +++ b/script/local/UpgradeUtility.py @@ -0,0 +1,4105 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +############################################################################# +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. 
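The empty-directory sweep in __cleanInstallProgram above removes leftover <prefix>_<suffix> directories only when the suffix looks like a commit id: exactly eight alphanumeric characters that are neither all digits nor all letters. A quick standalone check of what that lookahead pattern accepts and rejects (a sketch, not part of the patch):

    import re

    # 8 chars, alphanumeric, not purely digits, not purely letters
    COMMIT_SUFFIX = re.compile(r'^(?![0-9]+$)(?![a-zA-Z]+$)[0-9A-Za-z]{8}$')

    assert COMMIT_SUFFIX.search("ab12cd34")      # mixed -> matches
    assert not COMMIT_SUFFIX.search("12345678")  # digits only -> rejected
    assert not COMMIT_SUFFIX.search("abcdefgh")  # letters only -> rejected
    assert not COMMIT_SUFFIX.search("ab12cd3")   # wrong length -> rejected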
+# ---------------------------------------------------------------------------- +# Description : +# UpgradeUtility.py is a utility to execute upgrade on each local node +############################################################################# + +import getopt +import sys +import os +import subprocess +import pwd +import re +import time +import timeit +import traceback +import json +import platform +import shutil +import copy +import csv +import fcntl +from multiprocessing.dummy import Pool as ThreadPool + +sys.path.append(sys.path[0] + "/../") +from gspylib.common.GaussLog import GaussLog +from gspylib.common.Common import DefaultValue, ClusterCommand, \ + ClusterInstanceConfig +from gspylib.common.ParameterParsecheck import Parameter +from gspylib.common.DbClusterInfo import dbClusterInfo +from gspylib.common.ErrorCode import ErrorCode +from gspylib.common.DbClusterStatus import DbClusterStatus +from gspylib.os.gsfile import g_file +import impl.upgrade.UpgradeConst as const + +INSTANCE_TYPE_UNDEFINED = -1 +MASTER_INSTANCE = 0 +STANDBY_INSTANCE = 1 +DUMMY_STANDBY_INSTANCE = 2 +# init value +INSTANCE_ROLE_UNDEFINED = -1 +# cn +INSTANCE_ROLE_COODINATOR = 3 +# dn +INSTANCE_ROLE_DATANODE = 4 + +BINARY_UPGRADE_TMP = "binary_upgrade" +PG_LOCATION = "pg_location" +CFDUMPPREFIX = "cfdump" + +# Global parameter +g_oldVersionModules = None +g_clusterInfo = None +g_oldClusterInfo = None +g_logger = None +g_dbNode = None +g_opts = None +g_DWS_mode = False +g_gausshome = None + + +class CmdOptions(): + """ + Class to define some cmd options + """ + + def __init__(self): + """ + function: constructor + """ + # action value + self.action = "" + # user value + self.user = "" + # app install path + self.appPath = "" + # env file + self.mpprcFile = "" + self.userProfile = "" + # log file + self.logFile = "" + # backup path + self.bakPath = "" + # old cluster version + self.oldVersion = "" + # xml file + self.xmlFile = "" + # inplace upgrade bak path or grey upgrade path + self.upgrade_bak_path = "" + self.scriptType = "" + self.rollback = False + self.forceRollback = False + self.rolling = False + self.oldClusterAppPath = "" + self.newClusterAppPath = "" + self.gucStr = "" + self.oldclusternum = "" + self.postgisSOFileList = \ + {"postgis-*.*.so": "lib/postgresql/", + "libgeos_c.so.*": "lib/", + "libproj.so.*": "lib/", + "libjson-c.so.*": "lib/", + "libgeos-*.*.*so": "lib/", + "postgis--*.*.*.sql": "share/postgresql/extension/", + "postgis.control": "share/postgresql/extension/", + "pgsql2shp": "bin/", + "shp2pgsql": "bin/", + "libgcc_s.so.*": "lib/", + "libstdc++.so.*": "lib/"} + + +class OldVersionModules(): + """ + Class for providing some functions to apply old version cluster + """ + def __init__(self): + """ + function: constructor + """ + # old cluster information module + self.oldDbClusterInfoModule = None + # old cluster status module + self.oldDbClusterStatusModule = None + + +def importOldVersionModules(): + """ + function: import some needed modules from the old cluster. + currently needed are: DbClusterInfo + input: NA + output:NA + """ + # get install directory by user name + installDir = DefaultValue.getInstallDir(g_opts.user) + if installDir == "": + GaussLog.exitWithError( + ErrorCode.GAUSS_503["GAUSS_50308"] + " User: %s." 
% g_opts.user) + # import DbClusterInfo module + global g_oldVersionModules + g_oldVersionModules = OldVersionModules() + sys.path.append("%s/bin/script/util" % installDir) + g_oldVersionModules.oldDbClusterInfoModule = __import__('DbClusterInfo') + + +def initGlobals(): + """ + function: init global variables + input: NA + output: NA + """ + global g_oldVersionModules + global g_clusterInfo + global g_oldClusterInfo + global g_logger + global g_dbNode + # make sure which env file we use + g_opts.userProfile = g_opts.mpprcFile + + # init g_logger + g_logger = GaussLog(g_opts.logFile, g_opts.action) + + if g_opts.action in [const.ACTION_RESTORE_CONFIG, + const.ACTION_SWITCH_BIN, + const.ACTION_GREY_UPGRADE_CONFIG_SYNC, + const.ACTION_CLEAN_INSTALL_PATH, + const.ACTION_GREY_RESTORE_CONFIG]: + g_logger.debug( + "No need to init cluster information under action %s." + % g_opts.action) + return + # init g_clusterInfo + # not all action need init g_clusterInfo + try: + g_clusterInfo = dbClusterInfo() + if g_opts.xmlFile == "" or not os.path.exists(g_opts.xmlFile): + g_clusterInfo.initFromStaticConfig(g_opts.user) + else: + g_clusterInfo.initFromXml(g_opts.xmlFile) + except Exception as e: + g_logger.debug(traceback.format_exc()) + g_logger.error(str(e)) + # init cluster info from install path failed + # try to do it from backup path again + g_opts.bakPath = DefaultValue.getTmpDirFromEnv() + "/" + staticConfigFile = "%s/cluster_static_config" % g_opts.bakPath + + if os.path.isfile(staticConfigFile): + try: + # import old module + g_oldVersionModules = OldVersionModules() + sys.path.append(os.path.dirname(g_opts.bakPath)) + g_oldVersionModules.oldDbClusterInfoModule = __import__( + 'OldDbClusterInfo') + # init old cluster config + g_clusterInfo = \ + g_oldVersionModules.oldDbClusterInfoModule.dbClusterInfo() + g_clusterInfo.initFromStaticConfig(g_opts.user, + staticConfigFile) + except Exception as e: + g_logger.error(str(e)) + # maybe the old cluster is V1R5C00 TR5 version, + # not support specify static config file + # path for initFromStaticConfig function, + # so use new cluster format try again + try: + g_clusterInfo = dbClusterInfo() + g_clusterInfo.initFromStaticConfig(g_opts.user, + staticConfigFile) + except Exception as e: + g_logger.error(str(e)) + try: + # import old module + importOldVersionModules() + # init old cluster config + g_clusterInfo = \ + g_oldVersionModules \ + .oldDbClusterInfoModule.dbClusterInfo() + g_clusterInfo.initFromStaticConfig(g_opts.user) + except Exception as e: + raise Exception(str(e)) + elif g_opts.xmlFile and os.path.exists(g_opts.xmlFile): + try: + sys.path.append(sys.path[0] + "/../../gspylib/common") + curDbClusterInfoModule = __import__('DbClusterInfo') + g_clusterInfo = curDbClusterInfoModule.dbClusterInfo() + g_clusterInfo.initFromXml(g_opts.xmlFile) + except Exception as e: + raise Exception(str(e)) + else: + try: + # import old module + importOldVersionModules() + # init old cluster config + g_clusterInfo = \ + g_oldVersionModules.oldDbClusterInfoModule.dbClusterInfo() + g_clusterInfo.initFromStaticConfig(g_opts.user) + except Exception as e: + raise Exception(str(e)) + + # init g_dbNode + localHost = DefaultValue.GetHostIpOrName() + g_dbNode = g_clusterInfo.getDbNodeByName(localHost) + if g_dbNode is None: + raise Exception( + ErrorCode.GAUSS_512["GAUSS_51209"] % ("NODE", localHost)) + + +def usage(): + """ +Usage: + python3 UpgradeUtility.py -t action [-U user] [-R path] [-l log] + +Common options: + -t the type of action + -U the user of old 
cluster + -R the install path of cluster + -l the path of log file + -V original Version + -X the xml configure file + --help show this help, then exit + --upgrade_bak_path always be the $PGHOST/binary_upgrade + --scriptType upgrade script type + --old_cluster_app_path absolute path with old commit id + --new_cluster_app_path absolute path with new commit id + --rollback is rollback + --guc_string check the guc string has been successfully + --oldcluster_num old cluster number + --rolling is rolling upgrade or rollback + wrote in the configure file, format is guc:value, + can only check upgrade_from, upgrade_mode + """ + print(usage.__doc__) + + +def parseCommandLine(): + """ + function: Parse command line and save to global variables + input: NA + output: NA + """ + try: + opts, args = getopt.getopt(sys.argv[1:], "t:U:R:l:V:X:", + ["help", "upgrade_bak_path=", + "script_type=", "old_cluster_app_path=", + "new_cluster_app_path=", "rollback", + "force", "guc_string=", "oldcluster_num=", + "rolling"]) + except Exception as e: + usage() + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50000"] % str(e)) + + if len(args) > 0: + GaussLog.exitWithError( + ErrorCode.GAUSS_500["GAUSS_50000"] % str(args[0])) + + for (key, value) in opts: + if key == "--help": + usage() + sys.exit(0) + elif key == "-t": + g_opts.action = value + elif key == "-U": + g_opts.user = value + elif key == "-R": + g_opts.appPath = value + elif key == "-l": + g_opts.logFile = os.path.realpath(value) + elif key == "-V": + g_opts.oldVersion = value + elif key == "-X": + g_opts.xmlFile = os.path.realpath(value) + elif key == "--upgrade_bak_path": + g_opts.upgrade_bak_path = os.path.normpath(value) + elif key == "--script_type": + g_opts.scriptType = os.path.normpath(value) + elif key == "--old_cluster_app_path": + g_opts.oldClusterAppPath = os.path.normpath(value) + elif key == "--new_cluster_app_path": + g_opts.newClusterAppPath = os.path.normpath(value) + elif key == "--rollback": + g_opts.rollback = True + elif key == "--rolling": + g_opts.rolling = True + elif key == "--force": + g_opts.forceRollback = True + elif key == "--guc_string": + g_opts.gucStr = value + elif key == "--oldcluster_num": + g_opts.oldclusternum = value + else: + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50000"] % key) + + Parameter.checkParaVaild(key, value) + + +def checkParameter(): + """ + function: check parameter for different action + input: NA + output: NA + """ + # check mpprc file path + g_opts.mpprcFile = DefaultValue.getMpprcFile() + # the value of "-t" can not be "" + if g_opts.action == "": + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50001"] % "t" + ".") + + # check the value of "-t" + if g_opts.action in [const.ACTION_SWITCH_PROCESS, + const.ACTION_COPY_CERTS, + const.ACTION_GREY_UPGRADE_CONFIG_SYNC, + const.ACTION_SWITCH_DN, + const.ACTION_GREY_RESTORE_CONFIG] and \ + (not g_opts.newClusterAppPath or not g_opts.oldClusterAppPath): + GaussLog.exitWithError( + ErrorCode.GAUSS_500["GAUSS_50001"] + % "-new_cluster_app_path and --old_cluster_app_path") + elif g_opts.action in \ + [const.ACTION_SYNC_CONFIG, + const.ACTION_RESTORE_CONFIG] and not g_opts.newClusterAppPath: + GaussLog.exitWithError( + ErrorCode.GAUSS_500["GAUSS_50001"] % "-new_cluster_app_path") + elif g_opts.action in \ + [const.ACTION_SWITCH_BIN, + const.ACTION_CLEAN_INSTALL_PATH] and not g_opts.appPath: + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50001"] % "R") + elif g_opts.action in [ + const.ACTION_GREY_SYNC_GUC, const.ACTION_UPGRADE_SQL_FOLDER] 
and\ + not g_opts.upgrade_bak_path: + GaussLog.exitWithError( + ErrorCode.GAUSS_500["GAUSS_50001"] % "-upgrade_bak_path") + elif g_opts.action in [const.ACTION_GREY_RESTORE_GUC] and\ + not g_opts.oldClusterAppPath: + raise Exception( + ErrorCode.GAUSS_500["GAUSS_50001"] % "-old_cluster_app_path") + # Check the incoming parameter -U + if g_opts.user == "": + g_opts.user = pwd.getpwuid(os.getuid()).pw_name + # Check the incoming parameter -l + if g_opts.logFile == "": + g_opts.logFile = DefaultValue.getOMLogPath(DefaultValue.LOCAL_LOG_FILE, + g_opts.user, "") + + global g_gausshome + g_gausshome = DefaultValue.getInstallDir(g_opts.user) + if g_gausshome == "": + GaussLog.exitWithError( + ErrorCode.GAUSS_518["GAUSS_51800"] % "$GAUSSHOME") + g_gausshome = os.path.normpath(g_gausshome) + + +def switchBin(): + """ + function: switch link bin from old to new + input : NA + output : NA + """ + if g_opts.forceRollback: + if not os.path.exists(g_opts.appPath): + g_file.createDirectory(g_opts.appPath, True, + DefaultValue.KEY_DIRECTORY_MODE) + g_logger.log("Switch to %s." % g_opts.appPath) + if g_opts.appPath == g_gausshome: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50233"] % ( + "install path", "$GAUSSHOME")) + if os.path.exists(g_gausshome): + if os.path.samefile(g_opts.appPath, g_gausshome): + g_logger.log( + "$GAUSSHOME points to %s. No need to switch." % g_opts.appPath) + cmd = "ln -snf %s %s" % (g_opts.appPath, g_gausshome) + g_logger.log("Command for switching binary directory: '%s'." % cmd) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception( + ErrorCode.GAUSS_508["GAUSS_50803"] + " Error: \n%s" % str(output)) + + +def readPostgresqlConfig(filePath): + """ + function: read postgres sql config + input filepath + output gucParamDict + """ + GUC_PARAM_PATTERN = "^\\s*.*=.*$" + pattern = re.compile(GUC_PARAM_PATTERN) + gucParamDict = {} + try: + with open(filePath, 'r') as fp: + resList = fp.readlines() + for oneLine in resList: + # skip blank line + if oneLine.strip() == "": + continue + # skip comment line + if (oneLine.strip()).startswith('#'): + continue + # search valid line + result = pattern.match(oneLine) + if result is not None: + paramAndValue = oneLine + # remove comment if eixst + pos = oneLine.find(' #') + if pos >= 0: + paramAndValue = oneLine[:pos] + # should use tab here + pos = oneLine.find('\t#') + if pos >= 0: + paramAndValue = oneLine[:pos] + # if the value contain "$" , + # we should using "\\\\\\$" to instead of it + resList = paramAndValue.split('=') + if len(resList) == 2: + param = resList[0] + value = resList[1].replace("$", "\\\\\\$") + gucParamDict[param.strip()] = value.strip() + elif len(resList) > 2: + # invalid line, skip it + # only support replconninfo1, replconninfo2 + if not resList[0].strip().startswith("replconninfo"): + continue + pos = paramAndValue.find('=') + param = paramAndValue[:pos] + value = paramAndValue[pos + 1:].replace("$", "\\\\\\$") + gucParamDict[param.strip()] = value.strip() + else: + continue + except Exception as e: + g_logger.debug(str(e)) + raise Exception( + ErrorCode.GAUSS_502["GAUSS_50204"] % "postgressql.conf file") + + return gucParamDict + + +def syncPostgresqlconf(dbInstance): + """ + function: syncPostgresqlconf during inplace upgrade + input: dbInstance + output: NA + """ + # get config info of current node + try: + # get guc param info from old cluster + gucCmd = "source %s" % g_opts.userProfile + oldPostgresConf = "%s/postgresql.conf" % dbInstance.datadir + gucParamDict = 
readPostgresqlConfig(oldPostgresConf) + + synchronousStandbyNames = "" + # synchronous_standby_names only can be set by write file + if "synchronous_standby_names" in gucParamDict.keys(): + synchronousStandbyNames = gucParamDict["synchronous_standby_names"] + del gucParamDict["synchronous_standby_names"] + + # internal parameters are not supported. So skip them when do gs_guc + internalGucList = ['block_size', 'current_logic_cluster', + 'integer_datetimes', 'lc_collate', + 'lc_ctype', 'max_function_args', + 'max_identifier_length', 'max_index_keys', + 'node_group_mode', 'segment_size', + 'server_encoding', 'server_version', + 'server_version_num', 'sql_compatibility', + 'wal_block_size', 'wal_segment_size', 'enable_beta_nestloop_fusion', + 'enable_upsert_to_merge', 'gs_clean_timeout', 'force_parallel_mode', + 'max_background_workers', 'max_parallel_workers_per_gather', + 'min_parallel_table_scan_size', 'pagewriter_threshold', + 'parallel_leader_participation', 'parallel_setup_cost', + 'parallel_tuple_cost', 'parctl_min_cost', 'tcp_recv_timeout', + 'transaction_sync_naptime', 'transaction_sync_timeout', + 'twophase_clean_workers', 'wal_compression'] + for gucName in internalGucList: + if gucName in gucParamDict.keys(): + del gucParamDict[gucName] + + if dbInstance.instanceRole == DefaultValue.INSTANCE_ROLE_DATANODE: + # rebuild replconninfo + connInfo1 = None + dummyStandbyInst = None + peerInsts = g_clusterInfo.getPeerInstance(dbInstance) + if len(peerInsts) > 0: + (connInfo1, _) = ClusterInstanceConfig.\ + setReplConninfoForSinglePrimaryMultiStandbyCluster( + dbInstance, peerInsts, g_clusterInfo) + for i in range(len(connInfo1)): + connInfo = "replconninfo" + "%d" % (i + 1) + gucParamDict[connInfo] = "'%s'" % connInfo1[i] + + if len(gucParamDict) > 0: + gucStr = "" + for key, value in gucParamDict.items(): + gucStr += " -c \\\"%s=%s\\\" " % (key, value) + gucCmd += "&& gs_guc set -D %s %s" % (dbInstance.datadir, gucStr) + + # set guc parameters about DummpyStandbyConfig at DN + if dbInstance.instanceType == DUMMY_STANDBY_INSTANCE: + gucstr = "" + for entry in DefaultValue.getPrivateGucParamList().items(): + gucstr += " -c \"%s=%s\"" % (entry[0], entry[1]) + gucCmd += "&& gs_guc set -D %s %s " % (dbInstance.datadir, gucstr) + + g_logger.debug("Command for setting [%s] guc parameter:%s" % ( + dbInstance.datadir, gucCmd)) + + # save guc parameter to temp file + gucTempFile = "%s/setGucParam_%s.sh" % ( + g_opts.upgrade_bak_path, dbInstance.instanceId) + # Do not modify the write file operation. + # Escape processing of special characters in the content + cmd = "echo \"%s\" > %s" % (gucCmd, gucTempFile) + (status, output) = DefaultValue.retryGetstatusoutput(cmd) + if status != 0: + g_logger.debug("Command: %s. Error: \n%s" % (cmd, output)) + g_logger.logExit( + ErrorCode.GAUSS_502["GAUSS_50205"] % gucTempFile + + " Error: \n%s" % str( + output)) + g_file.changeOwner(g_opts.user, gucTempFile) + g_file.changeMode(DefaultValue.KEY_FILE_MODE, gucTempFile) + + # replace old guc file with sample file + newPostgresConf = "%s/share/postgresql/postgresql.conf.sample" \ + % g_opts.newClusterAppPath + if os.path.exists(newPostgresConf): + g_file.cpFile(newPostgresConf, oldPostgresConf) + g_file.changeMode(DefaultValue.KEY_FILE_MODE, oldPostgresConf) + + # set guc param + cmd = "sh %s" % gucTempFile + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + g_logger.debug("Command: %s. 
Error: \n%s" % (cmd, output)) + g_logger.logExit( + ErrorCode.GAUSS_514["GAUSS_51401"] % gucTempFile[:-3] + + " Error: \n%s" % str(output)) + + if synchronousStandbyNames != "": + g_logger.debug( + "Set the GUC value %s to synchronous_standby_names for %s" % ( + synchronousStandbyNames, oldPostgresConf)) + g_file.deleteLine(oldPostgresConf, + "^\\s*synchronous_standby_names\\s*=.*$") + g_file.writeFile( + oldPostgresConf, + ["synchronous_standby_names " + "= %s # standby servers that provide sync rep" + % synchronousStandbyNames]) + + # clean temp file + if os.path.isfile(gucTempFile): + os.remove(gucTempFile) + + except Exception as e: + g_logger.logExit(str(e)) + + +def syncClusterConfig(): + """ + function: sync newly added guc during upgrade, + for now we only sync CN/DN, gtm, cm_agent and cm_server + input: NA + output: NA + """ + DnInstances = g_dbNode.datanodes + if len(DnInstances) > 0: + try: + # sync postgresql.conf in parallel + pool = ThreadPool(DefaultValue.getCpuSet()) + pool.map(syncPostgresqlconf, DnInstances) + pool.close() + pool.join() + except Exception as e: + g_logger.logExit(str(e)) + + +def syncInstanceConfig(oldCmFile, newCmFile): + """ + function: sync instance config + input: NA + output:NA + """ + oldCmConfig = {} + newCmConfig = {} + newConfigItem = {} + try: + if not os.path.exists(oldCmFile): + g_logger.logExit(ErrorCode.GAUSS_502["GAUSS_50201"] % oldCmFile) + if not os.path.exists(newCmFile): + g_logger.logExit(ErrorCode.GAUSS_502["GAUSS_50201"] % newCmFile) + # Read and save old config file + with open(oldCmFile, 'r') as fp: + oldConfig = fp + for eachLine in oldConfig: + ParameterConfig = eachLine.strip() + index = ParameterConfig.find("=") + if index > 0 and ParameterConfig[0] != "#": + key = ParameterConfig[:index].strip() + value = ParameterConfig[index + 1:].strip() + oldCmConfig[key] = value + # Read and save new config file + with open(newCmFile, 'r') as fp: + newConfig = fp + for eachLine in newConfig: + ParameterConfig = eachLine.strip() + index = ParameterConfig.find("=") + if index > 0 and ParameterConfig[0] != "#": + key = ParameterConfig[:index].strip() + value = ParameterConfig[index + 1:].strip() + newCmConfig[key] = value + + # Filter new configuration parameters + for newConfig in newCmConfig.keys(): + keyExist = False + for oldConfig in oldCmConfig.keys(): + if oldConfig == newConfig: + keyExist = True + break + if not keyExist: + newConfigItem[newConfig] = newCmConfig[newConfig] + # Write new config item to old config file + if len(newConfigItem) > 0: + with open(oldCmFile, "a") as fp: + for ConfigItem in newConfigItem.keys(): + fp.write("\n%s = %s" % (ConfigItem, + newConfigItem[ConfigItem])) + fp.write("\n") + fp.flush() + + except Exception as e: + g_logger.logExit(str(e)) + + +def touchInstanceInitFile(): + """ + function: touch upgrade init file for every primary and standby instance + input: NA + output: NA + """ + g_logger.log("Touch init file.") + try: + InstanceList = [] + # find all DB instances need to touch + if len(g_dbNode.datanodes) != 0: + for eachInstance in g_dbNode.datanodes: + if (eachInstance.instanceType == MASTER_INSTANCE + or eachInstance.instanceType == STANDBY_INSTANCE): + InstanceList.append(eachInstance) + + # touch each instance parallelly + if len(InstanceList) != 0: + pool = ThreadPool(len(InstanceList)) + pool.map(touchOneInstanceInitFile, InstanceList) + pool.close() + pool.join() + else: + g_logger.debug( + "No instance found on this node, nothing need to do.") + return + + g_logger.log( + "Successfully 
created all instances init file on this node.") + except Exception as e: + g_logger.logExit(str(e)) + + +def initDbInfo(): + """ + function: create a init dbInfo dict + input: NA + output: NA + """ + tmpDbInfo = {} + tmpDbInfo['dbname'] = "" + tmpDbInfo['dboid'] = -1 + tmpDbInfo['spclocation'] = "" + tmpDbInfo['CatalogList'] = [] + tmpDbInfo['CatalogNum'] = 0 + return tmpDbInfo + + +def initCatalogInfo(): + """ + function: create a init catalog dict + input: NA + output: NA + """ + tmpCatalogInfo = {} + tmpCatalogInfo['relname'] = "" + tmpCatalogInfo['oid'] = -1 + tmpCatalogInfo['relfilenode'] = -1 + + return tmpCatalogInfo + + +def cpDirectory(srcDir, destDir): + """ + function: copy directory + input : NA + output : NA + """ + cmd = "rm -rf '%s' && cp -r -p '%s' '%s'" % (destDir, srcDir, destDir) + g_logger.debug("Backup commad:[%s]." % cmd) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception( + ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + "\nOutput:%s" % output) + + +def touchOneInstanceInitFile(instance): + """ + function: touch upgrade init file for this instance + input: NA + output: NA + """ + g_logger.debug( + "Touch instance init file. Instance data dir: %s" % instance.datadir) + dbInfoDict = {} + dbInfoDict["dblist"] = [] + dbInfoDict["dbnum"] = 0 + try: + # we touch init file by executing a simple query for every database + get_db_list_sql = """ + SELECT d.datname, d.oid, pg_catalog.pg_tablespace_location(t.oid) + AS spclocation + FROM pg_catalog.pg_database d + LEFT OUTER JOIN pg_catalog.pg_tablespace t + ON d.dattablespace = t.oid + ORDER BY 2;""" + g_logger.debug("Get database info command: \n%s" % get_db_list_sql) + (status, output) = ClusterCommand.execSQLCommand(get_db_list_sql, + g_opts.user, "", + instance.port, + "postgres", False, + "-m", + IsInplaceUpgrade=True) + if status != 0: + raise Exception( + ErrorCode.GAUSS_513["GAUSS_51300"] % get_db_list_sql + + " Error:\n%s" % output) + if output == "": + raise Exception(ErrorCode.GAUSS_529["GAUSS_52938"] + % "any database!!") + g_logger.debug("Get database info result: \n%s." % output) + resList = output.split('\n') + for each_line in resList: + tmpDbInfo = initDbInfo() + (datname, oid, spclocation) = each_line.split('|') + tmpDbInfo['dbname'] = datname.strip() + tmpDbInfo['dboid'] = oid.strip() + tmpDbInfo['spclocation'] = spclocation.strip() + dbInfoDict["dblist"].append(tmpDbInfo) + dbInfoDict["dbnum"] += 1 + + # connect each database, run a simple query + touch_sql = "SELECT 1;" + for each_db in dbInfoDict["dblist"]: + (status, output) = ClusterCommand.execSQLCommand( + touch_sql, + g_opts.user, "", + instance.port, + each_db["dbname"], + False, "-m", + IsInplaceUpgrade=True) + if status != 0 or not output.isdigit(): + raise Exception( + ErrorCode.GAUSS_513["GAUSS_51300"] % touch_sql + + " Error:\n%s" % output) + + except Exception as e: + raise Exception(str(e)) + + g_logger.debug( + "Successfully created instance init file. 
Instance data dir: %s" + % instance.datadir) + + +def getInstanceName(instance): + """ + get master instance name + """ + instance_name = "" + if instance.instanceRole == INSTANCE_ROLE_COODINATOR: + instance_name = "cn_%s" % instance.instanceId + elif instance.instanceRole == INSTANCE_ROLE_DATANODE: + if g_clusterInfo.isSingleInstCluster(): + # the instance type must be master or standby dn + peerInsts = g_clusterInfo.getPeerInstance(instance) + (instance_name, masterInst, _) = \ + ClusterInstanceConfig.\ + getInstanceInfoForSinglePrimaryMultiStandbyCluster( + instance, peerInsts) + else: + # if dn, it should be master or standby dn + if instance.instanceType == DUMMY_STANDBY_INSTANCE: + raise Exception( + "Invalid instance type:%s" % instance.instanceType) + peerInsts = g_clusterInfo.getPeerInstance(instance) + if len(peerInsts) != 2 and len(peerInsts) != 1: + raise Exception(ErrorCode.GAUSS_516["GAUSS_51620"] % "peer") + for i in range(len(peerInsts)): + if peerInsts[i].instanceType == MASTER_INSTANCE: + masterInst = peerInsts[i] + standbyInst = instance + instance_name = "dn_%d_%d" % (masterInst.instanceId, + standbyInst.instanceId) + elif peerInsts[i].instanceType == STANDBY_INSTANCE: + standbyInst = peerInsts[i] + masterInst = instance + instance_name = "dn_%d_%d" % (masterInst.instanceId, + standbyInst.instanceId) + else: + # we are searching master or standby dn instance, + # if dummy dn, just continue + continue + if instance_name == "": + raise Exception("Can not get instance name!") + else: + raise Exception("Invalid node type:%s" % instance.instanceRole) + + return instance_name.strip() + + +def getStandbyInstance(instance): + """ + function: get standby instance of input master instance + input: NA + output: NA + """ + if instance.instanceType != MASTER_INSTANCE: + raise Exception(ErrorCode.GAUSS_529["GAUSS_52940"] + % instance.instanceType) + + if instance.instanceRole != INSTANCE_ROLE_DATANODE: + raise Exception(ErrorCode.GAUSS_529["GAUSS_52941"] % + instance.instanceRole) + + peerInsts = g_clusterInfo.getPeerInstance(instance) + if len(peerInsts) == 0: + return + standbyInst = None + for i in iter(peerInsts): + if i.instanceType == STANDBY_INSTANCE: + standbyInst = i + if not standbyInst: + raise Exception( + "Can not find standby instance of instance [%s]!" + % instance.datadir) + + return standbyInst + + +def getJsonFile(instance, backup_path): + """ + function: get json file + input : instance, backup_path + output : db_and_catalog_info_file_name: str + """ + try: + instance_name = getInstanceName(instance) + # load db and catalog info from json file + if instance.instanceRole == INSTANCE_ROLE_COODINATOR: + db_and_catalog_info_file_name = \ + "%s/cn_db_and_catalog_info_%s.json" % ( + backup_path, instance_name) + elif instance.instanceRole == INSTANCE_ROLE_DATANODE: + if instance.instanceType == MASTER_INSTANCE or\ + instance.instanceType == STANDBY_INSTANCE: + db_and_catalog_info_file_name = \ + "%s/dn_db_and_catalog_info_%s.json" % ( + backup_path, instance_name) + else: + raise Exception( + "Invalid instance type:%s" % instance.instanceType) + else: + raise Exception("Invalid instance role:%s" % instance.instanceRole) + return db_and_catalog_info_file_name + except Exception as e: + raise Exception(str(e)) + + +def __backup_base_folder(instance): + """ + """ + g_logger.debug("Backup instance catalog physical files. 
" + "Instance data dir: %s" % instance.datadir) + + backup_path = "%s/oldClusterDBAndRel/" % g_opts.upgrade_bak_path + db_and_catalog_info_file_name = getJsonFile(instance, backup_path) + + fp = open(db_and_catalog_info_file_name, 'r') + dbInfoStr = fp.read() + fp.close() + dbInfoDict = json.loads(dbInfoStr) + + # get instance name + instance_name = getInstanceName(instance) + + # backup base folder + for each_db in dbInfoDict["dblist"]: + if each_db["spclocation"] != "": + if each_db["spclocation"].startswith('/'): + tbsBaseDir = each_db["spclocation"] + else: + tbsBaseDir = "%s/pg_location/%s" % (instance.datadir, + each_db["spclocation"]) + pg_catalog_base_dir = "%s/%s_%s/%d" % ( + tbsBaseDir, DefaultValue.TABLESPACE_VERSION_DIRECTORY, + instance_name, int(each_db["dboid"])) + else: + pg_catalog_base_dir = "%s/base/%d" % (instance.datadir, + int(each_db["dboid"])) + # for base folder, template0 need handle specially + if each_db["dbname"] == 'template0': + pg_catalog_base_back_dir = "%s_bak" % pg_catalog_base_dir + cpDirectory(pg_catalog_base_dir, pg_catalog_base_back_dir) + g_logger.debug( + "Template0 has been backed up from {0} to {1}".format( + pg_catalog_base_dir, pg_catalog_base_back_dir)) + continue + + # handle other db's base folder + if len(each_db["CatalogList"]) <= 0: + raise Exception( + "Can not find any catalog in database %s" % each_db["dbname"]) + for each_catalog in each_db["CatalogList"]: + # main/vm/fsm -- main.1 .. + main_file = "%s/%d" % ( + pg_catalog_base_dir, int(each_catalog['relfilenode'])) + if not os.path.isfile(main_file): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50210"] % main_file) + cmd = "cp -f -p '%s' '%s_bak'" % (main_file, main_file) + g_logger.debug( + "{0} needs to be backed up to {0}_bak".format(main_file)) + seg_idx = 1 + while 1: + seg_file = "%s/%d.%d" % (pg_catalog_base_dir, + int(each_catalog['relfilenode']), + seg_idx) + if os.path.isfile(seg_file): + cmd += "&& cp -f -p '%s' '%s_bak'" % (seg_file, seg_file) + seg_idx += 1 + else: + break + g_logger.debug("seg_file needs to be backed up") + vm_file = "%s/%d_vm" % (pg_catalog_base_dir, + int(each_catalog['relfilenode'])) + if os.path.isfile(vm_file): + cmd += "&& cp -f -p '%s' '%s_bak'" % (vm_file, vm_file) + g_logger.debug( + "{0} needs to be backed up to {0}_bak".format(vm_file)) + fsm_file = "%s/%d_fsm" % (pg_catalog_base_dir, + int(each_catalog['relfilenode'])) + if os.path.isfile(fsm_file): + cmd += "&& cp -f -p '%s' '%s_bak'" % (fsm_file, fsm_file) + g_logger.debug( + "{0} needs to be backed up to {0}_bak".format(fsm_file)) + (status, output) = DefaultValue.retryGetstatusoutput(cmd, 2, 5) + if status != 0: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + "\nOutput:%s" % output) + + # special files pg_filenode.map pg_internal.init + cmd = "" + pg_filenode_map_file = "%s/pg_filenode.map" % pg_catalog_base_dir + if os.path.isfile(pg_filenode_map_file): + if cmd == "": + cmd = "cp -f -p '%s' '%s_bak'" % ( + pg_filenode_map_file, pg_filenode_map_file) + else: + cmd += "&& cp -f -p '%s' '%s_bak'" % ( + pg_filenode_map_file, pg_filenode_map_file) + g_logger.debug("{0} needs to be backed up to {0}_bak".format( + pg_filenode_map_file)) + pg_internal_init_file = "%s/pg_internal.init" % pg_catalog_base_dir + if os.path.isfile(pg_internal_init_file): + if cmd == "": + cmd = "cp -f -p '%s' '%s_bak'" % ( + pg_internal_init_file, pg_internal_init_file) + else: + cmd += "&& cp -f -p '%s' '%s_bak'" % ( + pg_internal_init_file, pg_internal_init_file) + g_logger.debug("{0} needs to be 
backed up to {0}_bak".format(
+                pg_internal_init_file))
+        # only run the chained copy when at least one special file was found
+        if cmd != "":
+            (status, output) = DefaultValue.retryGetstatusoutput(cmd, 2, 5)
+            if status != 0:
+                raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd +
+                                "\nOutput:%s" % output)
+
+    g_logger.debug("Successfully backed up instance catalog physical files."
+                   " Instance data dir: %s" % instance.datadir)
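Both __backup_base_folder above and __restore_base_folder below accumulate their cp steps into a single "&&"-chained shell string and execute it once, only when at least one file qualified. A compact sketch of that accumulation pattern; the helper name and paths are illustrative, not from the patch:

    import os
    import subprocess

    def backup_if_present(paths):
        # chain one "cp" per existing file; run nothing if no file qualifies
        cmd = ""
        for path in paths:
            if not os.path.isfile(path):
                continue
            step = "cp -f -p '%s' '%s_bak'" % (path, path)
            cmd = step if cmd == "" else cmd + " && " + step
        if cmd != "":
            status, output = subprocess.getstatusoutput(cmd)
            if status != 0:
                raise Exception("Backup failed: %s\nOutput: %s"
                                % (cmd, output))

Chaining with "&&" keeps the whole batch to one shell invocation and stops at the first failing copy.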
+def __restore_base_folder(instance):
+    """
+    function: restore instance catalog physical files from the *_bak copies
+    """
+    g_logger.debug("Restore instance base folders. "
+                   "Instance data dir: {0}".format(instance.datadir))
+    backup_path = "%s/oldClusterDBAndRel/" % g_opts.upgrade_bak_path
+    # get instance name
+    instance_name = getInstanceName(instance)
+
+    # load db and catalog info from json file
+    if instance.instanceRole == INSTANCE_ROLE_COODINATOR:
+        db_and_catalog_info_file_name = \
+            "%s/cn_db_and_catalog_info_%s.json" % (backup_path, instance_name)
+    elif instance.instanceRole == INSTANCE_ROLE_DATANODE:
+        if instance.instanceType == MASTER_INSTANCE or \
+                instance.instanceType == STANDBY_INSTANCE:
+            db_and_catalog_info_file_name = \
+                "%s/dn_db_and_catalog_info_%s.json" % (
+                    backup_path, instance_name)
+        else:
+            raise Exception("Invalid instance type:%s"
+                            % instance.instanceType)
+    else:
+        raise Exception("Invalid instance role:%s" % instance.instanceRole)
+    fp = open(db_and_catalog_info_file_name, 'r')
+    dbInfoStr = fp.read()
+    fp.close()
+    dbInfoDict = json.loads(dbInfoStr)
+
+    # restore base folder
+    for each_db in dbInfoDict["dblist"]:
+        if each_db["spclocation"] != "":
+            if each_db["spclocation"].startswith('/'):
+                tbsBaseDir = each_db["spclocation"]
+            else:
+                tbsBaseDir = "%s/pg_location/%s" % (
+                    instance.datadir, each_db["spclocation"])
+            pg_catalog_base_dir = "%s/%s_%s/%d" % (
+                tbsBaseDir, DefaultValue.TABLESPACE_VERSION_DIRECTORY,
+                instance_name, int(each_db["dboid"]))
+        else:
+            pg_catalog_base_dir = "%s/base/%d" % (
+                instance.datadir, int(each_db["dboid"]))
+        # for base folder, template0 needs to be handled specially
+        if each_db["dbname"] == 'template0':
+            pg_catalog_base_back_dir = "%s_bak" % pg_catalog_base_dir
+            cpDirectory(pg_catalog_base_back_dir, pg_catalog_base_dir)
+            g_logger.debug(
+                "Template0 has been restored from {0} to {1}".format(
+                    pg_catalog_base_back_dir, pg_catalog_base_dir))
+            continue
+
+        # handle other db's base folder
+        if len(each_db["CatalogList"]) <= 0:
+            raise Exception("Can not find any catalog in database %s" %
+                            each_db["dbname"])
+
+        for each_catalog in each_db["CatalogList"]:
+            # main/vm/fsm -- main.1 ..
+            main_file = "%s/%d" % (pg_catalog_base_dir,
+                                   int(each_catalog['relfilenode']))
+            if not os.path.isfile(main_file):
+                g_logger.debug("Instance data dir: %s, database: %s, "
+                               "relnodefile: %s does not exist."
+                               % (instance.datadir, each_db["dbname"],
+                                  main_file))
+
+            cmd = "cp -f -p '%s_bak' '%s'" % (main_file, main_file)
+            g_logger.debug(
+                "{0} needs to be restored from {0}_bak".format(main_file))
+            seg_idx = 1
+            while 1:
+                seg_file = "%s/%d.%d" % (pg_catalog_base_dir,
+                                         int(each_catalog['relfilenode']),
+                                         seg_idx)
+                seg_file_bak = "%s_bak" % seg_file
+                if os.path.isfile(seg_file):
+                    if os.path.isfile(seg_file_bak):
+                        cmd += "&& cp -f -p '%s' '%s'" % (seg_file_bak,
+                                                          seg_file)
+                    else:
+                        cmd += "&& rm -f '%s'" % seg_file
+                    seg_idx += 1
+                else:
+                    break
+            g_logger.debug("seg_file needs to be restored")
+
+            vm_file = "%s/%d_vm" % (pg_catalog_base_dir,
+                                    int(each_catalog['relfilenode']))
+            vm_file_bak = "%s_bak" % vm_file
+            if os.path.isfile(vm_file):
+                if os.path.isfile(vm_file_bak):
+                    cmd += "&& cp -f -p '%s' '%s'" % (vm_file_bak, vm_file)
+                else:
+                    cmd += "&& rm -f '%s'" % vm_file
+                g_logger.debug(
+                    "{0} needs to be restored from {0}_bak".format(vm_file))
+            fsm_file = "%s/%d_fsm" % (pg_catalog_base_dir,
+                                      int(each_catalog['relfilenode']))
+            fsm_file_bak = "%s_bak" % fsm_file
+            if os.path.isfile(fsm_file):
+                if os.path.isfile(fsm_file_bak):
+                    cmd += "&& cp -f -p '%s' '%s'" % (fsm_file_bak, fsm_file)
+                else:
+                    cmd += "&& rm -f '%s'" % fsm_file
+                g_logger.debug("{0} needs to be restored from {0}_bak".format(
+                    fsm_file))
+            (status, output) = DefaultValue.retryGetstatusoutput(cmd, 2, 5)
+            if status != 0:
+                raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd +
+                                "\nOutput:%s" % output)
+
+        # special files pg_filenode.map pg_internal.init
+        cmd = ""
+        pg_filenode_map_file = "%s/pg_filenode.map" % pg_catalog_base_dir
+        if os.path.isfile(pg_filenode_map_file):
+            if cmd == "":
+                cmd = "cp -f -p '%s_bak' '%s'" % (pg_filenode_map_file,
+                                                  pg_filenode_map_file)
+            else:
+                cmd += "&& cp -f -p '%s_bak' '%s'" % (pg_filenode_map_file,
+                                                      pg_filenode_map_file)
+            g_logger.debug("{0} needs to be restored from {0}_bak".format(
+                pg_filenode_map_file))
+
+        pg_internal_init_file = "%s/pg_internal.init" % pg_catalog_base_dir
+        if os.path.isfile(pg_internal_init_file):
+            if cmd == "":
+                cmd = "cp -f -p '%s_bak' '%s'" % (pg_internal_init_file,
+                                                  pg_internal_init_file)
+            else:
+                cmd += "&& cp -f -p '%s_bak' '%s'" % (pg_internal_init_file,
+                                                      pg_internal_init_file)
+            g_logger.debug("{0} needs to be restored from {0}_bak".format(
+                pg_internal_init_file))
+
+        # only run the chained copy when at least one special file was found
+        if cmd != "":
+            (status, output) = DefaultValue.retryGetstatusoutput(cmd, 2, 5)
+            if status != 0:
+                raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd +
+                                "\nOutput:%s" % output)
+    g_logger.debug("Successfully restored instance base folders. Instance"
+                   " data dir: {0}".format(instance.datadir))
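The chained copies above run through DefaultValue.retryGetstatusoutput(cmd, 2, 5), whose implementation is not part of this hunk. A sketch of plausible retry semantics, assuming the two trailing arguments are the retry count and the per-retry sleep in seconds:

    import subprocess
    import time

    def retry_getstatusoutput(cmd, retries=2, delay=5):
        # run once, then retry a few times with a pause between attempts
        status, output = subprocess.getstatusoutput(cmd)
        for _ in range(retries):
            if status == 0:
                break
            time.sleep(delay)
            status, output = subprocess.getstatusoutput(cmd)
        return status, output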
+def cleanBackUpDir(backupDir):
+    """
+    function: clean backup dir
+    input : backupDir
+    output : NA
+    """
+    # clean the backupDir folder. First, kill any pending backup process
+    bakDir = "%s_bak" % backupDir
+    backcmd = "cp -r -p %s %s" % (backupDir, bakDir)
+    killCmd = DefaultValue.killInstProcessCmd(backcmd, False, 9, False)
+    DefaultValue.execCommandLocally(killCmd)
+    # Then do clean
+    if os.path.isdir(bakDir):
+        g_file.removeDirectory(bakDir)
+
+
+def checkExistsVersion(instanceNames, cooInst, curCommitid):
+    """
+    function: check whether any instance still reports a version whose
+              commit id differs from curCommitid
+    input : instanceNames, cooInst, curCommitid
+    output : needKill False/True
+    """
+    needKill = False
+    sql = ""
+    for name in instanceNames:
+        sql += "execute direct on (%s) 'select version()';" % name
+    (status, output) = ClusterCommand.remoteSQLCommand(
+        sql, g_opts.user,
+        cooInst.hostname,
+        cooInst.port, False,
+        DefaultValue.DEFAULT_DB_NAME,
+        IsInplaceUpgrade=True)
+    g_logger.debug("Command to check version: %s" % sql)
+    if status != 0 or ClusterCommand.findErrorInSql(output):
+        raise Exception(
+            ErrorCode.GAUSS_513["GAUSS_51300"] % sql + " Error: \n%s" % str(
+                output))
+    if not output:
+        raise Exception(ErrorCode.GAUSS_516["GAUSS_51654"])
+    resList = output.split('\n')
+    pattern = re.compile(r'[(](.*?)[)]')
+    for record in resList:
+        versionInBrackets = re.findall(pattern, record)
+        commitid = versionInBrackets[0].split(" ")[-1]
+        if commitid != curCommitid:
+            needKill = True
+            break
+    return needKill
+
+
+def getTimeFormat(seconds):
+    """
+    format seconds to h-m-s
+    input: int
+    output: str (0 when the input is 0)
+    """
+    seconds = int(seconds)
+    if seconds == 0:
+        return 0
+    # Convert the seconds to standard time; use integer division so the
+    # hour and minute parts stay whole numbers
+    hour = seconds // 3600
+    minute = (seconds - hour * 3600) // 60
+    s = seconds % 60
+    resultstr = ""
+    if hour != 0:
+        resultstr += "%dh" % hour
+    if minute != 0:
+        resultstr += "%dm" % minute
+    return "%s%ds" % (resultstr, s)
+
+
+def backupConfig():
+    """
+    function: backup config
+    output: none
+    """
+    try:
+        bakPath = g_opts.upgrade_bak_path
+        clusterAppPath = g_clusterInfo.appPath
+
+        # Backup cluster_static_config and cluster_dynamic_config,
+        # logic_cluster_name.txt
+        # cluster_static_config* at least one
+        cmd = "cp -f -p '%s'/bin/*cluster_static_config* '%s'" % (
+            clusterAppPath, bakPath)
+        dynamic_config = "%s/bin/cluster_dynamic_config" % clusterAppPath
+        logicalNameFile = "%s/bin/logic_cluster_name.txt" % clusterAppPath
+        cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s';fi)" % (
+            dynamic_config, dynamic_config, bakPath)
+        cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s';fi)" % (
+            logicalNameFile, logicalNameFile, bakPath)
+        g_logger.debug("Backup command: %s" % cmd)
+        DefaultValue.execCommandLocally(cmd)
+
+        # Backup libcgroup config
+        MAX_PARA_NUMBER = 20
+        cgroup_file_list = []
+        gs_cgroup_path = "%s/etc" % clusterAppPath
+        file_name_list = os.listdir(gs_cgroup_path)
+        for file_name in file_name_list:
+            if file_name.endswith('.cfg'):
+                gs_cgroup_config_file = "%s/%s" % (gs_cgroup_path, file_name)
+                cgroup_file_list.append(gs_cgroup_config_file)
+
+        # build cmd string list
+        # Every 20 records merged into one
+        i = 0
+        cmdCgroup = ""
+        cmdList = []
+        for gs_cgroup_config_file in cgroup_file_list:
+            i += 1
+            cmdCgroup += " && (if [ -f '%s' ];then cp -f -p '%s' '%s';fi)" % (
+                gs_cgroup_config_file, gs_cgroup_config_file, bakPath)
+            if i % MAX_PARA_NUMBER == 0:
+                cmdList.append(cmdCgroup)
+                i = 0
+                cmdCgroup = ""
+        if cmdCgroup != "":
+            cmdList.append(cmdCgroup)
+        for exeCmd in cmdList:
+            # skip the leading " &&" before logging and executing
+            g_logger.debug("Backup command: %s" % exeCmd[3:])
+            DefaultValue.execCommandLocally(exeCmd[3:])
+
+        # Backup libsimsearch etc files and libs files
+        searchConfigFile =
"%s/etc/searchletConfig.yaml" % clusterAppPath + cmd = "(if [ -f '%s' ];then cp -f -p '%s' '%s';fi)" % ( + searchConfigFile, searchConfigFile, bakPath) + searchIniFile = "%s/etc/searchServer.ini" % clusterAppPath + cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s';fi)" % ( + searchIniFile, searchIniFile, bakPath) + cmd += " && (if [ -d '%s/lib/libsimsearch' ];" \ + "then cp -r '%s/lib/libsimsearch' '%s';fi)" % ( + clusterAppPath, clusterAppPath, bakPath) + g_logger.debug("Backup command: %s" % cmd) + DefaultValue.execCommandLocally(cmd) + + # Backup library file and database size file + cmd = "cp -r '%s'/lib/postgresql/pg_plugin '%s'" % ( + clusterAppPath, bakPath) + backup_dbsize = "%s/bin/%s" % ( + clusterAppPath, DefaultValue.DB_SIZE_FILE) + cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s';fi)" % ( + backup_dbsize, backup_dbsize, bakPath) + g_logger.debug("Backup command: %s" % cmd) + DefaultValue.execCommandLocally(cmd) + + # sync kerberos conf files + krbConfigFile = "%s/kerberos" % clusterAppPath + cmd = "(if [ -d '%s' ];then cp -r '%s' '%s';fi)" % ( + krbConfigFile, krbConfigFile, bakPath) + cmd += "&& (if [ -d '%s/var/krb5kdc' ];then mkdir %s/var;" \ + " cp -r '%s/var/krb5kdc' '%s/var/';fi)" % ( + clusterAppPath, bakPath, clusterAppPath, bakPath) + g_logger.debug("Grey upgrade sync command: %s" % cmd) + DefaultValue.execCommandLocally(cmd) + + # backup obsserver.key.cipher/obsserver.key.rand and server.key. + # cipher/server.key.rand and datasource.key.cipher/datasource.key.rand + OBS_cipher_key_bak_file = \ + "%s/bin/obsserver.key.cipher" % clusterAppPath + cmd = "(if [ -f '%s' ];then cp -f -p '%s' '%s';fi)" % ( + OBS_cipher_key_bak_file, OBS_cipher_key_bak_file, bakPath) + OBS_rand_key_bak_file = "%s/bin/obsserver.key.rand" % clusterAppPath + cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s';fi)" % ( + OBS_rand_key_bak_file, OBS_rand_key_bak_file, bakPath) + trans_encrypt_cipher_key_bak_file = \ + "%s/bin/trans_encrypt.key.cipher" % clusterAppPath + cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s';fi)" % ( + trans_encrypt_cipher_key_bak_file, + trans_encrypt_cipher_key_bak_file, + bakPath) + trans_encrypt_rand_key_bak_file = \ + "%s/bin/trans_encrypt.key.rand" % clusterAppPath + cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s';fi)" % ( + trans_encrypt_rand_key_bak_file, trans_encrypt_rand_key_bak_file, + bakPath) + trans_encrypt_cipher_ak_sk_key_bak_file = \ + "%s/bin/trans_encrypt_ak_sk.key" % clusterAppPath + cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s';fi)" % ( + trans_encrypt_cipher_ak_sk_key_bak_file, + trans_encrypt_cipher_ak_sk_key_bak_file, bakPath) + server_cipher_key_bak_file = \ + "%s/bin/server.key.cipher" % clusterAppPath + cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s';fi)" % ( + server_cipher_key_bak_file, server_cipher_key_bak_file, bakPath) + server_rand_key_bak_file = "%s/bin/server.key.rand" % clusterAppPath + cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s';fi)" % ( + server_rand_key_bak_file, server_rand_key_bak_file, bakPath) + datasource_cipher = "%s/bin/datasource.key.cipher" % clusterAppPath + cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s';fi)" % ( + datasource_cipher, datasource_cipher, bakPath) + datasource_rand = "%s/bin/datasource.key.rand" % clusterAppPath + cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s';fi)" % ( + datasource_rand, datasource_rand, bakPath) + tde_key_cipher = "%s/bin/gs_tde_keys.cipher" % clusterAppPath + cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s';fi)" % ( + tde_key_cipher, 
tde_key_cipher, bakPath) + g_logger.debug("Backup command: %s" % cmd) + DefaultValue.execCommandLocally(cmd) + + # backup utilslib + utilslib = "%s/utilslib" % clusterAppPath + cmd = "if [ -d '%s' ];then cp -r '%s' '%s';fi" % ( + utilslib, utilslib, bakPath) + g_logger.debug("Backup command: %s" % cmd) + DefaultValue.execCommandLocally(cmd) + + # backup ca.key,etcdca.crt, client.key and client.crt + CA_key_file = "%s/share/sslcert/etcd/ca.key" % clusterAppPath + cmd = "(if [ -f '%s' ];then cp -f -p '%s' '%s';fi)" % ( + CA_key_file, CA_key_file, bakPath) + CA_cert_file = "%s/share/sslcert/etcd/etcdca.crt" % clusterAppPath + cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s';fi)" % ( + CA_cert_file, CA_cert_file, bakPath) + client_key_file = "%s/share/sslcert/etcd/client.key" % clusterAppPath + cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s';fi)" % ( + client_key_file, client_key_file, bakPath) + client_cert_file = "%s/share/sslcert/etcd/client.crt" % clusterAppPath + cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s';fi)" % ( + client_cert_file, client_cert_file, bakPath) + if int(g_opts.oldVersion) >= 92019: + client_key_cipher_file = \ + "%s/share/sslcert/etcd/client.key.cipher" % clusterAppPath + cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s';fi)" % ( + client_key_cipher_file, client_key_cipher_file, bakPath) + client_key_rand_file = \ + "%s/share/sslcert/etcd/client.key.rand" % clusterAppPath + cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s';fi)" % ( + client_key_rand_file, client_key_rand_file, bakPath) + etcd_key_cipher_file = \ + "%s/share/sslcert/etcd/etcd.key.cipher" % clusterAppPath + cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s';fi)" % ( + etcd_key_cipher_file, etcd_key_cipher_file, bakPath) + etcd_key_rand_file = \ + "%s/share/sslcert/etcd/etcd.key.rand" % clusterAppPath + cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s';fi)" % ( + etcd_key_rand_file, etcd_key_rand_file, bakPath) + g_logger.debug("Backup command: %s" % cmd) + DefaultValue.execCommandLocally(cmd) + + # backup java UDF + javadir = "'%s'/lib/postgresql/java" % clusterAppPath + cmd = "if [ -d '%s' ];then cp -r '%s' '%s';fi" % ( + javadir, javadir, bakPath) + g_logger.debug("Backup command: %s" % cmd) + DefaultValue.execCommandLocally(cmd) + + # backup postGIS + cmdPostGis = "" + for sofile in g_opts.postgisSOFileList.keys(): + absPath = os.path.join(clusterAppPath, + g_opts.postgisSOFileList[sofile]) + srcFile = "'%s'/%s" % (absPath, sofile) + cmdPostGis += " && (if [ -f %s ];then cp -f -p %s '%s';fi)" % ( + srcFile, srcFile, bakPath) + # skip " &&" + cmd = cmdPostGis[3:] + g_logger.debug("Backup command: %s" % cmd) + DefaultValue.execCommandLocally(cmd) + + # backup extension library and config files + hadoop_odbc_connector = \ + "%s/lib/postgresql/hadoop_odbc_connector.so" % clusterAppPath + extension_config01 = \ + "%s/share/postgresql/extension/hadoop_odbc_connector--1.0.sql" \ + % clusterAppPath + extension_config02 = \ + "%s/share/postgresql/extension/hadoop_odbc_connector.control" \ + % clusterAppPath + extension_config03 = \ + "%s/share/postgresql/extension/" \ + "hadoop_odbc_connector--unpackaged--1.0.sql" % clusterAppPath + cmd = "(if [ -f '%s' ];then cp -f -p '%s' '%s';fi)" % ( + hadoop_odbc_connector, hadoop_odbc_connector, bakPath) + cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s';fi)" % ( + extension_config01, extension_config01, bakPath) + cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s';fi)" % ( + extension_config02, extension_config02, bakPath) + cmd += " && (if [ -f 
+        for sofile in g_opts.postgisSOFileList.keys():
+            absPath = os.path.join(clusterAppPath,
+                                   g_opts.postgisSOFileList[sofile])
+            srcFile = "'%s'/%s" % (absPath, sofile)
+            cmdPostGis += " && (if [ -f %s ];then cp -f -p %s '%s';fi)" % (
+                srcFile, srcFile, bakPath)
+        # skip " &&"
+        cmd = cmdPostGis[3:]
+        g_logger.debug("Backup command: %s" % cmd)
+        DefaultValue.execCommandLocally(cmd)
+
+        # backup extension library and config files
+        hadoop_odbc_connector = \
+            "%s/lib/postgresql/hadoop_odbc_connector.so" % clusterAppPath
+        extension_config01 = \
+            "%s/share/postgresql/extension/hadoop_odbc_connector--1.0.sql" \
+            % clusterAppPath
+        extension_config02 = \
+            "%s/share/postgresql/extension/hadoop_odbc_connector.control" \
+            % clusterAppPath
+        extension_config03 = \
+            "%s/share/postgresql/extension/" \
+            "hadoop_odbc_connector--unpackaged--1.0.sql" % clusterAppPath
+        cmd = "(if [ -f '%s' ];then cp -f -p '%s' '%s';fi)" % (
+            hadoop_odbc_connector, hadoop_odbc_connector, bakPath)
+        cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s';fi)" % (
+            extension_config01, extension_config01, bakPath)
+        cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s';fi)" % (
+            extension_config02, extension_config02, bakPath)
+        cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s';fi)" % (
+            extension_config03, extension_config03, bakPath)
+        g_logger.debug("Backup command: %s" % cmd)
+        DefaultValue.execCommandLocally(cmd)
+
+        # backup dict file and grpc files
+        dictFileDir = "'%s'/share/postgresql/tsearch_data" % clusterAppPath
+        grpcFileDir = "'%s'/share/sslcert/grpc" % clusterAppPath
+        cmd = "if [ -d '%s' ];then cp -r '%s' '%s';fi && " % (dictFileDir,
+                                                              dictFileDir,
+                                                              bakPath)
+        cmd += "if [ -d '%s' ];then cp -r '%s' '%s';fi" % (grpcFileDir,
+                                                           grpcFileDir,
+                                                           bakPath)
+        g_logger.debug("Backup command: %s" % cmd)
+        DefaultValue.execCommandLocally(cmd)
+
+        # backup gtm.control and gtm.sequence
+        if len(g_dbNode.gtms) > 0:
+            gtm_control = "%s/gtm.control" % g_dbNode.gtms[0].datadir
+            gtm_sequence = "%s/gtm.sequence" % g_dbNode.gtms[0].datadir
+            cmd = "(if [ -f '%s' ];" \
+                  "then cp -f -p '%s' '%s/gtm.control.bak';fi)" % \
+                  (gtm_control, gtm_control, bakPath)
+            cmd += " && (if [ -f '%s' ];" \
+                   "then cp -f -p '%s' '%s/gtm.sequence.bak';fi)" % \
+                   (gtm_sequence, gtm_sequence, bakPath)
+            g_logger.debug("Backup command: %s" % cmd)
+            DefaultValue.execCommandLocally(cmd)
+    except Exception as e:
+        raise Exception(str(e))
+
+
+def restoreConfig():
+    """
+    function: restore config
+    output: none
+    """
+    try:
+        bakPath = g_opts.upgrade_bak_path
+        clusterAppPath = g_opts.newClusterAppPath
+        # init old cluster config
+        oldStaticConfigFile = os.path.join(
+            g_opts.oldClusterAppPath, "bin/cluster_static_config")
+        oldStaticClusterInfo = dbClusterInfo()
+        oldStaticClusterInfo.initFromStaticConfig(g_opts.user,
+                                                  oldStaticConfigFile)
+        # flush new static configuration
+        newStaticConfig = os.path.join(
+            clusterAppPath, "bin/cluster_static_config")
+        if not os.path.isfile(newStaticConfig):
+            raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] %
+                            os.path.realpath(newStaticConfig))
+        g_file.removeFile(newStaticConfig)
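+        # Rebuild the new static config from the old cluster's node list and
+        # local node id, so the topology recorded before the upgrade is
+        # carried over unchanged into the new install path.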
+        newStaticClusterInfo = dbClusterInfo()
+        newStaticClusterInfo.saveToStaticConfig(
+            newStaticConfig, oldStaticClusterInfo.localNodeId,
+            oldStaticClusterInfo.dbNodes, upgrade=True)
+        # restore dynamic configuration
+        dynamic_config = "%s/cluster_dynamic_config" % bakPath
+        cmd = "(if [ -f '%s' ];then cp -f -p '%s' '%s/bin/';fi)" % (
+            dynamic_config, dynamic_config, clusterAppPath)
+        # no need to restore alarm.conf here,
+        # because it has already been done in upgradeNodeApp
+        g_logger.debug("Restore command: %s" % cmd)
+        DefaultValue.execCommandLocally(cmd)
+
+        # restore libsimsearch etc files and libsimsearch lib files
+        searchConfigFile = "%s/searchletConfig.yaml" % bakPath
+        cmd = "(if [ -f '%s' ];" \
+              "then cp -f -p '%s' '%s/etc/searchletConfig.yaml'; fi)" % (
+                  searchConfigFile, searchConfigFile, clusterAppPath)
+        searchIniFile = "%s/searchServer.ini" % bakPath
+        cmd += " && (if [ -f '%s' ];" \
+               "then cp -f -p '%s' '%s/etc/searchServer.ini'; fi)" % (
+                   searchIniFile, searchIniFile, clusterAppPath)
+        cmd += " && (if [ -d '%s/libsimsearch' ];" \
+               "then cp -r '%s/libsimsearch' '%s/lib/';fi)" % (
+                   bakPath, bakPath, clusterAppPath)
+        g_logger.debug("Restore command: %s" % cmd)
+        DefaultValue.execCommandLocally(cmd)
+
+        # restore library file,
+        # database size file and initialized configuration parameter files
+        cmd = "cp -r '%s/pg_plugin' '%s'/lib/postgresql" % (
+            bakPath, clusterAppPath)
+        backup_dbsize = os.path.join(bakPath, DefaultValue.DB_SIZE_FILE)
+        cmd += " && (if [ -f '%s' ];then cp '%s' '%s/bin';fi)" % (
+            backup_dbsize, backup_dbsize, clusterAppPath)
+        g_logger.debug("Restore command: %s" % cmd)
+        DefaultValue.execCommandLocally(cmd)
+
+        # sync kerberos conf files
+        cmd = "(if [ -d '%s/kerberos' ];then cp -r '%s/kerberos' '%s/';fi)" % (
+            bakPath, bakPath, clusterAppPath)
+        cmd += "&& (if [ -d '%s/var/krb5kdc' ];" \
+               "then mkdir %s/var; cp -r '%s/var/krb5kdc' '%s/var/';fi)" % (
+                   bakPath, clusterAppPath, bakPath, clusterAppPath)
+        g_logger.debug("Restore command: %s" % cmd)
+        DefaultValue.execCommandLocally(cmd)
+
+        # restore obsserver.key.cipher/obsserver.key.rand,
+        # server.key.cipher/server.key.rand and
+        # datasource.key.cipher/datasource.key.rand
+        OBS_cipher_key_bak_file = "%s/obsserver.key.cipher" % bakPath
+        cmd = "(if [ -f '%s' ];then cp -f -p '%s' '%s/bin/';fi)" % (
+            OBS_cipher_key_bak_file, OBS_cipher_key_bak_file, clusterAppPath)
+        OBS_rand_key_bak_file = "%s/obsserver.key.rand" % bakPath
+        cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s/bin/';fi)" % (
+            OBS_rand_key_bak_file, OBS_rand_key_bak_file, clusterAppPath)
+        trans_encrypt_cipher_key_bak_file = \
+            "%s/trans_encrypt.key.cipher" % bakPath
+        cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s/bin/';fi)" % (
+            trans_encrypt_cipher_key_bak_file,
+            trans_encrypt_cipher_key_bak_file,
+            clusterAppPath)
+        trans_encrypt_rand_key_bak_file = "%s/trans_encrypt.key.rand" % bakPath
+        cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s/bin/';fi)" % (
+            trans_encrypt_rand_key_bak_file, trans_encrypt_rand_key_bak_file,
+            clusterAppPath)
+        trans_encrypt_cipher_ak_sk_key_bak_file = \
+            "%s/trans_encrypt_ak_sk.key" % bakPath
+        cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s/bin/';fi)" % (
+            trans_encrypt_cipher_ak_sk_key_bak_file,
+            trans_encrypt_cipher_ak_sk_key_bak_file, clusterAppPath)
+        server_cipher_key_bak_file = "%s/server.key.cipher" % bakPath
+        cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s/bin/';fi)" % (
+            server_cipher_key_bak_file, server_cipher_key_bak_file,
+            clusterAppPath)
+        server_rand_key_bak_file = "%s/server.key.rand" % bakPath
+        cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s/bin/';fi)" % (
+            server_rand_key_bak_file, server_rand_key_bak_file,
+            clusterAppPath)
+        datasource_cipher = "%s/datasource.key.cipher" % bakPath
+        cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s/bin/';fi)" % (
+            datasource_cipher, datasource_cipher, clusterAppPath)
+        datasource_rand = "%s/datasource.key.rand" % bakPath
+        cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s/bin/';fi)" % (
+            datasource_rand, datasource_rand, clusterAppPath)
+        tde_key_cipher = "%s/gs_tde_keys.cipher" % bakPath
+        cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s/bin/';fi)" % (
+            tde_key_cipher, tde_key_cipher, clusterAppPath)
+        g_logger.debug("Restore command: %s" % cmd)
+        DefaultValue.execCommandLocally(cmd)
+
+        # restore utilslib
+        utilslib = "%s/utilslib" % bakPath
+        cmd = "if [ -d '%s' ];then cp -r '%s' '%s'/;" % (
+            utilslib, utilslib, clusterAppPath)
+        # create new $GAUSSHOME/utilslib if it does not exist;
+        # no need to do chown, it will be done after all restores finish
+        cmd += " else mkdir -p '%s'/utilslib -m %s; fi " % (
+            clusterAppPath, DefaultValue.DIRECTORY_MODE)
+        g_logger.debug("Restore command: %s" % cmd)
+        DefaultValue.execCommandLocally(cmd)
+
+        # restore ca.key, etcdca.crt, client.key and client.crt
+        CA_key_file = "%s/ca.key" % bakPath
+        cmd = "(if [ -f '%s' ];" \
+              "then cp -f -p '%s' '%s/share/sslcert/etcd/';fi)" % (
+                  CA_key_file, CA_key_file, clusterAppPath)
+        CA_cert_file = "%s/etcdca.crt" % bakPath
+        cmd += " && (if [ -f '%s' ];" \
+               "then cp -f -p '%s' '%s/share/sslcert/etcd/';fi)" % (
+                   CA_cert_file, CA_cert_file, clusterAppPath)
+        client_key_file = "%s/client.key" % bakPath
+        cmd += " && (if [ -f '%s' ];" \
+               "then cp -f -p '%s' '%s/share/sslcert/etcd/';fi)" % (
+                   client_key_file, client_key_file, clusterAppPath)
+        client_cert_file = "%s/client.crt" % bakPath
+        cmd += " && (if [ -f '%s' ];" \
+               "then cp -f -p '%s' '%s/share/sslcert/etcd/';fi)" % (
+                   client_cert_file, client_cert_file, clusterAppPath)
+        if int(g_opts.oldVersion) >= 92019:
+            client_key_cipher_file = "%s/client.key.cipher" % bakPath
+            cmd += " && (if [ -f '%s' ];" \
+                   "then cp -f -p '%s' '%s/share/sslcert/etcd/';fi)" % (
+                       client_key_cipher_file, client_key_cipher_file,
+                       clusterAppPath)
+            client_key_rand_file = "%s/client.key.rand" % bakPath
+            cmd += " && (if [ -f '%s' ];" \
+                   "then cp -f -p '%s' '%s/share/sslcert/etcd/';fi)" % (
+                       client_key_rand_file, client_key_rand_file,
+                       clusterAppPath)
+            etcd_key_cipher_file = "%s/etcd.key.cipher" % bakPath
+            cmd += " && (if [ -f '%s' ];" \
+                   "then cp -f -p '%s' '%s/share/sslcert/etcd/';fi)" % (
+                       etcd_key_cipher_file, etcd_key_cipher_file,
+                       clusterAppPath)
+            etcd_key_rand_file = "%s/etcd.key.rand" % bakPath
+            cmd += " && (if [ -f '%s' ];" \
+                   "then cp -f -p '%s' '%s/share/sslcert/etcd/';fi)" % (
+                       etcd_key_rand_file, etcd_key_rand_file, clusterAppPath)
+        g_logger.debug("Restore command: %s" % cmd)
+        DefaultValue.execCommandLocally(cmd)
+
+        # restore java UDF
+        # lib/postgresql/java/pljava.jar comes from the new package,
+        # so it is not restored.
+        javadir = "%s/java" % bakPath
+        desPath = "%s/lib/postgresql/" % clusterAppPath
+        cmd = "if [ -d '%s' ];" \
+              "then rm -f '%s/pljava.jar'&&cp -r '%s' '%s' ;fi" % (
+                  javadir, javadir, javadir, desPath)
+        g_logger.debug("Restore command: %s" % cmd)
+        DefaultValue.execCommandLocally(cmd)
+
+        # restore postGIS
+        cmdPostGis = ""
+        machineType = platform.machine()
+        for sofile in g_opts.postgisSOFileList.keys():
+            # To solve the dependency problem on the ARM platform, the
+            # dependency libraries libgcc_s.so.* and libstdc++.so.* are
+            # contained in the ARM package. The libgcc_s.so.* on the ARM
+            # platform is the database built-in library, so no
+            # restoration is required for it.
+ if machineType == "aarch64" and sofile.find('libgcc_s.so') >= 0: + continue + desPath = os.path.join(clusterAppPath, + g_opts.postgisSOFileList[sofile]) + srcFile = "'%s'/%s" % (bakPath, sofile) + cmdPostGis += " && (if [ -f %s ];then cp -f -p %s '%s';fi)" % ( + srcFile, srcFile, desPath) + # skip " &&" + cmd = cmdPostGis[3:] + g_logger.debug("Restore command: %s" % cmd) + DefaultValue.execCommandLocally(cmd) + + # restore extension library and config files + hadoop_odbc_connector = \ + "%s/lib/postgresql/hadoop_odbc_connector.so" % bakPath + extension_config01 = \ + "%s/share/postgresql/extension/hadoop_odbc_connector--1.0.sql" \ + % bakPath + extension_config02 = \ + "%s/share/postgresql/extension/hadoop_odbc_connector.control" \ + % bakPath + extension_config03 = \ + "%s/share/postgresql/extension/" \ + "hadoop_odbc_connector--unpackaged--1.0.sql" % bakPath + cmd = "(if [ -f '%s' ];then cp -f -p '%s' '%s/lib/postgresql/';fi)" % ( + hadoop_odbc_connector, hadoop_odbc_connector, clusterAppPath) + cmd += \ + " && (if [ -f '%s' ];then cp -f " \ + "-p '%s/share/postgresql/extension/' '%s';fi)" % ( + extension_config01, extension_config01, clusterAppPath) + cmd += \ + " && (if [ -f '%s' ];then cp " \ + "-f -p '%s/share/postgresql/extension/' '%s';fi)" % ( + extension_config02, extension_config02, clusterAppPath) + cmd += \ + " && (if [ -f '%s' ];then cp -f " \ + "-p '%s/share/postgresql/extension/' '%s';fi)" % ( + extension_config03, extension_config03, clusterAppPath) + g_logger.debug("Restore command: %s" % cmd) + DefaultValue.execCommandLocally(cmd) + + # restore dict file and grpc file + dictFileDir = "'%s'/tsearch_data" % bakPath + dictDesPath = "'%s'/share/postgresql" % clusterAppPath + grpcFileDir = "'%s'/grpc" % bakPath + grpcDesPath = "'%s'/share/sslcert" % clusterAppPath + cmd = "if [ -d '%s' ];then cp -r '%s' '%s/' ;fi &&" % ( + dictFileDir, dictFileDir, dictDesPath) + cmd += "if [ -d '%s' ];then cp -r '%s' '%s/' ;fi" % ( + grpcFileDir, grpcFileDir, grpcDesPath) + g_logger.debug("Restore command: %s" % cmd) + DefaultValue.execCommandLocally(cmd) + except Exception as e: + raise Exception(str(e)) + + +def restoreDynamicConfigFile(): + """ + function: restore dynamic config file + output: None + :return: + """ + bakPath = g_opts.upgrade_bak_path + newClusterAppPath = g_opts.newClusterAppPath + oldClusterAppPath = g_opts.oldClusterAppPath + # cp new dynamic config file to new app path + newDynamicConfigFile = "%s/bin/cluster_dynamic_config" % oldClusterAppPath + g_file.removeFile("%s/bin/cluster_dynamic_config" % newClusterAppPath) + cmd = "(if [ -f '%s' ];then cp -f -p '%s' '%s/bin/';fi)" % ( + newDynamicConfigFile, newDynamicConfigFile, newClusterAppPath) + g_logger.debug("Restore command: %s" % cmd) + DefaultValue.execCommandLocally(cmd) + # cp old dynamic config file to old app path + dynamic_config = "%s/cluster_dynamic_config" % bakPath + g_file.removeFile(newDynamicConfigFile) + cmd = "(if [ -f '%s' ];then cp -f -p '%s' '%s/bin/';fi)" % ( + dynamic_config, dynamic_config, oldClusterAppPath) + g_logger.debug("Restore command: %s" % cmd) + DefaultValue.execCommandLocally(cmd) + + +def inplaceBackup(): + """ + function: backup config + output: none + """ + try: + # backup gds files + bakPath = g_opts.upgrade_bak_path + gdspath = "%s/share/sslcert/gds" % g_clusterInfo.appPath + cmd = "(if [ -d '%s' ];" \ + "then chmod 600 -R '%s'/*; cp -r '%s' '%s';fi)" % ( + gdspath, gdspath, gdspath, bakPath) + g_logger.debug("Inplace backup command: %s" % cmd) + 
+        DefaultValue.execCommandLocally(cmd)
+
+        # backup gsql files
+        bakPath = g_opts.upgrade_bak_path
+        gsqlpath = "%s/share/sslcert/gsql" % g_clusterInfo.appPath
+        cmd = "(if [ -d '%s' ];then chmod 600 -R '%s'/*; cp -r '%s' '%s';fi)" \
+              % (gsqlpath, gsqlpath, gsqlpath, bakPath)
+        g_logger.debug("Inplace backup command: %s" % cmd)
+        DefaultValue.execCommandLocally(cmd)
+    except Exception as e:
+        raise Exception(str(e))
+
+
+def inplaceRestore():
+    """
+    function: restore config
+    output: none
+    """
+    try:
+        # restore gds files
+        gdspath = "%s/share/sslcert/" % g_clusterInfo.appPath
+        gdsbackup = "%s/gds" % g_opts.upgrade_bak_path
+        cmd = "(if [ -d '%s' ];then cp -r '%s' '%s';fi)" % (
+            gdsbackup, gdsbackup, gdspath)
+        g_logger.debug("Inplace restore command: %s" % cmd)
+        DefaultValue.execCommandLocally(cmd)
+    except Exception as e:
+        raise Exception(str(e))
+
+
+def checkGucValue():
+    """
+    function: check guc value
+    input : NA
+    output : NA
+    """
+    try:
+        checkGucValueByShowing()
+    except Exception as e:
+        g_logger.debug("Failed to check dn guc parameter by "
+                       "showing. Error is:{0}. "
+                       "Trying to check from file".format(str(e)))
+        checkGucValueFromFile()
+
+
+def checkGucValueByShowing():
+    """
+    check dn guc value by "show guc" in database on all nodes
+    """
+    instance_list = getDnInstance()
+    if len(instance_list) != 0:
+        pool = ThreadPool(len(instance_list))
+        pool.map(checkOneInstanceGucValueByShowing, instance_list)
+        pool.close()
+        pool.join()
+
+
+def checkOneInstanceGucValueByShowing(instance):
+    """
+    check dn guc value by "show guc" in database on each node
+    :param instance:
+    :return:
+    """
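+    # g_opts.gucStr has the form "<name>:<v1>,<v2>", e.g. a (hypothetical)
+    # "upgrade_mode:1,2"; the check passes if the value the instance
+    # reports is any one of the listed expected values.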
" + "Instance data dir is: {2}".format(key, value, instance.datadir)) + sql = "show %s;" % key + g_logger.debug("Command to check value is: %s" % sql) + retryTimes = 300 + for i in range(retryTimes): + (status, output) = \ + ClusterCommand.execSQLCommand( + sql, g_opts.user, "", instance.port, "postgres", + False, "-m", IsInplaceUpgrade=True) + if status == 0 and output != "": + g_logger.debug("Output is: %s" % output) + checkValue = output.strip() + if str(checkValue) in value: + return + raise Exception(ErrorCode.GAUSS_521["GAUSS_52102"] % key + + " expect value %s" % (str(value))) + + +def getDnInstance(): + """ + get all dn instance + """ + instance_list = [] + if len(g_dbNode.datanodes) != 0: + for eachInstance in g_dbNode.datanodes: + if eachInstance.instanceType == MASTER_INSTANCE or\ + eachInstance.instanceType == STANDBY_INSTANCE: + instance_list.append(eachInstance) + return instance_list + + +def checkGucValueFromFile(): + """ + check guc value from conf file + """ + key = g_opts.gucStr.split(':')[0].strip() + value = g_opts.gucStr.split(':')[1].strip() + if value in const.VALUE_OFF: + value = const.VALUE_OFF + if value in const.VALUE_ON: + value = const.VALUE_ON + if key in const.DN_GUC: + instances = g_dbNode.datanodes + fileName = "postgresql.conf" + else: + raise Exception(ErrorCode.GAUSS_529["GAUSS_52942"]) + for inst in instances: + configFile = "%s/%s" % (inst.datadir, fileName) + cmd = "sed 's/\t/ /g' %s " \ + "| grep '^[ ]*\<%s\>[ ]*=' " \ + "| awk -F '=' '{print $2}'" % (configFile, key) + g_logger.debug("Command for checking guc:%s" % cmd) + retryTimes = 100 + for i in range(retryTimes): + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + time.sleep(3) + g_logger.debug( + ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + " Output: \n%s" % output) + continue + if output == "": + time.sleep(3) + g_logger.debug( + ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + " There is no %s in %s" % (key, configFile)) + continue + realValue = output.split('\n')[0].strip() + if '#' in realValue: + realValue = realValue.split('#')[0].strip() + g_logger.debug("[key:%s]: Realvalue %s, ExpectValue %s" % ( + key, str(realValue), str(value))) + if str(realValue) not in str(value): + raise Exception( + ErrorCode.GAUSS_521["GAUSS_52102"] % key + + " Real value %s, expect value %s" + % (str(realValue), str(value))) + break + + +def backupInstanceHotpatchConfig(instanceDataDir): + """ + function: backup + input : instanceDataDir + output : NA + """ + hotpatch_info_file = "%s/hotpatch/patch.info" % instanceDataDir + hotpatch_info_file_bak = "%s/hotpatch/patch.info.bak" % instanceDataDir + cmd = "(if [ -f '%s' ];then mv -f '%s' '%s';fi)" % ( + hotpatch_info_file, hotpatch_info_file, hotpatch_info_file_bak) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception( + ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + "\nOutput:%s" % output) + + +def backupHotpatch(): + """ + function: if the upgrade process failed in check cluster status, + user can reenter upgrade process + """ + if os.path.samefile(g_gausshome, g_opts.newClusterAppPath): + g_logger.debug("Has switched to new version, no need to backup again.") + return + + for dbInstance in g_dbNode.cmservers: + backupInstanceHotpatchConfig(dbInstance.datadir) + + for dbInstance in g_dbNode.coordinators: + backupInstanceHotpatchConfig(dbInstance.datadir) + + for dbInstance in g_dbNode.datanodes: + backupInstanceHotpatchConfig(dbInstance.datadir) + + for dbInstance in g_dbNode.gtms: + 
+        g_logger.debug("Command for checking guc:%s" % cmd)
+        retryTimes = 100
+        for i in range(retryTimes):
+            (status, output) = subprocess.getstatusoutput(cmd)
+            if status != 0:
+                time.sleep(3)
+                g_logger.debug(
+                    ErrorCode.GAUSS_514["GAUSS_51400"] % cmd +
+                    " Output: \n%s" % output)
+                continue
+            if output == "":
+                time.sleep(3)
+                g_logger.debug(
+                    ErrorCode.GAUSS_514["GAUSS_51400"] % cmd +
+                    " There is no %s in %s" % (key, configFile))
+                continue
+            realValue = output.split('\n')[0].strip()
+            if '#' in realValue:
+                realValue = realValue.split('#')[0].strip()
+            g_logger.debug("[key:%s]: Realvalue %s, ExpectValue %s" % (
+                key, str(realValue), str(value)))
+            if str(realValue) not in str(value):
+                raise Exception(
+                    ErrorCode.GAUSS_521["GAUSS_52102"] % key +
+                    " Real value %s, expect value %s"
+                    % (str(realValue), str(value)))
+            break
+
+
+def backupInstanceHotpatchConfig(instanceDataDir):
+    """
+    function: backup hotpatch config
+    input : instanceDataDir
+    output : NA
+    """
+    hotpatch_info_file = "%s/hotpatch/patch.info" % instanceDataDir
+    hotpatch_info_file_bak = "%s/hotpatch/patch.info.bak" % instanceDataDir
+    cmd = "(if [ -f '%s' ];then mv -f '%s' '%s';fi)" % (
+        hotpatch_info_file, hotpatch_info_file, hotpatch_info_file_bak)
+    (status, output) = subprocess.getstatusoutput(cmd)
+    if status != 0:
+        raise Exception(
+            ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + "\nOutput:%s" % output)
+
+
+def backupHotpatch():
+    """
+    function: if the upgrade process failed while checking cluster status,
+              the user can reenter the upgrade process
+    """
+    if os.path.samefile(g_gausshome, g_opts.newClusterAppPath):
+        g_logger.debug("Has switched to new version, no need to backup again.")
+        return
+
+    for dbInstance in g_dbNode.cmservers:
+        backupInstanceHotpatchConfig(dbInstance.datadir)
+
+    for dbInstance in g_dbNode.coordinators:
+        backupInstanceHotpatchConfig(dbInstance.datadir)
+
+    for dbInstance in g_dbNode.datanodes:
+        backupInstanceHotpatchConfig(dbInstance.datadir)
+
+    for dbInstance in g_dbNode.gtms:
+        backupInstanceHotpatchConfig(dbInstance.datadir)
+
+
+def rollbackInstanceHotpatchConfig(instanceDataDir):
+    """
+    function: rollback hotpatch config
+    input : instanceDataDir
+    output : NA
+    """
+    hotpatch_info_file = "%s/hotpatch/patch.info" % instanceDataDir
+    hotpatch_info_file_bak = "%s/hotpatch/patch.info.bak" % instanceDataDir
+    cmd = "(if [ -f '%s' ];then mv -f '%s' '%s';fi)" % (
+        hotpatch_info_file_bak, hotpatch_info_file_bak, hotpatch_info_file)
+    (status, output) = subprocess.getstatusoutput(cmd)
+    if status != 0:
+        raise Exception(
+            ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + "\nOutput:%s" % output)
+
+
+def rollbackHotpatch():
+    """
+    function: rollback hotpatch config
+    input : NA
+    output : NA
+    """
+    for dbInstance in g_dbNode.cmservers:
+        rollbackInstanceHotpatchConfig(dbInstance.datadir)
+
+    for dbInstance in g_dbNode.coordinators:
+        rollbackInstanceHotpatchConfig(dbInstance.datadir)
+
+    for dbInstance in g_dbNode.datanodes:
+        rollbackInstanceHotpatchConfig(dbInstance.datadir)
+
+    for dbInstance in g_dbNode.gtms:
+        rollbackInstanceHotpatchConfig(dbInstance.datadir)
+
+
+def readDeleteGuc():
+    """
+    function: get the GUCs to delete from file
+    input: NA
+    output: return the dict gucContent[instanceName]: [guc_name, ...],
+            where instanceName is one of gtm, coordinator, datanode,
+            cmserver, cmagent
+    """
+    deleteGucFile = os.path.join(g_opts.upgrade_bak_path,
+                                 "upgrade_sql/set_guc/delete_guc")
+    # Create tmp dir for delete_guc
+    delete_guc_tmp = "%s/upgrade_sql/set_guc" % g_opts.upgrade_bak_path
+    g_file.createDirectory(delete_guc_tmp)
+    g_file.createFileInSafeMode(deleteGucFile)
+    if not os.path.isfile(deleteGucFile):
+        raise Exception(ErrorCode.GAUSS_502["GAUSS_50210"] % deleteGucFile)
+    g_logger.debug("Get the delete GUC from file %s." % deleteGucFile)
+    gucContent = {}
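+    # Each non-comment line holds "<guc_name> <instance_name>"; e.g. a
+    # (hypothetical) line "enable_stream_operator datanode" appends
+    # "enable_stream_operator" to gucContent["datanode"].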
No need to clean.") + return + elif os.path.samefile(installPath, g_gausshome): + g_logger.log("The install path is $GAUSSHOME, cannot clean.") + return + tmpDir = DefaultValue.getTmpDirFromEnv(g_opts.user) + if tmpDir == "": + raise Exception(ErrorCode.GAUSS_518["GAUSS_51800"] % "$PGHOST") + # under upgrade, we will change the mode to read and execute + # in order to not change the dir, so we need to restore + # the permission to original mode after we switch to new version, + # and then we will have the permission to clean + # appPath under commit-upgrade + # under rollback, we also need to restore the permission + pluginPath = "%s/lib/postgresql/pg_plugin" % installPath + cmd = "(if [ -d '%s' ]; then chmod -R %d '%s'; fi)" % ( + pluginPath, DefaultValue.KEY_DIRECTORY_MODE, pluginPath) + appBakPath = "%s/to_be_delete" % tmpDir + cmd += " && (if [ ! -d '%s' ]; then mkdir -p '%s'; fi)" % ( + appBakPath, appBakPath) + cmd += " && (if [ -d '%s' ]; then cp -r '%s/' '%s/to_be_delete/'; fi)" % ( + installPath, installPath, tmpDir) + g_logger.debug( + "Command for change permission and backup install path: %s" % cmd) + DefaultValue.execCommandLocally(cmd) + + cmd = "(if [ -d '%s/bin' ]; then rm -rf '%s/bin'; fi) &&" % \ + (installPath, installPath) + cmd += "(if [ -d '%s/etc' ]; then rm -rf '%s/etc'; fi) &&" % \ + (installPath, installPath) + cmd += "(if [ -d '%s/include' ]; then rm -rf '%s/include'; fi) &&" % \ + (installPath, installPath) + cmd += "(if [ -d '%s/lib' ]; then rm -rf '%s/lib'; fi) &&" % \ + (installPath, installPath) + cmd += "(if [ -d '%s/share' ]; then rm -rf '%s/share'; fi) &&" % \ + (installPath, installPath) + cmd += "(if [ -d '%s/logs' ]; then rm -rf '%s/logs'; fi) &&" % \ + (installPath, installPath) + cmd += "(if [ -d '%s/utilslib' ]; then rm -rf '%s/utilslib'; fi) && " % \ + (installPath, installPath) + cmd += "(if [ -d '%s/jre' ]; then rm -rf '%s/jre'; fi) && " % \ + (installPath, installPath) + cmd += "(if [ -d '%s/jdk' ]; then rm -rf '%s/jdk'; fi) && " % \ + (installPath, installPath) + cmd += "(if [ -d '%s/kerberos' ]; then rm -rf '%s/kerberos'; fi) &&" % \ + (installPath, installPath) + cmd += "(if [ -d '%s/var/krb5kdc' ]; then rm -rf '%s/var/krb5kdc'; fi) &&" \ + % (installPath, installPath) + cmd += "(if [ -d '%s/simpleInstall' ]; then rm -rf '%s/simpleInstall';" \ + " fi) &&" % (installPath, installPath) + cmd += "(if [ -e '%s/version.cfg' ]; then rm -rf '%s/version.cfg'; fi)"\ + % (installPath, installPath) + DefaultValue.execCommandLocally(cmd) + if os.listdir(installPath): + g_logger.log( + "The path %s has personal file ot directory, please remove it." 
+
+
+def copyCerts():
+    """
+    function: copy certs
+    input : NA
+    output : NA
+    """
+    g_logger.debug("Starting to copy certs.")
+    oldBinPath = os.path.join(g_opts.oldClusterAppPath, "bin")
+    newBinPath = os.path.join(g_opts.newClusterAppPath, "bin")
+    oldOmSslCerts = os.path.join(g_opts.oldClusterAppPath, "share/sslcert/om")
+    newOmSslCerts = os.path.join(g_opts.newClusterAppPath, "share/sslcert/om")
+
+    g_file.cpFile("%s/server.key.cipher" % oldBinPath, "%s/" % newBinPath)
+    g_file.cpFile("%s/server.key.rand" % oldBinPath, "%s/" % newBinPath)
+    for certFile in DefaultValue.SERVER_CERT_LIST:
+        g_file.cpFile("%s/%s" % (oldOmSslCerts, certFile), "%s/" %
+                      newOmSslCerts)
+
+    g_file.changeMode(DefaultValue.KEY_FILE_MODE, "%s/server.key.cipher" %
+                      newBinPath)
+    g_file.changeMode(DefaultValue.KEY_FILE_MODE, "%s/server.key.rand" %
+                      newBinPath)
+    g_file.changeMode(DefaultValue.KEY_FILE_MODE, "%s/*" %
+                      newOmSslCerts)
+
+
+def prepareUpgradeSqlFolder():
+    """
+    function: verify upgrade_sql.tar.gz and extract it to the binary backup
+              path; if gs_upgradectl is executed again, the sql folder is
+              decompressed again in case files in the backup path were
+              destroyed
+    input : NA
+    output: NA
+    """
+    g_logger.debug("Preparing upgrade sql folder.")
+    # verify upgrade_sql.tar.gz
+    dirName = os.path.dirname(os.path.realpath(__file__))
+    packageDir = os.path.join(dirName, "./../../")
+    packageDir = os.path.normpath(packageDir)
+    upgrade_sql_gz_file = "%s/%s" % (packageDir, const.UPGRADE_SQL_FILE)
+    upgrade_sql_sha256_file = "%s/%s" % (packageDir, const.UPGRADE_SQL_SHA)
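+    # upgrade_sql.tar.gz ships with a companion SHA256 file; both must be
+    # present and the tarball's actual digest must match the recorded one
+    # before anything is extracted.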
+    if not os.path.isfile(upgrade_sql_gz_file):
+        raise Exception(
+            ErrorCode.GAUSS_502["GAUSS_50201"] % upgrade_sql_gz_file)
+    if not os.path.isfile(upgrade_sql_sha256_file):
+        raise Exception(
+            ErrorCode.GAUSS_502["GAUSS_50201"] % upgrade_sql_sha256_file)
+    g_logger.debug(
+        "The SQL file is %s, the sha256 file is %s." % (
+            upgrade_sql_gz_file, upgrade_sql_sha256_file))
+
+    g_logger.debug("Checking the SHA256 value of upgrade sql folder.")
+    sha256Actual = g_file.getFileSHA256(upgrade_sql_gz_file)
+    sha256Record = g_file.readFile(upgrade_sql_sha256_file)
+    if sha256Actual.strip() != sha256Record[0].strip():
+        raise Exception(ErrorCode.GAUSS_516["GAUSS_51635"] +
+                        " The SHA256 value is different: \nTar file: "
+                        "%s \nSHA256 file: %s " %
+                        (upgrade_sql_gz_file, upgrade_sql_sha256_file))
+
+    # extract it to the binary backup path
+    # g_opts.upgrade_bak_path was just recreated in the previous step, so it
+    # cannot contain an upgrade_sql folder and no cleanup is needed
+    g_logger.debug("Extracting upgrade sql folder.")
+    g_file.decompressFiles(upgrade_sql_gz_file, g_opts.upgrade_bak_path)
+    g_logger.debug("Successfully prepared upgrade sql folder.")
+
+
+def backupOldClusterDBAndRel():
+    """
+    backup old cluster db and rel info:
+    get the database list, connect to each cn and master dn,
+    connect to each database, and get the rel info
+    """
+    g_logger.log("Backing up old cluster database and catalog.")
+    try:
+        InstanceList = []
+        # find all instances that need to do backup
+        if len(g_dbNode.coordinators) != 0:
+            InstanceList.append(g_dbNode.coordinators[0])
+        primaryDnInstance = getLocalPrimaryDNInstance()
+        if primaryDnInstance:
+            InstanceList.extend(primaryDnInstance)
+
+        # do backup in parallel
+        if len(InstanceList) != 0:
+            pool = ThreadPool(len(InstanceList))
+            pool.map(backupOneInstanceOldClusterDBAndRel, InstanceList)
+            pool.close()
+            pool.join()
+        else:
+            g_logger.debug("No master instance found on this node, "
+                           "nothing needs to be done.")
+            return
+
+        g_logger.log("Successfully backed up old cluster database"
+                     " and catalog.")
+    except Exception as e:
+        g_logger.logExit(str(e))
+
+
+def getLocalPrimaryDNInstance():
+    """
+    function: get the local primary DN instances
+    input: NA
+    output: NA
+    """
+    g_logger.log("Find all primary dn instances on the local node.")
+    tmpFile = os.path.join(DefaultValue.getTmpDirFromEnv(
+        g_opts.user), const.TMP_DYNAMIC_DN_INFO)
+    primaryDNList = []
+    try:
+        # Match query results and cluster configuration
+        clusterStatus = DbClusterStatus()
+        clusterStatus.initFromFile(tmpFile)
+        # Find the primary DN instances
+        for dbNode in clusterStatus.dbNodes:
+            for instance in dbNode.datanodes:
+                if instance.status == 'Primary' and \
+                        instance.nodeId == g_dbNode.id:
+                    for eachInstance in g_dbNode.datanodes:
+                        if eachInstance.instanceId == instance.instanceId:
+                            primaryDNList.append(eachInstance)
+                            g_logger.log(
+                                "Successfully got the primary dn instance:"
+                                "{0}.".format(instance.__dict__))
+        return primaryDNList
+    except Exception as er:
+        raise Exception(str(er))
+
+
+def backupOneInstanceOldClusterDBAndRel(instance):
+    """
+    backup db and catalog info for one old cluster instance:
+    do checkpoint, get the database info list, remove template0,
+    connect each database, get the catalog info and save it to file
+    """
Instance data dir: %s" % + instance.datadir) + dbInfoDict = {} + dbInfoDict["dblist"] = [] + dbInfoDict["dbnum"] = 0 + backup_path = "%s/oldClusterDBAndRel/" % g_opts.upgrade_bak_path + try: + # get database info + get_db_list_sql = """SELECT d.datname, d.oid, + pg_catalog.pg_tablespace_location(t.oid) AS spclocation + FROM pg_catalog.pg_database d LEFT OUTER JOIN + pg_catalog.pg_tablespace t ON d.dattablespace = t.oid ORDER BY 2;""" + g_logger.debug("Get database info command: \n%s" % get_db_list_sql) + (status, output) = ClusterCommand.execSQLCommand(get_db_list_sql, + g_opts.user, "", + instance.port, + "postgres", + False, "-m", + IsInplaceUpgrade=True) + if status != 0: + raise Exception(ErrorCode.GAUSS_513[ + "GAUSS_51300"] % get_db_list_sql + + " Error:\n%s" % output) + if output == "": + raise Exception("can not find any database!!") + g_logger.debug("Get database info result: \n%s." % output) + resList = output.split('\n') + for each_line in resList: + tmpDbInfo = initDbInfo() + (datname, oid, spclocation) = each_line.split('|') + tmpDbInfo['dbname'] = datname.strip() + tmpDbInfo['dboid'] = oid.strip() + tmpDbInfo['spclocation'] = spclocation.strip() + dbInfoDict["dblist"].append(tmpDbInfo) + dbInfoDict["dbnum"] += 1 + + # connect each database, get catalog info + get_catalog_list_sql =\ + """SELECT p.oid, n.nspname, p.relname, + pg_catalog.pg_relation_filenode(p.oid) AS relfilenode, + p.reltablespace, pg_catalog.pg_tablespace_location(t.oid) AS + spclocation FROM pg_catalog.pg_class p INNER JOIN + pg_catalog.pg_namespace n ON (p.relnamespace = n.oid) LEFT OUTER + JOIN pg_catalog.pg_tablespace t ON (p.reltablespace = t.oid) WHERE + p.oid < 16384 AND p.relkind IN ('r', 'i', 't') AND + p.relisshared= false AND p.relpersistence != 'u' ORDER BY 1;""" + g_logger.debug("Get catalog info command: \n%s" % get_catalog_list_sql) + for each_db in dbInfoDict["dblist"]: + # template0 need handle specially, skip it here + if each_db["dbname"] == 'template0': + continue + (status, output) = ClusterCommand.execSQLCommand( + get_catalog_list_sql, g_opts.user, "", instance.port, + each_db["dbname"], False, "-m", IsInplaceUpgrade=True) + if status != 0: + raise Exception(ErrorCode.GAUSS_513[ + "GAUSS_51300"] % get_catalog_list_sql + + " Error:\n%s" % output) + if output == "": + raise Exception("can not find any catalog!!") + g_logger.debug("Get catalog info result of %s: \n%s." 
% ( + each_db["dbname"], output)) + resList = output.split('\n') + for each_line in resList: + tmpCatalogInfo = initCatalogInfo() + (oid, nspname, relname, relfilenode, reltablespace, + spclocation) = each_line.split('|') + tmpCatalogInfo['oid'] = oid.strip() + tmpCatalogInfo['relname'] = relname.strip() + tmpCatalogInfo['relfilenode'] = relfilenode.strip() + each_db["CatalogList"].append(tmpCatalogInfo) + each_db["CatalogNum"] += 1 + + # save db and catlog info into file + instance_name = getInstanceName(instance) + if instance.instanceRole == INSTANCE_ROLE_COODINATOR: + # handle cn instance + cn_db_and_catalog_info_file_name = \ + "%s/cn_db_and_catalog_info_%s.json" % ( + backup_path, instance_name) + DbInfoStr = json.dumps(dbInfoDict, indent=2) + fp = open(cn_db_and_catalog_info_file_name, 'w') + fp.write(DbInfoStr) + fp.flush() + fp.close() + else: + # handle master dn instance + dn_db_and_catalog_info_file_name = \ + "%s/dn_db_and_catalog_info_%s.json" % ( + backup_path, instance_name) + DbInfoStr = json.dumps(dbInfoDict, indent=2) + fp = open(dn_db_and_catalog_info_file_name, 'w') + fp.write(DbInfoStr) + fp.flush() + fp.close() + + standbyInstLst = [] + peerInsts = g_clusterInfo.getPeerInstance(instance) + for i in range(len(peerInsts)): + if peerInsts[i].instanceType == DefaultValue.MASTER_INSTANCE\ + or peerInsts[i].instanceType == \ + DefaultValue.STANDBY_INSTANCE: + standbyInstLst.append(peerInsts[i]) + for standbyInstance in standbyInstLst: + cmd = "pscp -H %s %s %s" % ( + standbyInstance.hostname, dn_db_and_catalog_info_file_name, + dn_db_and_catalog_info_file_name) + g_logger.debug("exec cmd is: %s" % cmd) + (status, output) = DefaultValue.retryGetstatusoutput(cmd, 2, 5) + if status != 0: + raise Exception(ErrorCode.GAUSS_514[ + "GAUSS_51400"] % cmd + + "\nOutput:%s" % output) + + except Exception as e: + raise Exception(str(e)) + + g_logger.debug( + "Successfully obtained instance catalog information. 
" + "Instance data dir: %s" % instance.datadir) + + +def updateCatalog(): + """ + connect database and update catalog one by one + 1.get database list + 2.connect each database, and exec update sql/check sql + """ + g_logger.log("Updating catalog.") + try: + update_catalog_maindb_sql = "{0}/{1}_catalog_maindb_tmp.sql".format( + g_opts.upgrade_bak_path, g_opts.scriptType) + update_catalog_otherdb_sql = "{0}/{1}_catalog_otherdb_tmp.sql".format( + g_opts.upgrade_bak_path, + g_opts.scriptType) + check_upgrade_sql = "" + if "upgrade" == g_opts.scriptType: + check_upgrade_sql = "{0}/check_upgrade_tmp.sql".format( + g_opts.upgrade_bak_path) + if not os.path.isfile(check_upgrade_sql): + raise Exception( + ErrorCode.GAUSS_502["GAUSS_50210"] % check_upgrade_sql) + if not os.path.isfile(update_catalog_maindb_sql): + raise Exception( + ErrorCode.GAUSS_502["GAUSS_50210"] % update_catalog_maindb_sql) + if not os.path.isfile(update_catalog_otherdb_sql): + raise Exception( + ErrorCode.GAUSS_502["GAUSS_50210"] % update_catalog_otherdb_sql) + + # get database list + clusterNodes = g_clusterInfo.dbNodes + for dbNode in clusterNodes: + if len(dbNode.datanodes) == 0: + continue + dnInst = dbNode.datanodes[0] + primaryDnNode, _ = DefaultValue.getPrimaryNode(g_opts.userProfile) + if dnInst.hostname not in primaryDnNode: + continue + break + reslines = get_database_list(dnInst) + + # connect each database, and exec update sql/check sql + maindb = "postgres" + otherdbs = reslines + otherdbs.remove("postgres") + # 1.handle maindb first + upgrade_one_database([maindb, dnInst.port, + update_catalog_maindb_sql, check_upgrade_sql]) + + # 2.handle otherdbs + upgrade_info = [] + for eachdb in otherdbs: + g_logger.debug("Updating catalog for database %s." % eachdb) + upgrade_info.append([eachdb, dnInst.port, + update_catalog_otherdb_sql, check_upgrade_sql]) + if len(upgrade_info) != 0: + pool = ThreadPool(1) + pool.map(upgrade_one_database, upgrade_info) + pool.close() + pool.join() + + g_logger.log("Successfully updated catalog.") + except Exception as e: + g_logger.logExit(str(e)) + + +def get_database_list(dnInst): + """ + get database list + :return: + """ + # get database list + sqlSelect = "select datname from pg_database;" + g_logger.debug("Command for getting database list: %s" % sqlSelect) + (status, output) = ClusterCommand.execSQLCommand( + sqlSelect, g_opts.user, "", dnInst.port, IsInplaceUpgrade=True) + g_logger.debug("The result of database list: %s." % output) + if 0 != status: + raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % + sqlSelect + " Error:\n%s" % output) + if "" == output: + raise Exception( + "No database objects were found in the cluster!") + + reslines = (output.strip()).split('\n') + if (len(reslines) < 3 + or "template1" not in reslines + or "template0" not in reslines + or "postgres" not in reslines): + raise Exception( + "The database list is invalid:%s." 
+
+        # get database list
+        clusterNodes = g_clusterInfo.dbNodes
+        for dbNode in clusterNodes:
+            if len(dbNode.datanodes) == 0:
+                continue
+            dnInst = dbNode.datanodes[0]
+            primaryDnNode, _ = DefaultValue.getPrimaryNode(g_opts.userProfile)
+            if dnInst.hostname not in primaryDnNode:
+                continue
+            break
+        reslines = get_database_list(dnInst)
+
+        # connect each database, and exec update sql/check sql
+        maindb = "postgres"
+        otherdbs = reslines
+        otherdbs.remove("postgres")
+        # 1. handle maindb first
+        upgrade_one_database([maindb, dnInst.port,
+                              update_catalog_maindb_sql, check_upgrade_sql])
+
+        # 2. handle otherdbs
+        upgrade_info = []
+        for eachdb in otherdbs:
+            g_logger.debug("Updating catalog for database %s." % eachdb)
+            upgrade_info.append([eachdb, dnInst.port,
+                                 update_catalog_otherdb_sql,
+                                 check_upgrade_sql])
+        if len(upgrade_info) != 0:
+            pool = ThreadPool(1)
+            pool.map(upgrade_one_database, upgrade_info)
+            pool.close()
+            pool.join()
+
+        g_logger.log("Successfully updated catalog.")
+    except Exception as e:
+        g_logger.logExit(str(e))
+
+
+def get_database_list(dnInst):
+    """
+    get database list
+    :return:
+    """
+    # get database list
+    sqlSelect = "select datname from pg_database;"
+    g_logger.debug("Command for getting database list: %s" % sqlSelect)
+    (status, output) = ClusterCommand.execSQLCommand(
+        sqlSelect, g_opts.user, "", dnInst.port, IsInplaceUpgrade=True)
+    g_logger.debug("The result of database list: %s." % output)
+    if 0 != status:
+        raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] %
+                        sqlSelect + " Error:\n%s" % output)
+    if "" == output:
+        raise Exception(
+            "No database objects were found in the cluster!")
+
+    reslines = (output.strip()).split('\n')
+    if (len(reslines) < 3
+            or "template1" not in reslines
+            or "template0" not in reslines
+            or "postgres" not in reslines):
+        raise Exception(
+            "The database list is invalid: %s." % str(reslines))
+    return reslines
+
+
+def upgrade_one_database(upgrade_info):
+    """
+    upgrade the catalog of one database
+    """
+    try:
+        db_name = upgrade_info[0]
+        port = upgrade_info[1]
+        update_catalog_file = upgrade_info[2]
+        check_upgrade_file = upgrade_info[3]
+
+        g_logger.debug("Updating catalog for database %s" % db_name)
+        execSQLFile(db_name, update_catalog_file, port)
+        if "" != check_upgrade_file:
+            execSQLFile(db_name, check_upgrade_file, port)
+    except Exception as e:
+        raise Exception(str(e))
+
+
+def execSQLFile(dbname, sqlFile, cn_port):
+    """
+    exec sql file
+    """
+    gsql_cmd = ClusterCommand.getSQLCommandForInplaceUpgradeBackup(
+        cn_port, dbname.replace('$', '\\$'))
+    cmd = "%s -X --echo-queries --set ON_ERROR_STOP=on -f %s" % (
+        gsql_cmd, sqlFile)
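+    # --set ON_ERROR_STOP=on makes gsql exit non-zero at the first failed
+    # statement; findErrorInSqlFile below additionally scans the echoed
+    # output for errors.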
" + "Instance data dir: %s" % instance.datadir) + + +def __backup_global_dir(instance): + """ + """ + g_logger.debug("Start to back up global_dir") + try: + backup_dir_list = const.BACKUP_DIR_LIST_BASE + if float(g_opts.oldclusternum) < float(const.UPGRADE_VERSION_64bit_xid): + backup_dir_list.extend(const.BACKUP_DIR_LIST_64BIT_XID) + for name in backup_dir_list: + srcDir = "%s/%s" % (instance.datadir, name) + destDir = "%s_bak" % srcDir + if os.path.isdir(srcDir): + cpDirectory(srcDir, destDir) + g_logger.debug("Successfully backed up global_dir") + except Exception as e: + raise Exception(str(e)) + + +def __backup_xlog_file(instance): + """ + """ + try: + g_logger.debug("Backup instance xlog files. " + "Instance data dir: %s" % instance.datadir) + + # get Latest checkpoint location + pg_xlog_info = __get_latest_checkpoint_location(instance) + xlog_back_file = os.path.join( + instance.datadir, "pg_xlog", pg_xlog_info.get( + 'latest_checkpoint_redo_xlog_file')) + if not os.path.exists(xlog_back_file): + raise Exception("There is no xlog to backup for %d." + % instance.instanceId) + + xlog_dir = os.path.join(instance.datadir, "pg_xlog") + xlog_file_list = os.listdir(xlog_dir) + xlog_file_list.sort() + + backup_xlog_list = [] + for one_file in xlog_file_list: + if not os.path.isfile(os.path.join(xlog_dir, one_file)): + continue + if len(one_file) != 24: + continue + if one_file >= pg_xlog_info.get('latest_checkpoint_redo_xlog_file'): + backup_xlog_list.append(one_file) + + if len(backup_xlog_list) == 0: + raise Exception("There is no xlog to backup for %d." % + instance.instanceId) + + for one_file in backup_xlog_list: + src_file = os.path.join(xlog_dir, one_file) + dst_file = os.path.join(xlog_dir, one_file + "_upgrade_backup") + shutil.copy2(src_file, dst_file) + g_logger.debug("file {0} has been backed up to {1}".format( + src_file, dst_file)) + + xlog_backup_info = copy.deepcopy(pg_xlog_info) + xlog_backup_info['backup_xlog_list'] = backup_xlog_list + xlog_backup_info_target_file = os.path.join(xlog_dir, + const.XLOG_BACKUP_INFO) + g_file.createFileInSafeMode(xlog_backup_info_target_file) + with open(xlog_backup_info_target_file, "w") as fp: + json.dump(xlog_backup_info, fp) + + g_logger.debug("XLOG backup info:%s." % xlog_backup_info) + g_logger.debug("Successfully backuped instance xlog files. " + "Instance data dir: %s" % instance.datadir) + except Exception as e: + raise Exception(str(e)) + + +def __get_latest_checkpoint_location(instance): + try: + result = dict() + cmd = "pg_controldata '%s'" % instance.datadir + if g_opts.mpprcFile != "" and g_opts.mpprcFile is not None: + cmd = "source %s; %s" % (g_opts.mpprcFile, cmd) + (status, output) = DefaultValue.retryGetstatusoutput(cmd, 2, 5) + g_logger.debug("Command for get control data:%s.Output:\n%s." % ( + cmd, output)) + if status != 0: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + "\nOutput:%s" % output) + time_line_id = "" + latest_checkpoint_redo_location = "" + for one_line in output.split('\n'): + one_line = one_line.strip() + if len(one_line.split(':')) == 2: + if one_line.split(':')[0].strip() == \ + "Latest checkpoint's TimeLineID": + time_line_id = one_line.split(':')[1].strip() + elif one_line.split(':')[0].strip() == \ + "Latest checkpoint's REDO location": + latest_checkpoint_redo_location = \ + one_line.split(':')[1].strip() + if time_line_id != "" and latest_checkpoint_redo_location != "": + break + if time_line_id == "": + raise Exception( + "Failed to get Latest checkpoint's TimeLineID for %d." 
+        for one_file in xlog_file_list:
+            if not os.path.isfile(os.path.join(xlog_dir, one_file)):
+                continue
+            if len(one_file) != 24:
+                continue
+            if one_file >= pg_xlog_info.get(
+                    'latest_checkpoint_redo_xlog_file'):
+                backup_xlog_list.append(one_file)
+
+        if len(backup_xlog_list) == 0:
+            raise Exception("There is no xlog to backup for %d." %
+                            instance.instanceId)
+
+        for one_file in backup_xlog_list:
+            src_file = os.path.join(xlog_dir, one_file)
+            dst_file = os.path.join(xlog_dir, one_file + "_upgrade_backup")
+            shutil.copy2(src_file, dst_file)
+            g_logger.debug("file {0} has been backed up to {1}".format(
+                src_file, dst_file))
+
+        xlog_backup_info = copy.deepcopy(pg_xlog_info)
+        xlog_backup_info['backup_xlog_list'] = backup_xlog_list
+        xlog_backup_info_target_file = os.path.join(xlog_dir,
+                                                    const.XLOG_BACKUP_INFO)
+        g_file.createFileInSafeMode(xlog_backup_info_target_file)
+        with open(xlog_backup_info_target_file, "w") as fp:
+            json.dump(xlog_backup_info, fp)
+
+        g_logger.debug("XLOG backup info:%s." % xlog_backup_info)
+        g_logger.debug("Successfully backed up instance xlog files. "
+                       "Instance data dir: %s" % instance.datadir)
+    except Exception as e:
+        raise Exception(str(e))
+
+
+def __get_latest_checkpoint_location(instance):
+    """
+    parse pg_controldata output into the redo location and its segment file
+    """
+    try:
+        result = dict()
+        cmd = "pg_controldata '%s'" % instance.datadir
+        if g_opts.mpprcFile != "" and g_opts.mpprcFile is not None:
+            cmd = "source %s; %s" % (g_opts.mpprcFile, cmd)
+        (status, output) = DefaultValue.retryGetstatusoutput(cmd, 2, 5)
+        g_logger.debug("Command for get control data:%s.Output:\n%s." % (
+            cmd, output))
+        if status != 0:
+            raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd +
+                            "\nOutput:%s" % output)
+        time_line_id = ""
+        latest_checkpoint_redo_location = ""
+        for one_line in output.split('\n'):
+            one_line = one_line.strip()
+            if len(one_line.split(':')) == 2:
+                if one_line.split(':')[0].strip() == \
+                        "Latest checkpoint's TimeLineID":
+                    time_line_id = one_line.split(':')[1].strip()
+                elif one_line.split(':')[0].strip() == \
+                        "Latest checkpoint's REDO location":
+                    latest_checkpoint_redo_location = \
+                        one_line.split(':')[1].strip()
+            if time_line_id != "" and latest_checkpoint_redo_location != "":
+                break
+        if time_line_id == "":
+            raise Exception(
+                "Failed to get Latest checkpoint's TimeLineID for %d." %
+                instance.instanceId)
+        if latest_checkpoint_redo_location == "":
+            raise Exception("Failed to get Latest checkpoint's "
+                            "REDO location for %d." % instance.instanceId)
+        redo_log_id = latest_checkpoint_redo_location.split('/')[0]
+        redo_tmp_log_seg = latest_checkpoint_redo_location.split('/')[1]
+        if len(redo_tmp_log_seg) > 6:
+            redo_log_seg = redo_tmp_log_seg[0:-6]
+        else:
+            redo_log_seg = 0
+        latest_checkpoint_redo_xlog_file = \
+            "%08d%s%s" % (int(time_line_id, 16),
+                          str(redo_log_id).zfill(8),
+                          str(redo_log_seg).zfill(8))
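+        # Worked example (assumed control data): TimeLineID "1" with REDO
+        # location "0/3000028" gives redo_log_id "0" and redo_log_seg "3",
+        # so the segment file is "000000010000000000000003".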
" + "Instance data dir: %s" % instance.datadir) + try: + # handle dummy standby dn instance first + if instance.instanceRole == INSTANCE_ROLE_DATANODE and \ + instance.instanceType == DUMMY_STANDBY_INSTANCE: + # clean pg_xlog folder of dummy standby dn instance and return + pg_xlog_dir = "%s/pg_xlog" % instance.datadir + cmd = "find '%s' -type f | xargs -r -n 100 rm -f" % pg_xlog_dir + DefaultValue.execCommandLocally(cmd) + + # restore list folder + __restore_global_dir(instance) + return + + __restore_global_dir(instance) + __restore_xlog_file(instance) + __restore_cbm_file(instance) + __restore_base_folder(instance) + except Exception as e: + raise Exception(str(e)) + + g_logger.debug("Successfully restored instance catalog physical files. " + "Instance data dir: %s" % instance.datadir) + + +def __restore_global_dir(instance): + """ + """ + try: + g_logger.debug("Start to restore global_dir") + backup_dir_list = const.BACKUP_DIR_LIST_BASE + const.BACKUP_DIR_LIST_64BIT_XID + for name in backup_dir_list: + srcDir = "%s/%s" % (instance.datadir, name) + destDir = "%s/%s_bak" % (instance.datadir, name) + if os.path.isdir(destDir): + cpDirectory(destDir, srcDir) + g_logger.debug("Successfully restored global_dir") + except Exception as e: + raise Exception(str(e)) + + +def __restore_xlog_file(instance): + """ + """ + try: + g_logger.debug("Restore instance xlog files. " + "Instance data dir: %s" % instance.datadir) + + # read xlog_backup_info + xlog_backup_info_file = os.path.join(instance.datadir, + "pg_xlog", const.XLOG_BACKUP_INFO) + if not os.path.exists(xlog_backup_info_file): + raise Exception( + ErrorCode.GAUSS_502["GAUSS_50201"] % xlog_backup_info_file) + + with open(xlog_backup_info_file, "r") as fp: + xlog_backup_info_str = fp.read() + xlog_backup_info = json.loads(xlog_backup_info_str) + + # clean new xlog after latest_checkpoint_xlog_file + xlog_dir = os.path.join(instance.datadir, "pg_xlog") + xlog_list = os.listdir(xlog_dir) + xlog_list.sort() + + for one_file in xlog_list: + xlog_path = os.path.join(xlog_dir, one_file) + if len(one_file) == 24 and one_file >= xlog_backup_info[ + 'latest_checkpoint_redo_xlog_file'] and \ + os.path.isfile(xlog_path): + g_logger.debug("%s:Removing %s." % ( + instance.instanceId, xlog_path)) + os.remove(xlog_path) + + # restore old xlog file + for one_file in xlog_backup_info['backup_xlog_list']: + src_file = os.path.join(xlog_dir, one_file + "_upgrade_backup") + dst_file = os.path.join(xlog_dir, one_file) + if os.path.exists(src_file): + g_logger.debug("%s:Restoring %s." % ( + instance.instanceId, dst_file)) + shutil.copy2(src_file, dst_file) + else: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % src_file) + + g_logger.debug("Successfully restore instance xlog files. " + "Instance data dir: {0}".format(instance.datadir)) + except Exception as e: + raise Exception(str(e)) + + +def __restore_cbm_file(instance): + """ + """ + try: + g_logger.debug("restore instance cbm files. " + "Instance data dir: %s" % instance.datadir) + cbm_dir = os.path.join(instance.datadir, "pg_cbm") + cmd = "rm -rf '%s' " % cbm_dir + (status, output) = DefaultValue.retryGetstatusoutput(cmd, 2, 5) + if status != 0: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + "\nOutput:%s" % output) + + cbm_back_dir = os.path.join(instance.datadir, "pg_cbm_back") + if not os.path.exists(cbm_back_dir): + g_logger.debug("There is no cbm dir to restore for %d." 
+        cpDirectory(cbm_back_dir, cbm_dir)
+        g_logger.debug("Successfully restored instance cbm files. "
+                       "Instance data dir: %s" % instance.datadir)
+    except Exception as e:
+        raise Exception(str(e))
+
+
+def cleanOldClusterCatalogPhysicalFiles():
+    """
+    clean old cluster catalog physical files:
+    get the database list, connect to each cn and dn,
+    connect to each database, and do the cleanup
+    """
+    g_logger.log("Cleaning old cluster catalog physical files.")
+    try:
+        # kill any pending processes that are
+        # copying backup catalog physical files
+        killCmd = DefaultValue.killInstProcessCmd(
+            "backup_old_cluster_catalog_physical_files")
+        (status, output) = subprocess.getstatusoutput(killCmd)
+        if status != 0:
+            raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % killCmd +
+                            "\nOutput:%s" % output)
+
+        InstanceList = []
+        # find all instances that need to do clean
+        if len(g_dbNode.datanodes) != 0:
+            for eachInstance in g_dbNode.datanodes:
+                InstanceList.append(eachInstance)
+
+        # do clean in parallel
+        if len(InstanceList) != 0:
+            pool = ThreadPool(len(InstanceList))
+            pool.map(
+                cleanOneInstanceOldClusterCatalogPhysicalFiles, InstanceList)
+            pool.close()
+            pool.join()
+        else:
+            g_logger.debug("No master instance found on this node, "
+                           "nothing needs to be done.")
+            return
+
+        g_logger.log(
+            "Successfully cleaned old cluster catalog physical files.")
+    except Exception as e:
+        g_logger.logExit(str(e))
+
+
+def cleanOneInstanceOldClusterCatalogPhysicalFiles(instance):
+    """
+    clean catalog physical files for one old cluster instance:
+    read database and catalog info from file,
+    connect each database, do the cleanup
+    """
+    g_logger.debug("Clean up instance catalog backup. "
+                   "Instance data dir: %s" % instance.datadir)
+    try:
+        __clean_global_dir(instance)
+
+        if g_opts.rollback:
+            pg_csnlog_dir = os.path.join(instance.datadir, "pg_csnlog")
+            # when doing rollback, if the old cluster number is less than
+            # UPGRADE_VERSION_64bit_xid, remove the pg_csnlog directory
+            if float(g_opts.oldclusternum) < float(
+                    const.UPGRADE_VERSION_64bit_xid) and \
+                    os.path.isdir(pg_csnlog_dir):
+                g_file.removeDirectory(pg_csnlog_dir)
+        else:
+            pg_subtrans_dir = os.path.join(instance.datadir, "pg_subtrans")
+            # when doing commit, remove the pg_subtrans directory
+            if os.path.isdir(pg_subtrans_dir):
+                g_file.removeDirectory(pg_subtrans_dir)
+
+        if instance.instanceRole == INSTANCE_ROLE_DATANODE and \
+                instance.instanceType == DUMMY_STANDBY_INSTANCE:
+            g_logger.debug("There is no need to clean catalog. "
+                           "Instance data dir: %s" % instance.datadir)
+            return
+
+        __clean_xlog_file(instance)
+        __clean_cbm_file(instance)
+        __clean_base_folder(instance)
+    except Exception as e:
+        raise Exception(str(e))
+
+    g_logger.debug("Successfully cleaned up instance catalog backup. "
+                   "Instance data dir: %s" % instance.datadir)
" + "Instance data dir: %s" % instance.datadir) + + +def __clean_global_dir(instance): + """ + """ + # clean pg_internal.init* + g_logger.debug("Start to clean global_dir") + cmd = "rm -f %s/global/pg_internal.init*" % instance.datadir + DefaultValue.execCommandLocally(cmd) + + backup_dir_list = const.BACKUP_DIR_LIST_BASE + const.BACKUP_DIR_LIST_64BIT_XID + for name in backup_dir_list: + backup_dir = "%s/%s" % (instance.datadir, name) + cleanBackUpDir(backup_dir) + g_logger.debug("Successfully cleaned global_dir") + + +def __clean_xlog_file(instance): + """ + """ + # clean *.upgrade_backup files + cmd = "rm -f '%s'/pg_xlog/*_upgrade_backup && rm -f '%s'/pg_xlog/%s" % \ + (instance.datadir, instance.datadir, const.XLOG_BACKUP_INFO) + DefaultValue.execCommandLocally(cmd) + g_logger.debug("Successfully clean instance xlog files. " + "Instance data dir: {0}".format(instance.datadir)) + + +def __clean_cbm_file(instance): + """ + """ + # clean pg_cbm_back files + cbm_back_dir = os.path.join(instance.datadir, "pg_cbm_back") + cmd = "rm -rf '%s' " % cbm_back_dir + (status, output) = DefaultValue.retryGetstatusoutput(cmd, 2, 5) + if status != 0: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + "\nOutput:%s" % output) + g_logger.debug("Successfully clean instance cbm files. " + "Instance data dir: {0}".format(instance.datadir)) + + +def __clean_base_folder(instance): + """ + """ + g_logger.debug("Clean instance base folders. " + "Instance data dir: {0}".format(instance.datadir)) + backup_path = os.path.join(g_opts.upgrade_bak_path, "oldClusterDBAndRel") + # get instance name + instance_name = getInstanceName(instance) + # load db and catalog info from json file + if instance.instanceRole == INSTANCE_ROLE_COODINATOR: + db_and_catalog_info_file_name = \ + "%s/cn_db_and_catalog_info_%s.json" % (backup_path, instance_name) + elif instance.instanceRole == INSTANCE_ROLE_DATANODE: + if instance.instanceType == MASTER_INSTANCE or \ + instance.instanceType == STANDBY_INSTANCE: + db_and_catalog_info_file_name = \ + "%s/dn_db_and_catalog_info_%s.json" % ( + backup_path, instance_name) + else: + raise Exception("Invalid instance type:%s" % instance.instanceType) + else: + raise Exception("Invalid instance role:%s" % instance.instanceRole) + with open(db_and_catalog_info_file_name, 'r') as fp: + dbInfoStr = fp.read() + try: + dbInfoDict = json.loads(dbInfoStr) + except Exception as ee: + raise Exception(str(ee)) + + # clean base folder + for each_db in dbInfoDict["dblist"]: + if each_db["spclocation"] != "": + if each_db["spclocation"].startswith('/'): + tbsBaseDir = each_db["spclocation"] + else: + tbsBaseDir = "%s/pg_location/%s" % ( + instance.datadir, each_db["spclocation"]) + pg_catalog_base_dir = "%s/%s_%s/%d" % ( + tbsBaseDir, + DefaultValue.TABLESPACE_VERSION_DIRECTORY, + instance_name, + int(each_db["dboid"])) + else: + pg_catalog_base_dir = "%s/base/%d" % ( + instance.datadir, int(each_db["dboid"])) + + # for base folder, template0 need handle specially + if each_db["dbname"] == 'template0': + cmd = "rm -rf '%s_bak' && rm -f %s/pg_internal.init*" % \ + (pg_catalog_base_dir, pg_catalog_base_dir) + (status, output) = DefaultValue.retryGetstatusoutput(cmd, 2, 5) + if status != 0: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + "\nOutput:%s" % output) + g_logger.debug("{0} has been cleaned".format(pg_catalog_base_dir)) + continue + + # main/vm/fsm -- main.1 .. 
+        # cannot add '' for this cmd
+        cmd = "rm -f %s/*_bak && rm -f %s/pg_internal.init*" % (
+            pg_catalog_base_dir, pg_catalog_base_dir)
+        g_logger.debug("{0} needs to be cleaned".format(pg_catalog_base_dir))
+        (status, output) = DefaultValue.retryGetstatusoutput(cmd, 2, 5)
+        if status != 0:
+            raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd +
+                            "\nOutput:%s" % output)
+    g_logger.debug("Successfully cleaned instance base folders. "
+                   "Instance data dir: {0}".format(instance.datadir))
+
+
+def replacePgprocFile():
+    """
+    function: replace the pg_proc data file with the pg_proc_temp data file
+    input: NA
+    output: NA
+    """
+    g_logger.log("Replace pg_proc file.")
+    try:
+        InstanceList = []
+        # find all DB instances that need to replace pg_proc
+        if len(g_dbNode.datanodes) != 0:
+            for eachInstance in g_dbNode.datanodes:
+                if (eachInstance.instanceType == MASTER_INSTANCE
+                        or eachInstance.instanceType == STANDBY_INSTANCE):
+                    InstanceList.append(eachInstance)
+
+        # replace pg_proc for each instance
+        if len(InstanceList) != 0:
+            pool = ThreadPool(len(InstanceList))
+            pool.map(replaceOneInstancePgprocFile, InstanceList)
+            pool.close()
+            pool.join()
+        else:
+            g_logger.debug(
+                "No instance found on this node, nothing needs to be done.")
+            return
+
+        g_logger.log(
+            "Successfully replaced all instances' pg_proc file on this node.")
+    except Exception as e:
+        g_logger.logExit(str(e))
+
+
+def replaceOneInstancePgprocFile(instance):
+    """
+    function: replace the pg_proc data files for this instance
+    input: NA
+    output: NA
+    """
+    g_logger.debug("Replace instance pg_proc file. "
+                   "Instance data dir: %s" % instance.datadir)
+    pg_proc_mapping_file = os.path.join(g_opts.appPath,
+                                        'pg_proc_mapping.txt')
+    with open(pg_proc_mapping_file, 'r') as fp:
+        pg_proc_dict_str = fp.read()
+    proc_dict = eval(pg_proc_dict_str)
+    try:
+        # replace pg_proc data file with pg_proc_temp data file
+        for proc_file_path, pg_proc_temp_file_path in proc_dict.items():
+            pg_proc_data_file = \
+                os.path.join(instance.datadir, proc_file_path)
+            if not os.path.exists(pg_proc_data_file):
+                raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] %
+                                pg_proc_data_file)
+            pg_proc_temp_data_file = os.path.join(
+                instance.datadir, pg_proc_temp_file_path)
+            if not os.path.exists(pg_proc_temp_data_file):
+                raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] %
+                                pg_proc_temp_data_file)
+            g_file.removeFile(pg_proc_data_file)
+            g_file.cpFile(pg_proc_temp_data_file, pg_proc_data_file)
+
+    except Exception as e:
+        raise Exception(str(e))
+
+    g_logger.debug(
+        "Successfully replaced instance pg_proc file. Instance data dir: %s"
+        % instance.datadir)
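+
+
+# pg_proc_mapping.txt holds a str()-serialized dict read back with eval()
+# above; each entry maps a pg_proc data file path to its pg_proc_temp_oids
+# counterpart, e.g. (illustrative) {'base/15315/1255': 'base/15315/17089'}.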
Instance data dir: %s" + % instance.datadir) + + +def createPgprocPathMappingFile(): + """ + create pg_proc and pg_proc_temp_oids data file path mapping + :return: + """ + g_logger.log("Create file to save mapping between pg_proc file path and" + " pg_proc_temp_oids file path.") + clusterNodes = g_clusterInfo.dbNodes + dnInst = None + for dbNode in clusterNodes: + if len(dbNode.datanodes) == 0: + continue + dnInst = dbNode.datanodes[0] + primaryDnNode, _ = DefaultValue.getPrimaryNode(g_opts.userProfile) + if dnInst.hostname not in primaryDnNode: + continue + break + database_list = get_database_list(dnInst) + pg_proc_list = ['pg_proc', 'pg_proc_oid_index', + 'pg_proc_proname_args_nsp_index'] + pg_proc_temp_list = ['pg_proc_temp_oids', 'pg_proc_oid_index_temp', + 'pg_proc_proname_args_nsp_index_temp'] + proc_file_path_list = [] + pg_proc_temp_file_path_list = [] + for eachdb in database_list: + for info in pg_proc_list: + pg_proc_file_path = getTableFilePath(info, dnInst, eachdb) + proc_file_path_list.append(pg_proc_file_path) + for temp_info in pg_proc_temp_list: + pg_proc_temp_file_path = getTableFilePath(temp_info, dnInst, eachdb) + pg_proc_temp_file_path_list.append(pg_proc_temp_file_path) + proc_dict = dict((proc_file_path, pg_proc_temp_file_path) for + proc_file_path, pg_proc_temp_file_path in + zip(proc_file_path_list, pg_proc_temp_file_path_list)) + pg_proc_mapping_file = os.path.join(g_opts.appPath, 'pg_proc_mapping.txt') + with open(pg_proc_mapping_file, 'w') as fp: + fp.write(str(proc_dict)) + g_logger.log( + "Successfully created file to save mapping between pg_proc file path" + " and pg_proc_temp_oids file path.") + + +def getTableFilePath(tablename, dnInst, db_name): + """ + get table file path by oid + :return: + """ + sql = "select oid from pg_class where relname='%s';" % tablename + (status, output) = ClusterCommand.remoteSQLCommand( + sql, g_opts.user, + dnInst.hostname, + dnInst.port, False, + db_name, + IsInplaceUpgrade=True) + if status != 0: + raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % sql + + " Error: \n%s" % str(output)) + table_oid = output.strip('\n') + g_logger.debug("pg_proc oid is %s" % table_oid) + sql = "select pg_relation_filepath(%s);" % table_oid + (status, output) = ClusterCommand.remoteSQLCommand( + sql, g_opts.user, + dnInst.hostname, + dnInst.port, False, + db_name, + IsInplaceUpgrade=True) + if status != 0: + raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % sql + + " Error: \n%s" % str(output)) + table_file_path = output.strip('\n') + g_logger.debug("pg_proc file path is %s" % table_file_path) + return table_file_path + + +def createNewCsvFile(): + """ + 1. copy pg_proc info to csv file + 2. modify csv file + 3. 
create new table and get info by csv file + :return: + """ + g_logger.log("Create new csv file.") + clusterNodes = g_clusterInfo.dbNodes + dnInst = None + for dbNode in clusterNodes: + if len(dbNode.datanodes) == 0: + continue + dnInst = dbNode.datanodes[0] + primaryDnNode, _ = DefaultValue.getPrimaryNode(g_opts.userProfile) + if dnInst.hostname not in primaryDnNode: + continue + break + dndir = dnInst.datadir + pg_proc_csv_path = '%s/pg_copydir/tbl_pg_proc_oids.csv' % dndir + new_pg_proc_csv_path = '%s/pg_copydir/new_tbl_pg_proc_oids.csv' % dndir + sql = \ + """copy pg_proc( proname, pronamespace, proowner, prolang, + procost, prorows, provariadic, protransform, prosecdef, + proleakproof, proisstrict, proretset, provolatile, pronargs, + pronargdefaults, prorettype, proargtypes, proallargtypes, + proargmodes, proargnames, proargdefaults, prosrc, probin, + proconfig, proacl, prodefaultargpos, fencedmode, proshippable, + propackage,prokind) WITH OIDS to '%s' delimiter ',' + csv header;""" % pg_proc_csv_path + (status, output) = ClusterCommand.remoteSQLCommand( + sql, g_opts.user, + dnInst.hostname, dnInst.port, False, + DefaultValue.DEFAULT_DB_NAME, IsInplaceUpgrade=True) + if status != 0: + raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % sql + + " Error: \n%s" % str(output)) + pg_proc_csv_reader = csv.reader(open(pg_proc_csv_path, 'r')) + pg_proc_csv_data = list(pg_proc_csv_reader) + header = pg_proc_csv_data[0] + header.insert(header.index('protransform') + 1, 'proisagg') + header.insert(header.index('protransform') + 2, 'proiswindow') + new_pg_proc_csv_data = [] + new_pg_proc_csv_data.append(header) + pg_proc_data_info = pg_proc_csv_data[1:] + for i in range(2): + for info in pg_proc_data_info: + info.insert(header.index('protransform') + 2, 'True') + for info in pg_proc_data_info: + new_pg_proc_csv_data.append(info) + f = open(new_pg_proc_csv_path, 'w') + new_pg_proc_csv_writer = csv.writer(f) + for info in new_pg_proc_csv_data: + new_pg_proc_csv_writer.writerow(info) + f.close() + # scp csv file to other nodes + standbyInstLst = [] + peerInsts = g_clusterInfo.getPeerInstance(dnInst) + for i in range(len(peerInsts)): + if peerInsts[i].instanceType == DefaultValue.MASTER_INSTANCE \ + or peerInsts[i].instanceType == \ + DefaultValue.STANDBY_INSTANCE: + standbyInstLst.append(peerInsts[i]) + for standbyInstance in standbyInstLst: + standbyCsvFilePath = \ + '%s/pg_copydir/new_tbl_pg_proc_oids.csv' % standbyInstance.datadir + cmd = "pscp -H %s %s %s" % ( + standbyInstance.hostname, new_pg_proc_csv_path, + standbyCsvFilePath) + g_logger.debug("exec cmd is: %s" % cmd) + (status, output) = DefaultValue.retryGetstatusoutput(cmd, 2, 5) + if status != 0: + raise Exception(ErrorCode.GAUSS_514[ + "GAUSS_51400"] % cmd + + "\nOutput:%s" % output) + + +def greySyncGuc(): + # delete old guc from configure file + global g_deleteGucDict + g_deleteGucDict = readDeleteGuc() + allInstances = g_dbNode.datanodes + pool = ThreadPool(DefaultValue.getCpuSet()) + pool.map(greySyncInstanceGuc, allInstances) + pool.close() + pool.join() + + +def greySyncInstanceGuc(dbInstance): + """ + from .conf file delete the old deleted GUC, need to have all + the .conf.bak.old, because new version may set new GUC + in config file, under rollback, we need to restore. 
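`createNewCsvFile` (above) splices two new header columns, `proisagg` and `proiswindow`, in after `protransform` and then widens every data row to match. The list surgery in isolation, as a simplified sketch; it ignores the leading OID emitted by the `WITH OIDS` dump, so treat the column arithmetic as illustrative rather than a drop-in replacement:

```python
import csv

def add_agg_window_columns(src_csv, dst_csv):
    """Insert proisagg/proiswindow after protransform in the header
    and default both new fields to 'True' in every data row (sketch)."""
    with open(src_csv, 'r') as f:
        rows = list(csv.reader(f))
    header, data = rows[0], rows[1:]
    col = header.index('protransform') + 1
    header[col:col] = ['proisagg', 'proiswindow']   # two new columns
    for row in data:
        row[col:col] = ['True', 'True']             # matching row values
    with open(dst_csv, 'w', newline='') as f:
        csv.writer(f).writerows([header] + data)
```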
+ """ + if dbInstance.instanceRole == DefaultValue.INSTANCE_ROLE_COODINATOR: + oldConfig = "%s/postgresql.conf" % dbInstance.datadir + instanceName = "coordinator" + elif dbInstance.instanceRole == DefaultValue.INSTANCE_ROLE_DATANODE: + oldConfig = "%s/postgresql.conf" % dbInstance.datadir + instanceName = "datanode" + elif dbInstance.instanceRole == DefaultValue.INSTANCE_ROLE_CMSERVER: + oldConfig = "%s/cm_server.conf" % dbInstance.datadir + instanceName = "cmserver" + elif dbInstance.instanceRole == DefaultValue.INSTANCE_ROLE_CMAGENT: + oldConfig = "%s/cm_agent.conf" % dbInstance.datadir + instanceName = "cmagent" + elif dbInstance.instanceRole == DefaultValue.INSTANCE_ROLE_GTM: + oldConfig = "%s/gtm.conf" % dbInstance.datadir + instanceName = "gtm" + else: + raise Exception(ErrorCode.GAUSS_512["GAUSS_51204"] % ( + "specified", dbInstance.instanceRole)) + if not os.path.exists(oldConfig): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % oldConfig) + oldFileBak = oldConfig + ".bak.old" + oldTempFileBak = oldFileBak + ".temp" + # if reenter the upgrade process, we may have synced + if os.path.exists(oldFileBak): + g_logger.log("File %s exists, No need to backup old configure again." + % oldFileBak) + return + # if the bak.old.temp file exists while bak.old not exists, it may have + # try to deleted, but not finished, + # so cannot copy again, this oldConfig file may have deleted the old GUC + if not os.path.exists(oldTempFileBak): + g_file.cpFile(oldConfig, oldTempFileBak) + # if do not have delete line, no need to deal with old .conf + if instanceName in g_deleteGucDict.keys(): + gucNames = g_deleteGucDict[instanceName] + else: + # the rename must be the last, which is the finish flag + g_file.rename(oldTempFileBak, oldFileBak) + g_logger.debug("No need to sync %s guc with %s." % ( + instanceName, oldConfig)) + return + g_logger.debug("Sync %s guc with %s." % (instanceName, oldConfig)) + bakFile = oldConfig + ".bak.upgrade" + pattern = re.compile("^\\s*.*=.*$") + lineno = -1 + deleteLineNoList = [] + f = None + try: + if dbInstance.instanceRole in [DefaultValue.INSTANCE_ROLE_COODINATOR, + DefaultValue.INSTANCE_ROLE_GTM, + DefaultValue.INSTANCE_ROLE_DATANODE]: + lockFile = oldConfig + '.lock' + if not os.path.exists(lockFile): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % lockFile) + f = open(lockFile, 'r+') + fcntl.lockf(f.fileno(), fcntl.LOCK_EX) + g_logger.debug("Successfully locked file %s." % lockFile) + with open(oldConfig, 'r') as oldFile: + resList = oldFile.readlines() + for line in resList: + lineno += 1 + # skip blank line + line = line.strip() + if not line: + continue + # search valid line + result = pattern.match(line) + if result is None: + continue + nameInFile = line.split('=')[0].strip() + if nameInFile.startswith('#'): + name = nameInFile.lstrip('#') + if name in gucNames: + deleteLineNoList.append(lineno) + else: + if nameInFile in gucNames: + deleteLineNoList.append(lineno) + + if deleteLineNoList: + g_logger.debug("Deleting line number: %s." 
% deleteLineNoList) + g_file.createFile(bakFile, True, DefaultValue.KEY_FILE_MODE) + deleteContent = [] + for lineno in deleteLineNoList: + deleteContent.append(resList[lineno]) + resList[lineno] = '' + with open(bakFile, 'w') as bak: + bak.writelines(resList) + g_file.rename(bakFile, oldConfig) + g_logger.debug("Deleting guc content: %s" % deleteContent) + # the rename must be the last, which is the finish flag + g_file.rename(oldTempFileBak, oldFileBak) + if f: + f.close() + except Exception as e: + if f: + f.close() + if bakFile: + g_file.removeFile(bakFile) + raise Exception(str(e)) + g_logger.debug("Successfully dealt with %s." % oldConfig) + + +def greyUpgradeSyncConfig(): + """ + """ + # check if we have switched to new version, if we have switched to + # new version, no need to sync configure + srcDir = g_opts.oldClusterAppPath + destDir = g_opts.newClusterAppPath + if os.path.samefile(g_gausshome, destDir): + g_logger.debug("Current version is the new version, " + "no need to sync old configure to new install path.") + return + # synchronize static and dynamic configuration files + static_config = "%s/bin/cluster_static_config" % srcDir + cmd = "(if [ -f '%s' ];then cp -f -p '%s' '%s/bin/';fi)" % ( + static_config, static_config, destDir) + dynamic_config = "%s/bin/cluster_dynamic_config" % srcDir + cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s/bin/';fi)" % ( + dynamic_config, dynamic_config, destDir) + # sync obsserver.key.cipher/obsserver.key.rand and + # server.key.cipher/server.key.rand and + # datasource.key.cipher/datasource.key.rand + OBS_cipher_key_bak_file = "%s/bin/obsserver.key.cipher" % srcDir + cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s/bin/';fi)" % ( + OBS_cipher_key_bak_file, OBS_cipher_key_bak_file, destDir) + OBS_rand_key_bak_file = "%s/bin/obsserver.key.rand" % srcDir + cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s/bin/';fi)" % ( + OBS_rand_key_bak_file, OBS_rand_key_bak_file, destDir) + trans_encrypt_cipher_key_bak_file = "%s/bin/trans_encrypt.key.cipher" %\ + srcDir + cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s/bin/';fi)" % ( + trans_encrypt_cipher_key_bak_file, + trans_encrypt_cipher_key_bak_file, destDir) + trans_encrypt_rand_key_bak_file = "%s/bin/trans_encrypt.key.rand" % srcDir + cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s/bin/';fi)" % ( + trans_encrypt_rand_key_bak_file, trans_encrypt_rand_key_bak_file, + destDir) + trans_encrypt_cipher_ak_sk_key_bak_file = "%s/bin/trans_encrypt_ak_sk.key"\ + % srcDir + cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s/bin/';fi)" % ( + trans_encrypt_cipher_ak_sk_key_bak_file, + trans_encrypt_cipher_ak_sk_key_bak_file, destDir) + roach_cipher_key_bak_file = "%s/bin/roach.key.cipher" % srcDir + cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s/bin/';fi)" % ( + roach_cipher_key_bak_file, roach_cipher_key_bak_file, destDir) + roach_rand_key_bak_file = "%s/bin/roach.key.rand" % srcDir + cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s/bin/';fi)" % ( + roach_rand_key_bak_file, roach_rand_key_bak_file, destDir) + roach_cipher_ak_sk_key_bak_file = "%s/bin/roach_ak_sk.key" % srcDir + cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s/bin/';fi)" % ( + roach_cipher_ak_sk_key_bak_file, roach_cipher_ak_sk_key_bak_file, + destDir) + server_cipher_key_bak_file = "%s/bin/server.key.cipher" % srcDir + cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s/bin/';fi)" % ( + server_cipher_key_bak_file, server_cipher_key_bak_file, destDir) + server_rand_key_bak_file = "%s/bin/server.key.rand" % srcDir + cmd += 
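Note the ordering that makes this step safe to re-enter: deletions are written to a scratch file that is renamed over `postgresql.conf`, and only afterwards is `.bak.old.temp` renamed to `.bak.old`, the marker checked on re-entry. A condensed sketch of that commit protocol, with locking and error handling omitted:

```python
import os
import shutil

def atomic_guc_delete(conf, kept_lines):
    """Commit protocol from greySyncInstanceGuc: back up first, publish
    the edited file via rename, then rename the backup as the
    'sync finished' marker (sketch)."""
    temp_bak = conf + ".bak.old.temp"
    final_bak = conf + ".bak.old"
    if not os.path.exists(temp_bak):
        shutil.copy(conf, temp_bak)       # pristine copy, not yet committed
    scratch = conf + ".bak.upgrade"
    with open(scratch, 'w') as f:
        f.writelines(kept_lines)
    os.rename(scratch, conf)              # publish edited configuration
    os.rename(temp_bak, final_bak)        # last step doubles as finish flag
```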
" && (if [ -f '%s' ];then cp -f -p '%s' '%s/bin/';fi)" % ( + server_rand_key_bak_file, server_rand_key_bak_file, destDir) + datasource_cipher = "%s/bin/datasource.key.cipher" % srcDir + cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s/bin/';fi)" % ( + datasource_cipher, datasource_cipher, destDir) + datasource_rand = "%s/bin/datasource.key.rand" % srcDir + cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s/bin/';fi)" % ( + datasource_rand, datasource_rand, destDir) + tde_key_cipher = "%s/bin/gs_tde_keys.cipher" % srcDir + cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s/bin/';fi)" % ( + tde_key_cipher, tde_key_cipher, destDir) + g_logger.debug("Grey upgrade sync command: %s" % cmd) + DefaultValue.execCommandLocally(cmd) + + # sync ca.key,etcdca.crt, client.key and client.crt + CA_key_file = "%s/share/sslcert/etcd/ca.key" % srcDir + cmd = "(if [ -f '%s' ];then cp -f -p '%s' '%s/share/sslcert/etcd/';fi)" % ( + CA_key_file, CA_key_file, destDir) + CA_cert_file = "%s/share/sslcert/etcd/etcdca.crt" % srcDir + cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s/share/sslcert/etcd/';" \ + "fi)" % (CA_cert_file, CA_cert_file, destDir) + client_key_file = "%s/share/sslcert/etcd/client.key" % srcDir + cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s/share/sslcert/etcd/';" \ + "fi)" % ( + client_key_file, client_key_file, destDir) + # copy cm_agent.lock file + cm_agent_lock_file = "%s/bin/cm_agent.lock" % srcDir + cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s/bin/';fi)" % ( + cm_agent_lock_file, cm_agent_lock_file, destDir) + client_cert_file = "%s/share/sslcert/etcd/client.crt" % srcDir + cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s/share/sslcert/etcd/';" \ + "fi)" % (client_cert_file, client_cert_file, destDir) + if int(g_opts.oldVersion) >= 92019: + client_key_cipher_file = \ + "%s/share/sslcert/etcd/client.key.cipher" % srcDir + cmd += " && (if [ -f '%s' ];then cp -f -p '%s' " \ + "'%s/share/sslcert/etcd/';fi)" % ( + client_key_cipher_file, client_key_cipher_file, destDir) + client_key_rand_file = "%s/share/sslcert/etcd/client.key.rand" % srcDir + cmd += " && (if [ -f '%s' ];then cp -f -p '%s' " \ + "'%s/share/sslcert/etcd/';fi)" % ( + client_key_rand_file, client_key_rand_file, destDir) + etcd_key_cipher_file = "%s/share/sslcert/etcd/etcd.key.cipher" % srcDir + cmd += " && (if [ -f '%s' ];then cp -f -p '%s' " \ + "'%s/share/sslcert/etcd/';fi)" % ( + etcd_key_cipher_file, etcd_key_cipher_file, destDir) + etcd_key_rand_file = "%s/share/sslcert/etcd/etcd.key.rand" % srcDir + cmd += " && (if [ -f '%s' ];then cp -f -p '%s' " \ + "'%s/share/sslcert/etcd/';fi)" % ( + etcd_key_rand_file, etcd_key_rand_file, destDir) + g_logger.debug("Grey upgrade sync command: %s" % cmd) + DefaultValue.execCommandLocally(cmd) + + # sync gsql certs + gsqlOldpath = "%s/share/sslcert/gsql/" % srcDir + gsqlNewDir = "%s/share/sslcert/" % destDir + cmd = "(if [ -d '%s' ];then cp -r '%s' '%s';fi)" % ( + gsqlOldpath, gsqlOldpath, gsqlNewDir) + g_logger.debug("Inplace restore command: %s" % cmd) + DefaultValue.execCommandLocally(cmd) + # sync gds certs + gdsOldpath = "%s/share/sslcert/gds/" % srcDir + gdsNewDir = "%s/share/sslcert/" % destDir + cmd = "(if [ -d '%s' ];then cp -r '%s' '%s';fi)" % ( + gdsOldpath, gdsOldpath, gdsNewDir) + g_logger.debug("Inplace restore command: %s" % cmd) + DefaultValue.execCommandLocally(cmd) + # sync grpc certs + grpcOldpath = "%s/share/sslcert/grpc/" % srcDir + grpcNewDir = "%s/share/sslcert/" % destDir + cmd = "(if [ -d '%s' ];then cp -r '%s' '%s';fi)" % ( + grpcOldpath, 
grpcOldpath, grpcNewDir)
+    g_logger.debug("Inplace restore command: %s" % cmd)
+    DefaultValue.execCommandLocally(cmd)
+
+    # sync java UDF
+    javadir = "%s/lib/postgresql/java" % srcDir
+    desPath = "%s/lib/postgresql/" % destDir
+    cmd = "(if [ -d '%s' ];then mv '%s/java/pljava.jar' " \
+          "'%s'&&cp -r '%s' '%s'&&mv '%s/pljava.jar' '%s/java/';fi)" % \
+          (javadir, desPath, desPath, javadir, desPath, desPath, desPath)
+    g_logger.debug("Grey upgrade sync command: %s" % cmd)
+    DefaultValue.execCommandLocally(cmd)
+
+    # sync postGIS
+    cmdPostGis = ""
+    for sofile in g_opts.postgisSOFileList.keys():
+        desPath = os.path.join(destDir, g_opts.postgisSOFileList[sofile])
+        srcFile = "'%s'/%s" % (srcDir, sofile)
+        cmdPostGis += " && (if [ -f %s ];then cp -f -p %s '%s';fi)" % (
+            srcFile, srcFile, desPath)
+    # skip the leading " &&"
+    cmd = cmdPostGis[3:]
+    g_logger.debug("Grey upgrade sync command: %s" % cmd)
+    DefaultValue.execCommandLocally(cmd)
+    # sync library file
+
+    # sync libsimsearch etc files
+    searchConfigFile = "%s/etc/searchletConfig.yaml" % srcDir
+    cmd = "(if [ -f '%s' ];then cp -f -p '%s' " \
+          "'%s/etc/searchletConfig.yaml';fi)" % (
+        searchConfigFile, searchConfigFile, destDir)
+    searchIniFile = "%s/etc/searchServer.ini" % srcDir
+    cmd += " && (if [ -f '%s' ];then cp -f -p '%s' " \
+           "'%s/etc/searchServer.ini';fi)" % (
+        searchIniFile, searchIniFile, destDir)
+    # sync libsimsearch libs files
+    cmd += " && (if [ -d '%s/lib/libsimsearch' ];" \
+           "then cp -r '%s/lib/libsimsearch' '%s/lib/';fi)" % (
+        srcDir, srcDir, destDir)
+    # sync initialized configuration parameters files
+    cmd += " && (if [ -f '%s/bin/initdb_param' ];" \
+           "then cp -f -p '%s/bin/initdb_param' '%s/bin/';fi)" % (
+        srcDir, srcDir, destDir)
+    DefaultValue.execCommandLocally(cmd)
+
+    # sync kerberos conf files
+    krbConfigFile = "%s/kerberos" % srcDir
+    cmd = "(if [ -d '%s' ];then cp -r '%s' '%s/';fi)" % (
+        krbConfigFile, krbConfigFile, destDir)
+    cmd += " && (if [ -d '%s/var/krb5kdc' ];then mkdir -p '%s/var';" \
+           " cp -r '%s/var/krb5kdc' '%s/var/';fi)" % (
+        srcDir, destDir, srcDir, destDir)
+    g_logger.debug("Grey upgrade sync command: %s" % cmd)
+    DefaultValue.execCommandLocally(cmd)
+
+    # pg_plugin must be synced last: users may create C functions, which
+    # can add files to it during the upgrade; after switching to the new
+    # binaries the original file mode is restored so C functions can
+    # still be written
+    g_file.changeMode(DefaultValue.SPE_FILE_MODE,
+                      '%s/lib/postgresql/pg_plugin' % srcDir, True)
+    cmd = "(cp -r '%s/lib/postgresql/pg_plugin' '%s/lib/postgresql')" % (
+        srcDir, destDir)
+    g_logger.debug("Grey upgrade sync command: %s" % cmd)
+    DefaultValue.execCommandLocally(cmd)
+
+
+def switchDnNodeProcess():
+    """
+    function: switch the node processes (fenced UDF and DN) that exist
+              on this node
+    :return:
+    """
+    if g_opts.rolling:
+        # for rolling upgrade, the gaussdb fenced udf process is
+        # switched after cm_agent has been switched
+        start_time = timeit.default_timer()
+        switchFencedUDFProcess()
+        elapsed = timeit.default_timer() - start_time
+        g_logger.log(
+            "Time to switch gaussdb fenced udf: %s" % getTimeFormat(elapsed))
+
+    start_time = timeit.default_timer()
+    switchDn()
+    elapsed = timeit.default_timer() - start_time
+    g_logger.log("Time to switch DN: %s" % getTimeFormat(elapsed))
+
+
+def switchFencedUDFProcess():
+    """
+    function: Kill gaussdb fenced UDF master process.
+ """ + if not isNeedSwitch("gaussdb fenced UDF master process"): + g_logger.log("No need to kill gaussdb fenced UDF master process.") + return + + g_logger.log("Killing gaussdb fenced UDF master process.") + killCmd = DefaultValue.killInstProcessCmd( + "gaussdb fenced UDF master process") + g_logger.log( + "Command to kill gaussdb fenced UDF master process: %s" % killCmd) + (status, _) = DefaultValue.retryGetstatusoutput(killCmd, 3, 5) + if status == 0: + g_logger.log("Successfully killed gaussdb fenced UDF master process.") + else: + raise Exception("Failed to kill gaussdb fenced UDF master process.") + + +def isNeedSwitch(process, dataDir=""): + """ + get the pid from ps ux command, and then get the realpth of this pid from + /proc/$pid/exe, under upgrade, if we can find the new path, then we do not + need to kill process, otherwise we should kill process + :param process: can be "datanode" + :return:True means need switch + """ + if not g_opts.rollback: + path = g_opts.oldClusterAppPath + else: + path = g_opts.newClusterAppPath + if process == "datanode": + process = "gaussdb" + path = os.path.join(path, 'bin', process) + path = os.path.normpath(path) + if dataDir: + cmd = r"pidList=`ps ux | grep '\<%s\>' | grep '%s' | grep '%s'| " \ + r"grep -v 'grep' | awk '{print $2}' | xargs `; " \ + r"for pid in $pidList; do dir=`readlink -f /proc/$pid/exe | " \ + r"xargs `; if [ `echo $dir | grep %s` ];then echo 'True'; " \ + r"else echo 'False'; fi; done" + cmd = cmd % (process, g_gausshome, dataDir, path) + else: + cmd = r"pidList=`ps ux | grep '\<%s\>' | grep '%s' | grep -v 'grep'" \ + r" | awk '{print $2}' | xargs `; " \ + r"for pid in $pidList; do dir=`readlink -f /proc/$pid/exe | " \ + r"xargs `; if [ `echo $dir | grep %s` ];then echo 'True'; " \ + r"else echo 'False'; fi; done" + cmd = cmd % (process, g_gausshome, path) + g_logger.log("Command for finding if need switch: %s" % cmd) + (status, output) = DefaultValue.retryGetstatusoutput(cmd, 2, 5) + if status != 0: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % str(cmd) + + " Error: \n%s" % str(output)) + if output.find('False') >= 0: + g_logger.log("No need to switch.") + return False + g_logger.log("Need to switch.") + return True + + +def switchDn(): + """ + function: switch DN after checkpoint + """ + g_logger.log("Killing DN processes.") + needKillDn = isKillDn() + cmd = "(ps ux | grep '\-D' | grep '%s' | grep -v grep | " \ + "awk '{print $2}' | xargs -r kill -9 )" + killCmd = "" + if needKillDn: + killCmd += " && " + cmd % g_gausshome + if killCmd: + killCmd = killCmd.strip() + if killCmd.startswith("&&"): + killCmd = killCmd[2:] + g_logger.log("Command to kill other process: %s" % killCmd) + (status, output) = DefaultValue.retryGetstatusoutput(killCmd, 3, 5) + if status == 0: + g_logger.log("Successfully killed DN processes.") + else: + raise Exception("Failed to kill DN processes.") + else: + g_logger.log("No need to kill DN.") + + +def isKillDn(): + # if does not have cn and dn, no need to + if not g_dbNode.datanodes: + return False + needKillDn = False + try: + cmd = "gaussdb -V" + (status, output) = DefaultValue.retryGetstatusoutput(cmd, 2, 5) + if status != 0 and output != "": + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + "\nError: " + str(output)) + pattern = re.compile(r'[(](.*?)[)]') + versionInBrackets = re.findall(pattern, output) + curCommitid = versionInBrackets[0].split(" ")[-1] + # get the dn and cn name + dnInst = None + clusterNodes = g_clusterInfo.dbNodes + for dbNode in clusterNodes: + if 
len(dbNode.datanodes) == 0: + continue + dnInst = dbNode.datanodes[0] + primaryDnNode, _ = DefaultValue.getPrimaryNode(g_opts.userProfile) + if dnInst.hostname not in primaryDnNode: + continue + break + localHost = DefaultValue.GetHostIpOrName() + if int(g_opts.oldVersion) >= 92069: + sql = "select node_name, node_type from pg_catalog.pgxc_node " \ + "where node_host = '%s';" % localHost + else: + if g_dbNode.name != dnInst.hostname: + sql = "select node_name, node_type from pg_catalog.pgxc_node " \ + "where node_host = '%s';" % localHost + else: + sql = "select node_name, node_type from pg_catalog.pgxc_node" \ + " where node_host = 'localhost';" + g_logger.debug("Sql to query node name: %s" % sql) + (status, output) = ClusterCommand.remoteSQLCommand( + sql, g_opts.user, + dnInst.hostname, dnInst.port, False, + DefaultValue.DEFAULT_DB_NAME, IsInplaceUpgrade=True) + if status != 0 or ClusterCommand.findErrorInSql(output): + raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % sql + + " Error: \n%s" % str(output)) + resList = output.split('\n') + dnNames = [] + for record in resList: + record = record.split('|') + nodeName = record[0].strip() + dnNames.append(nodeName) + g_logger.debug("isKillDn dnName:{0} " + "commitid:{1}".format(dnNames, curCommitid)) + # execute on the dn and cn to get the exists process version + needKillDn = checkExistsVersion(dnNames, dnInst, curCommitid) + return needKillDn + except Exception as e: + g_logger.debug("Cannot query the exists dn process " + "version form select version(). Error: \n%s" % str(e)) + for dbInstance in g_dbNode.datanodes: + dataDir = os.path.normpath(dbInstance.datadir) + if isNeedSwitch("datanode", dataDir): + needKillDn = True + break + g_logger.log("needKillDn: %s" % (needKillDn)) + return needKillDn + + +def getLsnInfo(): + """ + get lsn info + :return: + """ + g_logger.log("Get lsn info.") + try: + InstanceList = [] + dnInst = None + # find all instances need to do backup + clusterNodes = g_clusterInfo.dbNodes + for dbNode in clusterNodes: + if len(dbNode.datanodes) == 0: + continue + dnInst = dbNode.datanodes[0] + primaryDnIntance, _ = DefaultValue.getPrimaryNode( + g_opts.userProfile) + if dnInst.hostname not in primaryDnIntance: + continue + break + if dnInst: + InstanceList.append(dnInst) + if InstanceList: + getLsnSqlPath = os.path.join( + g_opts.upgrade_bak_path, const.GET_LSN_SQL_FILE) + if not os.path.exists(getLsnSqlPath): + g_file.createFileInSafeMode(getLsnSqlPath) + lsnSql = "select pg_current_xlog_location(), " \ + "pg_xlogfile_name(pg_current_xlog_location()), " \ + "pg_xlogfile_name_offset(pg_current_xlog_location());" + with os.fdopen( + os.open(getLsnSqlPath, os.O_WRONLY, 0o755), 'w') as fp: + fp.writelines(lsnSql) + + # do backup parallelly + if len(InstanceList) != 0: + pool = ThreadPool(len(InstanceList)) + pool.map(getLsnInfoImpl, InstanceList) + pool.close() + pool.join() + else: + g_logger.debug("No master instance found on this node, " + "nothing need to do.") + return + + g_logger.log("Successfully get lsn info.") + except Exception as e: + raise Exception(str(e)) + + +def getLsnInfoImpl(instanceList): + """ + Run the SQL file of the LSN to obtain the current LSN information. 
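`isKillDn` extracts the build commit id by taking the first parenthesised group of `gaussdb -V` output and keeping its last whitespace-separated token. That parsing rule in isolation (the sample line below is hypothetical):

```python
import re

def commit_id_from_version(version_line):
    """Pull the build commit id out of `gaussdb -V` output the way
    isKillDn() does: last token of the first parenthesised group."""
    in_brackets = re.findall(r'[(](.*?)[)]', version_line)
    return in_brackets[0].split(' ')[-1] if in_brackets else None

# Hypothetical sample:
# commit_id_from_version('gaussdb (openGauss 2.0.0 build 78689da9) ...')
# -> '78689da9'
```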
+ """ + getLsnSqlPath = os.path.join( + g_opts.upgrade_bak_path, const.GET_LSN_SQL_FILE) + execSQLFile("postgres", getLsnSqlPath, instanceList.port) + + +def greyRestoreConfig(): + oldDir = g_opts.oldClusterAppPath + newDir = g_opts.newClusterAppPath + if not os.path.exists(oldDir): + if g_opts.forceRollback: + g_logger.log( + ErrorCode.GAUSS_502["GAUSS_50201"] % oldDir + + " Under force rollback mode, no need to sync config.") + return + else: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % oldDir) + + # if sync the pg_plugin, and change the mode, + # but not switch to new bin, we need to restore the mode + oldPluginDir = "%s/lib/postgresql/pg_plugin" % g_opts.oldClusterAppPath + if os.path.exists(oldPluginDir): + g_file.changeMode(DefaultValue.KEY_DIRECTORY_MODE, oldPluginDir, True) + + if not os.path.exists(newDir): + g_logger.log(ErrorCode.GAUSS_502["GAUSS_50201"] % newDir + + " No need to sync.") + return + if os.path.samefile(g_opts.oldClusterAppPath, g_gausshome): + g_logger.log("Current version is old version, nothing need to do.") + return + static_config = "%s/bin/cluster_static_config" % newDir + cmd = "(if [ -f '%s' ];then cp -f -p '%s' '%s/bin/';fi)" % ( + static_config, static_config, oldDir) + dynamic_config = "%s/bin/cluster_dynamic_config" % newDir + cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s/bin/';fi)" % ( + dynamic_config, dynamic_config, oldDir) + DefaultValue.execCommandLocally(cmd) + + mergePlugin() + + +def mergePlugin(): + """ + under rollback, use the new dir as base, if the version is old version, + no need to sync + :return: NA + """ + g_logger.log("Sync pg_plugin.") + oldDir = "%s/lib/postgresql/pg_plugin" % g_opts.oldClusterAppPath + newDir = "%s/lib/postgresql/pg_plugin" % g_opts.newClusterAppPath + if not os.path.exists(newDir): + g_logger.log(ErrorCode.GAUSS_502["GAUSS_50201"] % newDir + + " No need to sync pg_plugin.") + return + g_file.changeMode(DefaultValue.SPE_FILE_MODE, newDir, True) + oldLines = os.listdir(oldDir) + newLines = os.listdir(newDir) + newAdd = [i for i in newLines if i not in oldLines] + newDelete = [i for i in oldLines if i not in newLines] + cmd = "" + for add in newAdd: + newFile = "%s/%s" % (newDir, add) + cmd += "(if [ -f '%s' ];then cp -f -p '%s' '%s';fi) && " % ( + newFile, newFile, oldDir) + for delete in newDelete: + deleteFile = "%s/%s" % (oldDir, delete) + cmd += "(if [ -f '%s' ];then rm '%s';fi) && " % ( + deleteFile, deleteFile) + if cmd != "": + cmd = cmd[:-3] + g_logger.debug("Command to sync plugin: %s" % cmd) + DefaultValue.execCommandLocally(cmd) + else: + g_logger.log("No need to sync pg_plugin.") + + +def greyRestoreGuc(): + # delete old guc from configure file + oldDir = g_opts.oldClusterAppPath + if not os.path.exists(oldDir): + # the node is disable after rollback + if g_opts.forceRollback: + g_logger.log(ErrorCode.GAUSS_502["GAUSS_50201"] % oldDir + + " Under force rollback mode.") + else: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % oldDir) + # if the upgrade process interrupt when record delete guc, + # but not switch to new version and the record is not reliable if user + # set the GUC during the failure upgrade status, so we need to check if the + # configure file have had this record, if user has set, + # cannot sync this guc again + allInstances = g_dbNode.datanodes + pool = ThreadPool(DefaultValue.getCpuSet()) + pool.map(greyRestoreInstanceGuc, allInstances) + pool.close() + pool.join() + + +def greyRestoreInstanceGuc(dbInstance): + if dbInstance.instanceRole == 
DefaultValue.INSTANCE_ROLE_DATANODE: + oldConfig = "%s/postgresql.conf" % dbInstance.datadir + else: + raise Exception(ErrorCode.GAUSS_512["GAUSS_51204"] % ( + "specified", dbInstance.instanceRole)) + # record the guc without delete guc + bakFile = oldConfig + ".bak.upgrade" + g_file.removeFile(bakFile) + oldBakFile = oldConfig + ".bak.old" + oldTempFileBak = oldBakFile + ".temp" + g_file.removeFile(oldTempFileBak) + if not os.path.exists(oldConfig): + if g_opts.forceRollback: + g_logger.warn(ErrorCode.GAUSS_502["GAUSS_50201"] % oldConfig) + else: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % oldConfig) + + if not os.path.exists(oldBakFile): + g_logger.debug(ErrorCode.GAUSS_502["GAUSS_50201"] % oldBakFile + + " No need to restore guc.") + return + f = None + try: + if dbInstance.instanceRole in [DefaultValue.INSTANCE_ROLE_COODINATOR, + DefaultValue.INSTANCE_ROLE_GTM, + DefaultValue.INSTANCE_ROLE_DATANODE]: + lockFile = oldConfig + '.lock' + if not os.path.exists(lockFile): + if not g_opts.forceRollback: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % + lockFile) + else: + g_logger.warn(ErrorCode.GAUSS_502["GAUSS_50201"] % + lockFile + " Without lock to restore guc.") + else: + f = open(lockFile, 'r+') + fcntl.lockf(f.fileno(), fcntl.LOCK_EX) + # if user has set in the configure file, cannot sync, use the user set + g_file.rename(oldBakFile, oldConfig) + if f: + f.close() + except Exception as e: + if f: + f.close() + raise Exception(str(e)) + g_logger.debug("Successfully restore guc to %s." % oldConfig) + + +def cleanConfBakOld(): + """ + clean conf.bak.old files + """ + allInstances = g_dbNode.datanodes + pool = ThreadPool(DefaultValue.getCpuSet()) + pool.map(cleanOneInstanceConfBakOld, allInstances) + pool.close() + pool.join() + + +def cleanOneInstanceConfBakOld(dbInstance): + """ + clean conf.bak.old files in one instance + """ + if dbInstance.instanceRole == DefaultValue.INSTANCE_ROLE_DATANODE: + oldConfig = "%s/%s" % ( + dbInstance.datadir, const.POSTGRESQL_CONF_BAK_OLD) + if not os.path.exists(oldConfig): + g_logger.debug( + "WARNING: " + ErrorCode.GAUSS_502["GAUSS_50201"] % oldConfig) + else: + cmd = "(if [ -f '%s' ]; then rm -f '%s'; fi)" % (oldConfig, oldConfig) + DefaultValue.execCommandLocally(cmd) + g_logger.debug("Successfully cleaned up %s." 
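`greySyncGuc`, `greyRestoreGuc`, and `cleanConfBakOld` all fan the per-instance work out through `multiprocessing.dummy.ThreadPool`. The recurring pattern as a reusable sketch; sizing the pool by instance count, rather than by `DefaultValue.getCpuSet()` as above, is an assumption here:

```python
from multiprocessing.dummy import ThreadPool  # thread-backed Pool

def run_per_instance(worker, instances):
    """Fan-out pattern this script repeats: one worker call per
    instance, executed on a small thread pool (sketch)."""
    if not instances:
        return
    pool = ThreadPool(len(instances))
    try:
        pool.map(worker, instances)
    finally:
        pool.close()
        pool.join()
```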
% oldConfig) + + +def checkAction(): + """ + function: check action + input : NA + output : NA + """ + if g_opts.action not in \ + [const.ACTION_TOUCH_INIT_FILE, + const.ACTION_UPDATE_CATALOG, + const.ACTION_BACKUP_OLD_CLUSTER_DB_AND_REL, + const.ACTION_SYNC_CONFIG, + const.ACTION_BACKUP_CONFIG, + const.ACTION_RESTORE_CONFIG, + const.ACTION_INPLACE_BACKUP, + const.ACTION_INPLACE_RESTORE, + const.ACTION_CHECK_GUC, + const.ACTION_BACKUP_HOTPATCH, + const.ACTION_ROLLBACK_HOTPATCH, + const.ACTION_SWITCH_PROCESS, + const.ACTION_SWITCH_BIN, + const.ACTION_CLEAN_INSTALL_PATH, + const.ACTION_COPY_CERTS, + const.ACTION_UPGRADE_SQL_FOLDER, + const.ACTION_BACKUP_OLD_CLUSTER_CATALOG_PHYSICAL_FILES, + const.ACTION_RESTORE_OLD_CLUSTER_CATALOG_PHYSICAL_FILES, + const.ACTION_CLEAN_OLD_CLUSTER_CATALOG_PHYSICAL_FILES, + const.ACTION_REPLACE_PG_PROC_FILES, + const.ACTION_CREATE_PG_PROC_MAPPING_FILE, + const.ACTION_CREATE_NEW_CSV_FILE, + const.ACTION_GREY_SYNC_GUC, + const.ACTION_GREY_UPGRADE_CONFIG_SYNC, + const.ACTION_SWITCH_DN, + const.ACTION_GET_LSN_INFO, + const.ACTION_GREY_RESTORE_CONFIG, + const.ACTION_GREY_RESTORE_GUC, + const.ACTION_CLEAN_CONF_BAK_OLD]: + GaussLog.exitWithError( + ErrorCode.GAUSS_500["GAUSS_50004"] % 't' + + " Value: %s" % g_opts.action) + + +def main(): + """ + function: main function + """ + try: + global g_opts + g_opts = CmdOptions() + parseCommandLine() + checkParameter() + initGlobals() + except Exception as e: + GaussLog.exitWithError(str(e) + traceback.format_exc()) + try: + # select the object's function by type + funcs = { + const.ACTION_SWITCH_BIN: switchBin, + const.ACTION_CLEAN_INSTALL_PATH: cleanInstallPath, + const.ACTION_TOUCH_INIT_FILE: touchInstanceInitFile, + const.ACTION_SYNC_CONFIG: syncClusterConfig, + const.ACTION_BACKUP_CONFIG: backupConfig, + const.ACTION_RESTORE_CONFIG: restoreConfig, + const.ACTION_INPLACE_BACKUP: inplaceBackup, + const.ACTION_INPLACE_RESTORE: inplaceRestore, + const.ACTION_CHECK_GUC: checkGucValue, + const.ACTION_BACKUP_HOTPATCH: backupHotpatch, + const.ACTION_ROLLBACK_HOTPATCH: rollbackHotpatch, + const.ACTION_COPY_CERTS: copyCerts, + const.ACTION_UPGRADE_SQL_FOLDER: prepareUpgradeSqlFolder, + const.ACTION_BACKUP_OLD_CLUSTER_DB_AND_REL: + backupOldClusterDBAndRel, + const.ACTION_UPDATE_CATALOG: updateCatalog, + const.ACTION_BACKUP_OLD_CLUSTER_CATALOG_PHYSICAL_FILES: + backupOldClusterCatalogPhysicalFiles, + const.ACTION_RESTORE_OLD_CLUSTER_CATALOG_PHYSICAL_FILES: + restoreOldClusterCatalogPhysicalFiles, + const.ACTION_CLEAN_OLD_CLUSTER_CATALOG_PHYSICAL_FILES: + cleanOldClusterCatalogPhysicalFiles, + const.ACTION_REPLACE_PG_PROC_FILES: replacePgprocFile, + const.ACTION_CREATE_PG_PROC_MAPPING_FILE: + createPgprocPathMappingFile, + const.ACTION_CREATE_NEW_CSV_FILE: createNewCsvFile, + const.ACTION_RESTORE_DYNAMIC_CONFIG_FILE: restoreDynamicConfigFile, + const.ACTION_GREY_SYNC_GUC: greySyncGuc, + const.ACTION_GREY_UPGRADE_CONFIG_SYNC: greyUpgradeSyncConfig, + const.ACTION_SWITCH_DN: switchDnNodeProcess, + const.ACTION_GET_LSN_INFO: getLsnInfo, + const.ACTION_GREY_RESTORE_CONFIG: greyRestoreConfig, + const.ACTION_GREY_RESTORE_GUC: greyRestoreGuc, + const.ACTION_CLEAN_CONF_BAK_OLD: cleanConfBakOld} + func = funcs[g_opts.action] + func() + except Exception as e: + checkAction() + g_logger.debug(traceback.format_exc()) + g_logger.logExit(str(e)) + +if __name__ == '__main__': + main() diff --git a/script/local/__init__.py b/script/local/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/simpleInstall/README.md 
b/simpleInstall/README.md
new file mode 100644
index 0000000..224e158
--- /dev/null
+++ b/simpleInstall/README.md
@@ -0,0 +1,154 @@
+## 1 Overview
+
+This document describes the system environment required by the openGauss simplified installation script (hereinafter "the installation script") and the steps for a one-click installation of the openGauss database.
+
+## 2 Environment Requirements
+
+### 2.1 openGauss Requirements
+
+For the environment requirements of openGauss itself, see section 2.3.1 "Hardware and Software Requirements" in the openGauss Installation Guide.
+
+### 2.2 Installation Script Requirements
+
+#### Hardware requirements
+
+The installation script only supports certain combinations of operating system and processor architecture, as listed in Table 1.
+
+**Table 1** Hardware requirements
+
+| Operating system | Processor architecture |
+| --------- | ---------- |
+| openEuler | aarch64 |
+| openEuler | x86_64 |
+| CentOS | x86_64 |
+
+#### Software dependencies
+
+The installation script depends on the software listed in Table 2.
+
+**Table 2** Software dependencies
+
+| Required software | Recommended version |
+| --------- | -------- |
+| firewalld | - |
+| python | 3 |
+
+## 3 Installing openGauss
+
+### 3.1 Preparations
+
+#### Importing the installation script
+
+The installation script consists of several files, listed in Table 3. When importing it, copy the tar package to the target environment and extract it there; extracting it elsewhere may introduce Windows/Unix line-ending incompatibilities. If that happens, run dos2unix on the script files to convert them.
+
+**Table 3** Installation script files
+
+| File | Purpose |
+| ------------ | ---------------------- |
+| install.sh | Main program of the simplified installation |
+| common.sh | Common functions |
+| README.md | Reference document |
+| template.xml | XML template |
+| finance.sql | Demo database for the finance data model |
+| school.sql | Demo database for the school data model |
+
+#### Importing the openGauss installation package
+
+The installation script supports two ways of importing the openGauss installation package:
+
+- Manual import
+
+Download the matching installation package from the [openGauss official website](https://opengauss.org/zh/download.html) and copy it to the parent directory of the installation script on the target environment.
+
+- Automatic download
+
+Configure external network access for the environment and make sure no openGauss installation package exists in the parent directory of the script. When the script runs, it downloads the matching installation package automatically.
+
+### 3.2 Running the Installation
+
+Run the installation script with the following command.
+
+```shell
+sh install.sh -U user_name -G user_group -h host_ip -p port [-D install_path]
+```
+
+#### Parameters
+
+- user_name: installation user of the openGauss database.
+- user_group: group of the installation user.
+- host_ip: IP address of the host on the backend storage network (intranet IP).
+- host_port: base port number of the database node.
+- install_path: installation path of the openGauss database (optional).
+
+For details about these parameters, see section 3.1 "Creating the XML Configuration File" in the openGauss Installation Guide.
+
+#### Notes
+
+- If install_path is not specified, the database is installed under /opt/user_name by default.
+
+- No matter how the openGauss package is imported, the script keeps a local copy under /home/user_name/openGaussTar. This path can be changed through the install_location parameter in install.sh, but it must differ from the installation path install_path.
+- The script must be run as root, and only one instance of it may run at a time.
+
+## 4 Importing the Demo Databases
+
+### 4.1 School Data Model
+
+Suppose school B in city A introduces the openGauss database to strengthen its administration. The main entities at school B are students, teachers, classes, departments, and courses. This example assumes that in school B's database, teachers teach courses, students enroll in courses, departments hire teachers, classes make up departments, and students make up classes. The corresponding relational schema is given below. The installation script installs this demo model if the user chooses to.
+
+#### Relational schema
+
+For the five entities of school B, an attribute set is defined for each, as follows:
+
+- Student (student ID, name, gender, date of birth, enrollment date, home address)
+- Teacher (teacher ID, name, title, gender, age, hire date)
+- Class (class ID, class name, head teacher)
+- Department (department ID, department name, department head)
+- Course (course ID, course name, course type, credits)
+
+The corresponding table and column names are:
+
+- student(std_id,std_name,std_sex,std_birth,std_in,std_address)
+- teacher(tec_id,tec_name,tec_job,tec_sex,tec_age,tec_in)
+- class(cla_id,cla_name,cla_teacher)
+- school_department(depart_id,depart_name,depart_teacher)
+- course(cor_id,cor_name,cor_type,credit)
+
+Relationships between the entities:
+
+- A student can take multiple courses, and a course can be taken by multiple students
+- A teacher can teach multiple courses, and a course can be taught by multiple teachers
+- A department can consist of multiple classes
+- A department can hire multiple teachers
+- A class can consist of multiple students
+
+### 4.2 Finance Data Model
+
+Suppose bank C in city A introduces the openGauss database to simplify the management of its data. For bank C's business, this example divides the entities into clients, bank cards, wealth-management products, insurance, funds, and assets. It assumes the following relationships in bank C's finance database: a client can apply for bank cards and purchase different bank products, namely assets, wealth-management products, funds, and insurance. The corresponding relational schema is given below. The installation script installs this demo model if the user chooses to.
+
+#### Relational schema
+
+For the six entities of bank C, an attribute set is defined for each, as follows:
+
+- Client (client ID, name, email, ID card number, phone number, login password)
+- Bank card (card number, card type, owning client ID)
+- Wealth-management product (product name, product ID, description, purchase amount, term in years)
+- Insurance (insurance name, insurance ID, amount, target group, term in years, covered items)
+- Fund (fund name, fund ID, fund type, amount, risk level, fund manager)
+- Asset (client ID, product ID, product status, quantity, income, purchase time)
+
+The corresponding table and column names are:
+
+- client(c_id,c_name,c_mail,c_id_card,c_phone,c_password)
+- bank_card(b_number,b_type,b_c_id)
+- finances_product(p_name,p_id,p_description,p_amount,p_year)
+- insurance(i_name,i_id,i_amount,i_person,i_year,i_project)
+- fund(f_name,f_id,f_type,f_amount,risk_level,f_manager)
+- property(pro_c_id,pro_id,pro_status,pro_quantity,pro_income,pro_purchase_time)
+
+Relationships between the entities:
+
+- A client can hold multiple bank cards
+- A client can own multiple assets
+- A client can buy multiple wealth-management products, and one product can be bought by multiple clients
+- A client can buy multiple funds, and one fund can be bought by multiple clients
+- A client can buy multiple insurance products, and one insurance product can be bought by multiple clients
\ No newline at end of file
diff --git a/simpleInstall/common.sh b/simpleInstall/common.sh
new file mode 100644
index 0000000..38c97e2
--- /dev/null
+++ b/simpleInstall/common.sh
@@ -0,0 +1,73 @@
+if [ "$COMMON_SH" ]; then
+    return;
+fi
+
+function fn_create_user()
+{
+    user_name=$1
+    user_grp=$2
+    groupadd $user_grp 2>/dev/null
+    egrep "^$user_name" /etc/passwd >& /dev/null
+    if [ $? -ne 0 ]
+    then
+        useradd -g $user_grp -d /home/$user_name -m -s /bin/bash $user_name 2>/dev/null
+        echo "enter password for user " $user_name
+        passwd $user_name
+        echo "create user success."
+    else
+        echo "user already exists."
+    fi
+
+    return 0
+}
+
+function fn_check_firewall()
+{
+    host_port=$1
+    firewall-cmd --permanent --add-port="$host_port/tcp"
+    firewall-cmd --reload
+    return 0
+}
+
+function fn_selinux()
+{
+    sed -i "s/SELINUX=.*/SELINUX=disabled/g" /etc/selinux/config
+    return 0
+}
+
+function fn_precheck()
+{
+    system_arch=`uname -p`
+    system_name=`cat /etc/os-release | grep '^ID=".*' | grep -o -E '(openEuler|centos)'`
+    total=0
+    python3 --version >/dev/null 2>&1
+    if [ $? -ne 0 ]
+    then
+        echo "You need to install python3 or create the correct symbolic link."
+        return 1
+    fi
+    while read line
+    do
+        if [ "$line"x == ""x ]
+        then
+            continue
+        fi
+        yum list installed | grep $line > /dev/null
+        if [ $? -ne 0 ]
+        then
+            total=`expr $total + 1`
+            if [ $total -eq 1 ]
+            then
+                echo "You need to install: " > preCheck.log
+            fi
+            echo "$line" >> preCheck.log
+        fi
+    done < requirements_"$system_name"_"$system_arch"
+    if [ $total -gt 0 ]
+    then
+        return 1
+    fi
+    return 0
+}
+
+COMMON_SH="common.sh"
diff --git a/simpleInstall/finance.sql b/simpleInstall/finance.sql
new file mode 100644
index 0000000..8fd377d
--- /dev/null
+++ b/simpleInstall/finance.sql
@@ -0,0 +1,147 @@
+create database finance;
+
+\c finance;
+
+BEGIN;
+
+-- Create the client table
+CREATE TABLE client
+(
+    c_id INT PRIMARY KEY,
+    c_name VARCHAR(100) NOT NULL,
+    c_mail CHAR(30) UNIQUE,
+    c_id_card CHAR(20) UNIQUE NOT NULL,
+    c_phone CHAR(20) UNIQUE NOT NULL,
+    c_password CHAR(20) NOT NULL
+);
+
+-- Create the bank_card table
+CREATE TABLE bank_card
+(
+    b_number CHAR(30) PRIMARY KEY,
+    b_type CHAR(20),
+    b_c_id INT NOT NULL
+);
+-- Add a foreign-key constraint to bank_card
+ALTER TABLE bank_card ADD CONSTRAINT fk_c_id FOREIGN KEY (b_c_id) REFERENCES client(c_id) ON DELETE CASCADE;
+
+-- Create the finances_product table
+CREATE TABLE finances_product
+(
+    p_name VARCHAR(100) NOT NULL,
+    p_id INT PRIMARY KEY,
+    p_description CLOB,
+    p_amount INT,
+    p_year INT
+);
+
+-- Create the insurance table
+CREATE TABLE insurance
+(
+    i_name VARCHAR(100) NOT NULL,
+    i_id INT PRIMARY KEY,
+    i_amount INT,
+    i_person CHAR(20),
+    i_year INT,
+    i_project VARCHAR(200)
+);
+
+-- Create the fund table
+CREATE TABLE fund
+(
+    f_name VARCHAR(100) NOT NULL,
+    f_id INT PRIMARY KEY,
+    f_type CHAR(20),
+    f_amount INT,
+    risk_level CHAR(20) NOT NULL,
+    f_manager INT NOT NULL
+);
+
+-- Create the property table
+CREATE TABLE property
+(
+    pro_c_id INT NOT NULL,
+    pro_id INT PRIMARY KEY,
+    pro_status CHAR(20),
+    pro_quantity INT,
+    pro_income INT,
+    pro_purchase_time DATE
+);
+-- Add a foreign-key constraint to property
+ALTER TABLE property ADD CONSTRAINT fk_pro_c_id FOREIGN KEY (pro_c_id) REFERENCES client(c_id) ON DELETE CASCADE;
+
+-- Insert data
+INSERT INTO client(c_id,c_name,c_mail,c_id_card,c_phone,c_password) VALUES 
(1,'张一','zhangyi@huawei.com','340211199301010001','18815650001','gaussdb_001'); +INSERT INTO client(c_id,c_name,c_mail,c_id_card,c_phone,c_password) VALUES (2,'张二','zhanger@huawei.com','340211199301010002','18815650002','gaussdb_002'); +INSERT INTO client(c_id,c_name,c_mail,c_id_card,c_phone,c_password) VALUES (3,'张三','zhangsan@huawei.com','340211199301010003','18815650003','gaussdb_003'); +INSERT INTO client(c_id,c_name,c_mail,c_id_card,c_phone,c_password) VALUES (4,'张四','zhangsi@huawei.com','340211199301010004','18815650004','gaussdb_004'); +INSERT INTO client(c_id,c_name,c_mail,c_id_card,c_phone,c_password) VALUES (5,'张五','zhangwu@huawei.com','340211199301010005','18815650005','gaussdb_005'); +INSERT INTO client(c_id,c_name,c_mail,c_id_card,c_phone,c_password) VALUES (6,'张六','zhangliu@huawei.com','340211199301010006','18815650006','gaussdb_006'); +INSERT INTO client(c_id,c_name,c_mail,c_id_card,c_phone,c_password) VALUES (7,'张七','zhangqi@huawei.com','340211199301010007','18815650007','gaussdb_007'); +INSERT INTO client(c_id,c_name,c_mail,c_id_card,c_phone,c_password) VALUES (8,'张八','zhangba@huawei.com','340211199301010008','18815650008','gaussdb_008'); +INSERT INTO client(c_id,c_name,c_mail,c_id_card,c_phone,c_password) VALUES (9,'张九','zhangjiu@huawei.com','340211199301010009','18815650009','gaussdb_009'); +INSERT INTO client(c_id,c_name,c_mail,c_id_card,c_phone,c_password) VALUES (10,'李一','liyi@huawei.com','340211199301010010','18815650010','gaussdb_010'); +INSERT INTO client(c_id,c_name,c_mail,c_id_card,c_phone,c_password) VALUES (11,'李二','lier@huawei.com','340211199301010011','18815650011','gaussdb_011'); +INSERT INTO client(c_id,c_name,c_mail,c_id_card,c_phone,c_password) VALUES (12,'李三','lisan@huawei.com','340211199301010012','18815650012','gaussdb_012'); +INSERT INTO client(c_id,c_name,c_mail,c_id_card,c_phone,c_password) VALUES (13,'李四','lisi@huawei.com','340211199301010013','18815650013','gaussdb_013'); +INSERT INTO client(c_id,c_name,c_mail,c_id_card,c_phone,c_password) VALUES (14,'李五','liwu@huawei.com','340211199301010014','18815650014','gaussdb_014'); +INSERT INTO client(c_id,c_name,c_mail,c_id_card,c_phone,c_password) VALUES (15,'李六','liliu@huawei.com','340211199301010015','18815650015','gaussdb_015'); +INSERT INTO client(c_id,c_name,c_mail,c_id_card,c_phone,c_password) VALUES (16,'李七','liqi@huawei.com','340211199301010016','18815650016','gaussdb_016'); +INSERT INTO client(c_id,c_name,c_mail,c_id_card,c_phone,c_password) VALUES (17,'李八','liba@huawei.com','340211199301010017','18815650017','gaussdb_017'); +INSERT INTO client(c_id,c_name,c_mail,c_id_card,c_phone,c_password) VALUES (18,'李九','lijiu@huawei.com','340211199301010018','18815650018','gaussdb_018'); +INSERT INTO client(c_id,c_name,c_mail,c_id_card,c_phone,c_password) VALUES (19,'王一','wangyi@huawei.com','340211199301010019','18815650019','gaussdb_019'); +INSERT INTO client(c_id,c_name,c_mail,c_id_card,c_phone,c_password) VALUES (20,'王二','wanger@huawei.com','340211199301010020','18815650020','gaussdb_020'); +INSERT INTO client(c_id,c_name,c_mail,c_id_card,c_phone,c_password) VALUES (21,'王三','wangsan@huawei.com','340211199301010021','18815650021','gaussdb_021'); +INSERT INTO client(c_id,c_name,c_mail,c_id_card,c_phone,c_password) VALUES (22,'王四','wangsi@huawei.com','340211199301010022','18815650022','gaussdb_022'); +INSERT INTO client(c_id,c_name,c_mail,c_id_card,c_phone,c_password) VALUES (23,'王五','wangwu@huawei.com','340211199301010023','18815650023','gaussdb_023'); +INSERT INTO 
client(c_id,c_name,c_mail,c_id_card,c_phone,c_password) VALUES (24,'王六','wangliu@huawei.com','340211199301010024','18815650024','gaussdb_024'); +INSERT INTO client(c_id,c_name,c_mail,c_id_card,c_phone,c_password) VALUES (25,'王七','wangqi@huawei.com','340211199301010025','18815650025','gaussdb_025'); +INSERT INTO client(c_id,c_name,c_mail,c_id_card,c_phone,c_password) VALUES (26,'王八','wangba@huawei.com','340211199301010026','18815650026','gaussdb_026'); +INSERT INTO client(c_id,c_name,c_mail,c_id_card,c_phone,c_password) VALUES (27,'王九','wangjiu@huawei.com','340211199301010027','18815650027','gaussdb_027'); +INSERT INTO client(c_id,c_name,c_mail,c_id_card,c_phone,c_password) VALUES (28,'钱一','qianyi@huawei.com','340211199301010028','18815650028','gaussdb_028'); +INSERT INTO client(c_id,c_name,c_mail,c_id_card,c_phone,c_password) VALUES (29,'钱二','qianer@huawei.com','340211199301010029','18815650029','gaussdb_029'); +INSERT INTO client(c_id,c_name,c_mail,c_id_card,c_phone,c_password) VALUES (30,'钱三','qiansan@huawei.com','340211199301010030','18815650030','gaussdb_030'); + +INSERT INTO bank_card(b_number,b_type,b_c_id) VALUES ('6222021302020000001','信用卡',1); +INSERT INTO bank_card(b_number,b_type,b_c_id) VALUES ('6222021302020000002','信用卡',3); +INSERT INTO bank_card(b_number,b_type,b_c_id) VALUES ('6222021302020000003','信用卡',5); +INSERT INTO bank_card(b_number,b_type,b_c_id) VALUES ('6222021302020000004','信用卡',7); +INSERT INTO bank_card(b_number,b_type,b_c_id) VALUES ('6222021302020000005','信用卡',9); +INSERT INTO bank_card(b_number,b_type,b_c_id) VALUES ('6222021302020000006','信用卡',10); +INSERT INTO bank_card(b_number,b_type,b_c_id) VALUES ('6222021302020000007','信用卡',12); +INSERT INTO bank_card(b_number,b_type,b_c_id) VALUES ('6222021302020000008','信用卡',14); +INSERT INTO bank_card(b_number,b_type,b_c_id) VALUES ('6222021302020000009','信用卡',16); +INSERT INTO bank_card(b_number,b_type,b_c_id) VALUES ('6222021302020000010','信用卡',18); +INSERT INTO bank_card(b_number,b_type,b_c_id) VALUES ('6222021302020000011','储蓄卡',19); +INSERT INTO bank_card(b_number,b_type,b_c_id) VALUES ('6222021302020000012','储蓄卡',21); +INSERT INTO bank_card(b_number,b_type,b_c_id) VALUES ('6222021302020000013','储蓄卡',7); +INSERT INTO bank_card(b_number,b_type,b_c_id) VALUES ('6222021302020000014','储蓄卡',23); +INSERT INTO bank_card(b_number,b_type,b_c_id) VALUES ('6222021302020000015','储蓄卡',24); +INSERT INTO bank_card(b_number,b_type,b_c_id) VALUES ('6222021302020000016','储蓄卡',3); +INSERT INTO bank_card(b_number,b_type,b_c_id) VALUES ('6222021302020000017','储蓄卡',26); +INSERT INTO bank_card(b_number,b_type,b_c_id) VALUES ('6222021302020000018','储蓄卡',27); +INSERT INTO bank_card(b_number,b_type,b_c_id) VALUES ('6222021302020000019','储蓄卡',12); +INSERT INTO bank_card(b_number,b_type,b_c_id) VALUES ('6222021302020000020','储蓄卡',29); + +INSERT INTO finances_product(p_name,p_id,p_description,p_amount,p_year) VALUES ('债券',1,'以国债、金融债、央行票据、企业债为主要投资方向的银行理财产品。',50000,6); +INSERT INTO finances_product(p_name,p_id,p_description,p_amount,p_year) VALUES ('信贷资产',2,'一般指银行作为委托人将通过发行理财产品募集资金委托给信托公司,信托公司作为受托人成立信托计划,将信托资产购买理财产品发售银行或第三方信贷资产。',50000,6); +INSERT INTO finances_product(p_name,p_id,p_description,p_amount,p_year) VALUES ('股票',3,'与股票挂钩的理财产品。目前市场上主要以港股挂钩居多',50000,6); +INSERT INTO finances_product(p_name,p_id,p_description,p_amount,p_year) VALUES ('大宗商品',4,'与大宗商品期货挂钩的理财产品。目前市场上主要以挂钩黄金、石油、农产品的理财产品居多。',50000,6); + +INSERT INTO insurance(i_name,i_id,i_amount,i_person,i_year,i_project) VALUES ('健康保险',1,2000,'老人',30,'平安保险'); +INSERT INTO 
insurance(i_name,i_id,i_amount,i_person,i_year,i_project) VALUES ('人寿保险',2,3000,'老人',30,'平安保险'); +INSERT INTO insurance(i_name,i_id,i_amount,i_person,i_year,i_project) VALUES ('意外保险',3,5000,'所有人',30,'平安保险'); +INSERT INTO insurance(i_name,i_id,i_amount,i_person,i_year,i_project) VALUES ('医疗保险',4,2000,'所有人',30,'平安保险'); +INSERT INTO insurance(i_name,i_id,i_amount,i_person,i_year,i_project) VALUES ('财产损失保险',5,1500,'中年人',30,'平安保险'); + +INSERT INTO fund(f_name,f_id,f_type,f_amount,risk_level,f_manager) VALUES ('股票',1,'股票型',10000,'高',1); +INSERT INTO fund(f_name,f_id,f_type,f_amount,risk_level,f_manager) VALUES ('投资',2,'债券型',10000,'中',2); +INSERT INTO fund(f_name,f_id,f_type,f_amount,risk_level,f_manager) VALUES ('国债',3,'货币型',10000,'低',3); +INSERT INTO fund(f_name,f_id,f_type,f_amount,risk_level,f_manager) VALUES ('沪深300指数',4,'指数型',10000,'中',4); + +INSERT INTO property(pro_c_id,pro_id,pro_status,pro_quantity,pro_income,pro_purchase_time) VALUES (5,1,'可用',4,8000,'2018-07-01'); +INSERT INTO property(pro_c_id,pro_id,pro_status,pro_quantity,pro_income,pro_purchase_time) VALUES (10,2,'可用',4,8000,'2018-07-01'); +INSERT INTO property(pro_c_id,pro_id,pro_status,pro_quantity,pro_income,pro_purchase_time) VALUES (15,3,'可用',4,8000,'2018-07-01'); +INSERT INTO property(pro_c_id,pro_id,pro_status,pro_quantity,pro_income,pro_purchase_time) VALUES (20,4,'冻结',4,8000,'2018-07-01'); + +COMMIT; \ No newline at end of file diff --git a/simpleInstall/install.sh b/simpleInstall/install.sh new file mode 100644 index 0000000..8d4ffd3 --- /dev/null +++ b/simpleInstall/install.sh @@ -0,0 +1,412 @@ +#!/bin/bash + +readonly cur_path=$(cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd && cd - &>/dev/null) + +source $cur_path"/common.sh" + +function fn_print_help() +{ + echo "Usage: $0 [OPTION] + -?|--help show help information + -U|--user_name cluster user + -G|--user_grp group of the cluster user + -h|--host_ip intranet IP address of the host in the backend storage network + -p|--port database server port + -D|--install_location installation directory of the openGauss program + " +} + +function fn_get_param() +{ + fn_prase_input_param $@ + host_name=`hostname -f` + system_arch=`uname -p` + system_name=`cat /etc/os-release | grep '^ID=".*' | grep -o -E '(openEuler|centos)'` + install_tar="/home/$user_name/openGaussTar" #安装包所在路径(可修改) + if [ ! $install_location ] + then + install_location="/opt/$user_name" #数据库安装位置(可修改) + fi +} + +function fn_prase_input_param() +{ + while [ $# -gt 0 ]; do + case $1 in + -\?|--help ) + fn_print_help + exit 1 + ;; + -U|--user_name ) + fn_check_param user_name $2 + user_name=$2 + shift 2 + ;; + -G|--user_grp ) + fn_check_param user_grp $2 + user_grp=$2 + shift 2 + ;; + -h|--host_ip ) + fn_check_param host_ip $2 + host_ip=$2 + shift 2 + ;; + -p|--port ) + fn_check_param port $2 + host_port=$2 + shift 2 + ;; + -D|--install_location ) + fn_check_param install_location $2 + install_location=$2 + shift 2 + ;; + * ) + echo "Please input right paramtenter, the following command may help you" + echo "sh install.sh --help or sh install.sh -?" + exit 1 + esac + done +} + +function fn_check_param() +{ + if [ "$2"X = X ] + then + echo "no given $1, the following command may help you" + echo "sh install.sh --help or sh install.sh -?" 
+ exit 1 + fi +} + +function fn_get_openGauss_tar() +{ + mkdir -p "$install_tar" 2>/dev/null + chown -R $user_name:$user_grp "$install_tar" + if [ "$system_name" == "openEuler" ] && [ "$system_arch" == "aarch64" ] + then + system_arch="arm" + elif [ "$system_name" == "openEuler" ] && [ "$system_arch" == "x86_64" ] + then + system_arch="x86" + elif [ "$system_name" == "centos" ] && [ "$system_arch" == "x86_64" ] + then + system_name="CentOS" + system_arch="x86" + else + echo "We only support CentOS+x86, openEuler+arm and openEuler+x86 by now." + return 1 + fi + + cd "$install_tar" + if [ "`find $cur_path/../ -maxdepth 1 -name "openGauss-2.0.0-*"|wc -l`" -lt "3" ] + then + if [ "`find . -name "openGauss-2.0.0-*"|wc -l`" -lt "3" ] + then + url="https://opengauss.obs.cn-south-1.myhuaweicloud.com/2.0.0/${system_arch}/openGauss-2.0.0-${system_name}-64bit-all.tar.gz" + echo "Downloading openGauss tar from official website at ${install_tar}" + wget $url --timeout=30 --tries=3 && tar -zxf openGauss-2.0.0-${system_name}-64bit-all.tar.gz + if [ $? -ne 0 ] + then + echo "wget error. The $install_tar need openGauss-2.0.0-${system_name}-64bit-om.tar.gz" + echo "wget error. The $install_tar need openGauss-2.0.0-${system_name}-64bit.sha256" + echo "wget error. The $install_tar need openGauss-2.0.0-${system_name}-64bit.tar.bz2" + return 1 + else + echo "wget success." + fi + fi + else + if [ "`find . -name "openGauss-2.0.0-*"|wc -l`" -lt "3" ] + then + cp "$cur_path/../openGauss-2.0.0-${system_name}-64bit-om.tar.gz" \ + "$cur_path/../openGauss-2.0.0-${system_name}-64bit.tar.bz2" \ + "$cur_path/../openGauss-2.0.0-${system_name}-64bit.sha256" "$install_tar" + if [ $? -ne 0 ] + then + echo "copy Installation package error." + return 1 + else + echo "copy Installation package success." + fi + fi + fi + return 0 +} + +function fn_create_file() +{ + mkdir -p $install_location + chmod -R 755 $install_location + chown -R $user_name:$user_grp $install_location + + local install_location=${install_location//\//\\\/} + + if [ ! -e $cur_path/template.xml ] + then + echo "cannot find template.xml" + return 1 + fi + sed 's/@{host_name}/'$host_name'/g' $cur_path/template.xml | sed 's/@{host_ip}/'$host_ip'/g' | sed 's/@{user_name}/'$user_name'/g' | sed 's/@{host_port}/'$host_port'/g' | sed 's/@{install_location}/'$install_location'/g' > $cur_path/single.xml + cp $cur_path/single.xml /home/$user_name/ + echo "create config file success." + return 0 +} + +function fn_post_check() +{ + fn_precheck + if [ $? -ne 0 ] + then + echo "Precheck failed, you can check preCheck.log for more details." + fn_precheck_result + if [ $? -ne 0 ] + then + return 1 + fi + else + echo "Precheck success." + fi + fn_check_user + if [ $? -ne 0 ] + then + echo "Check user failed." + return 1 + else + echo "Check user success." + fi + fn_check_input + if [ $? -ne 0 ] + then + echo "Check input failed." + return 1 + else + echo "Check input success." + fi + fn_check_firewall $host_port + if [ $? -ne 0 ] + then + echo "Check firewall failed." + return 1 + else + echo "Check firewall success." + fi + fn_selinux + if [ $? -ne 0 ] + then + echo "Set selinux failed." + return 1 + else + echo "Set selinux success." + fi + return 0 +} +function fn_precheck_result() +{ + input=$1 + if [ "$input"X = X ] + then + read -p "Are you sure you want to continue (yes/no)? 
" input + fi + if [ "$input"X == "yes"X ] + then + return 0 + elif [ "$input"X == "no"X ] + then + return 1 + else + read -p "Please type 'yes' or 'no': " input + fn_precheck_result $input + fi +} + +function fn_check_input() +{ + if [ ! "$user_name" -o ! "$user_grp" -o ! "$host_ip" -o ! "$host_port" ] + then + echo "Usage: sh install.sh -U user_name -G user_grp -h ip -p port" + echo "The following command may help you" + echo "sh install.sh --help or sh install.sh -?" + return 1 + fi + if [ "`netstat -anp | grep -w $host_port`" ] + then + echo "port $host_port occupied, please choose another." + return 1 + fi + return 0 +} + +function fn_check_user() +{ + if [ `id -u` -ne 0 ] + then + echo "Only a user with the root permission can run this script." + return 1 + fi + return 0 +} + +function fn_install() +{ + fn_tar + if [ $? -ne 0 ] + then + echo "Get openGauss Installation package or tar package failed." + return 1 + else + echo "Get openGauss Installation package and tar package success." + fi + export LD_LIBRARY_PATH="${install_tar}/script/gspylib/clib:"$LD_LIBRARY_PATH + python3 "${install_tar}/script/gs_preinstall" -U $user_name -G $user_grp -X '/home/'$user_name'/single.xml' --sep-env-file='/home/'$user_name'/env_single' + if [ $? -ne 0 ] + then + echo "Preinstall failed." + return 1 + else + echo "Preinstall success." + fi + chmod 755 "/home/$user_name/single.xml" + chown $user_name:$user_grp "/home/$user_name/single.xml" + su - $user_name -c "source /home/$user_name/env_single;gs_install -X /home/$user_name/single.xml" + if [ $? -ne 0 ] + then + echo "Install failed." + return 1 + else + echo "Install success." + fi + return 0 +} + +function fn_tar() +{ + fn_get_openGauss_tar + if [ $? -ne 0 ] + then + echo "Get openGauss Installation package error." + return 1 + else + echo "Get openGauss Installation package success." + fi + cd "${install_tar}" + tar -zxf "openGauss-2.0.0-${system_name}-64bit-om.tar.gz" + if [ $? -ne 0 ] + then + echo "tar package error." + return 1 + else + echo "tar package success." + fi + return 0 +} + +function fn_install_demoDB() +{ + input=$1 + if [ "$input"X = X ] + then + read -p "Would you like to create a demo database (yes/no)? " input + fi + if [ $input == "yes" ] + then + fn_load_demoDB 1>$cur_path/load.log 2>&1 + fn_check_demoDB + elif [ $input == "no" ] + then + return 2 + else + read -p "Please type 'yes' or 'no': " input + fn_install_demoDB $input + fi + return $? +} + +function fn_load_demoDB() +{ + cp $cur_path/{school.sql,finance.sql} /home/$user_name + chown $user_name:$user_grp /home/$user_name/{school.sql,finance.sql} + su - $user_name -c " + source ~/env_single + gs_guc set -D $install_location/cluster/dn1/ -c \"modify_initial_password = false\" + gs_om -t stop && gs_om -t start + sleep 1 + gsql -d postgres -p $host_port -f /home/$user_name/school.sql + gsql -d postgres -p $host_port -f /home/$user_name/finance.sql + gs_guc set -D $install_location/cluster/dn1/ -c \"modify_initial_password = true\" + gs_om -t stop && gs_om -t start" +} + +function fn_check_demoDB() +{ + if [ "`cat $cur_path/load.log | grep ROLLBACK`" != "" ] + then + return 1 + elif [ "`cat $cur_path/load.log | grep '\[GAUSS-[0-9]*\]'`" != "" ] + then + return 1 + elif [ "`cat $cur_path/load.log | grep ERROR`" != "" ] + then + return 1 + elif [ "`cat $cur_path/load.log | grep Unknown`" != "" ] + then + return 1 + fi + return 0 +} + +function main() +{ + fn_get_param $@ + + fn_post_check + if [ $? -ne 0 ] + then + echo "Post check failed." 
+function main()
+{
+    fn_get_param $@
+
+    fn_post_check
+    if [ $? -ne 0 ]
+    then
+        echo "Post check failed."
+        return 1
+    else
+        echo "Post check success."
+    fi
+    fn_create_user $user_name $user_grp
+    if [ $? -ne 0 ]
+    then
+        echo "Create user failed."
+        return 1
+    else
+        echo "Create user success."
+    fi
+    fn_create_file
+    if [ $? -ne 0 ]
+    then
+        echo "Create file failed."
+        return 1
+    else
+        echo "Create file success."
+    fi
+    fn_install
+    if [ $? -ne 0 ]
+    then
+        echo "Installation failed."
+        return 1
+    else
+        echo "Installation success."
+    fi
+    fn_install_demoDB
+    local returnFlag=$?
+    if [ $returnFlag -eq 0 ]
+    then
+        echo "Load demoDB [school,finance] success."
+    elif [ $returnFlag -eq 1 ]
+    then
+        echo "Load demoDB failed. You can check load.log for more details."
+        return 1
+    else
+        echo "Input no, demo database creation skipped."
+    fi
+    return 0
+}
+
+main $@
+exit $?
+
diff --git a/simpleInstall/one_master_one_slave.sh b/simpleInstall/one_master_one_slave.sh
new file mode 100644
index 0000000..8c2b416
--- /dev/null
+++ b/simpleInstall/one_master_one_slave.sh
@@ -0,0 +1,400 @@
+#!/bin/bash
+
+if [ `id -u` -ne 0 ];then
+    echo "Only a user with the root permission can run this script."
+    exit 1
+fi
+
+declare -r SCRIPT_PATH=$(cd $(dirname "${BASH_SOURCE[0]}") && pwd)
+declare -r SCRIPT_NAME=$0
+echo "SCRIPT_PATH : ${SCRIPT_PATH}"
+declare PACKAGE_PATH=`dirname ${SCRIPT_PATH}`
+declare USER_NAME=""
+declare HOST_IPS=""
+declare HOST_IPS_ARR=""
+declare HOST_IPS_ARRAY=""
+declare HOST_NAMES=""
+declare HOST_NAMES_ARRAY=""
+declare USER_GROUP="dbgrp"
+declare PORT="20050"
+declare XML_DIR=${SCRIPT_PATH}/one_master_one_slave_template.xml
+declare INSTALL_PATH=""
+declare SYSTEM_ARCH=""
+declare SYSTEM_NAME=""
+declare PASSWORD=""
+
+function print_help()
+{
+    echo "Usage: $0 [OPTION]
+        -h|--help                show help information
+        -U|--user_name           cluster user
+        -H|--host_ip             intranet IP addresses of the hosts on the backend storage network (host1,host2)
+        -G|--user_grp            group of the cluster user (default value dbgrp)
+        -p|--port                database server port (default value 20050)
+        -P|--password            password of the root user and of the cluster user
+        -D|--install_location    installation directory of the openGauss program (default value /home/<cluster user>)
+        -X|--xml_location        cluster xml configuration file path
+    "
+}
+
+function die()
+{
+    echo -e "\033[31merror:\033[0m $1"
+    exit 1
+}
+
+function warn()
+{
+    echo -e "\033[33mwarning:\033[0m $1"
+    sleep 2s
+}
+
+function info()
+{
+    echo -e "\033[32minfo:\033[0m $1"
+}
+
+function expect_ssh()
+{
+    /usr/bin/expect <<-EOF
+    set timeout -1
+    spawn $1
+    expect {
+        "*yes/no" { send "yes\r"; exp_continue }
+        "*assword:" { send "$2\r"; exp_continue }
+        "*$3*" { exit }
+    }
+    expect eof
+EOF
+    if [ $? == 0 ]
+    then
+        return 0
+    else
+        return 1
+    fi
+}
+
+function expect_hostname()
+{
+    expect <<-EOF > expectFile
+    set timeout -1
+    spawn $1
+    expect {
+        "*yes/no" { send "yes\r"; exp_continue }
+        "*assword:" { send "$2\r"; exp_continue }
+    }
+EOF
+    if [ $? == 0 ]
+    then
+        return 0
+    else
+        return 1
+    fi
+}
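+
+# A typical invocation of this script from the primary node; the addresses
+# and password below are purely illustrative:
+#
+#     sh one_master_one_slave.sh -U omm -G dbgrp -H 192.0.2.11,192.0.2.12 \
+#         -p 20050 -P <password>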
+
+function main()
+{
+    while [ $# -gt 0 ]
+    do
+        case "$1" in
+            -h|--help)
+                print_help
+                exit 1
+                ;;
+            -U|--user_name)
+                if [ "$2"X = X ]
+                then
+                    die "no cluster user specified"
+                fi
+                USER_NAME=$2
+                shift 2
+                ;;
+            -G|--user_grp)
+                if [ "$2"X = X ]
+                then
+                    die "no user group specified"
+                fi
+                USER_GROUP=$2
+                shift 2
+                ;;
+            -H|--host_ip)
+                if [ "$2"X = X ]
+                then
+                    die "no intranet ip address of the host specified"
+                fi
+                HOST_IPS=$2
+                shift 2
+                HOST_IPS_ARR=${HOST_IPS//,/ }
+                HOST_IPS_ARRAY=(${HOST_IPS_ARR})
+                if [ ${#HOST_IPS_ARRAY[*]} != 2 ]
+                then
+                    die "this script installs on exactly two nodes: one primary node and one standby node"
+                fi
+                ;;
+            -X|--xml_location)
+                if [ "$2"X = X ]
+                then
+                    die "no cluster xml configuration file specified"
+                fi
+                XML_DIR=$2
+                shift 2
+                ;;
+            -D|--install_location)
+                if [ "$2"X = X ]
+                then
+                    die "no installation directory of the openGauss program specified"
+                fi
+                INSTALL_PATH=$2
+                shift 2
+                ;;
+            -p|--port)
+                if [ "$2"X = X ]
+                then
+                    die "the port number cannot be empty."
+                fi
+                PORT=$2
+                shift 2
+                ;;
+            -P|--password)
+                if [ "$2"X = X ]
+                then
+                    die "the password cannot be empty."
+                fi
+                PASSWORD=$2
+                shift 2
+                ;;
+            *)
+                echo "Internal Error: option processing error" 1>&2
+                echo "Please input the right parameter. The following command may help you:"
+                echo "sh one_master_one_slave.sh --help or sh one_master_one_slave.sh -h"
+                exit 1
+        esac
+    done
+
+    if [ "${USER_NAME}"X == X ]
+    then
+        die "no cluster user specified"
+    fi
+
+    if [ -z "${INSTALL_PATH}" ]
+    then
+        INSTALL_PATH="/home/${USER_NAME}"
+    fi
+
+    if [ "${PASSWORD}"X == X ]
+    then
+        echo "Please enter the password of the root user and of the common user (the two passwords must be the same)."
+        echo -n "password:"
+        read PASSWORD
+        while [ -z "${PASSWORD}" ]
+        do
+            echo "The password cannot be empty. Please enter it again."
+            echo -n "password:"
+            read PASSWORD
+        done
+    fi
+
+    if [ "${HOST_IPS}"X == X ]
+    then
+        die "no intranet ip address specified"
+    else
+        len=${#HOST_IPS_ARRAY[*]}
+        index=0
+        while [ ${index} -lt ${len} ]
+        do
+            expect_hostname "ssh ${HOST_IPS_ARRAY[${index}]} hostname" "${PASSWORD}"
+            if [ $? == 0 ]
+            then
+                expectResult=`tail -1 expectFile|head -1| tr -d "\r"| tr -d "\n"`
+                if [ -z "${expectResult}" ]
+                then
+                    die "failed to obtain the hostname based on the ip address of ${HOST_IPS_ARRAY[${index}]}."
+                fi
+                HOST_NAMES_ARRAY[${index}]=${expectResult}
+            else
+                die "failed to obtain the hostname based on the ip address of ${HOST_IPS_ARRAY[${index}]}."
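+                # Reaching this branch means the expect session itself failed,
+                # typically because the password was wrong or the host was
+                # unreachable over ssh.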
+            fi
+            index=$(( index + 1 ))
+        done
+    fi
+    rm -rf expectFile
+    HOST_NAMES="${HOST_NAMES_ARRAY[0]},${HOST_NAMES_ARRAY[1]}"
+    SYSTEM_ARCH=`uname -p`
+    SYSTEM_NAME=`cat /etc/*-release | grep '^ID=".*'|awk -F "[=\"]" '{print $3}'`
+    if [ "${SYSTEM_NAME}" == "openEuler" ] && [ "${SYSTEM_ARCH}" == "aarch64" ]
+    then
+        info "the current system environment is openEuler + arm"
+    elif [ "${SYSTEM_NAME}" == "openEuler" ] && [ "${SYSTEM_ARCH}" == "x86_64" ]
+    then
+        info "the current system environment is openEuler + x86"
+    elif [ "${SYSTEM_NAME}" == "centos" ] && [ "${SYSTEM_ARCH}" == "x86_64" ]
+    then
+        info "the current system environment is CentOS + x86"
+    elif [ "${SYSTEM_NAME}" == "redhat" ] && [ "${SYSTEM_ARCH}" == "x86_64" ]
+    then
+        info "the current system environment is redhat + x86"
+    elif [ "${SYSTEM_NAME}" == "redhat" ] && [ "${SYSTEM_ARCH}" == "aarch64" ]
+    then
+        info "the current system environment is redhat + arm"
+    elif [ "${SYSTEM_NAME}" == "kylin" ] && [ "${SYSTEM_ARCH}" == "x86_64" ]
+    then
+        info "the current system environment is kylin + x86"
+    elif [ "${SYSTEM_NAME}" == "kylin" ] && [ "${SYSTEM_ARCH}" == "aarch64" ]
+    then
+        info "the current system environment is kylin + arm"
+    else
+        warn "the current system environment is ${SYSTEM_NAME} + ${SYSTEM_ARCH}; \
+            centos, openEuler, redhat, or kylin is recommended, because openGauss may not be adapted to the current system."
+    fi
+    info "installation parameter verification completed."
+}
+
+function checks()
+{
+    system_arch=`uname -p`
+    system_name=`cat /etc/*-release | grep '^ID=".*'|awk -F "[=\"]" '{print $3}'`
+    if [ "${system_arch}" != "$8" -o "${system_name}" != "$9" ]
+    then
+        warn "the OS or CPU architecture of this node differs from that of the executing machine."
+    fi
+
+    egrep "^$3" /etc/group >& /dev/null
+    if [ $? != 0 ];then
+        groupadd $3
+    fi
+    egrep "^$4" /etc/passwd >& /dev/null
+    if [ $? != 0 ];then
+        useradd -g $3 -d /home/$4 -m -s /bin/bash $4 2>/dev/null
+        if [ $? != 0 ]
+        then
+            die "failed to create the user on the node $2."
+        fi
+        expect_ssh "passwd $4" "$5" "passwd:"
+        if [ $? != 0 ]
+        then
+            die "an error occurred when setting the user password on the node $2."
+        fi
+    fi
+
+    sed -i "s/SELINUX=.*/SELINUX=disabled/g" /etc/selinux/config && firewall-cmd --permanent --add-port="$6/tcp" && firewall-cmd --reload
+    if [ $? != 0 ]
+    then
+        warn "some errors occurred during system environment setting on host $2."
+    fi
+
+    INSTALL_PATH=$7
+    if [ ! -e ${INSTALL_PATH} ]
+    then
+        mkdir -p ${INSTALL_PATH}
+    else
+        rm -rf ${INSTALL_PATH}/*
+    fi
+    chmod -R 755 ${INSTALL_PATH}/
+    chown -R $4:$3 ${INSTALL_PATH}/
+    if [ -f /${10} ]
+    then
+        mv /${10} $(eval echo ~$4)/
+    fi
+    echo "check end"
+}
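+
+# For reference, checks() is what each remote node runs; pre_checks below
+# invokes it over ssh with positional arguments in this order:
+#
+#     sh /one_master_one_slave.sh inner <ip> <group> <user> <password> \
+#         <port> <install_path> <arch> <os_name> <script_name>
+#
+# which is why the function body reads $2..$10 rather than named options.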
+
+function pre_checks()
+{
+    if [ ${#HOST_IPS_ARRAY[*]} == 0 ]
+    then
+        die "the number of internal IP addresses of the host is incorrect."
+    fi
+    localips=`/sbin/ifconfig -a|grep inet|grep -v 127.0.0.1|grep -v inet6|awk '{print $2}'|tr -d "addr:"`
+    for ip in ${HOST_IPS_ARRAY[@]}
+    do
+        info "start to check the installation environment of host ${ip}."
+        sleep 2s
+        # standby node
+        if [[ $localips != *${ip}* ]]
+        then
+            sshcmd="scp ${SCRIPT_PATH}/${SCRIPT_NAME} root@${ip}:/"
+            expect_ssh "${sshcmd}" "${PASSWORD}" "100%"
+            if [ $? != 0 ]
+            then
+                die "an error occurred when copying the script to the target host ${ip}."
+            fi
+            sshcmd="ssh ${ip} \"sh /${SCRIPT_NAME} inner ${ip} ${USER_GROUP} ${USER_NAME} ${PASSWORD} ${PORT} ${INSTALL_PATH} ${SYSTEM_ARCH} ${SYSTEM_NAME} ${SCRIPT_NAME}\""
+            expect_ssh "${sshcmd}" "${PASSWORD}" "check end"
+            if [ $? != 0 ]
+            then
+                die "an error occurred during the pre-installation check on the target host ${ip}."
+            fi
+        else
+            # local node
+            checks "" ${ip} ${USER_GROUP} ${USER_NAME} ${PASSWORD} ${PORT} ${INSTALL_PATH} ${SYSTEM_ARCH} ${SYSTEM_NAME} ${SCRIPT_NAME}
+            if [ $? != 0 ]
+            then
+                die "an error occurred during the pre-installation check on the target host ${ip}."
+            fi
+        fi
+        info "succeeded in checking the installation environment of host ${ip}."
+    done
+    return 0
+}
+
+function xmlconfig()
+{
+    info "start to automatically configure the installation file."
+    # Escape "/" so the path can be substituted with sed below.
+    install_location=${INSTALL_PATH//\//\\\/}
+    if [ -e ${XML_DIR} ]
+    then
+        sed 's/@{nodeNames}/'${HOST_NAMES}'/g' ${XML_DIR} |
+        sed 's/@{backIpls}/'${HOST_IPS}'/g' |
+        sed 's/@{clusterName}/'${USER_NAME}'/g' |
+        sed 's/@{port}/'${PORT}'/g' |
+        sed 's/@{installPath}/'${install_location}'/g' |
+        sed 's/@{nodeName1}/'${HOST_NAMES_ARRAY[0]}'/g' |
+        sed 's/@{backIp1}/'${HOST_IPS_ARRAY[0]}'/g' |
+        sed 's/@{nodeName2}/'${HOST_NAMES_ARRAY[1]}'/g' |
+        sed 's/@{backIp2}/'${HOST_IPS_ARRAY[1]}'/g' > $(eval echo ~${USER_NAME})/one_master_one_slave.xml
+    else
+        die "cannot find one_master_one_slave_template.xml in ${XML_DIR}"
+    fi
+    cat $(eval echo ~${USER_NAME})/one_master_one_slave.xml
+    info "the installation file is automatically configured."
+    return 0
+}
+
+function install()
+{
+    info "preparing for preinstallation"
+    home_path=$(eval echo ~${USER_NAME})
+    export LD_LIBRARY_PATH="${PACKAGE_PATH}/script/gspylib/clib:"$LD_LIBRARY_PATH
+    sshcmd="python3 "${PACKAGE_PATH}"/script/gs_preinstall -U "${USER_NAME}" \
+        -G "${USER_GROUP}" -X "${home_path}"/one_master_one_slave.xml --sep-env-file="${home_path}"/env_master_slave"
+    info "cmd \"${sshcmd}\""
+    expect_ssh "${sshcmd}" "${PASSWORD}" "Preinstallation succeeded"
+    if [ $? != 0 ]
+    then
+        die "preinstall failed."
+    fi
+    info "preinstallation succeeded."
+    chmod 755 ${home_path}'/one_master_one_slave.xml'
+    chown ${USER_NAME}:${USER_GROUP} ${home_path}'/one_master_one_slave.xml'
+    info "start the installation."
+    su - ${USER_NAME} -c "source ${home_path}/env_master_slave;gs_install -X ${home_path}/one_master_one_slave.xml;gs_om -t status --detail"
+    if [ $? -ne 0 ]
+    then
+        die "install failed."
+    else
+        info "install success."
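+        # The su command above already prints the cluster state via
+        # "gs_om -t status --detail"; the same command can be rerun later by
+        # the cluster user to re-check both nodes.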
+ fi + exit 0 +} + +if [ $1 == "inner" ] +then + checks $@ +else + main $@ + pre_checks + xmlconfig + install +fi +exit 0 + diff --git a/simpleInstall/one_master_one_slave_template.xml b/simpleInstall/one_master_one_slave_template.xml new file mode 100644 index 0000000..9735758 --- /dev/null +++ b/simpleInstall/one_master_one_slave_template.xml @@ -0,0 +1,33 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/simpleInstall/requirements_centos_x86_64 b/simpleInstall/requirements_centos_x86_64 new file mode 100644 index 0000000..b284d8c --- /dev/null +++ b/simpleInstall/requirements_centos_x86_64 @@ -0,0 +1,14 @@ +libaio-devel +flex +bison +ncurses-devel +glibc-devel +patch +readline-devel +bzip2 +firewalld +crontabs +net-tools +openssh-server +openssh-clients +which diff --git a/simpleInstall/requirements_openEuler_aarch64 b/simpleInstall/requirements_openEuler_aarch64 new file mode 100644 index 0000000..4412a9f --- /dev/null +++ b/simpleInstall/requirements_openEuler_aarch64 @@ -0,0 +1,12 @@ +libaio-devel +flex +bison +ncurses-devel +glibc-devel +patch +readline-devel +bzip2 +firewalld +crontabs +net-tools +which diff --git a/simpleInstall/requirements_openEuler_x86_64 b/simpleInstall/requirements_openEuler_x86_64 new file mode 100644 index 0000000..0c176b3 --- /dev/null +++ b/simpleInstall/requirements_openEuler_x86_64 @@ -0,0 +1,13 @@ +libaio-devel +flex +bison +ncurses-devel +glibc-devel +patch +readline-devel +bzip2 +firewalld +crontabs +net-tools +which +libnsl diff --git a/simpleInstall/school.sql b/simpleInstall/school.sql new file mode 100644 index 0000000..f830b05 --- /dev/null +++ b/simpleInstall/school.sql @@ -0,0 +1,214 @@ +create database school; + +\c school; + +BEGIN; + +-- 创建表student +CREATE TABLE student +( + std_id INT PRIMARY KEY, + std_name VARCHAR(20) NOT NULL, + std_sex VARCHAR(6), + std_birth DATE, + std_in DATE NOT NULL, + std_address VARCHAR(100) +); + +-- 创建表teacher +CREATE TABLE teacher +( + tec_id INT PRIMARY KEY, + tec_name VARCHAR(20) NOT NULL, + tec_job VARCHAR(15), + tec_sex VARCHAR(6), + tec_age INT, + tec_in DATE NOT NULL +); + +-- 创建表class +CREATE TABLE class +( + cla_id INT PRIMARY KEY, + cla_name VARCHAR(20) NOT NULL, + cla_teacher INT NOT NULL +); +-- 给表class添加外键约束 +ALTER TABLE class ADD CONSTRAINT fk_tec_id FOREIGN KEY (cla_teacher) REFERENCES teacher(tec_id) ON DELETE CASCADE; + +-- 创建表school_department +CREATE TABLE school_department +( + depart_id INT PRIMARY KEY, + depart_name VARCHAR(30) NOT NULL, + depart_teacher INT NOT NULL +); +-- 给表school_department添加外键约束 +ALTER TABLE school_department ADD CONSTRAINT fk_depart_tec_id FOREIGN KEY (depart_teacher) REFERENCES teacher(tec_id) ON DELETE CASCADE; + +-- 创建表course +CREATE TABLE course +( + cor_id INT PRIMARY KEY, + cor_name VARCHAR(30) NOT NULL, + cor_type VARCHAR(20), + credit DOUBLE PRECISION +); + +-- 插入数据 +INSERT INTO student(std_id,std_name,std_sex,std_birth,std_in,std_address) VALUES (1,'张一','男','1993-01-01','2011-09-01','江苏省南京市雨花台区'); +INSERT INTO student(std_id,std_name,std_sex,std_birth,std_in,std_address) VALUES (2,'张二','男','1993-01-02','2011-09-01','江苏省南京市雨花台区'); +INSERT INTO student(std_id,std_name,std_sex,std_birth,std_in,std_address) VALUES (3,'张三','男','1993-01-03','2011-09-01','江苏省南京市雨花台区'); +INSERT INTO student(std_id,std_name,std_sex,std_birth,std_in,std_address) VALUES (4,'张四','男','1993-01-04','2011-09-01','江苏省南京市雨花台区'); +INSERT INTO student(std_id,std_name,std_sex,std_birth,std_in,std_address) VALUES 
(5,'张五','男','1993-01-05','2011-09-01','江苏省南京市雨花台区'); +INSERT INTO student(std_id,std_name,std_sex,std_birth,std_in,std_address) VALUES (6,'张六','男','1993-01-06','2011-09-01','江苏省南京市雨花台区'); +INSERT INTO student(std_id,std_name,std_sex,std_birth,std_in,std_address) VALUES (7,'张七','男','1993-01-07','2011-09-01','江苏省南京市雨花台区'); +INSERT INTO student(std_id,std_name,std_sex,std_birth,std_in,std_address) VALUES (8,'张八','男','1993-01-08','2011-09-01','江苏省南京市雨花台区'); +INSERT INTO student(std_id,std_name,std_sex,std_birth,std_in,std_address) VALUES (9,'张九','男','1993-01-09','2011-09-01','江苏省南京市雨花台区'); +INSERT INTO student(std_id,std_name,std_sex,std_birth,std_in,std_address) VALUES (10,'李一','男','1993-01-10','2011-09-01','江苏省南京市雨花台区'); +INSERT INTO student(std_id,std_name,std_sex,std_birth,std_in,std_address) VALUES (11,'李二','男','1993-01-11','2011-09-01','江苏省南京市雨花台区'); +INSERT INTO student(std_id,std_name,std_sex,std_birth,std_in,std_address) VALUES (12,'李三','男','1993-01-12','2011-09-01','江苏省南京市雨花台区'); +INSERT INTO student(std_id,std_name,std_sex,std_birth,std_in,std_address) VALUES (13,'李四','男','1993-01-13','2011-09-01','江苏省南京市雨花台区'); +INSERT INTO student(std_id,std_name,std_sex,std_birth,std_in,std_address) VALUES (14,'李五','男','1993-01-14','2011-09-01','江苏省南京市雨花台区'); +INSERT INTO student(std_id,std_name,std_sex,std_birth,std_in,std_address) VALUES (15,'李六','男','1993-01-15','2011-09-01','江苏省南京市雨花台区'); +INSERT INTO student(std_id,std_name,std_sex,std_birth,std_in,std_address) VALUES (16,'李七','男','1993-01-16','2011-09-01','江苏省南京市雨花台区'); +INSERT INTO student(std_id,std_name,std_sex,std_birth,std_in,std_address) VALUES (17,'李八','男','1993-01-17','2011-09-01','江苏省南京市雨花台区'); +INSERT INTO student(std_id,std_name,std_sex,std_birth,std_in,std_address) VALUES (18,'李九','男','1993-01-18','2011-09-01','江苏省南京市雨花台区'); +INSERT INTO student(std_id,std_name,std_sex,std_birth,std_in,std_address) VALUES (19,'王一','男','1993-01-19','2011-09-01','江苏省南京市雨花台区'); +INSERT INTO student(std_id,std_name,std_sex,std_birth,std_in,std_address) VALUES (20,'王二','男','1993-01-20','2011-09-01','江苏省南京市雨花台区'); +INSERT INTO student(std_id,std_name,std_sex,std_birth,std_in,std_address) VALUES (21,'王三','男','1993-01-21','2011-09-01','江苏省南京市雨花台区'); +INSERT INTO student(std_id,std_name,std_sex,std_birth,std_in,std_address) VALUES (22,'王四','男','1993-01-22','2011-09-01','江苏省南京市雨花台区'); +INSERT INTO student(std_id,std_name,std_sex,std_birth,std_in,std_address) VALUES (23,'王五','男','1993-01-23','2011-09-01','江苏省南京市雨花台区'); +INSERT INTO student(std_id,std_name,std_sex,std_birth,std_in,std_address) VALUES (24,'王六','男','1993-01-24','2011-09-01','江苏省南京市雨花台区'); +INSERT INTO student(std_id,std_name,std_sex,std_birth,std_in,std_address) VALUES (25,'王七','男','1993-01-25','2011-09-01','江苏省南京市雨花台区'); +INSERT INTO student(std_id,std_name,std_sex,std_birth,std_in,std_address) VALUES (26,'王八','男','1993-01-26','2011-09-01','江苏省南京市雨花台区'); +INSERT INTO student(std_id,std_name,std_sex,std_birth,std_in,std_address) VALUES (27,'王九','男','1993-01-27','2011-09-01','江苏省南京市雨花台区'); +INSERT INTO student(std_id,std_name,std_sex,std_birth,std_in,std_address) VALUES (28,'钱一','男','1993-01-28','2011-09-01','江苏省南京市雨花台区'); +INSERT INTO student(std_id,std_name,std_sex,std_birth,std_in,std_address) VALUES (29,'钱二','男','1993-01-29','2011-09-01','江苏省南京市雨花台区'); +INSERT INTO student(std_id,std_name,std_sex,std_birth,std_in,std_address) VALUES (30,'钱三','男','1993-01-30','2011-09-01','江苏省南京市雨花台区'); +INSERT INTO student(std_id,std_name,std_sex,std_birth,std_in,std_address) VALUES 
(31,'钱四','男','1993-02-01','2011-09-01','江苏省南京市雨花台区'); +INSERT INTO student(std_id,std_name,std_sex,std_birth,std_in,std_address) VALUES (32,'钱五','男','1993-02-02','2011-09-01','江苏省南京市雨花台区'); +INSERT INTO student(std_id,std_name,std_sex,std_birth,std_in,std_address) VALUES (33,'钱六','男','1993-02-03','2011-09-01','江苏省南京市雨花台区'); +INSERT INTO student(std_id,std_name,std_sex,std_birth,std_in,std_address) VALUES (34,'钱七','男','1993-02-04','2011-09-01','江苏省南京市雨花台区'); +INSERT INTO student(std_id,std_name,std_sex,std_birth,std_in,std_address) VALUES (35,'钱八','男','1993-02-05','2011-09-01','江苏省南京市雨花台区'); +INSERT INTO student(std_id,std_name,std_sex,std_birth,std_in,std_address) VALUES (36,'钱九','男','1993-02-06','2011-09-01','江苏省南京市雨花台区'); +INSERT INTO student(std_id,std_name,std_sex,std_birth,std_in,std_address) VALUES (37,'吴一','男','1993-02-07','2011-09-01','江苏省南京市雨花台区'); +INSERT INTO student(std_id,std_name,std_sex,std_birth,std_in,std_address) VALUES (38,'吴二','男','1993-02-08','2011-09-01','江苏省南京市雨花台区'); +INSERT INTO student(std_id,std_name,std_sex,std_birth,std_in,std_address) VALUES (39,'吴三','男','1993-02-09','2011-09-01','江苏省南京市雨花台区'); +INSERT INTO student(std_id,std_name,std_sex,std_birth,std_in,std_address) VALUES (40,'吴四','男','1993-02-10','2011-09-01','江苏省南京市雨花台区'); +INSERT INTO student(std_id,std_name,std_sex,std_birth,std_in,std_address) VALUES (41,'吴五','男','1993-02-11','2011-09-01','江苏省南京市雨花台区'); +INSERT INTO student(std_id,std_name,std_sex,std_birth,std_in,std_address) VALUES (42,'吴六','男','1993-02-12','2011-09-01','江苏省南京市雨花台区'); +INSERT INTO student(std_id,std_name,std_sex,std_birth,std_in,std_address) VALUES (43,'吴七','男','1993-02-13','2011-09-01','江苏省南京市雨花台区'); +INSERT INTO student(std_id,std_name,std_sex,std_birth,std_in,std_address) VALUES (44,'吴八','男','1993-02-14','2011-09-01','江苏省南京市雨花台区'); +INSERT INTO student(std_id,std_name,std_sex,std_birth,std_in,std_address) VALUES (45,'吴九','男','1993-02-15','2011-09-01','江苏省南京市雨花台区'); +INSERT INTO student(std_id,std_name,std_sex,std_birth,std_in,std_address) VALUES (46,'柳一','男','1993-02-16','2011-09-01','江苏省南京市雨花台区'); +INSERT INTO student(std_id,std_name,std_sex,std_birth,std_in,std_address) VALUES (47,'柳二','男','1993-02-17','2011-09-01','江苏省南京市雨花台区'); +INSERT INTO student(std_id,std_name,std_sex,std_birth,std_in,std_address) VALUES (48,'柳三','男','1993-02-18','2011-09-01','江苏省南京市雨花台区'); +INSERT INTO student(std_id,std_name,std_sex,std_birth,std_in,std_address) VALUES (49,'柳四','男','1993-02-19','2011-09-01','江苏省南京市雨花台区'); +INSERT INTO student(std_id,std_name,std_sex,std_birth,std_in,std_address) VALUES (50,'柳五','男','1993-02-20','2011-09-01','江苏省南京市雨花台区'); + +INSERT INTO teacher(tec_id,tec_name,tec_job,tec_sex,tec_age,tec_in) VALUES (1,'张一','讲师','男',35,'2009-07-01'); +INSERT INTO teacher(tec_id,tec_name,tec_job,tec_sex,tec_age,tec_in) VALUES (2,'张二','讲师','男',35,'2009-07-01'); +INSERT INTO teacher(tec_id,tec_name,tec_job,tec_sex,tec_age,tec_in) VALUES (3,'张三','讲师','男',35,'2009-07-01'); +INSERT INTO teacher(tec_id,tec_name,tec_job,tec_sex,tec_age,tec_in) VALUES (4,'张四','讲师','男',35,'2009-07-01'); +INSERT INTO teacher(tec_id,tec_name,tec_job,tec_sex,tec_age,tec_in) VALUES (5,'张五','讲师','男',35,'2009-07-01'); +INSERT INTO teacher(tec_id,tec_name,tec_job,tec_sex,tec_age,tec_in) VALUES (6,'张六','讲师','男',35,'2009-07-01'); +INSERT INTO teacher(tec_id,tec_name,tec_job,tec_sex,tec_age,tec_in) VALUES (7,'张七','讲师','男',35,'2009-07-01'); +INSERT INTO teacher(tec_id,tec_name,tec_job,tec_sex,tec_age,tec_in) VALUES (8,'张八','讲师','男',35,'2009-07-01'); +INSERT INTO 
teacher(tec_id,tec_name,tec_job,tec_sex,tec_age,tec_in) VALUES (9,'张九','讲师','男',35,'2009-07-01'); +INSERT INTO teacher(tec_id,tec_name,tec_job,tec_sex,tec_age,tec_in) VALUES (10,'李一','讲师','男',35,'2009-07-01'); +INSERT INTO teacher(tec_id,tec_name,tec_job,tec_sex,tec_age,tec_in) VALUES (11,'李二','讲师','男',35,'2009-07-01'); +INSERT INTO teacher(tec_id,tec_name,tec_job,tec_sex,tec_age,tec_in) VALUES (12,'李三','讲师','男',35,'2009-07-01'); +INSERT INTO teacher(tec_id,tec_name,tec_job,tec_sex,tec_age,tec_in) VALUES (13,'李四','讲师','男',35,'2009-07-01'); +INSERT INTO teacher(tec_id,tec_name,tec_job,tec_sex,tec_age,tec_in) VALUES (14,'李五','讲师','男',35,'2009-07-01'); +INSERT INTO teacher(tec_id,tec_name,tec_job,tec_sex,tec_age,tec_in) VALUES (15,'李六','讲师','男',35,'2009-07-01'); +INSERT INTO teacher(tec_id,tec_name,tec_job,tec_sex,tec_age,tec_in) VALUES (16,'李七','讲师','男',35,'2009-07-01'); +INSERT INTO teacher(tec_id,tec_name,tec_job,tec_sex,tec_age,tec_in) VALUES (17,'李八','讲师','男',35,'2009-07-01'); +INSERT INTO teacher(tec_id,tec_name,tec_job,tec_sex,tec_age,tec_in) VALUES (18,'李九','讲师','男',35,'2009-07-01'); +INSERT INTO teacher(tec_id,tec_name,tec_job,tec_sex,tec_age,tec_in) VALUES (19,'王一','讲师','男',35,'2009-07-01'); +INSERT INTO teacher(tec_id,tec_name,tec_job,tec_sex,tec_age,tec_in) VALUES (20,'王二','讲师','男',35,'2009-07-01'); +INSERT INTO teacher(tec_id,tec_name,tec_job,tec_sex,tec_age,tec_in) VALUES (21,'王三','讲师','男',35,'2009-07-01'); +INSERT INTO teacher(tec_id,tec_name,tec_job,tec_sex,tec_age,tec_in) VALUES (22,'王四','讲师','男',35,'2009-07-01'); +INSERT INTO teacher(tec_id,tec_name,tec_job,tec_sex,tec_age,tec_in) VALUES (23,'王五','讲师','男',35,'2009-07-01'); +INSERT INTO teacher(tec_id,tec_name,tec_job,tec_sex,tec_age,tec_in) VALUES (24,'王六','讲师','男',35,'2009-07-01'); +INSERT INTO teacher(tec_id,tec_name,tec_job,tec_sex,tec_age,tec_in) VALUES (25,'王七','讲师','男',35,'2009-07-01'); +INSERT INTO teacher(tec_id,tec_name,tec_job,tec_sex,tec_age,tec_in) VALUES (26,'王八','讲师','男',35,'2009-07-01'); +INSERT INTO teacher(tec_id,tec_name,tec_job,tec_sex,tec_age,tec_in) VALUES (27,'王九','讲师','男',35,'2009-07-01'); +INSERT INTO teacher(tec_id,tec_name,tec_job,tec_sex,tec_age,tec_in) VALUES (28,'钱一','讲师','男',35,'2009-07-01'); +INSERT INTO teacher(tec_id,tec_name,tec_job,tec_sex,tec_age,tec_in) VALUES (29,'钱二','讲师','男',35,'2009-07-01'); +INSERT INTO teacher(tec_id,tec_name,tec_job,tec_sex,tec_age,tec_in) VALUES (30,'钱三','讲师','男',35,'2009-07-01'); +INSERT INTO teacher(tec_id,tec_name,tec_job,tec_sex,tec_age,tec_in) VALUES (31,'钱四','讲师','男',35,'2009-07-01'); +INSERT INTO teacher(tec_id,tec_name,tec_job,tec_sex,tec_age,tec_in) VALUES (32,'钱五','讲师','男',35,'2009-07-01'); +INSERT INTO teacher(tec_id,tec_name,tec_job,tec_sex,tec_age,tec_in) VALUES (33,'钱六','讲师','男',35,'2009-07-01'); +INSERT INTO teacher(tec_id,tec_name,tec_job,tec_sex,tec_age,tec_in) VALUES (34,'钱七','讲师','男',35,'2009-07-01'); +INSERT INTO teacher(tec_id,tec_name,tec_job,tec_sex,tec_age,tec_in) VALUES (35,'钱八','讲师','男',35,'2009-07-01'); +INSERT INTO teacher(tec_id,tec_name,tec_job,tec_sex,tec_age,tec_in) VALUES (36,'钱九','讲师','男',35,'2009-07-01'); +INSERT INTO teacher(tec_id,tec_name,tec_job,tec_sex,tec_age,tec_in) VALUES (37,'吴一','讲师','男',35,'2009-07-01'); +INSERT INTO teacher(tec_id,tec_name,tec_job,tec_sex,tec_age,tec_in) VALUES (38,'吴二','讲师','男',35,'2009-07-01'); +INSERT INTO teacher(tec_id,tec_name,tec_job,tec_sex,tec_age,tec_in) VALUES (39,'吴三','讲师','男',35,'2009-07-01'); +INSERT INTO teacher(tec_id,tec_name,tec_job,tec_sex,tec_age,tec_in) VALUES 
(40,'吴四','讲师','男',35,'2009-07-01'); +INSERT INTO teacher(tec_id,tec_name,tec_job,tec_sex,tec_age,tec_in) VALUES (41,'吴五','讲师','男',35,'2009-07-01'); +INSERT INTO teacher(tec_id,tec_name,tec_job,tec_sex,tec_age,tec_in) VALUES (42,'吴六','讲师','男',35,'2009-07-01'); +INSERT INTO teacher(tec_id,tec_name,tec_job,tec_sex,tec_age,tec_in) VALUES (43,'吴七','讲师','男',35,'2009-07-01'); +INSERT INTO teacher(tec_id,tec_name,tec_job,tec_sex,tec_age,tec_in) VALUES (44,'吴八','讲师','男',35,'2009-07-01'); +INSERT INTO teacher(tec_id,tec_name,tec_job,tec_sex,tec_age,tec_in) VALUES (45,'吴九','讲师','男',35,'2009-07-01'); +INSERT INTO teacher(tec_id,tec_name,tec_job,tec_sex,tec_age,tec_in) VALUES (46,'柳一','讲师','男',35,'2009-07-01'); +INSERT INTO teacher(tec_id,tec_name,tec_job,tec_sex,tec_age,tec_in) VALUES (47,'柳二','讲师','男',35,'2009-07-01'); +INSERT INTO teacher(tec_id,tec_name,tec_job,tec_sex,tec_age,tec_in) VALUES (48,'柳三','讲师','男',35,'2009-07-01'); +INSERT INTO teacher(tec_id,tec_name,tec_job,tec_sex,tec_age,tec_in) VALUES (49,'柳四','讲师','男',35,'2009-07-01'); +INSERT INTO teacher(tec_id,tec_name,tec_job,tec_sex,tec_age,tec_in) VALUES (50,'柳五','讲师','男',35,'2009-07-01'); + +INSERT INTO class(cla_id,cla_name,cla_teacher) VALUES (1,'计算机',1); +INSERT INTO class(cla_id,cla_name,cla_teacher) VALUES (2,'自动化',3); +INSERT INTO class(cla_id,cla_name,cla_teacher) VALUES (3,'飞行器设计',5); +INSERT INTO class(cla_id,cla_name,cla_teacher) VALUES (4,'大学物理',7); +INSERT INTO class(cla_id,cla_name,cla_teacher) VALUES (5,'高等数学',9); +INSERT INTO class(cla_id,cla_name,cla_teacher) VALUES (6,'大学化学',12); +INSERT INTO class(cla_id,cla_name,cla_teacher) VALUES (7,'表演',14); +INSERT INTO class(cla_id,cla_name,cla_teacher) VALUES (8,'服装设计',16); +INSERT INTO class(cla_id,cla_name,cla_teacher) VALUES (9,'工业设计',18); +INSERT INTO class(cla_id,cla_name,cla_teacher) VALUES (10,'金融学',21); +INSERT INTO class(cla_id,cla_name,cla_teacher) VALUES (11,'医学',23); +INSERT INTO class(cla_id,cla_name,cla_teacher) VALUES (12,'土木工程',25); +INSERT INTO class(cla_id,cla_name,cla_teacher) VALUES (13,'机械',27); +INSERT INTO class(cla_id,cla_name,cla_teacher) VALUES (14,'建筑学',29); +INSERT INTO class(cla_id,cla_name,cla_teacher) VALUES (15,'经济学',32); +INSERT INTO class(cla_id,cla_name,cla_teacher) VALUES (16,'财务管理',34); +INSERT INTO class(cla_id,cla_name,cla_teacher) VALUES (17,'人力资源',36); +INSERT INTO class(cla_id,cla_name,cla_teacher) VALUES (18,'力学',38); +INSERT INTO class(cla_id,cla_name,cla_teacher) VALUES (19,'人工智能',41); +INSERT INTO class(cla_id,cla_name,cla_teacher) VALUES (20,'会计',45); + +INSERT INTO school_department(depart_id,depart_name,depart_teacher) VALUES (1,'计算机学院',2); +INSERT INTO school_department(depart_id,depart_name,depart_teacher) VALUES (2,'自动化学院',4); +INSERT INTO school_department(depart_id,depart_name,depart_teacher) VALUES (3,'航空宇航学院',6); +INSERT INTO school_department(depart_id,depart_name,depart_teacher) VALUES (4,'艺术学院',8); +INSERT INTO school_department(depart_id,depart_name,depart_teacher) VALUES (5,'理学院',11); +INSERT INTO school_department(depart_id,depart_name,depart_teacher) VALUES (6,'人工智能学院',13); +INSERT INTO school_department(depart_id,depart_name,depart_teacher) VALUES (7,'工学院',15); +INSERT INTO school_department(depart_id,depart_name,depart_teacher) VALUES (8,'管理学院',17); +INSERT INTO school_department(depart_id,depart_name,depart_teacher) VALUES (9,'农学院',22); +INSERT INTO school_department(depart_id,depart_name,depart_teacher) VALUES (10,'医学院',28); + +INSERT INTO course(cor_id,cor_name,cor_type,credit) VALUES (1,'数据库系统概论','必修',3); +INSERT 
INTO course(cor_id,cor_name,cor_type,credit) VALUES (2,'艺术设计概论','选修',1); +INSERT INTO course(cor_id,cor_name,cor_type,credit) VALUES (3,'力学制图','必修',4); +INSERT INTO course(cor_id,cor_name,cor_type,credit) VALUES (4,'飞行器设计历史','选修',1); +INSERT INTO course(cor_id,cor_name,cor_type,credit) VALUES (5,'马克思主义','必修',2); +INSERT INTO course(cor_id,cor_name,cor_type,credit) VALUES (6,'大学历史','必修',2); +INSERT INTO course(cor_id,cor_name,cor_type,credit) VALUES (7,'人力资源管理理论','必修',2.5); +INSERT INTO course(cor_id,cor_name,cor_type,credit) VALUES (8,'线性代数','必修',4); +INSERT INTO course(cor_id,cor_name,cor_type,credit) VALUES (9,'JAVA程序设计','必修',3); +INSERT INTO course(cor_id,cor_name,cor_type,credit) VALUES (10,'操作系统','必修',4); +INSERT INTO course(cor_id,cor_name,cor_type,credit) VALUES (11,'计算机组成原理','必修',3); +INSERT INTO course(cor_id,cor_name,cor_type,credit) VALUES (12,'自动化设计理论','必修',2); +INSERT INTO course(cor_id,cor_name,cor_type,credit) VALUES (13,'情绪表演','必修',2.5); +INSERT INTO course(cor_id,cor_name,cor_type,credit) VALUES (14,'茶学历史','选修',1); +INSERT INTO course(cor_id,cor_name,cor_type,credit) VALUES (15,'艺术论','必修',1.5); +INSERT INTO course(cor_id,cor_name,cor_type,credit) VALUES (16,'机器学习','必修',3); +INSERT INTO course(cor_id,cor_name,cor_type,credit) VALUES (17,'数据挖掘','选修',2); +INSERT INTO course(cor_id,cor_name,cor_type,credit) VALUES (18,'图像识别','必修',3); +INSERT INTO course(cor_id,cor_name,cor_type,credit) VALUES (19,'解剖学','必修',4); +INSERT INTO course(cor_id,cor_name,cor_type,credit) VALUES (20,'3D max','选修',2); + +COMMIT; \ No newline at end of file diff --git a/simpleInstall/template.xml b/simpleInstall/template.xml new file mode 100644 index 0000000..9a154ca --- /dev/null +++ b/simpleInstall/template.xml @@ -0,0 +1,28 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + -- Gitee From 84a8986fd4b4800268f9f74fc3bfb2bf7e73fbea Mon Sep 17 00:00:00 2001 From: yuwenboj <1948505032@qq.com> Date: Sun, 11 Apr 2021 12:53:02 +0800 Subject: [PATCH 2/2] =?UTF-8?q?gs=5Fom=E8=B5=9B=E9=A2=98=E5=8A=9F=E8=83=BD?= =?UTF-8?q?=E5=AE=9E=E7=8E=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 87 - script/gspylib/common/DbClusterInfo.py | 6340 ------------------------ 2 files changed, 6427 deletions(-) delete mode 100644 README.md delete mode 100644 script/gspylib/common/DbClusterInfo.py diff --git a/README.md b/README.md deleted file mode 100644 index 3e40291..0000000 --- a/README.md +++ /dev/null @@ -1,87 +0,0 @@ -# 25-NovDB - -#### 一. 项目背景介绍 - -##### 1.1 openGuass 背景 - 数据库技术领域,数据库所使用的典型数据模型主要有层次模型、网状数据模型和关系数据库模型。但是由于移动互联网的兴起,数据规模爆炸式增长,单机数据库越来越难以满足用户需求。解决这种问题最直接的方法就是增加机器的数量,将数据库部署在多台机器上,分布式数据库就诞生了。然而分布式数据库存在节点之间通信耗时多、数据安全性和保密性低并且分布式的数据划分、负载均衡、分布式事务处理中和分布式执行技术缺乏创新。 - 近年来随着AI技术的成熟和发展,AI与数据库技术结合的程度更加紧密了,因此在2019年5月,华为公司发布了全球首款AI原生(AI-Native)数据库——GaussDB,实现了数据库两大革命性突破: -(1)该数据库首次将人工智能技术融入分布式数据库的全生命周期中,实现了自运维、自管理、自调优、故障自诊断和自愈。 - -(2)该数据库通过异构计算创新框架充分发挥x86、ARM、GPU、NPU多种算力优势。 - -##### 1.2 gs_om 工具 - GaussDB 200提供了gs_om工具帮助对集群进行维护,包括启动集群、停止集群、重启集群、查询集群状态、切换主备实例、管理CN、修改IP、生成静态配置文件、格式化列存文件、升级时间评估、安装,扩容,增加CN、节点替换和温备相关时间评估、管理虚拟IP、SSL证书替换、输出相对表空间路径、启停kerberos认证、ec工具、管理javaUDF文件、删除PostGIS文件、执行日常巡检并上报告警、集群拓扑结构比较、慢节点检测和隔离、显示帮助信息和显示版本号信息等功能。 - -#### 二. 
gs_om 功能介绍 - - 通过使用gs_om工具进行节点信息的显示的时候,集群中的节点信息会展示在一行上(即多个节点信息不会分行显示,对使用用户很不友好)。所以基于这些问题,通过对gs_om工具进行优化后,实现了一下功能: - (1)通过gs_om查看节点信息的命令(如:gs_om -t status --detail),可以实现将节点的信息分行显示。 - (2)为集群中节点信息增加了多个属性(如:id、diskusage),方便用户直观的看到该集群节点的数量和查看集群下各个节点的硬件使用情况。 - (3)通过gs_om命令(如:gs_om -t status --detail),可以选择在查看节点信息的同时将信息以文件的形式存储到本地中。 - (4)通过gs_om命令(如:gs_om -t status --detail),再(3)功能执行完毕后,可以选择是否需要输出更加详细的节点信息,以方便用户查看。 - -#### 三. 软件架构 - -![集群架构图](https://images.gitee.com/uploads/images/2021/0324/101029_f10ef6a8_1726416.png "image.png") - - 基本的架构图取决于搭建的集群所用到的服务器个数,这里的架构图采用的是一主两备的数据库集群。主服务器和备服务器会各自对应一个Storage,用于服务器的本地存储资源,持久化存储数据等。 - openGauss主(备)的作用则是负责存储业务数据、执行数据查询任务以及向客户端驱动返回执行结果,建议将主、备openGauss都分散部署在不同的物理节点中。 - OM是一个运维管理模块,当然里面也会包含gs_om这个数据库服务端工具,主要就是用于对openGauss实例进行管理。客户端驱动的作用是负责接收来自应用的访问请求,并向应用返回执行结果。 - 客户端驱动负责建立到openGauss实例的链接,把应用的SQL命令传输给openGauss实例,接收openGauss实例命令执行结果。 -![gs_om执行流程图](https://images.gitee.com/uploads/images/2021/0324/101107_40bf1bbf_1726416.png "image (1).png") - - 在CentOS7下激活集群,集群为一主二备,然后用创建的初始用户omm登录openGauss数据库,由于omm具有root权限,所以可以对gs_om进行修改,修改完毕以后利用脚本工具build.sh进行编译和打包,然后切换到测试用户testUser下对gs_om进行测试,目的是为了保证数据的安全性,如果测试结果与预期结果一致,则表示成功;反之,则再次切换到omm用户下进行代码修改,直至测试成功。 - -#### 四.安装教程 - -首先先搭建hadoop集群,推荐一主二备的模式,然后再主机安装openGauss,修改clusterInfo.xml配置文件中的集群名、ip等。 -可参考本人csdn博客具体的搭建: -1). https://blog.csdn.net/qq_41619524/article/details/110230996?spm=1001.2014.3001.5501 -2). https://blog.csdn.net/qq_41619524/article/details/109722264?spm=1001.2014.3001.5501 - - -#### 五. 使用说明与代码工作即运行结果 - -##### 5.1 使用说明 -1. 通过输入gs_om -t status ---detail来获取详细的节点信息,如节点号,节点名称、内存使用状态等。 -2. 在功能1的基础上,会提示是否需要将节点信息打印(根据提示将文件并输入到指定的文件夹)(如:txt、word等文件类型的文件),通过输入yes或者no来进行指令的确定,然后在指定的用户文件下可以查看到这个文件,文件里面存储的就是节点的信息。 -3. 在功能2的基础上,会提示是否需要将各个服务器的硬件信息现实出来,也是通过输入yes或者no来实现功能。 - -##### 5.2 代码工作 -(1)在文件src/manager/om/script/gspylib/common下设置了测试用例,systemTest.py。用于测试如何将单个节点的系统硬件信息显示出来并且比较哪个能显示更加详细的信息。测试可以通过在该文件夹下,在命令窗口中输入./测试文件名.py - -(2)在文件src/manager/om/script/gspylib/common/DbClusterInfo.py中的queryClsInfo()方法进行修改,添加换行符和属性id,实现集群节点信息的换行显示。 - -(3)在src/manager/om/script/gspylib/common下设置了一个configTest.py文件,进行测试获取各个节点的信息(首先要进行连接),你可能测试的时候需要设置一个config文件(用于填写你自己的所搭建的集群名和密码、IP地址等),如果进行了ssh的话,则不需要设置一个config文件,直接修改configTest.py中的主函数的即可,如果输出认证成功这表明连接没有问题,接下来就可以通过(1)中的函数来获取集群总各个节点的信息。 - -(4)将各个测试文件中的相关代码封装为方法存入到src/manager/om/script/gspylib/common/DbClusterInfo.py中进行测试。 - -##### 5.3 运行结果 -1.基本功能运行结果如下: -![输入图片说明](https://images.gitee.com/uploads/images/2021/0331/215508_79f8b148_1726416.png "功能1.png") -![输入图片说明](https://images.gitee.com/uploads/images/2021/0331/220407_dff20daf_1726416.png "功能1(1).png") -2.将节点信息打印成文本的形式,方便用户离线查看: -![输入图片说明](https://images.gitee.com/uploads/images/2021/0331/215659_8df8f0e7_1726416.png "功能2.png") -3.将各个集群节点的硬件信息详细显示出来: -![输入图片说明](https://images.gitee.com/uploads/images/2021/0331/215858_e57ba89a_1726416.png "功能3.png") -#### 六. 参与贡献 - -1. Fork 本仓库 -2. 新建 Feat_gs_om 分支 -3. 提交代码 -4. 新建 Pull Request -5. 对gs_om工具进行了优化 - -#### 七. 后期工作 - -1. 将gs_om -t status --detail 中的多个功能单独拆分出来,如将显示节点信息与输入为文件等拆分为多个,即通过不同的命令来执行不同的功能。 -2. 将gs_om 工具下的其他功能进行进一步的优化。 - -#### 特技 - -1. 使用 Readme\_XXX.md 来支持不同的语言,例如 Readme\_en.md, Readme\_zh.md -2. Gitee 官方博客 [blog.gitee.com](https://blog.gitee.com) -3. 你可以 [https://gitee.com/explore](https://gitee.com/explore) 这个地址来了解 Gitee 上的优秀开源项目 -4. [GVP](https://gitee.com/gvp) 全称是 Gitee 最有价值开源项目,是综合评定出的优秀开源项目 -5. Gitee 官方提供的使用手册 [https://gitee.com/help](https://gitee.com/help) -6. 
Gitee 封面人物是一档用来展示 Gitee 会员风采的栏目 [https://gitee.com/gitee-stars/](https://gitee.com/gitee-stars/) diff --git a/script/gspylib/common/DbClusterInfo.py b/script/gspylib/common/DbClusterInfo.py deleted file mode 100644 index 97a7a5b..0000000 --- a/script/gspylib/common/DbClusterInfo.py +++ /dev/null @@ -1,6340 +0,0 @@ -# -*- coding:utf-8 -*- -############################################################################# -# Copyright (c) 2020 Huawei Technologies Co.,Ltd. -# -# openGauss is licensed under Mulan PSL v2. -# You can use this software according to the terms -# and conditions of the Mulan PSL v2. -# You may obtain a copy of Mulan PSL v2 at: -# -# http://license.coscl.org.cn/MulanPSL2 -# -# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, -# WITHOUT WARRANTIES OF ANY KIND, -# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, -# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. -# See the Mulan PSL v2 for more details. -# ---------------------------------------------------------------------------- -# Description : DbClusterInfo.py is a utility to get cluster information -############################################################################# -import binascii -import os -import subprocess -import struct -import time -import types -import sys -import re -import pwd -import xml.dom.minidom -import xml.etree.cElementTree as ETree -import json -import socket -import copy - -sys.path.append(os.path.split(os.path.realpath(__file__))[0] + "/../../") -from gspylib.os.gsfile import g_file -from gspylib.common.ErrorCode import ErrorCode -from gspylib.common.VersionInfo import VersionInfo - -########################### -# instance role -########################### -# init value -INSTANCE_ROLE_UNDEFINED = -1 -# cm_server -INSTANCE_ROLE_CMSERVER = 0 -# gtm -INSTANCE_ROLE_GTM = 1 -# etcd -INSTANCE_ROLE_ETCD = 2 -# cn -INSTANCE_ROLE_COODINATOR = 3 -# dn -INSTANCE_ROLE_DATANODE = 4 -# cm_agent -INSTANCE_ROLE_CMAGENT = 5 - -########################### -# ID num -########################### -BASE_ID_CMSERVER = 1 -BASE_ID_GTM = 1001 -BASE_ID_CMAGENT = 10001 -BASE_ID_DUMMYDATANODE = 3001 -BASE_ID_COORDINATOR = 5001 -BASE_ID_DATANODE = 6001 -BASE_ID_ETCD = 7001 -DIRECTORY_PERMISSION = 0o750 -KEY_FILE_PERMISSION = 0o600 - -# For primary/standby instance When the ID > 7000 , -# the new id is start from 40001 -OLD_LAST_PRIMARYSTANDBY_BASEID_NUM = 7000 -NEW_FIRST_PRIMARYSTANDBY_BASEID_NUM = 40000 -# For salve instance When the ID > 5000 , the new id is start from 20001 -OLD_LAST_DUMMYNODE_BASEID_NUM = 5000 -NEW_FIRST_DUMMYNODE_BASEID_NUM = 20000 - -# master instance default port -MASTER_BASEPORT_CMS = 5000 -MASTER_BASEPORT_GTM = 6000 -# cm agent has no port, just occupancy index 5 -MASTER_BASEPORT_CMAGENT = 0 -MASTER_BASEPORT_COO = 8000 -MASTER_BASEPORT_DATA = 40000 -MASTER_BASEPORT_ETCD = 2379 -# standby instance default port -STANDBY_BASEPORT_CMS = 5500 -STANDBY_BASEPORT_GTM = 6500 -# cm agent has no port, just occupancy index 5 -STANDBY_BASEPORT_CMAGENT = 0 -STANDBY_BASEPORT_COO = 8500 -STANDBY_BASEPORT_DATA = 45000 -STANDBY_BASEPORT_ETCD = 2380 -# dummy standby instance default port -DUMMY_STANDBY_BASEPORT_DATA = 50000 - -########################### -# instance type. 
only for CN/DN -########################### -INSTANCE_TYPE_UNDEFINED = -1 -# master -MASTER_INSTANCE = 0 -# standby -STANDBY_INSTANCE = 1 -# dummy standby -DUMMY_STANDBY_INSTANCE = 2 -#cascade standby -CASCADE_STANDBY = 3 - -########################### -# instance number -########################### -# cm:cm_server, cm_agent -MIRROR_COUNT_CMS = 2 -# gtm:gtm_server, gtm_agent -MIRROR_COUNT_GTM = 2 -# ssd:ssd_server, ssd_agent -MIRROR_COUNT_SSD = 2 -# minimum number of nodes -MIRROR_COUNT_DATA = 3 -# etcd number >=3 and <= 7 -MIRROR_COUNT_ETCD_MIN = 3 -MIRROR_COUNT_ETCD_MAX = 7 -# max number of CN instance -MIRROR_COUNT_CN_MAX = 16 -# max number of node -MIRROR_COUNT_NODE_MAX = 1024 -# max number of DB instance(primary instance) -MIRROR_COUNT_DN_MAX = 4096 -# min number of replication for CLUSTER_TYPE_SINGLE_PRIMARY_MULTI_STANDBY -MIRROR_COUNT_REPLICATION_MIN = 2 -# max number of replicationfor CLUSTER_TYPE_SINGLE_PRIMARY_MULTI_STANDBY -MIRROR_COUNT_REPLICATION_MAX = 8 -# max number of azPriority for CLUSTER_TYPE_SINGLE_PRIMARY_MULTI_STANDBY -AZPRIORITY_MAX = 10 -# min number of azPriority for CLUSTER_TYPE_SINGLE_PRIMARY_MULTI_STANDBY -AZPRIORITY_MIN = 1 -# DB port set step size for CLUSTER_TYPE_SINGLE_PRIMARY_MULTI_STANDBY -PORT_STEP_SIZE = 20 - -MIRROR_ID_COO = -1 -MIRROR_ID_AGENT = -3 -MIRROR_ID_ETCD = -5 - -# cluster type -CLUSTER_TYPE_SINGLE = "single" -CLUSTER_TYPE_SINGLE_PRIMARY_MULTI_STANDBY = "single-primary-multi-standby" -CLUSTER_TYPE_SINGLE_INST = "single-inst" - -# env parameter -ENV_CLUSTERCONFIG = "CLUSTERCONFIGFILE" - -# default config version, it is used by gs_upgrade -BIN_CONFIG_VERSION = 2 -BIN_CONFIG_VERSION_SINGLE = 101 -BIN_CONFIG_VERSION_SINGLE_PRIMARY_MULTI_STANDBY = 201 -BIN_CONFIG_VERSION_SINGLE_INST = 301 - -# page size -PAGE_SIZE = 8192 -MAX_IP_NUM = 3 -CONFIG_IP_NUM = 1 - -NODE_ID_LEN = 2 -INSTANCE_ID_LEN = 8 -SPACE_LEN = 1 -STATE_LEN = 17 -SEPERATOR_LEN = 1 -IP_LEN = 16 - -# GPHOME -CLUSTER_TOOL_PATH = "/opt/huawei/wisequery" - -# key words for json configure file -# Globalinfo -JSON_GLOBALINFO = "Globalinfo" -JSON_TOOL_PATH = "gaussdbToolPath" -JSON_CLUSTER_NAME = "ClusterName" -JSON_LOGPATH = "gaussdbLogPath" -JSON_TMPPATH = "gaussdbTmpPath" -JSON_MANAGER_PATH = "gaussdbManagerPath" -JSON_APPPATH = "gaussdbAppPath" -QUORUMMODE = "quorumMode" -REPLICATIONCOUNT = "replicationCount" - -# keywords for layouts in json file -JSON_LAYOUTS = "Layouts" -JSON_AZNAME = "AZName" -JSON_HOSTS = "Hosts" -JSON_IP = "IP" -JSON_CHANNEL_PORT = "channelPort" -JSON_INSTANCES = "Instances" -JSON_ID = "Id" -JSON_SCRIPTS = "Scripts" -JSON_CHECK = "check" -JSON_FAILOVER = "failover" -JSON_RESTART = "restart" -JSON_START = "start" -JSON_STOP = "stop" -JSON_SWITCHOVER = "switchover" -JSON_BUILD = "build" -JSON_KILL = "kill" -JSON_GETPASSWD = "getpasswd" -JSON_CHECK_PGXC = "check_pgxc" -JSON_CHECK_PGXC_GROUP = "check_pgxc_group" -JSON_CREATE_PGXC_NODE = "create_pgxc_node" -JSON_CREATE_PGXC_GROUP = "create_pgxc_group" -JSON_CHECK_PGXC_GROUP_EXPAND = "check_pgxc_group_expand" -JSON_UPDATE_PGXC_GROUP = "update_pgxc_group" -CHANGE_PGXC_NODE = "change_pgxc_node" -DELETE_PGXC_NODE = "delete_pgxc_node" -JSON_EXEC_WITH_TRANSACTION = "execute_with_transaction" -JSON_CHECK_SYNCHRONOUS_STANDY = "check_synchronous_standby" -JSON_CHANGE_SYNCHRONOUS_STANDBY = "change_synchronous_standby" -JSON_TYPE_NAME = "TypeName" -JSON_ATTRIBUTES = "Attributes" -JSON_DATA_DIR = "DataDir" -JSON_GROUP = "Group" -JSON_PORT = "Port" -JSON_REPLPORT = "ReplPort" -JSON_PEER_PORT = "PeerPort" -JSON_CLIENT_PORT = 
"ClientPort" -JSON_ETCD_DATA_DIR = "EtcdDataDir" -JSON_ETCD_CLUSTER_NAME = "ClusterName" -JSON_SCTP_PORT = "SctpPort" -JSON_CONTROL_PORT = "ControlPort" -# keywords for groups in json file -JSON_GROUPS = "Groups" -JSON_GROUP_TYPE = "GroupType" -JSON_GROUP_ID = "GroupId" -JSON_PARENT_NODE = "ParentNode" -JSON_ROLE = "Role" - -# keywords for StaticConfig in json file -JSON_STATIC_CONFIG = "StaticConfig" -JSON_NUM_PRIMARYAZ = "NumPrimaryAZ" -JSON_PRIMARY_AZ = "PrimaryAZ" -JSON_SYNC_AZ = "SyncAZ" -JSON_THIRDPART_AZ = "ThirdPartAZ" - -g_dom = None - -# The default network type is single plane -g_networkType = 0 - -# Oltp's inst type -# etcd -ETCD = 'etcd' -# cm -CLUSTER_MANAGER = 'cluster_manager' -DN_ZENITH_ZPAXOS = "DN_ZENITH_ZPAXOS" -DN_ZENITH_ZPAXOS_V2 = "DN_ZENITH_ZPAXOS_V2" -DN_ZENITH_HA = "DN_ZENITH_HA" -COORDINATOR = "coordinator" -CN_ZENITH_ZSHARDING = "CN_ZENITH_ZSHARDING" -GTS_ZENITH = "GTS_ZENITH" -OLTP_DN_TYPES = [DN_ZENITH_ZPAXOS, DN_ZENITH_ZPAXOS_V2, DN_ZENITH_HA] -OLTP_CN_TYPES = [CN_ZENITH_ZSHARDING] -# TP AZ names -azName1 = "AZ1" -azName2 = "AZ2" -azName3 = "AZ3" -AZNMAE_LIST = [azName1, azName2, azName3] -DN_ROLE_MAP = {"Primary": "P", "Standby": "S", "Normal": "P", "Secondary": "R"} - - -def InstanceIgnore_haPort(Object): - """ - funciton : Analyze the current instance role:CN or CMAGENT. - input : Object - output : boolean - """ - # we only support CN/cm_agent - if ( - Object.instanceRole == INSTANCE_ROLE_COODINATOR or - Object.instanceRole == INSTANCE_ROLE_CMAGENT): - return True - else: - return False - - -def InstanceIgnore_isMaster(Object): - """ - funciton : Analyze the current instance role:GTM or DN. - input : Object - output : boolean - """ - # we only support DN/gtm - if ( - Object.instanceRole != INSTANCE_ROLE_GTM and Object.instanceRole - != INSTANCE_ROLE_DATANODE): - return True - else: - return False - - -def ignoreCheck(Object, member, model): - """ - funciton : Ignore checking the instance information of table. 
- input : Object, Object, model - output : boolean - """ - INSTANCEINFO_IGNORE_TABLE = {} - if (model == "replace"): - # init instance ignore table for replace - INSTANCEINFO_IGNORE_TABLE = {"listenIps": None, - "haIps": None, - "hostname": None, - "mirrorId": None - } - elif (model == "changeIP"): - # init instance ignore table for changeip - INSTANCEINFO_IGNORE_TABLE = {"listenIps": None, - "haIps": None, - "hostname": None, - "port": None, - "haPort": None, - "mirrorId": None - } - elif (model == "upgradectl"): - # init instance ignore table for upgradectl - INSTANCEINFO_IGNORE_TABLE = { - "instanceRole": None, - "instanceId": None, - "mirrorId": None - } - elif (model == "manageCN"): - # init instance ignore table for manageCN - INSTANCEINFO_IGNORE_TABLE = { - "instanceId": None, - "mirrorId": None - } - elif (model == "expand"): - # init instance ignore table for expand - INSTANCEINFO_IGNORE_TABLE = { - "mirrorId": None - } - elif (model == "compareCluster"): - INSTANCEINFO_IGNORE_TABLE = { - "listenIps": None, - "haIps": None, - "hostname": None, - "port": None, - "haPort": None, - "mirrorId": None - } - if (hasattr(Object, - "instanceRole") and Object.instanceRole == - INSTANCE_ROLE_COODINATOR): - INSTANCEINFO_IGNORE_TABLE["instanceId"] = None - # init node ignore table - DBNODEINFO_IGNORE_TABLE = { - "backIps": None, - "sshIps": None, - "masterBasePorts": None, - "standbyBasePorts": None, - "dummyStandbyBasePort": None, - "cmsNum": None, - "cooNum": None, - "dataNum": None, - "gtmNum": None, - "name": None, - "virtualIp": None - } - # init cluster ignore table - DBCLUSTERINFO_IGNORE_TABLE = { - "xmlFile": None, - "newNodes": None, - "clusterRings": None - } - - if (model == "upgradectl"): - DBNODEINFO_IGNORE_TABLE.pop("backIps") - DBNODEINFO_IGNORE_TABLE.pop("sshIps") - DBNODEINFO_IGNORE_TABLE.pop("name") - DBCLUSTERINFO_IGNORE_TABLE.pop("clusterRings") - elif (model == "manageCN"): - DBNODEINFO_IGNORE_TABLE.pop("backIps") - DBNODEINFO_IGNORE_TABLE.pop("sshIps") - DBNODEINFO_IGNORE_TABLE.pop("name") - DBNODEINFO_IGNORE_TABLE["id"] = None - if (isinstance(Object, instanceInfo)): - if (member not in list(INSTANCEINFO_IGNORE_TABLE.keys())): - return False - elif (INSTANCEINFO_IGNORE_TABLE[member] is None or not callable( - INSTANCEINFO_IGNORE_TABLE[member])): - return True - else: - return INSTANCEINFO_IGNORE_TABLE[member](Object) - elif (isinstance(Object, dbNodeInfo)): - if (member not in list(DBNODEINFO_IGNORE_TABLE.keys())): - return False - elif (DBNODEINFO_IGNORE_TABLE[member] is None or not callable( - DBNODEINFO_IGNORE_TABLE[member])): - return True - else: - return INSTANCEINFO_IGNORE_TABLE[member](Object) - elif (isinstance(Object, dbClusterInfo)): - if (member not in list(DBCLUSTERINFO_IGNORE_TABLE.keys())): - return False - elif (DBCLUSTERINFO_IGNORE_TABLE[member] is None or not callable( - DBCLUSTERINFO_IGNORE_TABLE[member])): - return True - else: - return DBCLUSTERINFO_IGNORE_TABLE[member](Object) - else: - return False - - -def checkPathVaild(obtainpath): - """ - function: check path vaild - input : envValue - output: NA - """ - PATH_CHECK_LIST = [" ", "|", ";", "&", "$", "<", ">", "`", "\\", "'", "\"", - "{", "}", "(", ")", "[", "]", "~", "*", "?", "!", "\n"] - if (obtainpath.strip() == ""): - return - for rac in PATH_CHECK_LIST: - flag = obtainpath.find(rac) - if flag >= 0: - raise Exception(ErrorCode.GAUSS_502["GAUSS_50219"] % obtainpath + \ - " There are illegal characters in the path.") - - -def obtainInstStr(objectList): - ''' - function : Obtain information of 
instance. - input : [] - output : String - ''' - info = "" - if (isinstance(objectList, list)): - for obj in objectList: - info += "%s\n" % str(obj) - return info - - -def compareObject(Object_A, Object_B, instName, tempbuffer=None, model=None, - manageCNinfo=None): - ''' - function : Compare object_A and Object_B. - input : Object, Object, instName, tempbuffer, model, manageCNinfo - output : boolean, tempbuffer - ''' - if tempbuffer is None: - tempbuffer = [] - if isinstance(Object_A, bytes) or isinstance(Object_A, str): - if (Object_A != Object_B): - tempbuffer.append(instName) - tempbuffer.append(Object_A) - tempbuffer.append(Object_B) - return False, tempbuffer - ### not the same type - elif (type(Object_A) != type(Object_B)): - tempbuffer.append(instName) - tempbuffer.append(str(Object_A)) - tempbuffer.append(str(Object_B)) - return False, tempbuffer - ### string, int, long, float, bool type - elif (isinstance(Object_A, bytes)): - if (Object_A != Object_B): - tempbuffer.append(instName) - tempbuffer.append(Object_A) - tempbuffer.append(Object_B) - return False, tempbuffer - elif (isinstance(Object_A, type(None))): - if (Object_A != Object_B): - tempbuffer.append(instName) - tempbuffer.append(Object_A) - tempbuffer.append(Object_B) - return False, tempbuffer - elif (isinstance(Object_A, int) or isinstance(Object_A, int) - or isinstance(Object_A, float) or isinstance(Object_A, bool)): - if (Object_A != Object_B): - tempbuffer.append(instName) - tempbuffer.append(Object_A) - tempbuffer.append(Object_B) - return False, tempbuffer - ### list type - elif (isinstance(Object_A, list)): - if (model == "manageCN"): - if (len(Object_A) != len(Object_B)): - theSame, tempbuffer = checkObject(Object_A, Object_B, instName, - tempbuffer, manageCNinfo) - if (not theSame): - return False, tempbuffer - if (len(Object_A) != 0 and len(Object_B) != 0): - Object_A1 = [] - Object_B1 = [] - for Obj_A in Object_A: - for Obj_B in Object_B: - if (Obj_A.name == Obj_B.name): - Object_A1.append(Obj_A) - Object_B1.append(Obj_B) - continue - for idx in range(len(Object_A1)): - result, tempbuffer = compareObject(Object_A1[idx], - Object_B1[idx], - "%s[%d]" % ( - instName, idx), - tempbuffer, - model, - manageCNinfo) - if (not result): - return False, tempbuffer - else: - for idx in range(len(Object_A)): - result, tempbuffer = compareObject(Object_A[idx], - Object_B[idx], - "%s[%d]" % ( - instName, idx), - tempbuffer, - model, - manageCNinfo) - if (not result): - return False, tempbuffer - else: - if (len(Object_A) != len(Object_B)): - instmap = {obtainInstStr(Object_A): obtainInstStr(Object_B)} - tempbuffer.append(instName) - tempbuffer.append(obtainInstStr(Object_A)) - tempbuffer.append(obtainInstStr(Object_B)) - return False, tempbuffer - - for idx in range(len(Object_A)): - result, tempbuffer = compareObject(Object_A[idx], - Object_B[idx], - "%s[%d]" % (instName, idx), - tempbuffer, - model, - manageCNinfo) - if (not result): - return False, tempbuffer - ### function type - elif isinstance(Object_A, types.FunctionType) or \ - isinstance(Object_A, types.MethodType): - return True, tempbuffer - elif isinstance(Object_A, type(dbClusterInfo())) or \ - isinstance(Object_A, type(dbNodeInfo())) or \ - isinstance(Object_A, type(instanceInfo())): - Object_A_list = dir(Object_A) - Object_B_list = dir(Object_B) - if (len(Object_A_list) != len(Object_B_list)): - tempbuffer.append(instName) - tempbuffer.append(str(Object_A)) - tempbuffer.append(str(Object_B)) - return False, tempbuffer - for i in Object_A_list: - if 
(i.startswith("_") or ignoreCheck(Object_A, i, model)): - continue - Inst_A = getattr(Object_A, i) - try: - Inst_B = getattr(Object_B, i) - except Exception as e: - tempbuffer.append(instName) - tempbuffer.append(str(Object_A)) - tempbuffer.append(str(Object_B)) - return False, tempbuffer - result, tempbuffer = compareObject(Inst_A, Inst_B, i, tempbuffer, - model, manageCNinfo) - if (not result): - return False, tempbuffer - else: - tempbuffer.append(instName) - tempbuffer.append(str(Object_A)) - tempbuffer.append(str(Object_B)) - return False, tempbuffer - return True, tempbuffer - - -def checkObject(Object_A, Object_B, instName, checkbuffer, manageCNinfo): - """ - """ - Join = [] - if (len(Object_A)): - Join.extend(Object_A) - if (len(Object_B)): - Join.extend(Object_B) - - # CN instance - if (isinstance(Join[0], instanceInfo)): - - # check instance role - if (Join[0].instanceRole != 3): - raise Exception(ErrorCode.GAUSS_528["GAUSS_52809"]) - # xml must match action - if (len(Object_A) == 1 and len(Object_B) == 0): - if (manageCNinfo.mode != "delete"): - raise Exception( - ErrorCode.GAUSS_528["GAUSS_52808"] % ("deletion", "add")) - elif (len(Object_A) == 0 and len(Object_B) == 1): - if (manageCNinfo.mode != "add"): - raise Exception(ErrorCode.GAUSS_528["GAUSS_52808"] % ( - "increased", "delete")) - else: - raise Exception(ErrorCode.GAUSS_528["GAUSS_52809"]) - - # at most add or delete one CN - if (len(manageCNinfo.nodeInfo) != 0 or len(manageCNinfo.cooInfo) != 0): - raise Exception(ErrorCode.GAUSS_528["GAUSS_52809"]) - - manageCNinfo.cooInfo.extend(Join) - # GaussDB nodes - elif (isinstance(Join[0], dbNodeInfo)): - # get added or deleted node - oa_names = [Obj_A.name for Obj_A in Object_A] - ob_names = [Obj_B.name for Obj_B in Object_B] - Object_AA = [Obj_A for Obj_A in Object_A if Obj_A.name not in ob_names] - Object_BB = [Obj_B for Obj_B in Object_B if Obj_B.name not in oa_names] - - # xml must match action - if (len(Object_AA) == 1 and len(Object_BB) == 0): - if (manageCNinfo.mode != "delete"): - raise Exception( - ErrorCode.GAUSS_528["GAUSS_52808"] % ("deletion", "add")) - elif (len(Object_AA) == 0 and len(Object_BB) == 1): - if (manageCNinfo.mode != "add"): - raise Exception(ErrorCode.GAUSS_528["GAUSS_52808"] % ( - "increased", "delete")) - else: - raise Exception(ErrorCode.GAUSS_528["GAUSS_52809"]) - - # at most add or delete one node - if (len(manageCNinfo.nodeInfo) != 0 or len(manageCNinfo.cooInfo) != 0): - raise Exception(ErrorCode.GAUSS_528["GAUSS_52809"]) - - if (len(Object_AA)): - manageCNinfo.nodeInfo.extend(Object_AA) - if (len(Object_BB)): - manageCNinfo.nodeInfo.extend(Object_BB) - else: - raise Exception(ErrorCode.GAUSS_528["GAUSS_52809"]) - - return True, checkbuffer - - -#################################################################### -##read cluster functions -#################################################################### - -xmlRootNode = None - - -def checkXMLFile(xmlFile): - """ - function : check XML contain DTDs - input : String - output : NA - """ - # Check xml for security requirements - # if it have " 0 or name.find( - "dataNode") == 0) and returnValue != ""): - returnValue = os.path.normpath(returnValue) - return (returnStatus, returnValue) - - -#################################################################### - - -class queryCmd(): - def __init__(self, outputFile="", dataPathQuery=False, portQuery=False, - azNameQuery=False): - self.outputFile = outputFile - self.dataPathQuery = dataPathQuery - self.portQuery = portQuery - self.azNameQuery = 
azNameQuery - self.clusterStateQuery = False - - -class peerInstanceInfo(): - """ - Peer instance information - """ - - def __init__(self): - self.peerDataPath = "" - self.peerHAIPs = [] - self.peerHAPort = 0 - self.peerRole = 0 - self.peer2DataPath = "" - self.peer2HAIPs = [] - self.peer2HAPort = 0 - self.peer2Role = 0 - - def __str__(self): - """ - Construct a printable string representation of a instanceInfo - """ - ret = "peerDataPath=%s,peerHAPort=%d,peerRole=%d" % ( - self.peerDataPath, self.peerHAPort, self.peerRole) - if self.peer2DataPath: - ret += ",peer2DataPath=%s" % self.peer2DataPath - if self.peer2HAPort: - ret += ",peer2HAPort=%d" % self.peer2HAPort - if self.peer2Role: - ret += ",peer2Role=%d" % self.peer2Role - return ret - -class instanceInfo(): - """ - Instance information - """ - - def __init__(self, instId=0, mirrorId=0): - """ - Constructor - """ - # instance id - self.instanceId = instId - self.mirrorId = mirrorId - # host name - self.hostname = "" - # listen ip - self.listenIps = [] - # ha ip - self.haIps = [] - # port - self.port = 0 - # It's pool port for coordinator, and ha port for other instance - self.haPort = 0 - # data directory - self.datadir = "" - # xlog directory - self.xlogdir = "" - # ssd data directory - self.ssdDir = "" - # instance type - self.instanceType = INSTANCE_TYPE_UNDEFINED - # instance role - self.instanceRole = INSTANCE_ROLE_UNDEFINED - # instance rack info - self.rack = "" - # oltp zpaxos sub instance type - self.subInstanceType = INSTANCE_ROLE_UNDEFINED - - self.level = 1 - # we use port and haPort to save peerPort/clientPort for etcd - # datanode: use haPort to save replport - # repl port - self.replport = 0 - # sctp port - self.sctpPort = 0 - # control port - self.controlPort = 0 - # az name - self.azName = "" - self.clusterName = "" - # peer port etcd - self.peerPort = 0 - # client port etcd - self.clientPort = 0 - # instance name - self.name = "" - # DB state Normal or other, use to save dynamic info - self.state = "" - # get staticConnections from database,use to save dynamic info - self.staticConnections = "" - # DB role such as Primary, Standby - self.localRole = "" - self.peerInstanceInfos = [] - self.syncNum = -1 - self.cascadeRole = "off" - - def __cmp__(self, target): - """ - Type compare - """ - if (type(self) != type(target)): - return 1 - if (not isinstance(target, instanceInfo)): - return 1 - if (not hasattr(target, "instanceId")): - return 1 - else: - return self.instanceId - target.instanceId - - def __str__(self): - """ - Construct a printable string representation of a instanceInfo - """ - ret = "InstanceId=%s,MirrorId=%s,Host=%s,Port=%s,DataDir=%s," \ - "XlogDir=%s,SsdDir=%s,InstanceType=%s,Role=%s,ListenIps=%s," \ - "HaIps=%s" % ( - self.instanceId, self.mirrorId, self.hostname, self.port, - self.datadir, self.xlogdir, self.ssdDir, self.instanceType, - self.instanceRole, self.listenIps, self.haIps) - if self.rack: - ret += ",rack=%s" % self.rack - if self.replport: - ret += ",replport=%s" % self.replport - if self.sctpPort: - ret += ",sctpPort=%s" % self.sctpPort - if self.controlPort: - ret += ",controlPort=%s" % self.controlPort - if self.azName: - ret += ",azName=%s" % self.azName - if self.clusterName: - ret += ",clusterName=%s" % self.clusterName - if self.peerPort: - ret += ",peerPort=%s" % self.peerPort - if self.clientPort: - ret += ",clientPort=%s" % self.clientPort - if self.name: - ret += ",name=%s" % self.name - return ret - - -class dbNodeInfo(): - """ - Instance info on a node - """ - - def 
__init__(self, nodeId=0, name=""): - """ - Constructor - """ - # node id - self.id = nodeId - # node name - self.name = name - self.backIps = [] - self.virtualIp = [] - self.sshIps = [] - # instance number - self.cmsNum = 0 - self.cooNum = 0 - self.dataNum = 0 - self.gtmNum = 0 - self.etcdNum = 0 - # cm_servers instance - self.cmservers = [] - # cn instance - self.coordinators = [] - # DB instance - self.datanodes = [] - # gtm instance - self.gtms = [] - # cm_agent instance - self.cmagents = [] - # etcd instance - self.etcds = [] - # cm_server/cm_agent data directory - self.cmDataDir = "" - self.dummyStandbyBasePort = 0 - self.masterBasePorts = [MASTER_BASEPORT_CMS, MASTER_BASEPORT_GTM, - MASTER_BASEPORT_COO, - MASTER_BASEPORT_DATA, MASTER_BASEPORT_ETCD, - MASTER_BASEPORT_CMAGENT] - self.standbyBasePorts = [STANDBY_BASEPORT_CMS, STANDBY_BASEPORT_GTM, - STANDBY_BASEPORT_COO, - STANDBY_BASEPORT_DATA, STANDBY_BASEPORT_ETCD, - STANDBY_BASEPORT_CMAGENT] - # azName - self.azName = "" - self.azPriority = 1 - self.standbyDnNum = 0 - self.dummyStandbyDnNum = 0 - self.cascadeRole = "off" - - def __cmp__(self, target): - """ - Type compare - """ - if (type(self) != type(target)): - return 1 - if (not isinstance(target, dbNodeInfo)): - return 1 - if (not hasattr(target, "id")): - return 1 - else: - return self.id - target.id - - def __str__(self): - """ - function : Construct a printable string representation of a dbNodeInfo - input : NA - output : String - """ - retStr = "HostName=%s,backIps=%s" % (self.name, self.backIps) - # cm_server instance information - for cmsInst in self.cmservers: - retStr += "\n%s" % str(cmsInst) - # cm_agent instance information - for cmaInst in self.cmagents: - retStr += "\n%s" % str(cmaInst) - # gtm instance information - for gtmInst in self.gtms: - retStr += "\n%s" % str(gtmInst) - # cn instance information - for cooInst in self.coordinators: - retStr += "\n%s" % str(cooInst) - # DB instance information - for dataInst in self.datanodes: - retStr += "\n%s" % str(dataInst) - # etcd instance information - for dataInst in self.etcds: - retStr += "\n%s" % str(dataInst) - - return retStr - - def setDnDetailNum(self): - self.dataNum = self.getDnNum(MASTER_INSTANCE) - self.standbyDnNum = self.getDnNum(STANDBY_INSTANCE) - self.dummyStandbyDnNum = self.getDnNum(DUMMY_STANDBY_INSTANCE) - - def getDnNum(self, dntype): - """ - function: get DB num - input: dntype - output:dn num - """ - count = 0 - for dnInst in self.datanodes: - if (dnInst.instanceType == dntype): - count += 1 - return count - - def appendInstance(self, instId, mirrorId, instRole, instanceType, - listenIps=None, - haIps=None, datadir="", ssddir="", level=1, - clusterType=CLUSTER_TYPE_SINGLE_INST, xlogdir="", - syncNum=-1): - """ - function : Classify the instance of cmserver/gtm - input : int,int,String,String - output : NA - """ - if not self.__checkDataDir(datadir, instRole): - raise Exception(ErrorCode.GAUSS_516["GAUSS_51638"] % \ - self.name + " Data directory[%s] is " - "conflicting." 
% datadir) - - dbInst = instanceInfo(instId, mirrorId) - - dbInst.hostname = self.name - dbInst.datadir = os.path.realpath(datadir) - - if (instRole == INSTANCE_ROLE_DATANODE): - dbInst.xlogdir = xlogdir - else: - dbInst.xlogdir = "" - dbInst.instanceType = instanceType - dbInst.instanceRole = instRole - if (listenIps is not None): - if (len(listenIps) == 0): - dbInst.listenIps = self.backIps[:] - else: - dbInst.listenIps = listenIps[:] - - if (haIps is not None): - if (len(haIps) == 0): - dbInst.haIps = self.backIps[:] - else: - dbInst.haIps = haIps[:] - # cm_server - if (instRole == INSTANCE_ROLE_CMSERVER): - dbInst.datadir = os.path.join(self.cmDataDir, "cm_server") - dbInst.port = self.__assignNewInstancePort(self.cmservers, - instRole, instanceType) - dbInst.level = level - dbInst.haPort = dbInst.port + 1 - self.cmservers.append(dbInst) - # gtm - elif (instRole == INSTANCE_ROLE_GTM): - dbInst.port = self.__assignNewInstancePort(self.gtms, instRole, - instanceType) - dbInst.haPort = dbInst.port + 1 - self.gtms.append(dbInst) - # cn - elif (instRole == INSTANCE_ROLE_COODINATOR): - dbInst.port = self.__assignNewInstancePort(self.coordinators, - instRole, instanceType) - dbInst.haPort = dbInst.port + 1 - dbInst.ssdDir = ssddir - self.coordinators.append(dbInst) - # dn - elif (instRole == INSTANCE_ROLE_DATANODE): - dbInst.port = self.__assignNewInstancePort(self.datanodes, - instRole, instanceType) - dbInst.haPort = dbInst.port + 1 - dbInst.ssdDir = ssddir - dbInst.syncNum = syncNum - self.datanodes.append(dbInst) - # cm_agent - elif (instRole == INSTANCE_ROLE_CMAGENT): - dbInst.datadir = os.path.join(self.cmDataDir, "cm_agent") - self.cmagents.append(dbInst) - # etcd - elif (instRole == INSTANCE_ROLE_ETCD): - dbInst.port = self.__assignNewInstancePort(self.etcds, instRole, - instanceType) - dbInst.haPort = self.__assignNewInstancePort(self.etcds, instRole, - STANDBY_INSTANCE) - self.etcds.append(dbInst) - - def __checkDataDir(self, datadir, instRole): - """ - function : Check whether the instance path is the same as with the - parameter of datadir - input : String,String - output : boolean - """ - if (datadir == ""): - return ( - instRole == INSTANCE_ROLE_CMSERVER or instRole == - INSTANCE_ROLE_CMAGENT) - checkPathVaild(datadir) - # cm_server - for cmsInst in self.cmservers: - if (cmsInst.datadir == datadir): - return False - # cn - for cooInst in self.coordinators: - if (cooInst.datadir == datadir): - return False - # dn - for dataInst in self.datanodes: - if (dataInst.datadir == datadir): - return False - # gtm - for gtmInst in self.gtms: - if (gtmInst.datadir == datadir): - return False - # etcd - for etcd in self.etcds: - if (etcd.datadir == datadir): - return False - # cm_agent - for cmaInst in self.cmagents: - if (cmaInst.datadir == datadir): - return False - - return True - - def assignNewInstancePort(self, instList, instRole, instanceType): - return self.__assignNewInstancePort(instList, instRole, instanceType) - - def __assignNewInstancePort(self, instList, instRole, instanceType): - """ - function : Assign a new port for the instance - input : [],String ,String - output : int - """ - port = 0 - # master instance - if instanceType == MASTER_INSTANCE: - port = self.masterBasePorts[instRole] - # standby instance - elif instanceType == STANDBY_INSTANCE: - port = self.standbyBasePorts[instRole] - # DB dummy standby instance - elif instanceType == DUMMY_STANDBY_INSTANCE: - port = self.dummyStandbyBasePort - # cn and cm_agent instance - elif instanceType == INSTANCE_TYPE_UNDEFINED: - 
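# The __checkDataDir walk above scans six per-role instance lists one by one.
# A compact sketch of the same "is this datadir already taken on the node"
# test, assuming a dbNodeInfo-like object with those list attributes
# (datadir_in_use is an illustrative helper, not part of this module):
def datadir_in_use(node, datadir):
    """Return True if datadir collides with any instance on this node."""
    taken = {
        inst.datadir
        for group in (node.cmservers, node.coordinators, node.datanodes,
                      node.gtms, node.etcds, node.cmagents)
        for inst in group
    }
    return datadir in taken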
port = self.masterBasePorts[instRole] - return port - for inst in instList: - if (inst.instanceType == instanceType): - port += 2 - - return port - - -class dbClusterInfo(): - """ - Cluster info - """ - - def __init__(self, checkSctpPort=False): - """ - Constructor - """ - self.name = "" - self.appPath = "" - self.logPath = "" - self.xmlFile = "" - self.dbNodes = [] - self.newNodes = [] - self.cmsFloatIp = "" - self.__newInstanceId = [BASE_ID_CMSERVER, BASE_ID_GTM, BASE_ID_ETCD, - BASE_ID_COORDINATOR, BASE_ID_DATANODE, - BASE_ID_CMAGENT] - self.__newDummyStandbyId = BASE_ID_DUMMYDATANODE - self.__newMirrorId = 0 - self.clusterRings = [] - self.clusterType = CLUSTER_TYPE_SINGLE_INST - self.checkSctpPort = checkSctpPort - self.clusterName = "" - self.toolPath = "" - self.agentPath = "" - self.agentLogPath = "" - self.tmpPath = "" - self.managerPath = "" - self.replicaNum = 0 - self.corePath = "" - - # add azName - self.azName = "" - self.cascadeRole = "off" - - self.version = 0 - self.installTime = 0 - self.localNodeId = 0 - self.nodeCount = 0 - # cluster properties - self.replicationCount = 0 - self.quorumMode = "" - self.gtmcount = 0 - self.etcdcount = 0 - self.cmscount = 0 - self.__newGroupId = 0 - self.cncount = 0 - self.masterDnCount = 0 - self.standbyDnCount = 0 - self.dummyStandbyDnCount = 0 - - def __str__(self): - """ - function : Construct a printable string representation of a - dbClusterInfo - input : NA - output : String - """ - retStr = "ClusterName=%s,AppPath=%s,LogPath=%s,ClusterType=%s" % \ - (self.name, self.appPath, self.logPath, self.clusterType) - - for dbNode in self.dbNodes: - retStr += "\n%s" % str(dbNode) - - return retStr - - @staticmethod - def setDefaultXmlFile(xmlFile): - """ - function : Set the default xml file - input : String - output : NA - """ - if not os.path.exists(xmlFile): - raise Exception( - ErrorCode.GAUSS_502["GAUSS_50201"] % "XML configuration") - - os.putenv(ENV_CLUSTERCONFIG, xmlFile) - - @staticmethod - def readClusterHosts(xmlFile=""): - """ - function : Read cluster node name from xml file - input : String - output : String - """ - if (xmlFile != ""): - dbClusterInfo.setDefaultXmlFile(xmlFile) - - # read cluster node name from xml file - (retStatus, retValue) = readOneClusterConfigItem( - initParserXMLFile(xmlFile), "nodeNames", "cluster") - if (retStatus != 0): - raise Exception(ErrorCode.GAUSS_502["GAUSS_50204"] - % "node names" + " Error: \n%s" % retValue) - nodeNames = [] - nodeNames_tmp = retValue.split(",") - for nodename in nodeNames_tmp: - nodeNames.append(nodename.strip()) - if (len(nodeNames) == 0): - raise Exception(ErrorCode.GAUSS_502["GAUSS_50204"] % \ - "XML file" + " There is no nodes in cluster " - "configuration file.") - - return nodeNames - - @staticmethod - def readClustercorePath(xmlFile): - """ - function : Read corefile path from default xml file - input : String - output : String - """ - dbClusterInfo.setDefaultXmlFile(xmlFile) - # read corefile path from xml file - (retStatus, retValue) = readOneClusterConfigItem( - initParserXMLFile(xmlFile), "corePath", "cluster") - if retStatus != 0: - raise Exception(ErrorCode.GAUSS_512["GAUSS_51200"] - % "corePath" + " Error: \n%s" % retValue) - corepath = os.path.normpath(retValue) - checkPathVaild(corepath) - return corepath - - @staticmethod - def readClusterAppPath(xmlFile): - """ - function : Read the cluster's application path from xml file - input : String - output : String - """ - dbClusterInfo.setDefaultXmlFile(xmlFile) - # read the cluster's application path from xml 
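# Port assignment recap for __assignNewInstancePort: an instance starts from
# its role's base port, then moves up by 2 for every already-registered
# instance of the same type on the node, so consecutive peers never collide
# on port/haPort (haPort is port + 1). A pure-function sketch of that rule
# (the function name and sample numbers are assumptions for illustration):
def next_instance_port(base_port, existing_types, instance_type):
    """base_port: role base; existing_types: types already on the node."""
    return base_port + 2 * sum(
        1 for t in existing_types if t == instance_type)

# Example: two standby DNs already present, the third gets base + 4:
# next_instance_port(40000, [1, 1], 1) == 40004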
file - (retStatus, retValue) = readOneClusterConfigItem( - initParserXMLFile(xmlFile), "gaussdbAppPath", "cluster") - if retStatus != 0: - raise Exception(ErrorCode.GAUSS_512["GAUSS_51200"] - % "gaussdbAppPath" + " Error: \n%s" % retValue) - - appPath = os.path.normpath(retValue) - checkPathVaild(appPath) - return appPath - - @staticmethod - def readClusterTmpMppdbPath(user, xmlFile): - """ - function : Read temporary mppdb path from xml file - input : String,String - output : String - """ - dbClusterInfo.setDefaultXmlFile(xmlFile) - # read temporary mppdb path from xml file - (retStatus, retValue) = readOneClusterConfigItem( - initParserXMLFile(xmlFile), "tmpMppdbPath", "cluster") - if retStatus != 0: - (retToolPathStatus, retToolPathValue) = readOneClusterConfigItem( - initParserXMLFile(xmlFile), "gaussdbToolPath", "cluster") - if retToolPathStatus != 0: - retToolPathValue = CLUSTER_TOOL_PATH - retValue = os.path.join(retToolPathValue, "%s_mppdb" % user) - - tmppath = os.path.normpath(retValue) - checkPathVaild(tmppath) - return tmppath - - @staticmethod - def readClusterLogPath(xmlFile): - """ - function : Read log path from xml file - input : String - output : NA - """ - dbClusterInfo.setDefaultXmlFile(xmlFile) - # read log path from xml file - (retStatus, retValue) = readOneClusterConfigItem( - initParserXMLFile(xmlFile), "gaussdbLogPath", "cluster") - if retStatus == 0: - tmppath = os.path.normpath(retValue) - checkPathVaild(tmppath) - return tmppath - elif retStatus == 2: - return "/var/log/gaussdb" - else: - raise Exception(ErrorCode.GAUSS_500["GAUSS_51200"] - % "gaussdbLogPath" + " Error: \n%s" % retValue) - - def initFromStaticConfig(self, user, static_config_file="", - isLCCluster=False, ignoreLocalEnv=False): - """ - function : Init cluster from static configuration file - input : String,String - output : NA - """ - # check Os user - self.__checkOsUser(user) - # get static_config_file - if (static_config_file == ""): - staticConfigFile = self.__getStaticConfigFilePath(user) - else: - staticConfigFile = static_config_file - # read static_config_file - self.__readStaticConfigFile(staticConfigFile, user, isLCCluster, - ignoreLocalEnv=ignoreLocalEnv) - - def getClusterVersion(self, staticConfigFile): - """ - function : get cluster version information - from static configuration file - input : String - output : version - """ - try: - with open(staticConfigFile, "rb") as fp: - info = fp.read(28) - (crc, lenth, version, currenttime, nodeNum, - localNodeId) = struct.unpack("=IIIqiI", info) - except Exception as e: - raise Exception( - ErrorCode.GAUSS_512["GAUSS_51236"] + " Error: \n%s." 
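# The header read in getClusterVersion relies on "=IIIqiI" describing a
# 28-byte record: crc(I) + len(I) + version(I) + time(q) + nodeNum(i) +
# localNodeId(I) = 4+4+4+8+4+4. A self-contained sketch that round-trips
# such a header (all field values below are made up for illustration):
import struct

HEADER_FMT = "=IIIqiI"
assert struct.calcsize(HEADER_FMT) == 28

def parse_static_config_header(raw28):
    """Unpack the 28-byte static-config header into a dict."""
    crc, length, version, ctime, node_num, local_node = \
        struct.unpack(HEADER_FMT, raw28)
    return {"crc": crc, "len": length, "version": version,
            "time": ctime, "nodeNum": node_num, "localNodeId": local_node}

demo = struct.pack(HEADER_FMT, 0, 28, 301, 1618063424, 3, 1)
assert parse_static_config_header(demo)["version"] == 301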
% str(e)) - - return version - - def isMiniaturizedDeployment(self, cluster_version): - """ - function: judge whether is the miniaturized deployment - input : Int - output : bool value - """ - if (cluster_version >= 101 and cluster_version <= 200): - return True - return False - - def isSinglePrimaryMultiStandbyDeployment(self, cluster_version): - """ - judge whether is the single primary multi standby deployment - """ - if (cluster_version >= 201 and cluster_version <= 300): - return True - return False - - def queryNodeInfo(self, sshtool, localHostName, nodeId, fileName=""): - """ - get cluster node info, if nodeid is 0, we get all node info, - else ony get one node info - """ - i = 0 - (clusterState, syncInfo) = self.__getDnSenderStatus(sshtool, - localHostName) - outText = \ - "--------------------------------------------------------------" \ - "---------\n\n" - outText = outText + ("cluster_state : %s\n" % clusterState) - outText = outText + "redistributing : No\n\n" - outText = outText + \ - "-------------------------------------" \ - "----------------------------------\n\n" - for dbNode in self.dbNodes: - if dbNode.id == nodeId or nodeId == 0: - for dnInst in dbNode.datanodes: - outText = outText + ( - "node : %u\n" % dbNode.id) - outText = outText + ( - "node_name : %s\n" % dbNode.name) - outText = outText + ( - "instance_id : %u\n" % - dnInst.instanceId) - outText = outText + ("node_ip : %s\n" % - dnInst.listenIps[0]) - outText = outText + ( - "data_path : %s\n" % - dnInst.datadir) - outText = outText + "type : " \ - "Datanode\n" - outText = outText + ( - "instance_state : %s\n" % - dnInst.state) - outText = outText + ( - "az_name : %s\n" % - dnInst.azName) - if dnInst.localRole == "Primary": - outText = outText + ( - "static_connections : %s\n" % - dnInst.staticConnections) - outText = outText + ( - "HA_state : %s\n" % - clusterState) - outText = outText + ( - "instance_role : %s\n" % - dnInst.localRole) - if dnInst.localRole == "Primary": - outText = outText + "\n------------------------" \ - "---------------" \ - "--------------------------------\n\n" - continue - for i_loop in syncInfo: - if i_loop[0] == dnInst.listenIps[0]: - if i_loop[11] == '': - i_loop[11] = 'Unknown' - outText = outText + ( - "HA_state : %s\n" % - i_loop[1]) - outText = outText + ( - "sender_sent_location : %s\n" % - i_loop[2]) - outText = outText + ( - "sender_write_location : %s\n" % - i_loop[3]) - outText = outText + ( - "sender_flush_location : %s\n" % - i_loop[4]) - outText = outText + ( - "sender_replay_location : %s\n" % - i_loop[5]) - outText = outText + ( - "receiver_received_location: %s\n" % - i_loop[6]) - outText = outText + ( - "receiver_write_location : %s\n" % - i_loop[7]) - outText = outText + ( - "receiver_flush_location : %s\n" % - i_loop[8]) - outText = outText + ( - "receiver_replay_location : %s\n" % - i_loop[9]) - outText = outText + ( - "sync_percent : %s\n" % - i_loop[10]) - outText = outText + ( - "sync_state : %s\n" % - i_loop[11]) - if dnInst.localRole == "Cascade Standby": - outText = outText + ( - "upstream_nodeIp : %s\n" % - i_loop[12]) - break - outText = outText + "\n------------------------" \ - "---------------" \ - "--------------------------------\n\n" - if nodeId != 0: - break - else: - i += 1 - continue - if i >= len(self.dbNodes): - raise Exception(ErrorCode.GAUSS_516["GAUSS_51612"] % nodeId) - self.__fprintContent(outText, fileName) - - def printStaticConfig(self, user, fileName="", isLCCluster=False): - """ - function : printStaticConfig - input : String - output : 
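# The two is*Deployment helpers above encode static-config version bands:
# 101-200 is the miniaturized (single) layout and 201-300 is single-primary
# multi-standby; the reader later in this file also treats 301-400 as
# single-instance. A table-driven sketch of the same banding (the constant
# strings stand in for this module's CLUSTER_TYPE_* values):
VERSION_BANDS = (
    (101, 200, "CLUSTER_TYPE_SINGLE"),
    (201, 300, "CLUSTER_TYPE_SINGLE_PRIMARY_MULTI_STANDBY"),
    (301, 400, "CLUSTER_TYPE_SINGLE_INST"),
)

def cluster_type_for_version(version):
    """Map a static-config version number onto its cluster type band."""
    for low, high, ctype in VERSION_BANDS:
        if low <= version <= high:
            return ctype
    raise ValueError("unsupported static config version: %s" % version)

# cluster_type_for_version(301) -> "CLUSTER_TYPE_SINGLE_INST"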
NA - """ - try: - # read static_config_file - outText = "NodeHeader:\n" - outText = outText + ("version:%u\n" % self.version) - outText = outText + ("time:%ld\n" % self.installTime) - outText = outText + ("nodeCount:%u\n" % self.nodeCount) - outText = outText + ("node:%u\n" % self.localNodeId) - dnTotalNum = self.__getDnInstanceNum() - for dbNode in self.dbNodes: - if self.clusterType == \ - CLUSTER_TYPE_SINGLE_PRIMARY_MULTI_STANDBY or \ - self.clusterType == CLUSTER_TYPE_SINGLE_INST: - outText = outText + ("azName:%s\n" % dbNode.azName) - outText = outText + ("azPriority:%u\n" % dbNode.azPriority) - outText = outText + ("node :%u\n" % dbNode.id) - outText = outText + ("nodeName:%s\n" % dbNode.name) - - outText = outText + "ssh channel :\n" - j = 0 - for sshIp in dbNode.sshIps: - outText = outText + ("sshChannel %u:%s\n" % ( - j + 1, dbNode.sshIps[j])) - j = j + 1 - outText = outText + ( - "datanodeCount :%u\n" % len(dbNode.datanodes)) - j = 0 - for dnInst in dbNode.datanodes: - j = j + 1 - outText = outText + ("datanode %u:\n" % j) - outText = outText + ( - "datanodeLocalDataPath :%s\n" % dnInst.datadir) - outText = outText + ( - "datanodeXlogPath :%s\n" % dnInst.xlogdir) - k = 0 - for listenIp in dnInst.listenIps: - k = k + 1 - outText = outText + ( - "datanodeListenIP %u:%s\n" % (k, listenIp)) - outText = outText + ("datanodePort :%u\n" % dnInst.port) - k = 0 - for haIp in dnInst.haIps: - k = k + 1 - outText = outText + ( - "datanodeLocalHAIP %u:%s\n" % (k, haIp)) - outText = outText + ( - "datanodeLocalHAPort :%u\n" % dnInst.haPort) - outText = outText + ( - "dn_replication_num: %u\n" % dnTotalNum) - k = 0 - if self.clusterType == \ - CLUSTER_TYPE_SINGLE_PRIMARY_MULTI_STANDBY or \ - self.clusterType == CLUSTER_TYPE_SINGLE_INST: - maxPeerNum = MIRROR_COUNT_REPLICATION_MAX if \ - self.nodeCount > MIRROR_COUNT_REPLICATION_MAX \ - else self.nodeCount - for k in range(maxPeerNum - 1): - outText = outText + ( - "datanodePeer%uDataPath :%s\n" % ( - k, dnInst.peerInstanceInfos[k].peerDataPath)) - m = 0 - for peerHaIP in dnInst.peerInstanceInfos[ - k].peerHAIPs: - m += 1 - outText = outText + ( - "datanodePeer%uHAIP %u:%s\n" % ( - k, m, peerHaIP)) - outText = outText + ( - "datanodePeer%uHAPort :%u\n" % ( - k, dnInst.peerInstanceInfos[k].peerHAPort)) - else: - outText = outText + ("datanodePeerDataPath :%s\n" % - dnInst.peerInstanceInfos[ - 0].peerDataPath) - m = 0 - for peerHaIP in dnInst.peerInstanceInfos[k].peerHAIPs: - m += 1 - outText = outText + ( - "datanodePeer2HAIP %u:%s\n" % ( - m, peerHaIP)) - outText = outText + ("datanodePeerHAPort :%u\n" % - dnInst.peerInstanceInfos[ - 0].peerHAPort) - outText = outText + ("datanodePeer2DataPath :%s\n" % - dnInst.peerInstanceInfos[ - 0].peer2DataPath) - m = 0 - for peer2HaIP in dnInst.peerInstanceInfos[ - 0].peer2HAIPs: - m += 1 - outText = outText + ( - "datanodePeer2HAIP %u:%s\n" % ( - m, peer2HaIP)) - outText = outText + ("datanodePeer2HAPort :%u\n" % - dnInst.peerInstanceInfos[ - 0].peer2HAPort) - - self.__fprintContent(outText, fileName) - except Exception as e: - raise Exception(ErrorCode.GAUSS_516["GAUSS_51652"] % str(e)) - - def queryClsInfo(self, hostName, sshtool, mpprcFile, cmd): - try: - clusterState = 'Normal' - roleStatusArray = [] - dbStateArray = [] - maxNodeNameLen = 0 - maxDataPathLen = 0 - maxAzNameLen = 0 - dnNodeCount = 0 - roleStatus = "" - dbState = "" - primaryDbNum = 0 - primaryDbState = "" - for dbNode in self.dbNodes: - for dnInst in dbNode.datanodes: - sshcmd = "gs_ctl query -D %s" % dnInst.datadir - output = "" - if 
(dbNode.name != hostName): - (statusMap, output) = sshtool.getSshStatusOutput( - sshcmd, [dbNode.name], mpprcFile) - if statusMap[dbNode.name] != 'Success' or \ - output.find("exc_sql failed") > 0: - if output.find( - "could not connect to the local server") \ - > 0 or output.find( - "Is server running") > 0: - roleStatus = "Down" - dbState = "Manually stopped" - else: - roleStatus = "Unknown" - dbState = "Unknown" - else: - res = re.findall(r'local_role\s*:\s*(\w+)', output) - roleStatus = res[0] - res = re.findall(r'db_state\s*:\s*(\w+)', output) - dbState = res[0] - else: - (status, output) = subprocess.getstatusoutput(sshcmd) - if status != 0 or output.find("exc_sql failed") > 0: - if output.find( - "could not connect to the local server") \ - > 0 or output.find( - "Is server running") > 0: - roleStatus = "Down" - dbState = "Manually stopped" - else: - roleStatus = "Unknown" - dbState = "Unknown" - else: - res = re.findall(r'local_role\s*:\s*(\w+)', output) - roleStatus = res[0] - res = re.findall(r'db_state\s*:\s*(\w+)', output) - dbState = res[0] - if (dbState == "Need"): - detailInformation = re.findall( - r'detail_information\s*:\s*(\w+)', output) - dbState = "Need repair(%s)" % detailInformation[0] - roleStatusArray.append(roleStatus) - dbStateArray.append(dbState) - nodeNameLen = len(dbNode.name) - dataPathLen = len(dbNode.datanodes[0].datadir) - azNameLen = len(dbNode.azName) - maxNodeNameLen = maxNodeNameLen if maxNodeNameLen > \ - nodeNameLen else \ - nodeNameLen - maxDataPathLen = maxDataPathLen if maxDataPathLen > \ - dataPathLen else \ - dataPathLen - maxAzNameLen = maxAzNameLen if maxAzNameLen > azNameLen \ - else azNameLen - dnNodeCount += 1 - if roleStatus == "Primary": - primaryDbNum += 1 - primaryDbState = dbState - else: - if roleStatus != "Standby" and \ - roleStatus != "Secondary" and \ - roleStatus != "Cascade": - clusterState = 'Degraded' - if dbState != "Normal": - clusterState = 'Degraded' - if dnNodeCount == 1: - clusterState = "Unavailable" if dbState != "Normal" \ - else "Normal" - else: - if primaryDbState != "Normal" or primaryDbNum != 1: - clusterState = "Unavailable" - outText = "" - if cmd.clusterStateQuery: - outText = \ - "-------------------------------------------------" \ - "----------------------\n\n" \ - "cluster_name : %s\ncluster_state : %s\nredistributing : No\n\n" % \ - (self.name, clusterState) - outText = outText + \ - "-------------------------------------------" \ - "----------------------------\n" - self.__fprintContent(outText, cmd.outputFile) - return - outText = "[ Cluster State ]\n\ncluster_state : " \ - "%s\nredistributing : No\n" % clusterState - outText = outText + "current_az : AZ_ALL\n\n[ Datanode " \ - "State ]\n\n" - nodeLen = NODE_ID_LEN + SPACE_LEN + maxNodeNameLen + SPACE_LEN - instanceLen = INSTANCE_ID_LEN + SPACE_LEN + ( - maxDataPathLen if cmd.dataPathQuery else 4) - if cmd.azNameQuery: - nodeLen += maxAzNameLen + SPACE_LEN - if cmd.portQuery: - instanceLen += 7 - for i in range(dnNodeCount - 1): - outText = outText + ("%-*s%-*s%-*s%-*s| " % (nodeLen, - "node", - IP_LEN, - "node_ip", - instanceLen, - "instance", - STATE_LEN, - "state")) - outText = outText + "%-*s%-*s%-*s%s\n" % ( - nodeLen, "node", IP_LEN, "node_ip", instanceLen, "instance", - "state") - maxLen = self.nodeCount * ( - nodeLen + instanceLen + IP_LEN + SPACE_LEN + STATE_LEN + - SPACE_LEN + SEPERATOR_LEN) - seperatorLine = "-" * maxLen - outText = outText + seperatorLine + "\n" - i = 0 - for dbNode in self.dbNodes: - for dnInst in dbNode.datanodes: - if 
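# The branching above reduces per-DN (role, db_state) pairs to one
# cluster_state: exactly one healthy Primary is required, any unhealthy or
# unexpected member degrades the cluster, and a single-DN cluster is simply
# Normal or Unavailable. A condensed sketch of that rule, not the exact
# control flow (the function name is an assumption):
def derive_cluster_state(dn_status):
    """dn_status: list of (local_role, db_state) tuples, one per datanode."""
    if len(dn_status) == 1:
        return "Normal" if dn_status[0][1] == "Normal" else "Unavailable"
    primaries = [s for r, s in dn_status if r == "Primary"]
    if len(primaries) != 1 or primaries[0] != "Normal":
        return "Unavailable"
    degraded = any(
        r not in ("Primary", "Standby", "Secondary", "Cascade") or
        s != "Normal"
        for r, s in dn_status)
    return "Degraded" if degraded else "Normal"

# derive_cluster_state([("Primary", "Normal"), ("Standby", "Normal")])
# -> "Normal"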
cmd.azNameQuery: - outText = outText + ( - "%-*s " % (maxAzNameLen, dbNode.azName)) - outText = outText + ("%-2u " % dbNode.id) - outText = outText + ( - "%-*s " % (maxNodeNameLen, dbNode.name)) - outText = outText + ("%-15s " % dnInst.listenIps[0]) - outText = outText + ("%u " % dnInst.instanceId) - if cmd.portQuery: - outText = outText + ("%-*u " % (6, dnInst.port)) - if cmd.dataPathQuery: - outText = outText + ( - "%-*s " % (maxDataPathLen, dnInst.datadir)) - else: - outText = outText + " " - outText = outText + ( - "%s " % self.__getDnRole(dnInst.instanceType)) - if dnNodeCount == 1: - outText = outText + ("%-7s" % "Primary") - else: - outText = outText + ("%-7s" % roleStatusArray[i]) - outText = outText + (" %s" % dbStateArray[i]) - if i < (dnNodeCount - 1): - outText = outText + " | " - else: - outText = outText + "\n" - i += 1 - self.__fprintContent(outText, cmd.outputFile) - except Exception as e: - raise Exception(ErrorCode.GAUSS_516["GAUSS_51652"] % str(e)) - - def __getDnRole(self, instanceType): - """ - function : Get DnRole by instanceType - input : Int - output : String - """ - if instanceType == MASTER_INSTANCE: - return "P" - elif instanceType == STANDBY_INSTANCE: - return "S" - elif instanceType == CASCADE_STANDBY: - return "C" - elif instanceType == DUMMY_STANDBY_INSTANCE: - return "R" - else: - return "" - - def __getDnInstanceNum(self): - dnInsNum = 0 - for dbNode in self.dbNodes: - dnInsNum += len(dbNode.datanodes) - return dnInsNum - - def __getDnSenderStatus(self, sshtool, localHostName): - sql_get = "select a.client_addr, b.state, b.sender_sent_location," \ - "b.sender_write_location, b.sender_flush_location," \ - "b.sender_replay_location, b.receiver_received_location," \ - "b.receiver_write_location, b.receiver_flush_location," \ - "b.receiver_replay_location, b.sync_percent, b.sync_state " \ - "from pg_stat_replication a inner join " \ - "pg_stat_get_wal_senders() b on a.pid = b.pid;" - syncInfo = [] - clusterState = "Normal" - primaryDbState = "Normal" - primaryDbNum = 0 - dnNodeCount = 0 - for dbNode in self.dbNodes: - for dnInst in dbNode.datanodes: - dnNodeCount += 1 - self.__getDnState(dnInst, dbNode, localHostName, sshtool) - if dnInst.localRole == "Primary": - primaryDbState = dnInst.state - primaryDbNum += 1 - output = "" - if dbNode.name != localHostName: - cmd = "[need_replace_quotes] gsql -m -d postgres -p " \ - "%s -A -t -c \"%s\"" % \ - (dnInst.port, sql_get) - (statusMap, output) = sshtool.getSshStatusOutput(cmd, [ - dbNode.name]) - if statusMap[dbNode.name] != 'Success' or output.find( - "failed to connect") >= 0: - continue - else: - output = output.split('\n')[1:-1] - else: - cmd = "gsql -m -d postgres -p %s -A -t -c \"%s\"" % ( - dnInst.port, sql_get) - (status, output) = subprocess.getstatusoutput(cmd) - if status != 0 or output.find( - "failed to connect") >= 0: - continue - else: - output = output.split('\n') - if not len(output): - continue - for col_loop in output: - syncInfo.append(col_loop.split('|')) - elif dnInst.localRole == "Cascade Standby": - subsql = "select state, sender_sent_location, sender_write_location," \ - "sender_flush_location, sender_replay_location," \ - "receiver_received_location, receiver_write_location," \ - "receiver_flush_location, receiver_replay_location," \ - "sync_percent, channel from pg_stat_get_wal_receiver();" - if dbNode.name != localHostName: - cmd = "[need_replace_quotes] gsql -m -d postgres -p " \ - "%s -A -t -c \"%s\"" % \ - (dnInst.port, subsql) - (statusMap, cascadeOutput) = 
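# __getDnSenderStatus runs its replication query through `gsql -A -t`
# (unaligned, tuples-only), so every returned row is one line of
# pipe-separated columns; the code then splits each line on '|' to build the
# syncInfo rows. A tiny sketch of that parsing (the sample row is fabricated):
def parse_unaligned_rows(gsql_text):
    """Split `gsql -A -t` output into lists of column strings."""
    return [line.split('|')
            for line in gsql_text.splitlines() if line.strip()]

demo = "10.0.0.2|Streaming|0/4000140|0/4000140|0/4000140|0/4000140|" \
       "0/4000140|0/4000140|0/4000140|0/4000140|100%|Sync"
assert parse_unaligned_rows(demo)[0][11] == "Sync"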
sshtool.getSshStatusOutput(cmd, [ - dbNode.name]) - if statusMap[dbNode.name] != 'Success' or cascadeOutput.find( - "failed to connect") >= 0: - continue - else: - cascadeOutput = cascadeOutput.split('\n')[1:-1] - else: - cmd = "gsql -m -d postgres -p %s -A -t -c \"%s\"" % ( - dnInst.port, subsql) - (status, cascadeOutput) = subprocess.getstatusoutput(cmd) - if status != 0 or cascadeOutput.find( - "failed to connect") >= 0: - continue - else: - cascadeOutput = cascadeOutput.split('\n') - if not len(cascadeOutput): - continue - for col_loop in cascadeOutput: - col_loop = col_loop.split('|') - cascadeIps = col_loop[-1].split('<--') - col_loop.insert(0, cascadeIps[0].split(':')[0]) - col_loop.insert(11, "Async") - col_loop[-1] = cascadeIps[-1] - syncInfo.append(col_loop) - else: - if dnInst.localRole != "Standby" and \ - dnInst.localRole != "Secondary": - clusterState = "Degraded" - if dnInst.state != "Normal": - clusterState = "Degraded" - if dnNodeCount == 1: - clusterState = "Unavailable" if dnInst.state != "Normal" \ - else "Normal" - else: - if primaryDbState != "Normal" or primaryDbNum != 1: - clusterState = "Unavailable" - return (clusterState, syncInfo) - - def __getDnState(self, dnInst, dbNode, localHostName, sshtool): - sql = "select local_role, static_connections, db_state from " \ - "pg_stat_get_stream_replications();" - if dbNode.name != localHostName: - # [SUCCESS] hostname:\n when ssh, The third line is the sql result - minValidLine = 3 - cmd = "[need_replace_quotes] gsql -m -d postgres -p %s -c " \ - "\"%s\"" % ( - dnInst.port, sql) - (statusMap, output) = sshtool.getSshStatusOutput(cmd, - [dbNode.name]) - dnDown = output.find("failed to connect") >= 0 - if statusMap[dbNode.name] != 'Success' or dnDown: - dnInst.localRole = "Down" if dnDown else "Unknown" - dnInst.staticConnections = 0 - dnInst.state = "Manually stopped" if dnDown else "Unknown" - else: - lineSplitRes = output.split("\n") - if len(lineSplitRes) <= minValidLine or len( - lineSplitRes[minValidLine].split("|")) != 3: - dnInst.localRole = "Unknown" - dnInst.staticConnections = 0 - dnInst.state = "Unknown" - else: - columnRes = lineSplitRes[minValidLine].split("|") - dnInst.localRole = columnRes[0].strip() - dnInst.staticConnections = columnRes[1].strip() - dnInst.state = columnRes[2].strip() - else: - # The second line is the sql result - minValidLine = 2 - cmd = "gsql -m -d postgres -p %s -c \"%s\"" % (dnInst.port, sql) - (status, output) = subprocess.getstatusoutput(cmd) - dnDown = output.find("failed to connect") >= 0 - if status != 0 or dnDown: - dnInst.localRole = "Down" if dnDown else "Unknown" - dnInst.staticConnections = 0 - dnInst.state = "Manually stopped" if dnDown else "Unknown" - else: - lineSplitRes = output.split("\n") - if len(lineSplitRes) <= minValidLine or len( - lineSplitRes[minValidLine].split("|")) != 3: - dnInst.localRole = "Unknown" - dnInst.staticConnections = 0 - dnInst.state = "Unknown" - else: - columnRes = lineSplitRes[minValidLine].split("|") - dnInst.localRole = columnRes[0].strip() - dnInst.staticConnections = columnRes[1].strip() - dnInst.state = columnRes[2].strip() - - def __fprintContent(self, content, fileName): - if fileName != "": - g_file.createFileInSafeMode(fileName) - with open(fileName, "a") as fp: - fp.write(content) - fp.flush() - sys.stdout.write(content) - - def __checkOsUser(self, user): - """ - function : Check os user - input : String - output : NA - """ - try: - user = pwd.getpwnam(user).pw_gid - except Exception as e: - raise 
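# __getDnState below picks the result row of pg_stat_get_stream_replications()
# by a fixed line offset: line 2 when gsql runs locally, line 3 over ssh
# (the ssh wrapper prepends a "[SUCCESS] hostname:" banner). A sketch of that
# offset-based parse against fabricated gsql output:
def parse_stream_replication(output, over_ssh=False):
    """Return (local_role, static_connections, db_state) or None."""
    min_valid_line = 3 if over_ssh else 2
    lines = output.split("\n")
    if len(lines) <= min_valid_line:
        return None
    cols = lines[min_valid_line].split("|")
    if len(cols) != 3:
        return None
    return tuple(col.strip() for col in cols)

demo = (" local_role | static_connections | db_state \n"
        "------------+--------------------+----------\n"
        " Primary    | 1                  | Normal\n"
        "(1 row)")
assert parse_stream_replication(demo) == ("Primary", "1", "Normal")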
Exception(ErrorCode.GAUSS_503["GAUSS_50300"] % user) - - def __getStaticConfigFilePath(self, user): - """ - function : get the path of static configuration file. - input : String - output : String - """ - gaussHome = self.__getEnvironmentParameterValue("GAUSSHOME", user) - if (gaussHome == ""): - raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % \ - ("installation path of designated user [%s]" % - user)) - # if under upgrade, and use chose strategy, we may get a wrong path, - # so we will use the realpath of gausshome - commitid = VersionInfo.getCommitid() - appPath = gaussHome + "_" + commitid - staticConfigFile = "%s/bin/cluster_static_config" % appPath - staticConfigBak = "%s/bin/cluster_static_config_bak" % appPath - staticConfig = "%s/bin/cluster_static_config" % os.path.realpath( - gaussHome) - if os.path.exists(staticConfig): - return staticConfig - elif (os.path.exists(staticConfigFile)): - return staticConfigFile - elif (os.path.exists(staticConfigBak)): - return staticConfigBak - - else: - raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % \ - ("static configuration file [%s] of " - "designated user [%s]" % (staticConfig, user))) - - def __getEnvironmentParameterValue(self, environmentParameterName, user): - """ - function :Get the environment parameter. - !!!!Do not call this function in preinstall.py script. - because we determine if we are using env separate version by the - value of MPPDB_ENV_SEPARATE_PATH - input : String,String - output : String - """ - # get mpprc file - mpprcFile = os.getenv('MPPDB_ENV_SEPARATE_PATH') - if mpprcFile is not None and mpprcFile != "": - mpprcFile = mpprcFile.replace("\\", "\\\\").replace('"', '\\"\\"') - checkPathVaild(mpprcFile) - userProfile = mpprcFile - else: - userProfile = "~/.bashrc" - # build shell command - if (os.getuid() == 0): - cmd = "su - %s -c 'source %s;echo $%s' 2>/dev/null" % ( - user, userProfile, environmentParameterName) - else: - cmd = "source %s;echo $%s 2>/dev/null" % (userProfile, - environmentParameterName) - (status, output) = subprocess.getstatusoutput(cmd) - if (status != 0): - raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] - % cmd + " Error: \n%s" % output) - return output.split("\n")[0] - - def __getStatusByOM(self, user): - """ - function :Get the environment parameter. - !!!!Do not call this function in preinstall.py script. 
- because we determine if we are using env separate version by the - value of MPPDB_ENV_SEPARATE_PATH - input : String,String - output : String - """ - # get mpprc file - mpprcFile = os.getenv('MPPDB_ENV_SEPARATE_PATH') - if mpprcFile is not None and mpprcFile != "": - mpprcFile = mpprcFile.replace("\\", "\\\\").replace('"', '\\"\\"') - checkPathVaild(mpprcFile) - userProfile = mpprcFile - else: - userProfile = "~/.bashrc" - # build shell command - if os.getuid() == 0: - cmd = "su - %s -c 'source %s;gs_om -t status --detail|tail -1" % ( - user, userProfile) - else: - cmd = "source %s;gs_om -t status --detail|tail -1" % (userProfile) - (status, output) = subprocess.getstatusoutput(cmd) - if status != 0: - raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] - % cmd + " Error: \n%s" % output) - return output.split("\n")[0] - - def __readStaticConfigFile(self, staticConfigFile, user, isLCCluster=False, - ignoreLocalEnv=False): - """ - function : read cluster information from static configuration file - input : String,String - output : NA - """ - fp = None - try: - # get env parameter - gauss_env = self.__getEnvironmentParameterValue("GAUSS_ENV", user) - self.name = self.__getEnvironmentParameterValue("GS_CLUSTER_NAME", - user) - self.appPath = self.__getEnvironmentParameterValue("GAUSSHOME", - user) - logPathWithUser = self.__getEnvironmentParameterValue("GAUSSLOG", - user) - - if not ignoreLocalEnv: - if gauss_env == "2" and self.name == "": - raise Exception(ErrorCode.GAUSS_503["GAUSS_50300"] - % ("cluster name of designated user" - " [%s]" % user)) - if self.appPath == "": - raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % \ - ("installation path of designated user " - "[%s]" % user)) - if logPathWithUser == "": - raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % \ - ("log path of designated user [%s]" % - user)) - - splitMark = "/%s" % user - # set log path without user - # find the path from right to left - self.logPath = logPathWithUser[ - 0:(logPathWithUser.rfind(splitMark))] - staticConfigFilePath = os.path.split(staticConfigFile)[0] - versionFile = os.path.join( - staticConfigFilePath, "upgrade_version") - version, number, commitid = VersionInfo.get_version_info( - versionFile) - try: - # read static_config_file - fp = open(staticConfigFile, "rb") - if float(number) <= 92.200: - info = fp.read(32) - (crc, lenth, version, currenttime, nodeNum, - localNodeId) = struct.unpack("=qIIqiI", info) - else: - info = fp.read(28) - (crc, lenth, version, currenttime, nodeNum, - localNodeId) = struct.unpack("=IIIqiI", info) - self.version = version - self.installTime = currenttime - self.localNodeId = localNodeId - self.nodeCount = nodeNum - except Exception as e: - raise Exception(ErrorCode.GAUSS_502["GAUSS_50204"] - % staticConfigFile + " Error:\n" + str(e)) - if version <= 100: - raise Exception(ErrorCode.GAUSS_516["GAUSS_51637"] % \ - ("cluster static config version[%s]" % version, - "the new version[%s]" % BIN_CONFIG_VERSION)) - elif version >= 101 and version <= 200: - self.clusterType = CLUSTER_TYPE_SINGLE - if BIN_CONFIG_VERSION_SINGLE != version: - raise Exception(ErrorCode.GAUSS_516["GAUSS_51637"] % \ - ("cluster static config version[%s]" - % version, "the new version[%s]" - % BIN_CONFIG_VERSION_SINGLE)) - elif version >= 201 and version <= 300: - # single primary multi standy - self.clusterType = CLUSTER_TYPE_SINGLE_PRIMARY_MULTI_STANDBY - if (BIN_CONFIG_VERSION_SINGLE_PRIMARY_MULTI_STANDBY - != version): - raise Exception( - ErrorCode.GAUSS_516["GAUSS_51637"] - % ("cluster static 
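# __readStaticConfigFile switches header layouts on the packaged release
# number: releases at or below 92.200 wrote a 32-byte header with an 8-byte
# crc ("=qIIqiI"), newer ones the 28-byte header with a 4-byte crc
# ("=IIIqiI"). A sketch of that dispatch (read_header is an assumed name):
import io
import struct

OLD_FMT, NEW_FMT = "=qIIqiI", "=IIIqiI"
assert struct.calcsize(OLD_FMT) == 32 and struct.calcsize(NEW_FMT) == 28

def read_header(fp, release_number):
    """Unpack (crc, len, version, time, nodeNum, localNodeId)."""
    fmt = OLD_FMT if float(release_number) <= 92.200 else NEW_FMT
    return struct.unpack(fmt, fp.read(struct.calcsize(fmt)))

hdr = struct.pack(NEW_FMT, 0, 28, 301, 0, 3, 1)
assert read_header(io.BytesIO(hdr), "92.201")[2] == 301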
config version[%s]" % version, - "the new version[%s]" - % BIN_CONFIG_VERSION_SINGLE_PRIMARY_MULTI_STANDBY)) - elif version >= 301 and version <= 400: - # single inst - self.clusterType = CLUSTER_TYPE_SINGLE_INST - if BIN_CONFIG_VERSION_SINGLE_INST != version: - raise Exception(ErrorCode.GAUSS_516["GAUSS_51637"] % \ - ("cluster static config version[%s]" - % version, "the new version[%s]" - % BIN_CONFIG_VERSION_SINGLE_INST)) - - self.dbNodes = [] - try: - for i in range(nodeNum): - offset = (fp.tell() // PAGE_SIZE + 1) * PAGE_SIZE - fp.seek(offset) - dbNode = self.__unPackNodeInfo(fp, number, isLCCluster) - self.dbNodes.append(dbNode) - fp.close() - except Exception as e: - raise Exception(ErrorCode.GAUSS_502["GAUSS_50204"] % \ - staticConfigFile + " Error:\nThe content is " - "not correct.") - except Exception as e: - if (fp): - fp.close() - raise Exception(str(e)) - - def __unPackNodeInfo(self, fp, number, isLCCluster=False): - """ - function : unpack a node config info - input : file - output : Object - """ - if float(number) <= 92.200: - info = fp.read(76) - (crc, nodeId, nodeName) = struct.unpack("=qI64s", info) - else: - info = fp.read(72) - (crc, nodeId, nodeName) = struct.unpack("=II64s", info) - nodeName = nodeName.decode().strip('\x00') - dbNode = dbNodeInfo(nodeId, nodeName) - info = fp.read(68) - (azName, azPriority) = struct.unpack("=64sI", info) - dbNode.azName = azName.decode().strip('\x00') - dbNode.azPriority = azPriority - - # get backIps - self.__unPackIps(fp, dbNode.backIps) - # get sshIps - self.__unPackIps(fp, dbNode.sshIps) - if (not isLCCluster): - # get cm_server information - self.__unPackCmsInfo(fp, dbNode) - # get cm_agent information - self.__unpackAgentInfo(fp, dbNode) - # get gtm information - self.__unpackGtmInfo(fp, dbNode) - info = fp.read(404) - # get cn information - self.__unpackCooInfo(fp, dbNode) - # get DB information - self.__unpackDataNode(fp, dbNode) - if (not isLCCluster): - # get etcd information - self.__unpackEtcdInfo(fp, dbNode) - info = fp.read(8) - # set DB azName for OLAP - for inst in dbNode.datanodes: - inst.azName = dbNode.azName - - return dbNode - - def __unpackEtcdInfo(self, fp, dbNode): - """ - function : unpack the info of etcd - input : file,Object - output : NA - """ - etcdInst = instanceInfo() - etcdInst.instanceRole = INSTANCE_ROLE_ETCD - etcdInst.hostname = dbNode.name - etcdInst.instanceType = INSTANCE_TYPE_UNDEFINED - info = fp.read(1100) - (etcdNum, etcdInst.instanceId, etcdInst.mirrorId, etcdhostname, - etcdInst.datadir) = struct.unpack("=IIi64s1024s", info) - etcdInst.datadir = etcdInst.datadir.decode().strip('\x00') - self.__unPackIps(fp, etcdInst.listenIps) - info = fp.read(4) - (etcdInst.port,) = struct.unpack("=I", info) - self.__unPackIps(fp, etcdInst.haIps) - info = fp.read(4) - (etcdInst.haPort,) = struct.unpack("=I", info) - if (etcdNum == 1): - dbNode.etcdNum = 1 - dbNode.etcds.append(etcdInst) - self.etcdcount += 1 - else: - dbNode.etcdNum = 0 - dbNode.etcds = [] - - def __unPackIps(self, fp, ips): - """ - function : Unpack the info of ips - input : file,[] - output : NA - """ - info = fp.read(4) - (n,) = struct.unpack("=i", info) - for i in range(int(n)): - info = fp.read(128) - (currentIp,) = struct.unpack("=128s", info) - currentIp = currentIp.decode().strip('\x00') - ips.append(str(currentIp.strip())) - info = fp.read(128 * (MAX_IP_NUM - n)) - - def __unPackCmsInfo(self, fp, dbNode): - """ - function : Unpack the info of CMserver - input : file Object - output : NA - """ - cmsInst = instanceInfo() - 
cmsInst.instanceRole = INSTANCE_ROLE_CMSERVER - cmsInst.hostname = dbNode.name - info = fp.read(1164) - (cmsInst.instanceId, cmsInst.mirrorId, dbNode.cmDataDir, cmsInst.level, - self.cmsFloatIp) = struct.unpack("=II1024sI128s", info) - dbNode.cmDataDir = dbNode.cmDataDir.decode().strip('\x00') - self.cmsFloatIp = self.cmsFloatIp.decode().strip('\x00') - cmsInst.datadir = "%s/cm_server" % dbNode.cmDataDir - self.__unPackIps(fp, cmsInst.listenIps) - info = fp.read(4) - (cmsInst.port,) = struct.unpack("=I", info) - self.__unPackIps(fp, cmsInst.haIps) - info = fp.read(8) - (cmsInst.haPort, cmsInst.instanceType) = struct.unpack("=II", info) - if (cmsInst.instanceType == MASTER_INSTANCE): - dbNode.cmsNum = 1 - elif (cmsInst.instanceType == STANDBY_INSTANCE): - dbNode.cmsNum = 0 - else: - raise Exception(ErrorCode.GAUSS_512["GAUSS_51204"] - % ("CMServer", cmsInst.instanceType)) - info = fp.read(4 + 128 * MAX_IP_NUM + 4) - - if (cmsInst.instanceId): - dbNode.cmservers.append(cmsInst) - self.cmscount += 1 - else: - dbNode.cmservers = [] - - def __unpackAgentInfo(self, fp, dbNode): - """ - function : Unpack the info of agent. It should be called after - __unPackCmsInfo, because dbNode.cmDataDir - get value in __unPackCmsInfo - input : file Object - output : NA - """ - cmaInst = instanceInfo() - cmaInst.instanceRole = INSTANCE_ROLE_CMAGENT - cmaInst.hostname = dbNode.name - cmaInst.instanceType = INSTANCE_TYPE_UNDEFINED - info = fp.read(8) - (cmaInst.instanceId, cmaInst.mirrorId) = struct.unpack("=Ii", info) - self.__unPackIps(fp, cmaInst.listenIps) - cmaInst.datadir = "%s/cm_agent" % dbNode.cmDataDir - dbNode.cmagents.append(cmaInst) - - def __unpackGtmInfo(self, fp, dbNode): - """ - function : Unpack the info of gtm - input : file Object - output : NA - """ - gtmInst = instanceInfo() - gtmInst.instanceRole = INSTANCE_ROLE_GTM - gtmInst.hostname = dbNode.name - info = fp.read(1036) - (gtmInst.instanceId, gtmInst.mirrorId, gtmNum, - gtmInst.datadir) = struct.unpack("=III1024s", info) - gtmInst.datadir = gtmInst.datadir.decode().strip('\x00') - self.__unPackIps(fp, gtmInst.listenIps) - info = fp.read(8) - (gtmInst.port, gtmInst.instanceType) = struct.unpack("=II", info) - if (gtmInst.instanceType == MASTER_INSTANCE): - dbNode.gtmNum = 1 - elif (gtmInst.instanceType == STANDBY_INSTANCE): - dbNode.gtmNum = 0 - else: - raise Exception(ErrorCode.GAUSS_512["GAUSS_51204"] % ( - "GTM", gtmInst.instanceType)) - self.__unPackIps(fp, gtmInst.haIps) - info = fp.read(4) - (gtmInst.haPort,) = struct.unpack("=I", info) - info = fp.read(1024 + 4 + 128 * MAX_IP_NUM + 4) - - if (gtmNum == 1): - dbNode.gtms.append(gtmInst) - self.gtmcount += 1 - else: - dbNode.gtms = [] - - def __unpackCooInfo(self, fp, dbNode): - """ - function : Unpack the info of coordinator - input : file Object - output : NA - """ - cooInst = instanceInfo() - cooInst.instanceRole = INSTANCE_ROLE_COODINATOR - cooInst.hostname = dbNode.name - cooInst.instanceType = INSTANCE_TYPE_UNDEFINED - info = fp.read(2060) - (cooInst.instanceId, cooInst.mirrorId, cooNum, cooInst.datadir, - cooInst.ssdDir) = struct.unpack("=IiI1024s1024s", info) - cooInst.datadir = cooInst.datadir.decode().strip('\x00') - cooInst.ssdDir = cooInst.ssdDir.decode().strip('\x00') - self.__unPackIps(fp, cooInst.listenIps) - info = fp.read(8) - (cooInst.port, cooInst.haPort) = struct.unpack("=II", info) - if (cooNum == 1): - dbNode.cooNum = 1 - dbNode.coordinators.append(cooInst) - else: - dbNode.cooNum = 0 - dbNode.coordinators = [] - - def __unpackDataNode(self, fp, dbNode): - """ - 
function : Unpack the info of datanode - input : file Object - output : NA - """ - info = fp.read(4) - (dataNodeNums,) = struct.unpack("=I", info) - dbNode.dataNum = 0 - - dbNode.datanodes = [] - for i in range(dataNodeNums): - dnInst = instanceInfo() - dnInst.instanceRole = INSTANCE_ROLE_DATANODE - dnInst.hostname = dbNode.name - # In the upgrade scenario, there are two different read methods - # for static config file. - # First, use the new read mode, and judge that if the new read - # mode is not correct, - # then rollback by fp.seek(), and exchange its(xlogdir) value - # with ssddir. - info = fp.read(2056) - (dnInst.instanceId, dnInst.mirrorId, dnInst.datadir, - dnInst.xlogdir) = struct.unpack("=II1024s1024s", info) - dnInst.datadir = dnInst.datadir.decode().strip('\x00') - dnInst.xlogdir = dnInst.xlogdir.decode().strip('\x00') - - info = fp.read(1024) - (dnInst.ssdDir) = struct.unpack("=1024s", info) - dnInst.ssdDir = dnInst.ssdDir[0].decode().strip('\x00') - # if notsetXlog,ssdDir should not be null.use by upgrade. - if dnInst.ssdDir != "" and dnInst.ssdDir[0] != '/': - fp.seek(fp.tell() - 1024) - dnInst.ssdDir = dnInst.xlogdir - dnInst.xlogdir = "" - - self.__unPackIps(fp, dnInst.listenIps) - info = fp.read(8) - (dnInst.port, dnInst.instanceType) = struct.unpack("=II", info) - if (dnInst.instanceType == MASTER_INSTANCE): - dbNode.dataNum += 1 - elif (dnInst.instanceType in [STANDBY_INSTANCE, - DUMMY_STANDBY_INSTANCE, CASCADE_STANDBY]): - pass - else: - raise Exception(ErrorCode.GAUSS_512["GAUSS_51204"] - % ("DN", dnInst.instanceType)) - self.__unPackIps(fp, dnInst.haIps) - info = fp.read(4) - (dnInst.haPort,) = struct.unpack("=I", info) - if ( - self.clusterType == - CLUSTER_TYPE_SINGLE_PRIMARY_MULTI_STANDBY or - self.clusterType == CLUSTER_TYPE_SINGLE_INST): - maxStandbyCount = MIRROR_COUNT_REPLICATION_MAX - 1 - for j in range(maxStandbyCount): - peerDbInst = peerInstanceInfo() - info = fp.read(1024) - (peerDbInst.peerDataPath,) = struct.unpack("=1024s", info) - peerDbInst.peerDataPath = \ - peerDbInst.peerDataPath.decode().strip('\x00') - self.__unPackIps(fp, peerDbInst.peerHAIPs) - info = fp.read(8) - (peerDbInst.peerHAPort, - peerDbInst.peerRole) = struct.unpack("=II", info) - dnInst.peerInstanceInfos.append(peerDbInst) - else: - peerDbInst = peerInstanceInfo() - info = fp.read(1024) - (peerDbInst.peerDataPath,) = struct.unpack("=1024s", info) - peerDbInst.peerDataPath = \ - peerDbInst.peerDataPath.decode().strip('\x00') - self.__unPackIps(fp, peerDbInst.peerHAIPs) - info = fp.read(8) - (peerDbInst.peerHAPort, peerDbInst.peerRole) = \ - struct.unpack("=II", info) - info = fp.read(1024) - (peerDbInst.peerData2Path,) = struct.unpack("=1024s", info) - peerDbInst.peerData2Path = \ - peerDbInst.peerDataPath.decode().strip('\x00') - self.__unPackIps(fp, peerDbInst.peer2HAIPs) - info = fp.read(8) - (peerDbInst.peer2HAPort, peerDbInst.peer2Role) = \ - struct.unpack("=II", info) - dnInst.peerInstanceInfos.append(peerDbInst) - dbNode.datanodes.append(dnInst) - - def initFromStaticConfigWithoutUser(self, staticConfigFile): - """ - function : Init cluster from static config with out user - input : file Object - output : NA - """ - fp = None - try: - staticConfigFilePath = os.path.split(staticConfigFile)[0] - versionFile = os.path.join( - staticConfigFilePath, "upgrade_version") - version, number, commitid = VersionInfo.get_version_info( - versionFile) - # read cluster info from static config file - fp = open(staticConfigFile, "rb") - if float(number) <= 92.200: - info = fp.read(32) - (crc, 
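# The xlogdir/ssdDir disambiguation in __unpackDataNode above is a
# compatibility trick: newer static configs store datadir+xlogdir+ssdDir,
# older ones had no xlogdir, so when the bytes read as ssdDir do not look
# like an absolute path the code rewinds with fp.seek() and reinterprets the
# fields. A stripped-down sketch of that rewind-and-reinterpret pattern on an
# in-memory stream (8-byte fields instead of 1024 for brevity):
import io

def read_field_with_rollback(fp):
    """Read 8 bytes; if they don't start with '/', rewind and return None."""
    pos = fp.tell()
    raw = fp.read(8).decode().strip("\x00")
    if raw and not raw.startswith("/"):
        fp.seek(pos)  # old layout: give the bytes back for re-reading
        return None
    return raw

stream = io.BytesIO(b"notapath")
assert read_field_with_rollback(stream) is None
assert stream.tell() == 0  # offset restored, ready for the old-layout parse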
lenth, version, currenttime, nodeNum, - localNodeId) = struct.unpack("=qIIqiI", info) - else: - info = fp.read(28) - (crc, lenth, version, currenttime, nodeNum, - localNodeId) = struct.unpack("=IIIqiI", info) - if (version <= 100): - raise Exception(ErrorCode.GAUSS_516["GAUSS_51637"] - % ("cluster static config version[%s]" - % version, "the new version[%s]" - % BIN_CONFIG_VERSION)) - elif (version >= 101 and version <= 200): - self.clusterType = CLUSTER_TYPE_SINGLE - if (BIN_CONFIG_VERSION_SINGLE != version): - raise Exception(ErrorCode.GAUSS_516["GAUSS_51637"] - % ("cluster static config version[%s]" - % version, "the new version[%s]" - % BIN_CONFIG_VERSION_SINGLE)) - elif (version >= 201 and version <= 300): - self.clusterType = CLUSTER_TYPE_SINGLE_PRIMARY_MULTI_STANDBY - if ( - BIN_CONFIG_VERSION_SINGLE_PRIMARY_MULTI_STANDBY != - version): - raise Exception(ErrorCode.GAUSS_516["GAUSS_51637"] % ( - "cluster static config version[%s]" % version, - "the new version[%s]" - % BIN_CONFIG_VERSION_SINGLE_PRIMARY_MULTI_STANDBY)) - elif (version >= 301 and version <= 400): - self.clusterType = CLUSTER_TYPE_SINGLE_INST - if (BIN_CONFIG_VERSION_SINGLE_INST != version): - raise Exception(ErrorCode.GAUSS_516["GAUSS_51637"] - % ("cluster static config version[%s]" - % version, "the new version[%s]" - % BIN_CONFIG_VERSION_SINGLE_INST)) - - self.dbNodes = [] - for i in range(nodeNum): - offset = (fp.tell() // PAGE_SIZE + 1) * PAGE_SIZE - fp.seek(offset) - dbNode = self.__unPackNodeInfo(fp, number) - self.dbNodes.append(dbNode) - fp.close() - except Exception as e: - if (fp): - fp.close() - raise Exception(ErrorCode.GAUSS_512["GAUSS_51203"] - % "cluster" + " Error: \n%s" % str(e)) - - def __appendInstanceId(self, static_config_file): - """ - function : instance id append to the old cluster. - input : file Object - output : NA - """ - try: - # init oldClusterInfo - oldClusterInfo = dbClusterInfo() - oldClusterInfo.initFromStaticConfigWithoutUser(static_config_file) - - # get max CN/CMA/master-standby DN/dummy DN instanceId of old - # cluster. - # CMS/GTM/ETCD instanceId and nodeId will not be changed. - maxCNInstanceId = 0 - maxCMAInstanceId = 0 - maxMasterDNInstanceId = 0 - maxDummyDNInstanceId = 0 - # new DB mirrorId shoud be refreshed. - # CN mirrorId is const -1, so no need to refresh. - # CMA mirrorId is const-3, so no need to refresh. - # ETCD mirrorId is const -5, so no need to refresh. - # CMS and GTM of new cluster will not simultaneous exist with - # old cluster, - # so no need to refresh. 
- maxMirrorId = 0 - for olddbNode in oldClusterInfo.dbNodes: - for oldcnInst in olddbNode.coordinators: - if (oldcnInst.instanceId > maxCNInstanceId): - maxCNInstanceId = oldcnInst.instanceId - for oldcmaInst in olddbNode.cmagents: - if (oldcmaInst.instanceId > maxCMAInstanceId): - maxCMAInstanceId = oldcmaInst.instanceId - for olddnInst in olddbNode.datanodes: - if (olddnInst.instanceType == MASTER_INSTANCE and - olddnInst.instanceId > maxMasterDNInstanceId): - maxMasterDNInstanceId = olddnInst.instanceId - elif (olddnInst.instanceType == DUMMY_STANDBY_INSTANCE and - olddnInst.instanceId > maxDummyDNInstanceId): - maxDummyDNInstanceId = olddnInst.instanceId - if (olddnInst.mirrorId > maxMirrorId): - maxMirrorId = olddnInst.mirrorId - for oldcmsInst in olddbNode.cmservers: - if (oldcmsInst.mirrorId > maxMirrorId): - maxMirrorId = oldcmsInst.mirrorId - for oldetcdInst in olddbNode.etcds: - if (oldetcdInst.mirrorId > maxMirrorId): - maxMirrorId = oldetcdInst.mirrorId - - maxCNInstanceId += 1 - maxCMAInstanceId += 1 - maxMasterDNInstanceId += 2 - maxDummyDNInstanceId += 1 - maxMirrorId += 1 - mirrorIdDict = {} - - for newdbNode in self.dbNodes: - if (len(newdbNode.coordinators) > 0): - ## refresh CN instanceId here - newdbNode.coordinators[0].instanceId = maxCNInstanceId - maxCNInstanceId += 1 - - if (len(newdbNode.cmagents) > 0): - ## refresh CMA instanceId here - newdbNode.cmagents[0].instanceId = maxCMAInstanceId - maxCMAInstanceId += 1 - - for dnInst in newdbNode.datanodes: - if (dnInst.instanceType == MASTER_INSTANCE): - masterInst = dnInst - ## refresh master instanceId here - dnInst.instanceId = maxMasterDNInstanceId - maxMasterDNInstanceId += 1 - - ## get related standby and dummy-standby instances - for dbNode in self.dbNodes: - for inst in dbNode.datanodes: - if (inst.mirrorId == dnInst.mirrorId and - inst.instanceType == STANDBY_INSTANCE): - standbyInst = inst - ## refresh related standby instanceId here - inst.instanceId = maxMasterDNInstanceId - maxMasterDNInstanceId += 1 - - elif (inst.mirrorId == dnInst.mirrorId and - inst.instanceType == - DUMMY_STANDBY_INSTANCE): - dummyInst = inst - ## refresh related dummy-standby - # instanceId here - inst.instanceId = maxDummyDNInstanceId - maxDummyDNInstanceId += 1 - - ## refresh mirrorId here,Must refresh it at last. 
- mirrorIdDict[maxMirrorId] = [masterInst, standbyInst, - dummyInst] - maxMirrorId += 1 - - for mirrorId in list(mirrorIdDict.keys()): - mirrorIdDict[mirrorId][0].mirrorId = mirrorId - mirrorIdDict[mirrorId][1].mirrorId = mirrorId - mirrorIdDict[mirrorId][2].mirrorId = mirrorId - except Exception as e: - raise Exception(str(e)) - - def setInstId(self, instList, nodeIdInstIdDict, newNodeId, newInstId): - """ - instList instance list - nodeIdInstIdDict node id and instance id dict - newNodeId new node id - newInstId new instance id - - """ - for inst in instList: - if (newNodeId in list(nodeIdInstIdDict.keys())): - inst.instanceId = nodeIdInstIdDict[newNodeId] - # the New agent instance - else: - inst.instanceId = newInstId - newInstId += 1 - return newInstId - - def refreshInstIdByInstType(self, oldNodesList, newNodesList, - instType="cmagent"): - """ - """ - nodeIdInstanceIdDict = {} - # get the node id and cmagent/cmserver/gtm/etcd/cn instance id dict - for oldNode in oldNodesList: - if (instType == "cmagent"): - for cmaInst in oldNode.cmagents: - nodeIdInstanceIdDict[oldNode.id] = cmaInst.instanceId - elif (instType == "cmserver"): - for cmsInst in oldNode.cmservers: - nodeIdInstanceIdDict[oldNode.id] = cmsInst.instanceId - elif (instType == "gtm"): - for gtmInst in oldNode.gtms: - nodeIdInstanceIdDict[oldNode.id] = gtmInst.instanceId - elif (instType == "etcd"): - for etcdInst in oldNode.etcds: - nodeIdInstanceIdDict[oldNode.id] = etcdInst.instanceId - elif (instType == "cn"): - for cnInst in oldNode.coordinators: - # warm-standby: the number of nodes is same,so refrush - # by id - # addcn out cluster:refrush by id or nodename - # addcn in cluster:refrush by id or nodename - # deletecn out cluster:refrush by nodename - # deletecn in cluster:refrush by id or nodename - # expand:refrush by id or nodename - # shink in tail:refrush by id or nodename - # shink in mid:refrush by nodename - if (len(oldNodesList) == len(newNodesList)): - nodeIdInstanceIdDict[oldNode.id] = cnInst.instanceId - else: - nodeIdInstanceIdDict[oldNode.name] = cnInst.instanceId - - # sort instance id lists and set newInstId = the max ID num + 1 - instIDList = list(nodeIdInstanceIdDict.values()) - instIDList.sort() - if (len(instIDList) > 0): - newInstId = instIDList[-1] + 1 - else: - newInstId = 1 - - # refresh instance id by oldClusterInfo - for newNode in newNodesList: - if (instType == "cmagent"): - newInstId = self.setInstId(newNode.cmagents, - nodeIdInstanceIdDict, newNode.id, - newInstId) - elif (instType == "cmserver"): - newInstId = self.setInstId(newNode.cmservers, - nodeIdInstanceIdDict, newNode.id, - newInstId) - elif (instType == "gtm"): - newInstId = self.setInstId(newNode.gtms, nodeIdInstanceIdDict, - newNode.id, newInstId) - elif (instType == "etcd"): - newInstId = self.setInstId(newNode.etcds, nodeIdInstanceIdDict, - newNode.id, newInstId) - elif (instType == "cn"): - if (len(oldNodesList) == len(newNodesList)): - newInstId = self.setInstId(newNode.coordinators, - nodeIdInstanceIdDict, - newNode.id, newInstId) - else: - newInstId = self.setInstId(newNode.coordinators, - nodeIdInstanceIdDict, - newNode.name, newInstId) - - def flushCNInstanceId(self, oldNodesList, newNodesList): - """ - function : Refresh CN instance id - input : oldNodesList: :The cluster nodes list from - static_config_file - newNodesList: :The cluster nodes list from - new oldes - output : NA - """ - self.refreshInstIdByInstType(oldNodesList, newNodesList, "cn") - - def getMaxStandbyAndDummyDNInstanceId(self, oldNodesList): - """ - 
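# setInstId/refreshInstIdByInstType above reuse an instance id whenever the
# node already existed in the old cluster, and hand out fresh ids (max old
# id + 1 upward) otherwise. A compact sketch of that allocate-or-reuse rule
# (assign_ids and its arguments are illustrative names):
def assign_ids(node_keys, old_ids):
    """node_keys: node ids/names in order; old_ids: key -> old instance id."""
    next_id = max(old_ids.values()) + 1 if old_ids else 1
    assigned = {}
    for key in node_keys:
        if key in old_ids:
            assigned[key] = old_ids[key]  # keep the old cluster's id
        else:
            assigned[key] = next_id       # brand-new node: fresh id
            next_id += 1
    return assigned

# assign_ids([1, 2, 3], {1: 5001, 2: 5002}) -> {1: 5001, 2: 5002, 3: 5003}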
function : get max standby and dummy DB instanceId of old cluster. - input : oldNodesList: :The cluster nodes list from - static_config_file - output : NA - """ - # get max standby and dummy DB instanceId of old cluster. - maxStandbyDNInstanceId = 0 - maxDummyDNInstanceId = 0 - for oldNode in oldNodesList: - for olddnInst in oldNode.datanodes: - if (olddnInst.instanceType == STANDBY_INSTANCE and - olddnInst.instanceId > maxStandbyDNInstanceId): - maxStandbyDNInstanceId = olddnInst.instanceId - elif (olddnInst.instanceType == DUMMY_STANDBY_INSTANCE and - olddnInst.instanceId > maxDummyDNInstanceId): - maxDummyDNInstanceId = olddnInst.instanceId - return (maxStandbyDNInstanceId, maxDummyDNInstanceId) - - def flushDNInstanceId(self, oldNodesList, newNodesList): - """ - function : Refresh DB instance id. When refresh DB id, the node id - has been refreshed. - input : oldNodesList: :The cluster nodes list from - static_config_file - newNodesList: :The cluster nodes list from - new oldes - output : NA - """ - # get all old node id list - oldNodeIdList = [] - for oldNode in oldNodesList: - oldNodeIdList.append(oldNode.id) - - # get max standby and dummy DB instanceId of old cluster. - (maxStandbyDNInstanceId, - maxDummyDNInstanceId) = self.getMaxStandbyAndDummyDNInstanceId( - oldNodesList) - # set next primary/standby and dummy DB instanceId - maxMasterDNInstanceId = maxStandbyDNInstanceId + 1 - maxDummyDNInstanceId += 1 - - # refresh DB instance id of new nodes by oldNodesList and - # maxMasterDNInstanceId/maxDummyDNInstanceId - oldLen = len(oldNodesList) - newLen = len(newNodesList) - minLen = 0 - maxLen = 0 - if (oldLen > newLen): - maxLen = oldLen - minLen = newLen - else: - maxLen = newLen - minLen = oldLen - - # refresh DB id one by one by old node - i = 0 - for newNode in newNodesList[0:minLen]: - # refresh DB instanceId if DB numbers not equal. Only for move - # DB instance - if (len(oldNodesList[i].datanodes) != len(newNode.datanodes)): - break - else: - # refresh DB instanceId one by one (primary/standby/dummy in - # cluster_static_config ) - instid = 0 - for dnInst in newNode.datanodes: - dnInst.instanceId = oldNodesList[i].datanodes[ - instid].instanceId - instid += 1 - i += 1 - - # refresh the new node DB id - for newNode in newNodesList[minLen:maxLen]: - for dnInst in newNode.datanodes: - if (dnInst.instanceType == MASTER_INSTANCE): - ## get standby/dummy instances - standbyInsts = [] - dummyStandbyInsts = [] - peerInsts = self.getPeerInstance(dnInst) - for inst in peerInsts: - if (inst.instanceType == STANDBY_INSTANCE): - standbyInsts.append(inst) - elif (inst.instanceType == DUMMY_STANDBY_INSTANCE): - dummyStandbyInsts.append(inst) - - ## refresh master instanceId here - dnInst.instanceId = maxMasterDNInstanceId - maxMasterDNInstanceId += 1 - - ## refresh standby/dummy instanceId here. 
-    def initFromXml(self, xmlFile, static_config_file="", mode="inherit"):
-        """
-        function : Init cluster from xml config file
-        input : xmlFile: XML file object for the OLAP dbClusterInfo instance
-                mode: inherit: instance id inherited from the old cluster.
-                      append: instance id appended to the old cluster.
-        output : NA
-        """
-        if (not os.path.exists(xmlFile)):
-            raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"]
-                            % "XML configuration file")
-
-        self.xmlFile = xmlFile
-
-        # Set the environment variable, then the readcluster command can
-        # read from it.
-        os.putenv(ENV_CLUSTERCONFIG, xmlFile)
-        # parse xml file
-        global xmlRootNode
-        try:
-            xmlRootNode = initParserXMLFile(xmlFile)
-        except Exception as e:
-            raise Exception(ErrorCode.GAUSS_512["GAUSS_51234"]
-                            % xmlFile + " Error:\n%s" % str(e))
-
-        self.__readClusterGlobalInfo()
-        if "HOST_IP" in list(os.environ.keys()):
-            self.__readAgentConfigInfo()
-        self.__readClusterNodeInfo()
-        self.__checkAZForSingleInst()
-        IpPort = self.__checkInstancePortandIP()
-        return IpPort
-
-    def getClusterNodeNames(self):
-        """
-        function : Get the cluster's node names.
-        input : NA
-        output : NA
-        """
-        return [dbNode.name for dbNode in self.dbNodes]
-
-    def getClusterNodeIds(self):
-        """
-        function : Get the cluster's node ids.
-        input : NA
-        output : NA
-        """
-        return [dbNode.id for dbNode in self.dbNodes]
-
-    def getdataNodeInstanceType(self, nodeId=-1):
-        """
-        function: get the dataNode's instanceType
-        input: nodeId
-        output: NA
-        """
-        for dbNode in self.dbNodes:
-            if nodeId == dbNode.id:
-                for dataNode in dbNode.datanodes:
-                    return dataNode.instanceType
-
-    def getDataDir(self, nodeId=-1):
-        """
-        function: get the dataNode's data path
-        input: nodeId
-        output: NA
-        """
-        for dbNode in self.dbNodes:
-            if nodeId == dbNode.id:
-                for dataNode in dbNode.datanodes:
-                    return dataNode.datadir
-
-    def getHostNameByNodeId(self, nodeId=-1):
-        """
-        function: get the dataNode's name by nodeId
-        input: nodeId
-        output: NA
-        """
-        for dbNode in self.dbNodes:
-            if nodeId == dbNode.id:
-                return dbNode.name
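For orientation, a typical call site builds the whole cluster model straight from the XML. A usage sketch, assuming the om scripts are importable on sys.path and using a placeholder config path:

    # Hypothetical driver code; dbClusterInfo is this module's cluster class.
    from gspylib.common.DbClusterInfo import dbClusterInfo

    cluster = dbClusterInfo()
    # Returns {nodeName: [ips, ports]} after validating ports and IPs.
    ip_port_map = cluster.initFromXml("/opt/software/cluster_config.xml")
    print(cluster.getClusterNodeNames())
    for node, (ips, ports) in ip_port_map.items():
        print(node, ips, ports)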
-    def getClusterNewNodeNames(self):
-        """
-        function : Get the cluster's new node names.
-        input : NA
-        output : NA
-        """
-        return [dbNode.name for dbNode in self.newNodes]
-
-    def getClusterDirectorys(self, hostName="", ignore=True):
-        """
-        function : Get all directories of the cluster
-        input : NA
-        output : List
-        """
-        clusterDirs = {}
-        clusterDirs["appPath"] = [self.appPath]
-        if (ignore):
-            clusterDirs["logPath"] = [self.logPath]
-        # get all directories of the cluster
-        for dbNode in self.dbNodes:
-            nodeName = dbNode.name
-            if (hostName != ""):
-                if (hostName != nodeName):
-                    continue
-            nodeDirs = []
-            # including cm_server, cm_agent, cn, dn, gtm, etcd, ssd
-            nodeDirs.append(dbNode.cmDataDir)
-            for dbInst in dbNode.cmservers:
-                nodeDirs.append(dbInst.datadir)
-            for dbInst in dbNode.cmagents:
-                nodeDirs.append(dbInst.datadir)
-            for dbInst in dbNode.gtms:
-                nodeDirs.append(dbInst.datadir)
-            for dbInst in dbNode.coordinators:
-                nodeDirs.append(dbInst.datadir)
-                if (len(dbInst.ssdDir) != 0):
-                    nodeDirs.append(dbInst.ssdDir)
-            for dbInst in dbNode.datanodes:
-                nodeDirs.append(dbInst.datadir)
-                nodeDirs.append(dbInst.xlogdir)
-                if (len(dbInst.ssdDir) != 0):
-                    nodeDirs.append(dbInst.ssdDir)
-            for dbInst in dbNode.etcds:
-                nodeDirs.append(dbInst.datadir)
-            clusterDirs[nodeName] = nodeDirs
-        return clusterDirs
-
-    def getDbNodeByName(self, name):
-        """
-        function : Get node by name.
-        input : nodename
-        output : []
-        """
-        for dbNode in self.dbNodes:
-            if (dbNode.name == name):
-                return dbNode
-
-        return None
-
-    def getDbNodeByID(self, inputid):
-        """
-        function : Get node by id.
-        input : node id
-        output : []
-        """
-        for dbNode in self.dbNodes:
-            if (dbNode.id == inputid):
-                return dbNode
-
-        return None
-
-    def getMirrorInstance(self, mirrorId):
-        """
-        function : Get primary instance and standby instance.
-        input : String
-        output : []
-        """
-        instances = []
-
-        for dbNode in self.dbNodes:
-            for inst in dbNode.cmservers:
-                if (inst.mirrorId == mirrorId):
-                    instances.append(inst)
-
-            for inst in dbNode.gtms:
-                if (inst.mirrorId == mirrorId):
-                    instances.append(inst)
-
-            for inst in dbNode.coordinators:
-                if (inst.mirrorId == mirrorId):
-                    instances.append(inst)
-
-            for inst in dbNode.datanodes:
-                if (inst.mirrorId == mirrorId):
-                    instances.append(inst)
-
-        return instances
-
-    def getPeerInstance(self, dbInst):
-        """
-        function : Get peer instance of specified instance.
-        input : []
-        output : []
-        """
-        instances = []
-        for dbNode in self.dbNodes:
-            for inst in dbNode.datanodes:
-                if (inst.mirrorId == dbInst.mirrorId and
-                        inst.instanceId != dbInst.instanceId):
-                    instances.append(inst)
-
-        return instances
-
-    def getClusterBackIps(self):
-        """
-        function : Get cluster back IP.
-        input : NA
-        output : []
-        """
-        backIps = []
-        backIpNum = []
-        # get backIp number
-        for dbNode in self.dbNodes:
-            backIpNum.append(len(dbNode.backIps))
-        if max(backIpNum) != min(backIpNum):
-            raise Exception(ErrorCode.GAUSS_512["GAUSS_51227"] % "backIps")
-        for num in range(backIpNum[0]):
-            ips = []
-            for dbNode in self.dbNodes:
-                ips.append(dbNode.backIps[num])
-            backIps.extend(ips)
-        return backIps
-    def getClusterSshIps(self):
-        """
-        function : Get cluster ssh IP.
-        input : NA
-        output : []
-        """
-        sshIps = []
-        sshIpNum = []
-        # get sshIp number
-        for dbNode in self.dbNodes:
-            sshIpNum.append(len(dbNode.sshIps))
-        if max(sshIpNum) != min(sshIpNum):
-            raise Exception(ErrorCode.GAUSS_512["GAUSS_51227"] % "sshIps")
-        for num in range(sshIpNum[0]):
-            ips = []
-            for dbNode in self.dbNodes:
-                ips.append(dbNode.sshIps[num])
-            sshIps.append(ips)
-        return sshIps
-
-    def getazNames(self):
-        """
-        function : Get AZ names sorted by the max azPriority of each AZ.
-        """
-        azMap = {}
-        azNames = []
-        for dbNode in self.dbNodes:
-            azMap[dbNode.azName] = []
-            if (dbNode.azName not in azNames):
-                azNames.append(dbNode.azName)
-        for dbNode in self.dbNodes:
-            azMap[dbNode.azName].append(dbNode.azPriority)
-        for azName in azNames:
-            azMap[azName] = max(azMap[azName])
-        azNames = sorted(azMap, key=lambda x: azMap[x])
-        return azNames
-
-    def getNodeNameByBackIp(self, backIp):
-        """
-        function : Get node name by backIp.
-        input : String
-        output : String
-        """
-        nodeName = ""
-        for dbNode in self.dbNodes:
-            if (backIp in dbNode.backIps):
-                nodeName = dbNode.name
-                break
-        return nodeName
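getazNames orders AZs by the maximum azPriority found among their nodes; the same idea in isolation, with sample data:

    # Order AZ names by the maximum azPriority seen among their nodes,
    # mirroring the getazNames logic above (sample data is illustrative).
    nodes = [("AZ1", 1), ("AZ2", 3), ("AZ1", 2), ("AZ3", 5)]  # (azName, azPriority)

    az_map = {}
    for az, prio in nodes:
        az_map[az] = max(az_map.get(az, 0), prio)

    az_names = sorted(az_map, key=lambda az: az_map[az])
    print(az_names)  # ['AZ1', 'AZ2', 'AZ3']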
-    def __checkInstancePortandIP(self):
-        """
-        function : Check instance Port and IP.
-        input : NA
-        output : NA
-        """
-        nodeipport = {}
-        for dbNode in self.dbNodes:
-            nodeips = []
-            nodeports = []
-            cmsListenIPs = []
-            ipCheckMap = {}
-            backIP1 = dbNode.backIps[0]
-            nodeips.extend(dbNode.backIps)
-            nodeips.extend(dbNode.sshIps)
-            # get node ip and node port from cmserver
-            for cmsInst in dbNode.cmservers:
-                nodeips.extend(cmsInst.listenIps)
-                nodeips.extend(cmsInst.haIps)
-                cmsListenIPs = cmsInst.listenIps
-                ipCheckMap["cmServerListenIp1"] = cmsInst.listenIps[0]
-                ipCheckMap["cmServerHaIp1"] = cmsInst.haIps[0]
-                nodeports.append(cmsInst.port)
-                nodeports.append(cmsInst.haPort)
-            # get node ip and node port from gtm
-            for gtmInst in dbNode.gtms:
-                nodeips.extend(gtmInst.listenIps)
-                nodeips.extend(gtmInst.haIps)
-                nodeports.append(gtmInst.port)
-                nodeports.append(gtmInst.haPort)
-            # get node ip and node port from cn
-            for cooInst in dbNode.coordinators:
-                nodeips.extend(cooInst.listenIps)
-                nodeips.extend(cooInst.haIps)
-                nodeports.append(cooInst.port)
-                nodeports.append(cooInst.haPort)
-            # get node ip and node port from dn
-            for dnInst in dbNode.datanodes:
-                nodeips.extend(dnInst.listenIps)
-                nodeips.extend(dnInst.haIps)
-                nodeports.append(dnInst.port)
-                nodeports.append(dnInst.haPort)
-                if (self.checkSctpPort):
-                    nodeports.append(dnInst.port +
-                                     dbNode.getDnNum(dnInst.instanceType) * 2)
-            # get node ip and node port from etcd
-            for etcdInst in dbNode.etcds:
-                nodeips.extend(etcdInst.listenIps)
-                nodeips.extend(etcdInst.haIps)
-                nodeports.append(etcdInst.port)
-                nodeports.append(etcdInst.haPort)
-                ipCheckMap["etcdListenIp1"] = etcdInst.listenIps[0]
-                ipCheckMap["etcdHaIp1"] = etcdInst.haIps[0]
-                if (len(etcdInst.listenIps) > 1):
-                    etcdListenIp2 = etcdInst.listenIps[1]
-                    if (etcdListenIp2 != backIP1):
-                        raise Exception(ErrorCode.GAUSS_512["GAUSS_51220"] % (
-                            "%s with etcdListenIp2" % etcdListenIp2) +
-                                        " Error: \nThe IP address must be "
-                                        "the same as the backIP1 %s." %
-                                        backIP1)
-
-            # CMS IP must be consistent with CMA IP
-            cmaListenIPs = dbNode.cmagents[0].listenIps
-            if (cmsListenIPs and cmsListenIPs != cmaListenIPs):
-                raise Exception(ErrorCode.GAUSS_512["GAUSS_51220"] % (
-                    "%s with cm_server" % cmsListenIPs) +
-                                " Error: \nThe IP address must be the same "
-                                "as the cm_agent %s." % cmaListenIPs)
-            if (g_networkType == 1):
-                # Check
-                ipCheckMap["cmAgentConnectIp1"] = cmaListenIPs[0]
-                if (len(set(ipCheckMap.values())) != 1):
-                    errMsg = " Error: \nThe following IPs must be consistent:"
-                    for ipConfigItem in list(ipCheckMap.keys()):
-                        errMsg += "\n%s: %s" % (
-                            ipConfigItem, ipCheckMap[ipConfigItem])
-                    raise Exception(ErrorCode.GAUSS_512["GAUSS_51220"] % (
-                        "with cm and etcd") + errMsg)
-            # create a dictionary
-            nodeipport[dbNode.name] = [nodeips, nodeports]
-            # check port and ip
-            self.__checkPortandIP(nodeips, nodeports, dbNode.name)
-        return nodeipport
-
-    def __checkPortandIP(self, ips, ports, name):
-        """
-        function : Check port and IP.
-        input : String, int, String
-        output : NA
-        """
-        ipsCopy = list(set(ips))
-        portsCopy = list(set(ports))
-        for port in portsCopy:
-            if (not self.__isPortValid(port)):
-                raise Exception(ErrorCode.GAUSS_512["GAUSS_51233"]
-                                % (port, name) + " Please check it.")
-
-        for ip in ipsCopy:
-            if (not self.__isIpValid(ip)):
-                raise Exception(ErrorCode.GAUSS_506["GAUSS_50603"] +
-                                "The IP address is: %s." % ip + " Please "
-                                "check it.")
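The validity rules above amount to: ports must lie outside the well-known 0-1023 range and not exceed 65535, and IPs must be dotted-quad IPv4. An equivalent sketch that uses the standard library's ipaddress module in place of the hand-rolled regex:

    import ipaddress

    def is_port_valid(port):
        # Reject negatives, the well-known range 0-1023, and anything > 65535.
        return 1023 < port <= 65535

    def is_ip_valid(ip):
        try:
            ipaddress.IPv4Address(ip)
            return True
        except ValueError:
            return False

    assert is_port_valid(25308) and not is_port_valid(22)
    assert is_ip_valid("192.168.0.1") and not is_ip_valid("256.1.1.1")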
-        else:
-            raise Exception(ErrorCode.GAUSS_502["GAUSS_50204"] %
-                            "cluster network type" + " Error: \n%s" % retValue)
-
-        if "HOST_IP" in list(os.environ.keys()):
-            (retStatus, retValue) = readOneClusterConfigItem(xmlRootNode,
-                                                             "corePath",
-                                                             "cluster")
-            self.corePath = retValue
-
-    def __getAllHostnamesFromDEVICELIST(self):
-        """
-        function : Read all host names from the DEVICELIST element
-        input : NA
-        output : str
-        """
-        if not xmlRootNode.findall('DEVICELIST'):
-            raise Exception(ErrorCode.GAUSS_512["GAUSS_51200"] % 'DEVICELIST')
-        DeviceArray = xmlRootNode.findall('DEVICELIST')[0]
-        DeviceNodeList = DeviceArray.findall('DEVICE')
-        allNodeName = []
-        for dev in DeviceNodeList:
-            paramList = dev.findall('PARAM')
-            for param in paramList:
-                thisname = param.attrib['name']
-                if (thisname == 'name'):
-                    value = param.attrib['value']
-                    allNodeName.append(value)
-        return allNodeName
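The DEVICELIST lookup walks DEVICE/PARAM elements and collects the name attributes; a self-contained equivalent with xml.etree, where the sample XML mimics the expected layout:

    import xml.etree.ElementTree as ET

    XML = """<ROOT><DEVICELIST>
      <DEVICE sn="1000001"><PARAM name="name" value="node1"/></DEVICE>
      <DEVICE sn="1000002"><PARAM name="name" value="node2"/></DEVICE>
    </DEVICELIST></ROOT>"""

    root = ET.fromstring(XML)
    names = [param.attrib["value"]
             for dev in root.findall("DEVICELIST")[0].findall("DEVICE")
             for param in dev.findall("PARAM")
             if param.attrib["name"] == "name"]
    print(names)  # ['node1', 'node2']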
-    def __readClusterNodeInfo(self):
-        """
-        function : Read cluster node info.
-        input : NA
-        output : NA
-        """
-        # read cluster node info.
-        (retStatus, retValue) = readOneClusterConfigItem(xmlRootNode,
-                                                         "nodeNames",
-                                                         "cluster")
-        if (retStatus != 0):
-            raise Exception(ErrorCode.GAUSS_502["GAUSS_50204"]
-                            % "node names" + " Error: \n%s" % retValue)
-        nodeNames = []
-        nodeNames_tmp = retValue.split(",")
-        for nodename in nodeNames_tmp:
-            nodeNames.append(nodename.strip())
-        if (len(nodeNames) == 0):
-            raise Exception(ErrorCode.GAUSS_502["GAUSS_50204"] %
-                            "cluster configuration" + " There is no node in "
-                            "cluster configuration"
-                            " file.")
-
-        if (len(nodeNames) != len(list(set(nodeNames)))):
-            raise Exception(ErrorCode.GAUSS_502["GAUSS_50204"] %
-                            "cluster configuration" + " There contains "
-                            "repeated node in "
-                            "cluster configuration "
-                            "file.")
-
-        # Check node names
-        nodeNameList = self.__getAllHostnamesFromDEVICELIST()
-        if len(nodeNameList) != len(nodeNames):
-            raise Exception(ErrorCode.GAUSS_512["GAUSS_51236"] +
-                            " The number of nodeNames and DEVICE are not "
-                            "same.")
-        for nodeName in nodeNames:
-            if nodeName not in nodeNameList:
-                raise Exception(ErrorCode.GAUSS_512["GAUSS_51236"] +
-                                " Can not found DEVICE for [%s]." % nodeName)
-        # Get basic info of node: name, ip and master instance number etc.
-        self.dbNodes = []
-        i = 1
-        for name in nodeNames:
-            dbNode = dbNodeInfo(i, name)
-            self.__readNodeBasicInfo(dbNode, nodeNames)
-            self.dbNodes.append(dbNode)
-            i += 1
-
-        # Get cm server info
-        for dbNode in self.dbNodes:
-            self.__readCmsConfig(dbNode)
-
-        # Get datanode info
-        for dbNode in self.dbNodes:
-            self.__readDataNodeConfig(dbNode)
-
-        # Get cm agent info
-        for dbNode in self.dbNodes:
-            self.__readCmaConfig(dbNode)
-
-        # set DB port for OLAP
-        for node in self.dbNodes:
-            for inst in node.datanodes:
-                inst.azName = node.azName
-        self.__setNodePortForSinglePrimaryMultiStandby()
-
-    def __getPeerInstance(self, dbInst):
-        """
-        function : Get peer instance of specified instance.
-        input : []
-        output : []
-        """
-        instances = []
-        if (dbInst.instanceRole == INSTANCE_ROLE_CMSERVER):
-            for dbNode in self.dbNodes:
-                for inst in dbNode.cmservers:
-                    if (inst.mirrorId == dbInst.mirrorId and
-                            inst.instanceId != dbInst.instanceId):
-                        instances.append(inst)
-        elif (dbInst.instanceRole == INSTANCE_ROLE_GTM):
-            for dbNode in self.dbNodes:
-                for inst in dbNode.gtms:
-                    if (inst.mirrorId == dbInst.mirrorId and
-                            inst.instanceId != dbInst.instanceId):
-                        instances.append(inst)
-        elif (dbInst.instanceRole == INSTANCE_ROLE_COODINATOR):
-            for dbNode in self.dbNodes:
-                for inst in dbNode.coordinators:
-                    if (inst.mirrorId == dbInst.mirrorId and
-                            inst.instanceId != dbInst.instanceId):
-                        instances.append(inst)
-        elif (dbInst.instanceRole == INSTANCE_ROLE_DATANODE):
-            for dbNode in self.dbNodes:
-                for inst in dbNode.datanodes:
-                    if (inst.mirrorId == dbInst.mirrorId and
-                            inst.instanceId != dbInst.instanceId):
-                        instances.append(inst)
-
-        return instances
-
-    def __setNodePortForSinglePrimaryMultiStandby(self):
-        """
-        function : set the standby DB port.
-        input : []
-        output : NA
-        """
-        for dbNode in self.dbNodes:
-            i = 0
-            for dbInst in dbNode.datanodes:
-                if (dbInst.instanceType == MASTER_INSTANCE):
-                    dbInst.port = dbNode.masterBasePorts[
-                                      INSTANCE_ROLE_DATANODE] + i * \
-                                  PORT_STEP_SIZE
-                    dbInst.haPort = dbInst.port + 1
-                    peerInsts = self.__getPeerInstance(dbInst)
-                    for j in range(len(peerInsts)):
-                        peerInsts[j].port = dbInst.port
-                        peerInsts[j].haPort = peerInsts[j].port + 1
-                    i += 1
-            # flush CMSERVER instance port
-            i = 0
-            cmsbaseport = 0
-            for dbInst in dbNode.cmservers:
-                if (dbInst.instanceType == MASTER_INSTANCE):
-                    cmsbaseport = dbNode.masterBasePorts[
-                        INSTANCE_ROLE_CMSERVER]
-                    dbInst.port = cmsbaseport + i * PORT_STEP_SIZE
-                    dbInst.haPort = dbInst.port + 1
-                    peerInsts = self.__getPeerInstance(dbInst)
-                    for j in range(len(peerInsts)):
-                        peerInsts[j].port = cmsbaseport
-                        peerInsts[j].haPort = peerInsts[j].port + 1
-                    i += 1
-            # flush GTM instance port
-            i = 0
-            gtmbaseport = 0
-            for dbInst in dbNode.gtms:
-                if (dbInst.instanceType == MASTER_INSTANCE):
-                    gtmbaseport = dbNode.masterBasePorts[INSTANCE_ROLE_GTM]
-                    dbInst.port = gtmbaseport + i * PORT_STEP_SIZE
-                    dbInst.haPort = dbInst.port + 1
-                    peerInsts = self.__getPeerInstance(dbInst)
-                    for j in range(len(peerInsts)):
-                        peerInsts[j].port = gtmbaseport
-                        peerInsts[j].haPort = peerInsts[j].port + 1
-                    i += 1
-
-    def __readExpandNodeInfo(self):
-        """
-        function : Read expand node info.
-        input : NA
-        output : NA
-        """
-        # read expand node info.
-        (retStatus, retValue) = readOneClusterConfigItem(xmlRootNode,
-                                                         "sqlExpandNames",
-                                                         "cluster")
-        if (retStatus != 0 or retValue.strip() == ""):
-            return
-        nodeNames = []
-        nodeNames_tmp = retValue.split(",")
-        for nodename in nodeNames_tmp:
-            nodeNames.append(nodename.strip())
-        if (len(nodeNames) == 0):
-            return
-
-        for nodeName in nodeNames:
-            dbNode = self.getDbNodeByName(nodeName)
-            if (dbNode is not None):
-                self.newNodes.append(dbNode)
-            else:
-                raise Exception(ErrorCode.GAUSS_502["GAUSS_50204"]
-                                % "expand nodes configuration" +
-                                " There is no node [%s] in cluster "
-                                "configuration file." % nodeName)
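Port assignment above is plain arithmetic: master instance i on a node gets basePort + i * PORT_STEP_SIZE, its ha port is port + 1, and its peers copy the master's port. Spelled out with illustrative constants (the real PORT_STEP_SIZE value comes from this module):

    PORT_STEP_SIZE = 20          # illustrative; the module defines the real value
    master_base_port = 25308     # a dataPortBase-style setting

    for i in range(3):           # three master datanodes on one node
        port = master_base_port + i * PORT_STEP_SIZE
        ha_port = port + 1
        print("dn%d: port=%d haPort=%d (standbys reuse port=%d)"
              % (i + 1, port, ha_port, port))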
-    def __readClusterRingsInfo(self):
-        """
-        function : Read cluster rings info.
-        input : NA
-        output : NA
-        """
-        # read cluster rings info.
-        (retStatus, retValue) = readOneClusterConfigItem(xmlRootNode,
-                                                         "clusterRings",
-                                                         "cluster")
-        if (retStatus != 0 or retValue.strip() == ""):
-            return
-        rings = retValue.split(";")
-        if (len(rings) == 0):
-            return
-        for ring in rings:
-            ring_tmp = []
-            ring_new = ring.strip().split(",")
-            for ring_one in ring_new:
-                ring_tmp.append(ring_one.strip())
-            self.clusterRings.append(ring_tmp)
-
-    def __readNodeBasicInfo(self, dbNode, nodenames):
-        """
-        function : Read basic info of the specified node.
-        input : []
-        output : NA
-        """
-        # get backIp
-        dbNode.backIps = self.__readNodeIps(dbNode.name, "backIp")
-        if (len(dbNode.backIps) == 0):
-            raise Exception(ErrorCode.GAUSS_512["GAUSS_51207"] % dbNode.name)
-        # get sshIp
-        dbNode.sshIps = self.__readNodeIps(dbNode.name, "sshIp")
-        if (len(dbNode.sshIps) == 0):
-            dbNode.sshIps = dbNode.backIps[:]
-        # get virtualIp
-        dbNode.virtualIp = self.__readVirtualIp(dbNode.name, "virtualIp")
-
-        # Get cm_server number
-        dbNode.cmsNum = self.__readNodeIntValue(dbNode.name, "cmsNum", True, 0)
-        # Get gtm number
-        dbNode.gtmNum = self.__readNodeIntValue(dbNode.name, "gtmNum", True, 0)
-        # Get etcd number
-        dbNode.etcdNum = self.__readNodeIntValue(dbNode.name, "etcdNum", True,
-                                                 0)
-        # Get cn number
-        dbNode.cooNum = self.__readNodeIntValue(dbNode.name, "cooNum", True, 0)
-        # Get DB number
-        dbNode.dataNum = self.__readNodeIntValue(dbNode.name, "dataNum", True,
-                                                 0)
-
-        # check dataNum
-        if (dbNode.dataNum < 0):
-            raise Exception(
-                ErrorCode.GAUSS_512["GAUSS_51208"] % ("dn", dbNode.dataNum))
-
-        # Get base port
-        if dbNode.dataNum > 0:
-            dbNode.masterBasePorts[INSTANCE_ROLE_DATANODE] = \
-                self.__readNodeIntValue(dbNode.name, "dataPortBase",
-                                        True, MASTER_BASEPORT_DATA)
-            dbNode.standbyBasePorts[INSTANCE_ROLE_DATANODE] = \
-                dbNode.masterBasePorts[INSTANCE_ROLE_DATANODE]
-
-        # Get az name
-        dbNode.azName = self.__readNodeStrValue(dbNode.name, "azName")
-        # check azName
-        # Get az priority
-        dbNode.azPriority = self.__readNodeIntValue(dbNode.name, "azPriority",
-                                                    True, 0)
-        # get cascadeRole
-        dbNode.cascadeRole = self.__readNodeStrValue(dbNode.name,
-                                                     "cascadeRole",
-                                                     True, "off")
-        if (dbNode.azPriority < AZPRIORITY_MIN or
-                dbNode.azPriority > AZPRIORITY_MAX):
-            raise Exception(ErrorCode.GAUSS_532["GAUSS_53206"] % "azPriority")
-
-        if (dbNode.azName == ""):
-            raise Exception(ErrorCode.GAUSS_512["GAUSS_51212"] % ("azName"))
-        if (dbNode.azPriority < 1):
-            raise Exception(ErrorCode.GAUSS_512["GAUSS_51208"]
-                            % ("azPriority", dbNode.azPriority))
-
-    def __getCmsCountFromWhichConfiguredNode(self, masterNode):
-        """
-        function : get the count of cmservers if the current node configured
-                   cmserver
-        input : masterNode
-        output : cmsCount
-        """
-        cmsList = self.__readNodeStrValue(masterNode.name, "cmServerRelation",
-                                          True, "").split(",")
-        if (len(cmsList) == 0):
-            raise Exception(ErrorCode.GAUSS_502["GAUSS_50204"]
-                            % ("CMServer configuration on host [%s]"
-                               % str(masterNode.name))
-                            + " The information of %s is wrong."
-                            % "cmServerRelation")
-        cmsCount = len(cmsList)
-        return cmsCount
-    def __readCmsConfig(self, masterNode):
-        """
-        function : Read cm server config on node.
-        input : []
-        output : NA
-        """
-        self.__readCmsConfigForMutilAZ(masterNode)
-
-    def __readCmsConfigForMutilAZ(self, masterNode):
-        """
-        Read cm server config for the multi-AZ deployment.
-        """
-        cmsListenIps = None
-        cmsHaIps = None
-        if (masterNode.cmsNum > 0):
-            self.cmscount = self.__getCmsCountFromWhichConfiguredNode(
-                masterNode)
-            cmsListenIps = self.__readInstanceIps(masterNode.name,
-                                                  "cmServerListenIp",
-                                                  self.cmscount)
-            cmsHaIps = self.__readInstanceIps(masterNode.name, "cmServerHaIp",
-                                              self.cmscount)
-
-        for i in range(masterNode.cmsNum):
-            level = self.__readNodeIntValue(masterNode.name, "cmServerlevel")
-            hostNames = []
-            hostNames_tmp = \
-                self.__readNodeStrValue(masterNode.name,
-                                        "cmServerRelation").split(",")
-            for hostname in hostNames_tmp:
-                hostNames.append(hostname.strip())
-
-            instId = self.__assignNewInstanceId(INSTANCE_ROLE_CMSERVER)
-            mirrorId = self.__assignNewMirrorId()
-            instIndex = i * self.cmscount
-            masterNode.appendInstance(instId, mirrorId, INSTANCE_ROLE_CMSERVER,
-                                      MASTER_INSTANCE, cmsListenIps[instIndex],
-                                      cmsHaIps[instIndex], "", "", level,
-                                      clusterType=self.clusterType)
-
-            for j in range(1, self.cmscount):
-                dbNode = self.getDbNodeByName(hostNames[j])
-                if dbNode is None:
-                    raise Exception(ErrorCode.GAUSS_502["GAUSS_50204"]
-                                    % ("CMServer configuration on host [%s]"
-                                       % masterNode.name)
-                                    + " There is no host named %s."
-                                    % hostNames[j])
-                instId = self.__assignNewInstanceId(INSTANCE_ROLE_CMSERVER)
-                instIndex += 1
-                dbNode.appendInstance(instId, mirrorId, INSTANCE_ROLE_CMSERVER,
-                                      STANDBY_INSTANCE,
-                                      cmsListenIps[instIndex],
-                                      cmsHaIps[instIndex], "", "", level,
-                                      clusterType=self.clusterType)
-
-    def __getDataNodeCount(self, masterNode):
-        """
-        function : get the count of data nodes
-        input : masterNode
-        output : dataNodeCount
-        """
-        dataNodeList = self.__readNodeStrValue(masterNode.name,
-                                               "dataNode1",
-                                               True, "").split(",")
-        dnListLen = len(dataNodeList)
-        dataNodeCount = (dnListLen + 1) // 2
-        return dataNodeCount
-
-    def __readDataNodeConfig(self, masterNode):
-        """
-        function : Read datanode config on node.
-        input : []
-        output : NA
-        """
-        self.__readDataNodeConfigForMutilAZ(masterNode)
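A dataNodeN entry interleaves the master's datadir with (standby host, standby datadir) pairs, which is why the instance count is (len(fields) + 1) // 2. A small parsing sketch with a made-up relation string:

    # Parse a dataNode1-style relation string; the layout is assumed from the
    # code above: masterDir, standbyHost1, standbyDir1, standbyHost2, ...
    relation = "/data/dn1,node2,/data/dn1,node3,/data/dn1"
    fields = [f.strip() for f in relation.split(",")]

    instance_count = (len(fields) + 1) // 2     # 3: one master, two standbys
    master_dir = fields[0]
    standbys = [(fields[i], fields[i + 1]) for i in range(1, len(fields), 2)]
    print(instance_count, master_dir, standbys)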
-    def __readDataNodeConfigForMutilAZ(self, masterNode):
-        """
-        Read datanode config for the multi-AZ deployment.
-        """
-        dnListenIps = None
-        dnHaIps = None
-        mirror_count_data = self.__getDataNodeCount(masterNode)
-        if (masterNode.dataNum > 0):
-            dnListenIps = self.__readInstanceIps(masterNode.name,
-                                                 "dataListenIp",
-                                                 masterNode.dataNum *
-                                                 mirror_count_data,
-                                                 True)
-            dnHaIps = self.__readInstanceIps(masterNode.name, "dataHaIp",
-                                             masterNode.dataNum *
-                                             mirror_count_data,
-                                             True)
-
-        dnInfoLists = [[] for row in range(masterNode.dataNum)]
-        xlogInfoLists = [[] for row in range(masterNode.dataNum)]
-        ssdInfoList = [[] for row in range(masterNode.dataNum)]
-        syncNumList = [-1 for row in range(masterNode.dataNum)]
-        totalDnInstanceNum = 0
-        # Whether the primary and standby set the xlog path must be
-        # synchronized
-        has_xlog_path = 0
-        for i in range(masterNode.dataNum):
-            dnInfoList = []
-            key = "dataNode%d" % (i + 1)
-            dnInfoList_tmp = self.__readNodeStrValue(masterNode.name,
-                                                     key).split(",")
-            for dnInfo in dnInfoList_tmp:
-                dnInfoList.append(dnInfo.strip())
-            dnInfoListLen = len(dnInfoList)
-            if ((dnInfoListLen != 2 * mirror_count_data - 1)):
-                raise Exception(ErrorCode.GAUSS_502["GAUSS_50204"] %
-                                ("database node configuration on host [%s]"
-                                 % masterNode.name)
-                                + " The information of [%s] is wrong." % key)
-            totalDnInstanceNum += (dnInfoListLen + 1) // 2
-            dnInfoLists[i].extend(dnInfoList)
-
-            # If dataNodeXlogPath is not set in the xml file, just set
-            # xlogInfoListLen = 0; it is used for the later judgement.
-            # If dataNodeXlogPath is set in the xml file, each datanode
-            # needs to have a corresponding xlogdir.
-            xlogInfoList = []
-            xlogkey = "dataNodeXlogPath%d" % (i + 1)
-            xlogInfoList_tmp = self.__readNodeStrValue(masterNode.name,
-                                                       xlogkey).split(",")
-            for xlogInfo in xlogInfoList_tmp:
-                xlogInfoList.append(xlogInfo.strip())
-
-            # This judgement is necessary: if dataNodeXlogPath is not set,
-            # xlogInfoListLen would equal 1.
-            # Because dnInfoList must be set, it does not need an extra
-            # judgement.
-            if xlogInfoList_tmp == ['']:
-                xlogInfoListLen = 0
-            else:
-                xlogInfoListLen = len(xlogInfoList)
-
-            if (i == 0):
-                has_xlog_path = xlogInfoListLen
-
-            if (xlogInfoListLen != has_xlog_path):
-                raise Exception(ErrorCode.GAUSS_502["GAUSS_50204"] %
-                                ("database node configuration on host [%s]"
-                                 % masterNode.name)
-                                + " The information of [%s] is wrong."
-                                % xlogkey)
-
-            if (xlogInfoListLen != 0 and xlogInfoListLen != (
-                    dnInfoListLen + 1) // 2):
-                raise Exception(ErrorCode.GAUSS_502["GAUSS_50204"] %
-                                ("database node configuration on host [%s]"
-                                 % masterNode.name)
-                                + " The information of [%s] is wrong."
-                                % xlogkey)
-            xlogInfoLists[i].extend(xlogInfoList)
-
-            key = "ssdDNDir%d" % (i + 1)
-            # SSD is not supported, so set the ssddir value to empty
-            ssddirList = []
-            ssdInfoList[i].extend(ssddirList)
-
-            # dataNode syncNum
-            key = "dataNode%d_syncNum" % (i + 1)
-            syncNum_temp = self.__readNodeStrValue(masterNode.name, key)
-            if syncNum_temp is not None:
-                syncNum = int(syncNum_temp)
-                if syncNum < 0 or syncNum >= totalDnInstanceNum:
-                    raise Exception(ErrorCode.GAUSS_502["GAUSS_50204"] %
-                                    ("database node configuration on host "
-                                     "[%s]" % masterNode.name)
-                                    + " The information of [%s] is wrong."
-                                    % key)
-                syncNumList[i] = syncNum
-        # check ip num
-        if (dnListenIps is not None and len(dnListenIps[0]) != 0):
-            colNum = len(dnListenIps[0])
-            rowNum = len(dnListenIps)
-            for col in range(colNum):
-                ipNum = 0
-                for row in range(rowNum):
-                    if (dnListenIps[row][col] != ""):
-                        ipNum += 1
-                    else:
-                        break
-                if (ipNum != totalDnInstanceNum):
-                    raise Exception(ErrorCode.GAUSS_516["GAUSS_51637"] %
-                                    ("IP number of dataListenIp",
-                                     "instance number"))
-
-        if (dnHaIps is not None and len(dnHaIps[0]) != 0):
-            colNum = len(dnHaIps[0])
-            rowNum = len(dnHaIps)
-            for col in range(colNum):
-                ipNum = 0
-                for row in range(rowNum):
-                    if (dnHaIps[row][col] != ""):
-                        ipNum += 1
-                    else:
-                        break
-                if (ipNum != totalDnInstanceNum):
-                    raise Exception(ErrorCode.GAUSS_516["GAUSS_51637"] %
-                                    ("IP number of dataHaIps",
-                                     "instance number"))
-
-        instIndex = 0
-        for i in range(masterNode.dataNum):
-            dnInfoList = dnInfoLists[i]
-
-            # Because xlog may not be set, guard against the array crossing
-            # the boundary
-            if xlogInfoListLen != 0:
-                xlogInfoList = xlogInfoLists[i]
-            groupId = self.__assignNewGroupId()
-            if (len(ssdInfoList[i]) > 1):
-                ssddirList = ssdInfoList[i]
-            # master datanode
-            instId = self.__assignNewInstanceId(INSTANCE_ROLE_DATANODE)
-            # SSD is not supported; this branch is not reached when
-            # len(ssdInfoList[i]) is 0
-            if (len(ssdInfoList[i]) > 1):
-                if (xlogInfoListLen == 0):
-                    masterNode.appendInstance(instId, groupId,
-                                              INSTANCE_ROLE_DATANODE,
-                                              MASTER_INSTANCE,
-                                              dnListenIps[instIndex],
-                                              dnHaIps[instIndex],
-                                              dnInfoList[0], ssddirList[0],
-                                              clusterType=self.clusterType,
-                                              syncNum=syncNumList[i])
-                else:
-                    masterNode.appendInstance(instId, groupId,
-                                              INSTANCE_ROLE_DATANODE,
-                                              MASTER_INSTANCE,
-                                              dnListenIps[instIndex],
-                                              dnHaIps[instIndex],
-                                              dnInfoList[0], ssddirList[0],
-                                              clusterType=self.clusterType,
-                                              xlogdir=xlogInfoList[0],
-                                              syncNum=syncNumList[i])
-            else:
-                if (xlogInfoListLen == 0):
-                    masterNode.appendInstance(instId, groupId,
-                                              INSTANCE_ROLE_DATANODE,
-                                              MASTER_INSTANCE,
-                                              dnListenIps[instIndex],
-                                              dnHaIps[instIndex],
-                                              dnInfoList[0],
-                                              clusterType=self.clusterType,
-                                              syncNum=syncNumList[i])
-                else:
-                    masterNode.appendInstance(instId, groupId,
-                                              INSTANCE_ROLE_DATANODE,
-                                              MASTER_INSTANCE,
-                                              dnListenIps[instIndex],
-                                              dnHaIps[instIndex],
-                                              dnInfoList[0],
-                                              clusterType=self.clusterType,
-                                              xlogdir=xlogInfoList[0],
-                                              syncNum=syncNumList[i])
-
-            instIndex += 1
-
-            for nodeLen in range((len(dnInfoList) + 1) // 2 - 1):
-                dbNode = self.getDbNodeByName(dnInfoList[nodeLen * 2 + 1])
-                if dbNode is None:
-                    raise Exception(ErrorCode.GAUSS_502["GAUSS_50204"]
-                                    % ("database node configuration on "
-                                       "host [%s]" % str(masterNode.name))
-                                    + " There is no host named %s."
-                                    % dnInfoList[nodeLen * 2 + 1])
-                instId = self.__assignNewInstanceId(INSTANCE_ROLE_DATANODE)
-
-                # SSD is not supported; this branch is not reached when
-                # len(ssdInfoList[i]) is 0
-                if (len(ssdInfoList[i]) > 1):
-                    if (xlogInfoListLen == 0):
-                        dbNode.appendInstance(instId, groupId,
-                                              INSTANCE_ROLE_DATANODE,
-                                              STANDBY_INSTANCE,
-                                              dnListenIps[instIndex],
-                                              dnHaIps[instIndex],
-                                              dnInfoList[nodeLen * 2 + 2],
-                                              ssddirList[nodeLen * 2 + 1],
-                                              clusterType=self.clusterType,
-                                              syncNum=syncNumList[i])
-                    else:
-                        dbNode.appendInstance(instId, groupId,
-                                              INSTANCE_ROLE_DATANODE,
-                                              STANDBY_INSTANCE,
-                                              dnListenIps[instIndex],
-                                              dnHaIps[instIndex],
-                                              dnInfoList[nodeLen * 2 + 2],
-                                              ssddirList[nodeLen * 2 + 1],
-                                              clusterType=self.clusterType,
-                                              xlogdir=xlogInfoList[
-                                                  nodeLen + 1],
-                                              syncNum=syncNumList[i])
-                else:
-                    if (xlogInfoListLen == 0):
-                        dbNode.appendInstance(instId, groupId,
-                                              INSTANCE_ROLE_DATANODE,
-                                              STANDBY_INSTANCE,
-                                              dnListenIps[instIndex],
-                                              dnHaIps[instIndex],
-                                              dnInfoList[nodeLen * 2 + 2],
-                                              clusterType=self.clusterType,
-                                              syncNum=syncNumList[i])
-                    else:
-                        dbNode.appendInstance(instId, groupId,
-                                              INSTANCE_ROLE_DATANODE,
-                                              STANDBY_INSTANCE,
-                                              dnListenIps[instIndex],
-                                              dnHaIps[instIndex],
-                                              dnInfoList[nodeLen * 2 + 2],
-                                              clusterType=self.clusterType,
-                                              xlogdir=xlogInfoList[
-                                                  nodeLen + 1],
-                                              syncNum=syncNumList[i])
-                if dbNode.cascadeRole == "on":
-                    for inst in dbNode.datanodes:
-                        inst.instanceType = CASCADE_STANDBY
-
-                instIndex += 1
-
-        for inst in masterNode.datanodes:
-            inst.azName = masterNode.azName
-
-    def __readCmaConfig(self, dbNode):
-        """
-        function : Read cm agent config on node.
-        input : []
-        output : NA
-        """
-        agentIps = self.__readInstanceIps(dbNode.name, "cmAgentConnectIp", 1)
-        instId = self.__assignNewInstanceId(INSTANCE_ROLE_CMAGENT)
-        dbNode.appendInstance(instId, MIRROR_ID_AGENT, INSTANCE_ROLE_CMAGENT,
-                              INSTANCE_TYPE_UNDEFINED, agentIps[0], None, "",
-                              clusterType=self.clusterType)
-
-    def newInstanceId(self, instRole):
-        return self.__assignNewInstanceId(instRole)
-
-    def newMirrorId(self):
-        return self.__assignNewMirrorId()
-
-    def __assignNewInstanceId(self, instRole):
-        """
-        function : Assign a new id for an instance.
-        input : String
-        output : NA
-        """
-        newId = self.__newInstanceId[instRole]
-        if (INSTANCE_ROLE_DATANODE == instRole):
-            if (newId == OLD_LAST_PRIMARYSTANDBY_BASEID_NUM):
-                self.__newInstanceId[instRole] = \
-                    self.__newInstanceId[instRole] + 1 + \
-                    (NEW_FIRST_PRIMARYSTANDBY_BASEID_NUM -
-                     OLD_LAST_PRIMARYSTANDBY_BASEID_NUM)
-            else:
-                self.__newInstanceId[instRole] += 1
-        else:
-            self.__newInstanceId[instRole] += 1
-        return newId
-
-    def __assignNewDummyInstanceId(self):
-        """
-        function : Assign a new dummy standby instance id.
-        input : NA
-        output : NA
-        """
-        if (self.__newDummyStandbyId == OLD_LAST_DUMMYNODE_BASEID_NUM):
-            self.__newDummyStandbyId = self.__newDummyStandbyId + 1 + (
-                    NEW_FIRST_DUMMYNODE_BASEID_NUM -
-                    OLD_LAST_DUMMYNODE_BASEID_NUM)
-        else:
-            self.__newDummyStandbyId += 1
-        return self.__newDummyStandbyId
-
-    def __assignNewMirrorId(self):
-        """
-        function : Assign a new mirror id.
-        input : NA
-        output : NA
-        """
-        self.__newMirrorId += 1
-
-        return self.__newMirrorId
-
-    def __assignNewGroupId(self):
-        """
-        function : Assign a new group id.
-        """
-        self.__newGroupId += 1
-        return self.__newGroupId
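__assignNewInstanceId jumps over a reserved gap when the datanode counter reaches the last old-format id. The jump can be reproduced with stand-in boundary constants (the real OLD_LAST_*/NEW_FIRST_* values live in this module):

    # Stand-in boundaries for illustration only.
    OLD_LAST = 5999
    NEW_FIRST = 40000

    def next_dn_instance_id(current):
        # Skip the reserved gap between the old and the new id ranges.
        if current == OLD_LAST:
            return current + 1 + (NEW_FIRST - OLD_LAST)
        return current + 1

    print(next_dn_instance_id(5998))  # 5999
    print(next_dn_instance_id(5999))  # 40001 -> lands past the reserved gap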
-    def __readNodeIps(self, nodeName, prefix):
-        """
-        function : Read ips for node, such as backIp1, sshIp1 etc.
-        input : String, String
-        output : NA
-        """
-        ipList = []
-        n = 1
-
-        if (prefix == "cooListenIp"):
-            n = 3
-        elif (prefix == "etcdListenIp"):
-            n = 2
-
-        for i in range(1, CONFIG_IP_NUM + n):
-            key = "%s%d" % (prefix, i)
-            value = self.__readNodeStrValue(nodeName, key, True, "")
-            if (value == ""):
-                break
-            ipList.append(value)
-
-        return ipList
-
-    def __readVirtualIp(self, nodeName, prefix):
-        """
-        function : Read virtual ip only for node.
-        input : String, String
-        output : NA
-        """
-        ipList = []
-        value = self.__readNodeStrValue(nodeName, prefix, True, "")
-        if (value != ""):
-            valueIps = value.split(",")
-            for ip in valueIps:
-                ip = ip.strip()
-                if ip not in ipList:
-                    ipList.append(ip)
-        return ipList
-
-    def __isIpValid(self, ip):
-        """
-        function : check if the input ip address is valid
-        input : String
-        output : NA
-        """
-        IpValid = re.match(
-            "^(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|["
-            "1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|["
-            "1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{"
-            "1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}["
-            "0-9]{1}|[0-9])$",
-            ip)
-        if IpValid:
-            if (IpValid.group() == ip):
-                return True
-            else:
-                return False
-        else:
-            return False
-
-    def __isPortValid(self, port):
-        """
-        function : Judge if the port is valid
-        input : int
-        output : boolean
-        """
-        if (port < 0 or port > 65535):
-            return False
-        elif (port >= 0 and port <= 1023):
-            return False
-        else:
-            return True
-
-    def __readInstanceIps(self, nodeName, prefix, InstCount, isDataNode=False):
-        """
-        function : Read instance ips
-        input : String, String, int
-        output : NA
-        """
-        multiIpList = self.__readNodeIps(nodeName, prefix)
-
-        mutilIpCount = len(multiIpList)
-        if (mutilIpCount == 0):
-            return [[] for row in range(InstCount)]
-
-        instanceIpList = [["" for col in range(mutilIpCount)] for row in
-                          range(InstCount)]
-        for i in range(mutilIpCount):
-            ipList = []
-            ipList_tmp = multiIpList[i].split(",")
-            for ip in ipList_tmp:
-                ipList.append(ip.strip())
-            ipNum = len(ipList)
-            if (ipNum != InstCount):
-                raise Exception(ErrorCode.GAUSS_502["GAUSS_50204"]
-                                % ("[%s] of node [%s]" % (prefix, nodeName))
-                                + " The count of IP is wrong.")
-            for j in range(ipNum):
-                instanceIpList[j][i] = ipList[j]
-
-        return instanceIpList
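__readInstanceIps effectively transposes "one comma-separated string of per-instance IPs per NIC" into per-instance rows. Compactly, with sample data:

    # Each entry is one NIC's comma-separated list of per-instance IPs.
    multi_ip_list = ["10.0.0.1, 10.0.0.2, 10.0.0.3",        # e.g. dataListenIp1
                     "172.16.0.1, 172.16.0.2, 172.16.0.3"]  # e.g. dataListenIp2
    inst_count = 3

    rows = [[ip.strip() for ip in entry.split(",")] for entry in multi_ip_list]
    assert all(len(r) == inst_count for r in rows), "The count of IP is wrong."

    # instance_ips[j] holds all IPs of instance j, one per NIC.
    instance_ips = [list(col) for col in zip(*rows)]
    print(instance_ips[0])  # ['10.0.0.1', '172.16.0.1']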
-    def __readNodeIntValue(self, nodeName, key, nullable=False, defValue=0):
-        """
-        function : Read an integer value of the specified node
-        input : String, int
-        output : NA
-        """
-        value = defValue
-
-        strValue = self.__readNodeStrValue(nodeName, key, nullable, "")
-        if (strValue != ""):
-            value = int(strValue)
-        return value
-
-    def __readNodeStrValue(self, nodeName, key, nullable=False, defValue=""):
-        """
-        function : Read a string of the specified node
-        input : String, int
-        output : defValue
-        """
-        (retStatus, retValue) = readOneClusterConfigItem(xmlRootNode, key,
-                                                         "node", nodeName)
-        if (retStatus == 0):
-            return str(retValue).strip()
-        elif (retStatus == 2 and nullable):
-            return defValue
-        # During upgrade, XLOGPATH may not be set in the xml. Make a special
-        # judgement for the xlog scenario.
-        elif (retStatus == 2 and "dataNodeXlogPath" in key):
-            return defValue
-        elif (retStatus == 2 and "syncNum" in key):
-            return None
-        else:
-            raise Exception(ErrorCode.GAUSS_502["GAUSS_50204"] %
-                            ("[%s] of node [%s]" % (key, nodeName)) +
-                            " Return status: %d. value: %s. Check whether "
-                            "the dataNum is correct first."
-                            % (retStatus, retValue))
-
-    def __checkVirtualIp(self, clusterVirtualIp, dbNode):
-        """
-        function : Check virtual ip
-        input : String, int
-        output : NA
-        """
-        allIps = dbNode.virtualIp[:]
-        allIps.extend(dbNode.backIps)
-        tempIps = []
-        for ip in allIps:
-            if (not self.__isIpValid(ip)):
-                raise Exception(ErrorCode.GAUSS_506["GAUSS_50603"] +
-                                "The IP address is: %s" % ip + " Please "
-                                "check it.")
-            if ip in tempIps:
-                raise Exception(ErrorCode.GAUSS_512["GAUSS_51220"] %
-                                ip + " Virtual IP(s) cannot be same as back "
-                                "IP(s).")
-            tempIps.append(ip)
-
-        for ip in allIps:
-            if ip in clusterVirtualIp:
-                raise Exception(ErrorCode.GAUSS_512["GAUSS_51224"] % ip)
-        clusterVirtualIp.extend(allIps)
-
-        for dnInstance in dbNode.datanodes:
-            for dnIp in dnInstance.listenIps:
-                if (dnIp not in allIps):
-                    raise Exception(ErrorCode.GAUSS_512["GAUSS_51229"] %
-                                    (dnIp, dbNode.name) + "Please check it.")
-
-    def checkDbNodes(self):
-        """
-        function : Check the number of cluster nodes.
-        """
-        if (len(self.dbNodes) > MIRROR_COUNT_NODE_MAX):
-            raise Exception(ErrorCode.GAUSS_512["GAUSS_51230"] %
-                            ("nodes",
-                             "be less than or equal to %s" %
-                             MIRROR_COUNT_NODE_MAX) + " Please set it.")
-
-    def checkCmsNumForMutilAZ(self, cmsNum):
-        """
-        function : Check the number of CMServer groups.
-        """
-        if (cmsNum != 1):
-            raise Exception(ErrorCode.GAUSS_512["GAUSS_51230"] %
-                            ("CMServer group",
-                             "equal to 1") + " Please set it.")
-
-    def checkGtmNumForMutilAZ(self, gtmNum):
-        """
-        function : Check the number of GTMs.
-        """
-        if (gtmNum < 0):
-            raise Exception(ErrorCode.GAUSS_512["GAUSS_51230"] %
-                            ("GTM", "be greater than 0") + " Please set it.")
-
-    def checkCooNumForMutilAZ(self, cooNum):
-        """
-        function : Check the number of CNs.
-        """
-        if (cooNum <= 0):
-            raise Exception(ErrorCode.GAUSS_512["GAUSS_51230"] %
-                            ("CN", "be greater than 0") + " Please set it.")
-
-    def checkDataNumForMutilAZ(self, dataNum):
-        """
-        function : Check the number of DNs.
-        """
-        if (dataNum <= 0 or dataNum > MIRROR_COUNT_DN_MAX):
-            raise Exception(ErrorCode.GAUSS_512["GAUSS_51230"] %
-                            ("DN",
-                             "be greater than 0 and less than or equal to "
-                             "%s" % MIRROR_COUNT_DN_MAX) + " Please set it.")
-
-    def checkEtcdNumForMutilAZ(self, etcdNum):
-        """
-        function : Check the number of ETCDs.
-        """
-        if (etcdNum > 0):
-            if (
-                    etcdNum < MIRROR_COUNT_ETCD_MIN or etcdNum >
-                    MIRROR_COUNT_ETCD_MAX):
-                raise Exception(ErrorCode.GAUSS_512["GAUSS_51230"] %
-                                ("ETCD",
-                                 "be greater than 2 and less than 8")
-                                + " Please set it.")
-
-    ######################################################
-    def checkDnIp(self, networkSegment):
-        """
-        function : Check that all datanodes are on the same network segment.
-        """
-        for dbNode in self.dbNodes:
-            if (dbNode.dataNum > 0):
-                for dn in dbNode.datanodes:
-                    if (dn.listenIps[0].split(".")[0] != networkSegment):
-                        raise Exception(ErrorCode.GAUSS_512["GAUSS_51220"]
-                                        % dn.listenIps[0]
-                                        + "\nAll datanodes are not on "
-                                        "the same network segment.")
-
-    def checkNewNodes(self):
-        """
-        function : Check the new (expansion) nodes.
-        """
-        if (len(self.dbNodes) - len(self.newNodes) <= 1):
-            raise Exception(ErrorCode.GAUSS_512["GAUSS_51231"]
-                            + " Please check the cluster configuration file.")
-        for dbNode in self.newNodes:
-            if (len(dbNode.cmservers) > 0 or len(dbNode.gtms) > 0 or
-                    len(dbNode.etcds) > 0):
-                raise Exception(
-                    ErrorCode.GAUSS_512["GAUSS_51215"] % dbNode.name +
-                    " Please check the cluster configuration file.")
-            if (len(dbNode.coordinators) == 0 and len(dbNode.datanodes) == 0):
-                raise Exception(
-                    ErrorCode.GAUSS_512["GAUSS_51216"] % dbNode.name +
-                    " Please check the cluster configuration file.")
-    def __checkAZForSingleInst(self):
-        """
-        function : check az names and DB replication
-        input : NA
-        output : NA
-        """
-        # Get DB standbys num
-        # The number of standbys for each DB instance must be the same
-        peerNum = 0
-        for dbNode in self.dbNodes:
-            for inst in dbNode.datanodes:
-                if (inst.instanceType == MASTER_INSTANCE):
-                    peerInsts = self.getPeerInstance(inst)
-                    if (peerNum == 0):
-                        peerNum = len(peerInsts)
-                    elif (peerNum != len(peerInsts)):
-                        raise Exception(ErrorCode.GAUSS_532["GAUSS_53200"])
-
-        if peerNum > 8:
-            raise Exception(ErrorCode.GAUSS_512["GAUSS_51230"] % (
-                "database node standbys", "be less than 9") + " Please set it.")
-
-    def __checkAZNamesWithDNReplication(self):
-        """
-        function : check az names and DB replication
-        input : NA
-        output : NA
-        """
-        # AZ map: name to priorities
-        azMap = {}
-        # Get DB standbys num
-        peerNum = 0
-        for dbNode in self.dbNodes:
-            for inst in dbNode.datanodes:
-                if (inst.instanceType == MASTER_INSTANCE):
-                    peerInsts = self.getPeerInstance(inst)
-                    # The number of standbys for each DB instance must be
-                    # the same
-                    if (peerNum == 0):
-                        peerNum = len(peerInsts)
-                    elif (peerNum != len(peerInsts)):
-                        raise Exception(ErrorCode.GAUSS_532["GAUSS_53200"])
-
-        # Get AZ names in cluster
-        azNames = self.getazNames()
-        if (peerNum < 2 or peerNum > 7):
-            raise Exception(ErrorCode.GAUSS_512["GAUSS_51230"]
-                            % ("database node standbys",
-                               "be greater than 1 and less than 8")
-                            + " Please set it.")
-        # Check az names and DB replication
-        # When the number of standbys is less than 3, the AZ num must be 1
-        # When the number of standbys is equal to 3, the AZ num must be 2 or 3
-        # When the number of standbys is equal to 4, the AZ num must be 3
-        # When the number of standbys is greater than 4 and less than 8,
-        # the AZ num must be 3
-        if (len(azNames) != 1 and peerNum <= 2):
-            raise Exception(ErrorCode.GAUSS_532["GAUSS_53201"])
-        elif (len(azNames) == 1 and peerNum == 3):
-            raise Exception(ErrorCode.GAUSS_532["GAUSS_53201"])
-        elif (len(azNames) != 3 and peerNum == 4):
-            raise Exception(ErrorCode.GAUSS_532["GAUSS_53201"])
-        elif (len(azNames) != 3 and peerNum <= 7 and peerNum > 4):
-            raise Exception(ErrorCode.GAUSS_532["GAUSS_53201"])
-
-        # Check AZ replication
-        self.__checkAzInfoForSinglePrimaryMultiStandby(azNames)
-        # Check DB peerInsts num of configuration in each az zone
-        self.__checkAzSycNumforDnpeerInsts(azNames)
-    def __checkAzInfoForSinglePrimaryMultiStandby(self, azNames):
-        """
-        1. Check if AZ info with etcd number is set correctly.
-        2. Check if the azPriority value is set correctly.
-        return: NA
-        """
-        az1_etcd = 0
-        az2_etcd = 0
-        az3_etcd = 0
-        az1Priority_max = 0
-        az1Priority_min = 0
-        az2Priority_max = 0
-        az2Priority_min = 0
-        az3Priority_max = 0
-        az3Priority_min = 0
-        az1PriorityLst = []
-        az2PriorityLst = []
-        az3PriorityLst = []
-        syncAz = False
-        thirdPartAZ = False
-
-        for dbNode in self.dbNodes:
-            if dbNode.azName == azNames[0]:
-                az1_etcd += len(dbNode.etcds)
-                az1PriorityLst.append(dbNode.azPriority)
-            if len(azNames) > 1 and dbNode.azName == azNames[1]:
-                syncAz = True
-                az2_etcd += len(dbNode.etcds)
-                az2PriorityLst.append(dbNode.azPriority)
-            if len(azNames) > 2 and dbNode.azName == azNames[2]:
-                thirdPartAZ = True
-                az3_etcd += len(dbNode.etcds)
-                az3PriorityLst.append(dbNode.azPriority)
-
-        # In a primary multi-standby cluster, AZ1 has a higher priority than
-        # AZ2 and AZ2 has a higher priority than AZ3.
-        az1Priority_max = max(az1PriorityLst)
-        az1Priority_min = min(az1PriorityLst)
-
-        # Each AZ requires at least one or more ETCDs.
-        if (az1_etcd != 3 and not syncAz and not thirdPartAZ):
-            raise Exception(
-                ErrorCode.GAUSS_532["GAUSS_53203"] % "AZ1 must be 3")
-        if (syncAz):
-            if (az1_etcd < 2 or az2_etcd < 1):
-                raise Exception(ErrorCode.GAUSS_532["GAUSS_53203"] %
-                                "AZ1 must be greater than 2 and the number "
-                                "of ETCD in AZ2 must be greater than 1")
-            # check az2 priority
-            az2Priority_max = max(az2PriorityLst)
-            az2Priority_min = min(az2PriorityLst)
-            if (az1Priority_max >= az2Priority_min):
-                raise Exception(ErrorCode.GAUSS_532["GAUSS_53205"]
-                                % (azNames[0], azNames[1]))
-        if (thirdPartAZ):
-            if (az1_etcd < 2 or az2_etcd < 2 or az3_etcd < 1):
-                raise Exception(ErrorCode.GAUSS_532["GAUSS_53203"] %
-                                "%s and %s must be greater than 2 and the "
-                                "number of ETCD in %s must be greater than "
-                                "1" % (azNames[0], azNames[1], azNames[2]))
-            # check az3 priority
-            az3Priority_max = max(az3PriorityLst)
-            az3Priority_min = min(az3PriorityLst)
-            if (az2Priority_max >= az3Priority_min):
-                raise Exception(ErrorCode.GAUSS_532["GAUSS_53205"]
-                                % (azNames[1], azNames[2]))
-
-    def __checkAzSycNumforDnpeerInsts(self, azNames):
-        """
-        function : Check if AZ info with DB number is set correctly.
-        input : azName List sorted by azPriority
-        output : NA
-        """
-        az1_datanode_num = 0
-        az2_datanode_num = 0
-        az3_datanode_num = 0
-        syncAz = False
-        thirdPartAZ = False
-
-        for dbNode in self.dbNodes:
-            if dbNode.azName == azNames[0]:
-                az1_datanode_num += len(dbNode.datanodes)
-            if len(azNames) > 1 and dbNode.azName == azNames[1]:
-                syncAz = True
-                az2_datanode_num += len(dbNode.datanodes)
-            if len(azNames) > 2 and dbNode.azName == azNames[2]:
-                thirdPartAZ = True
-                az3_datanode_num += len(dbNode.datanodes)
-
-        # Check the datanode distribution across the AZs.
-        if (syncAz):
-            if az2_datanode_num != 0 and az1_datanode_num == 0:
-                errmsg = ErrorCode.GAUSS_532["GAUSS_53201"]
-                errmsg += " The datanodes num in highest priority az[%s] " \
-                          "should not be 0 " % azNames[0]
-                errmsg += "when there are database node instances in the" \
-                          " lowest priority az[%s] ." % azNames[1]
-                raise Exception(errmsg)
-        if (thirdPartAZ):
-            if az3_datanode_num != 0 and (
-                    az1_datanode_num == 0 or az2_datanode_num == 0):
-                errmsg = ErrorCode.GAUSS_532["GAUSS_53201"]
-                errmsg += " The datanodes num in one of first two " \
-                          "priorities az[%s,%s] with higher priorities" \
-                          " should not be 0 " % (azNames[0], azNames[1])
-                errmsg += "when there are database node instances in the" \
-                          " lowest priority az[%s] ." % azNames[-1]
-                raise Exception(errmsg)
-    def __getDNPeerInstance(self, dbInst):
-        """
-        function : Get the DB peer instances of a specified instance when
-                   writing the static configuration file.
-        input : []
-        output : []
-        """
-        instances = []
-        instIdLst = []
-
-        for dbNode in self.dbNodes:
-            for inst in dbNode.datanodes:
-                if (inst.mirrorId == dbInst.mirrorId and inst.instanceId !=
-                        dbInst.instanceId):
-                    instances.append(inst)
-                    instIdLst.append(inst.instanceId)
-
-        # In a primary multi-standby cluster,
-        # since the CM update system table depends on the DB read/write
-        # sequence in the static configuration file,
-        # we must sort the DN's standby list by instanceId.
-        if dbInst.instanceType == MASTER_INSTANCE:
-            instIdLst.sort()
-            instanceLst = []
-            for instId in instIdLst:
-                for inst in instances:
-                    if (inst.instanceId == instId):
-                        instanceLst.append(inst)
-            return instanceLst
-        else:
-            return instances
-
-    def saveToStaticConfig(self, filePath, localNodeId, dbNodes=None,
-                           upgrade=False):
-        """
-        function : Save cluster info into the static config
-        input : String, int
-        output : NA
-        """
-        fp = None
-        number = None
-        if upgrade:
-            staticConfigFilePath = os.path.split(filePath)[0]
-            versionFile = os.path.join(
-                staticConfigFilePath, "upgrade_version")
-            version, number, commitid = VersionInfo.get_version_info(
-                versionFile)
-        try:
-            if (dbNodes is None):
-                dbNodes = self.dbNodes
-            g_file.createFileInSafeMode(filePath)
-            fp = open(filePath, "wb")
-            # len
-            info = struct.pack("I", 28)
-            # version
-            info += struct.pack("I", BIN_CONFIG_VERSION_SINGLE_INST)
-            # time
-            info += struct.pack("q", int(time.time()))
-            # node count
-            info += struct.pack("I", len(dbNodes))
-            # local node
-            info += struct.pack("I", localNodeId)
-
-            crc = binascii.crc32(info)
-            if upgrade:
-                if float(number) <= 92.200:
-                    info = struct.pack("q", crc) + info
-                else:
-                    info = struct.pack("I", crc) + info
-            else:
-                info = struct.pack("I", crc) + info
-            fp.write(info)
-
-            for dbNode in dbNodes:
-                offset = (fp.tell() // PAGE_SIZE + 1) * PAGE_SIZE
-                fp.seek(offset)
-
-                info = self.__packNodeInfo(dbNode, number, upgrade=upgrade)
-                fp.write(info)
-            endBytes = PAGE_SIZE - fp.tell() % PAGE_SIZE
-            if (endBytes != PAGE_SIZE):
-                info = struct.pack("%dx" % endBytes)
-                fp.write(info)
-            fp.flush()
-            fp.close()
-            os.chmod(filePath, DIRECTORY_PERMISSION)
-        except Exception as e:
-            if fp:
-                fp.close()
-            raise Exception(ErrorCode.GAUSS_502["GAUSS_50205"] %
-                            "static configuration file"
-                            + " Error: \n%s" % str(e))
-
-    def __packNodeInfo(self, dbNode, number, upgrade=False):
-        """
-        function : Pack the info of a node
-        input : []
-        output : String
-        """
-        # node id
-        info = struct.pack("I", dbNode.id)
-        # node name
-        info += struct.pack("64s", dbNode.name.encode("utf-8"))
-        # az info
-        info += struct.pack("64s", dbNode.azName.encode("utf-8"))
-        info += struct.pack("I", dbNode.azPriority)
-        # backIp
-        info += self.__packIps(dbNode.backIps)
-        # sshIp
-        info += self.__packIps(dbNode.sshIps)
-        # cm_server
-        info += self.__packCmsInfo(dbNode)
-        # cm_agent
-        info += self.__packAgentInfo(dbNode)
-        # gtm
-        info += self.__packGtmInfo(dbNode)
-        # cancel saving gtmProxy info, need a placeholder
-        info += self.__packGtmProxyInfo(dbNode)
-        # cn
-        info += self.__packCooInfo(dbNode)
-        # dn
-        info += self.__packDataNode(dbNode)
-        # etcd
-        info += self.__packEtcdInfo(dbNode)
-        # cancel saving sctp begin/end port, need a placeholder
-        info += struct.pack("I", 0)
-        info += struct.pack("I", 0)
-        crc = binascii.crc32(info)
-
-        if upgrade:
-            if float(number) <= 92.200:
-                return struct.pack("q", crc) + info
-            else:
-                return struct.pack("I", crc) + info
-        else:
-            return struct.pack("I", crc) + info
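Each record in the static config is a fixed-width struct blob with a CRC32 of the payload prepended; the "q" versus "I" packing of the checksum depends on the version number because older formats stored the (unsigned) crc in 8 bytes. A tiny record in the same style:

    import binascii
    import struct

    # Pack a toy fixed-width record the way the static config does:
    # payload first, then a CRC32 of the payload prepended.
    node_id, node_name = 1, "node1"
    payload = struct.pack("I", node_id) + struct.pack("64s",
                                                      node_name.encode("utf-8"))

    crc = binascii.crc32(payload)          # unsigned in Python 3
    record = struct.pack("I", crc) + payload

    # Reading it back verifies the checksum before trusting the payload.
    stored_crc = struct.unpack_from("I", record, 0)[0]
    assert stored_crc == binascii.crc32(record[4:])
    print(len(record))  # 72: 4 (crc) + 4 (id) + 64 (name) bytes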
-    def __packNodeInfoForLC(self, dbNode):
-        """
-        function : Pack the info of a node for the logic cluster
-        input : []
-        output : String
-        """
-        # node id
-        info = struct.pack("I", dbNode.id)
-        # node name
-        info += struct.pack("64s", dbNode.name.encode("utf-8"))
-        # backIp
-        info += self.__packIps(dbNode.backIps)
-        # sshIp
-        info += self.__packIps(dbNode.sshIps)
-        # dn
-        info += self.__packDataNode(dbNode)
-        # cancel saving sctp begin/end port, need a placeholder
-        info += struct.pack("I", 0)
-        info += struct.pack("I", 0)
-        crc = binascii.crc32(info)
-
-        return struct.pack("I", crc) + info
-
-    def __packEtcdInfo(self, dbNode):
-        """
-        function : Pack the info of etcd
-        input : []
-        output : String
-        """
-        n = len(dbNode.etcds)
-
-        info = "".encode()
-        if (n == 0):
-            # etcd count
-            info += struct.pack("I", 0)
-            # etcd id
-            info += struct.pack("I", 0)
-            # etcd mirror id
-            info += struct.pack("i", 0)
-            # etcd name
-            info += struct.pack("64x")
-            # datadir
-            info += struct.pack("1024x")
-            # listen ip
-            info += self.__packIps([])
-            # listen port
-            info += struct.pack("I", 0)
-            # ha ip
-            info += self.__packIps([])
-            # ha port
-            info += struct.pack("I", 0)
-        elif (n == 1):
-            etcdInst = dbNode.etcds[0]
-            # etcd count
-            info += struct.pack("I", 1)
-            # etcd id
-            info += struct.pack("I", etcdInst.instanceId)
-            # etcd mirror id
-            info += struct.pack("i", etcdInst.mirrorId)
-            # etcd name
-            info += struct.pack("64s", "etcd_%d".encode(
-                "utf-8") % etcdInst.instanceId)
-            # datadir
-            info += struct.pack("1024s", etcdInst.datadir.encode("utf-8"))
-            # listen ip
-            info += self.__packIps(etcdInst.listenIps)
-            # listen port
-            info += struct.pack("I", etcdInst.port)
-            # ha ip
-            info += self.__packIps(etcdInst.haIps)
-            # ha port
-            info += struct.pack("I", etcdInst.haPort)
-        else:
-            pass
-
-        return info
-
-    def __packCmsInfo(self, dbNode):
-        """
-        function : Pack the info of cm server
-        input : []
-        output : String
-        """
-        n = len(dbNode.cmservers)
-
-        info = "".encode()
-        if (n == 0):
-            # cm server id
-            info += struct.pack("I", 0)
-            # cm_server mirror id
-            info += struct.pack("I", 0)
-            # datadir
-            info += struct.pack("1024s", dbNode.cmDataDir.encode("utf-8"))
-            # cm server level
-            info += struct.pack("I", 0)
-            # float ip
-            info += struct.pack("128x")
-            # listen ip
-            info += self.__packIps([])
-            # listen port
-            info += struct.pack("I", 0)
-            # local ha ip
-            info += self.__packIps([])
-            # local ha port
-            info += struct.pack("I", 0)
-            # is primary
-            info += struct.pack("I", 0)
-            # peer ha ip
-            info += self.__packIps([])
-            # peer ha port
-            info += struct.pack("I", 0)
-        elif (n == 1):
-            cmsInst = dbNode.cmservers[0]
-            # cm server id
-            info += struct.pack("I", cmsInst.instanceId)
-            # cm_server mirror id
-            info += struct.pack("I", cmsInst.mirrorId)
-            # datadir
-            info += struct.pack("1024s", dbNode.cmDataDir.encode("utf-8"))
-            # cm server level
-            info += struct.pack("I", cmsInst.level)
-            info += struct.pack("128s", self.cmsFloatIp.encode("utf-8"))
-            # listen ip
-            info += self.__packIps(cmsInst.listenIps)
-            # listen port
-            info += struct.pack("I", cmsInst.port)
-            # local ha ip
-            info += self.__packIps(cmsInst.haIps)
-            # local ha port
-            info += struct.pack("I", cmsInst.haPort)
-            # instance type
-            info += struct.pack("I", cmsInst.instanceType)
-            instances = self.getPeerInstance(cmsInst)
-            peerInst = instances[0]
-            # peer ha ip
-            info += self.__packIps(peerInst.haIps)
-            # peer ha port
-            info += struct.pack("I", peerInst.haPort)
-        else:
-            pass
-
-        return info
-
-    def __packAgentInfo(self, dbNode):
-        """
-        function : Pack the info of agent
-        input : []
-        output : String
-        """
-        n = len(dbNode.cmagents)
-
-        info = "".encode()
-        if (n == 1):
-            cmaInst = dbNode.cmagents[0]
-            # Agent id
-            info += struct.pack("I", cmaInst.instanceId)
-            # Agent mirror id
-            info += struct.pack("i", cmaInst.mirrorId)
-            # agent ips
-            info += self.__packIps(cmaInst.listenIps)
-
-        return info
+= struct.pack("I", 0) - # gtm mirror id - info += struct.pack("I", 0) - # gtm count - info += struct.pack("I", 0) - # datadir - info += struct.pack("1024x") - # listen ip - info += self.__packIps([]) - # listn port - info += struct.pack("I", 0) - # instance type - info += struct.pack("I", 0) - # loacl ha ip - info += self.__packIps([]) - # local ha port - info += struct.pack("I", 0) - # peer gtm datadir - info += struct.pack("1024x") - # peer ha ip - info += self.__packIps([]) - # peer ha port - info += struct.pack("I", 0) - elif (n == 1): - gtmInst = dbNode.gtms[0] - # gtm id - info += struct.pack("I", gtmInst.instanceId) - # gtm mirror id - info += struct.pack("I", gtmInst.mirrorId) - # gtm count - info += struct.pack("I", 1) - # datadir - info += struct.pack("1024s", gtmInst.datadir.encode("utf-8")) - # listen ip - info += self.__packIps(gtmInst.listenIps) - # listn port - info += struct.pack("I", gtmInst.port) - # instance type - info += struct.pack("I", gtmInst.instanceType) - # loacl ha ip - info += self.__packIps(gtmInst.haIps) - # local ha port - info += struct.pack("I", gtmInst.haPort) - # peer gtm datadir - info += struct.pack("1024x") - # peer ha ip - info += self.__packIps([]) - # peer ha port - info += struct.pack("I", 0) - - else: - pass - - return info - - def __packGtmProxyInfo(self, dbNode): - """ - function : Pack the info of gtm proxy - input : [] - output : String - """ - info = "".encode() - info += struct.pack("I", 0) - info += struct.pack("I", 0) - info += struct.pack("I", 0) - info += self.__packIps([]) - info += struct.pack("I", 0) - return info - - def __packCooInfo(self, dbNode): - """ - function : Pack the info of coordinator - input : [] - output : String - """ - n = len(dbNode.coordinators) - - info = "".encode() - if (n == 0): - # coordinator id - info += struct.pack("I", 0) - # coordinator mirror id - info += struct.pack("i", 0) - # coordinator count - info += struct.pack("I", 0) - # datadir - info += struct.pack("1024x") - # ssdDir - info += struct.pack("1024x") - # listen ip - info += self.__packIps([]) - # listn port - info += struct.pack("I", 0) - # ha port - info += struct.pack("I", 0) - elif (n == 1): - cooInst = dbNode.coordinators[0] - # coordinator id - info += struct.pack("I", cooInst.instanceId) - # coordinator mirror id - info += struct.pack("i", cooInst.mirrorId) - # coordinator count - info += struct.pack("I", 1) - # datadir - info += struct.pack("1024s", cooInst.datadir.encode("utf-8")) - # ssdDir - info += struct.pack("1024s", cooInst.ssdDir.encode("utf-8")) - # listen ip - info += self.__packIps(cooInst.listenIps) - # listn port - info += struct.pack("I", cooInst.port) - # ha port - info += struct.pack("I", cooInst.haPort) - else: - pass - - return info - - def __packDataNode(self, dbNode): - """ - function : Pack the info of datanode - input : [] - output : String - """ - - info = struct.pack("I", len(dbNode.datanodes)) - for dnInst in dbNode.datanodes: - instances = self.__getDNPeerInstance(dnInst) - # datanode id - info += struct.pack("I", dnInst.instanceId) - # datanode id - info += struct.pack("I", dnInst.mirrorId) - # datadir - info += struct.pack("1024s", dnInst.datadir.encode("utf-8")) - # xlogdir - info += struct.pack("1024s", dnInst.xlogdir.encode("utf-8")) - # ssdDir - info += struct.pack("1024s", dnInst.ssdDir.encode("utf-8")) - # listen ip - info += self.__packIps(dnInst.listenIps) - # port - info += struct.pack("I", dnInst.port) - # instance type - info += struct.pack("I", dnInst.instanceType) - # loacl ha ip - info += 
self.__packIps(dnInst.haIps) - # local ha port - info += struct.pack("I", dnInst.haPort) - - maxStandbyCount = MIRROR_COUNT_REPLICATION_MAX - 1 - - n = len(instances) - for i in range(n): - peerInst = instances[i] - # peer1 datadir - info += struct.pack("1024s", peerInst.datadir.encode("utf-8")) - # peer1 ha ip - info += self.__packIps(peerInst.haIps) - # peer1 ha port - info += struct.pack("I", peerInst.haPort) - # instance type - info += struct.pack("I", peerInst.instanceType) - for i in range(n, maxStandbyCount): - # peer1 datadir - info += struct.pack("1024x") - # peer1 ha ip - info += self.__packIps([]) - # peer1 ha port - info += struct.pack("I", 0) - # instance type - info += struct.pack("I", 0) - return info - - def __packIps(self, ips): - """ - function : Pack the info of ips - input : [] - output : String - """ - n = len(ips) - - info = struct.pack("I", n) - for i in range(n): - info += struct.pack("128s", ips[i].encode("utf-8")) - for i in range(n, MAX_IP_NUM): - info += struct.pack("128x") - - return info - - def saveClusterLevelData(self, rootNode, user): - """ - function : save cluster level data info. - input : documentElement, string - output : NA - """ - # Add XML comments - # Create a cluster-level information to add to the root node - clusterInfo = g_dom.createElement("CLUSTER") - rootNode.appendChild(clusterInfo) - clusterMap = {} - # get clusterInfo - clusterMap["clusterName"] = self.__getEnvironmentParameterValue( - "GS_CLUSTER_NAME", user) - clusterMap["nodeNames"] = ",".join(self.getClusterNodeNames()) - clusterMap["gaussdbAppPath"] = self.appPath - clusterMap["gaussdbLogPath"] = self.logPath - clusterMap["gaussdbToolPath"] = self.__getEnvironmentParameterValue( - "GPHOME", user) - clusterMap["tmpMppdbPath"] = self.__getEnvironmentParameterValue( - "PGHOST", user) - if len(self.newNodes) > 0: - clusterMap["sqlExpandNames"] = ",".join( - [dbNode.name for dbNode in self.newNodes]) - # save clusterInfo - for (key, value) in clusterMap.items(): - clusterInfo.appendChild(self.saveOneClusterConfigItem(key, value)) - - def saveNodeLevelData(self, rootNode): - """ - function : save node level data info. - input : documentElement - output : NA - """ - # add node-level information - # Node deployment information on each server - devlistInfo = g_dom.createElement("DEVICELIST") - rootNode.appendChild(devlistInfo) - (cmInfoMap, gtmInfoMap) = self.getCmAndGtmInfo() - i = 100000 - for dbNode in self.dbNodes: - i += 1 - # Node deployment information on the dbNode - perDevInfo = g_dom.createElement("DEVICE") - perDevInfo.setAttribute("sn", "%d" % i) - devlistInfo.appendChild(perDevInfo) - # save name, backIp, sshIp on the dbNode - perDevInfo.appendChild( - self.saveOneClusterConfigItem("name", dbNode.name)) - self.saveIPsItem(perDevInfo, "backIp", dbNode.backIps) - self.saveIPsItem(perDevInfo, "sshIp", dbNode.sshIps) - - # save CM info - self.saveCmsInfo(perDevInfo, dbNode, cmInfoMap) - # save GTM info - - self.savegGtmsInfo(perDevInfo, dbNode, gtmInfoMap) - # save CN info - - self.saveCnInfo(perDevInfo, dbNode) - # save ETCD info - - self.saveEtcdInfo(perDevInfo, dbNode) - # save DB info - self.saveDnInfo(perDevInfo, dbNode) - - def saveCmsInfo(self, devInfo, dbNode, cmInfoMap): - """ - function : get GTM instance info. 
-        input : NA
-        output : NA
-        """
-        # CM deployment information
-        cms_num = len(dbNode.cmservers)
-        # Save the full CM information on the CM master node
-        if cms_num > 0 and dbNode.cmservers[0].instanceType == MASTER_INSTANCE:
-            for key in list(cmInfoMap.keys()):
-                # if the key is IP info, it has already been saved as IP items
-                if key in ("cmServerListenIp", "cmServerHaIp"):
-                    self.saveIPsItem(devInfo, key, cmInfoMap[key])
-                else:
-                    devInfo.appendChild(
-                        self.saveOneClusterConfigItem(key, cmInfoMap[key]))
-        else:
-            # Save the cmsNum,cmDir,cmServerPortBase,cmServerPortStandby of
-            # CM information on the other nodes
-            devInfo.appendChild(self.saveOneClusterConfigItem("cmsNum", "0"))
-            for key in ("cmDir", "cmServerPortBase", "cmServerPortStandby"):
-                devInfo.appendChild(
-                    self.saveOneClusterConfigItem(key, cmInfoMap[key]))
-
-    def saveGtmsInfo(self, devInfo, dbNode, gtmInfoMap):
-        """
-        function : save GTM instance info.
-        input : NA
-        output : NA
-        """
-        # GTM deployment information
-        gtm_num = len(dbNode.gtms)
-        # Save the full GTM information on the GTM master node
-        if gtm_num > 0 and dbNode.gtms[0].instanceType == MASTER_INSTANCE:
-            for key in list(gtmInfoMap.keys()):
-                if key in ("gtmListenIp", "gtmHaIp"):
-                    # if the key is IP info, it has already been saved as
-                    # IP items
-                    self.saveIPsItem(devInfo, key, gtmInfoMap[key])
-                else:
-                    devInfo.appendChild(
-                        self.saveOneClusterConfigItem(key, gtmInfoMap[key]))
-        else:
-            # Save the gtmNum,gtmPortBase,gtmPortStandby of gtm information
-            # on the other nodes
-            devInfo.appendChild(self.saveOneClusterConfigItem("gtmNum", "0"))
-            for key in ("gtmPortBase", "gtmPortStandby"):
-                devInfo.appendChild(
-                    self.saveOneClusterConfigItem(key, gtmInfoMap[key]))
-
-    def saveCnInfo(self, devInfo, dbNode):
-        """
-        function : save CN instance info.
-        input : NA
-        output : NA
-        """
-        if len(dbNode.coordinators) == 0:
-            return
-        # CN deployment information
-        # get CN instance
-        cnInst = dbNode.coordinators[0]
-        cnInfoMap = {}
-        # get CN instance elements
-        cnInfoMap["cooNum"] = '1'
-        cnInfoMap["cooPortBase"] = str(cnInst.port)
-        cnInfoMap["cooDir1"] = cnInst.datadir
-        # save CN instance elements
-        for key in ["cooNum", "cooPortBase", "cooDir1"]:
-            devInfo.appendChild(
-                self.saveOneClusterConfigItem(key, cnInfoMap[key]))
-        # If listenIp is the same as backIp, no listenIp is generated
-        if dbNode.backIps != cnInst.listenIps:
-            self.saveIPsItem(devInfo, "cooListenIp", cnInst.listenIps)
-
-    def saveEtcdInfo(self, devInfo, dbNode):
-        """
-        function : save ETCD instance info.
-        input : NA
-        output : NA
-        """
-        if len(dbNode.etcds) == 0:
-            return
-        # ETCD deployment information
-        # get etcd instance
-        etcdInst = dbNode.etcds[0]
-        etcdInfoMap = {}
-        # get etcd instance elements
-        etcdInfoMap["etcdNum"] = '1'
-        etcdInfoMap["etcdListenPort"] = str(etcdInst.port)
-        etcdInfoMap["etcdHaPort"] = str(etcdInst.haPort)
-        etcdInfoMap["etcdDir1"] = etcdInst.datadir
-        # save etcd instance elements
-        for key in ["etcdNum", "etcdListenPort", "etcdHaPort", "etcdDir1"]:
-            devInfo.appendChild(
-                self.saveOneClusterConfigItem(key, etcdInfoMap[key]))
-        # If listenIp is the same as backIp, no listenIp is generated
-        if dbNode.backIps != etcdInst.listenIps:
-            self.saveIPsItem(devInfo, "etcdListenIp", etcdInst.listenIps)
-        # If haIp is the same as backIp, no haIp is generated
-        if dbNode.backIps != etcdInst.haIps:
-            self.saveIPsItem(devInfo, "etcdHaIp", etcdInst.haIps)
-
-    def saveDnInfo(self, devInfo, dbNode):
-        """
-        function : save DN instance info.
-        input : NA
-        output : NA
-        """
-        if len(dbNode.datanodes) == 0:
-            return
-        # get DN deployment information
-        dnInfoMap = {}
-        dnInfoMap["dataNum"] = str(dbNode.dataNum)
-        i = 0
-        totalListenIps = {}
-        totalHaIps = {}
-        flag_j1 = 0
-        flag_j2 = 0
-        isDnPortBase = True
-        isDnPortStandby = True
-        isDnPortDummyStandby = True
-        for dnInst in dbNode.datanodes:
-            # get the first standby DN instance port on the current node
-            if (dnInst.instanceType == STANDBY_INSTANCE and isDnPortStandby):
-                dnInfoMap["dataPortStandby"] = str(dnInst.port)
-                isDnPortStandby = False
-            # get the first dummy standby DN instance port on the current node
-            if (dnInst.instanceType == DUMMY_STANDBY_INSTANCE and
-                    isDnPortDummyStandby):
-                dnInfoMap["dataPortDummyStandby"] = str(dnInst.port)
-                isDnPortDummyStandby = False
-
-            if (dnInst.instanceType == MASTER_INSTANCE):
-                # get the first base DN instance port on the current node
-                if (isDnPortBase):
-                    dnInfoMap["dataPortBase"] = str(dnInst.port)
-                    isDnPortBase = False
-                i += 1
-                # get the peer instances of the master DN
-                instances = self.getPeerInstance(dnInst)
-                for inst in instances:
-                    if (inst.instanceType == STANDBY_INSTANCE):
-                        standby_inst = inst
-                    elif (inst.instanceType == DUMMY_STANDBY_INSTANCE):
-                        dummy_inst = inst
-                dnInfoMap["dataNode%d" % i] = "%s,%s,%s,%s,%s" \
-                                              % (dnInst.datadir,
-                                                 standby_inst.hostname,
-                                                 standby_inst.datadir,
-                                                 dummy_inst.hostname,
-                                                 dummy_inst.datadir)
-                standby_node = self.getDbNodeByName(standby_inst.hostname)
-                dummy_node = self.getDbNodeByName(dummy_inst.hostname)
-                # Get DN listen IP and ha IP
-                for j1 in range(len(dnInst.listenIps)):
-                    # a listen IP entry is generated only when it differs
-                    # from the default backIp
-                    if dnInst.listenIps[j1] != dbNode.backIps[0] or \
-                            standby_inst.listenIps[j1] != \
-                            standby_node.backIps[0] or \
-                            dummy_inst.listenIps[j1] != dummy_node.backIps[0]:
-                        # a single DN configures multiple listen IPs
-                        if flag_j1 == 0:
-                            totalListenIps[j1] = ("%s,%s,%s" % (
-                                dnInst.listenIps[j1],
-                                standby_inst.listenIps[j1],
-                                dummy_inst.listenIps[j1]))
-                            flag_j1 += 1
-                        else:
-                            totalListenIps[j1] += (",%s,%s,%s" % (
-                                dnInst.listenIps[j1],
-                                standby_inst.listenIps[j1],
-                                dummy_inst.listenIps[j1]))
-                for j2 in range(len(dnInst.haIps)):
-                    if dnInst.haIps[j2] != dbNode.backIps[0] or \
-                            standby_inst.haIps[j2] != standby_node.backIps[0] \
-                            or dummy_inst.haIps[j2] != dummy_node.backIps[0]:
-                        if flag_j2 == 0:
-                            totalHaIps[j2] = ("%s,%s,%s" % (
-                                dnInst.haIps[j2], standby_inst.haIps[j2],
-                                dummy_inst.haIps[j2]))
-                            flag_j2 += 1
-                        else:
-                            totalHaIps[j2] += (",%s,%s,%s" % (
-                                dnInst.haIps[j2], standby_inst.haIps[j2],
-                                dummy_inst.haIps[j2]))
-        for key in ["dataNum", "dataPortBase", "dataPortStandby",
-                    "dataPortDummyStandby"]:
-            devInfo.appendChild(
-                self.saveOneClusterConfigItem(key, dnInfoMap[key]))
-        self.saveIPsItem(devInfo, "dataListenIp",
-                         list(totalListenIps.values()))
-        self.saveIPsItem(devInfo, "dataHaIp", list(totalHaIps.values()))
-        for key in list(dnInfoMap.keys()):
-            if key not in ["dataNum", "dataPortBase", "dataPortStandby",
-                           "dataPortDummyStandby"]:
-                devInfo.appendChild(
-                    self.saveOneClusterConfigItem(key, dnInfoMap[key]))
-
-    def getCmAndGtmInfo(self):
-        """
-        function : get gtm and cm instance info.
-        input : NA
-        output : (MapData, MapData)
-        """
-        cmInfoMap = {}
-        gtmInfoMap = {}
-        for dbNode in self.dbNodes:
-
-            if len(dbNode.cmservers) > 0:
-                cmsInst = dbNode.cmservers[0]
-                if cmsInst.instanceType == MASTER_INSTANCE:
-                    instances = self.getPeerInstance(cmsInst)
-                    cmPeerInst = instances[0]
-                    cmInfoMap["cmsNum"] = '1'
-                    cmInfoMap["cmDir"] = dbNode.cmDataDir
-                    cmInfoMap["cmServerPortBase"] = str(cmsInst.port)
-                    cmInfoMap["cmServerPortStandby"] = str(cmPeerInst.port)
-                    cmInfoMap["cmServerRelation"] = "%s,%s" % (
-                        cmsInst.hostname, cmPeerInst.hostname)
-                    cmInfoMap["cmServerlevel"] = str(cmsInst.level)
-                    cmInfoMap["cmServerListenIp"] = self.getIpList(
-                        cmsInst.listenIps, cmPeerInst.listenIps,
-                        dbNode.backIps[0])
-                    cmInfoMap["cmServerHaIp"] = self.getIpList(
-                        cmsInst.haIps, cmPeerInst.haIps, dbNode.backIps[0])
-            if len(dbNode.gtms) > 0:
-                gtmInst = dbNode.gtms[0]
-                if gtmInst.instanceType == MASTER_INSTANCE:
-                    gtmPeerInst = self.getPeerInstance(gtmInst)[0]
-                    gtmInfoMap["gtmNum"] = '1'
-                    gtmInfoMap["gtmDir1"] = "%s,%s,%s" % (
-                        gtmInst.datadir, gtmPeerInst.hostname,
-                        gtmPeerInst.datadir)
-                    gtmInfoMap["gtmPortBase"] = str(gtmInst.port)
-                    gtmInfoMap["gtmPortStandby"] = str(gtmPeerInst.port)
-                    gtmInfoMap["gtmRelation"] = "%s,%s" % (
-                        gtmInst.hostname, gtmPeerInst.hostname)
-                    gtmInfoMap["gtmListenIp"] = self.getIpList(
-                        gtmInst.listenIps, gtmPeerInst.listenIps,
-                        dbNode.backIps[0])
-                    gtmInfoMap["gtmHaIp"] = self.getIpList(gtmInst.haIps,
-                                                           gtmPeerInst.haIps,
-                                                           dbNode.backIps[0])
-
-        return (cmInfoMap, gtmInfoMap)
-
-    def getIpList(self, masterInstIps, standbyInstIps, nodeBackIp):
-        """
-        function : get IP data from the master and standby instances of
-                   gtm and cm.
-        input : ips
-        output : ipList
-        """
-        ipList = []
-        for i in range(len(masterInstIps)):
-            if masterInstIps[i] != nodeBackIp:
-                ipList.append("%s,%s" % (masterInstIps[i], standbyInstIps[i]))
-        return ipList
-
-    def saveIPsItem(self, devInfo, ipType, ips):
-        """
-        function : save IP type data as XML parameters
-        input : ips
-        output : NA
-        """
-        for i in range(len(ips)):
-            devInfo.appendChild(
-                self.saveOneClusterConfigItem("%s%d" % (ipType, i + 1),
-                                              ips[i]))
-
-    def saveOneClusterConfigItem(self, paramName, paramValue):
-        """
-        function : save param info and return it
-        input : paramName, paramValue
-        output : Element object
-        """
-        paramInfo = g_dom.createElement("PARAM")
-        paramInfo.setAttribute("name", paramName)
-        paramInfo.setAttribute("value", paramValue)
-        return paramInfo
-
-    def listToCSV(self, obj):
-        """
-        convert a list (like IPs) to a comma-separated string for XML
-        """
-        return ','.join(map(str, obj))
-
-    def __writeWithIndent(self, fp, line, indent):
-        """
-        write the XML content with indentation
-        """
-        fp.write('%s%s\n' % (' ' * indent * 2, line))
-
-    def generateXMLFromStaticConfigFile(self, user, static_config_file,
-                                        xmlFilePath, version=201,
-                                        newNodeNames=None):
-        """
-        function : Generate cluster installation XML from static
-                   configuration file
-        input : String, String, String
-        output : Cluster installation XML file
-        """
-        fp = None
-        indent = 0
-        if newNodeNames is None:
-            newNodeNames = []
-
-        # Write XML header
-        # the file is created with permission 600
-        fp = os.fdopen(os.open(xmlFilePath, os.O_WRONLY | os.O_CREAT,
-                               KEY_FILE_PERMISSION), "w")
-        self.__writeWithIndent(fp, '<?xml version="1.0" encoding="UTF-8"?>',
-                               indent)
-
-        # Get cluster info from ClusterStatic
-        if (static_config_file is not None):
-            # get cluster version
-            cluster_version = self.getClusterVersion(static_config_file)
-            self.initFromStaticConfig(user, static_config_file)
-        else:
-            cluster_version = version
-        # Cluster header
-        indent += 1
-        self.__writeWithIndent(fp, '<ROOT>', indent)
-        self.__writeWithIndent(fp, '<CLUSTER>', indent)
-        indent += 1
-        self.__writeWithIndent(fp,
-                               '<PARAM name="clusterName" value="%s"/>' %
-                               self.name,
-                               indent)
-
-        nodeList = self.getClusterNodeNames()
-        nodeNames = ''
-        for item in nodeList:
-            nodeNames += str(item) + ","
-        nodeNames = nodeNames[:-1]
-        backIps = ",".join([node.backIps[0] for node in self.dbNodes])
-        self.__writeWithIndent(fp,
-                               '<PARAM name="nodeNames" value="%s"/>' %
-                               nodeNames,
-                               indent)
-        self.__writeWithIndent(fp,
-                               '<PARAM name="gaussdbAppPath" value="%s"/>'
-                               % self.appPath,
-                               indent)
-        self.__writeWithIndent(fp,
-                               '<PARAM name="gaussdbLogPath" value="%s"/>'
-                               % self.logPath,
-                               indent)
-        self.__writeWithIndent(fp,
-                               '<PARAM name="tmpMppdbPath" value="%s"/>' %
-                               self.tmpPath,
-                               indent)
-        self.__writeWithIndent(fp,
-                               '<PARAM name="gaussdbToolPath" value="%s"/>'
-                               % self.toolPath,
-                               indent)
-        self.__writeWithIndent(fp,
-                               '<PARAM name="backIp1s" value="%s"/>' %
-                               backIps,
-                               indent)
-        if newNodeNames:
-            self.__writeWithIndent(fp,
-                                   '<PARAM name="sqlExpandNames" '
-                                   'value="%s"/>' % ','.join(
-                                       newNodeNames), indent)
-        if self.isMiniaturizedDeployment(cluster_version):
-            self.__writeWithIndent(fp,
-                                   '<PARAM name="clusterType" '
-                                   'value="single-inst"/>', indent)
-        elif self.isSinglePrimaryMultiStandbyDeployment(cluster_version):
-            self.__writeWithIndent(fp,
-                                   '<PARAM name="clusterType" '
-                                   'value="single-primary-multi-standby"/>',
-                                   indent)
-        indent -= 1
-        self.__writeWithIndent(fp, '</CLUSTER>', indent)
-        self.__writeWithIndent(fp, '<DEVICELIST>', indent)
-
-        # device list
-        ctr = 1000001
-        # For each node
-        for local_dbn in self.dbNodes:
-            # Device beginning
-            self.__writeWithIndent(fp, '<DEVICE sn="%s">' % (str(ctr)),
-                                   indent)
-
-            indent += 1
-            self.__writeWithIndent(fp, '<PARAM name="name" value="%s"/>' % (
-                local_dbn.name), indent)
-            if self.isSinglePrimaryMultiStandbyDeployment(cluster_version):
-                self.__writeWithIndent(fp,
-                                       '<PARAM name="azName" value="%s"/>' % (
-                                           local_dbn.azName), indent)
-                self.__writeWithIndent(fp,
-                                       '<PARAM name="azPriority" '
-                                       'value="%s"/>' % (
-                                           local_dbn.azPriority), indent)
-            self.__writeWithIndent(fp,
-                                   '<PARAM name="backIp1" value="%s"/>' % (
-                                       self.listToCSV(local_dbn.backIps)),
-                                   indent)
-            self.__writeWithIndent(fp, '<PARAM name="sshIp1" value="%s"/>' % (
-                self.listToCSV(local_dbn.sshIps)), indent)
-            self.__writeWithIndent(fp, '<PARAM name="cmDir" value="%s"/>' % (
-                local_dbn.cmDataDir), indent)
-            if not self.isMiniaturizedDeployment(
-                    cluster_version) and local_dbn.virtualIp:
-                self.__writeWithIndent(fp,
-                                       '<PARAM name="virtualIp1" value="%s"/>'
-                                       % (self.listToCSV(local_dbn.virtualIp)),
-                                       indent)
-
-            if not self.isMiniaturizedDeployment(cluster_version):
-                # ETCD beginning
-                if (local_dbn.etcdNum > 0):
-                    # Common part
-                    self.__writeWithIndent(fp, '', indent)
-                    self.__writeWithIndent(fp,
-                                           '<PARAM name="etcdNum" '
-                                           'value="%d"/>' % (
-                                               local_dbn.etcdNum), indent)
-                    self.__writeWithIndent(fp,
-                                           '<PARAM name="etcdListenPort" '
-                                           'value="%d"/>'
-                                           % (local_dbn.etcds[0].port), indent)
-                    self.__writeWithIndent(fp,
-                                           '<PARAM name="etcdHaPort" '
-                                           'value="%d"/>'
-                                           % (local_dbn.etcds[0].haPort),
-                                           indent)
-
-                    # Repeated part
-                    i = 1
-                    for etcdInst in local_dbn.etcds:
-                        self.__writeWithIndent(fp,
-                                               '<PARAM name="etcdDir%d" '
-                                               'value="%s"/>'
-                                               % (i, etcdInst.datadir),
-                                               indent)
-                        self.__writeWithIndent(fp,
-                                               '<PARAM name="etcdListenIp%d" '
-                                               'value="%s"/>' % (
-                                                   i, self.listToCSV(
-                                                       etcdInst.listenIps)),
-                                               indent)
-                        self.__writeWithIndent(fp,
-                                               '<PARAM name="etcdHaIp%d" '
-                                               'value="%s"/>' %
-                                               (i,
-                                                self.listToCSV(
-                                                    etcdInst.haIps)),
-                                               indent)
-                        i += 1
-                # ETCD ending
-
-            # CM beginning
-            if len(local_dbn.cmservers) > 0 and \
-                    local_dbn.cmservers[0].instanceType == MASTER_INSTANCE:
-                try:
-                    cmsInst = local_dbn.cmservers[0]
-                    self.__writeWithIndent(fp, '', indent)
-                    self.__writeWithIndent(fp,
-                                           '<PARAM name="cmsNum" '
-                                           'value="%d"/>'
-                                           % (local_dbn.cmsNum), indent)
-                    self.__writeWithIndent(fp,
-                                           '<PARAM name="cmServerPortBase" '
-                                           'value="%d"/>'
-                                           % (cmsInst.port), indent)
-                    self.__writeWithIndent(fp,
-                                           '<PARAM name="cmServerlevel" '
-                                           'value="%d"/>'
-                                           % (cmsInst.level), indent)
-                    self.__writeWithIndent(fp,
-                                           '<PARAM name="cmDir" value="%s"/>'
-                                           % (local_dbn.cmDataDir),
-                                           indent)
-                    if not self.isMiniaturizedDeployment(cluster_version):
-                        peerInst_listenIps = ''
-                        peerInst_haIps = ''
-                        peerInst_hostname = ''
-                        peerInst_port = 0
-                        masterInst = None
-                        for peerInst in self.getPeerInstance(cmsInst):
-                            peerInst_listenIps = peerInst_listenIps + \
-                                                 peerInst.listenIps[0] + ','
-                            peerInst_haIps = peerInst_haIps \
-                                             + peerInst.haIps[0] + ','
-                            peerInst_port = peerInst.port
-                            peerInst_hostname = peerInst_hostname + \
-                                                peerInst.hostname + ','
-                            if peerInst.instanceType == MASTER_INSTANCE:
-                                masterInst = peerInst
-
-                        if cmsInst.instanceType == STANDBY_INSTANCE:
-                            peerInst_listenIps = ''
-                            peerInst_haIps = ''
-                            for secPeerInst in self.getPeerInstance(
-                                    masterInst):
-                                peerInst_listenIps = peerInst_listenIps + \
-                                                     secPeerInst.listenIps[0] \
-                                                     + ','
-                                peerInst_haIps = peerInst_haIps + \
-                                                 secPeerInst.haIps[0] + ','
-                        else:
-                            masterInst = cmsInst
-
-                        self.__writeWithIndent(
-                            fp, '<PARAM name="cmServerListenIp1" '
-                            'value="%s,%s"/>'
-                            % (masterInst.listenIps[0],
-                               peerInst_listenIps[:-1]), indent)
-                        self.__writeWithIndent(
-                            fp, '<PARAM name="cmServerRelation" '
-                            'value="%s,%s"/>'
-                            % (cmsInst.hostname,
-                               peerInst_hostname[:-1]), indent)
-                    else:
-                        self.__writeWithIndent(
-                            fp, '<PARAM name="cmServerListenIp1" '
-                            'value="%s"/>'
-                            % (cmsInst.listenIps[0]), indent)
-                except IndexError:
-                    # No CM in this instance - make blank entry...
-                    self.__writeWithIndent(
-                        fp, '', indent)
-                    self.__writeWithIndent(fp,
-                                           '', indent)
-                    self.__writeWithIndent(fp,
-                                           '<PARAM name="cmServerPortBase" '
-                                           'value="%d"/>'
-                                           % (MASTER_BASEPORT_CMS), indent)
-                    self.__writeWithIndent(fp,
-                                           '', indent)
-                    self.__writeWithIndent(fp,
-                                           '', indent)
-                    self.__writeWithIndent(fp,
-                                           '<PARAM name="cmDir" value="%s"/>'
-                                           % (local_dbn.cmDataDir), indent)
-                    if not self.isMiniaturizedDeployment(cluster_version):
-                        self.__writeWithIndent(
-                            fp, '<PARAM name="cmServerPortStandby" '
-                            'value="%d"/>' % (STANDBY_BASEPORT_CMS),
-                            indent)
-                        self.__writeWithIndent(
-                            fp, '<PARAM name="cmServerRelation" '
-                            'value="%s,%s"/>' % (local_dbn.name,
-                                                 local_dbn.name), indent)
-            # CM ending
-
-            # gtm beginning
-            if len(local_dbn.gtms) > 0 and local_dbn.gtms[0].instanceType == \
-                    MASTER_INSTANCE:
-                try:
-                    gtmInst = local_dbn.gtms[0]
-                    self.__writeWithIndent(fp, '', indent)
-                    self.__writeWithIndent(fp,
-                                           '<PARAM name="gtmNum" '
-                                           'value="%d"/>'
-                                           % (local_dbn.gtmNum), indent)
-                    self.__writeWithIndent(fp,
-                                           '<PARAM name="gtmPortBase" '
-                                           'value="%d"/>'
-                                           % (gtmInst.port), indent)
-                    if not self.isMiniaturizedDeployment(cluster_version):
-                        peerInst_listenIps = ''
-                        peerInst_haIps = ''
-                        peerInst_hostname = ''
-                        peerInst_hostname_datadir = ''
-                        for peerInst in self.getPeerInstance(gtmInst):
-                            peerInst_listenIps = peerInst_listenIps + \
-                                                 peerInst.listenIps[0] + ','
-                            peerInst_haIps = peerInst_haIps \
-                                             + peerInst.haIps[0] + ','
-                            peerInst_port = peerInst.port
-                            peerInst_hostname = peerInst_hostname + \
-                                                peerInst.hostname + ','
-                            peerInst_hostname_datadir = \
-                                peerInst_hostname_datadir + peerInst.hostname \
-                                + ',' + peerInst.datadir + ','
-                        if not self.isSinglePrimaryMultiStandbyDeployment(
-                                cluster_version):
-                            self.__writeWithIndent(
-                                fp, '<PARAM name="gtmPortStandby" '
-                                'value="%d"/>' % (peerInst_port), indent)
-                            self.__writeWithIndent(fp,
-                                                   '<PARAM name="gtmHaPort" '
-                                                   'value="%d"/>'
-                                                   % (gtmInst.haPort), indent)
-                        self.__writeWithIndent(fp,
-                                               '<PARAM name="gtmListenIp1" '
-                                               'value="%s,%s"/>'
-                                               % (gtmInst.listenIps[0],
-                                                  peerInst_listenIps[:-1]),
-                                               indent)
-                        self.__writeWithIndent(fp,
-                                               '<PARAM name="gtmHaIp1" '
-                                               'value="%s,%s"/>'
-                                               % (gtmInst.haIps[0],
-                                                  peerInst_haIps[:-1]), indent)
-                        self.__writeWithIndent(fp,
-                                               '<PARAM name="gtmDir1" '
-                                               'value="%s,%s"/>'
-                                               % (gtmInst.datadir,
-                                                  peerInst_hostname_datadir[
-                                                      :-1]),
-                                               indent)
-                        self.__writeWithIndent(fp,
-                                               '<PARAM name="gtmRelation" '
-                                               'value="%s,%s"/>'
-                                               % (gtmInst.hostname,
-                                                  peerInst_hostname[:-1]),
-                                               indent)
-                    else:
-                        self.__writeWithIndent(fp,
-                                               '<PARAM name="gtmListenIp1" '
-                                               'value="%s"/>'
-                                               % (gtmInst.listenIps[0]),
-                                               indent)
-                        self.__writeWithIndent(fp,
-                                               '<PARAM name="gtmDir1" '
-                                               'value="%s"/>'
-                                               % (gtmInst.datadir), indent)
-                except IndexError:
-                    # No GTM in this instance - make blank entry...
-                    self.__writeWithIndent(fp,
-                                           '', indent)
-                    self.__writeWithIndent(fp,
-                                           '', indent)
-                    self.__writeWithIndent(fp,
-                                           '<PARAM name="gtmPortBase" '
-                                           'value="%d"/>'
-                                           % (MASTER_BASEPORT_GTM), indent)
-                    self.__writeWithIndent(fp,
-                                           '', indent)
-                    self.__writeWithIndent(fp,
-                                           '', indent)
-                    if not self.isMiniaturizedDeployment(cluster_version):
-                        if not self.isSinglePrimaryMultiStandbyDeployment(
-                                cluster_version):
-                            self.__writeWithIndent(
-                                fp, '<PARAM name="gtmPortStandby" '
-                                'value="%d"/>' % (STANDBY_BASEPORT_GTM),
-                                indent)
-                        self.__writeWithIndent(fp, '', indent)
-                        self.__writeWithIndent(fp, '', indent)
-                        self.__writeWithIndent(fp,
-                                               '<PARAM name="gtmRelation" '
-                                               'value="%s,%s"/>'
-                                               % (local_dbn.name,
-                                                  local_dbn.name), indent)
-            # gtm ending
-
-            # cn beginning
-            if (local_dbn.cooNum > 0):
-                for cooInst in local_dbn.coordinators:
-                    self.__writeWithIndent(fp, '', indent)
-                    self.__writeWithIndent(fp,
-                                           '<PARAM name="cooNum" '
-                                           'value="%d"/>'
-                                           % (local_dbn.cooNum), indent)
-                    self.__writeWithIndent(fp,
-                                           '<PARAM name="cooPortBase" '
-                                           'value="%d"/>'
-                                           % (cooInst.port), indent)
-                    self.__writeWithIndent(fp,
-                                           '<PARAM name="cooListenIp1" '
-                                           'value="%s"/>'
-                                           % (self.listToCSV(
-                                               cooInst.listenIps)), indent)
-                    self.__writeWithIndent(fp,
-                                           '<PARAM name="cooDir1" '
-                                           'value="%s"/>' % (cooInst.datadir),
-                                           indent)
-                    if not self.isMiniaturizedDeployment(cluster_version):
-                        self.__writeWithIndent(fp, '', indent)
-            # cn ending
-
-            # dn beginning
-            if (local_dbn.dataNum > 0 and local_dbn.datanodes[
-                    0].instanceType == MASTER_INSTANCE):
-                # Find master DNs
-                dnList = [dn for dn in local_dbn.datanodes if
-                          dn.instanceRole == INSTANCE_ROLE_DATANODE and
-                          dn.instanceType == MASTER_INSTANCE]
-                if len(dnList) == 0:
-                    # No master DN found in this node, so skip...
-                    indent -= 1
-                    self.__writeWithIndent(fp, '</DEVICE>', indent)
-                    ctr += 1
-                    continue
-                # Find min master DN port value
-                dnPort = dnList[0].port
-                for dn in dnList:
-                    if dnPort > dn.port:
-                        dnPort = dn.port
-
-                if not self.isMiniaturizedDeployment(cluster_version) and not \
-                        self.isSinglePrimaryMultiStandbyDeployment(
-                            cluster_version):
-                    # Find min standby DN port and IP value - need to optimize
-                    snList = [sn for sn in local_dbn.datanodes if
-                              sn.instanceRole == INSTANCE_ROLE_DATANODE and
-                              sn.instanceType == STANDBY_INSTANCE]
-                    snPort = snList[0].port
-                    for sn in snList:
-                        if snPort > sn.port:
-                            snPort = sn.port
-
-                    # Find min dummy standby DN port value - need to optimize
-                    dsnList = [dsn for dsn in local_dbn.datanodes if
-                               dsn.instanceRole == INSTANCE_ROLE_DATANODE and
-                               dsn.instanceType == DUMMY_STANDBY_INSTANCE]
-                    dsnPort = dsnList[0].port
-                    for dsn in dsnList:
-                        if dsnPort > dsn.port:
-                            dsnPort = dsn.port
-
-                if self.isSinglePrimaryMultiStandbyDeployment(
-                        cluster_version):
-                    # Find min standby DN port and IP value - need to optimize
-                    snList = [sn for sn in local_dbn.datanodes if
-                              sn.instanceRole == INSTANCE_ROLE_DATANODE and
-                              sn.instanceType == STANDBY_INSTANCE]
-                    if snList:
-                        snPort = snList[0].port
-                        for sn in snList:
-                            if snPort > sn.port:
-                                snPort = sn.port
-                # DN common part (1/3)
-                self.__writeWithIndent(fp, '', indent)
-                self.__writeWithIndent(fp,
-                                       '<PARAM name="dataNum" value="%d"/>'
-                                       % (local_dbn.dataNum), indent)
-                self.__writeWithIndent(fp,
-                                       '<PARAM name="dataPortBase" '
-                                       'value="%d"/>' % (dnPort), indent)
-                if not self.isMiniaturizedDeployment(cluster_version) and \
-                        not self.isSinglePrimaryMultiStandbyDeployment(
-                            cluster_version):
-                    self.__writeWithIndent(fp,
-                                           '<PARAM name="dataPortStandby" '
-                                           'value="%d"/>' % (snPort), indent)
-                    self.__writeWithIndent(fp,
-                                           '<PARAM '
-                                           'name="dataPortDummyStandby" '
-                                           'value="%d"/>' % (dsnPort),
-                                           indent)
-
-                i = 1
-                dnInst = None
-                for dnInst in dnList:
-                    if not self.isMiniaturizedDeployment(cluster_version):
-                        # Find SNs
-                        instances = self.getPeerInstance(dnInst)
-                        snList = [sn for sn in instances if
-                                  sn.instanceRole == INSTANCE_ROLE_DATANODE and
-                                  sn.instanceType == STANDBY_INSTANCE]
-                        snListenIP = ''
-                        snHaIP = ''
-                        snHostNm = ''
-                        snDir = ''
-                        sn_HostNm_Dir = ''
-                        sn_Xlog_Dir = ''
-                        if len(snList) == 0:
-                            # Will it ever come here - can be removed???
-                            print("<> No SN found for DN(%s)" % (
-                                dnInst.name))
-                        else:
-                            for sn in snList:
-                                snListenIP = snListenIP + sn.listenIps[0] + ','
-                                snHostNm = snHostNm + sn.hostname + ','
-                                snDir = snDir + sn.datadir + ','
-                                sn_HostNm_Dir = sn_HostNm_Dir + sn.hostname \
-                                                + ',' + sn.datadir + ','
-                                sn_Xlog_Dir = sn_Xlog_Dir + sn.xlogdir + ','
-                                snHaIP = snHaIP + sn.haIps[0] + ','
-
-                    # The ListenIP entry needs to be written once only per
-                    # host. Part (2/3)
-                    if i == 1:
-                        if self.isMiniaturizedDeployment(cluster_version):
-                            self.__writeWithIndent(fp,
-                                                   '<PARAM '
-                                                   'name="dataListenIp1" '
-                                                   'value="%s,%s"/>' % (
-                                                       dnInst.listenIps[0],
-                                                       dnInst.listenIps[0]),
-                                                   indent)
-                        elif self.isSinglePrimaryMultiStandbyDeployment(
-                                cluster_version):
-                            self.__writeWithIndent(
-                                fp, '<PARAM name="dataListenIp1" '
-                                'value="%s,%s"/>' % (dnInst.listenIps[0],
-                                                     snListenIP[:-1]), indent)
-                            self.__writeWithIndent(fp,
-                                                   '<PARAM name="dataHaIp1" '
-                                                   'value="%s,%s"/>'
-                                                   % (dnInst.listenIps[0],
-                                                      snHaIP[:-1]), indent)
-                        else:
-                            self.__writeWithIndent(fp,
-                                                   '<PARAM '
-                                                   'name="dataListenIp1" '
-                                                   'value="%s,%s"/>'
-                                                   % (dnInst.listenIps[0],
-                                                      snListenIP[:-1]), indent)
-                    # Find DSNs
-                    if not self.isMiniaturizedDeployment(cluster_version) and \
-                            not self.isSinglePrimaryMultiStandbyDeployment(
-                                cluster_version):
-                        instances = self.getPeerInstance(dnInst)
-                        dsnList = [dsn for dsn in instances if
-                                   dsn.instanceRole == INSTANCE_ROLE_DATANODE
-                                   and dsn.instanceType ==
-                                   DUMMY_STANDBY_INSTANCE]
-                        if len(dsnList) == 0:
-                            # Will it ever come here - can be removed???
-                            print("<> No DSN found for DN(%s)" % (
-                                dnInst.name))
-                            dsnHostNm = ''
-                            dsnDir = ''
-                        else:
-                            dsnHostNm = dsnList[0].hostname
-                            dsnDir = dsnList[0].datadir
-                    # DN repeated part (3/3)
-                    if self.isMiniaturizedDeployment(cluster_version):
-                        self.__writeWithIndent(fp,
-                                               '<PARAM name="dataNode%d" '
-                                               'value="%s"/>'
-                                               % (i, dnInst.datadir), indent)
-                    elif self.isSinglePrimaryMultiStandbyDeployment(
-                            cluster_version):
-                        self.__writeWithIndent(fp,
-                                               '<PARAM name="dataNode%d" '
-                                               'value="%s,%s"/>'
-                                               % (i, dnInst.datadir,
-                                                  sn_HostNm_Dir[:-1]), indent)
-                        if dnInst.xlogdir != '':
-                            self.__writeWithIndent(fp,
-                                                   '<PARAM name='
-                                                   '"dataNodeXlogPath%d" '
-                                                   'value="%s,%s"/>'
-                                                   % (i, dnInst.xlogdir,
-                                                      sn_Xlog_Dir[:-1]),
-                                                   indent)
-                    else:
-                        self.__writeWithIndent(fp,
-                                               '<PARAM name="dataNode%d" '
-                                               'value="%s,%s,%s,%s,%s"/>'
-                                               % (i, dnInst.datadir,
-                                                  snHostNm[:-1], snDir[:-1],
-                                                  dsnHostNm, dsnDir), indent)
-                        if dnInst.xlogdir != '':
-                            self.__writeWithIndent(fp,
-                                                   '<PARAM name='
-                                                   '"dataNodeXlogPath%d" '
-                                                   'value="%s,%s"/>'
-                                                   % (i, dnInst.xlogdir,
-                                                      sn_Xlog_Dir[:-1]),
-                                                   indent)
-                    self.__writeWithIndent(fp,
-                                           '<PARAM name="dataNode%d_syncNum" '
-                                           'value="0"/>'
-                                           % (i), indent)
-                    i += 1
-                if not self.isMiniaturizedDeployment(cluster_version):
-                    self.__writeWithIndent(fp,
-                                           '<PARAM name="dataHaIp1" '
-                                           'value="%s"/>'
-                                           % (dnInst.listenIps[0]), indent)
-            # dn ending
-
-            # Device ending
-            indent -= 1
-            self.__writeWithIndent(fp, '</DEVICE>', indent)
-            ctr += 1
-        self.__writeWithIndent(fp, '</DEVICELIST>', indent)
-        self.__writeWithIndent(fp, '</ROOT>', indent)
-        fp.close()
-
-    def __getInstsInNode(self, nodeName):
-        """
-        function: get instances in the specified node
-        input: node name
-        output: instances list
-        """
-        for node in self.dbNodes:
-            if node.name == nodeName:
-                insts = node.etcds + node.cmservers + node.datanodes \
-                        + node.coordinators + node.gtms
-                return insts
-        return []
-
-    def __getAllInsts(self):
-        """
-        function: get all instances
-        input: NA
-        output: all instances list
-        """
-        insts = []
-        for node in self.dbNodes:
-            insts += node.etcds + node.cmservers + node.datanodes \
-                     + node.coordinators + node.gtms
-        return insts
-
-    def getInstances(self, nodeName=""):
-        """
-        function: get instances in the cluster; if nodeName is specified,
-                  return the instances in that node
-        input: node name
-        output: all instances
-        """
-        if nodeName:
-            insts = self.__getInstsInNode(nodeName)
-        else:
-            insts = self.__getAllInsts()
-        return insts
-
-    def isSingleInstCluster(self):
-        return (self.clusterType == CLUSTER_TYPE_SINGLE_INST)
-
-    def getEtcdAddress(self):
-        """
-        function: get etcd address
-        input: NA
-        output: etcd address
-        """
-        etcds = []
-        etcdAddress = ""
-        for node in self.dbNodes:
-            etcds += node.etcds
-        for etcd in etcds:
-            etcdAddress += "https://%s:%s," % (
-                etcd.listenIps[0], etcd.clientPort)
-        return etcdAddress.strip(",")
-
-    def mergeClusterInfo(self, oldClusterInfo, newClusterInfo):
-        """
-        function: merge the old and new cluster info
-        input: oldClusterInfo, newClusterInfo
-        output: NA
-        """
-        # should not modify newClusterInfo, so deepcopy
-        tmpClusterInfo = copy.deepcopy(newClusterInfo)
-
-        # name/clusterName differ between the old and new cluster.
-        # clusterType/appPath/logPath/toolPath/tmpPath are the same in the
-        # old and new cluster.
-        self.name = tmpClusterInfo.name
-        self.clusterName = tmpClusterInfo.clusterName
-        self.clusterType = tmpClusterInfo.clusterType
-        self.appPath = tmpClusterInfo.appPath
-        self.logPath = tmpClusterInfo.logPath
-        self.toolPath = tmpClusterInfo.toolPath
-        self.tmpPath = tmpClusterInfo.tmpPath
-
-        # get max nodeId of the old cluster.
-        maxNodeId = max(
-            [int(oldNode.id) for oldNode in oldClusterInfo.dbNodes])
-        maxNodeId += 1
-
-        for dbNode in tmpClusterInfo.dbNodes:
-            # CMS/GTM/ETCD will be dropped in the merged cluster.
-            dbNode.cmservers = []
-            dbNode.gtms = []
-            dbNode.etcds = []
-
-            # nodeId will be appended to the old cluster.
-            dbNode.id = maxNodeId
-            maxNodeId += 1
-
-        self.dbNodes = oldClusterInfo.dbNodes + tmpClusterInfo.dbNodes
-        self.newNodes = tmpClusterInfo.dbNodes
-
-    def isSingleNode(self):
-        return (self.__getDnInstanceNum() <= 1)
-
-    def doRefreshConf(self, user, localHostName, sshtool):
-        self.__createDynamicConfig(user, localHostName, sshtool)
-        self.__create_simple_datanode_config(user, localHostName, sshtool)
-        self.__reset_replconninfo(user, sshtool)
-
-    def __createDynamicConfig(self, user, localHostName, sshtool):
-        """
-        function : Save cluster info into the dynamic config file
-        input : String, int
-        output : NA
-        """
-        # only one dn, no need to write primary or standby node info
-        dynamicConfigFile = self.__getDynamicConfig(user)
-        if os.path.exists(dynamicConfigFile):
-            cmd = "rm -f %s" % dynamicConfigFile
-            (status, output) = subprocess.getstatusoutput(cmd)
-            if status != 0:
-                raise Exception(ErrorCode.GAUSS_504["GAUSS_50407"] +
-                                " Error: \n%s." % str(output) +
-                                "The cmd is %s" % cmd)
-        fp = None
-        try:
-            g_file.createFileInSafeMode(dynamicConfigFile)
-            fp = open(dynamicConfigFile, "wb")
-            # len
-            info = struct.pack("I", 24)
-            # version
-            info += struct.pack("I", BIN_CONFIG_VERSION_SINGLE_INST)
-            # time
-            info += struct.pack("q", int(time.time()))
-            # node count
-            info += struct.pack("I", len(self.dbNodes))
-            crc = binascii.crc32(info)
-            info = struct.pack("I", crc) + info
-            fp.write(info)
-            primaryDnNum = 0
-            for dbNode in self.dbNodes:
-                offset = (fp.tell() // PAGE_SIZE + 1) * PAGE_SIZE
-                fp.seek(offset)
-                (primaryNodeNum, info) = self.__packDynamicNodeInfo(
-                    dbNode, localHostName, sshtool)
-                primaryDnNum += primaryNodeNum
-                fp.write(info)
-            if primaryDnNum != 1:
-                raise Exception(ErrorCode.GAUSS_512["GAUSS_51230"] %
-                                ("master dn", "equal to 1"))
-            endBytes = PAGE_SIZE - fp.tell() % PAGE_SIZE
-            if endBytes != PAGE_SIZE:
-                info = struct.pack("%dx" % endBytes)
-                fp.write(info)
-            fp.flush()
-            fp.close()
-            os.chmod(dynamicConfigFile, KEY_FILE_PERMISSION)
-        except Exception as e:
-            if fp:
-                fp.close()
-            cmd = "rm -f %s" % dynamicConfigFile
-            subprocess.getstatusoutput(cmd)
-            raise Exception(ErrorCode.GAUSS_502["GAUSS_50205"] %
-                            "dynamic configuration file"
-                            + " Error: \n%s" % str(e))
-        try:
-            self.__sendDynamicCfgToAllNodes(localHostName,
-                                            dynamicConfigFile,
-                                            dynamicConfigFile)
-        except Exception as e:
-            cmd = "rm -f %s" % dynamicConfigFile
-            sshtool.getSshStatusOutput(cmd, self.getClusterNodeNames())
-            raise Exception(ErrorCode.GAUSS_502["GAUSS_50205"] %
-                            "dynamic configuration file" +
-                            " Error: \n%s" % str(e))
-
-    def __create_simple_datanode_config(self, user, localhostname, sshtool):
-        simpleDNConfig = self.__getDynamicSimpleDNConfig(user)
-        if os.path.exists(simpleDNConfig):
-            cmd = "rm -f %s" % simpleDNConfig
-            (status, output) = subprocess.getstatusoutput(cmd)
-            if status != 0:
-                raise Exception(ErrorCode.GAUSS_504["GAUSS_50407"] +
-                                " Error: \n%s." % str(output) +
-                                "The cmd is %s" % cmd)
-        tempstatus = self.__getStatusByOM(user).split("|")
-        statusdic = {'Primary': 0, 'Standby': 1, 'Cascade': 3, 'Unknown': 9}
-        try:
-            with open(simpleDNConfig, "w") as fp:
-                for dninfo in tempstatus:
-                    dnstatus = dninfo.split()[6]
-                    dnname = dninfo.split()[1]
-                    if dnstatus not in statusdic:
-                        fp.write("%s=%d\n" %
-                                 (dnname, statusdic['Unknown']))
-                    else:
-                        fp.write("%s=%d\n" %
-                                 (dnname, statusdic[dnstatus]))
-        except Exception as e:
-            cmd = "rm -f %s" % simpleDNConfig
-            subprocess.getstatusoutput(cmd)
-            raise Exception(ErrorCode.GAUSS_502["GAUSS_50205"] %
-                            "dynamic configuration file"
-                            + " Error: \n%s" % str(e))
-        try:
-            self.__sendDynamicCfgToAllNodes(localhostname,
-                                            simpleDNConfig,
-                                            simpleDNConfig)
-        except Exception as e:
-            cmd = "rm -f %s" % simpleDNConfig
-            sshtool.getSshStatusOutput(cmd, self.getClusterNodeNames())
-            raise Exception(ErrorCode.GAUSS_502["GAUSS_50205"] %
-                            "dynamic configuration file" +
-                            " Error: \n%s" % str(e))
-
-    def __reset_replconninfo(self, user, sshtool):
-        # added for cascade standby
-        local_script = os.path.dirname(os.path.realpath(__file__)) \
-                       + '/../../local/Resetreplconninfo.py'
-        cmd = "python3 %s -U %s -t reset" % (local_script, user)
-        (status, output) = \
-            sshtool.getSshStatusOutput(cmd, self.getClusterNodeNames())
-        for node in self.getClusterNodeNames():
-            if status[node] != 'Success':
-                raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"]
-                                % cmd + "Error:\n%s" % output)
-
-    def __packDynamicNodeInfo(self, dbNode, localHostName, sshtool):
-        # node id
-        info = struct.pack("I", dbNode.id)
-        # node name
-        info += struct.pack("64s", dbNode.name.encode("utf-8"))
-        info += struct.pack("I", len(dbNode.datanodes))
-        primaryNum = 0
-        for dnInst in dbNode.datanodes:
-            self.__getDnState(dnInst, dbNode, localHostName, sshtool)
-            instanceType = 0
-            if dnInst.localRole == "Primary":
-                instanceType = MASTER_INSTANCE
-                primaryNum += 1
-            elif dnInst.localRole == "Cascade Standby":
-                instanceType = CASCADE_STANDBY
-            else:
-                instanceType = STANDBY_INSTANCE
-            info += struct.pack("I", dnInst.instanceId)
-            # datanode mirror id
-            info += struct.pack("I", dnInst.mirrorId)
-            # instanceType such as master, standby, dummy standby
-            info += struct.pack("I", instanceType)
-            # datadir
-            info += struct.pack("1024s", dnInst.datadir.encode("utf-8"))
-            info += struct.pack("I", 0)
-            info += struct.pack("I", 0)
-        crc = binascii.crc32(info)
-        return (primaryNum, struct.pack("I", crc) + info)
-
-    def __getClusterSwitchTime(self, dynamicConfigFile):
-        """
-        function : get the cluster switch time
-                   from the dynamic configuration file
-        input : String
-        output : switchTime
-        """
-        fp = None
-        try:
-            fp = open(dynamicConfigFile, "rb")
-            info = fp.read(24)
-            (crc, length, version, switchTime, nodeNum) = \
-                struct.unpack("=IIIqi", info)
-            fp.close()
-        except Exception as e:
-            if fp:
-                fp.close()
-            raise Exception(ErrorCode.GAUSS_512["GAUSS_51236"]
-                            + " Error: \n%s." % str(e))
-        return switchTime
-
-    def __getDynamicConfig(self, user):
-        gaussHome = self.__getEnvironmentParameterValue("GAUSSHOME", user)
-        if gaussHome == "":
-            raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] %
-                            ("installation path of designated user [%s]"
-                             % user))
-        # if under upgrade and the user chose the strategy, we may get a
-        # wrong path, so we use the realpath of GAUSSHOME
-        gaussHome = os.path.realpath(gaussHome)
-        dynamicConfigFile = "%s/bin/cluster_dynamic_config" % gaussHome
-        return dynamicConfigFile
-
-    def __getDynamicSimpleDNConfig(self, user):
-        gaussHome = self.__getEnvironmentParameterValue("GAUSSHOME", user)
-        if gaussHome == "":
-            raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] %
-                            ("installation path of designated user [%s]"
-                             % user))
-        # if under upgrade and the user chose the strategy, we may get a
-        # wrong path, so we use the realpath of GAUSSHOME
-        gaussHome = os.path.realpath(gaussHome)
-        dynamicSimpleDNConfigFile = "%s/bin/cluster_dnrole_config" % gaussHome
-        return dynamicSimpleDNConfigFile
-
-    def dynamicConfigExists(self, user):
-        dynamicConfigFile = self.__getDynamicConfig(user)
-        return os.path.exists(dynamicConfigFile)
-
-    def checkClusterDynamicConfig(self, user, localHostName):
-        """
-        function : make sure the dynamic config file on all nodes is
-                   the newest.
-        input : String
-        output : none
-        """
-        if self.__getDnInstanceNum() <= 1:
-            return
-        gaussHome = self.__getEnvironmentParameterValue("GAUSSHOME", user)
-        if gaussHome == "":
-            raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] %
-                            ("installation path of designated user [%s]"
-                             % user))
-        # if under upgrade and the user chose the strategy, we may get a
-        # wrong path, so we use the realpath of GAUSSHOME
-        gaussHome = os.path.realpath(gaussHome)
-        dynamicConfigFile = "%s/bin/cluster_dynamic_config" % gaussHome
-        lastSwitchTime = 0
-        lastDynamicConfigFile = ""
-        fileConsistent = False
-        fileExist = False
-        if os.path.exists(dynamicConfigFile):
-            lastSwitchTime = self.__getClusterSwitchTime(dynamicConfigFile)
-            lastDynamicConfigFile = dynamicConfigFile
-            fileExist = True
-            fileConsistent = True
-        for dbNode in self.dbNodes:
-            remoteDynamicConfigFile = "%s/bin/cluster_dynamic_config_%s" \
-                                      % (gaussHome, dbNode.name)
-            if dbNode.name != localHostName:
-                cmd = "scp %s:%s %s" % (
-                    dbNode.name, dynamicConfigFile, remoteDynamicConfigFile)
-                status, output = subprocess.getstatusoutput(cmd)
-                if status:
-                    if output.find("No such file or directory") >= 0:
-                        fileConsistent = False
-                        continue
-                    raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd
-                                    + " Error:\n" + output)
-                if os.path.exists(remoteDynamicConfigFile):
-                    fileExist = True
-                    switchTime = self.__getClusterSwitchTime(
-                        remoteDynamicConfigFile)
-                    if switchTime > lastSwitchTime:
-                        lastSwitchTime = switchTime
-                        lastDynamicConfigFile = remoteDynamicConfigFile
-                        fileConsistent = False
-                    elif switchTime < lastSwitchTime:
-                        fileConsistent = False
-        # if the dynamic config file exists but the file times differ,
-        # send the valid file to all nodes
-        if fileExist:
-            if not fileConsistent:
-                self.__sendDynamicCfgToAllNodes(localHostName,
-                                                lastDynamicConfigFile,
-                                                dynamicConfigFile)
-            cleanCmd = "rm -f %s/bin/cluster_dynamic_config_*" % gaussHome
-            subprocess.getstatusoutput(cleanCmd)
-
-    def __sendDynamicCfgToAllNodes(self,
-                                   localHostName,
-                                   sourceFile,
-                                   targetFile):
-        status = 0
-        output = ""
-        for dbNode in self.dbNodes:
-            if dbNode.name == localHostName:
-                if sourceFile != targetFile:
-                    cmd = "cp -f %s %s" % (sourceFile, targetFile)
-                    status, output = subprocess.getstatusoutput(cmd)
-            else:
-                cmd = "scp %s %s:%s" % (sourceFile, dbNode.name, targetFile)
-                status, output = subprocess.getstatusoutput(cmd)
-            if status:
-                raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd +
-                                " Error:\n" + output)
-
-    def readDynamicConfig(self, user):
-        """
-        function : read cluster information from the dynamic configuration
-                   file; only used to start the cluster after a switchover
-        input : String
-        output : NA
-        """
-        fp = None
-        try:
-            self.name = self.__getEnvironmentParameterValue("GS_CLUSTER_NAME",
-                                                            user)
-            self.appPath = self.__getEnvironmentParameterValue("GAUSSHOME",
-                                                               user)
-            logPathWithUser = self.__getEnvironmentParameterValue("GAUSSLOG",
-                                                                  user)
-            splitMark = "/%s" % user
-            # set log path without user:
-            # find the path from right to left
-            self.logPath = \
-                logPathWithUser[0:(logPathWithUser.rfind(splitMark))]
-            dynamicConfigFile = self.__getDynamicConfig(user)
-            # read dynamic_config_file
-            dynamicConfigFilePath = os.path.split(dynamicConfigFile)[0]
-            versionFile = os.path.join(
-                dynamicConfigFilePath, "upgrade_version")
-            version, number, commitid = VersionInfo.get_version_info(
-                versionFile)
-            fp = open(dynamicConfigFile, "rb")
-            if float(number) <= 92.200:
-                info = fp.read(28)
-                (crc, length, version, currenttime, nodeNum) = \
-                    struct.unpack("=qIIqi", info)
-            else:
-                info = fp.read(24)
-                (crc, length, version, currenttime, nodeNum) = \
-                    struct.unpack("=IIIqi", info)
-            totalMasterDnNum = 0
-            for i in range(nodeNum):
-                offset = (fp.tell() // PAGE_SIZE + 1) * PAGE_SIZE
-                fp.seek(offset)
-                (dbNode, masterDnNum) = self.__unpackDynamicNodeInfo(fp,
-                                                                     number)
-                totalMasterDnNum += masterDnNum
-                self.dbNodes.append(dbNode)
-            if totalMasterDnNum != 1:
-                raise Exception(ErrorCode.GAUSS_512["GAUSS_51230"] %
-                                ("master dn", "1"))
-            fp.close()
-        except Exception as e:
-            if fp:
-                fp.close()
-            raise Exception(ErrorCode.GAUSS_502["GAUSS_50204"] %
-                            dynamicConfigFile + " Error:\n" + str(e))
-
-    def __unpackDynamicNodeInfo(self, fp, number):
-        if float(number) <= 92.200:
-            info = fp.read(76)
-            (crc, nodeId, nodeName) = struct.unpack("=qI64s", info)
-        else:
-            info = fp.read(72)
-            (crc, nodeId, nodeName) = struct.unpack("=II64s", info)
-        nodeName = nodeName.decode().strip('\x00')
-        dbNode = dbNodeInfo(nodeId, nodeName)
-        info = fp.read(4)
-        (dataNodeNums,) = struct.unpack("=I", info)
-        dbNode.datanodes = []
-        masterDnNum = 0
-        for i in range(dataNodeNums):
-            dnInst = instanceInfo()
-            dnInst.hostname = nodeName
-            info = fp.read(12)
-            (dnInst.instanceId, dnInst.mirrorId, dnInst.instanceType) = \
-                struct.unpack("=III", info)
-            if dnInst.instanceType == MASTER_INSTANCE:
-                masterDnNum += 1
-            elif dnInst.instanceType not in [STANDBY_INSTANCE,
-                                             DUMMY_STANDBY_INSTANCE,
-                                             CASCADE_STANDBY]:
-                raise Exception(ErrorCode.GAUSS_512["GAUSS_51204"] %
-                                ("DN", dnInst.instanceType))
-            info = fp.read(1024)
-            (datadir,) = struct.unpack("=1024s", info)
-            dnInst.datadir = datadir.decode().strip('\x00')
-            dbNode.datanodes.append(dnInst)
-        return (dbNode, masterDnNum)
--
Gitee
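
Note on the record format used by __createDynamicConfig/__packDynamicNodeInfo and
read back by readDynamicConfig/__unpackDynamicNodeInfo in the patch above: each
record is a fixed-width struct payload with a CRC32 prepended. Below is a minimal,
self-contained sketch of that round trip; the field choice (node id, 64-byte name,
datanode count) mirrors the node header in the patch, while the function names and
sample values here are illustrative only.

    import binascii
    import struct

    def pack_node(node_id, name, dn_count):
        # payload: node id | 64-byte zero-padded node name | datanode count
        body = struct.pack("=I64sI", node_id, name.encode("utf-8"), dn_count)
        # CRC32 is computed over the payload only and prepended as a 4-byte "I"
        return struct.pack("=I", binascii.crc32(body)) + body

    def unpack_node(record):
        crc, node_id, raw_name, dn_count = struct.unpack("=II64sI", record)
        if binascii.crc32(record[4:]) != crc:
            raise ValueError("corrupted record")
        return node_id, raw_name.decode().strip("\x00"), dn_count

    assert unpack_node(pack_node(1, "node1", 2)) == (1, "node1", 2)

One subtlety worth noting: the writers in the patch pack with native-alignment
format strings ("I", "64s") while the readers unpack with "="-prefixed ones; for
these 4-byte and char-array fields the two layouts coincide on common platforms,
which is why the round trip holds.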