diff --git a/pkg/ring-controller/agent/deploymentworker.go b/pkg/ring-controller/agent/deploymentworker.go index bde839194021e76a723aeca424d674faafaa93fa..69f830e33e15294fb3d8e83c508b1498247b07e4 100644 --- a/pkg/ring-controller/agent/deploymentworker.go +++ b/pkg/ring-controller/agent/deploymentworker.go @@ -62,8 +62,8 @@ func (w *DeployWorker) doWork(pod *apiCoreV1.Pod, podInfo *podIdentifier) (bool, } // scenario check C: if current pod use chip, its' device info may not be ready // check basis: limits + annotations - if (podInfo.eventType == EventAdd || podInfo.eventType == EventUpdate) && !isPodAnnotationsReady(pod, - podInfo.String()) { + if (podInfo.eventType == EventAdd || podInfo.eventType == EventUpdate) && (!isPodAnnotationsReady(pod, + podInfo.String()) || pod.Status.PodIP == "") { return false, false } if w.configmapData.GetStatus() == ConfigmapCompleted { diff --git a/pkg/ring-controller/agent/vcjobworker.go b/pkg/ring-controller/agent/vcjobworker.go index 38f1aa109a5f85737cc5ce5f12b5a8a22a837ef4..1e3b878e6f58a9c701b03dc66c37ffc436c0281c 100644 --- a/pkg/ring-controller/agent/vcjobworker.go +++ b/pkg/ring-controller/agent/vcjobworker.go @@ -249,8 +249,8 @@ func (b *VCJobWorker) doPreCheck(pod *apiCoreV1.Pod, podInfo *podIdentifier) (bo } // scenario check C: if current pod use chip, its' device info may not be ready // check basis: limits + annotations - if (podInfo.eventType == EventAdd || podInfo.eventType == EventUpdate) && !isPodAnnotationsReady(pod, - podInfo.String()) { + if (podInfo.eventType == EventAdd || podInfo.eventType == EventUpdate) && (!isPodAnnotationsReady(pod, + podInfo.String()) || pod.Status.PodIP == "") { return false, false, fmt.Errorf("pod %s doesn't have device info, so no longer dealing with it", podInfo) } diff --git a/pkg/ring-controller/ranktable/v2/ranktable.go b/pkg/ring-controller/ranktable/v2/ranktable.go index aa31ff7af98e0467793374101dbcdc1b7905106d..b99872eeb90b2868ff5cbfa18905361dbb7243ef 100644 --- a/pkg/ring-controller/ranktable/v2/ranktable.go +++ b/pkg/ring-controller/ranktable/v2/ranktable.go @@ -56,6 +56,7 @@ func (r *RankTable) CachePodInfo(pod *apiCoreV1.Pod, instance ranktablev1.Instan // Build new server-level struct from device info server.ServerID = instance.ServerID server.PodID = pod.UID + server.ContainerIP = pod.Status.PodIP rankFactor := len(instance.Devices) if rankFactor > common.A800MaxChipNum { return fmt.Errorf("get error device num(%d), device num is too big", rankFactor) diff --git a/pkg/ring-controller/ranktable/v2/types.go b/pkg/ring-controller/ranktable/v2/types.go index b8beba60e6937fa0a30d1c1096c85da8386f07fb..d17b5c6401727f4e7d8b3981131436a07c292b29 100644 --- a/pkg/ring-controller/ranktable/v2/types.go +++ b/pkg/ring-controller/ranktable/v2/types.go @@ -33,9 +33,10 @@ type RankTable struct { // Server to hccl type Server struct { - DeviceList []*Device `json:"device"` // device list in each server - ServerID string `json:"server_id"` // server id, represented by ip address - PodID types.UID `json:"-"` // pod id, equal to the last integer of pod name + DeviceList []*Device `json:"device"` // device list in each server + ServerID string `json:"server_id"` // server id, represented by ip address + PodID types.UID `json:"-"` // pod id, equal to the last integer of pod name + ContainerIP string `json:"container_ip,omitempty"` } // Device to hccl