From 25ff10e6e7466997a140a5a79d4fa476e81f429c Mon Sep 17 00:00:00 2001
From: 何霖
Date: Thu, 7 Mar 2024 10:52:56 +0800
Subject: [PATCH 001/302] Adapt the build scripts to the blue-zone CI
 environment
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 build/build.sh                     | 12 ------------
 build/build_tf1_with_opensource.sh |  2 ++
 build/build_tf2_with_opensource.sh |  2 ++
 tests/run_python_dt.sh             |  5 +++++
 4 files changed, 9 insertions(+), 12 deletions(-)

diff --git a/build/build.sh b/build/build.sh
index ad7db096..0eb688fd 100644
--- a/build/build.sh
+++ b/build/build.sh
@@ -103,14 +103,8 @@ clean()
 if [ "$(uname -m)" = "x86_64" ]
 then
     echo "-----Build gen tar -----"
-    source /opt/buildtools/tf1_env/bin/activate
-    pip3 install setuptools==65.6.3
     bash ${ROOT_DIR}/build/build_tf1_with_opensource.sh
-    deactivate tf1_env
-    source /opt/buildtools/tf2_env/bin/activate
-    pip3 install setuptools==65.6.3
     bash ${ROOT_DIR}/build/build_tf2_with_opensource.sh
-    deactivate tf2_env
 
     gen_tar_file
     echo "-----Build gen tar finished-----"
@@ -121,14 +115,8 @@ fi
 if [ "$(uname -m)" = "aarch64" ]
 then
     echo "-----Build gen tar -----"
-    source /opt/buildtools/tf1_env/bin/activate
-    pip3 install setuptools==65.6.3
     bash ${ROOT_DIR}/build/build_tf1_with_opensource.sh
-    deactivate tf1_env
-    source /opt/buildtools/tf2_env/bin/activate
-    pip3 install setuptools==65.6.3
     bash ${ROOT_DIR}/build/build_tf2_with_opensource.sh
-    deactivate tf2_env
 
     gen_tar_file
     echo "-----Build gen tar finished-----"
diff --git a/build/build_tf1_with_opensource.sh b/build/build_tf1_with_opensource.sh
index 37cfcf64..ff59571c 100644
--- a/build/build_tf1_with_opensource.sh
+++ b/build/build_tf1_with_opensource.sh
@@ -60,7 +60,9 @@ prepare_pybind
 prepare_securec
 
 # Configure the TF1 path
+source /opt/buildtools/tf1_env/bin/activate
 tf1_path=$(dirname "$(dirname "$(which python3.7)")")/lib/python3.7/site-packages/tensorflow_core
+deactivate tf1_env
 
 project_output_path="${MxRec_DIR}"/output/
 VERSION_FILE="${MxRec_DIR}"/../mindxsdk/build/conf/config.yaml
diff --git a/build/build_tf2_with_opensource.sh b/build/build_tf2_with_opensource.sh
index bf4a5b03..08aaf164 100644
--- a/build/build_tf2_with_opensource.sh
+++ b/build/build_tf2_with_opensource.sh
@@ -60,7 +60,9 @@ prepare_pybind
 prepare_securec
 
 # Configure the TF2 path
+source /opt/buildtools/tf2_env/bin/activate
 tf2_path=$(dirname "$(dirname "$(which python3.7)")")/lib/python3.7/site-packages/tensorflow
+deactivate tf2_env
 
 project_output_path="${MxRec_DIR}"/output/
 VERSION_FILE="${MxRec_DIR}"/../mindxsdk/build/conf/config.yaml
diff --git a/tests/run_python_dt.sh b/tests/run_python_dt.sh
index e0d92666..a64a0913 100644
--- a/tests/run_python_dt.sh
+++ b/tests/run_python_dt.sh
@@ -20,6 +20,11 @@ set -e
 CUR_PATH=$(cd "$(dirname "$0")" || { warn "Failed to check path/to/run_python_dt.sh" ; exit ; } ; pwd)
 TOP_PATH="${CUR_PATH}"/../
 
+ARCH="$(uname -m)"
+if [ "$ARCH" = "aarch64" ]; then
+    export LD_PRELOAD=/usr/local/gcc7.3.0/lib64/libgomp.so.1
+fi
+
 # build mxRec and get output directory
 pip3 install setuptools==65.6.3
 bash "$TOP_PATH"/build/build_tf1_with_opensource.sh
-- 
Gitee

From 7bbe353809783d414ff22dfbb756fe77fbce6bbc Mon Sep 17 00:00:00 2001
From: 何霖
Date: Mon, 11 Mar 2024 19:30:29 +0800
Subject: [PATCH 002/302] Sync the tools directory from the master branch to
 develop
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 tools/atomic/gen_mt_data_0to1e.py             |  78 ++++
 tools/atomic/model_info.md                    |  19 +
 tools/atomic/sparse.sh                        |  60 +++
 tools/atomic/sparse_lookup.py                 | 266 ++++++++++++
 tools/atomic/sparse_lookup_with_grad.py       | 277 +++++++++++++
 tools/atomic/sparse_ops/__init__.py           |   7 +
 tools/atomic/sparse_ops/config.py             | 111 +++++
 tools/atomic/sparse_ops/ops.py                | 133 ++++++
 tools/atomic/sparse_ops/utils.py              |  23 ++
 ...3\346\236\234-tf1.15-rec0630-cann530.xlsx" | Bin 0 -> 32363 bytes
 tools/feature_admit_tools/get_hist.py         |  16 +
 tools/feature_admit_tools/static_key_count.py |  61 +++
 ...71\346\257\224\346\226\271\346\263\225.md" |  21 +
 tools/model_convert/README.md                 | 119 ++++++
 tools/model_convert/model_convert.py          | 287 +++++++++++++
 tools/model_convert/model_convert_mt_v2.py    | 246 +++++++++++
 tools/mx_rec_perf.sh                          |  71 ++++
 tools/parse_data/data_parser.py               | 133 ++++++
 tools/parse_data/run.sh                       |  11 +
 tools/perf/fast.sh                            | 391 ++++++++++++++++++
 tools/perf/host_set.sh                        |  17 +
 tools/perf/msprof.sh                          |  24 ++
 tools/perf/mt_1207.sh                         |  60 +++
 tools/perf/perf_flame_graph.sh                |  37 ++
 tools/python/images/clip_image002.jpg         | Bin 0 -> 9453 bytes
 tools/python/images/clip_image004.jpg         | Bin 0 -> 8027 bytes
 tools/python/images/clip_image006.jpg         | Bin 0 -> 21733 bytes
 tools/python/images/clip_image008.jpg         | Bin 0 -> 26810 bytes
 tools/python/images/clip_image010.jpg         | Bin 0 -> 24851 bytes
 tools/python/images/clip_image012.jpg         | Bin 0 -> 17452 bytes
 tools/python/images/clip_image014.jpg         | Bin 0 -> 18658 bytes
 tools/python/images/clip_image016.jpg         | Bin 0 -> 6056 bytes
 tools/python/images/clip_image018.gif         | Bin 0 -> 70465 bytes
 tools/python/key_2_emb_formatter.py           | 220 ++++++++++
 tools/python/optimizer_process.py             | 116 ++++++
 tools/python/readme.md                        | 110 +++++
 tools/stat_info/main.py                       | 339 +++++++++++++++
 tools/stat_info/readme.md                     |  45 ++
 38 files changed, 3298 insertions(+)
 create mode 100644 tools/atomic/gen_mt_data_0to1e.py
 create mode 100644 tools/atomic/model_info.md
 create mode 100644 tools/atomic/sparse.sh
 create mode 100644 tools/atomic/sparse_lookup.py
 create mode 100644 tools/atomic/sparse_lookup_with_grad.py
 create mode 100644 tools/atomic/sparse_ops/__init__.py
 create mode 100644 tools/atomic/sparse_ops/config.py
 create mode 100644 tools/atomic/sparse_ops/ops.py
 create mode 100644 tools/atomic/sparse_ops/utils.py
 create mode 100644 "tools/atomic/\345\216\237\345\255\220\346\265\213\350\257\225\347\273\223\346\236\234-tf1.15-rec0630-cann530.xlsx"
 create mode 100644 tools/feature_admit_tools/get_hist.py
 create mode 100644 tools/feature_admit_tools/static_key_count.py
 create mode 100644 "tools/feature_admit_tools/\347\211\271\345\276\201\345\207\206\345\205\245\347\262\276\345\272\246\345\257\271\346\257\224\346\226\271\346\263\225.md"
 create mode 100644 tools/model_convert/README.md
 create mode 100644 tools/model_convert/model_convert.py
 create mode 100644 tools/model_convert/model_convert_mt_v2.py
 create mode 100644 tools/mx_rec_perf.sh
 create mode 100644 tools/parse_data/data_parser.py
 create mode 100644 tools/parse_data/run.sh
 create mode 100644 tools/perf/fast.sh
 create mode 100644 tools/perf/host_set.sh
 create mode 100644 tools/perf/msprof.sh
 create mode 100644 tools/perf/mt_1207.sh
 create mode 100644 tools/perf/perf_flame_graph.sh
 create mode 100644 tools/python/images/clip_image002.jpg
 create mode 100644 tools/python/images/clip_image004.jpg
 create mode 100644 tools/python/images/clip_image006.jpg
 create mode 100644 tools/python/images/clip_image008.jpg
 create mode 100644 tools/python/images/clip_image010.jpg
 create mode 100644 tools/python/images/clip_image012.jpg
 create mode 100644 tools/python/images/clip_image014.jpg
 create mode 100644 tools/python/images/clip_image016.jpg
 create mode 100644 tools/python/images/clip_image018.gif
 create mode 100644 tools/python/key_2_emb_formatter.py
 create mode 100644 tools/python/optimizer_process.py
 create mode 100644 tools/python/readme.md
 create mode 100644 tools/stat_info/main.py
 create mode 100644 tools/stat_info/readme.md

diff --git a/tools/atomic/gen_mt_data_0to1e.py b/tools/atomic/gen_mt_data_0to1e.py
new file mode 100644
index 00000000..b9c89c65
--- /dev/null
+++ b/tools/atomic/gen_mt_data_0to1e.py
@@ -0,0 +1,78 @@
+import os
+import random
+import sys
+
+import numpy as np
+import tensorflow as tf
+
+np.random.seed(0)
+
+line_per_sample = 10000
+samples_num = 10000 * 800
+sparse_feat_list = ['feat_ids']
+# TODO: keep feature lengths in sync with sparse_feat_list
+sparse_feat_len = [100]
+
+# uniq_ratio = pd.read_csv("./uniq_ratio.csv")
+# uniq_ratio["uniq_num"] = round(uniq_ratio["uniq_ratio"] * 301)
+
+num = 0
+
+# first CLI argument: hot-key (repetition) ratio in tenths, e.g. 5 -> 50% repeated keys
+hot_zhanbi = float(sys.argv[1]) / 10
+print(hot_zhanbi)
+
+tfpath = "/home/insert/data" + str(hot_zhanbi)
+if not os.path.exists(tfpath):
+    os.mkdir(tfpath)
+
+tfpath = "/home/insert/data" + str(hot_zhanbi) + "/tf"
+
+part1 = np.array(random.sample(range(0, 2), 1))
+
+
+def write_records(writer, line_cnt, file_cnt):
+    features = {
+        'label': tf.train.Feature(
+            float_list=tf.train.FloatList(value=np.random.randint(2, size=line_per_sample).tolist()))
+    }
+
+    count = 0
+    for i, sparse_feat in enumerate(sparse_feat_list):
+        np.random.seed(count)
+        print("===sparse=", sparse_feat)
+        # cold keys: unique ids drawn from a window that advances with the file/line index
+        part2 = np.array(random.sample(
+            range(100 * line_per_sample * (10 * file_cnt + line_cnt),
+                  100 * line_per_sample * (10 * file_cnt + line_cnt + 1)),
+            int(100 * line_per_sample * (1 - hot_zhanbi))))
+        # hot keys: one repeated id, replicated to reach the requested hot ratio
+        features[sparse_feat] = tf.train.Feature(
+            int64_list=tf.train.Int64List(
+                value=part1.astype(np.int64).tolist() * int(100 * line_per_sample * hot_zhanbi)
+                      + part2.astype(np.int64).tolist())
+        )
+
+        count += 1
+    features = tf.train.Features(feature=features)
+    example = tf.train.Example(features=features)
+    writer.write(example.SerializeToString())
+
+
+def gen_tfrecords(tfpath):
+    file_cnt = 0
+    line_per_file = 10
+    line_cnt = 0
+    writer = tf.python_io.TFRecordWriter(f"{tfpath}_{file_cnt}.tfrecord")
+    sample_cnt = 0
+    while True:
+        write_records(writer, line_cnt, file_cnt)
+        line_cnt += 1
+        sample_cnt += line_per_sample
+        print(f">>>>>>>>>>>>count {sample_cnt} end.")
+        if sample_cnt == samples_num:
+            break
+        if line_cnt == line_per_file:
+            file_cnt += 1
+            line_cnt = 0
+            writer.close()
+            writer = tf.python_io.TFRecordWriter(f"{tfpath}_{file_cnt}.tfrecord")
+    writer.close()
+
+
+if __name__ == '__main__':
+    gen_tfrecords(tfpath=tfpath)
diff --git a/tools/atomic/model_info.md b/tools/atomic/model_info.md
new file mode 100644
index 00000000..a14533cc
--- /dev/null
+++ b/tools/atomic/model_info.md
@@ -0,0 +1,19 @@
+
+### Business domain / scenario
+Atomic-operation benchmarking
+
+### Model framework
+TF1.15.0 / TF2.6.5
+
+### Usage
+#### Generate the dataset
+python3 gen_mt_data_0to1e.py 5 (the argument 5 means a repetition ratio of 50%)
+The data is written under /home/insert/ by default.
+
+#### Run the test
+sparse.sh must first be adapted to the actual environment.
+To test sparse lookup:
+./sparse.sh 8 (NPU count) sparse_lookup.py 8 (embedding size) 5 (repetition ratio) 1 0 0
+
+
+
diff --git a/tools/atomic/sparse.sh b/tools/atomic/sparse.sh
new file mode 100644
index 00000000..56968da1
--- /dev/null
+++ b/tools/atomic/sparse.sh
@@ -0,0 +1,60 @@
+#!/bin/bash
+local_rank_size=$1
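+# Positional arguments, inferred from the assignments below and from model_info.md:
+#   $1 local_rank_size (NPU count)   $2 python script    $3 embedding dim
+#   $4 repetition ratio (in tenths)  $5 all2all switch   $6 "pre" flag   $7 sleep seconds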
+host=localhost
+py=$2
+my_dim=$3
+chongfudu=$4
+all2all=$5
+pre=$6
+slp=$7
+rm -rf /root/atc_data/*
+rm -rf /root/ascend/*
+rm -rf kernel_meta_*
+
+
+export ALL2ALL=$5
+export HOST_PIPELINE_OPS_LIB_PATH=/usr/local/python3.7.5/lib/python3.7/site-packages/mx_rec/libasc/libasc_ops.so
+export EMPTY_TENSOR=1
+export ENABLE_RUNTIME_V2=0
+mpi_path=/usr/local/openmpi/bin/
+so_path=/usr/local/python3.7.5/lib/python3.7/site-packages/mx_rec/libasc/
+interface="enp61s0f0"
+ulimit -c 0
+export ASCEND_GLOBAL_LOG_LEVEL=0
+export TF_CPP_MIN_LOG_LEVEL=3
+export ASCEND_INSTALL_PATH=/usr/local/Ascend/latest/
+export ASCEND_HOME_PATH=${ASCEND_INSTALL_PATH}
+export ASCEND_LATEST_INSTALL_PATH=/usr/local/Ascend
+#export ASCEND_HOME_PATH=${ASCEND_INSTALL_PATH}/
+CANN_BIN_PATH=${ASCEND_HOME_PATH}/bin:${ASCEND_HOME_PATH}/compiler/ccec_compiler/bin
+CANN_PYTHONPATH=${ASCEND_HOME_PATH}/python/site-packages:${ASCEND_HOME_PATH}/opp/op_impl/built-in/ai_core/tbe #:${ASCEND_INSTALL_PATH}/tfplugin/latest/python/site-packages
+PYTHON_BIN_PATH=/usr/local/python3.7.5/bin/
+export PATH=${mpi_path}/bin:${PYTHON_BIN_PATH}:${CANN_BIN_PATH}:$PATH
+export PYTHONPATH=${PYTHONPATH}:/usr/local/Ascend/latest/python/site-packages:${so_path}:${CANN_PYTHONPATH}
+export LD_PRELOAD=/lib64/libgomp.so.1
+CANN_LD_PATH=${ASCEND_HOME_PATH}/runtime/lib64:${ASCEND_HOME_PATH}/fwkacllib/lib64:${ASCEND_HOME_PATH}/lib64:${ASCEND_HOME_PATH}/lib64/plugin/opskernel:${ASCEND_HOME_PATH}/lib64/plugin/nnengine
+export LD_LIBRARY_PATH=${so_path}:/usr/local/python3.7.5/lib/python3.7/site-packages/mx_rec/libasc/:/home/insert/src/platform/securec/lib/:${CANN_LD_PATH}:/home/opensource/opensource/hdf5/lib:/usr/local/lib:/usr/local/python3.7.5/lib:$LD_LIBRARY_PATH
+export ASCEND_AICPU_PATH=${ASCEND_HOME_PATH}
+export ASCEND_OPP_PATH=${ASCEND_HOME_PATH}/opp
+export TOOLCHAIN_HOME=${ASCEND_HOME_PATH}/toolkit
+
+export BETTER_EXCEPTIONS=1
+mpi_args='-x BIND_INFO="0:48 48:48 96:48" -x SPDLOG_LEVEL=debug -bind-to none'
+# remove logs
+rm -f *txt >/dev/null
+rm -rf /root/ascend/log/*
+
+# remove shared-memory segments left over from earlier runs
+for i in $(ipcs -m | tail -n +4 | awk '{print $2}'); do
+    ipcrm -m $i
+done
+
+num_process=${local_rank_size}
+host_string=${host//_/:${local_rank_size},node}:${local_rank_size}
+echo run in $host_string
+
+interface="lo"
+
+#python3.7 -c "import tensorflow;print(tensorflow.__path__)"
+horovodrun --network-interface ${interface} -np ${num_process} --mpi-args "${mpi_args}" --mpi -H localhost:${local_rank_size} \
+    python3.7 ${py} --local_rank_size ${local_rank_size} --hccl_json hccl_json_${local_rank_size}p.json --my_dim ${my_dim} --chongfudu $chongfudu --pre $pre --slp $slp | tee temp_${my_dim}_${chongfudu}_${ALL2ALL}_${pre}_${slp}.log
diff --git a/tools/atomic/sparse_lookup.py b/tools/atomic/sparse_lookup.py
new file mode 100644
index 00000000..570c683e
--- /dev/null
+++ b/tools/atomic/sparse_lookup.py
@@ -0,0 +1,266 @@
+import os
+import sys
+import time
+import argparse
+import numpy as np
+import tensorflow as tf
+from mpi4py import MPI  # must be imported before emb_cache and after SparseOps
+import psutil
+from sklearn.metrics import roc_auc_score
+
+from tensorflow.python.ops import math_ops
+from tensorflow.python.framework import ops
+from tensorflow.core.protobuf.rewriter_config_pb2 import RewriterConfig
+from npu_bridge.hccl import hccl_ops
+from npu_bridge.estimator import npu_ops
+
+from mx_rec.graph.modifier import modify_graph_and_start_emb_cache
+from mx_rec.core.asc.manager import start_asc_pipeline
+from mx_rec.core.asc.helper import FeatureSpec, get_asc_insert_func
+from mx_rec.util.initialize import get_rank_size, init, clear_channel, get_rank_id, set_if_load, \
+    terminate_config_initializer
+from mx_rec.constants.constants import MxRecMode
+from mx_rec.core.embedding import create_table, sparse_lookup
+from mx_rec.util.initialize import get_ascend_global_hashtable_collection
+
+from sparse_ops.config import set_ascend_env
+
+USE_PIPELINE_TEST = False
+USE_STATIC = False
+USE_HOT = False
+USE_EXPANSION = False
+
+from mx_rec.constants.constants import ASCEND_SPARSE_LOOKUP_LOCAL_EMB, ASCEND_SPARSE_LOOKUP_ID_OFFSET
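+# Forward-only benchmark: the fetched op below is just one element of the lookup
+# result, so no gradients or optimizer are built; sparse.sh launches one copy per
+# rank (see sparse_lookup_with_grad.py for the variant with a backward pass).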
+
+
+class WideDeep:
+    def __init__(self, input_data, feature_spec_list, hashtable):
+        self.lbl_hldr = input_data["global_labels"][0]
+        self.input_data = input_data
+        self.feature_spec_list = feature_spec_list
+        self.hash_table_list = hashtable
+        self.forward()
+
+    def forward(self):
+        for feature, hash_table in zip(self.feature_spec_list, self.hash_table_list):
+            self.embedding = sparse_lookup(hash_table, feature, 1024 * 1024 // rank_size, dim=None, is_train=True,
+                                           name="merged_embedding_lookup", modify_graph=False, batch=self.input_data)
+
+        # with tf.control_dependencies([self.embedding]):
+        self.op = self.embedding[0][0]
+        return self.op
+
+
+def input_fn_tfrecord(feature_spec_list, rank_id, local_rank_id, rank_size, data_path, file_pattern, total_batch_size,
+                      num_epochs=1, perform_shuffle=False, training=True):
+    line_per_sample = 1024 * 8
+    total_batch_size = int(total_batch_size / line_per_sample)
+    num_parallel = 8
+
+    def extract_fn(data_record):
+        features = {
+            'label': tf.FixedLenFeature(shape=(line_per_sample,), dtype=tf.float32),
+            'feat_ids': tf.FixedLenFeature(shape=(128 * line_per_sample,), dtype=tf.int64)
+        }
+        sample = tf.parse_single_example(data_record, features)
+        return sample
+
+    def reshape_fn(batch):
+        batch['label'] = tf.reshape(batch['label'], [-1, ])
+        batch['feat_ids'] = tf.reshape(batch['feat_ids'], [-1, 128])
+        return batch
+
+    all_files = os.listdir(data_path)
+    files = [os.path.join(data_path, f) for f in all_files if f.startswith(file_pattern)]
+    dataset = tf.data.TFRecordDataset(files, num_parallel_reads=num_parallel)
+    batch_size = total_batch_size // rank_size
+    dataset = dataset.shard(rank_size, rank_id)
+    dataset = dataset.repeat(num_epochs)
+    dataset = dataset.map(extract_fn, num_parallel_calls=num_parallel).batch(batch_size,
+                                                                             drop_remainder=True)
+    dataset = dataset.map(reshape_fn, num_parallel_calls=num_parallel)
+    insert_fn = get_asc_insert_func(tgt_key_specs=feature_spec_list, is_training=True, dump_graph=False)
+    dataset = dataset.map(insert_fn)
+
+    dataset = dataset.prefetch(int(100))
+    return dataset
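+# Note: each serialized Example above packs line_per_sample (8192) logical rows — a
+# flat float label vector plus 128 int64 feature ids per row — which reshape_fn
+# un-flattens again before the asc insert function is mapped over the batch.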
custom_op.name = "NpuOptimizer" + custom_op.parameter_map["precision_mode"].s = tf.compat.as_bytes('must_keep_origin_dtype') + sess_config.graph_options.rewrite_options.remapping = RewriterConfig.OFF + custom_op.parameter_map["enable_data_pre_proc"].b = True + sess_config.gpu_options.allow_growth = True + custom_op.parameter_map["hcom_parallel"].b = False + custom_op.parameter_map["HCCL_algorithm"].s = tf.compat.as_bytes("level0:fullmesh;level1:pairwise") + + custom_op.parameter_map["iterations_per_loop"].i = 10 + # custom_op.parameter_map["enable_dump"].b = True + # custom_op.parameter_map["dump_path"].s = tf.compat.as_bytes("./dump") + # custom_op.parameter_map["dump_step"].s = tf.compat.as_bytes("11|12") + # custom_op.parameter_map["dump_mode"].s = tf.compat.as_bytes("all") + # custom_op.parameter_map["op_debug_level"].i = 0 + custom_op.parameter_map["op_wait_timeout"].i = 500 + custom_op.parameter_map["op_execute_timeout"].i = 500 + custom_op.parameter_map["op_precision_mode"].s = tf.compat.as_bytes("op_impl_mode.ini") + custom_op.parameter_map["graph_memory_max_size"].s = tf.compat.as_bytes(str(30000000000)) + custom_op.parameter_map["variable_memory_max_size"].s = tf.compat.as_bytes(str(30000000000)) + # custom_op.parameter_map["profiling_mode"].b = True + # custom_op.parameter_map["profiling_options"].s = tf.compat.as_bytes( + # '{"output":"/home","training_trace":"on","task_trace":"on","fp_point":"","bp_point":"","aicpu":"on","aic_metrics":"PipeUtilization"}') + + global_start_time = time.time() + tf.set_random_seed(10086) + np.random.seed(10086) + + my_dim = int(args.my_dim) + print("my_dim=", my_dim) + + hot_zhanbi = args.chongfudu + hot_zhanbi = float(hot_zhanbi) / 10 + + # if hot_zhanbi == 0: + # hot_zhanbi = int(hot_zhanbi) + + config = { + "data_path": "./data1/data" + str(hot_zhanbi) + "_" + str(float(args.new_key)) + "/", + "train_file_pattern": "tf", + "test_file_pattern": "test", + "batch_size": 1024 * 8, + "field_num": 128, + "send_count": 1024 * 1024 // rank_size, # 65536 * 10 > 39(field num) * 16000(bz) + "id_emb_dim": my_dim, + "ext_emb_vec_size": my_dim, + "train_epoch": 1, + "dev_vocab_size": 100000001 + } + + # model run parameter + print_steps = 300 + evaluate_stride = 80000 # eval every 200 steps + eval_steps = -1 # 8 ranks 34 + stop_steps = 95 + # Hybrid step1.1: init cache + emb_name = "wide_deep_emb" + + dev_vocab_size = config["dev_vocab_size"] # 23120 + host_vocab_size = 0 + + init(True, rank_id=rank_id, rank_size=local_rank_size, train_interval=100, eval_steps=-1, + prefetch_batch_number=1, use_dynamic=0, use_hot=1, use_dynamic_expansion=0) + + tf.disable_eager_execution() + ###################################### + feature_spec_list = [ + FeatureSpec("feat_ids", feat_count=128, table_name="merged_sparse_embeddings", batch_size=config["batch_size"])] + with tf.device('/cpu:0'): + train_dataset = input_fn_tfrecord(feature_spec_list=feature_spec_list, + rank_id=rank_id, + local_rank_id=local_rank_id, + rank_size=rank_size, + data_path=config["data_path"], + file_pattern=config["train_file_pattern"], + total_batch_size=int(rank_size * config["batch_size"]), + perform_shuffle=(not USE_PIPELINE_TEST), + num_epochs=config["train_epoch"]) + train_iterator = train_dataset.make_initializable_iterator() + train_next_iter = train_iterator.get_next() + + train_input_data = {"global_labels": train_next_iter["label"], + "feat_ids": train_next_iter["feat_ids"], + } + + sparse_hashtable = create_table(key_dtype=tf.int64, + dim=tf.TensorShape([my_dim]), + 
name="merged_sparse_embeddings", + emb_initializer=tf.variance_scaling_initializer(mode="fan_avg", + distribution='normal', seed=0), + device_vocabulary_size=dev_vocab_size * local_rank_size, + mode=MxRecMode.mapping("ASC")) + + model = WideDeep(train_input_data, feature_spec_list, [sparse_hashtable]) + MODIFY_GRAPH_FLAG = False + if MODIFY_GRAPH_FLAG: + modify_graph_and_start_emb_cache(dump_graph=False) + else: + start_asc_pipeline() + + with tf.Session(config=sess_config) as sess: + sess.run(tf.global_variables_initializer()) + sess.run([train_iterator.initializer]) + # build model + print("start build wdl(single domain) model") + print("=========start============") + # start run loop + total_start_time = time.time() + current_steps = 0 + train_finished = False + time.sleep(int(args.slp)) + while not train_finished: + try: + current_steps += 1 + print("current step =", current_steps) + # + run_dict = { + "adam": model.op, + "lbl_hldr": model.lbl_hldr, + } + if current_steps == 1: + total_start_time = time.time() + start_time = time.time() + print("start sess run") + results = sess.run(fetches=run_dict) + print("start sess run 1") + end_time = time.time() + print(f"current_steps: {current_steps} ,step time:{(end_time - start_time) * 1000}") + if current_steps <= 5: + total_start_time = time.time() + if current_steps % print_steps == 0: + print("----------" * 10) + try: + print( + f"current_steps: {current_steps} ,deep_loss:{results['deep_loss']}," + f"e2etime per step:{(end_time - start_time) * 1000}") + except KeyError: + print(f"current_steps: {current_steps}") + print("----------" * 10) + + if current_steps >= stop_steps: + train_finished = True + # + except tf.errors.OutOfRangeError: + train_finished = True + + # train_finished + # emb_cache.destroy() + # MPI.Finalize() + print( + f"training {current_steps} steps, consume time: {(time.time() - total_start_time) / (current_steps - 5) * 1000} ") + + terminate_config_initializer() + # emb_cache.destroy() + # MPI.Finalize() diff --git a/tools/atomic/sparse_lookup_with_grad.py b/tools/atomic/sparse_lookup_with_grad.py new file mode 100644 index 00000000..3d7d37e5 --- /dev/null +++ b/tools/atomic/sparse_lookup_with_grad.py @@ -0,0 +1,277 @@ +import os +import sys +import time +import argparse +import numpy as np +import tensorflow as tf +from mpi4py import MPI # must before emb_cache after SparseOps +import psutil +import sys +from sklearn.metrics import roc_auc_score + +from tensorflow.python.ops import math_ops +from tensorflow.python.framework import ops +from tensorflow.core.protobuf.rewriter_config_pb2 import RewriterConfig +from npu_bridge.hccl import hccl_ops +from npu_bridge.estimator import npu_ops + +from mx_rec.graph.modifier import modify_graph_and_start_emb_cache +from mx_rec.core.asc.manager import start_asc_pipeline +from mx_rec.core.asc.helper import FeatureSpec, get_asc_insert_func +from mx_rec.util.initialize import get_rank_size, init, clear_channel, get_rank_id, set_if_load, \ + terminate_config_initializer +from mx_rec.constants.constants import MxRecMode +from mx_rec.core.embedding import create_table, sparse_lookup +from mx_rec.util.initialize import get_ascend_global_hashtable_collection +from mx_rec.optimizers.lazy_adam import CustomizedLazyAdam +from sparse_ops.config import set_ascend_env + +USE_PIPELINE_TEST = False +USE_STATIC = False +USE_HOT = False +USE_EXPANSION = False + + +def create_hash_optimizer(): + return CustomizedLazyAdam() + + +def get_sparse_optimizer(): + sparse_optimizer = 
+
+
+class WideDeep:
+    def __init__(self, input_data, feature_spec_list, hashtable):
+        self.lbl_hldr = input_data["global_labels"][0]
+        self.input_data = input_data
+        self.feature_spec_list = feature_spec_list
+        self.hash_table_list = hashtable
+        self.forward()
+
+    def forward(self):
+        for feature, hash_table in zip(self.feature_spec_list, self.hash_table_list):
+            self.embedding = sparse_lookup(hash_table, feature, 1024 * 1024 // rank_size, dim=None, is_train=True,
+                                           name="merged_embedding_lookup", modify_graph=False, batch=self.input_data)
+        self.loss = tf.reduce_mean(self.embedding, axis=0)
+        with tf.control_dependencies([self.loss]):
+            self.op = tf.no_op()
+        return self.op
+
+
+def input_fn_tfrecord(feature_spec_list, rank_id, local_rank_id, rank_size, data_path, file_pattern, total_batch_size,
+                      num_epochs=1, perform_shuffle=False, training=True):
+    line_per_sample = 1024 * 8
+    total_batch_size = int(total_batch_size / line_per_sample)
+    num_parallel = 8
+
+    def extract_fn(data_record):
+        features = {
+            'label': tf.FixedLenFeature(shape=(line_per_sample,), dtype=tf.float32),
+            'feat_ids': tf.FixedLenFeature(shape=(128 * line_per_sample,), dtype=tf.int64)
+        }
+        sample = tf.parse_single_example(data_record, features)
+        return sample
+
+    def reshape_fn(batch):
+        batch['label'] = tf.reshape(batch['label'], [-1, ])
+        batch['feat_ids'] = tf.reshape(batch['feat_ids'], [-1, 128])
+        return batch
+
+    all_files = os.listdir(data_path)
+    files = [os.path.join(data_path, f) for f in all_files if f.startswith(file_pattern)]
+    dataset = tf.data.TFRecordDataset(files, num_parallel_reads=num_parallel)
+    batch_size = total_batch_size // rank_size
+    dataset = dataset.shard(rank_size, rank_id)
+    dataset = dataset.repeat(num_epochs)
+    dataset = dataset.map(extract_fn, num_parallel_calls=num_parallel).batch(batch_size,
+                                                                             drop_remainder=True)
+    dataset = dataset.map(reshape_fn, num_parallel_calls=num_parallel)
+    insert_fn = get_asc_insert_func(tgt_key_specs=feature_spec_list, is_training=True, dump_graph=False)
+    dataset = dataset.map(insert_fn)
+    dataset = dataset.prefetch(int(100))
+    return dataset
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='base')
+    parser.add_argument('--local_rank_size')
+    parser.add_argument('--hosts')
+    parser.add_argument('--hccl_json')
+    parser.add_argument('--my_dim')
+    parser.add_argument('--chongfudu')
+    parser.add_argument('--new_key')
+    parser.add_argument('--slp')
+    args = parser.parse_args()
+    local_rank_size = int(args.local_rank_size)
+    comm = MPI.COMM_WORLD
+    rank_id = comm.Get_rank()
+    rank_size = comm.Get_size()
+    print(f"rank {rank_id}/{rank_size}")
+    local_rank_id = rank_id % local_rank_size
+    set_ascend_env(rank_id, rank_size, local_rank_size, host=args.hosts, file=args.hccl_json)
+
+    # create session
+    sess_config = tf.ConfigProto()
+    custom_op = sess_config.graph_options.rewrite_options.custom_optimizers.add()
+    custom_op.name = "NpuOptimizer"
+    custom_op.parameter_map["use_off_line"].b = True
+    custom_op.parameter_map["mix_compile_mode"].b = True
+    custom_op.parameter_map["precision_mode"].s = tf.compat.as_bytes('must_keep_origin_dtype')
+    sess_config.graph_options.rewrite_options.remapping = RewriterConfig.OFF
+    custom_op.parameter_map["enable_data_pre_proc"].b = True
+    sess_config.gpu_options.allow_growth = True
+    custom_op.parameter_map["hcom_parallel"].b = False
+    custom_op.parameter_map["HCCL_algorithm"].s = tf.compat.as_bytes("level0:fullmesh;level1:pairwise")
+
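+    # The dump switches below write per-op tensors for steps 1 and 2 under ./dump;
+    # useful for accuracy comparison, but expect a large slowdown in performance runs.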
custom_op.parameter_map["iterations_per_loop"].i = 5 + custom_op.parameter_map["enable_dump"].b = True + custom_op.parameter_map["dump_path"].s = tf.compat.as_bytes("./dump") + custom_op.parameter_map["dump_step"].s = tf.compat.as_bytes("1|2") + custom_op.parameter_map["dump_mode"].s = tf.compat.as_bytes("all") + custom_op.parameter_map["op_wait_timeout"].i = 500 + custom_op.parameter_map["op_execute_timeout"].i = 500 + custom_op.parameter_map["op_precision_mode"].s = tf.compat.as_bytes("op_impl_mode.ini") + custom_op.parameter_map["graph_memory_max_size"].s = tf.compat.as_bytes(str(30000000000)) + custom_op.parameter_map["variable_memory_max_size"].s = tf.compat.as_bytes(str(30000000000)) + + global_start_time = time.time() + tf.set_random_seed(10086) + np.random.seed(10086) + + my_dim = int(args.my_dim) + print("my_dim=", my_dim) + + hot_zhanbi = args.chongfudu + hot_zhanbi = float(hot_zhanbi) / 10 + + # if hot_zhanbi == 0: + # hot_zhanbi = int(hot_zhanbi) + + config = { + "data_path": "./data1/data" + str(hot_zhanbi) + "_" + str(float(args.new_key)) + "/", + "train_file_pattern": "tf", + "test_file_pattern": "test", + "batch_size": 1024 * 8, + "field_num": 128, + "send_count": 1024 * 1024 // rank_size, # 65536 * 10 > 39(field num) * 16000(bz) + "id_emb_dim": my_dim, + "ext_emb_vec_size": my_dim, + "train_epoch": 1, + "dev_vocab_size": 5000001 + } + + # model run parameter + print_steps = 300 + evaluate_stride = 80000 # eval every 200 steps + eval_steps = -1 # 8 ranks 34 + stop_steps = 5 + # Hybrid step1.1: init cache + emb_name = "wide_deep_emb" + + dev_vocab_size = config["dev_vocab_size"] # 23120 + host_vocab_size = 0 + + init(True, rank_id=rank_id, rank_size=local_rank_size, train_interval=100, eval_steps=-1, + prefetch_batch_number=1, use_dynamic=0, use_hot=1, use_dynamic_expansion=0) + + tf.disable_eager_execution() + ###################################### + feature_spec_list = [ + FeatureSpec("feat_ids", feat_count=128, table_name="merged_sparse_embeddings", batch_size=config["batch_size"])] + with tf.device('/cpu:0'): + train_dataset = input_fn_tfrecord(feature_spec_list=feature_spec_list, + rank_id=rank_id, + local_rank_id=local_rank_id, + rank_size=rank_size, + data_path=config["data_path"], + file_pattern=config["train_file_pattern"], + total_batch_size=int(rank_size * config["batch_size"]), + perform_shuffle=(not USE_PIPELINE_TEST), + num_epochs=config["train_epoch"]) + train_iterator = train_dataset.make_initializable_iterator() + train_next_iter = train_iterator.get_next() + + train_input_data = {"global_labels": train_next_iter["label"], + "feat_ids": train_next_iter["feat_ids"], + } + + sparse_optimizer_list = get_sparse_optimizer() + + sparse_hashtable = create_table(key_dtype=tf.int64, + dim=tf.TensorShape([my_dim]), + name="merged_sparse_embeddings", + emb_initializer=tf.variance_scaling_initializer(mode="fan_avg", + distribution='normal', seed=0), + device_vocabulary_size=dev_vocab_size * local_rank_size, + optimizer_list=sparse_optimizer_list, + mode=MxRecMode.mapping("ASC")) + + sparse_variables = tf.compat.v1.get_collection(get_ascend_global_hashtable_collection()) + model = WideDeep(train_input_data, feature_spec_list, [sparse_hashtable]) + + train_ops = [] + for loss, sparse_optimizer in zip([model.loss], [sparse_optimizer_list]): + sparse_grads = tf.gradients(loss, sparse_variables) + grads_and_vars = [(grad, variable) for grad, variable in zip(sparse_grads, sparse_variables)] + train_ops.append(sparse_optimizer.apply_gradients(grads_and_vars)) + + 
+    MODIFY_GRAPH_FLAG = False
+    if MODIFY_GRAPH_FLAG:
+        modify_graph_and_start_emb_cache(dump_graph=False)
+    else:
+        start_asc_pipeline()
+
+    with tf.Session(config=sess_config) as sess:
+        sess.run(tf.global_variables_initializer())
+        sess.run([train_iterator.initializer])
+        # build model
+        print("start build wdl(single domain) model")
+        print("=========start============")
+        # start run loop
+        total_start_time = time.time()
+        current_steps = 0
+        train_finished = False
+        time.sleep(int(args.slp))
+        while not train_finished:
+            try:
+                current_steps += 1
+                print("current step =", current_steps)
+                run_dict = {
+                    "loss": model.op,
+                    "adam": train_ops,
+                    "lbl_hldr": model.lbl_hldr,
+                }
+                if current_steps == 1:
+                    total_start_time = time.time()
+                start_time = time.time()
+                print("start sess run")
+                results = sess.run(fetches=run_dict)
+                print("start sess run 1")
+                end_time = time.time()
+                print(f"current_steps: {current_steps} ,step time:{(end_time - start_time) * 1000}")
+                if current_steps <= 5:
+                    total_start_time = time.time()
+                if current_steps % print_steps == 0:
+                    print("----------" * 10)
+                    try:
+                        print(
+                            f"current_steps: {current_steps} ,deep_loss:{results['deep_loss']},"
+                            f"e2etime per step:{(end_time - start_time) * 1000}")
+                    except KeyError:
+                        print(f"current_steps: {current_steps}")
+                    print("----------" * 10)
+
+                if current_steps >= stop_steps:
+                    train_finished = True
+
+            except tf.errors.OutOfRangeError:
+                train_finished = True
+
+        # training finished; the first five warm-up steps are excluded from the average
+        print(
+            f"training {current_steps} steps, consume time: {(time.time() - total_start_time) / (current_steps - 5) * 1000} ")
+
+    terminate_config_initializer()
+    MPI.Finalize()
\ No newline at end of file
diff --git a/tools/atomic/sparse_ops/__init__.py b/tools/atomic/sparse_ops/__init__.py
new file mode 100644
index 00000000..53640a7e
--- /dev/null
+++ b/tools/atomic/sparse_ops/__init__.py
@@ -0,0 +1,7 @@
+"""
+init
+"""
+from __future__ import absolute_import
+from sparse_ops.config import get_path
+
+__all__ = ["get_path", ]
diff --git a/tools/atomic/sparse_ops/config.py b/tools/atomic/sparse_ops/config.py
new file mode 100644
index 00000000..f10d12fd
--- /dev/null
+++ b/tools/atomic/sparse_ops/config.py
@@ -0,0 +1,111 @@
+"""
+Configuration helpers.
+"""
+from __future__ import absolute_import
+import os
+import json
+import psutil
+
+
+def get_path():
+    """
+    Return the directory containing this file.
+    """
+    return os.path.dirname(__file__)
+
+
+def gen_config(server_str, local_rank_size, path=None):
+    """
+    Generate the HCCL rank-table configuration.
+    """
+
+    def _device(local_rank_id, rank_id, server_id):
+        return {
+            "device_id": f"{local_rank_id}",
+            "device_ip": f'192.{local_rank_id % 4}.{server_id}.{1 + local_rank_id // 4}',
+            "rank_id": f"{rank_id}"
+        }
+
+    def _server(server_id):
+        return {
+            "device": [],
+            "server_id": f"90.91.141.{server_id}"
+        }
+
+    conf = {
+        "server_count": "-1",
+        "server_list": [],
+        "status": "completed",
+        "version": "1.0"
+    }
+    rank_id = 0
+    servers = str(server_str).split('_')
+    conf['server_count'] = str(len(servers))
+    for server in servers:
+        srv = _server(server)
+        for local_rank_id in range(local_rank_size):
+            dev = _device(local_rank_id, rank_id, server)
+            rank_id = rank_id + 1
+            srv["device"].append(dev)
+        conf['server_list'].append(srv)
+
+    conf_str = json.dumps(conf)
+    if path is None:
+        path = '/tmp/hccl.json'
+    with open(path, 'w') as file_handle:
+        file_handle.write(conf_str)
+
+
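+# Example of the JSON gen_config writes for server_str "0" and local_rank_size 2
+# (device_ip and server_id follow the synthetic patterns hard-coded above):
+# {"server_count": "1",
+#  "server_list": [{"device": [{"device_id": "0", "device_ip": "192.0.0.1", "rank_id": "0"},
+#                              {"device_id": "1", "device_ip": "192.1.0.1", "rank_id": "1"}],
+#                   "server_id": "90.91.141.0"}],
+#  "status": "completed", "version": "1.0"}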
+def set_ascend_env(rank, rank_size, local_rank_size, host, file=None, dev_id=-1, dev_index=-1):
+    """
+    Set the Ascend-related parameters and environment variables; generate the HCCL config.
+    """
+    rank = str(rank)
+    rank_size = str(rank_size)
+    local_rank_size = int(local_rank_size)
+    host = str(host)
+
+    os.environ["MOX_USE_NPU"] = "1"
+    os.environ["FUSION_TENSOR_SIZE"] = "2000000000"
+    os.environ["MOX_USE_TF_ESTIMATOR"] = "0"
+    os.environ["MOX_USE_TDT"] = "1"
+    os.environ["HEARTBEAT"] = "1"
+    os.environ["CONITNUE_TRAIN"] = "true"
+
+    os.environ["RANK_ID"] = rank
+    local_rank_id = int(rank) % int(local_rank_size)
+    if dev_id != -1:
+        os.environ["DEVICE_ID"] = str(dev_id)
+        os.environ["ASCEND_DEVICE_ID"] = str(dev_id)
+    else:
+        os.environ["DEVICE_ID"] = str(local_rank_id)
+        os.environ["ASCEND_DEVICE_ID"] = str(local_rank_id)
+    if dev_index != -1:
+        os.environ["DEVICE_INDEX"] = str(dev_index)
+    else:
+        os.environ["DEVICE_INDEX"] = str(local_rank_id)
+
+    os.environ["RANK_SIZE"] = rank_size
+    if file:
+        os.environ["RANK_TABLE_FILE"] = file
+    else:
+        gen_config(host, local_rank_size)
+        os.environ["RANK_TABLE_FILE"] = "/tmp/hccl.json"
+    os.environ["HCCL_CONNECT_TIMEOUT"] = "600"
+
+    os.environ["JOB_ID"] = "10086"
+    os.environ["SOC_VERSION"] = "Ascend910"
+    os.environ["GE_AICPU_FLAG"] = "1"
+    os.environ["NEW_GE_FE_ID"] = "1"
+    os.environ["EXPERIMENTAL_DYNAMIC_PARTITION"] = "1"
+    os.environ["ENABLE_FORCE_V2_CONTROL"] = "1"
+
+
+def bind_cpu():
+    p = psutil.Process()
+    try:
+        bind_start = 48
+        bind_count = 96
+        p.cpu_affinity([bind_start + x for x in range(bind_count)])
+    except IndexError:
+        print("error cpu bind info, skipped.")
diff --git a/tools/atomic/sparse_ops/ops.py b/tools/atomic/sparse_ops/ops.py
new file mode 100644
index 00000000..35fe2462
--- /dev/null
+++ b/tools/atomic/sparse_ops/ops.py
@@ -0,0 +1,133 @@
+"""
+Sparse ops.
+"""
+from __future__ import absolute_import
+import tensorflow as tf
+from npu_bridge.hccl import hccl_ops
+from sparse_ops import utils
+from mpi4py import MPI
+
+MPI.Init_thread(MPI.THREAD_MULTIPLE)  # must run before emb_cache
+utils.init = True
+
+
+class SparseOps:
+    """
+    Embedding-related communication interfaces.
+    """
+
+    def __init__(self, fallback=False):
+        # context
+        self.fallback = fallback
+        self.all2all = hccl_ops.all_to_all_v
+
+    def get_a2a_args(self, lookup_vec_size, mini_bs_w_field, rank_size, send_count, emb_vec_size):
+        """
+        Build the all2all arguments (counts and displacements).
+        """
+        if self.fallback:
+            send_count = tf.cond(lookup_vec_size > send_count * rank_size,
+                                 lambda: mini_bs_w_field // rank_size,
+                                 lambda: send_count)
+        all2all_args = {
+            "sc": tf.cast([send_count * emb_vec_size] * rank_size, tf.int64),
+            "ss": tf.cast([send_count * emb_vec_size * i for i in range(rank_size)], tf.int64)}
+        all2all_args['rc'] = all2all_args['sc']
+        all2all_args['rs'] = all2all_args['ss']
+        return all2all_args, send_count * rank_size
+
+    def forward_alltoall(self, all2all_args, restore_vec, hot_pos, emb_vec, emb_vec_size):
+        """
+        Forward communication of the embeddings.
+        all2all_args: arguments used by all2all
+        restore_vec: restore (gather-back) indices
+        emb_vec: input embeddings
+        """
+        emb_vec = tf.reshape(emb_vec, [-1])
+
+        result = self.all2all(send_data=emb_vec,
+                              send_counts=all2all_args['sc'],
+                              send_displacements=all2all_args['ss'],
+                              recv_counts=all2all_args['rc'],
+                              recv_displacements=all2all_args['rs']
+                              )
+
+        result = tf.reshape(result,
+                            [-1, emb_vec_size],
+                            name="after_all2all_reshape")
+        if hot_pos is not None:
+            result = tf.concat([tf.gather(result, hot_pos, name="hot_pos"), result], axis=0)
+
+        output = tf.gather(result, restore_vec)
+        return output
+
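+    # Buffer layout (our reading of get_a2a_args, not stated in the original sources):
+    # with R ranks, each rank's flat send buffer is [to_rank0 | to_rank1 | ... |
+    # to_rank(R-1)], each slice send_count * emb_vec_size floats, so the
+    # displacements are simply the prefix sums of the counts.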
+    def forward_alltoallc(self, all2all_args, restore_vec, emb_vec, emb_vec_size, rank):
+        """
+        Forward communication of the embeddings (all_to_all_v_c variant).
+        all2all_args: send-count matrix used by all2all
+        restore_vec: restore (gather-back) indices
+        emb_vec: input embeddings
+        """
+        emb_vec = tf.reshape(emb_vec, [-1])
+
+        result = hccl_ops.all_to_all_v_c(send_data=emb_vec,
+                                         send_count_matrix=all2all_args,
+                                         rank=rank
+                                         )
+
+        result = tf.reshape(result,
+                            [-1, emb_vec_size],
+                            name="after_all2all_reshape")
+        output = tf.gather(result, restore_vec)
+        return output
+
+    def backward_alltoall(self, emb_grad, hot_pos, segment_ids, num_segments, all2all_args):
+        """
+        Backward communication of the embedding gradients.
+        emb_grad: raw gradients
+        segment_ids: restore indices
+        num_segments: length after deduplication
+        """
+        # unique_local_grad 2-node shape 37755, same as the rc total and num_segments
+        # unique_local_grad shape is [40052, 80]
+        if hot_pos is not None:
+            unique_local_grad = tf.math.unsorted_segment_sum(emb_grad,
+                                                             segment_ids=segment_ids,
+                                                             num_segments=num_segments + tf.shape(hot_pos)[0],
+                                                             name="backward_combine")
+            hot, cold = tf.split(unique_local_grad,
+                                 [tf.shape(hot_pos)[0], tf.shape(unique_local_grad)[0] - tf.shape(hot_pos)[0]], axis=0)
+            unique_local_grad = tf.tensor_scatter_nd_update(cold, tf.expand_dims(hot_pos, 1), hot)
+        else:
+            unique_local_grad = tf.math.unsorted_segment_sum(emb_grad,
+                                                             segment_ids=segment_ids,
+                                                             num_segments=num_segments, name="backward_combine")
+
+        unique_grad = self.all2all(send_data=unique_local_grad,
+                                   send_counts=all2all_args['rc'],
+                                   send_displacements=all2all_args['rs'],
+                                   recv_counts=all2all_args['sc'],
+                                   recv_displacements=all2all_args['ss']
+                                   )
+        return unique_grad
+
+    def backward_alltoallc(self, emb_grad, segment_ids, num_segments, all2all_args, rank):
+        """
+        Backward communication of the embedding gradients (all_to_all_v_c variant).
+        emb_grad: raw gradients
+        segment_ids: restore indices
+        num_segments: length after deduplication
+        """
+        unique_local_grad = tf.math.unsorted_segment_sum(emb_grad,
+                                                         segment_ids=segment_ids,
+                                                         num_segments=num_segments, name="backward_combine")
+        # unique_local_grad 2-node shape 37755, same as the rc total and num_segments
+        # unique_local_grad shape is [40052, 80]
+        unique_local_grad = tf.reshape(unique_local_grad, [-1])
+
+        # transpose the count matrix so the backward exchange reverses the forward one
+        all2all_args = tf.transpose(all2all_args)
+        unique_grad = hccl_ops.all_to_all_v_c(send_data=unique_local_grad,
+                                              send_count_matrix=all2all_args,
+                                              rank=rank
+                                              )
+        return unique_grad
diff --git a/tools/atomic/sparse_ops/utils.py b/tools/atomic/sparse_ops/utils.py
new file mode 100644
index 00000000..07cf796d
--- /dev/null
+++ b/tools/atomic/sparse_ops/utils.py
@@ -0,0 +1,23 @@
+"""
+utils
+"""
+from __future__ import absolute_import
+import tensorflow as tf
+from mpi4py import rc
+
+tf.get_logger().setLevel("ERROR")
+rc.initialize = False  # if True, MPI_Init runs when "from mpi4py import MPI" is executed
+
+
+def ops():
+    """
+    Load and return the custom embedding ops library.
+    """
+    return tf.load_op_library("libcust_ops.so")
+
+
+def dataset_ops():
+    """
+    Load and return the custom asc dataset ops library.
+    """
+    return tf.load_op_library("libasc_dataset_ops.so")
diff --git "a/tools/atomic/\345\216\237\345\255\220\346\265\213\350\257\225\347\273\223\346\236\234-tf1.15-rec0630-cann530.xlsx" "b/tools/atomic/\345\216\237\345\255\220\346\265\213\350\257\225\347\273\223\346\236\234-tf1.15-rec0630-cann530.xlsx"
new file mode 100644
index 0000000000000000000000000000000000000000..195f0ed29faf77934d67422b1775a106b03db272
GIT binary patch
literal 32363
[base85-encoded binary payload of the .xlsx benchmark-results workbook omitted; truncated in the original excerpt]
z&hupLZ|AR2E(he=u>y}zH!nV!*=0;^!&LYcmTe~VKCm@3t27&K!=3?_g;X( zbXc7I($6~78&dN{as&_&Oh(Yc9-mQiLf8`RQRUXoV6LH8C+ zZ$KL7Dlw4ZMV7&Wweg~3PJE~kW@yCsHmr|UgDArs`l7_Vw$VNV$rWP`gqe<3p>nqg z^RvtN(RElqiNoesXZ<~E%3vChmg!EHiBF!;fgY#8zS#mCOwAB%VH70o+7P6{;b($- z$c4|>`)2WH_VMc86my>~pQ12p%hjqy@`46PWCCFztdo*>*h9*q!*|@B)&XR=Pp}|3h@$J zO(5PRUE$H#u}8;ZMIQ{pvSe=v!j4P{OcV&3Yb5I;8iWP6Ee4;R5%7q*&BMr&DEHyp{&bhw?B6EZM~J991T801DM}v z8=;UvB@YW@{p|CS(Z|?;~Fk8EZmR(q!v&b{+FRuUa!6+Q|%vtwZ@Z8J0aURz%EX&0l%GBrU_~KFBeDHh* zQm)SMqRRNpKgBY@Inrq$cicj#EHybS_QN6T2J3P@W7kM84sfXoqv?D_!>Q4NbR*+& z;TR9-GLGTO2y?EX3)^L!1pUh5eZI{r+YYI6O)ioa8?C& zPOtT11ijG1dEVKzzMWyFbv_PpgI;b(fO|d3#t*JWhI(4ox3eJF-2-DoQ$k(?JVl00 zi=Ma9?GM_vk<9fX$4p4KaO|^dN;%VDvUdEw>k-tjj_*sdYVC;}!RP**o3v?C(=pK9 zr+0bq0q#LIe~*3#>aup6$vMQi2K#J3h;`SG>?Vy%#V3MQP78j4za8_5Z++uuvOuTZ z23PEyhVZ_e*Dg2($=vbQGlDgI<#FcsyN=UT1lF!sLni#T9?2XRo2x5Xd{HF>CL6oB zc*oV$?@+)!n0MOY6L~&A4<@d`Hi=+uI=MSCFEc+nq}rv)bCJ6~BTs}1g<=j)$mb)l zx{@>8E36|}Z}l9gcSla0R%D~O2TurLvcSkoZij-%Z*z#tDVS_|=8!`{MQgHd%cW0O za^QI=dW-9e9qW8A42kp9Nr6{(fY|3xB0T`5gW$JKfP!<)lRMK(OCPoxoNs4V<9F>9 zmlkRe1Lz`_ICF7o_MhNY1P>pBwjq zcQO@V*_(dkf0nk8BS2?5`2B;{^G}gyC0IU{<4POL$^tmMC7zboN*r35UutONqlDc3 zKG-v~mkpj$P_~ed`p@m^s8%Ib!FvR6$Dk9l3I|ZA$B2%I<%uJ91*LQDd`Tr#%i~$1 z@Q`cmn0Sg_ul0uJ?h~?QIEzMP>sUjn3 zs9cf7xMcF$Ub_HTI6R`i5-RD{KISzzRrdJymcMFjT_?26`!yD7_#$Mca>gx?CIsZnyL@UHQkF}J8#dksozpmf~pDFEE$>T z$C)W1uUj$jgk>d36eP3I?vknADqItC$=9<{m9a$!bqmyO@GkN%y2^9dkw33<(V3D{ zk0(nlHJTfJuYLO6vBJcsG5w{HPPo#0gEh4;yl2^VJE^Il=5I#hcI9uofvFMvY-W%> zE;*j@0ISL3Xy9_9kQ!m{59_84wp{HJZ`SeI=XD~?XKBLk^Z6w-8Aqoc(5W^Fl4nEc z#d}Q~v@(+0i4$z`H?Jo3wI<${M0hFcpt5^B_ExO;NT{W)A;->SjkK=nCzP;-AK}ki z)tp{cR`<~uj@PGS_@JV6@tY3#^s3hD=FL(X%0n93r<9^;LC*u;ggqtH=R}hv^>1#p z*2$+|ax@pbdsuf{O1!pp(4MbYhDxRrb9yd`gev=973!(FP{&@n_py2!IE{n(L|JX9tIM?wp2<2B<$AACB=mB!CHzlzmH5z&@ zlc-(d)oWv1`U{NShTgpP9>Pqx6Wr}+>~PcXT0?20Z=53Y(X(goG)1#KpLa>t3b4_x z_*1fd&LerdR4Ac9tpG=wP8@1U6S%T90LW*0U9VJB=nDwe7e_Vd(jGj9Idm)iLLmbG zL_=?!mPnHD460$t=6H2W@swT7oo6t97#vM;_FyAQK zHJa31s)9D~COUdFVQti}Vwv|alyn{@%sAnA5NZ1xFMmSkGm;eN68AT**+zT8S~*^@ zpCLzn$9KDIaddjPy39HJWC>OQLP{3<8;Mvm?W9?x-=}GW<{BQ0{pZ4>)!#E5$5A>y= admit_threshold: + temp[key] = value + sorted_result = dict(sorted(temp.items(), key=lambda x: x[1], reverse=True)) + with open("key_count30.json", "w") as f: + json.dump(sorted_result, f, indent=4) + + print(sorted_result) + + +if __name__ == "__main__": + args = parser.parse_args() + static_key_count(args.file_path) + diff --git "a/tools/feature_admit_tools/\347\211\271\345\276\201\345\207\206\345\205\245\347\262\276\345\272\246\345\257\271\346\257\224\346\226\271\346\263\225.md" "b/tools/feature_admit_tools/\347\211\271\345\276\201\345\207\206\345\205\245\347\262\276\345\272\246\345\257\271\346\257\224\346\226\271\346\263\225.md" new file mode 100644 index 00000000..2cee54c6 --- /dev/null +++ "b/tools/feature_admit_tools/\347\211\271\345\276\201\345\207\206\345\205\245\347\262\276\345\272\246\345\257\271\346\257\224\346\226\271\346\263\225.md" @@ -0,0 +1,21 @@ +## **特征准入准确性对比使用说明** + + +----------------- +### **工具简介**: + +mxRec开启特征准入后,进行准确性比较工具。当前支持模型保存格式SAVE_EASY=False。 + +### **环境依赖** + +该工具在tf1环境上进行测试,环境配置如下,供用户参考: + +> **tf1** +
+tensorflow == 1.15.0 / 1.15.4
+numpy == 1.21.6
+python == 3.7.5
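+
+As a rough picture of the counting step used in the workflow below (a minimal sketch, not the tool itself; the whitespace-separated key file and the threshold of 30 are assumptions):
+
+```python
+import json
+from collections import Counter
+
+def count_keys(key_file, admit_threshold=30):
+    # count how often each sparse key occurs, then keep keys at/above the threshold
+    counter = Counter()
+    with open(key_file) as f:
+        for line in f:
+            counter.update(line.split())
+    kept = {k: v for k, v in counter.items() if v >= admit_threshold}
+    return dict(sorted(kept.items(), key=lambda x: x[1], reverse=True))
+
+print(json.dumps(count_keys("train_keys.txt"), indent=4))
+```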
+
+### **Usage**:
+1) Pick a dataset and run static_key_count.py on it to see how often each key above the given threshold occurs (the counting step is illustrated by the sketch above).
+2) With admission enabled, run the get_hist.py tool on the saved HisRecord to see how often each key occurred during the run.
diff --git a/tools/model_convert/README.md b/tools/model_convert/README.md
new file mode 100644
index 00000000..945b2329
--- /dev/null
+++ b/tools/model_convert/README.md
@@ -0,0 +1,119 @@
+## **Model Conversion Tool Guide**
+
+
+-----------------
+### **Tool overview**:
+
+Converts the NPU-format sparse tables saved from mxRec+NPU training into a format that GPU and CPU jobs can load.
+
+### **Environment dependencies**
+
+The tool was tested in TF1 and TF2 environments; both configurations are listed below for reference:
+
+> **tf1**
+
+tensorflow == 1.15.0 / 1.15.4
+numpy == 1.21.6
+python == 3.7.5
+ + +> **tf2** +
+tensorflow == 2.6.5
+numpy == 1.19.5
+python == 3.7.5
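+
+A quick way to confirm which of the two environments is active before running the tool (a minimal sketch):
+
+```python
+import sys
+
+import numpy as np
+import tensorflow as tf
+
+print("python:", sys.version.split()[0])
+print("numpy:", np.__version__)
+print("tensorflow:", tf.__version__)
+```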
+
+
+### **Usage**:
+
+
+
+**Example:**
+
+An example invocation:
+`python3 model_convert.py --input_path=./saved_model --output_path=./saved-model-out --rank_size=8 --estimator=1 --ddr=1`
+
+The log line `convert model success.` indicates that the model was converted successfully.
+
+**Parameters:**
+
+`input_path`: type `str`. Path where the NPU-format model was saved.
+
+    Notes:
+    1) In estimator mode, set the model save path during NPU+mxRec training to:
+    model_dir = {path}/{get_rank_id()}
+    For example:
+
+    from mx_rec.util.initialize import get_rank_id
+    model_dir = f"{params.model_ckpt_dir}/{get_rank_id()}"
+
+    When converting, pass {param.model_ckpt_dir} as input_path.
+
+    2) In session-run mode, set the model save path during NPU+mxRec training to:
+    path = {model_path}/model-{get_rank_id()}
+    For example:
+
+    from mx_rec.util.initialize import get_rank_id
+    self.saver.save(self.session, f"./saved-model/model-{self.rank_id}", global_step=i)
+
+    When converting, pass ./saved-model as input_path.
+
+`output_path`: type `str`. Output path for the converted CPU/GPU-format model. It can be any user-chosen path; if it does not exist, the directory is created.
+
+`rank_size`: type `int`. Number of devices used for NPU+mxRec training. Range: [1, 16]. (A quick way to infer this value is sketched after this parameter list.)
+
+`estimator`: type `int`. Whether TensorFlow estimator mode was used. Defaults to 0: 0 means estimator mode was not used, 1 means it was.
+
+    Estimator reference:
+    https://www.tensorflow.org/guide/estimator?hl=zh-cn
+
+`ddr`: type `int`. Whether mxRec's DDR mode was used. Defaults to 0: 0 means HBM mode, 1 means DDR mode.
+
+`dynamic_expansion`: type `int`. Whether training used dynamic expansion. Defaults to 0: 0 means dynamic expansion was not used, 1 means it was.
+For mxRec's DDR mode, see the mxRec User Guide.
+
+
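+
+In estimator mode the converter expects one numbered sub-directory per rank under `input_path` (see the `input_path` notes above). A quick way to sanity-check `--rank_size` before converting (a minimal sketch; `infer_rank_size` is an illustrative helper, not part of the tool):
+
+```python
+import os
+
+def infer_rank_size(input_path):
+    # estimator-mode layout: <input_path>/0, <input_path>/1, ... one directory per rank
+    ranks = [d for d in os.listdir(input_path)
+             if d.isdigit() and os.path.isdir(os.path.join(input_path, d))]
+    return len(ranks)
+
+print(infer_rank_size("./saved_model"))  # pass this value as --rank_size
+```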
+
+**Loading the converted model:**
+
+Because the TF1 and TF2 APIs differ, this tool uses different interfaces for each;
+when loading, use the loading interface that matches your TF version. Examples:
+
+- tf1
+```python
+restore_table = tf.contrib.lookup.MutableHashTable(
+    key_dtype=tf.int64,
+    value_dtype=tf.float32,
+    default_value=initialize_value,
+    name=args.table_name,
+    checkpoint=True)
+
+with tf.Session() as sess:
+    saver = tf.train.Saver()
+    saver.restore(sess, args.path + "/model.ckpt-0")
+    lookup_embedding = restore_table.lookup(key)
+```
+
+- tf2
+```python
+restore_table = tf.lookup.experimental.MutableHashTable(
+    key_dtype=tf.int64,
+    value_dtype=tf.float32,
+    default_value=np.zeros((240,)),
+    name="deep_sparse_table")
+
+restore_table1 = tf.lookup.experimental.MutableHashTable(
+    key_dtype=tf.int64,
+    value_dtype=tf.float32,
+    default_value=np.zeros((37,)),
+    name="wide_sparse_table")
+
+# The tables must be restored in the same order they were saved in;
+# the save order is printed during model conversion.
+
+checkpoint = tf.train.Checkpoint(table_list=[restore_table, restore_table1])
+manager = tf.train.CheckpointManager(checkpoint, directory=args.path, max_to_keep=3)
+checkpoint.restore(manager.latest_checkpoint)
+
+lookup_embedding = restore_table1.lookup(key)
+```
\ No newline at end of file
diff --git a/tools/model_convert/model_convert.py b/tools/model_convert/model_convert.py
new file mode 100644
index 00000000..7608917a
--- /dev/null
+++ b/tools/model_convert/model_convert.py
@@ -0,0 +1,287 @@
+import argparse
+import json
+import os
+import re
+from enum import Enum
+
+import tensorflow as tf
+import numpy as np
+
+parser = argparse.ArgumentParser()
+parser.add_argument('--input_path', type=str, required=True, help='path of the model file to be converted')
+parser.add_argument('--output_path', type=str, required=True, help='output path of the converted model')
+parser.add_argument('--rank_size', type=int, choices=range(1, 17), default=8, required=False)
+parser.add_argument('--estimator', type=int, choices=[0, 1], default=0, required=False)
+parser.add_argument('--ddr', type=int, choices=[0, 1], default=0, required=False)
+parser.add_argument("--dynamic_expansion", type=int, choices=[0, 1], default=0, required=False)
+
+slice_prefix = "slice_"
+sparse_file_prefix = "sparse-"
+data_suffix = ".data"
+attribute_suffix = ".attribute"
+hbm_prefix_list = ["HashTable", "HBM"]
+ddr_prefix_list = ["HashTable", "DDR"]
+min_file_size = 1
+max_file_size = 1024 * 1024 * 1024 * 1024
+
+
+class DataAttr(Enum):
+    SHAPE = "shape"
+    DARATYPE = "data_type"
+
+
+class ModelConverter:
+    def __init__(self, input_model_path, output_model_path, rank_size, estimator, ddr, dynamic_expansion):
+        self._input_path = input_model_path
+        self._output_path = output_model_path
+        self._rank_size = rank_size
+        self._is_estimator = bool(estimator)
+        self._is_ddr = bool(ddr)
+        self._use_dynamic_expansion = bool(dynamic_expansion)
+        self._load_ckpt_path = None
+        self._input_model_path_list = []
+        self._table_list = []
+        self.table_info_dict = {}
+        self.sparse_file_list = []
+
+        if not os.path.exists(self._input_path):
+            raise FileNotFoundError(f"the input path {self._input_path} does not exist. 
please check it.") + if not os.path.exists(self._output_path): + os.makedirs(self._output_path) + self._build_input_model_list(self._is_estimator) + self._build_sparse_file_list() + self._check_mode() + self._build_table_info_dict() + + def convert(self): + insert_op_list = [] + var_list = [] + hash_table_list = [] + # load old checkpoint and get var list + if not os.path.exists(self._load_ckpt_path): + raise FileNotFoundError(f"the checkpoint path {self._load_ckpt_path} does not exists.") + ckpt = tf.train.load_checkpoint(self._load_ckpt_path) + var_names = ckpt.get_variable_to_shape_map().keys() + var_values = [ckpt.get_tensor(name) for name in var_names] + for i, name in enumerate(var_names): + var = tf.Variable(var_values[i], name=name) + var_list.append(var) + + # get key and embedding from file to insert hashtable + for table_name, emb_size in self.table_info_dict.items(): + initialize_value = np.zeros((emb_size,)) + # create mutable hashtable + if tf.__version__.startswith("2"): + hash_table = tf.lookup.experimental.MutableHashTable(key_dtype=tf.int64, value_dtype=tf.float32, + default_value=initialize_value, name=table_name) + else: + hash_table = tf.contrib.lookup.MutableHashTable(key_dtype=tf.int64, value_dtype=tf.float32, + default_value=initialize_value, name=table_name) + + for rank in range(self._rank_size): + offset, key = self._get_key_and_offset(self.sparse_file_list[rank], table_name) + if self._is_ddr: + emb_data = self._get_embedding_array(self.sparse_file_list[rank], table_name)[list(offset)] + else: + emb_data = self._get_embedding_array(self.sparse_file_list[rank], table_name) + insert_op = hash_table.insert(tf.convert_to_tensor(key), tf.convert_to_tensor(emb_data)) + insert_op_list.append(insert_op) + print("build save table:", table_name) + hash_table_list.append(hash_table) + if tf.__version__.startswith("2"): + checkpoint = tf.train.Checkpoint(table_list = hash_table_list) + manager = tf.train.CheckpointManager(checkpoint, directory=self._output_path, max_to_keep=5) + manager.save() + else: + with tf.Session() as sess: + sess.run(tf.global_variables_initializer()) + sess.run(insert_op_list) + saver = tf.train.Saver() + saver.save(sess, self._output_path + "/model.ckpt-0") + + def _get_key_and_offset(self, sparse_file_path, table_name): + if self._is_ddr: + upper_dir = generate_upper_dir(sparse_file_path, ddr_prefix_list, table_name, "embedding_hashmap") + else: + upper_dir = generate_upper_dir(sparse_file_path, hbm_prefix_list, table_name, "key") + attribute_data_dir, target_data_dir = get_attribute_and_data_file(upper_dir) + + with open(attribute_data_dir, "r") as fin: + validate_read_file(attribute_data_dir) + attributes = np.fromfile(attribute_data_dir, dtype=np.uint64) + data_shape = attributes[:2] + + with open(target_data_dir, "r") as fin: + validate_read_file(target_data_dir) + key_offset_data = np.fromfile(target_data_dir, dtype=np.int64) + key_offset_data = key_offset_data.reshape(data_shape) + offset = [] + if self._is_ddr: + offset = key_offset_data[:, 1] + key = key_offset_data[:, 0] + return offset, key + + def _get_embedding_array(self, sparse_file_path, table_name): + upper_dir = generate_upper_dir(sparse_file_path, hbm_prefix_list, table_name, "embedding") + attribute_data_dir, target_data_dir = get_attribute_and_data_file(upper_dir) + with open(attribute_data_dir, "r") as fin: + validate_read_file(attribute_data_dir) + if self._use_dynamic_expansion: + attributes = np.fromfile(attribute_data_dir, dtype=np.uint64) + data_shape = attributes[:2] + 
else: + emb_attributes = json.load(fin) + data_shape = emb_attributes.pop(DataAttr.SHAPE.value) + with open(target_data_dir, "r") as fin: + validate_read_file(target_data_dir) + emb_data = np.fromfile(target_data_dir, dtype=np.float32) + + emb_data = emb_data.reshape(data_shape) + + if self._is_ddr: + ddr_upper_dir = generate_upper_dir(sparse_file_path, ddr_prefix_list, table_name, "embedding_data") + attribute_data_dir, target_data_dir = get_attribute_and_data_file(ddr_upper_dir) + with open(attribute_data_dir, "r") as fin: + validate_read_file(attribute_data_dir) + attributes = np.fromfile(attribute_data_dir, dtype=np.uint64) + data_shape = attributes[:2] + with open(target_data_dir, "r") as fin: + validate_read_file(target_data_dir) + ddr_emb_data = np.fromfile(target_data_dir, dtype=np.float32) + ddr_emb_data = ddr_emb_data.reshape(data_shape) + emb_data = np.concatenate((emb_data, ddr_emb_data[:, :self.table_info_dict[table_name]]), axis=0) + return emb_data + + def _build_sparse_file_list(self): + if self._is_estimator: + latest_ckpt = self._get_latest_ckpt_name() + sparse_file_name = sparse_file_prefix + latest_ckpt + for rank in range(self._rank_size): + sparse_file_path = os.path.join(self._input_model_path_list[rank], sparse_file_name) + self.sparse_file_list.append(sparse_file_path) + else: + latest_ckpt = self._get_latest_ckpt_name() + latest_step = latest_ckpt.split("-")[-1] + pattern = re.compile(r"^sparse-.*{}$".format(latest_step)) + for folder_name in os.listdir(self._input_path): + if os.path.isdir(os.path.join(self._input_path, folder_name)) and pattern.match(folder_name): + sparse_file_path = os.path.join(self._input_path, folder_name) + self.sparse_file_list.append(sparse_file_path) + if len(self.sparse_file_list) != self._rank_size: + raise AssertionError( + f"the sparse file num should be {self._rank_size} rather than {len(self.sparse_file_list)}") + + def _build_input_model_list(self, is_estimator): + if is_estimator: + for i in range(self._rank_size): + model_path = os.path.join(self._input_path, str(i)) + self._input_model_path_list.append(model_path) + else: + self._input_model_path_list.append(self._input_path) + self._load_ckpt_path = self._input_model_path_list[0] + + def _get_latest_ckpt_name(self): + ckpt_path = os.path.join(self._load_ckpt_path, "checkpoint") + if not os.path.exists(ckpt_path): + raise FileNotFoundError(f"the input path you provided {ckpt_path} miss checkpoint file.please check it.") + with open(ckpt_path, "r") as fin: + # validate open file + validate_read_file(ckpt_path) + latest_ckpt = fin.readline().rstrip() + latest_ckpt = latest_ckpt.split(":")[1].strip(' ').replace('"','') + latest_ckpt = latest_ckpt.split("/")[-1] + return latest_ckpt + + def _build_table_info_dict(self): + tmp_file_list = [] + table_upper_file = os.path.join(self.sparse_file_list[0], "HashTable", "HBM") + if not os.path.exists(table_upper_file): + raise FileNotFoundError(f"the sparse file path {table_upper_file} does not exists.") + for _, table_name, _ in os.walk(table_upper_file): + tmp_file_list.append(table_name) + + if not tmp_file_list: + raise FileNotFoundError(f"under the sparse file path {table_upper_file}, no file exists.") + self._table_list = tmp_file_list[0] + for table_name in self._table_list: + table_path = os.path.join(table_upper_file, table_name, "embedding") + attribute_file = get_attribute_and_data_file(table_path)[0] + with open(attribute_file, "r") as fin: + validate_read_file(attribute_file) + if self._use_dynamic_expansion: + attributes = 
np.fromfile(attribute_file, dtype=np.uint64) + data_shape = attributes[:2] + else: + emb_attributes = json.load(fin) + data_shape = emb_attributes.pop(DataAttr.SHAPE.value) + self.table_info_dict[table_name] = data_shape[1] + + def _check_mode(self): + check_dir = os.path.join(self.sparse_file_list[0], "HashTable") + model_dirs = [] + for _, dirs, _ in os.walk(check_dir): + model_dirs.append(dirs) + if not self._is_ddr and "DDR" in model_dirs[0]: + raise ValueError(f"wrong mode choose! you choose hbm mode, however ddr dir exists. ") + if self._is_ddr and "DDR" not in model_dirs[0]: + raise ValueError(f"wrong mode choose! you choose ddr mode, however ddr dir not exists. ") + + +def get_attribute_and_data_file(table_path): + if not os.path.exists(table_path): + raise FileNotFoundError(f"the input table path {table_path} does not exists.") + + attribute_file_list = [] + data_file_list = [] + for file_name in os.listdir(table_path): + if file_name.endswith(attribute_suffix): + attribute_file_list.append(file_name) + if file_name.endswith(data_suffix): + data_file_list.append(file_name) + if len(attribute_file_list) != 1: + raise AssertionError(f"under the table path {table_path}, ther must only one attribute file. " + f"In fact, {len(attribute_file_list)} attribute file exists. ") + if len(data_file_list) != 1: + raise AssertionError(f"under the table path {table_path}, ther must only one data file. " + f"In fact, {len(data_file_list)} data file exists. ") + attribute_file = os.path.join(table_path, attribute_file_list[0]) + data_file = os.path.join(table_path, data_file_list[0]) + return attribute_file, data_file + + +def generate_upper_dir(sparse_file, dir_prefix_list, table_name, data_type): + temp_dir = sparse_file + for dir in dir_prefix_list: + temp_dir = os.path.join(temp_dir, dir) + return os.path.join(temp_dir, table_name, data_type) + + +def generate_attribute_dir(sparse_file, dir_prefix_list, table_name, data_type, rank_id): + temp_dir = sparse_file + for dir in dir_prefix_list: + temp_dir = os.path.join(temp_dir, dir) + return os.path.join(temp_dir, table_name, data_type, f"{slice_prefix}{rank_id}{attribute_suffix}") + + +def generate_data_dir(sparse_file, dir_prefix_list, table_name, data_type, rank_id): + temp_dir = sparse_file + for dir in dir_prefix_list: + temp_dir = os.path.join(temp_dir, dir) + return os.path.join(temp_dir, table_name, data_type, f"{slice_prefix}{rank_id}{data_suffix}") + + +def validate_read_file(read_path): + if os.path.islink(read_path): + raise ValueError(f"the path {read_path} to be read is soft link.") + file_stat = tf.io.gfile.stat(read_path) + if not min_file_size < file_stat.length <= max_file_size: + raise ValueError(f"file size: {file_stat.length} is invalid, not in ({min_file_size}, {max_file_size}]") + + +if __name__ == "__main__": + args = parser.parse_args() + convert_instance = ModelConverter(input_model_path=args.input_path, output_model_path=args.output_path, + rank_size=args.rank_size, + estimator=args.estimator, ddr=args.ddr, dynamic_expansion=args.dynamic_expansion) + convert_instance.convert() + print("convert model success.") \ No newline at end of file diff --git a/tools/model_convert/model_convert_mt_v2.py b/tools/model_convert/model_convert_mt_v2.py new file mode 100644 index 00000000..df3fde7d --- /dev/null +++ b/tools/model_convert/model_convert_mt_v2.py @@ -0,0 +1,246 @@ +import argparse +import json +import os +import re +from enum import Enum + +import tensorflow as tf +import numpy as np + +parser = argparse.ArgumentParser() 
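+# Unlike model_convert.py, which rebuilds a TensorFlow MutableHashTable
+# checkpoint, this variant merges the per-rank shards and dumps each table as a
+# {key: embedding} dict to <table_name>_key_embedding.npy via np.save; read it
+# back with np.load(path, allow_pickle=True).item().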
+parser.add_argument('--input_path', type=str, required=True, help='path of the model file to be converted') +parser.add_argument('--output_path', type=str, required=True, help='output path must be local path') +parser.add_argument('--rank_size', type=int, choices=range(1, 17), default=8, required=False) +parser.add_argument('--estimator', type=int, choices=[0, 1], default=1, required=False) +parser.add_argument('--ddr', type=int, choices=[0, 1], default=0, required=False) +parser.add_argument('--save_easy', type=int, choices=[0, 1], default=1, required=False) + +slice_prefix = "slice_" +sparse_file_prefix = "sparse-" +data_suffix = ".data" +attribute_suffix = ".attribute" +hbm_prefix_list = ["HashTable", "HBM"] +ddr_prefix_list = ["HashTable", "DDR"] +min_file_size = 1 +max_file_size = 1024 * 1024 * 1024 * 1024 + + +class DataAttr(Enum): + SHAPE = "shape" + DARATYPE = "data_type" + + +class ModelConverter: + def __init__(self, input_model_path, output_model_path, rank_size, estimator, ddr, save_easy): + self._input_path = input_model_path + self._output_path = output_model_path + self._rank_size = rank_size + self._is_estimator = bool(estimator) + self._is_ddr = bool(ddr) + self._is_save_easy = bool(save_easy) + self._load_ckpt_path = None + self._input_model_path_list = [] + self._table_list = [] + self.table_info_dict = {} + self.sparse_file_list = [] + + if not tf.io.gfile.exists(self._input_path): + raise FileNotFoundError(f"the input path {self._input_path} does not exists. please check it.") + if not tf.io.gfile.exists(self._output_path): + tf.io.gfile.makedirs(self._output_path) + self._build_input_model_list(self._is_estimator) + self._build_sparse_file_list() + self._build_table_info_dict() + + def convert(self): + for table_name, emb_size in self.table_info_dict.items(): + result_key = np.array([]) + result_embedding = np.array([]) + for rank in range(self._rank_size): + if not self._is_save_easy: + offset, key = self._get_key_and_offset(self.sparse_file_list[rank], table_name) + emb_data = self._get_embedding_array(self.sparse_file_list[rank], table_name)[list(offset)] + else: + key = self._get_key_array(self.sparse_file_list[rank], table_name) + emb_data = self._get_embedding_array(self.sparse_file_list[rank], table_name) + + if rank == 0: + result_key = key + result_embedding = emb_data + else: + result_key = np.concatenate((result_key, key), axis=0) + result_embedding = np.concatenate((result_embedding, emb_data), axis=0) + + # save result every table + transformed_data = dict(zip(result_key[:], result_embedding[:])) + save_path = os.path.join(self._output_path, table_name + "_key_embedding" + ".npy") + save_dir = os.path.dirname(save_path) + os.makedirs(save_dir, exist_ok=True) + np.save(save_path, transformed_data) + + def _get_key_and_offset(self, sparse_file_path, table_name): + if self._is_ddr: + upper_dir = generate_upper_dir(sparse_file_path, ddr_prefix_list, table_name, "embedding_hashmap") + else: + upper_dir = generate_upper_dir(sparse_file_path, hbm_prefix_list, table_name, "key_offset_map") + attribute_data_dir, target_data_dir = get_attribute_and_data_file(upper_dir) + + with open(attribute_data_dir, "r") as fin: + attributes = np.fromfile(attribute_data_dir, dtype=np.uint64) + data_shape = attributes[:2] + + with open(target_data_dir, "r") as fin: + key_offset_data = np.fromfile(target_data_dir, dtype=np.int64) + key_offset_data = key_offset_data.reshape(data_shape) + offset = key_offset_data[:, 1] + key = key_offset_data[:, 0] + return offset, key + + def 
_get_key_array(self, sparse_file_path, table_name): + upper_dir = generate_upper_dir(sparse_file_path, hbm_prefix_list, table_name, "key") + attribute_data_dir, target_data_dir = get_attribute_and_data_file(upper_dir) + with tf.io.gfile.GFile(attribute_data_dir, "r") as fin: + emb_attributes = json.load(fin) + with tf.io.gfile.GFile(target_data_dir, "rb") as fin: + key_data = fin.read() + key_data = np.fromstring(key_data, dtype=emb_attributes.pop(DataAttr.DARATYPE.value)) + + data_shape = emb_attributes.pop(DataAttr.SHAPE.value) + key = key_data.reshape(data_shape) + return key + + def _get_embedding_array(self, sparse_file_path, table_name): + upper_dir = generate_upper_dir(sparse_file_path, hbm_prefix_list, table_name, "embedding") + attribute_data_dir, target_data_dir = get_attribute_and_data_file(upper_dir) + with tf.io.gfile.GFile(attribute_data_dir, "r") as fin: + emb_attributes = json.load(fin) + + with tf.io.gfile.GFile(target_data_dir, "rb") as fin: + emb_data = fin.read() + emb_data = np.fromstring(emb_data, dtype=emb_attributes.pop(DataAttr.DARATYPE.value)) + data_shape = emb_attributes.pop(DataAttr.SHAPE.value) + emb_data = emb_data.reshape(data_shape) + + if self._is_ddr: + ddr_upper_dir = generate_upper_dir(sparse_file_path, ddr_prefix_list, table_name, "embedding_data") + attribute_data_dir, target_data_dir = get_attribute_and_data_file(ddr_upper_dir) + with open(attribute_data_dir, "r") as fin: + attributes = np.fromfile(attribute_data_dir, dtype=np.uint64) + data_shape = attributes[:2] + with open(target_data_dir, "r") as fin: + ddr_emb_data = np.fromfile(target_data_dir, dtype=np.float32) + ddr_emb_data = ddr_emb_data.reshape(data_shape) + emb_data = np.concatenate((emb_data, ddr_emb_data[:, :self.table_info_dict[table_name]]), axis=0) + return emb_data + + def _build_sparse_file_list(self): + if self._is_estimator: + latest_ckpt = self._get_latest_ckpt_name() + sparse_file_name = sparse_file_prefix + latest_ckpt + for rank in range(self._rank_size): + sparse_file_path = os.path.join(self._input_model_path_list[rank], sparse_file_name) + self.sparse_file_list.append(sparse_file_path) + else: + pattern = re.compile(r"sparse-.+") + for folder_name in tf.io.gfile.listdir(self._input_path): + if tf.io.gfile.isdir(os.path.join(self._input_path, folder_name)) and pattern.match(folder_name): + sparse_file_path = os.path.join(self._input_path, folder_name) + self.sparse_file_list.append(sparse_file_path) + if len(self.sparse_file_list) != self._rank_size: + raise AssertionError( + f"the sparse file num should be {self._rank_size} rather than {len(self.sparse_file_list)}") + + def _build_input_model_list(self, is_estimator): + if is_estimator: + for i in range(self._rank_size): + # for mt, need two rank id + model_path = os.path.join(self._input_path, str(i)) + self._input_model_path_list.append(model_path) + else: + self._input_model_path_list.append(self._input_path) + self._load_ckpt_path = self._input_model_path_list[0] + + def _get_latest_ckpt_name(self): + ckpt_path = os.path.join(self._load_ckpt_path, "checkpoint") + if not tf.io.gfile.exists(ckpt_path): + raise FileNotFoundError(f"the input path you provided {ckpt_path} miss checkpoint file.please check it.") + with tf.io.gfile.GFile(ckpt_path, "r") as fin: + # validate open file + latest_ckpt = fin.readline().rstrip() + latest_ckpt = latest_ckpt.split(":")[1].strip(' ').replace('"', '') + latest_ckpt = latest_ckpt.split("/")[-1] + return latest_ckpt + + def _build_table_info_dict(self): + tmp_file_list = [] + 
table_upper_file = os.path.join(self.sparse_file_list[0], "HashTable", "HBM") + if not tf.io.gfile.exists(table_upper_file): + raise FileNotFoundError(f"the sparse file path {table_upper_file} does not exists.") + for _, table_name, _ in tf.io.gfile.walk(table_upper_file): + tmp_file_list.append(table_name) + + + if not tmp_file_list: + raise FileNotFoundError(f"under the sparse file path {table_upper_file}, no file exists.") + self._table_list = tmp_file_list[0] + for table_name in self._table_list: + table_name = f"{table_name}/table" + table_path = os.path.join(table_upper_file, table_name, "embedding") + attribute_file = get_attribute_and_data_file(table_path)[0] + with tf.io.gfile.GFile(attribute_file, "r") as fin: + emb_attributes = json.load(fin) + data_shape = emb_attributes.pop(DataAttr.SHAPE.value) + self.table_info_dict[table_name] = data_shape[1] + + +def get_attribute_and_data_file(table_path): + if not tf.io.gfile.exists(table_path): + raise FileNotFoundError(f"the input table path {table_path} does not exists.") + + attribute_file_list = [] + data_file_list = [] + for file_name in tf.io.gfile.listdir(table_path): + if file_name.endswith(attribute_suffix): + attribute_file_list.append(file_name) + if file_name.endswith(data_suffix): + data_file_list.append(file_name) + if len(attribute_file_list) != 1: + raise AssertionError(f"under the table path {table_path}, ther must only one attribute file. " + f"In fact, {len(attribute_file_list)} attribute file exists. ") + if len(data_file_list) != 1: + raise AssertionError(f"under the table path {table_path}, ther must only one data file. " + f"In fact, {len(data_file_list)} data file exists. ") + attribute_file = os.path.join(table_path, attribute_file_list[0]) + data_file = os.path.join(table_path, data_file_list[0]) + return attribute_file, data_file + + +def generate_upper_dir(sparse_file, dir_prefix_list, table_name, data_type): + temp_dir = sparse_file + for dir in dir_prefix_list: + temp_dir = os.path.join(temp_dir, dir) + return os.path.join(temp_dir, table_name, data_type) + + +def generate_attribute_dir(sparse_file, dir_prefix_list, table_name, data_type, rank_id): + temp_dir = sparse_file + for dir in dir_prefix_list: + temp_dir = os.path.join(temp_dir, dir) + return os.path.join(temp_dir, table_name, data_type, f"{slice_prefix}{rank_id}{attribute_suffix}") + + +def generate_data_dir(sparse_file, dir_prefix_list, table_name, data_type, rank_id): + temp_dir = sparse_file + for dir in dir_prefix_list: + temp_dir = os.path.join(temp_dir, dir) + return os.path.join(temp_dir, table_name, data_type, f"{slice_prefix}{rank_id}{data_suffix}") + + +if __name__ == "__main__": + args = parser.parse_args() + convert_instance = ModelConverter(input_model_path=args.input_path, output_model_path=args.output_path, + rank_size=args.rank_size, + estimator=args.estimator, ddr=args.ddr, save_easy=args.save_easy) + convert_instance.convert() + print(f"sparse table has been converted to numpy file. output path is {args.output_path}") + diff --git a/tools/mx_rec_perf.sh b/tools/mx_rec_perf.sh new file mode 100644 index 00000000..fe1ee706 --- /dev/null +++ b/tools/mx_rec_perf.sh @@ -0,0 +1,71 @@ +#!/bin/bash +# Copyright (c) Huawei Technologies Co., Ltd. 
+# Description: MxRec performance analysis script V1.0
+set -e
+
+file="$1" # pass the spdlog log file here
+
+calculate_average() {
+    awk '{
+        sum += $1;
+        count++
+    } END {
+        average = sum / count;
+        print average
+    }'
+}
+perf() {
+    echo "read batch cost"
+    cat ${file} | grep 'read batch cost'|grep -v timeout|tail -n 20| awk 'NR%2==1'
+    echo "===================================="
+    echo "key process cost"
+    cat ${file} | grep 'key process cost'|tail
+    avg=$(cat ${file} | grep -Po '(?<=key process cost:)[^,:]+(?=,)'|tail -n +20 |calculate_average)
+    echo "Average: $avg"
+    echo "===================================="
+    echo "Compare the host and device pipelines: if host key process runs ahead of the training step, the host is not the bottleneck"
+    echo "Enter the training-step marker to match (default: step); press Enter to open the analysis, press q to quit"
+    read step
+    step="${step:-step}"
+    cat ${file} | grep -P "key process cost|${step}"|tail -n100|less
+}
+echo -e "\e[45m\e[1m =========MxRec analysis script V1.0========= \e[0m"
+echo
+
+stuck_check() {
+    echo -e "\e[106m--------Diagnose hangs and GetNext timeouts----------\e[0m"
+    echo -n "Timed-out channel: "
+    cat ${file} | grep -Po "aicpu_getnext.*GetNext"
+    echo
+    echo "Number of lookup sends per device:"
+    for i in {0..7}
+    do
+        line=$(cat ${file} | grep -P "send"|grep "h2d"|grep "1,${i}"|wc -l)
+        echo -n "$line "
+    done
+    echo
+    echo "Check that every device sent the same number of h2d transfers:"
+    for i in {0..7}
+    do
+        line=$(cat ${file} | grep "send"|grep "h2d"|grep "1,${i}"|wc -l)
+        echo -n "$line "
+    done
+    echo
+    echo "Check that every device received the same number of messages:"
+    for i in {0..7}
+    do
+        line=$(cat ${file} | grep "r recv"|grep "1,${i}"|wc -l)
+        echo -n "$line "
+    done
+    echo
+    echo "Last batch received by each device:"
+    cat ${file}|grep "trans emb"|grep "info"|tail
+}
+
+hot_check() {
+    # check the hot-embedding dedup ratio
+    echo "Table name and dedup ratio (deduped/original), expected to be below 0.4:"
+    cat op_summary_*.csv |grep gather_for_restore_vector |awk -F "," '{print $6,$14,$15}'|sed 's/"//g'|sed 's/ [0-9]*;/\//'
+}
+
+perf
diff --git a/tools/parse_data/data_parser.py b/tools/parse_data/data_parser.py
new file mode 100644
index 00000000..53c59fb9
--- /dev/null
+++ b/tools/parse_data/data_parser.py
@@ -0,0 +1,133 @@
+# coding: UTF-8
+
+# Copyright (C) 2023. Huawei Technologies Co., Ltd. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# -----------------------------------------ReadMe Begin--------------------------------------------
+# 1. What it does
+# Measures the cost of the TensorFlow data-parsing stage on its own, to help decide whether
+# parsing is the bottleneck that stalls the whole pipeline.
+# 2. Notes
+# The parsing logic lives in make_dataset, which defaults to the Criteo dataset; to time another
+# dataset's parsing, redefine make_dataset as needed.
+# 3. CPU binding
+# To mimic a real deployment, bind_cpu assumes an 80-core CPU split evenly across 8 workers;
+# if your worker count or core count differs, redefine bind_cpu as needed.
+# 4. How to run
+# 4.1 Single worker:    python3 data_parser.py
+# 4.2 Multiple workers: bash run.sh data_parser.py
+# -----------------------------------------ReadMe End--------------------------------------------
+
+import os
+import sys
+import time
+
+import logging
+import psutil
+
+import tensorflow as tf
+
+logging.basicConfig(level=logging.DEBUG)
+
+
+def make_dataset(data_path, batch_size=102400, line_per_sample=1024):
+    def extract_fn(data_record):
+        features = {
+            # Extract features using the keys set during creation
+            'label': tf.FixedLenFeature(shape=(line_per_sample,), dtype=tf.int64),
+            'sparse_feature': tf.FixedLenFeature(shape=(26 * line_per_sample,), dtype=tf.int64),
+            'dense_feature': tf.FixedLenFeature(shape=(13 * line_per_sample,), dtype=tf.float32),
+        }
+        sample = tf.parse_single_example(data_record, features)
+        return sample
+
+    def feat_cast(feat):
+        for name, tensor in feat.items():
+            if tensor.dtype == tf.int64:
+                feat[name] = tf.cast(tensor, tf.int32)
+        return feat
+
+    def reshape_fn(batch):
+        batch['label'] = tf.reshape(batch['label'], [-1, 1])
+        batch['dense_feature'] = tf.reshape(batch['dense_feature'], [-1, 13])
+        batch['dense_feature'] = tf.math.log(batch['dense_feature'] + 3.0)
+        batch['sparse_feature'] = tf.reshape(batch['sparse_feature'], [-1, 26])
+        return batch
+
+    file_list = sorted([os.path.join(data_path, file) for file in os.listdir(data_path)])
+    dataset = tf.data.TFRecordDataset(file_list, num_parallel_reads=4)
+
+    num_parallel = 8
+    dataset = dataset.map(extract_fn, num_parallel_calls=num_parallel)
+
+    line_cnt = batch_size // line_per_sample
+    dataset = dataset.batch(line_cnt, drop_remainder=True)
+
+    dataset = dataset.map(feat_cast, num_parallel_calls=num_parallel)
+    dataset = dataset.map(reshape_fn, num_parallel_calls=num_parallel)
+
+    dataset = dataset.prefetch(10)
+    return dataset
+
+
+def bind_cpu(rank_id):
+    process = psutil.Process()
+    cpu_kernels = {
+        0: 0,
+        1: 10,
+        2: 40,
+        3: 50,
+        4: 20,
+        5: 30,
+        6: 60,
+        7: 70
+    }
+    try:
+        process.cpu_affinity([cpu_kernels.get(rank_id) + x for x in range(10)])
+    except IndexError:
+        logging.error("error cpu bind info, skipped.")
+
+
+if __name__ == '__main__':
+    RANK_ID = 0
+    if len(sys.argv) > 1:
+        RANK_ID = int(sys.argv[1])
+    bind_cpu(RANK_ID)
+
+    DATA_PATH = "/media/mxRec/data/criteo_tfrecord_small/train"
+    train_dataset = make_dataset(DATA_PATH)
+    iterator = train_dataset.make_initializable_iterator()
+    next_batch = iterator.get_next()
+
+    input_data = []
+    for example in next_batch:
+        input_data.append(next_batch[example])
+
+    COUNT = 0
+    TOTAL_TIME = 0.0
+
+    with tf.Session() as sess:
+        sess.run(iterator.initializer)
+        while True:
+            try:
+                start_time = time.time()
+                result = sess.run(input_data[0])
+                end_time = time.time()
+
+                COUNT += 1
+
+                if COUNT > 1:
+                    TOTAL_TIME += end_time - start_time
+                # time.time() returns seconds, so convert to milliseconds for the log
+                logging.info("StepId:%d, StepTimeCost(ms):%f", COUNT, (end_time - start_time) * 1000)
+            except tf.errors.OutOfRangeError:
+                logging.error("End of Training Dataset")
+                break
+    logging.info("StepTimeCost avg(ms):%f", TOTAL_TIME / (COUNT - 1) * 1000)
\ No newline at end of file
diff --git a/tools/parse_data/run.sh b/tools/parse_data/run.sh
new file mode 100644
index 00000000..b3ab73bb
--- /dev/null
+++ b/tools/parse_data/run.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+# Copyright (c) Huawei Technologies Co., Ltd. 2021-2023. All rights reserved.
+# Description: performance analysis tool
+# Author: MindX SDK
+# Create: 2023
+# History: NA
+
+for i in {0..7}
+do
+    nohup python3 data_parser.py $i > rank_$i.log 2>&1 &
+done
\ No newline at end of file
diff --git a/tools/perf/fast.sh b/tools/perf/fast.sh
new file mode 100644
index 00000000..bf916090
--- /dev/null
+++ b/tools/perf/fast.sh
@@ -0,0 +1,391 @@
+#!/bin/bash
+# Copyright (c) Huawei Technologies Co., Ltd. 2021-2023. All rights reserved.
+# Description: performance analysis tool
+# Author: MindX SDK
+# Create: 2023
+# History: NA
+
+# -----------------------------------------ReadMe Begin--------------------------------------------
+# 1. What it does
+# Analyzes how long each pipe of the pipeline takes during model execution, and how long each
+# sub-module (Step) inside a pipe takes, to help locate system bottlenecks.
+# (The idea behind a pipeline: every pipe should take roughly the same time so the pipes can hide
+# each other's latency; only then are stalls and waits minimized and throughput maximized.)
+#
+# 2. Usage
+# bash fast.sh your_log_file.log
+#
+# 3. Notes
+# mxRec emits TimeCost trace logs via spdlog::debug, so before running make sure run.sh sets
+# SPDLOG_LEVEL=debug (if it is not set, this tool exits with a hint).
+#
+# 4. Reading the results
+# (1) Pipeline: the whole pipeline is a series of pipes, and results are reported per pipe,
+#     e.g. Pipe-1/Pipe-2/Pipe-3/Pipe-4.
+# (2) Pipe: each pipe reports an overall time. (Ideally every pipe takes about the same time, so
+#     the pipes overlap one another and the pipeline runs at peak efficiency.)
+# (3) Sub-module (Step): a pipe may consist of several serial sub-modules (Steps), which may in
+#     turn contain their own sub-modules (SubSteps). In the report, times one level down are
+#     prefixed with --, two levels down with ----, and so on; a level's time includes its
+#     sub-levels.
+#
+# 5. Performance tuning
+# The report may reveal:
+# (1) a pipe that takes unusually long;
+# (2) a sub-module that takes unusually long.
+# Each case needs its own analysis, targeted tuning, or deeper optimization.
+# For example: if TensorFlow data parsing (Pipe-1) is slow and starves the pipeline, raise the
+# num_parallel setting on the TensorFlow parsing side; if preprocessing blocks because the CPU is
+# saturated (Pipe 2: Data Preprocess), lower KEY_PROCESS_THREAD_NUM (default 6); if H2D blocks
+# (Pipe 4: H2D Send Tensors (no DDR)), check whether GetNext on the NPU side or DNN training is
+# stalled. Deeper issues may call for heavier work: splitting pipes, turning serial code parallel,
+# lock optimization, or reworking the execution logic.
+# The tool is also useful as a before/after reference when optimizing a sub-module: compare that
+# sub-module's time, plus end-to-end time and throughput, before and after the change.
+#
+# 6. This tool needs to keep evolving with the code; improvements are welcome. Good Luck!
+# -----------------------------------------ReadMe End--------------------------------------------
+#set -x
+
+LOG_INFO() { echo -e "\033[1;4;32m$1\033[0m" ; }
+LOG_NOTICE() { echo -e "\033[1;4;45m$1\033[0m" ; }
+LOG_WARN() { echo -e "\033[1;31m[WARN]$1\033[0m" ; }
+LOG_ERROR() { echo -e "\033[1;31m[Error]$1\033[0m" ; }
+
+logfile=$1
+
+validate_options()
+{
+    if [ $# -ne 1 ]; then
+        LOG_ERROR "NO log_file"
+        echo "[Usage]: bash $0 log_file"
+        exit 1
+    fi
+}
+
+check_spdlog_level()
+{
+    $(grep 'ReadEmbKeyV2Static' $logfile > /dev/null 2>&1)
+    if [ $? != 0 ]; then
+        $(grep 'ReadEmbKeyV2Dynamic' $logfile > /dev/null 2>&1)
+        if [ $? != 0 ]; then
+            LOG_ERROR "No timecost-related logs, please check 'mpi_args' in your run.sh,
+            make sure SPDLOG_LEVEL=debug, and run again!"
+            exit 1
+        fi
+    fi
+}
+
+parse_pipe_1_data_parser()
+{
+    LOG_NOTICE "Pipe-1: Data Parser"
+
+    $(grep 'ReadEmbKeyV2Dynamic' $logfile > /dev/null 2>&1)
+    if [ $?
== 0 ]; then + LOG_INFO "Step-1.x ReadEmbKeyV2 Dynamic" + else + LOG_INFO "Step-1.x ReadEmbKeyV2 Static" + fi + + grep 'read batch cost(ms)' $logfile | cut -d" " -f10| \ + awk -F "[:,]" '{sum+=$2} END {printf "read batch cost: avg=%0.1f\n", sum/NR}' + + grep 'enqueueTC(ms)' $logfile | grep -v 'timeout' | cut -d" " -f14 | \ + awk -F "[:,]" '{sum+=$2} END {printf "--|enqueueTC: avg=%0.1f\n", sum/NR}' + + grep 'elapsed from last(ms)' $logfile | grep -v 'timeout' | cut -d" " -f13 | \ + awk -F "[:,]" '{print $2}' | \ + awk 'BEGIN {sum=0; count=0} {if($1<1000) {sum+=$NF; count++} } END \ + {printf "elapsed from last: avg=%0.1f\n", sum/count}' +} + +parse_pipe_2_key_process() +{ + LOG_NOTICE "Pipe-2: Data Preprocess" + + grep 'getAndProcessTC(ms)' $logfile | cut -d" " -f7 | \ + awk -F"[:,]" '{print $2}' | \ + awk 'BEGIN{count=0; total=0;} {if ($1<2000) {total+=$NF; count++;}} END \ + {printf "getAndProcessTC(filter>2000ms): avg=%0.3f\n", total/count}' + + LOG_INFO "Step-2.1 GetBatchData" + + grep 'getBatchDataTC' $logfile | \ + awk -F":" 'BEGIN { max=0 } { sum+=$NF; if($NF>max) max=$NF } END \ + {printf "--|getBatchDataTC: total=%d, max=%0.1f, avg=%0.1f\n", NR, max, sum/NR}' + + grep 'getBatchDataTC' $logfile | \ + awk -F":" 'BEGIN {sum=0; count=0;} {if($NF<2000) {sum+=$NF; count++;}} END \ + {printf "--|getBatchDataTC(filter>2000ms): count=%d, avg=%0.1f\n", count, sum/count}' + + grep 'getBatchDataTC' $logfile | \ + awk -F":" 'BEGIN { total=0; none_zero_ms_num=0 } { total++; if($NF>0) none_zero_ms_num++ } END \ + {printf "--|getBatchDataTC: total=%d, none_zero_ms_num=%d, none_zero_ms_rate=%0.3f, zero_ms_rate=%0.3f\n", \ + total, none_zero_ms_num, none_zero_ms_num/total, (1-none_zero_ms_num/total)}' + + LOG_INFO "Step-2.2 KeyProcess" + + grep 'key process cost' $logfile | cut -d" " -f10 | cut -d ":" -f2 | cut -d"," -f1 | grep '^[0-9]' | grep '[0-9]$' | \ + awk 'BEGIN {sum=0; count=0;} {if($NF<2000) {sum+=$NF; count++;}} END \ + {printf "--|key process cost(filter>2000): avg=%0.1f\n", sum/count}' + + # fast-unique related start + $(grep 'ProcessBatchWithFastUnique(ms)' $logfile > /dev/null 2>&1) + if [ $? == 0 ]; then + grep 'ProcessBatchWithFastUnique(ms)' $logfile | \ + awk -F":" '{sum+=$NF} END {printf "----|ProcessBatchWithFastUnique: avg=%0.1f\n", sum/NR}' + fi + + $(grep 'FastUniqueCompute(ms)' $logfile > /dev/null 2>&1) + if [ $? == 0 ]; then + grep 'FastUniqueCompute(ms)' $logfile | cut -d' ' -f6 | \ + awk -F"[:,]" '{sum+=$2} END {printf "------|FastUniqueCompute: avg=%0.1f\n", sum/NR}' + fi + + $(grep 'GetScAll TimeCost(ms)' $logfile > /dev/null 2>&1) + if [ $? == 0 ]; then + grep 'GetScAll TimeCost(ms)' $logfile | \ + awk -F":" '{sum+=$NF} END {printf "------|FastUniqueGetScAll: avg=%0.1f\n", sum/NR}' + fi + + $(grep 'all2allTC TimeCost(ms)' $logfile > /dev/null 2>&1) + if [ $? == 0 ]; then + grep 'all2allTC TimeCost(ms)' $logfile | \ + awk -F":" '{sum+=$NF} END {printf "------|FastUnique_all2allTC: avg=%0.1f\n", sum/NR}' + fi + # fast-unique related end + + $(grep 'uniqueTc(ms)' $logfile > /dev/null 2>&1) + if [ $? == 0 ]; then + grep 'uniqueTc(ms)' $logfile | \ + awk -F":" '{sum+=$NF} END {printf "----|UniqueInRankTC: avg=%0.1f\n", sum/NR}' + fi + + $(grep 'processSplitKeysTC(ms)' $logfile > /dev/null 2>&1) + if [ $? == 0 ]; then + grep 'processSplitKeysTC(ms)' $logfile | \ + awk -F":" '{sum+=$NF} END {printf "----|processSplitKeysTC: avg=%0.1f\n", sum/NR}' + fi + + $(grep 'getScAllTC(ms)' $logfile > /dev/null 2>&1) + if [ $? 
== 0 ]; then + grep 'getScAllTC(ms)' $logfile | \ + awk -F":" '{sum+=$NF} END {printf "------|getScAllTC(AllReduce-AllGather): avg=%0.1f\n", sum/NR}' + fi + + $(grep 'uniqueAll2AllTC(ms)' $logfile > /dev/null 2>&1) + if [ $? == 0 ]; then + grep 'uniqueAll2AllTC(ms)' $logfile | \ + awk -F":" '{sum+=$NF} END {printf "------|uniqueAll2AllTC(All2allv): avg=%0.1f\n", sum/NR}' + fi + + $(grep 'buildRestoreVecTC(ms)' $logfile > /dev/null 2>&1) + if [ $? == 0 ]; then + grep 'buildRestoreVecTC(ms)' $logfile | \ + awk -F":" '{sum+=$NF} END {printf "----|buildRestoreVecTC: avg=%0.1f\n", sum/NR}' + fi + + # common start + $(grep 'key2OffsetTC(ms)' $logfile > /dev/null 2>&1) + if [ $? == 0 ]; then + grep 'key2OffsetTC(ms)' $logfile | \ + awk -F":" '{sum+=$NF} END {printf "----|key2OffsetTC: avg=%0.1f\n", sum/NR}' + fi + + $(grep 'featureAdmitAndEvictTC(ms)' $logfile > /dev/null 2>&1) + if [ $? == 0 ]; then + grep 'featureAdmitAndEvictTC(ms)' $logfile | \ + awk -F":" '{sum+=$NF} END {printf "----|featureAdmitAndEvictTC: avg=%0.1f\n", sum/NR}' + fi + + $(grep 'globalUniqueSyncTC(ms)' $logfile > /dev/null 2>&1) + if [ $? == 0 ]; then + grep 'globalUniqueSyncTC(ms)' $logfile | \ + awk -F":" '{sum+=$NF} END {printf "----|globalUniqueSyncTC, avg=%0.1f\n", sum/NR}' + fi + + $(grep 'pushResultTC(ms)' $logfile > /dev/null 2>&1) + if [ $? == 0 ]; then + grep 'pushResultTC(ms)' $logfile | \ + awk -F":" '{sum+=$NF} END {printf "----|pushResultTC, avg=%0.1f\n", sum/NR}' + fi + # common end +} + +parse_pipe_3_get_tensors_async_no_ddr() +{ + LOG_NOTICE "Pipe-3: Get Tensors async (no DDR)" + + $(grep 'getTensorsSyncTC(ms)' $logfile > /dev/null 2>&1) + if [ $? == 0 ]; then + grep 'getTensorsSyncTC(ms)' $logfile | \ + awk -F":" '{sum+=$NF} END {print "getTensorsSyncTC, avg=", sum/NR}' + fi +} + +parse_pipe_4_send_tensors_async_no_ddr() +{ + LOG_NOTICE "Pipe-4: H2D Send Tensors async (no DDR)" + + $(grep 'sendAll2AllScSyncTC(ms)' $logfile > /dev/null 2>&1) + if [ $? == 0 ]; then + grep 'sendAll2AllScSyncTC(ms)' $logfile | \ + awk -F":" '{sum+=$NF} END {print "sendAll2AllScSyncTC, avg=", sum/NR}' + fi + + $(grep 'sendLookupSyncTC(ms)' $logfile > /dev/null 2>&1) + if [ $? == 0 ]; then + grep 'sendLookupSyncTC(ms)' $logfile | \ + awk -F":" '{sum+=$NF} END {print "--|sendLookupSyncTC, avg=", sum/NR}' + fi + + $(grep 'sendRestoreSyncTC(ms)' $logfile > /dev/null 2>&1) + if [ $? == 0 ]; then + grep 'sendRestoreSyncTC(ms)' $logfile | \ + awk -F":" '{sum+=$NF} END {print "--|sendRestoreTC, avg=", sum/NR}' + fi +} + +parse_pipe_3_get_and_send_tensors_with_ddr() +{ + LOG_NOTICE "Pipe-3: Get and Send Tensors (with DDR)" + + grep 'parseKeyTC(ms)' $logfile | \ + awk -F":" 'BEGIN {sum=0; count=0;} {if($NF>1000) {sum+=$NF; count++;}} END \ + {printf "parseKeyTC TimeCost(ms)(filter>1000ms): avg=%0.1f\n", sum/count}' + + grep 'getAndSendTensorsTC' $logfile | cut -d" " -f11 | \ + awk -F":" 'BEGIN {sum=0; count=0;} {if($NF>1000) {sum+=$NF; count++;}} END \ + {printf "--getAndSendTensorsTC(filter>1000ms): avg=%0.1f\n", sum/count}' + + grep 'getTensorsTC' $logfile | \ + awk -F":" 'BEGIN {sum=0; count=0;} {if($NF>1000) {sum+=$NF; count++;}} END \ + {printf "----getTensorsTC(filter>1000ms): avg=%0.1f\n", sum/count}' + + $(grep 'sendRestoreSyncTC(ms)' $logfile > /dev/null 2>&1) + if [ $? == 0 ]; then + grep 'sendRestoreSyncTC(ms)' $logfile | \ + awk -F":" '{sum+=$NF} END {print "----|sendRestoreTC, avg=", sum/NR}' + fi + + $(grep 'prepareDDRDataTc(ms)' $logfile > /dev/null 2>&1) + if [ $? 
== 0 ]; then + grep 'prepareDDRDataTc(ms)' $logfile | \ + awk -F":" '{sum+=$NF} END {print "----|prepareDDRDataTc, avg=", sum/NR}' + fi + + $(grep 'hostHashMapProcessTC(ms)' $logfile > /dev/null 2>&1) + if [ $? == 0 ]; then + grep 'hostHashMapProcessTC(ms)' $logfile | \ + awk -F":" '{sum+=$NF} END {print "----|hostHashMapProcessTC, avg=", sum/NR}' + fi + + $(grep 'sendUniqueKeysSyncTC(ms)' $logfile > /dev/null 2>&1) + if [ $? == 0 ]; then + grep 'sendUniqueKeysSyncTC(ms)' $logfile | \ + awk -F":" 'BEGIN {sum=0; count=0;} {if($NF>200) {sum+=$NF; count++;}} END \ + {printf "----|sendUniqueKeysSyncTC(filter>200ms): avg=%0.1f\n", sum/count}' + fi + + $(grep 'sendRestoreVecSecSyncTC(ms)' $logfile > /dev/null 2>&1) + if [ $? == 0 ]; then + grep 'sendRestoreVecSecSyncTC(ms)' $logfile | \ + awk -F":" 'BEGIN {sum=0; count=0;} {if($NF>200) {sum+=$NF; count++;}} END \ + {printf "----|sendRestoreVecSecSyncTC(filter>200ms): avg=%0.1f\n", sum/count}' + fi + + $(grep 'sendTensorsTC(ms)' $logfile > /dev/null 2>&1) + if [ $? == 0 ]; then + grep 'sendTensorsTC(ms)' $logfile | cut -d" " -f9 | cut -d ":" -f2 | cut -d"," -f1 | \ + awk '{sum+=$NF} END {printf "----|sendTensorsTC, avg=%0.3f\n", sum/NR}' + fi + + $(grep 'embHDTransWrapTC(ms)' $logfile > /dev/null 2>&1) + if [ $? == 0 ]; then + grep 'embHDTransWrapTC' $logfile | \ + awk -F":" 'BEGIN {sum=0; count=0;} {if($NF>1000) {sum+=$NF; count++;}} END \ + {printf "--embHDTransWrapTC(filter>1000ms): avg=%0.1f\n", sum/count}' + fi + + grep 'hostEmbsTC' $logfile | \ + awk -F":" 'BEGIN {sum=0; count=0;} {if($NF>1000) {sum+=$NF; count++;}} END \ + {printf "----hostEmbsTC(filter>1000ms): "; if(count==0) print "no match result!\n"; \ + else printf "avg=%0.1f\n", sum/count}' + + grep 'h2dTC' $logfile | \ + awk -F":" 'BEGIN {sum=0; count=0;} {if($NF>1000) {sum+=$NF; count++;}} END \ + {printf "------h2dTC(filter>1000ms): avg=%0.1f\n", sum/count}' + + grep 'd2hTC' $logfile | \ + awk -F":" 'BEGIN {sum=0; count=0;} {if($NF>1000) {sum+=$NF; count++;}} END \ + {printf "------d2hTC(filter>1000ms): avg=%0.1f\n", sum/count}' +} + +parse_pipe_3_get_and_send_tensors_sync_without_ddr() +{ + LOG_NOTICE "Pipe-3: Get and Send Tensors sync (no DDR)" + + $(grep 'parseKeysTc HBM mode (ms)' $logfile > /dev/null 2>&1) + if [ $? == 0 ]; then + grep 'parseKeysTc HBM mode (ms)' $logfile | \ + awk -F":" 'BEGIN {sum=0; count=0;} {if($NF>2000) {sum+=$NF; count++;}} END \ + {printf "parseKeysTc(filter>2000ms): avg=%0.1f\n", sum/count}' + fi + + grep 'getTensorsSyncTC(ms)' $logfile | \ + awk -F":" 'BEGIN {sum=0; count=0;} {if($NF>1000) {sum+=$NF; count++;}} END \ + {printf "--|getTensorsSyncTC(filter>1000ms): avg=%0.1f\n", sum/count}' + + grep 'sendTensorsSyncTC(ms)' $logfile | cut -d" " -f7 | cut -d ":" -f2 | cut -d"," -f1 | \ + awk -F":" 'BEGIN {sum=0; count=0;} {if($NF>1000) {sum+=$NF; count++;}} END \ + {printf "--|sendTensorsSyncTC(filter>1000ms): avg=%0.1f\n", sum/count}' + + $(grep 'sendAll2AllScSyncTC(ms)' $logfile > /dev/null 2>&1) + if [ $? == 0 ]; then + grep 'sendAll2AllScSyncTC(ms)' $logfile | \ + awk -F":" 'BEGIN {sum=0; count=0;} {if($NF>200) {sum+=$NF; count++;}} END \ + {printf "----|sendAll2AllScSyncTC(filter>200ms): avg=%0.1f\n", sum/count}' + fi + + grep 'sendLookupSyncTC(ms)' $logfile | \ + awk -F":" 'BEGIN {sum=0; count=0;} {if($NF>200) {sum+=$NF; count++;}} END \ + {printf "----|sendLookupSyncTC(filter>200ms): avg=%0.1f\n", sum/count}' + + $(grep 'sendUniqueKeysSyncTC(ms)' $logfile > /dev/null 2>&1) + if [ $? 
== 0 ]; then + grep 'sendUniqueKeysSyncTC(ms)' $logfile | \ + awk -F":" 'BEGIN {sum=0; count=0;} {if($NF>200) {sum+=$NF; count++;}} END \ + {printf "----|sendUniqueKeysSyncTC(filter>200ms): avg=%0.1f\n", sum/count}' + fi + + $(grep 'sendUniqueRestoreVecSyncTC(ms)' $logfile > /dev/null 2>&1) + if [ $? == 0 ]; then + grep 'sendUniqueRestoreVecSyncTC(ms)' $logfile | \ + awk -F":" 'BEGIN {sum=0; count=0;} {if($NF>200) {sum+=$NF; count++;}} END \ + {printf "----|sendUniqueRestoreVecSyncTC(filter>200ms): avg=%0.1f\n", sum/count}' + fi + + grep 'sendRestoreSyncTC(ms)' $logfile | cut -d" " -f6 | cut -d ":" -f2 | cut -d"," -f1 | \ + awk -F":" 'BEGIN {sum=0; count=0;} {if($NF<200) {sum+=$NF; count++;}} END \ + {printf "----|sendRestoreSyncTC(filter>200ms): avg=%0.1f\n", sum/count}' +} + +main() +{ + validate_options $@ + check_spdlog_level + + echo "+----------------------------------------------------------------+" + echo "+ Profile Result +" + echo "+----------------------------------------------------------------+" + + parse_pipe_1_data_parser + parse_pipe_2_key_process + + $(grep 'DDR mode' $logfile > /dev/null 2>&1) + if [ $? -eq 0 ]; then + parse_pipe_3_get_and_send_tensors_with_ddr + else + $(grep 'parseKeysTc HBM mode (ms)' $logfile > /dev/null 2>&1) + if [ $? -eq 0 ]; then + parse_pipe_3_get_and_send_tensors_sync_without_ddr + else + parse_pipe_3_get_tensors_async_no_ddr + parse_pipe_4_send_tensors_async_no_ddr + fi + fi +} + +main $@ \ No newline at end of file diff --git a/tools/perf/host_set.sh b/tools/perf/host_set.sh new file mode 100644 index 00000000..0120ebb9 --- /dev/null +++ b/tools/perf/host_set.sh @@ -0,0 +1,17 @@ +#!/bin/bash +# Copyright (c) Huawei Technologies Co., Ltd. 2021-2023. All rights reserved. +# Description: performace analysis tool +# Author: MindX SDK +# Create: 2023 +# History: NA + +# cpu with high-performance +cpupower frequency-set -g performance +cat /proc/cpuinfo|grep MHz + +# clear cache +echo 3 > /proc/sys/vm/drop_caches +free -h + +# swap off +swapoff -a diff --git a/tools/perf/msprof.sh b/tools/perf/msprof.sh new file mode 100644 index 00000000..c1821c83 --- /dev/null +++ b/tools/perf/msprof.sh @@ -0,0 +1,24 @@ +#!/bin/bash +# Copyright (c) Huawei Technologies Co., Ltd. 2021-2023. All rights reserved. +# Description: performace analysis tool +# Author: MindX SDK +# Create: 2023 +# History: NA + + +curr_path=$(cd $(dirname $0); pwd) + +# ---------------config start--------------------- +model_run_path=/path/to/model/run +run_cmd="bash run.sh" +# ---------------config end--------------------- + +# ------------------------------+ +# msprof + +# ------------------------------+ +output_path="${model_run_path}"/msprof_out + +cd "${model_run_path}" +rm -rf "${output_path}" + +msprof --application="${run_cmd}" --output="${output_path}" diff --git a/tools/perf/mt_1207.sh b/tools/perf/mt_1207.sh new file mode 100644 index 00000000..fc0af5db --- /dev/null +++ b/tools/perf/mt_1207.sh @@ -0,0 +1,60 @@ +#!/bin/bash +# Copyright (c) Huawei Technologies Co., Ltd. 2021-2023. All rights reserved. 
+
+main()
+{
+    validate_options $@
+    print_throughput
+}
+
+main $@
diff --git a/tools/perf/perf_flame_graph.sh b/tools/perf/perf_flame_graph.sh
new file mode 100644
index 00000000..dce91600
--- /dev/null
+++ b/tools/perf/perf_flame_graph.sh
@@ -0,0 +1,37 @@
+#!/bin/bash
+# Copyright (c) Huawei Technologies Co., Ltd. 2021-2023. All rights reserved.
+# Description: performance analysis tool
+# Author: MindX SDK
+# Create: 2023
+# History: NA
+
+#set -x
+
+curr_path=$(cd $(dirname $0); pwd)
+
+LOG_INFO() { echo -e "\033[1;4;32m$1\033[0m" ; }
+LOG_NOTICE() { echo -e "\033[1;4;45m$1\033[0m" ; }
+LOG_WARN() { echo -e "\033[1;31m[WARN]$1\033[0m" ; }
+LOG_ERROR() { echo -e "\033[1;31m[Error]$1\033[0m" ; }
+
+# ---------------config start---------------------
+model_run_path=/path/to/model/run
+run_cmd="bash run.sh"
+flame_graph_path=/home/FlameGraph
+# ---------------config end---------------------
+
+cd "${model_run_path}"
+rm -rf perf*
+
+#---- perf cpu-clock on all workers and build flame graph------------
+perf record -F 99 -a -g -- ${run_cmd}
+wait $!
+
+perf script -i perf.data | \
+    "${flame_graph_path}"/stackcollapse-perf.pl | \
+    "${flame_graph_path}"/flamegraph.pl > perf_mxRec.svg
+wait $!
+
+LOG_INFO "perf_mxRec.svg is created, please check!"
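+
+# Note: assumes Brendan Gregg's FlameGraph scripts are present at
+# ${flame_graph_path}, e.g. obtained via
+#     git clone https://github.com/brendangregg/FlameGraph /home/FlameGraph
+# perf record samples all CPUs at 99 Hz with call stacks (-a -g), perf script
+# dumps the samples, stackcollapse-perf.pl folds them into one line per unique
+# stack, and flamegraph.pl renders the folded stacks as perf_mxRec.svg.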
+
+
diff --git a/tools/python/images/clip_image002.jpg b/tools/python/images/clip_image002.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..fd387c49710c1d99303cce3767ff720cad502944
GIT binary patch
literal 9453
[base85-encoded image data omitted]

diff --git a/tools/python/images/clip_image004.jpg b/tools/python/images/clip_image004.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..bcb5cff76d9c555e3cb414a2d30b1716a902d31a
GIT binary patch
literal 8027
[base85-encoded image data omitted]

diff --git a/tools/python/images/clip_image006.jpg b/tools/python/images/clip_image006.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..8759d287be51d491663043f531f5e3041284811f
GIT binary patch
literal 21733
[base85-encoded image data omitted]

[binary patch for tools/python/images/clip_image008.jpg omitted]

diff --git a/tools/python/images/clip_image010.jpg b/tools/python/images/clip_image010.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..71b63575957f04d9e844f6425eeb562470201840
GIT binary patch
literal 24851
[base85-encoded image data omitted]

diff --git a/tools/python/images/clip_image012.jpg b/tools/python/images/clip_image012.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..d6d1169e8ad6fa4bb7e21e764640087b4f0d3ef7
GIT binary patch
literal 17452
[base85-encoded image data omitted]
zy+^N@RTHhD89)mvF4wL77I8tI8&FMM@%{+thd(>zXtt=5>^Iikqz43sKmtE|)k7oX zzmaS^1$--N72jpKYe&C(l8H-Q9Y zst31Ni;SvVw3JS0>^NT|D!+Tf0M=uWjkGcdL)5`8pHRJ;dO=J zNDFdAm80g$;hw;oBhB0s>bc`}zoZmbfh%kh*wRxFS63DWogd+~c9?y=eSWO`&}&Lb zYt4mcJV=ZPl0+k6o|i`l_`bCB{MpSl~vxcx@b2WMzD}q(g;^DzaA&jy^lm?b}1L@V^Vo>rDB657rmL-<@d=@4mE)stcQ&m zx)abHPuL73g8?3CcSU~VNkEQ~VDbkedE^Mb{ySkdTsJwlu8=rIz^+ZnYWyH(v)l;@ zIqP-b&y@rXH3YSVZpP^vS?nTmYQ-&now^MByxDD-AHb}p{ftp!B&*TJ3`DGDFd%@fHhUp zmT)(|{p|PFhz$mOeHlwzvqTp){$wXq%9xXM3yOozWnF_|M*_hG|8&8mG=>``ix_h% z^9nZ`M4{uM5RY^4t_0Tr5Athi3%Nv*Gv0=PMZYcbqSo*w$~kU_Lm(-_dJ4l>!2E*A zVZcU`m!TI;67&sYLqw0kb-Ag%!j{csOP5$1k8!*?M8!q?eff*|t|EAsW05Z}fO;az zo#8;uo49vTq}T8VYD%u@uUrexCFx>65tHZ1Cd(CUSl^hHhNaoxH>$-GoUp!g(t*h- zStsPuy9_Cr4q?9_h7?5rHktX z`6y~I(~vV@+O~imZ|ji$M;03m=~S|}zd2$_vrO00y?@jz5rLeX ziYiO?hSqCiTfy#{({WaE;(S|Z*cL)8_}UNX{C4lc&`Qp@Up`DS;oB%CB0tok~`^+1`uneZuVS+yz7P^y|( z6a}MYR3}75PMnQC*WkmuPXWy-zAQ*U%1p29PSld^rh?O991o2YMkP_(U4OF!$V1`PaVTUBn#AH3C?7noJ(_2)=K9Tj2Pi?hvECvWIY*9sn zkGc~+JYB-_kDRyYz_666T92UuH{KJ*8ERNEOunUzVhB+l`fh=1#H1->qDZ{oAdGfa z{Oog<|LrOioLepHC_P;oK&xWu0!IjF^oM1vL^FQn6yUmM&$929{qM= zN^(N!Zy&kg9Wuof;{D!U|1FYbIcK1*>{w&okt)NwAZeM$6+-#Ro#%ftMNN%o$(X80Y zI{uHVB8d;?5aBmH18W;^HaH8!SW>n`lh%K2^fCljIo^po&GdMt$iy^!Y-_;SM1j#3 zxuFX0w3S#Lce#s_d~cqokJ>b>JbEK$8r4e<`u{u{_M{B8l$wAVlItxDT@FM zdetUXax!?aA!e|T_AI@2LuSiYl35796Ot39&lWOney<`!UuIMVFpEh)-muz{Yf=c*{%1EhnDU{)dunc z(&TIJSoPGbk&3BRfz)=x6>;`PZbh&&KsGy>JL%ec)QTOe1 zn6i%^B(@HPoApDTR(t%sMnuZ$wbf_843e8MB^Nm^eQ~oJrWdZ{s7Zd7xM09*A3|W% zH9pFUbDKl11NMQ7i@WPfA6^|e0le8{H^hil z9J!V@cR|IdQj82Fv5nF>i4!yFjj{mq?$r;Jo@sP=1j=ryxFs$ahNeM_@IqDpf?pM; z&tQkZWGXRP7a4hEDnyY!u`Pk7srBjS?xRR_o?BZ{1!(hFqi|T6u}_FYEi2c8G*^W= z%`0a0x{M0f<{#cKLO%pg9e-=Fi=rML?+!mp^m*Ymy?`Oyx1Iqf-%%9fwoNkUY_|aG zwx3CCj#eEU-Kt`jh}h1zi-qk-7GE9LS=#F zEs~klT$`AxK`RcwSp5(M2|E**c84#U6DeBUe3>G~w{tbO$hV(clb6S>-La=aPh^Lh z*7W(>H~70gdGSbyNnJUQEVVf9+LzpOcYKVoIEFo?uXNI9O4x{yNJ7u7_Qu9lSa8i) z%d}h#e5Tw`m(_!hoO1H^WImOd)07`VVT3i`YyrQv zmvOG)C@PDygna458nfwMb!{L42raf%zKMWP3XpbOqGFLTYiCi?Eg8D4l$zhkm_tw{ zf2OC*ch-}@0#c-j?Y+(r*%5B*-hM9rx;`aR*SsxQJ2qfYH`xXv%6SsXdo_r?iSsWpTC&4m{8kE{c4m@M=SvaQss@?=+E;JDHpcGB0bLG;`Fttl0x#|oD~80F zld(WWas$|{$z#nSmSN~jALyc@sY=abw!~ehOO5RH7f8z~I*X5DyMnCvzVATI?VrxK zQEgSeRA0W%yIk7cMb8ZjY+?=Kt!F+c{8%Q$AD4tHZi7WZdmIf*vasE&?USaVyiE{w7RXXfFD(LN#kH48V;g!@YU{F~SoI|)-vulQ7rvk! 
zs2sDmA^lz?@VMM)3vVYv& z|3#UH8Y;oH#-yCusHlTleOJOK7c!ptUNbbqV?bdLmO-_vyGwN+wG)$3bFq$yi4VNk z!3@fpgIyvf;p`kx)61BppTw;8k>f8DSV>L93tsalZb=kn=%vQQ$Tb z4J{4_iA!55q|89r-B)h+qxTu%F2W4Cb_;9ovk)j(DG@-pr1H@Q1Ts4 zRex=X6RHZI&j?$?Oab)!q{h*J(B)ThC>tM7JJ5;p$m`}k)f$BJ%`firdH;eC{SAH& zQ}S2Vwqhc}Vlj?;OTjct6teatp}V@uNiQ@_82E1M$*6dH^+5rAc{=H^U4;eF!6$V1 zRA8Rb36|2840kQ309n6!S|=#UcY~3!dFg>3rFcpktgv=r7GW{7Z2L7(g_wc^X8U@Y zMI@lS!9mTv^{`5$XRPmzbphwoJW*>c$r=9vc5K3AvWfgI8)YWTfp?S`^fggTN;Ewb z{Tk>ZJR->w*8R+&wPB*%Jbj6E9aU4FtILERWd*l5{maj9lN9GEGx(7Qsw5A9Ml);C zXHRIo5Jr4}bpX9U+L5feLvAi+@UlIJBNU~r%}sK|A6{gqB+jG1IOxgQ1tsjuebygR zhrq}Rz+IohJNQ&L4WW#DJ8a7;g#kf)1=eBmj_8Q@X{Y|IjHcPxdS*UJY}zbIUVfJ% z9lVv1BKn?Mjupu&nl%UU&s6pX*6S8F#8%+J{AEhCPN%tEN3C;fCFU|$6r*4&jXL5J zap^#V1A!!UjUMFMc5`~%MO#!LV}V#t_sFe)Ht}(nczyjBimd2a7noK<{R<(Err&(_;{^T9JfI8$crc zj3Hi=)(}6vviMp9-Eq-5bJLqYAn#=ratsJ2N=5b7V=$XcmU6$gHf6i&h5D-mIxUpQ zHkR17K--vHRfU9nT!c6I6?lb6oQvB$O2lKqE$*Y_uZ~1PvuaM% zZK!4Q*DAvv*DH;-h4GA%eU=WDsRQBSl4$Agl}}_QiatW8qmWZen}gDBTec8169#D- z?mZu)DP0m~jg4A$wsbXfmub&}Uwgm8TZ3m!BJf@N4Fe;AD%J8pr7aBHODi%n)quG5 zr}>@Q7^JBLgA|7#)P8jOnaA*Vi+|Ry-Q=z4G=T}h@}iw|_Egz~zhdnzz0AQ4jozH;e%%Nb9JeCu8~I$(O%$$tgOWtJ zGf^%WpySf9yXx?4uGbOyB-zaPDs5y1p^a?)@$q)2vErt@#*@`X(je384c8Tc7)ROb`I7VhhsntDpO=_b;5nbY@W_FNcfo3JYs z;kmaN*3(VY3S%lnb%Dm)tvWM4$%KIF8!b6?8ds~=h8wcK40IS^AWMY@{}q18EjhO$A~m_7Y;Q6CXQ&u zw3!+wa!mrtf-$NTi$fK3e80Ns-Sob_FMf+6<7xllo2V>FO%G(3&MAX=r9rwEsR%@t zgTpG|FY5CpFM_kwj2D1*c-I_6gCi;StSIz--%UQqT${s<ddQ>S-*#E&Qg)EXMI9p8W`@MU!>xxGO^tu(( zjCC+yU#W1gxcVzJsmg>b3^EVKo16gta2UsX`VK<)e1X3iGRgo)&Tf0FH3GYi(}36y zkB1V77Gp8eOX62)vIfN!KQeYdaK1;qP__-^>0!c#Z822Ku>}la!OEqOmJV4hkx@f#P8G zUY1z-KtQ={jF>)5Zs^16;}@LzYX>LZ&aMPcA)CAoFWWki256rx35y4n60?u%~v$95TAR*uIgkko8t?@R@2y=0OAI z<`2Cr$%{t)lfVC&6XSF@8O%oLuZQDm94A8)f2uv`Far^%t|`ip{?m^$GSkpHHCw` zCLH47-c*z`Nuuh=L0+sMF)>KXS+(1q&9?>POG_U9X?+B+0uhanftK*rcP6e!y#fe@ z5)lW*@y)}cB%Yj`B0=#&2Vvqw@%-ndl^zO}mcFmfzI=S4yQJ9DW$yDPtz9q!w%8q! zTZ+}o>5PnOGXse2}|_O&~>jISup2D_Q9FF>45rs zsyc4FlYUn$Sm(Oj7znxnzkAAjyiwKyk!3s)C9!~Q;WxXClyjWSdz$`@88)>(%^wt!|HHk~_Dak%dQPan&p z{T^%V2T`mE>1`Fv^4yD<-jJ>?EYP8~lq_ksNOjsLVF8M)gv99srtW*8{OueNL(n=x z6^(|D3mLY&SFc}SRvJ#y_)s|qWH9)R4u>>2S{#kJ%0Pb-=Oig9n(T0`)?XtBVkE`l zUBWpZg9*)fvkc;xZmJRfCZmUKNkpfiFpw-31HSu#x=5rhiSlRCYLVq8idZ$ZU_l9{ zdyHa#E_HtZ(AKveOKeEucao)VK6@6$ot#)jgqu$_nXu!P@TT9Jiry+Ql`eyDLU$MMahB%f#JSI;=VHL9%JCQ6hCX7fTXRgFvcB%wlbAw`^BHNU_Td}_O0>!Gwq*dF9Z zD)*fecX6!qp=mD%sWx8hjJS8Z_jZ#Cl}aQcf~cE@#fogB%*uGB^W#)9-z&n#xShGV z1euy`Fs0ngBjwQPUGRo)enhBpMHLSa30B8~#}Nxkv3oiD#BV3#3iZS$UR{m}BzDm! zN8WDphrd`zhspvp-pf}7%K(Gl?s;_#=~k?U^}on?E%EjI4p6@eh?ExM=k60h3WL?M z=3u(o3h~SjB75~!DTY3i`cXoLbWxD4P}P|Aw5VPV!hymf-bhY;>}ORmP0|=x*6-D1 z_1=lVIxZl!rt~!Dbv*B{t;@STMUO?0o~UotGqWt{=lOY%(EB`tUiUJiDzA401e^A%^O#f57n7^+$q`reymi+_5iSg1y)tWuvuBcbRlxQVIe9&xv;H_=ITNl^o}g;% zzgX-QbK~ze&I^88pJ=i3b-rS8mzD`dpvcp;Q6Iv+%ZyiNdVcFvc z;@IC#di;6fe>5`=~u@1wW`t_yEnAqokX*@9CA(&pG&FIeM zBx6o|;meHthjgqxUhCL*OAUKQcqg=lfY*T(?Qj`;-GO{j)^qO62SApJoy4{L`Em^h ztu?cNV}BZ1l5j7Y2$i2UYQ9ju^A{|saz&_<{G869D(BSq%ai*NjEkLUr?QgW`l}h2 zUquQU=hTSo1l>fg5WpxIV(@dpAGPOAq!>3@^A6py78;mO@!!S#vMP-G??^P`spr3n`A`g-8)TX)I^dC>sY z|0JnOXm}DY#vn6?oqXUsO>xfP{w9i0{d5l_21rlw%GsiiYFFSv_)Ct{7@e)v? z%84rMgk{8aq~4;bkYYSH*iRtVB=S-Zzs(1%tTo>H@6Dx?gAjg+AZJ?hm)3dS?G11tvN+3wH3V*({un2}}9 zt4N>SiEJTed*(<0H;I3a1il_N|6V1OS+c9lI1VD1zxmp=U;gdN zky7y-IiAP2_*dR(@9S}RV@pC99@px}TzSWL@k9Y_>((qW>OXKm9@H<)vBc0k@X^(i z=M<#oLxtBML3lf7wWI6%5G}Tm7%B3VgjvE}kuMk*?DL<5Eubx#&PUXRwNS}iZO%4! 
zP{Nf9U(ixpG0gs%<61CNmb%pyq4YC{(~<5zgty0Z5jSyYG;6`nqsmPif?o=l!ci1n z@8ZhuqDXPYnF_8pLFQhqo7Ag{nG5e^xMtNEbUh;#n#Lcscik}a2{V;iBA+y8Db_D` zb@#PiW0+9>4)Z!a7_*{OdpA1r$vr024UfuIk|HW^=VYPRrM{0TO1iXv1tFxjgvz=c zWL*TC6^M0wA{W0g&zQK=e{i5%=^f}?VKD3|>~Ei$&`aI2@>xV$MYBS2>${(MbZTef zBIDVd9lzL&!Uv!N6M>u*qiZ;&pNB*y!1+%{lF5$AO=f%6Q~eS*m(g*cNv}`pCzbYI z;KcbCRn(xq+@c4-4EL@b<1(MZP9eqH0WIcFHA?=@K`gVz*NXcOfUOyg2S6KnP)FIq zb*gGYzIU8vRD9+ajsohpWk%GRlLn5BclOV#Gw3z~-;=8)LV=H^TUW>uu0z|4z=n3G zAN$#G65)*I)f4juV@uv?wS%1;VfKv^1%r6*(DaFMV-FSn#P%%rg8npNUAs$zaE zH53Lq%R`(+&IfyviiWpT5=8^~p@mkV2FxH*VpWRwnu9dJ1{q8h45vm17Z5x| zn@dc#q}$xxZ`dhb-p91D^TET8zHT-t?CA~HJ5N?0;1ZE)Xh!18LUVqt; zx}fKkIBW9DZuq{nWS4{@Zy(I6&ZzNuMG0RA!3U=~+eP&C#B1YgJ<@*XAbU*l4El5_ z$WR5`WZk|0>awDRJwtOntL0nE7=I>?cyDXaHA)b}#C;eI|6RcYV2$90dC!;h`EvMBzT&>C&D#;nXA1}~P+wYe9 z44c45WZ+npq0sDv``|6Qcjp7(4`}4yWs85IidjEWxcxEk zQ_kI>iyiL!VB7DHlrq5p^Z=-24LV2A*UVJ>=s&#oc1cZ$}f`~WU|7Jn5?BnFQ2f%1W;R7I;?*R}_`ycKd$7Bnak8%BI z`yS6WabKgd{N3UT0Xo)U_aFW+zd=&$l`=_cP@Bj<8*lv&jr}h*F8}wQ3_R3rz3-5+ z-k(a`VcjD{JpeF_vhRNG8eb!EU#tJSJMBfH-xvW~xp#4oP#Ta> z@;@*0YvZm<0%C6K65iL1?xytZ7qpyC|6bK-eCZWYRFw?2&+HD-ttcFV>;1Mijx^*e zZPoQZ*_`i9LFvMtOsr!{?e)O6OY+8#uF?L@2O%Y7-72K8a>A>=M@);z3}gG*hX9`r zY;EjIN`xDv8jiErQaJLRzGQpn2J?9DLXB|aTLF&diOuf`n#CRfy%mKM_W_m<0L+Dg!?#JlUD4mP?auz$<^xjauVf#M zm#H|#ecrShif0+P!^&SoFQBUqvg(fj z7RkYMrfT(=^|C?DwyzNkcd9MSi$(plE$f!kt3-`KxQVVF2sqx2YpNUx-_DX2+Ni8= zCdx0+ngZF7d^!9=-}zdkz|A6|RF)VwbCc%90b{mTcPIycu5P6Bug5U@#*gCDw9TtK zWEVQ)RY4RmQl@ln1tziW?&D=b71R3QVy2c&({(&H!Hl6I zjawa&+ZYg%%|4F88MUbHgel}^G0!NK^D^|2Gto2FS_e07dH?wB+R#Q{-q7BoJkF#P zBORFr{2Q3^LYBrm>Bk9b_Y*I*knv~52iMIsf#!n=u6`=pQ&h|IKEr9x#j{~sCz?}T zLH14iNTr1v^?4ax6qXDs?0}Z_MAf<}W>w;vQCEgGmNY|w8+u|NPEy%dp)bArF7hUh zeg&@m#p}{oyu@VnKZdInZin2uQ?HIY>GC0AuYezU~dAv9ZG}g02xoFKRmJ{s&k2d z<0*s&03yZD88sG?wa4=T3}=liThscy}o`U54Qg#!eY=9tChYhk~a1DCKFj^d5?>@O>Of% z!e2K0(jsdeu6tURR+9V{cs)7SSRZ8-Auu&9)nGa91d#||L|+< zM!hPiNrH_4vitq$ntt^Cy*?v=vX;!YF488GWP4r(wc|QGI~{ynP>t`RBA2+1_hN(!?K(x;x3UL*_g91Js5tp` zTh(~Oz@PLs|1{t1@eP2tn)-n=iw^)~k*~{x+;r^iKJ^xw2x7@@GDEk75v}kpGJM1Gd<` zPk@A52+K)D8AAj%k`arV8{WbN(cQKW(mi3U0aDJbM$k#qk=H*}U*=qHCXp7{CovZU z`9lFJ=$Y8?7a~(5BIHe~_s^7CWgY;zdpeJ1se2@s>@9yukiX{>w`uH#cWJzydXLvwkT?f;mWjz-HK9_M)zhreCPaKxXkgeS54 z1vwZ<|<-8V{pr}+e{q1&YdItct}cTwJxp5NP0-I6B~YnzpcXV{)ZF}zkSmO)uu;( z_+E(e=8;o|aNo=HJif3!DEUYe%Ud^X{&sC|{r_tHrJ4WT+OU;aNjY+S45im+&B`hpx z#C89p+2PK+dr8q6;{h*Tly?EvXTR;da(hkNH>0jVgd2>h@2dP}eY%wVw4{u&ysTCG zmTUEqije&?9QJ2e?7zw%PXs(h2x|Y6_lW-t!TvMP?!JC3#`#~h`1^mlql?E4=Kt<- ig0pPm-I+0;h&9%VO|SqavHCzrkAI*|Ua-W&+ zo4mXC?z?;U?!9}T|MShEd6;whbXQeZS65g6iig>UWdNRnjJym02?+_{g}49@a{ws- z1{&HEw8t1v(4SypVqoEt;o;)o;8GKl5Ridr85w}IKze2lq36u3{A~0U^O5;8v0 zLnnX+06;=TQ2U$UzkZM&A)}x^Mnivsfr+S4iwAgwgpB+M1sN3;1qD&tAMqT3f{#i- z$0hmrsfr02y)z+qKx`H|@I_@ik?PnXh{x0=@CgPn2`L#l10xeN3o9=lzkr~Su+&Rw z8Cf}b1vPaIO)YI5T{CkFORKlmHm+{&9-dy_K0zOXLqb1(3X6+RNK8upoRXTIlbiQ7 zzo4+Fs=B7OuD+r1+mDXUuI`@RzW(uv$*Jj?*`KhL)wT6s8=G6(J4eSSr)TFEmsi)n z`9cC9|Anpp?f`Vc7qMBD z?N5L_s)t0TE@K$PAl?;*qu;Fk!P)OBanDjAH&qb6QtcYj)45&w@l{!I zkA)J!U%JKmUaC}v=Q}%kjj^eo23eBGlvMF^rF&)h@u9{}G` zY5vq?&TfN2>x=uWh?WO{y6e(m+zyS+)5jl>$in;aai5vM>+^M^{C&<>)!05aFZB2C zl44UY1}K}r4W8Z7^0%Jcf$t7kAS+094}i?=2f#sqyy;>1J!yVmcdgY``i)nA=A-l#dch*PQCeFI{g*!EZ^_X`&=yBxykNpRN5C?|)Lt5Z+#XY3wCx zyL2LS<@eg3W8G8FJ|}~;?s7$gBQ~gMQcd||Ir#-#DQS2G#ismGqJ*$LRk6Dtdeyul zQS-Q9xWk4iNx{e~a>y-PU+Z_s!SVxO_3iZ{?zP0B*USR|CrWsARj=t2Q^vpm-Ta_$ zYE+ZarB*z|?*XvX{s8D4djMb@J04`-T!JrC$1F+Rvmrs;-VO{+wU;?z`CV)sv~m2b zYdJXCP0#+@*B~&#cMpId+`U1C?aQ1>80lbfDSV=`vJgM2w5?F zeRuKzfS^49o+=rI&Y-)*eap0#xK9iLlDsHodaW>XY$Zd2`Y}gCJ;BkD*WITsD1SE= 
zgrfao*ikmClyJ zGBHhsld=V_nl*--arzRk1x2wXBZdp(;QpQE1EBQ*aA)(6Jz6PVJpdk4PnX}`+^S5$ zw()dfBj`MG4m_KRZ2NJqB3$03Vht%u zA>ic>*c5_6NSJWY3Y=R}5bnTE+ZaaQh1zM$INrlVMtyoGB2zma$Gn%g8piyR1=kZ^rf*Z1qY8xBHmvU2o{#s z_$o?T(uSM<>^2RSMUn(ii+SzpXWr6kK~7Et?tTk;R|R1_h?Z_|hY+Lt0JxsK%K&fQ zlKSelo*W#>8bC}h?Cn9S=o|G3awS@{5hkIlPl}QZ$!n_O7zqrNVz|6ap$2_VDUWkz z;%5ZQT<`JNTCc9s?teFZR`3r@7!D_&++&HA8eLsn=44ZoRt)c-aJ( zy%IrlpvMc}UE^}&l>FsQT~#4wbM_XnOY1n-6A1YlO=!Phnex9RSS;h&Z&@h(A&Zw# z@)|`Q`xY=#jo+0+8Q0Sv_ijzilMY!e{*@70eE%R? z$PZM+oC5PemL352wbXwn4M*%B=VQ_g3pdTrHl%ozZ+fG+g6-UZw3~bkQK3Y^gB(w} zLeLjXWd@#e&SivCH?B5j{{4i>`8!SuZ-gPg<&pS@JT}qBV}gQuY*zjUwaxo`wOwGR zsXV<0-Ujni5&XbBZwZ^yT=`Z}riSRLa(vb3FBA12`W5CiJTY=*=+7$D8~ceMYb+PwQiQ00Hsb*95T z5_DkP&g~CRv~W0@-TO+AfS+r0J|Q?{4|+QP?oK7c!q+|KqroJAw7j70HrN$%7+`={ zr3RNSersi^?BDCr-?J+W&PXqh+M^N{BEZQ=jk|B6DIj#zM%n&G8Qu5KirLGyukiu!7cgiI3mM zIGJGsPYbz{v=;LrcdYZBUQ;yI9=hTzubh~`qa6IZUylontTDZvpBTh;ehI~R{u-cL z)vUPd%=)N#xp2=q={m7xXkxBs!#nnF`AvjIf^_5~`e2n&j5lAnXaRbKXJ62)TaT^~ zh7_S`qc$v@3{m#kR^_cEv6?u})k z%=xgYvlf%npB!K6Bh8^}gLYhPFQwUs4sLL`&Qyc` z{;~OGY5bJt-US}goen38(1sy%gG+JSIYl;WPTcZ5SZL12>dC@5`9&4+ zZ)^o~Lx%OdUSbVW<-W=L9P?aoi|4F3Sr4N30BECgYdx#9yBv_h=z%Y}qcq?(GTf!w{ZTpqo z>!tMaviks@aMW3>|5|=c(jolHu0+*SxvGEBE-E*m!KvuE(58@hqB}2J_^yFdjp?Fd zL1bc4>&s8-MbJysPvBRo`j@);@?=E7 zVpgbPV@u1_KIhW`4zyW&g!y-M(D@!5y`G)nxU#*{T$Pghq4fJ_yN`hN%DLXP`~fM* z3}SUaK0uwghCcw%_BrP-!(Y{Erz&Rrcze5koMVkMa)9?`#eak_`8)lDU3MgNTYCUM z3Cu$ND*xj98>jD^Vf__{_k}rxaa|=}J*&cC7AYw&YK-6aO9)Xyal~tq#7SF6*CV#$ z=9Eg!>#EWMd({g~$((R$gs!}{gtE@+&suK*(xp6I{0A#Hm~aJ&c>Ruz#1JMw9C$EF z@P(-Q`SoT-nPHpZQ!~_DEf&@b>?S5Lu3(~Il0b(q;Yu9mdX2Qz#MQtdcD9(#5Y<+- z7}GYkj1_v_5dt7?OyLa^<@{I(Tj=erF%e9cWH7MM{Z?J>dnQ| zg{&k&pl0F+?e!VD1N#{b&;ae8MZlLLn_8+<^9#IEr+DF#r&lj$h$}@NnVFGU z>=4o4S=Xlcp7^ zeg^$b;|wyUT7o0U0noc*HtAYrbluuS_*ts;O|?B}{6jobk|q;Udoh;?!p!h3 zVv;g9F)95kVVYpuCk#6Kol;$OJl2JczeH$N`3vJca+azEJFY~C`>JSgso{FS} z*ER|=kq}Vgt9&`bQ*aq|&GA-`@A~I_v&c#X z@izZTx<#fFY_X9}#&@E&#ZhCN*MMKKIM13X#T05Qm@{|64^TaL-)5DLCkL0HV*2k| z7I7@M#VnvOp5_saE1PAAw~PJik;vZ4;zaa0U^i^902#a|Kyxzw>}LG|5J_Xw@Bo1R zAr{*gVStev?4H}xKSU$@r}js)-!tp|oYx!T4F|m(sHZJlL#D13=WK03yE}$-m#9b( z9l76afe+&Ec};h3Mt=+0ri&2Ch{R|y=(m_vhD4`#$gPc~*-03bxP7dktiSli%w^;W zxu3VIa-)C2$o5WM`J!XY&AM-a;YC$FBp+UWAo&3B%RDNi4`fEdmnk4!hf}?l|DQUN z`QOv@f33<8<%v8^TpKw*@mkaZtnChD77Yn3K{2vWp%LjYuGc>JmRE$9_!`P$VC>6 zyc&7(IbMfXO^E|n5pr#I(4Pke(uA5Hg8zjT_-7UZD}Fz^;NI=p)&-2O^yN_)^Z%-# z=idbFzEM9x*pC9{M@j#cclq0$@Bj5KLL!e62Ozo6Ca$N4svYVgZcMX}Igkl{&@bjj zOnuPMKmuUS5p{)hC?(A2Uho&H$m4F8h=7( zBWKZH4n`*1Rn@Zos3J`W_;*U=#8FQ83_hS3t-BsmHjkAApQ-bV*#;m@^vmu0?NJv~a3ACACSSZToSAVYB%#r5i z|1r-ak=BJPzDuojELo$mmM8aycHuklDJn!cwU*bOl|xum?L$CR2IB7!_rY<$Ddmqt`(&N!}N$S{Vxj3-pWz zy-v&JcAf>ahZ8?iZSD$aDh=gb-YI{lfeg$SBNASEUGiS$EX=s%hqc#W2UwZ;A@oMQ=-y7GBBI*qVgt zML@v7+HmD@9gx29;jnh9q{2vdvFk3kT9LPu{bd(j44{pf_C0@@5PaBGBeR`b9-~ie zsB{Qs_}Wc;%>ElOfiV#abar{OmS$$1W@f`Mx|7CXhPPlZ_W>Z?7_~Me`%2Ia_T06X zHa=GOTT-E8UR#{Lqh)NTa}RlvH|hBs;c2s=IY?)C)B!De;u0B=EPXfl2$XO?6Bfz2Ie<%wM$86W>XL z@Xwf1eZ9V;HBA6(YrZu^JpjBGE7Lo*0!e*>(yvVmhgK-&0-Qk?qlz3|s|%v9pB(F> z{H#piSBNk-KSLHOED>dK{@VT()3*VuPHl*8kU}L%RUS!lIpPTR;c<=6LiJ9LvUx+8 z6_%MbJ!=)b8=n82M`ibV^PH%qxz!xHJDfo`hp}F^7J-N;HBB?*2R{HR`Iy~p=Vr+` zum(>^mD8TT3tj@Hy8)cxx>|91USE3##t+O+?7v1C4}K`HuzN3yhDKGehf;6ITv?O! 
z(yh@1Bc#FYL)F~6)g~~XGE-rS`*A&YcY8wFFOyzTXZ_oIs#c~qQLt-t_!&5LotGhn zv_O+9eJw^LD6A2uG%LGPQWLJt=bIaK zY{8>6%j6f~vKg>rJ^t3&Mi*XIiQUVeN3@97tgxHOaWaP#?Wn9E9Qs1>0Ivgc9#*7` z-5aPC@ayA;nCx}9*pZ6$B2%3`;k=<-x2>LFnL}8&^U2q}Xv5F|oeD2%^A-{ULy7(D zWwPd=O~s8uWZ(GV`9n0){E_9|b}O;kuUS~u?q)=}a1d&gib9CdXEWMUp}dLf-@7fb zXLsJ@Gw+xxsuN0}&Mz@666qP}7A`!Y3)0$Ce8mQ|dUyGQ9Ii}>7(HYz2Z7E3rzMt$ zF4&rSvD6u&o~YzTwYpEJFbdQvt4H4p3blW|44hFFxCSSS+S5+ztePOrQNGhN(MBF| zh6+#M7hA9DVfJMQ`)zH+X@2Nom)4=u@|Zql@TRWy_;HV|K$1FJEmOto8@9%vgd-*+ zxzT7~s{kEQ%;;)MHfU**FNHDps?fhL0&HTC2!XC=H1c(1ziv;T$K3V4p}pww0aQy9 zzCl8iM*_;-NwMGF@z(rrO*9hWUzS`bq(QtE6`(rl1B1-;KLEBklqN_^v`9ga+`ilG zZ_#?n*H=b&sD(+!X^Iss@5Y!;ilWvBOip%HB-#i%iD5WCskPWM(6rdOdFt#&+i*|$ ziz^G2JkpM*O-oUk3b1jyI3{P{0m`)*@_4PYJmqL1Nz;Mb#d0Qf^>&IaYg$|NVrUedRuJm<$yPR?xKYQ`SZ$eS8#5hrC)E21a! zOV{_`*XkCR7WuHtvWT`4<_7hp*k0VpQ2B(d>U8-RC_j#8nA}P3x~)qj#APNE zK|XQqhG!S(8R_d;&v7{P{~RgSFR70o95ctV8+>o>Aa*${d?JnTKpNN)zcR0=0~oUi zwJKnuoyx-1GM~ZE^{p~H)#sLOx*MA0H|nN%Z6g)VMH0dmkM76Ftf;kNGTQI;kiQER zSJ`msdxfdDOn$AY(_Zx@jFQ4@z~a`st4_ymX%YG&D`oFbwc0yr~IwZ#C3W)JtYt_y1QP;E8^-F`)mbruC(0i7g6PO2<0LUYDg?Q!lo(Stm%jNKj(MxY|3e?}_SWp1rS9w(h=z<0MWSZ$!M zC+bq)yOnir?l#gd?(j7{i>ezj=C@k}Nr2*Y>JOAN*eh3ORGM;&{;J&L9ZWE|>V_0+ zoiGXvMOvBC{h}xd_QB&F;kccB*`^LCc%c8Hl>EomW3yh=dh^4e#qdZr=%<#}=C{8! zAZ=y0u)J9)-$1`^d!t@9W{u2LXtl;BwYrie{I7L*6I3hqIS~Zp_(HtKRipz~4;&B3 z1rSnaAk^mkZy#X6qnmxM$A8zg-o6~4s3`keRfUd*roW0sUYZ#5Z#}6P|37*F&S2oy zHfxON;zo*b`658XuRaMuGsQv@UWRe=37 zhcA(zyNO62i}7S0<`4F*OSe{Msz}y@jN!oXO4$fFxm+T<`(vFAUoZWa4RIPEA{qfI zTqfkGTHbf8wNP{{;u)9u;bVGO5pxXbFpUtM1lo7s2$jK(MDmdN*r1wd1@U3Io`GnE zL28y7uk2&m#?&1hL;SZzC`El2?R^+rvBS>2-L5(?qi_@tEglc&`_d6 z@p?+1*vdfI#?IRxHQhu86(QRVal*$=+)l#3sUl{m;qE-DBz0$1o;3Y3;g&eTJLk)s z=^{xQ!Q;PyJVTV2dzrwuFqOVxn=-xOdQ(dp!XQc_YBuW0Czc#uS{|HMnt1AL#KK>t z5LV2Fx=?>$PJqKn?+cR%c}8C{*e`D>)qVoRL~;On_P2hyN^T_ zolY+PBGc|mANhz#t|JIuu(NY^RZFOGF!zVp|6IPE$944a8x&F*ze~Rk6Cd`MR-JO3 z9#NS{+{_@BVmV%uW@DV>2w{qiFgI&auQ^H#?w1BtEa|!wGquoIf%mRY%iJzgbws6%t0X|H8-0Ble>IBY5jTLE&LwZ<+K9)}rpogG@K>Lv*ZTe~WhD;eE^Tnx z*X=r*smt-J3A!lcH%sCZ(vzeFKH4!5KDUi$_vG?+m5n%hkBEVsy+I}Q{lE?&9(Ri5w&LaX6-&z^hkDDV6WCIE;GOBSIeSw z?gfVJ*pxw?C+mpQsm3;8)`MGP)q-)bIP?Uf=1316o_)I+~nlFjkEDo<<@yg&1M;D22AW3 zb{$5dbrL%uytWj+pBU$m4Z7?NP<)&3Q-50~92|U^u*+-uW?K1@yAvyZ79bMml^3;5KB(VIj`FkR zLx;8G{Y)M}S=w^WD^#(m9_r-?JT9Ga%L_2bscc6Mn!U(=hG!kGJ>*gUmi9e7rh--d z993>uNk(w97GSNS)6q$8GBLE(q+A_(+}FO)deh{bu{b*%!u6b-+96Z z7v^s=stNVw*T^u_bR{#1K?kG)MzbqGMHoCYSLQ-`YDez;{(0C){&ouqhKL@2vai2$CDhHlOf!Vc2)3TvsO0N*W+C^731V3I<3T{u1qT!=PPUb5lDMl*$4`+#L(R`0V#?cQ3UdmzyKKI}5 z166R8kmt?}C+?KL(e1P1s%~mq?9XG4?ff{6vDkT2(Wz>GF5p)Zs}!8+yFZU%iu7M%7|z6| zwj5G24UG*+$E3zh@x$8YE8?of!`jPEJblG#1|8r}2DS2z!1}kV_CjxZ!>-6oU!2)3 zn|#A^p8f`7MjkmmZ>pau7hgL6YzJH2+>_yGCs#|qYB-2)%1kczu?pD`sC@f)eD@9X zSZ?GzIRg~zR~0H!-6m282d17rF(c?as`L1kD0orewo6(wB)r%^Um1qpY2C2C?8N<6 z*Ln+U!w+c7R~|Z4m!wKjPzEA-;sNq+T1ey`5ub#P}}khFZy%1`y= z1&!w`W;Li+muG^?Bw4@hw**fC{zq0#;qp4a!r)`(5m#a25^^2(OrlY^l+)NX8*_?D z(}HE1uub($`>u{xS?U1wj0TT|xmd$^X*Q%ym@3c0rK`%6D!dFkq4L0tKAzSBmTN^u zOWoqTk$bpsp5?vzIF4#GJWUAXm3xvwpCG)VU)jC`prs}UAs~=OUsF7VVvB$LYcWc|{%K9f!b`i(if5l2C}}dh5J0k1 zVOIOR*0Rg(%%ba>*M`p+-v_}86}>s-%<6rBkxW_PYd z3l0hw4SGO=GUBGUpY3P6Nvub{vbAOxUC#?^uRIcny^H?UdY*jV15?6#02H5iUZqt= z*sYzGI^v0!;y_;=m;Zd-|0H!-K!;$7$$41*w9n8vb^9$M4siScpcUQKtS7lvDyE8_ zV;a|hze0to`YrKXq{)kftx>mae51j)h9$sI@A4``6pes(DyXh(W@M=0ln+nTMXLFPK7Z*`Kd%qHZe-@!dad{Ec^PL42pn%W}xZXo(T;>anXVTJ1$G{0!#F{7Bt&K;`jdHM`l>50$ zpELW3Zbh@e8^4+$W(u4~gwEav zqOC}~Cuq3VR*DuE{#}egUYbL;H2==8{627sGlMSd2fRlEs=TRUBW4fY{e^&a6rLs! 
z=*}0r2@oJ(jiL#7y7MLrdg@>0#4NPnKoT9_YDGTxnibR#b)Xr5Wi^b>_8rr9AKt_S zbDWimuz{;ttlB+Jw@__`7;A*ACsH$hMv3ZEw)6V9cWCPyaFl4#C1Z8f?a!qL_li!| z-7^aCU(7r)-gdMGc5MuB(>DQ)kCl<Sjw*-wcER;-Kos*+`bs^Bq0ymIe(TvL0-aHc7HQfPHnEq) zqbxRfJoj-emP})f3@q-X9<}|c>z(hV0D(b=&@wA&u_Z&(dIIU+LaUw6O*3!{CJ6I(Q2Q52sjbfQS z$~?P*w4>d<10OzNuiYx=WjT9BuYDW9oJ8FH>un!aBPF%uMs;);bak4GA$aKX_gZgz z1`oA{4-0`v{pl{^iM)%{{5~3c9-ac6)FNMx#SN-N8y&p%yl8Yo4^CNkgD;p&-a= zJW`~ape)p?VAsRQJkZj@d#pp2#vnK93jx+QwC?E3)v%?4)v#{=s9or0HCQCh?M{uDn(ttR^^oWa|JhdfBS$i4jRVo>OPvu&2 zHCab@hZC+OX#!fTF9bYW-c;V_d}Gt1y2#SbCgAhnfg#IC;tPydm2o)JYj?4|n_;uL zzk_H0s{9*)HFc7R0V_84uk?f&J8Atl!TUBoA^Ri^bzW;3%dQvZuPl^D58ov`mrdUJ z&U3^E&EqAvpGrUfD4Wc%6~&VFUg;Nu^RV-A6qmI7j5tbNYet#l8PyBhHrt!jTpLvI0_j!1JEV zaP_t=nVAiT~N1+FEicpqFO&y>7-CQTIpaOjq5>&pAbRlq^C}j67LQ1e3b!G5ky_C|-Fo z7jd|tUjJ>X9o2NfFGlkh&;t0EN4R1t?P` z2#^nyWE*ylk)_o{oysW7TFqcmYnWg<-mLYykVkfi>{HEZyJ>dV?K08ZN+#CjOeXnh zxY+l#LBRT4Hf>|kuWGCiCBg~~I*GKcu7$~&S-9=NZ>8+pRNox1rvu|xL$~ChNbG|V z5^#HW&rg!TIA+dkrsI@dv7Aff^9Pfnd02zV*l3OM>ya}Sympv((7sH*nU``h$LlRK z^^YRlv~_R4Sl7aL%{4$SG-4;Q_e*WIcUbdysT0AaayKquRj(^MW*W%xj&vD1Z2b%+ zTMlj>YZ3m7ma5fTzL%ErEa&>Rbr6BE+W(nISY4rk42plxno(Apzo;|fi8^U-?=V<3 zFs5G6w|;F^`sVO%Z6JS6*T6bXxB4Z#21pth(njVWBZH1W(qRIGfiov;v; zydl@7&+~;@Y5vd!3Aq*vdvd09z1H^Q^7pqL04^&=Ju!3&7ukjawJu%T4)&A=DZ*@I ziaEJ5b0D3DT%X`(*kEOIbFG2lPCtjjM!$%=tykH!6mTRcChOk5)pJg$OI|#N-Hl3Y z(e~p3M;-~b=o_+6i2;i`XzVzvg4;cL0P9SG?N;-!6^q#0nYZJhPpPWA7sLoBb4T6*>kVHAwr+esBL>|AM?c5O^nP}iQh#O|EO~wmZ01SL6di0HQ+OMOGb ziQ5>)6&D$Lj7X(ajdR8t6zy?V-NXT}f$)9z#<=(9_lc~Kr-afoUJ+R@I)dAU`r5j1 zuWuzGMgdcXUh%Ihzww_8wy|>C?DZ&Yfbl62$4FLl*e6i(ms`ad}{jQ~St6NdA(u!ifq>sMpk;IQA zs&*hy%+|o&eZGLbJVaN01393&b*wdXI%eoGB5FIye-d6Smyh2^8Y^X&=ux#p3mwwXcA3H0oeIhF3oVLkhoSb*{UE-oP61LQf(gNqMcWk525ZNYY z3y^rt?&t>q^~E13*s@o)E>|&`d-lqo64v?+y&QzkN~r`oU8S*h?}CfJZ)a3hJ|hzo zztA-tXKYm~ojWqxuqn}1YOkemw9h)yJiXit{7D=0K7N{y~r1=04%z?cW&)2J8Z5`ZwTgJHA?U{V-J_p;< z4V-Ohy7CO`aI(XXYk4oU>1h)+(Q|Bl{x-EN+Mm3~u8KV?C(Op zgi9s_3}il!#VwrP`+jzB1(9|hS@VCTz>#y?+HG&ZW&EO4*DTS8bm!2(z%SbrB5|O zh=6X<1n(OKH#aWw;HG>5Weah)T2~nigetm}0sr6^{TqIeiniy(lGJJjCzl&)MtcIO zCfD%fu%i3!c36JFh$uB>J@Qwhv8JM=z6S zIKj1bWa&NY26{zy^B&6rz+XMOtO*X~E|hz4OZU{)Ti^VfwJY*<(Ps|;dU!b^59XA= ztQCO{igrWbx*$OPP{dwj-Jtfm^*&@O{&FVg4rdn9qj#v|xdNNzR>B0DXuWnsE-!z1}{8+9oqk}RiQ#lWy72csaD(j4;C17j) z-1%IE_ILua$kr{Z>YVO6p0w5>>_6Qu+`ChkX@+4GnSusBu#n{|0=>|FtY5AIO<@1{ z6V~r>*1j!4X1}s!Ik8^T+|<&f1(O=0T_RIUWL3w)^B&Q)TO_Y(4kMhjX!wrLLvtFU z*5r~%`ySAZYcoZVWzarZULdOa2`>h@5M+*ZMqPt6fQza0?ba^2{%B4*|1QJfWxqL- zw}_JD2cAv&ZxI3@ z@;Dho^oOjqzAT|i*=B6CG58S#16;m)fGnG9qGgT^}8l+y!)86u;QdXYCR;Ht_x1JUcY%X_SaCGCR!AD{mu6W zKpo_o%l2J`C?dW^AAE6F{QwvXSJ7I27pjSNw~=|7yx}+|O=QKa?y5{wBO{-o@)*fd zkTM-3uGmn|Kewxi0ciY7vK@p_D{|%D0?@AmJ9+FMO5?0nTy@>O2luZPwWljdd~1*M zVkj~{dpESTZI;{cx%5FO+khbcs%o}M%=9v0e{2P4vcUP1kwoxL_XnTg=4$lBk6yNd{72p_LUcgZ=SX4~o5y7#S1IhAWKT4; zX9GnjRaO1!%}QmSr-tqDvt2TNv$f#ZWIbSg6MXajoL&^%>&$GD zcfm6boV6G!Lw6ib=HYk~?r69aCXfSV(U}&)52JjpmhNvu14kDhAzW{e*QUqL2gL{U zT&-4f5-9Jn_30rG#k<)Ma!$?zDBnRMSz&B8uJu1V1BJ{UKVR}>i(GmyCXP72QKz$< zn>($)N=wirh90+LH}mghSvF=m8!J8D01}x}>=u8r&%q}54t%}-)mtq}DC;#oyU6RG zh@jO2AhuZAjI-Ejdio?Ltg0k-&GEJt4a4VDP}HCG*RQyFJz_Vl47EUcWMHb_%xto3 zVjjOvh}Y&vxzJxslo77!W+w%kErGDNYboc|AtqaoBI1-ybbK`Opjn`qVuH_4)IU8D zMD%Iy5*4@w9GxslMGS7ye-KADQRxrj~WarAOMEnb;K8rpz128TkHgpkSCW|?& zdkxl{q@~Zkh!gU| zkL?Hq>T4@R3I#M{bM6S&O6vX{WWDuJ$#kY;#`STCA{nf&R?j+6lm(sZLq}a#8?u<} zyR%}(6j$rjdVB8CgwOU?sq?6HD%6Bsv|>Eb@znWY;|PyH2KO3c7ko*13i$ykR01$M*LGB_ngYaC5(Q@Ctm= zwU}HWg%w|#us1GEI*@4279+AI)Ft~z`t~W&G~`fd>(5=eDp&ZjgAoGvyd|j1q*~{n 
zx!L1Hnqq`nkkT|#0N$*L68A_#o2Q>{ac}!@q{r%zqK|O`o1LC3?ipl_xTdd-e{;Ni zX^g-Olp(BYz{u`W-%{-~%}L*yxfGCnc+= zs0^y2rv!z2*%sZ1(FnI~x!t``RF=4kU^=Ea$!P9!v7-%t~M( z@SDd;v<%ZPD7O~dw;IXTnMdJ_0?EvSwCG04yT*4LFI_knEIP;=}{qf3>g zDT_^Rbeas{6_XMl3vw`W9;MPo*!_6cm(lDtq8k@xZjeY<5w*xNDq6nTXvO(cblFV& zGil-L8VirxcISx4w5EB#;k#^fjeSjZ>hZ7ZlgP3y?5oWn!@h1k`6~+F{J1_fw`^H< zZ)geilA@5;N)t<+$JdtMLF5W?QtYujc8EUDZJO~AxcJ2R#;fp0+L?bWf`^GM(K5yL zfHsBTx`e8~qD_jXo+1+P%eNC#hjdl|0a)5>cq$Q5p@;cHcFB|qI`QAEg3itg3-ZmC zMZ;9BH4j|spKM4m0~twJ8gK^D9_QZ(`z(0bt$*y~Deew~>rlZqhcD6J}JDCR$04eQLK+v6i>z$|3 z`qY2m&HuqqbiBOy?I)&yGXQFhj;F!bnq7^L7rDXTSS!cJmVBEw>x4~9yGX;HhsU9n zI+?sx&lh0fAZyX-mIt|ncDsH}DOwP#!E`o%Li}nk9ybly3z&t1CnL+}+$`;G;?4KH zlqi>GtJIyam}M4VoTnd)!0O8CMjITad+I7>*9FQSaGD&ynSZ6tN{mv#v>yeQ)Itm9 zoeo?Tn)V*ts;nO35n(Ot4+i~`5ItIK8pQf|Yvl`KX@}t2FMD{EF%cxkI4FS? z+4%(x_;uX7*faI6Gnt?GTBEAT(7Tmyyjg@F;H}yyG^9V_#%92BVl(sTIl3Iw^6eUR zSnje$VTm#Hd~rvb(bnk)Pki@EP|u1Ka%#nq|N7WZgC%Wg?2Pf(_hqX4k$9k^2t1EE zk<((l%cNtXif@zl#!&-rMiPI?>B`-|Saa&ec>&G9wr>BTt=C~q@WMlvr$(Yua4F{J z+3?zQcJ<+KrkSXVOts(#d@Ps#;h3Oy^xS3Vo{2C(TblZ$JGNAZXFXmAYGNmx-Bd`s z!HDG}4p&g@2o<)xCb8gA<|Lf@+JKiYr}4#rrm$83 z??4WPGMipI`_o;#ZwY!rkIMsqqnJx;(2meGYTd=_Kriz! z(z?~c*YXn#El`l3w?snyCfBu{ItQAtva(Ab%h8kJ{)^|i071=%ebrpbGg787m#He% zD!GySpVMh7L9Z3r7@B;+wQ@r|gb@dG^#So`Mbl8aDZbar(zfdLA+Q~mh{IlaAlr7C9I(GSOZR%czIaAgyki~%X8~2 zcsq-hHr6F*wwv`Kel}{{v4ydf=6V~jYo5ECo0dcfVWF?%%Vg$De%1SI-(m8(sk+kA zLrmw>dMe4;CGveM!PGy=+Ccw(U?ziz4vqe+1Asq2d;G2v;nox){```3Ep0@vZu*H;Lb(c{Q}BDa4u Pp#SXqzb{*~hoAo+yD?5` literal 0 HcmV?d00001 diff --git a/tools/python/images/clip_image016.jpg b/tools/python/images/clip_image016.jpg new file mode 100644 index 0000000000000000000000000000000000000000..05dc68f17f3a7f0554712a39332fa3e21cfac93f GIT binary patch literal 6056 zcmds5c{r49-@X})ElWlrB4e4zUbbRLh9S#Lgf`i-WT(bHrR+NiNn$M7l4J`>L`1S? zEBls|Z7__PZ%94w@jT!0zR$ND$NL@MJ;(LWegEeCo#*+x?(4dKH@TlY3b5%QkO+W^ ziVEHxFfn;? zL8$B*S^1MHYHDgC(gpoUKHa0$$qtK%&|K*GP8epdfPEx-B zQ3(Rn>{K9jDsm&h4**oO6t+8r_bw`G5DhIIn4W==i4su822fLhK-4rKT3Q+!O0+-a zIY7fsyZ?xcIvoef0xakTk-Z(4K@UA$QqPI*TojVCbPr@;JaCYUn@3ng6n0eX_z8Ih zMI~j8Gnxo3ZKTdQBjfWXre+tctZlB@+SxmJc;2{)@$&Y$b2lhBBs45MKH*;C{iNiS z2bs96?3~;udHK&vUzC+sR93xw)6n>~srg+?YgczqZ{Mf>fx)ryiOH$ync2CoOUo;( z__g&7!sd=HDgg9T)^BBh(#1~EMNLBkq5<#dqN2u728f-8_J|DKesvVs!i_^v_BK7_ zbX-PBJp)t@y~t_l-pP1C==hlM(vGxUWq(gt;D1ZmZ()Dv8U&a@RFvX@*a0{&ejBDG4+FJ7R(iTeDoZOum$KGD7ruWo*rFX? z``FHNY)WF%TULnhYL;t~b}}7JYi*RdHt{X9=TggBKqlXeOR;_;rvTp+k_?<0ArX#m zWX5XOLCL^~NJOy3Nn&$xkkQy@tztFZZz{WAH9y!0&-;xL(M* zOAnSI2khuSO`NMAvhr*1!Q}p6_fbh7Bc1j}V>=Ycz$?AOM(Hcn&_g6KB7Sd~x{@<0Yk4N`${N7c(GC zlY#XX++Mi7LdAOO3eFenU8k%vT6?%-kqjL1%Y)CP?kI@)6`r+=615As9=PfYZy*N! 
zk?7Q~_=H^?g>tu=nR%_6gSY__|D;r+C~*b5HQD*wq12(Rc0%W_h{KClVr)0Cv9Eq< z)7p#fa&h^gFoReltps@b*8g;Z@Sivlvy}ef@6y^;`p^9O1`n%K{%OcR3;TE3`mgz8 z|0e&nf6KJ^m;C?F8v8#c7I!bNftt1Q=X>UtbuU5`E`>jSIrj;{zE#%kRr++&S1nD5 zF&8ZarS^9R3;b)yR`cdL`{r#!GT=o9besSPN)s^e?LYA*Q6Q>TQ$Z^wvuAwhm>UC2 zbG#;vV0sP0>jRjRZ)P#z1hcfRhiDQFT4&=!e@{vC$8)~7t313aeE5iwAGKzN2_=H} zhUtD(RUAsXQw@!lF@>xLo4yYdyaBseX#=%iRSizj<`!-y`oa5B4{rA6UcxpvY}=P$ zi!LPl!G<`9_M3HWSLgKk-Cp4~yR~#&x1#X;eNP&7=FaO$u1)B@@R|swUr>mfo;;q# zpBMvH!b2CIIw4FMod8d#Tzh`Yo=GIG%}I2iK6U1h+Y2j2ftCIlK271C0}7vDg5h1m zNb8*aNnCV>2ah}tK&v`ecGP0FPug2;o1h0aSBL-P?S>96k<~hV&C~@D$s9Hf6l95g5=cZ0%WaT=H8&Sz0 zzTh0Togst~fweX=gh?vER4u^FrXJ5gk$9DpCdU}gGiL=Vv-cQ!&$k2D4Qw1YZR+}Q z(Pmk%heg}6-%E>6ZC;f+<*z3ZRkJpSweXB!PPq6zN_4R5>ibBUS=U3g)@TX=TzQ2P zTw8$woHSFJh91*+o>nO;D*ljPz1bo5tp&)Td`ZJbj?2nR_bj42))Z9D&AXH^*X~i7 z{TQrO;qzmwc20Pxs-~`bFg_gYf6S?E4davFAXIgmZGlECX8{MWX>sEPaq>Rcf#S>7 zXEU%u+^Wtpw?92xnKy$Nub5|q=kyk~dykjBOgUA#FwW|jBl~=m8@O~Qp?>SLPmhUs zE+WWEZ)nO@$czQ0u`ideNzUvA+I^WP=w<4glj2TiB{Cv$2z4vLhknD-!`dH|??lT* z8cEq&m&dh;3$P5V7%yI2&(`O7HqcA0>G00u- zwLC(!{S%X8n(Pu&!HE_Qa44 zBOQ~jrqx;1E4ES+F5M*F-qP!OI^$+8&_G#v$xhMxoJzwDJ<*EmbNYcpI;7KYYj)T26l@P0mayXC|Mzz!QE zVnOxihu*O5YQ|eaNU3>S9sjbhZv$uHTDt9R4H@B(wsUc>o!wQC&=~DhBh+w?R7@mD6 z?%}fkQD~^-m{`>CRXL}Ls+?>5doE<7IG!0J&B9&-fk@($o=b7Hgro`5nefwX zfYgk641vAV;PbC3eXdpD+Br!5J)2-}{wuKQWmfCDv;g>|hNjM&<4qe=mm6GLK+sRw zA{KMyXURYf{7tlrz?v@M6GgYDahyZjC8q01(V!cD#uKH93Pou`fWJE?NKdde z*lhWS*k0E4nAMz*RGJV&+SHCwqFr~6w$!+NR=grZW3}9U{cJCM6Rd{B+LU7N;36Uu zW)Z=7(X;4<+fc^LVCpd9-eytR_eA$3_EstDdmWYXsL#pPkBCyBQ#bI1J|ACoN_uYH ze1HkDOU!&Dqa^0)>K6S9pRVl~KS5IEpVy1ncIbWzpBN*(=&$Q5I;#qC3L_ogHlLRE z%w??{L2cXPvK9TCHe=?EEE(3{3UfD~u-Z~y@k6d5*Uu^-PL(Dd#`WD2?=*owmm#b|Z~!ODIC;z{VV2I03YKjoA#m0rjOot2Njjn`+$8#1uxv^TW+IsB z)8VXh$lAM;I2;T`L3yh2O)R5ww89|WmK1c>m*MULvzq=_*)Q4>FpjaXV|6>#)2_If98)jEa`Nxjtuv*&}E-!cSb{U+>KK zDXJ5+;gp!BFeRPoO*QNu=N9P$`h%$us~ez1mwdVQ>M0Ht1i7jjdZ>2|Nz%-kb<#9t&znu~6LOJi1wiIl!9 zPF|p{dw**?59gQo0^Q_gSKfFZcg9gG(K&;XUPr5!(cbc!Fw^&y72uO*6#)1&;)_yY!q6L)&uOdNl#)^-RFMs+ZXKe=m> zYQHa9afVOEw0eTlUP4Km$M*oc-KIbP15$}go>@I@L|~?uabloGGYo-*Cd8kNTg_KM za(*j5C3o?RyyTSCv!p?Vt5nfd`F}?Hn3&duz4Z_7VYbfcm`RuksO51pGn)x4fWVQ5 z)s<$7FF0}GX)A3M@BgmuGbso6Yn|8qdg#J9g%>* zj9g7pnS+s2cKeihN|j3CK&K-8C%begiWF@VYAqY^Rk8OXU=W!^q&|0N_P+}D^`K(Sp8?XTbG^}|ZOEzAR! 
zazGw9^O}7llB1aup%5-V%#`pQM|9wUO|9@0+1}u!z^BO8h|9KYz0sg+0zbX3D zHc)$Vxy|O*u+DXT(z&wp#fEWab~OuJ%5@ zI3OxBto@u>7724)dE~C_8&H;@dGdW_G3z8{^H19MwqWorb6l%m#Wh+}mP1dP$NHW= ztzgE;wsY1*Oh2ze6DFC@pMZ!zeLMxOIXFSOhWq-lEK53zmf1PpnTrrULzw9c#p`-K zvf|wjEOQ$j{Lm++z8RCeWafz~cCCTO1;D56-sXiC7dJ%fUB!&87HTNLQl4_1cZ{oIyt1z;2kQ|-z*RA0}I*Bfa?6C zz_Mp|aACVCaGBBO(*K`{XT>E3(tLner5DMDE2;O@Tte%N?R-TD`d|2pfF%$72QNtS AQvd(} literal 0 HcmV?d00001 diff --git a/tools/python/images/clip_image018.gif b/tools/python/images/clip_image018.gif new file mode 100644 index 0000000000000000000000000000000000000000..3995b48b3d4d0921f5da9eb4320a61e43a507aa2 GIT binary patch literal 70465 zcmXVW2UHW^)Anu>NPq-FPpAn^KtMv1A{atPPJ@rcmtcL~C-hR-L6dOG&+oj?SwIOmxc#K@$a5=j9|B%)B*svNox{XmQzo@9x_V^JvJV2oZ|Uw^7UrfK9UWGc6V}~U6`fPuJuqNrsY@b}Qhg}J^;K?mx^B8eyMVATdNCe_+I_ui zXGf&qv35Q_J^|5oZgzNG{1^b>vBe5_qEAl!n%kD0E|I7}D#qFsV-L(5Ad&E-1KY4+ z^Z2pUwR!qMZ?`2rteZZMT{|fMkB`!AH;-*QD8&aoV-z=^oCCF^>CtX$__ZpxoCB%z z`1&gR79O92kH)vz{jWb7UmuMRz}w;5*4)PMYh&|xdcE6NJ$?*dKjxMi7EPzu*XK|y z-Rj%ge736Gban0A>U}Ip-2?RppSvpHK$og$dX54XwdH0?gEQL<844@ZN+KSikK9=~c1EaR+ z^OUuD3O>g&8o&1cxhY$A^OUxOa_}&I&Bx7cASbMf9h2Ey6~!Yr|t0a!~^7`<(N zZ4HZh-Bx9(8*SHpU{M%7+GjqeI9iupZ%H4sv|Br%c-=8eyaJZ8mAdO=Nt_=W2n&cl zK!;oRmO?k)t$PgBjSsNY?VcZVLq%`Z@2;&4bXOGz_~5&@s#3eRP(&cCyNaTKt+%Wi z3pfy1HHLQ^3pju)oI@Dj)=!d3!z zS+{^&X~J#!R45Zh&$;Wv+ok=?h=a~Un6Nmj2B_3|Lgl*DUX26@5MyhlhGq@a2xNCD zcwN~#Z&`apt1)7{TG1?dE2lp~k=avISZHM)^Xg__GQLoEc(|{Qqd`DR@shQT)6xk^ ztsu1j*Vqd)R}BCirRetY>8lu$XUwi}dolozN~JZB;%|Ta=7L|Z?cC>Js-p)U)h*KynV6SO+u^@ z%YiwX#ER9F30#MEPOZ6_12aE%+0KzcN z3B;1#?J|FbaD#B_Ju|>KmSWm|;b|2hAoF;p3RF2Sdlv1Dg=O3M5(YSI>US1vYGuf= zkj1Om-qN+UkDniJ_nrXRP^=S2xB&-;EJtOCX=tfj_xrZpkMFgj>_?ylB*&pHR*#;f zOXDGSf_ZSC9!M8v9+&I_Xacxl^#p+~^KF>=R#KKKQZz+rAVG*)7kX*ym}&wMw)rI% z`?`CYN*qp%PcJF$X*o!#k2hK2oknO zRYM+aTYxz-N%o<-H}sij9Cd|%0a`~~p5CDtckL5;OsDH8{azeCw*V%IEE5k%`&z-r zVCJ7NF!4WOFhc&$W6NYK<3K#>V_A1*^Xh#;*l_Zf>*20@AE&VKX4t^Sgu}e-5?>=E2Qx!($B_9T3az_yG{FNQjjaWM(dUTWu13;us5<50l z58R0h;fr)O`B!%Q>R%h;e#c!2aG?Ra+H;O*nFMwuKMznd16IAJmUi;_E*1{y3(|1v zHRYMk)CDi#8JB)l@c!79zZD!@;}uR)cnty|n~%DL(e*=j~gnM!?$`wjTKgG`Khb9A)>+?r@97<$we3;-eBZI4 zTH;xhb>2)vVGBH=ZeH*dP;dnX46hZgjFeXfu=;f8VdZGxHV|?gOU1=dqXJr#0WY zr>Y!aa#{Da+;Gw9qSG$*ISkBtwMVSs@{ia|U#@;h}kAzn0#2ky@p{Hxll<2$;1y;yq4V}g`6VwV0j^M2|wUes+6WQ5*oiOCS$s~9m$ z2+PTeSrIJmdwOFnG`r}Suf)raA@_!Fw<`kuDKBPTRtfD?o22;~_C=0eo^QGbPxBv4 ze*GF`f+|S?677}_XXNMZHpYEwNNLQi`aFK#Y%0J&07GqQ|9mNg?O5RNpq3CW!$!+s zFXMP)!+cY;-;trTi{7C*Zx{WvD4T6oaj{dcpXSAOy|=y;mBE<*`s6BXCv7Wc>on)! z=iTnDxveOEu?5u7?xzpdu8|M<7m+U>_HY?&#qr>#r5PB$yNzGc!~QHw@aPQRBZp=c z{aIGnPl6p?d!6MCHE}ThqMp^O?u>u!47D@6{jZj9nWGW4tV0H@x$6 z{Gv+uokM>&^qe2DipIk8KmOS`{@16-Ay^W`xd-vAL*E(!SU!Z1rwMA)gga<>c^W&4M!?aqdo+n^ zh)fBPPWj(vFis(WTZQ zv+7v({5V1qq%=%dou*UFp#~=Ko_XTuMAY|O&;HjutAp-6lQ;gG3Z%5TEZ_BHI$Up^ zin{so#_>t*3wtNF*%EhF6Rvh>|J_cQjMux8uSYev{d*JQyn8?~EcpzBA8Fs$ zecqEZ9Q`sZ>hZ^vby%IVyB9WY{)f#;WaH$}a<7l4AvAlLefXtSMJk{u0 z^eFjEYF}dg?TBUi8ELA=V8M~c1$q-42|p(a8_pNUxfBol^!$Cf_)TZw-~B1ufdZ#x ze~;D3+%b*>_u-P)ucCI&mUPJgd_A61k^zj~A2cHSAEiQ23x`5alom#3F; zE@@oB7Or@Q`$92on9+|YT2*kK{dw_Od;E|4Mecirk2n$#mc?xw(GBMjTnrvp^V@3) zj2qgei%yBp*IbzG@oVgfA|r_lrWIGK%XBzTJunNL+$i@lw{qVrs-6m@o8LZ%bF1Dh zia}K*%G)Ask9-gHJgwlVZG*52Mt2R~{_@@7-7A%mHJ5J#RI^_$Lb8nFvq>tBd0w-w z!<~n2e)rIAvle{idNV?AP!=#_p$ugsyC-=B$Q%lc!pk~A$tKSA!a(wVVl~auISqb4 z{H)(%eqak4XSjZc! 
ztiJu;?m6e3%)$s`x7wcBn$LoDdC2IL+qIc)dC99aFGT7ed}vtyRrF9ga_M_i_Eqk0 z3?5U}ywSV4(f4zsbZgp>)q|X#4<9P92|=eJkr2 z37}>!qbX$A_wP7KEXpR6gTO#y2?i6rf3l z3sF%#40senRG$pTs={U%*Ar&7_s!T88xK86X|aD+dtF z4(D++0+}#A0JfY7;J0B>3sRT$*g04oL^512n8zUp(^ZZnsGds11qBS7B?MS;Fk#9J zq<+O?4Hhbz(GgCgTzb}}frY~xk#|2n;>C7@2kT!)QN^j*8=xxU?lborSzyWu!S(2e z-cqP^wZo9UW`Huw)=E3N0$MeI(#1 zOo$5FtXiy6*rOUbDSFy=RG?&%OJ|$Yh|J#7iaehaAS3fgrWI~W7Lg~zWP;JE)S$ep zJlr&dhbnhRCKkv9wE$GtLXYwna2bMgW+U|=Fyk9AoC%CC!W0(3G!{$;0@ax?Emc@I z0Dlj#Z7(>AazVxrL8OthHW=+S}Z| zOf(M`l?Dmp_8DB5G`PrRSUrTpUldViMHy_r)BxeD<5nph&e5^La%u@`189~Kz&XL^ z-8gQQi4Zdw`?DFxTwUz5yE0OJj9-@ZVl< zGTaJK(#Yg9!dOHSIMm5{;sDVmbn1I@uJoL?HG^=Xz6Rq3+hsH#1;#YHr+aBI7Y`PR zc~PJGl3Sc#)8kH5P1CD^2YWRjkPG|2#Aid z%oAG;1*u+xDsw0Yi>pf zFbcLGqQZinyK9rjyCX4#ZIT8Jtx&c=U?O4}@b}NBG|OP!WO!{R#uorhAi?vkuR#Ln zvj9uOcHY?TP4)2uI?(ce!FQ_D9IS`js&E(qR0Ci$BzWUEV0~6pp9;%F00I!#zdCp= znf=|h#6lY`ELoVx8%(U=SOQSrdeHh<5VHU$Hgddu@Vn0c^9vgUlkyYe^Y3^JCmlYMB(i^|BFH7#GUaPjlIPKs0lg39PmW?&bHVSS z&8eNpY!)vp7@c4vc&Z7e8FDaJIW!1>)Mo%C9*A{>FK=^yd5)H5?Vxh!`50VEfUeCu zIMxRC=CcbJ4EpiFl`@gPO1LNIK!6AKJy<{ijOkW|&s&?+(qK3y`T=3B`yfET;^bj) zoxFr8?BSBg1a7K~3(f6iNdCO>#lwt%2;7Q@+{Qpq;Z<)IoY}S2u)rG(7skS@R75oh zhqRa=cP3mP3o~l`q|8M79-wa=ycqy|7#iDKMK(Z4?9yjAqe{~dR!ac%32j`OBSBk%W}iq;ca|jK`Lzi#ADGG+AFOe=m4E z&Qux%Ez?n|LSki|q=0FT412Gctu0GY}5(tE05FG?B zKd2G}07kOi>Z=^9r*xO^y&~3y7|B6``dXZX+yk|B=nK=77b|u$A3L+|IZ*{C7+8C$ z%39YcRsmQ5ILj$s>szS7Y#l(OmxDF(0Ge^BC!G(S`HWrx3F$=9apip?{uOR)Vp=-o z!TX!H-mh(_PiUU?0%7HZ^oi)*fcd1hE30$gFCQ-&JSAd=KKqk7Y}`g=}%-Wp|WAvICe0q zX$>J@w@v1>&?R^yn_&?HG<**35XdRN&c;p}^fF99$Vtb-g=8$q$zld9*+X)r_ zw^0x-619y2Io+u7Y&>X%i_pD!Vh@P&?I0GAbk~6jydA@kQ#Kj!@n$bGBZHAxg=^uT-J?*;E)l~esIUPx{;ZHle+d^{J&7u1#wcK%Yegw{a<_l zMSjM8WEK6RwYi*xrc6Ayh->bMevnLDE;4`7Bs(LNWz2(sonJsWxtt&aNZ_a18&7YS z3kdgi-N2KJ!}&Cca~A8w1i;0G4U#&J8>)C|US zHR(Lm{qv~dacPS_6A34L8DU#L7w{aseaBIE)wa2wCyFa!OY6+9+^b%y(HIP!v^Yn)%)L?(21x{%T(G>i;G^Tg zL^__22_W>{8$f!2T7+Q+E&?wtjt*T|Wy6M__vQI?mFk5y8m6M?Satz8Djeu0Ar=s% zVd#fb7C`D^vK^f^LOM02IwEZNMsW}>oRPRMo(afkG@5cE{9)jVA)gj?5Kh?Umh4DH zMdHkKEe`>FVikBU(R6`+1Sn%=2qU<70q2=C=M}>kE2<%v%olyWRsj0|3*h^wlH^(; z_R--4xo|T4_D_giE}xfAu8FpXpyC>EaThc|T42@!q+wxru(#$Hi~;a+UuMwd+vyVd z1TB}cMI?Ef45#1CJl)C!k_a673otoy%n)2`frk5IloHipSVAT9TSEZin7Uy*j1@A=%gl2fzUii^#?vTqUEr_WNCS*_+@;9L`lg zb*c1e9wn+$hT-b9x5GlwQw<#t0f)4>YAn&^lZ7)<5{~mfRXQ8r-J<~8YvYNcH&JB_49Ww(6RE9BT~*3 zj29((VFLEi@EVK$ZJFloK^bR-I)OYpm%*==es(BDU;dVqr|z%4hRlZ?fgKW$4UFH3 z$aUox=(6~>zd5ewuQ|8@yI*4#0Wp1WZkZs2JiU2mPLdgIJdoU{^I@_oli>rcy%UVX<@ zJSo^;451`?6fTX0mcDEXbsl=$kR5O(uf_encB1%fNW)_*iKg>&MvKpN>Ml24e7g!I zN=p8H-w>t-aAzaWBz8Nu#mmyN&^Kq;goU159o#Izkg@CA1x@@xM>xz0UT^Qe9xw6O z4*9!z&a8!u7NUyzWiaW4n}UE%KIM3zvt$AW*@%Dc;oBxvx)`M>+Cb`6t)|k$aAgNt zw$%re1i)FdC_FBx9uv$B$1x;LSM0>}6 z3Fe|7|JygrpdZ~cpSC>j_$-2_9zGsd{tU$x&2gW((%hoIownt+#Cme2p<915`GnrW zrd(&crt0a7&PA^uK2-p$HXLF6mqKQ~C^$b`w*Ms-^vZK}EPVRg(6)JQ1paxA{n=Y$ zQ-Sf5{cjY)?Yu|sHXPE^7QSHhA|+o+02IQz)IuKjbQQ$lk&N746rU=r^VO$^35KjZ%9%&i#V+U^zmKnWFff+KjPRTlE&3i_E~F)rY?{>ojqTepKTr zE@6@yF|A_n$<3jib^SAOZEh`dHp=4_h z0nsb1)GI4Sk=5vxqxVXs_KMIctDVe`3v5=bUgecum7U%r2qRVByA@oGZ23s_;zW&O zM*Et^MjAP6n)JTNh`z|iI87@f_2NEVUn6Z}BfV6k}Jwwa>23*s;>cYQ=~X)iA@9|3+1uU*xRqm49H#Q(UX?UcO?0H={z48?BL1 zr&!|cAXgR2vx=?xuprt3Q~Xt3e9wpbM=G&p`q~1 zp@>Seh=cQxRI`}6p_m=BsLG*h0>e@LL(ryKwDE9Uzgdhg?7I6f9c>QPnI+SQ_9Kml z6J^YAY?={ORr~SiqV+_1RJS||$~10XAXwy(pY{+eY(-UaQ2Mq$ck=`+Mip=OH$Duk zZokg5i0;$8p&A=VI9iH+7^scB#P!fkZ=@nzt3+euZlz{v;K;qvgnRv(HE%7d0!NB> zp4P2At*)~?^Nbx%9(gcoQL)p05KOx}YH>z%qc+t0|-NA!f-%V&%o zO>d*l68f{d#qAB1BNeG5wHhbxD?JgHqa*z*Cn}9Q88Mu-GM0~gtKK>OO4ZJ=-)8U_i1j6q6eFkW1s#cf~+xsdIXGM+Y$gA 
zi7&GE8s@c6$tcuy&y4N*zW6!PGQXF&CjjhL5(}DMJo{xnk1~8hIyqXyF@#TVh)Q|V zquQL=R6xA>FY3WuJ~z`AJz-H?ulwkI1cvukTGi=CwbQKMU72SKZFn407gAVt)yaI< z9$=Jh5ie_=p0{!8Ei7ZC&4jX07!~|?*0aL&UOPEd$#dTBmMVBn?Jt{`kK_w`Q^1nJ z=|bh+qphl*@~GiY!w96+WB;lmF?B++w);|_Y=mBpui4n* z=Ek$bSr2=a(E)>X*@j=MJLT&$z&2I3ZY;Zp?gwvK=`D@ydA3 zNtC1Mz1sV=c9RN+OV575EEb#uKNeyFGKMPD$E8{>IvDnwGsZ)>#h7gU3`oi*8KDRh z8v(?6C4?)WctiA`{4xQD;une&^YXFcSSJ?`wmkqMc7O$CP(1)}PB-c=Xn<7E_0=nl zS{w1`M5pt)6-h1PI8d7O0917blL|ITulpmNz(&bO8~`We49Yvz4wt%JD+Y!E0>$=> zv5m7Mntz441X`397KxiP{>#=;D-z@52i*Q1j`(?aACUxb{WRt56U$+iF0g=4uDHwM zv`&TBx&E(#{G$I-oFm%qxRof2`8(A`yhcb1k6IYY{xub+SAhl_+%r4eEtk``I-IjRieIHF7IwT+&DD8apM0*(-RINkkdh)t)zeXI zZzE(mroLE;}M<1LXH`7uWYIT8VZ;Ax;s4< zxQ)!D|MX&TSI2+#_GD!uLh`eCs^hu6Qbb(J=95oL>sRr!;I&yyU=QE{q`+-ZAE4EG zffWo+4@NS7b-WV}5DErD0D3cnhKb<*fqRth$be}jxRDcJP7EPjV)Py$n#3n=nTf#C z*IX0ir{NYT@QN;Ysu|MXOE{bFDd5O~#vN)9@!Vsi;8p2QMKnif_1ten6lHJ9snNuT z7gF-QaAUb%6fAF0yfcf2S|~^VIfR5o1z9`vattS)ZM}YK%%?P*x4laPeXJFjL3J9Qr0C_y}x{r&}*I11!xWWNUbd~Fi}le|NwA!Hr{ z@IUh{_s|`^15tIq=E_)xd1J+oM}01+5RBT_Dt zsmC^G!GYgubVB0bJOD!aADU_kdgXa5=7ByeHp4u+SVSVH)49N|ma@mDSH;qQV0l_U zIN%)2_8USl#p|96q;+eQix3BTW2HVV%ZlY6gog+!|P`Xq${EP zGh(vXOauxg5E4u)cfEu4r;D)QRRo*oc05B=ZI2z*0V1!9T$_{P#z7K5Z+DCFP$-@p%(x_@-5;taINex(MbIK-DDnYKz4osq65HY!cLGtLx5)L zLcndj7BHa3{e|cbTEaS@r9Im2-Q@CqswRfTwLIKNKNe@Vnj z1p}W>-s~?37p=Z}I#T{_CWLyEc19q5pAvNQu}$_LlcJDoRDo?S-@Qt?C+u2YG*2uN zeL>3CW&8UWoQ#F@4@QdqQ}NoQbwv}GQa9&FnFP0y`h3}*SLlz z+;nKK6hvO33EOafGD$GC5sb2cuKF)|dF$fEqWsJu*llQ^O-8(Y=jsw$=9VnqULMu3 z2$3YjQGLn5v)uj-X8zB3#8QoW0yM!_0A@m@ZE)i=o7sbNov1^i*hCyT#*ByfM?|Po z`Bnb+jT)uA;!#`aW6%{XR{vD4J5vFnlA!;jQ zYmUC*PH5tWogNC`JyZ`mj4fnl_c>Q945R2Eo7)k*7 z)IX%U&;Uy$9|J(Xj^E8ojs{S?y7^t4jPnU)NG;t-m;jlH06YzSy? zAhO&$5ZeI8&Ri`4J5{ zHvPoKI|4IQ9!Ej?tsu7b_TLWL>yH9XDNFCbodH>Wf`x>1`M@kyyfv4itX?M-&*O8D zQi{qRCRm`YGi|xh(l)GEj2O^3B$2&Bh3?ql$i`@CyyN7)5sblRctlnEXBypmvUjw{ zzb6xmnqG*^=2@0$G&?f1X zIIah+ED#0lnHuRfC;Rkpv!|gdY+=6NPQ!pk7RVo&wta|0IiK|m{bYt}C@ove%#O!K zKom~GFsp|hkG3^_V{M+jn;v&GG#jC=;nmltaznL4;7~pbhUk%M8NuXOz|1}Mt$rLt zinBm8$}jfMcZQJKw4sIN*H(GRQY{vs+M2N+SXO`(hSP2qAbRt(4uBpwK*|AXs-l&D~BA5H)5)oq$Fj zRoY?hnGZ-PxC&~(0lfRlSst+n6zpqb%2i)7`}|HeT8TUu7z!25=k%ZmEvZitOdy&t zZj4~UeEK1T8BrMcs&Gom1v;kG2&l}1c)z4u2qSq8-&r|GLKSXlye)@x2|g)~huBOA z84MWkByc5OVbscx4I2=EJbzS(Wh`nYAjLgHN%!27_%op^hLTm({b zBmw=Ot?kIpZ7urUt0A!aGy}Snv59 z%oF>w*7C8IsGGtd?y?|B8FozBQX;rvsXVV`@VLYT&Ojh$Q)(FJHS7YyMR+fHHxKm~JfZpc|-`NZ7GW{mpwS2H+byZnj zootK1;*zbll@(jQvBSgPKb-UjW( z#{x^MY|&))X@wMpk5cMgUFZuw2CMm65zkx+ok9x5tH&m4!bTW^&t6_(_-}nUMZd>tITA2=@~vE0AK+_bT!vva$`)g9?_*R%XH!C3 z9(e$L5tt?-? zFJy2`T;1L6c=fJD+YKh?A2D9FS$o^P=5MZ>n}G&Xk(c}e6$#|K%3`^Qx1Umz5caP> zrM?-M;Z3Gm>tc4%gEa;! 
zZ3`af7o6w01v0aG!qrZ_;6JB?yxtp|af$a!XTVzr3!9kGX4tAWs!z7SX;2r!_KyUh;%}Avq)k!kUch}#v}|Lzt4w|1N-dF>aD!ooVUr7=0yA31rC~GFYPBO&7bB+%Sxsc_6f=xB40i| zzo|-0r;3#2kB`n}i+n1aVLM}>Gp7`iVZi3eck-Dsn@RnlCmvo)Mrj3}n4`WyJd0-u2pI96G$sb>7;q z^Z?q6VoN95@}>Av7T**&bt0r<=J?0T#64rmWVK2J3_OZ$Pr&kp?&_BeJ(t{<^)kSZ zM>|R>ULgtWz-W&8@PdD>%T*2INsyH~RH)P$rZp<(?@My+&N zRysJH~cR&=O zH`jqQu(V!(b1u2c;ZMaQResl$Ro5rpY+Yx7nvqiyZc5@Sx6UurbI8s(4Q1(W%{M?vK_xMD^!QZS>65Bzk-XN?um5G*13u?KVluJtoEpj#x z!X&(S3q9mKi6Moy%AmK3+98=NpJ0)p=uQtEr?O!0r%{R$QEPmcL%qVC)^zx!g`$=O zqr|1wPUv{g0(9p3D-V1^-&>slo+ydmv;x-@5%sK@K8q2$%4>C4vtTsrtZXt3Kk6${RN|r*w44Y5uG|HkX`3{#;_?w%|Cv z>i8=DqPrlTEyV+oyl1=lh>N(LO#IDh6Vq`)Uo{!ibl;8p&kkRg za5sp@VB;?mOv;WKA9sJ{nCxLYT{-DWH*~71^N{h5^~<@eSYapQJtD$Ty>MUULpFv{ z`s;o}jj)oYZ^QgXxq+`I=dyKg4JY^g>BSTFB9-+GmUrfjkP!us#BL?zdAa7zwq?&& zpI=XmEuHHux3|H(B8xq!nYtf*;iiV$9nYutW=)1i_h)@fZ#Mb~G_rs6Zhu*4KrfMC zlznh*7Qr2TbodN=?PCk^au`(NR&ufRu;*S$wfFhXuQI8VCik2+OFWZ%4P;G<15l>+ z|Ewjs%FQmMGmr7nI{suI;$-Rt&hPnPI4i~$?SWV$YW0h)M?VY&GU_yH(iQM(i=?saqT4{7pI71w4rF_rc2 zf;JvMO);*#$lL#EOy~qcyMHu9%A~U1#D;usai22b*YMWO$(UU48EmZQ6=X~eS`PX= zQO+LX=KasYt86iT%CK7GDGNOr&$Hh1hkEwY3Xh-=xN4IklUZ;zWp>@8wr#BBxRby6 z`SsgPRWm68NwEbMm&u&L(4AvJFPxMC8>Ve#{j8h7s_!Rm*2Qxi<+o#(J~t=}eKs7I zdj7oK!HOgMz2BAzaM9D3IqOFrciZZ2R5>QXJE^kp_cQZe^AWR64(`zm4c7;r$AaE_ z=Ycj5rEKon-65&@OaEB8*(3IEHiJexKfTnV6w?}G(zgDtCGV=#hko!^>-9PAvX*7p zub8R;8>M<`E$gqmzDJV?Dy;lf&sChT_IG5T%+ja?S|N!)>2gYl>m^L>bznyu;)-D2 zLr{>@Ow6AoIE3PDg|r2x|Ke9YUNb|iP2W~0BD=_+3h64-gWu2)4rhceAuA`@ewrr# zG+)csls)@&nC8NyO)@p<%%hBhG?Ql1WYd`lGHTsW00H61oLuW<6pQ>|8hCDu#E_eK zPZ8M3BLJn&Vw9gtNEwF!Cdat9_}QXOmWi1V`>X_#*wykoGJn>HYji! zE|y`=_4R^GElEb*nA^Yis5^;M9#q?73g1xcuLPu{$q!g$6nRKxilV7!LNQzlB4|NV*X^dM1w8Pf9{TZ02NTmp+fp z5Z*6I+;77)f2bpd$pZA-G7zfB9l^T}X>YU1J%|M+$$WYzniq#qW4Z zeebuOCztLE#QNQjN4zaXxm_;?&`QJm5sm^HNbWky{ROggB~#lROk1H>J~vjD3rhcS z_gH-_aRqkxZGLCBDaV3^^$Le*#%GCj!=IZ>j(j$^R!UDhPFQit4F@6cEdB6jhavK; z6*g&+BEcYY>_O+dOdHIHFVrz}lC=0E9JOK~F8BTucB*s7vISo!;3CMb1PC)3tKwjb z-tNW4q)51^aX?7-WJhF@eQh`svuENLq3jh5ImSUMs)?ASI1Y_oKyN%A%IV>Memou0^9l1Ao z{p;%z`yDEU>(V#REAb6?NcX)uB<#sg6bKHc(v1J3=uF(9`WiTX@9Y?iee8p=Z!^eF zW8Y=Tnl$!3L&{p+v5tL^vNy6MRF;$^jU}PfkVNYg6{S+CwEeu^=ed8tInQ&>J?Hr@ zpY!7s7CM|O!{da}9j>FmM8J?-@f40O*uSOjfQi z3#zAPD|thBj>CqR(y64d7%bYkbEko_U|^t*9JUxPKTb=BK`Vo7L;>M@7aMT&tLS`(j~`)a}FkOFJdPqkZBZj2?ONt zKw83LpG_eT`@*jI%bTkQ4d2SY&C63;%B{6c%iV{~0(ku$m9(A%3`}}Wnx?UncFE|; zZxLM|zIblv`6qCkTI)Tbio3L#u2!35{mQ(cBy0+dAG)Jd1te+J;C?<2B0@@%{sDaK z53bV|)cjlPW3hOMNTcAtV8Z<2FqtU<#}a-D``9mDyNQWd1bg+$Jqx0dEmXnZ>N&Mi zICIaQzAAsP{U%RCp8jVDq3z=Q7tJ$FL9@z|gL;i@`_gRbOPE;;ZVo?F55>qquqi zRPJ89-hBK#@<}(S{_KAAHpCY`^v024U4!(y0q8Sk@s8O@i~yoT>*4~E&zNo8aoMft zxm?eGW;psH`ve{FacK!!E)mdYA$OykNJ1fhD^n#6LbUqx({5wC%lQU@Q+R0GpYYu5 z0}*XWpAx{vEu$iZB&d&|N_XRRj6ct~opy6GHL8n2Ho- z08n+nVJSpgISE%@UJTnBT_pk;EHkM}bV1Qf(_!h|=01Qm3?lyXxVwX@QkitMjTWWG z2*5QDiI*&YB?70Z`=r7p_1mAEmZT{_)gXwJfwck{<|0&rQ&2>dZJybDISTPB3CuNb z1mA%fln@lApS-9Mq~J|^W4M~=MbrXBqEJN8WGn_pDXc0!#93sOcR8i%yvqmUCP<7w@CReU&j> zl5F8{%-(bO&GNh)Uk#a>wcGC-0p%%d%Ej=Rm}gRpIW$(K6DpJp~9 zaRB-+5aOKM2_vzg&6M{Fx#CP77LxKQi`tNfAcSHq`GssM<*s1?$S;Kp5qN!_5*;1E z02e?!68@>0ym5iuVb#_Eic}nTl7Nd7knlOibW~0vzV3DQ#sw+?UhLqh5B~uF~F% zV*gAyg<_=&t0jamI)e{@gP3?!2SL7eUmZ>PGJ<>Og$&w7qmf zuXqWMwba*|B=sc(4RB19V-P+e+;MlZWf7#v2A{**Q+e^coe4l~#3sS}v=mVh)hpx8 zGb4T}Afn*)2pCRqi>8?8C}z;DAjMB_Ku~_7!mlKlc^&K@Or)LCeu|!f$PRvrm+rrg ztqrLz3&a<&6o}0`;F;IaEOL%)ASnb8c8_x`H{VKg;JF3r@@bt~tRjKprfi-A;JMEc zX5{!DN>TX)kB2ZO*2OVj;2i!qWA@g9lSp_C8HO#ff$2M1{dpTDE8T4k>>*k4*@p>) z<8**w7M2Ibf%AD_lm$DG2w!rxxC}4qGs)dFPbo~tOJ_A0zP1p3aQD%^zFF`FT9 
z$$J=h#^$K;ISxhVCXuSmxy{22fgaY}fv>B1>$-B`QU(xWGCI$TL@@%lauKE#A;1sd znxrI~fvK=Xo73QA4h5DIDywhI&i=Iw;4CQia?wzg_&ow(+yjw=XmIrs3a_z0kK|6M zhoha;JTotz&PVak77QH6SZsmu85JhWg=E>&237!$Scu1;Pb!@P-i9)9gl79xD_b5R zZ~gJXxhcly-3TlcUMk3*MSkuW#S$2BrzvPpCg*_D3sBf5h$4KCi6Ed@hz2p+)#FXfr`J#9 z$)pyg%o;5;PS`J7)EKy|Hs)K$vii!a3hWq*SD9*6HEcOt^CAI!TBPjMB|g}J09sJ- z5cWe|s;1=N=iGWzziZ+J1M=%4ZV%6$rhEABYSsGzJ>27y7--V$*GJ6m&Gy0ahvD`E-5JM_FL$*+cT_it{lhN$P$lNsJ=iZR|9e8Xb;3@L_c3BVISsgwq<-V3OjWSRGr&b|E61_r zOkXe1>E$?z_x$o^{eZcl?waz5*?OT)!Z1ThAa5r&<&-$117XL@9BMnPd zXT7GbD~cSs9MsbEFzroK>EXwEt?N&B-ttbIxpc~GHjATmejQ$9rgyjW&*Q`TrUhoa z%G7;{%1#c~uES4ed&-k`S=L|3|3%)I4~zK6UbG@`KaND=pdz zFd<(uLQ!;7XOBf~&L_#tfvNwR2Hji&mL@;th}`Kk;puy{6dAzjH`UdC!#WjPG~4iQ zSX)P^CQ!@ZA%V~T^4b6N4w%b^TOOEw*dJA&rm_0x%+RUdu@xcHnW-ZU`(^9VhX%H9 zACX3R{>i-fBKG?M?T4Y>Wq&?I4g7qg`?p8=;cB^2^5LJ!?vKU<0d%{0hmH{~(dxyg z?~{26BL&ZY>uTRW4@=7V72NrknrO8h&)h+9;M0kJCq5&r1mS}B+tl`Hp-8PK9stKb zb4`29pS*6luli4lMmT)_?_}Ep@z(eMUEZElURg@~qk%K;K|ZG?hM(AnPVz9in)vVU zzP~k>e+PA80xq_daID(ff6v1ZUnvXu5DdqPb57K9O#JN2iRFY^4VN+)YCq!d#7%wt zVNjMlmaXOAE_mm4pPIwga6!b(>tkctKgL3Tye;K_%#3i>6%h3F7MNSsZ~&78RU$t= z#nZwK1aOix;fTa%dKE$ro~bpe6QQ*+`77P8YS{PR4i_`t|qdT@FJ zrnJ~j6%{M~pNPr!p4*rfy}z5QJ5sc+z*tN*?)Q+cEC{npPg{Gn7`i&>vq1?vZBD) zUFg9`;7{Tdd02n>2hYdiP0n$R7$b(9xl&9EHpNnnVuSL}Par;`S>ObG>_m;OUY!L0Ex-Gpt-w5sF z|L*SgX#W9C>gK}d9~uTbhrIl!zj~C51^PS+Jcm*qQPb{!;N^BHhT`mBaM)X9>-+r$ z^c^@s#9L{vUM%AgP#_Q}=o5JJ=-Q->+uldoIPbtcB44*e_m6+~FBN-*-F*7#yOqw@ z7|tGTQ-Yvc+rm6f_)dlW2hva5DjjVhWd7c?gzC;-l%8y;M*jz%TAK*@rFjY)+ zrKWE^K4NaDa_PrZJ}bqo*C2{txyNY4fL6Ys1h-wcBHl^nzc$DDNjzZ$Ocq>88;jdH zOAL*A=xHF4=DC%O&d$p2KzfM7D36h}AKcgoT!Qmj zp&Szg)hKYT9zuzv&8UR!LuuLc-_Jj%XK!D2nC;ZPQU$ryeR>sDYm@)q9i!mgIXor~ zmplyTcz0D}iF7e3PgN#ST15#(vBqK(Sx`8y&^jquHaauEcgA4>(%4-5R4s; z_PvhrRY=uh9SgP&8nzYI0#x1E$f*{LSG@Pt+io({Y{-+OLkTwGMZv;J~P-i zNhlF!J6&nv$+x!5prRHXd27bdfeJ`-2u@=ID2xQFTVibqil(Ad-cyP`q#&lPI^U+8 zNN3HjG#K3R%t}Z212h^H{Q`u$vi1@p(2msSD43HTq1>{v;s^!nN~vo^z#}1KJgf2u z;PhLW7lKO&Q%%*l2A@EyPYNQVr;?x#VSwg$s6mgruJr{2=tOFw~6PWrLY!O-@{wV zN=q{*1wuVh5{zewp%mRFV-a$cQ(n|C+-jH}>%xvtv?+}Z5~!b6Yj;qJ#VMzGtY?Zt zAcQ3>s*Zp-OyNnfDg&&}1B8SSg2{XUZq3oZraGpzFM!V%_H;ir7{bot&^W4-HrZI- z#yGv-AR$@H5rI4BrrItpkp-aAM)8p>&Lk+IM(xZAE8flLWOr!(_rvA*puFT4_$13^ z$fN8S1cO1?NiI^QAl<%Ba)Kf##X`GM;4}r~E?Ps8dnjwp{H(BBlwzF?!Q20RShzHD zNi_p~uEa7QR$@)Z+Om`(cu81Y2mv_)!cL>DiLI?q6nRgC$z(K(rH!Fr7%PG`FL4yX z>s(DC$lf)lbADQgRhy2`}xFlxb|2 zXbNvI1ixYiz77LuTv@p+#3zubCXLu$3+cIfz?Cu-05B(k_pu-nSaO=mF`nRh(Ap!j zCvKe?moj+e5!l?AmF-xfxa$5_lLkl_aEJA|6UwxaT6d+NHir} z|1f?0&nyTiAY(yP%6pTi$Gj<2bjOh7pS6S-H9&9jWQvu3Jb>_0XMWuli~ocAy<}`i zP>%lwbAWEpkH}|?)#*_L*PfaonpGQKdn8kk86h&;?{ZF~S=-}{5C2N?lYiaMMvBW4 zpZG|NWP9xB<(DKA;L)plrMOU5wphwob`8*w6!LE3ndPka>6iUk}6n;Puz%-P+F+M84$=@N_iEBn0!kJfnj(A0obEl&98 zqv1q)mu7|Te)TNyEwaW6=D|Wwz6v!0vNKp`UXPnHn-6JRJ2Y}fUww~xNu@%cWB{C! 
z=^s*xrDF+Bmrlb_&RmB{-B6LR5J8Z4xS`tdS3Jt=M8)?X*oOlH&!;oUL zQ<8&OV<3m((sLWb;=@@-5hBsi7%jr6Y&+Z+QgAOUccz}?)15rPr*Wi3f(akj&DLya zwbxQZor0xE(Itu~!22ll@3yM6W41-yOBbzrb&qx5|EyT0CCGdw8wkmGQN?r!P7_>| z9wgbS7BT6M?D*^XY7LB(dPYs?BTFTPUFA$x&|HyLD9G6waAa zSLq}{0>bhHG-Qdj+HCZ8^jb=GpT!;1NY>t~tKEkwB9kCGmy(-z~YWBX$E_ubOHoqQn4cR`YGFjRj=^7S64Pi~bC} zGDf=3xh?s*WC+1zM?!LN{tBL6XNNHWb+|K{9(ubl3GXPtu~o9FQuNArgJ&!5^C#=Z zzQK7Q3UXw?SQtP(rSi587F@>ONN;zY)!SPhJP_;nUJ*lkHi4xSSuY#Kp#gr;YY9Me z`nykA2b@LF(0@pLr~-vo3Up_)NiDXzl*EKVlI=NUiEyamT2IAcsL~g1`rxfa|B-}- z!8!RoUvxkL$x`eVW{xjFM4HDk*G*v>1z&*1lD&4W;IhdkS==#Rfe9)=d;45850F|i zthd(vzRN+V+!e)=!60^nAgQMZ&POn?Mu^C-WkcxCSC~0?nN}{SBOkkaeEn1d$9aWW z+*jUpYn6bzgzvVOyL=OA+~U9L>PF*I(R3I>aEVIC@?eFmF*pP}8|_Nk3WcFKX+(h_ zXV|y}z6RJ|Z0=>36A#03tg*zDe7Gwc_pz*&`T$hCWb`Q82WPj2ftzcK%f z(|nh+@UJz%aB7Z1nj{O3_`HLMI(RZUn@ZS#(}ZdMOj~-zffXVNLXa3Uwoy`R=~cP^ zdNvhOKKD@MYxC{YfK#8&`_JDsle1QPIwS&Xd*`XYnZ0qFf`t5dR5CDAfFl#AUNOnp`qISljuuKjAhMlu_Hyv<L&H}U3Uxe#puLe;Dp4-Yhc}3M>DQxuo-u*Gw?q|9 zuxWsMz9SvJMHRv#U}gdU$UpxEZb4xkJn7|*$v+MKN)V8L`9&oAYN)VN4S`plX}jgf z;gibU4WbYVJ%^K}zac2tm$Xsr9t9pjz#6bjlO-5<7UOCNpmvWHre@Su%JmDek}_&W z0=J;<2HE|Ezza_?9?tZx_%(xfRECVtxg&2?JwJVVaPkcurK%MT z@)MYl-xWyASIK(E5U^9Sw-O?D!`l3;ug$iToTy9QpoFo zc}g5@VD}(yIj~-6!FBLs<%jS)zWpn_1Sfi^;LEK^Sn88K7CS?EkpX<6qT~_r!pK zua2r@1ox?6uxz8X3)69|Czswhzmm;9*yVXH&Tr+ak@5Oyp})fIo^xf@$l413pyiCt z0eigaxB0bu&eA^Y>s9;MM>bbU3&bBasR;c2i)AakqjG;1hsQHYJzzqwYlkOBPw;Jc zB8`Rt0sxA#L8P8X#CV{4n(_CN)!m_S2EW!e^maxfp1%5?l4dQ3nmwLiDk56@Yb2ID zyA(Vg87=TN%kJ>SGTHNH_dMA8iHkA0f7;*43&zd;Dc#vT@qBpCx7X*K7+odOxnySR(qa4_@jLp@W;8#SloZY)!($%Yl+z-Vmx-b zOWy$fFCzGwuSc5Pi*xkjB45^hdU6)jBWY0a9AG+gb0jV@NO8pV#@zE#)c3xa3=v@u z_Kk%6J#|OwVu!p^tk0-lKJKJ%K0qKmg*VB>So4$b?~gW^y$9&@#78U#w>wpcbzM1Q zs;_n<^TK|+eS2f&q;zcr21eRqn-WBtUjU!X&xV*3>?0(?8~}@5~**q=c-5V8ytBmaq(J%=D+Ua*UVyXYANh-*m%#i9Ga-PztH&h_-oUb zam!{ee#m5|x7WPtO00P{6P5Any^(k9+ehvhkE4FC@%YDnNKEuW{dqQ0aa(`>r@1fS zz3^UgUOG`ANJ?NuZf45JGD^wfdvDVnbK8%`8Ql$_e>gNFh096|wuoInR!;Tfsf@I{*HcBJfD zdbDSTz7SHh<}i9Gf`yLRh|KwABqDa^q+tzR10QXq7TYF7+p89TAu`)~FdMfN&pi_J zVBur~IZMnZ2c?#0ME3l7%}g%(#F=(uGA8EO&s-Hu{%;97A35Kw0}_zPS8&Lt4bqK~ z-U6xg(*8K4vB{s)1u}z@iFn9yNoUz1V!z7%&_{B6zJza79J8`L?j;dW;g{9mnOgfe zd@^rxsGbGwP!yi^F!(3mzW}5wrDFJsy81>*3@5ZZKeW zq3FiD%#J)pL{!YlRNqSm;WcX+Js3aI&lA%Q`@dZ)95gCC`#9smyR1<^NzcV2+h;6 zMc6YJqo7R23D>pim>={Fd9#96Hj*~;vkEI=|t2JXu#2YK~bU|=``-t@g~Up*0nq?#eTNM!F$`1(}%@aR#pXH$xi@m(kLy}b~cU6W@78pMKoLGu>=5)V%1_+^tf5y(8S{m~>2UmFbTY zBsF+?53kR$h~Zt=-RNXuQGWD#u5-FbH~`zoJ(RgAM5#a%m@-;8ZHkcfz(Y5MT^1wG zS^+ZsOhCohs>pIMB;(WoyxT60)8^Yw?0O3tuTR-HVrEgiGn4nMQ0A$#4BCz8r zvE8*5O}PtWX^5JVP_8s^FpyGNOtv{?-xqorr&sq*B02H;z~`x4Ljb1DmXh*_7>Q=u z-#OCBD150b6_#@mv#B8Lb={CHMol{S^P7An8>CyK?)4}S;$UVy68Qv7B_wuJSQ+50 z;A{~*Dr2S$;BGeLlUDFS2p2GKQ)nQm$$^J?_%3?b(3@rCC;ZbR!)g}cD{QU7@4_|; zl4_(KRY?G5L;#I&!pkiEyEZp}au5nMxGEP0V_BZCu6L;9FOgsxcq4+X)PnS2qHF}Q z2QD3i=)yHgva!GNLzQqy86o)aZFW8++4)7-n1Z-l1qwh|JGO5TFNQfJTmTv_Qeao5 zWB14eKB)k8oAvjB$_GM)o_Km>P(X|V>ZPZsF-h2!i*pO%_n0?e&ao`gq^FWZR5Zr4 zaASUPWN~&Fe}?+!5DYsR&etBBi32OVB$DbZ*_x_DI8rGW5cjx+(7O4P2Ahe4H=7~^ zTR~Nb*k7-Z6(J5VZ~l`?QvL?gnF7A|=-N&nNUPeXXN6fI@cv@UG4BjJS zM3Mt-?z&P&Ec)!p9v&}8;g@%&eX}M0ssU?V(jM69($9X2^~O} zOoORFckQw-s{jPDN4T^kb2zQ&a&O*^Imt?BpB)Pvt*uYKko`x6Zd8+XmY(=4k|c=( zZ~b;fab@5%P=Nprck-z0f_Pab=cRx{t0Dl)k<5YxWbfj;VFLgt*zbJ?6(i3hD;9KF z7y$EWu%OVcFbb#*U@U-#gLwI0#`6J{>CPWob;nCvq zH2#Kzb{R~=fNa2vh3JNqTsaGa6wtU?>~9H=QkR$x&O9)LUywT&=B2*TYHVYGlD7bE zk_2dDLSZ;{?Dsh$N7oquH8-_1F2Y^50;f^Has}=a3YZcN)LH<;HU;JB7cKcg%qB_g zpyo{$A4LIE7E(Rxn;2uU0~_ANneZp)c2ni%d4H0G$A#d#DA~JY*rJW-x(g@_V4i^Z 
zR#;?7mygu00b&J9ZqBxDM%UEA!&%D7&8C(LfYkGQUhJ1qV@g-GrLP~q6Dyvp#CmBu z3Wjq-~b%Dak1S(ARQ($QvJ@zWP8#VM)HbM z0f-4+WQ&IqLMqeEGW~j@<_Q=)jbEJV4}Pmzzk1@ORNd@W)t_royY)(23fQx|gIM5T zT+408lAa1;bTsv&GLEqT*C8Pe@~7I4V|iEfp0=6dZlIMQ33$(|qoO4B+^aXa{0ejM zDW;CT*iM?Rr|CV8$f7z*XA_2_JXd4Mb_uB19e<}lg7vh>2GAw=o6%z7aG4&lRFxMe z;ik0FSKmy;&$U@vD2mdXg$20$nPg!~|9a-U>h)I|L4 zc9SXKsDP5CU)7=+|1x~K0+-L~6I#(j+}ku;lP20%fL@-O-?OAYd+4F4{!>x!Gb?~h zHsZ(_tg#6`%(C=($fsf>9&2MkO%Uzbu**6}{iUH2X0zE*3F<+bS)u&uIJiv+CX@zO zD%aHH!eux2UG~sB)&_sEsrJd#TD}Xu`+Sjap6hIu@Wm#e4hNQv3PDtS&3^>c=8nLB z(t+u#V6SNUlAOr8i1V|Y_xhyKKYc->{r*3RPb>*M!quQckLE5ix|FKe_u`SD#~pnS zxx}KQxYbLto5BJ2TLpS~4U*+;H~0N$4Y>4`7X~a|CVlxG`b95Ce-(L*o#fd)%csj0 z2^(wg6c)bi#ka^@Ek4Xo-iOekeUM?v*62u_r2imMHfe3*mmb3vb0-ExrU4QnPY6yW zf4sZ{`&;4)Kd~$A0hK-9cXYJVzn_~@Ei5Vs%fGYVj)HJw9UuKg*TWR-!_S!wBz%N< z$|P&~E1)Ky$Xui?&XIfD8h}Vw!z36!AE}%V!F37XYXB24|BuLqiRtge&R%)Nw^wZM zxT+d}Q;hZidSB&SvdBOg_xQr?c%_uGa!II_gGH257-Cn&l$}7}Q;+}@W(x@c9=te` zHT#C#P_Pp$oXW@pV2+MO3Iw($nhYB@n~MdCal#Y7Ou!HomY6nS)qn%b2h}x=isq^* zkSM>7&yXk=;1o*!v@H4dyED&NMgOAMacY=Pfl#P*Zp{n~A#yg3Uy=z~SINm3YB^>n zm6sGY*w#c#MXkY0?X}QXrmW5>Ujn><9xOq{o>$QIhrw@d_U&Ta%VGXBQ^G!swEZ;4 z5l67%b#hJ9c7cwu3oGh989k%92%R9U+yBLdyhilx0NSB=(k5U#l&YLDJNSZNckvP` zi-Ko)qY0;B_AWx~;`RsiCPw3Gp8JurnR49HJn_)tS8{Glr`?FV)F>Mv6OLz{kx71k zz73#p91Yg(!wMw?Wg1?oNdo$m*3`N+Hg4rp@B7Bz*&-~Ax;tcrl~6&f$}#lDJ+y*? zta$)|m8vnzZ3Nz0jd^Mc%#l168VXblckjVoSwBWuAaJcV~ku4i3_#6sh(Z_cN)$<|r!Mm_k825gb`)r}&x~w3H&_ zwK4uloHhF0ep3OaXv!ul2f(hq(FiAt`U1({2b;~L0ZEF8ATb1`;uJkd-=(ZdDuJKP z+3FCOL3!0A*DVG|NJrwRM!3U0TFjKzyVJ|v7z2qF@gNcrLT^o7xI#9}_?h2;Fuq^^ z*u3WbbM=Gmba?lvwr;S_&*F!z6w&IH;_gyjmBIEXHA_f1=lhon3>THV@#yh!VX}xH z0=am{=nyO#I8J%Jla#Ot;DqB@3W(NBMs&+@e%z1N>sHiAfE>?(GqVm=u(^VfYbIfl zb)?B$+_ALX#BvZbyX1aiPgVm?1rFw3KrEekR0UfD5j=#xykz`d{vRvLVmGb1^U6rwitAqWGlLOg($K5%_9DF-;N zw4;-0Ea#5kWY=x)ixzdM@xam=9?E>qUl0lE+h55>O7#Sc@dAP_{?%qYz(J95aF76q zNJ9|(Ck`GSeQZb|d_|8ys=+7!P^uj6$+%Y;+STCG{px~Nxjq&^;QupCES5qBZ?}g!;v<-tOn;<>nl+ApQGzUShPdiZ{1`6 zrBoj3?Sz{VyFo{w#{Rjg(M9Kb5w9un|AqRfoz`Ukbbv6*YY?T^c_Gw;$x zFR%6?-fttNy!v8!nUOY!`+N$XZHATdP5UeK`6uVc);!t>`^C)diPLEhQ!73x4^;X2Fa1g|=EA%8F^4COB&v z7=3+Ege6zJ4|1P6w!iwY`=_2YfWa_Qf5*& zS{nXc)x2S{(dHajA(4H__pg7zAq6Va-PZRKmf>(DYI)&pTT8{YhQyt@Pl4C0Rmc}* zFv&YQ5-ZJa*H0#tbnh3adOdk{(BIto>jbz3w3s_;z#^2K6clP-$^KL--`;l)oM_!!xJJXb~nD(#!>j~w?)sVBB+SfL= z$}`_W!? 
[GIT binary patch payloads omitted: base85-encoded data for the binary files listed in the diffstat above (the tf1.15/rec0630/cann530 atomic test-result .xlsx and the tools/python/images/clip_image*.jpg/.gif screenshots referenced by tools/python/readme.md).]
diff --git a/tools/python/key_2_emb_formatter.py b/tools/python/key_2_emb_formatter.py
new file mode 100644
index 00000000..617e7f99
--- /dev/null
+++ b/tools/python/key_2_emb_formatter.py
@@ -0,0 +1,220 @@
+# coding: UTF-8
+
+# Copyright (C) 2023. Huawei Technologies Co., Ltd. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import json
+import os
+
+import numpy as np
+import tensorflow as tf
+
+
+def str2bool(value):
+    # argparse's type=bool treats any non-empty string (including "False") as True,
+    # so parse the flag explicitly
+    return str(value).lower() in ("true", "1", "yes")
+
+
+parser = argparse.ArgumentParser()
+parser.add_argument('--path', type=str, required=True, help='path of the root dir of saved file')
+parser.add_argument('--name', type=str, default="key_2_embedding", help='name of output file')
+parser.add_argument('--ddr', type=str2bool, default=False, help='whether the saved data came from ddr mode, default False')
+parser.add_argument('--step', type=int, default=0, help='the step when the data was saved, default 0')
+
+
+class Formatter:
+
+    def __init__(self, saved_file_path, out_file_name, is_ddr_mode, step):
+        self._device_dir_list = ["HashTable", "HBM"]
+        self._host_dir_list = ["HashTable", "DDR"]
+        self._device_emb_dir = "embedding"
+        self._host_emb_dir = "embedding_data"
+        self._device_hashmap_dir = "key_offset_map"
+        self._host_hashmap_dir = "embedding_hashmap"
+        self._attrib_suffix = ".attribute"
+        self._data_suffix = ".data"
+
+        self._saved_file_path = saved_file_path
+        self._out_file_name = out_file_name
+        self._sub_dirs = self._get_sub_dirs(step)
+        self._table_names = None
+        self._father_table_names = None
+        self._step = step
+
+        self._json_attrib_dtype = "data_type"
+        self._json_attrib_shape = "shape"
+        self._host_attrib_dtype = np.uint64
+        self._hashmap_dtype = np.uint64
+        self._raw_key_dtype = np.uint64
+        self._key_dtype = np.int64
+        self._raw_key_offset = np.iinfo(np.uint32).max
+        self._data_dtype = None
+
+        self._is_ddr_mode = is_ddr_mode
+
+    def process(self):
+        dev_dir = self._set_upper_dir_origin(self._sub_dirs[0], self._device_dir_list)
+
+        self._table_names = self._get_table_names(dev_dir)
+        dict_out = {}
+        for table_name in self._table_names:
+            combined_key = None
+            combined_emb = None
+            for sub_dir in self._sub_dirs:
+                dev_dir = self._set_upper_dir(sub_dir, self._device_dir_list, table_name)
+                emb_data = self._data_process(dev_dir)
+                key, offset = self._hashmap_process(dev_dir)
+                emb_data = emb_data[offset]
+                if combined_key is not None:
+                    combined_key = np.append(combined_key, key, axis=0)
+                else:
+                    combined_key = key
+                if combined_emb is not None:
+                    combined_emb = np.append(combined_emb, emb_data, axis=0)
+                else:
+                    combined_emb = emb_data
+            print(f"{table_name} has combined key {combined_key.shape} and combined emb {combined_emb.shape}")
+            transformed_data = dict(zip(combined_key, combined_emb))
+            dict_out[table_name] = transformed_data
+        np.save("./" + self._out_file_name + ".npy", dict_out)
+
+    def fw_weight_process(self):
+        checkpoint_path = self._saved_file_path + "/model-0-" + str(self._step)
+        reader = tf.compat.v1.train.NewCheckpointReader(checkpoint_path)
+        var_to_shape_map = reader.get_variable_to_shape_map()
+        for key in var_to_shape_map:
+            if key == 'dense/fw_weight':
+                np.save('fw_weight.npy', reader.get_tensor(key))
+
+    def _data_process(self, dev_dir):
+        dev_emb_dir = os.path.join(dev_dir, self._device_emb_dir)
+        host_emb_dir = os.path.join(dev_dir, self._host_emb_dir)
+        data_file, attribute_file = self._get_file_names(dev_emb_dir)
+        dev_attribute = self._get_attribute(dev_emb_dir, attribute_file, is_json=True)
+        if not self._data_dtype:
+            self._data_dtype = dev_attribute.pop(self._json_attrib_dtype)
+
+        dev_data_shape = dev_attribute.pop(self._json_attrib_shape)
+        emb_data = self._get_data(dev_emb_dir, data_file, self._data_dtype, dev_data_shape)
+
+        if self._is_ddr_mode:
+            data_file, attribute_file = self._get_file_names(host_emb_dir)
+            host_attribute = self._get_attribute(host_emb_dir, attribute_file, is_json=False)
+            host_data_shape = [host_attribute[0], host_attribute[1]]
+            host_data = self._get_data(host_emb_dir, data_file, self._data_dtype, host_data_shape)
+            host_data = host_data[:, :dev_data_shape[1]]
+            emb_data = np.append(emb_data, host_data, axis=0)
+
+        return emb_data
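+    # How keys are reconstructed (see _hashmap_process below, illustrative values):
+    # each saved key is stored as two uint64 columns holding the high and low halves
+    # of the original 64-bit feature key, and is recombined as
+    #     key = high * np.iinfo(np.uint32).max + low
+    # before the cast to int64; e.g. high=1, low=7 yields 1 * 4294967295 + 7 = 4294967302.
+    # The multiplier is taken from the code as written; whether it should be 2**32
+    # rather than 2**32 - 1 depends on how the producer packed the key.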
+    def _hashmap_process(self, dev_dir):
+        dev_hashmap_dir = os.path.join(dev_dir, self._device_hashmap_dir)
+        host_hashmap_dir = os.path.join(dev_dir, self._host_hashmap_dir)
+        # in ddr mode the hashmap is saved on the host side, otherwise on the device side
+        hashmap_dir = host_hashmap_dir if self._is_ddr_mode else dev_hashmap_dir
+        data_file, attribute_file = self._get_file_names(hashmap_dir)
+
+        attribute = self._get_attribute(hashmap_dir, attribute_file, is_json=False)
+        data_shape = attribute[:2]
+        raw_hashmap = self._get_data(hashmap_dir, data_file, self._hashmap_dtype, data_shape)
+        offset = raw_hashmap[:, -1]
+        raw_key = raw_hashmap[:, :2].astype(self._raw_key_dtype)
+        key = raw_key[:, 0] * self._raw_key_offset + raw_key[:, 1]
+        key = key.astype(self._key_dtype)
+
+        return key, offset
+
+    def _get_sub_dirs(self, step):
+        sub_dirs = []
+        for _, sub_dir, _ in os.walk(self._saved_file_path):
+            sub_dirs.append(sub_dir)
+
+        picked_sub_dirs = []
+        for sub_dir in sub_dirs[0]:
+            if int(sub_dir.split("-")[-1]) == step:
+                picked_sub_dirs.append(sub_dir)
+
+        if len(picked_sub_dirs) == 0:
+            raise FileNotFoundError("There is no sparse checkpoint for the given training step.")
+        return picked_sub_dirs
+
+    def _set_upper_dir(self, sub_dir, dir_list, table_name):
+        # copy before appending so the caller's dir_list is not mutated
+        dir_list_copy = list(dir_list)
+        dir_list_copy.append(table_name)
+        temp_dir = os.path.join(self._saved_file_path, sub_dir)
+        for directory in dir_list_copy:
+            temp_dir = os.path.join(temp_dir, directory)
+        father_table = []
+        for _, i, _ in os.walk(temp_dir):
+            father_table.append(i)
+
+        temp_dir = os.path.join(temp_dir, father_table[0][0])
+        return temp_dir
+
+    def _set_upper_dir_origin(self, sub_dir, dir_list):
+        temp_dir = os.path.join(self._saved_file_path, sub_dir)
+        for directory in dir_list:
+            temp_dir = os.path.join(temp_dir, directory)
+
+        return temp_dir
+
+    def _get_father_table_names(self, directory):
+        return self._get_table_names(directory)
+
+    def _get_table_names(self, directory):
+        if directory:
+            table_names = []
+            for _, table_name, _ in os.walk(directory):
+                table_names.append(table_name)
+            return table_names[0]
+        else:
+            raise ValueError("directory is None, cannot search for table names")
+
+    def _get_file_names(self, directory):
+        files = []
+        data_file = None
+        attribute_file = None
+        for _, _, file in os.walk(directory):
+            files.append(file)
+        for file in files[0]:
+            if file.find(self._data_suffix) != -1:
+                data_file = file
+            elif file.find(self._attrib_suffix) != -1:
+                attribute_file = file
+        return data_file, attribute_file
+
+    def _get_attribute(self, directory, file_name, is_json):
+        file_dir = os.path.join(directory, file_name)
+        if is_json:
+            with open(file_dir, "r") as fin:
+                attributes = json.load(fin)
+            return attributes
+        else:
+            attributes = np.fromfile(file_dir, self._host_attrib_dtype)
+            return attributes
+
+    def _get_data(self, directory, file_name, dtype, shape):
+        file_dir = os.path.join(directory, file_name)
+        data = np.fromfile(file_dir, dtype=dtype)
+        data = data.reshape(shape)
+        return data
+
+
+if __name__ == "__main__":
+    args = parser.parse_args()
+    formatter = Formatter(saved_file_path=args.path, out_file_name=args.name, is_ddr_mode=args.ddr, step=args.step)
+    formatter.process()
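
The converter writes a single dict-of-dicts into the output .npy file, so it can be read back with NumPy; a minimal sketch (the file name, table name and key are placeholders, and `allow_pickle=True` is required because the array holds a Python dict):

```python
import numpy as np

# Load the converted file produced by key_2_emb_formatter.py.
key2emb = np.load("sparse_0.npy", allow_pickle=True).item()

for table_name, table in key2emb.items():
    print(table_name, "holds", len(table), "keys")

# Look up one embedding; table name "one" and the key value are placeholders.
emb = key2emb["one"].get(12345)
```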
diff --git a/tools/python/optimizer_process.py b/tools/python/optimizer_process.py
new file mode 100644
index 00000000..8a658e29
--- /dev/null
+++ b/tools/python/optimizer_process.py
@@ -0,0 +1,116 @@
+# coding: UTF-8
+
+# Copyright (C) 2023. Huawei Technologies Co., Ltd. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import argparse
+import json
+import os
+from enum import Enum
+
+import numpy as np
+
+# each card (rank) processes its own saved data
+
+parser = argparse.ArgumentParser()
+parser.add_argument('--path', type=str, required=True, help='path of the model file to be converted')
+parser.add_argument('--step', type=int, required=True)
+
+sparse_file_prefix = "sparse-model.ckpt-"
+optimizer_prefix = "Optimizer"
+data_suffix = ".data"
+attribute_suffix = ".attribute"
+
+
+class DataAttr(Enum):
+    SHAPE = "shape"
+    DATATYPE = "data_type"
+
+
+def get_optimizer_name(sparse_file_path):
+    optimizer_list = []
+    for folder_name in os.listdir(sparse_file_path):
+        optimizer_list.append(folder_name)
+    return optimizer_list
+
+
+def get_table_list(table_upper_path):
+    table_list = []
+    for folder_name in os.listdir(table_upper_path):
+        table_list.append(folder_name + "/table")
+    return table_list
+
+
+def get_optimizer_param_name(table_path):
+    param_list = []
+    for folder_name in os.listdir(table_path):
+        param_list.append(folder_name)
+    return param_list
+
+
+def get_attribute_and_data_file(table_path):
+    if not os.path.exists(table_path):
+        raise FileNotFoundError(f"the input table path {table_path} does not exist.")
+
+    attribute_file_list = []
+    data_file_list = []
+    for file_name in os.listdir(table_path):
+        if file_name.endswith(attribute_suffix):
+            attribute_file_list.append(file_name)
+        if file_name.endswith(data_suffix):
+            data_file_list.append(file_name)
+    if len(attribute_file_list) != 1:
+        raise AssertionError(f"under the table path {table_path}, there must be exactly one attribute file. "
+                             f"In fact, {len(attribute_file_list)} attribute files exist.")
+    if len(data_file_list) != 1:
+        raise AssertionError(f"under the table path {table_path}, there must be exactly one data file. "
+                             f"In fact, {len(data_file_list)} data files exist.")
+    attribute_file = os.path.join(table_path, attribute_file_list[0])
+    data_file = os.path.join(table_path, data_file_list[0])
+    return attribute_file, data_file
+
+
+def process(path, step):
+    save_dict = {}
+    sparse_file_name = sparse_file_prefix + str(step)
+    sparse_file_path = os.path.join(path, sparse_file_name, optimizer_prefix)
+    optimizer_list = get_optimizer_name(sparse_file_path)
+    for optimizer in optimizer_list:
+        table_upper_path = os.path.join(sparse_file_path, optimizer, "HBM")
+        table_list = get_table_list(table_upper_path)
+
+        for table in table_list:
+            table_path = os.path.join(table_upper_path, table)
+            optimizer_param_list = get_optimizer_param_name(table_path)
+            optimizer_dict = {}
+            for param in optimizer_param_list:
+                data_path = os.path.join(table_path, param)
+                attribute_data_dir, target_data_dir = get_attribute_and_data_file(data_path)
+                with open(attribute_data_dir, "r") as fin:
+                    optimizer_attributes = json.load(fin)
+                # .data files are raw binary, so read them directly with np.fromfile
+                optimizer_data = np.fromfile(target_data_dir,
+                                             dtype=optimizer_attributes.pop(DataAttr.DATATYPE.value))
+                data_shape = optimizer_attributes.pop(DataAttr.SHAPE.value)
+                optimizer_data = optimizer_data.reshape(data_shape)
+                optimizer_dict[param] = optimizer_data
+            save_dict[table] = optimizer_dict
+    np.save(os.path.join(path, "optimizer_dict.npy"), save_dict)
+
+
+if __name__ == "__main__":
+    args = parser.parse_args()
+    process(args.path, args.step)
\ No newline at end of file
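
The optimizer dump written by process() can be inspected the same way; a sketch assuming the file sits under the path passed via --path (table and parameter names below are placeholders, and note that table keys carry the "/table" suffix added by get_table_list):

```python
import numpy as np

# optimizer_dict.npy maps "<table_name>/table" -> {optimizer_param: ndarray}.
opt_state = np.load("optimizer_dict.npy", allow_pickle=True).item()

for table, params in opt_state.items():
    for param, tensor in params.items():
        print(table, param, tensor.shape, tensor.dtype)
```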
" + f"In fact, {len(data_file_list)} data file exists.") + attribute_file = os.path.join(table_path, attribute_file_list[0]) + data_file = os.path.join(table_path, data_file_list[0]) + return attribute_file, data_file + + +def process(path, step): + save_dict = {} + sparse_file_name = sparse_file_prefix + str(step) + sparse_file_path = os.path.join(path, sparse_file_name,optimizer_prefix) + optimizer_list = get_optimizer_name(sparse_file_path) + for optimizer in optimizer_list: + table_upper_path = os.path.join(sparse_file_path, optimizer, "HBM") + table_list = get_table_list(table_upper_path) + + for table in table_list: + table_path = os.path.join(table_upper_path, table) + optimizer_param_list = get_optimizer_param_name(table_path) + optimizer_dict = {} + for param in optimizer_param_list: + data_path = os.path.join(table_path, param) + attribute_data_dir, target_data_dir = get_attribute_and_data_file(data_path) + with open(attribute_data_dir, "r") as fin: + optimizer_attributes = json.load(fin) + with open(target_data_dir, "r") as fin: + optimizer_data = np.fromfile(target_data_dir, + dtype=optimizer_attributes.pop(DataAttr.DATATYPE.value)) + data_shape = optimizer_attributes.pop(DataAttr.SHAPE.value) + optimizer_data = optimizer_data.reshape(data_shape) + optimizer_dict[param] = optimizer_data + save_dict[table] = optimizer_dict + np.save(path+"/optimizer_dict.npy", save_dict) + + +if __name__ == "__main__": + args = parser.parse_args() + process(args.path, args.step) \ No newline at end of file diff --git a/tools/python/readme.md b/tools/python/readme.md new file mode 100644 index 00000000..3f5e86df --- /dev/null +++ b/tools/python/readme.md @@ -0,0 +1,110 @@ +# 模型数据转换工具(key-value)使用说明 + +### 1. 美团1207模型ckpt保存路径说明 + +#### 1.1 训练时1207模型保存参数设置:(estimator模式) + +![img](./images/clip_image002.jpg) + +![img](./images/clip_image004.jpg) + +#### 1.2 训练后模型保存路径目录展示如下: + +![img](./images/clip_image006.jpg) + +#### 1.3 下面来看单个文件夹下存储的内容,以check_ran0为例: + +![img](./images/clip_image008.jpg) + +我们的模型数据转换工具就是要对该**sparse****文件夹中的数据进行转换**,转换成key-value形式,保存格式是npy文件,详情参考3. 输出文件格式说明。 + +下面介绍**如何使用该模型数据转换工具**。 + + + +### 2. 使用工具demo说明: + +**该转换工具model_data_to_key_value.py一共需要4个参数,path、name、ddr、step** + + + +| **参数名** | **数据类型** | **必选** | **默认值** | **描述** | +| ---------- | ------------ | -------- | ---------- | ---------------------------------- | +| --path | String | 是 | | 保存模型embedding数据的根路径 | +| --name | String | 否 | | 输出文件的名称,最终输出.npy | +| --ddr | Bool | 否 | False | 保存数据是否开启ddr模式 | +| --step | Int | 否 | 0 | 保存数据所属训练步数 | + + + +#### 2.1 参数确定: + +下面是一个选择参数的示例。 + +##### **1)** path路径确定 + +我们选择1207保存下来的0卡模型文件夹下的sparse部分数据进行转换,因此路径选到目录下:/home/lff/model/check_rank0/ + +![img](./images/clip_image010.jpg) + +**--path = /home/lff/model/check_rank0** + +(多卡的目录需要转换多次,一次只能转换一张卡下面sparse的数据) + + + +##### 2) name参数: 输出文件的名字,格式为.npy; + +例如:sparse_0,经过转换后的sparse数据就保存在当前目录下的sparse_0.npy文件中; + +**--name = sparse_0** + +##### 3) ddr参数:美团模型未开启ddr模式,因此选择False + +**--ddr = False** + +##### 4)step参数:在上面1207模型存储的目录下面,存了第0步的模型。 + +**--step=0** + + + +![img](./images/clip_image012.jpg) + +#### **2.2** **执行工具命令** + +python3 model_data_to_key_value.py --path=/home/lff/model/check_rank0 --name=sparse_0 --ddr=False --step=0 + +#### **2.3** **执行结果展示** + + + +![img](./images/clip_image014.jpg) + + + +### 3. 
diff --git a/tools/stat_info/main.py b/tools/stat_info/main.py
new file mode 100644
index 00000000..9c27754a
--- /dev/null
+++ b/tools/stat_info/main.py
@@ -0,0 +1,339 @@
+#!/usr/bin/env python3
+# coding: UTF-8
+# Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved.
+from datetime import datetime, timezone
+import logging
+import os
+import stat
+import time
+import threading
+
+
+CURRENT_PATH = os.getcwd()
+FORMATTED_TIME = datetime.now(timezone.utc).strftime("%d_%H_%M_%S")
+TRAIN_LOG_PATH = f"{CURRENT_PATH}/train_{FORMATTED_TIME}.log"
+EVAL_LOG_PATH = f"{CURRENT_PATH}/eval_{FORMATTED_TIME}.log"
+STAT_PREFIX = "[StatInfo]"
+DISPLAY_MODE_PRINT_SCREEN = "print_screen"
+DISPLAY_MODE_SAVE_LOG = "save_log"
+TABLE_NUM_LINE_PREFIX = "current_table_num"
+CHANNEL_LINE_PREFIX = "channel_id"
+VALUE_READ_START = 6
+VALUE_READ_INTERVAL = 2
+LOOP_SLEEP_TIME = 0.003
+
+# run mode to be selected
+HBM_NORMAL = {"key_process_time_cost": "key_process_time_cost",
+              "batch_key_num": "batch_key_num",
+              "unique_key_num": "unique_key_num"}
+HBM_FAAE = {"key_process_time_cost": "key_process_time_cost",
+            "batch_key_num": "batch_key_num",
+            "unique_key_num": "faae_unique_key_num"}
+HBM_HOT = {"key_process_time_cost": "key_process_time_cost",
+           "batch_key_num": "batch_key_num",
+           "unique_key_num": "hot_unique_key_num"}
+HBM_FAST = {"key_process_time_cost": "key_process_time_cost_with_fast_unique",
+            "batch_key_num": "batch_key_num_with_fast_unique",
+            "unique_key_num": "unique_key_num_with_fast_unique"}
+
+DDR_NORMAL = {"key_process_time_cost": "key_process_time_cost",
+              "batch_key_num": "batch_key_num",
+              "unique_key_num": "unique_key_num",
+              "swap_key_size": "swap_key_size",
+              "swap_time_cost": "swap_time_cost"}
+DDR_FAAE = {"key_process_time_cost": "key_process_time_cost",
+            "batch_key_num": "batch_key_num",
+            "unique_key_num": "faae_unique_key_num",
+            "swap_key_size": "swap_key_size",
+            "swap_time_cost": "swap_time_cost"}
+DDR_HOT = {"key_process_time_cost": "key_process_time_cost",
+           "batch_key_num": "batch_key_num",
+           "unique_key_num": "hot_unique_key_num",
+           "swap_key_size": "swap_key_size",
+           "swap_time_cost": "swap_time_cost"}
+DDR_FAST = {"key_process_time_cost": "key_process_time_cost_with_fast_unique",
+            "batch_key_num": "batch_key_num_with_fast_unique",
+            "unique_key_num": "unique_key_num_with_fast_unique",
+            "swap_key_size": "swap_key_size",
+            "swap_time_cost": "swap_time_cost"}
+
+DDR_LIST = [DDR_NORMAL, DDR_FAAE, DDR_HOT, DDR_FAST]
+
+# ====================== Please modify here according to readme before using ======================
+TARGET_REC_LOG_PATH = "/home/example.log"
+RUN_MODE = DDR_FAST
+RANK_SIZE = 8
+DISPLAY_MODE = "save_log"  # can be "save_log" or "print_screen"
+DISPLAY_INTERVAL = 1
+# ==================================================================================================
+
+TRAIN_DICT = dict()
+EVAL_DICT = dict()
+CURRENT_TABLE_NUM = 0
+
+FULL_DICT_LEN = len(RUN_MODE)
+TRAIN_TOTAL_DATA = dict()
+EVAL_TOTAL_DATA = dict()
+
+
+def read_log_by_line_loop(log_add: str):
+    """
+    read log by line continuously
+
+    Arg:
+        log_add: path of the mxRec log file to read
+    """
+    logging.info("============= log reading started =============")
+    with open(log_add, 'r') as log_file:
+        while True:
+            new_line = log_file.readline()  # read one newly appended line
+            check_line_content(new_line)
+
+
+def check_line_content(line):
+    """
+    check line content and record relevant info
+
+    Arg:
+        line: line read from the log file
+    """
+    index = line.find(STAT_PREFIX)
+    if line and index != -1:
+        stat_data = line[index + len(STAT_PREFIX):].split()
+        if stat_data[0] == CHANNEL_LINE_PREFIX:
+            tar_dict = create_data(stat_data)
+            update_data(stat_data, tar_dict)
+        elif stat_data[0] == TABLE_NUM_LINE_PREFIX:
+            global CURRENT_TABLE_NUM
+            CURRENT_TABLE_NUM = int(stat_data[1])
+    else:
+        # no new content yet; sleep briefly before the next read to avoid busy-waiting
+        time.sleep(LOOP_SLEEP_TIME)
+
+
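+# Shape of the [StatInfo] lines consumed above (illustrative; the field names at
+# token positions 2 and 4 stand for whatever mxRec actually emits, since only the
+# values at positions 1, 3 and 5 are read):
+#     [StatInfo]channel_id 0 <step> 12 <rank> 3 batch_key_num 4096 unique_key_num 1024
+# create_data() takes channel/step/rank from tokens 1, 3 and 5; update_data() then
+# walks the remaining "name value" pairs starting at VALUE_READ_START.
+
+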
+def start_display_data(channel: int):
+    """
+    start to display data continuously
+
+    Arg:
+        channel: channel id, 0 for train, 1 for eval
+    """
+    logging.info("============= channel: %d stat display =============", channel)
+    if channel == 0:
+        glob_dict = TRAIN_DICT
+    elif channel == 1:
+        glob_dict = EVAL_DICT
+    else:
+        raise ValueError("channel num can only be 0 or 1")
+    display_per_step(glob_dict, channel)
+
+
+def display_per_step(glob_dict: dict, channel: int):
+    """
+    display stat info according to step num
+
+    Arg:
+        glob_dict: dict used to record stat info, TRAIN_DICT or EVAL_DICT
+        channel: channel id, 0 for train, 1 for eval
+    """
+    step = 0
+    while True:
+        if step not in glob_dict:
+            time.sleep(LOOP_SLEEP_TIME)
+            continue
+        display_per_rank(glob_dict[step], channel, step)
+        del glob_dict[step]
+        step += 1
+
+
+def display_per_rank(current_step_dict: dict, channel: int, step: int):
+    """
+    display stat info of each rank within one step
+
+    Arg:
+        current_step_dict: per-rank stat dicts of the current step
+        channel: channel id, 0 for train, 1 for eval
+        step: current step num
+    """
+    i = 0
+    while i < RANK_SIZE:
+        if i not in current_step_dict:
+            time.sleep(LOOP_SLEEP_TIME)
+            continue
+        if len(current_step_dict[i]) == FULL_DICT_LEN:
+            display_data(current_step_dict[i], channel, step, i)
+            i += 1
+        elif len(current_step_dict[i]) < FULL_DICT_LEN:
+            time.sleep(LOOP_SLEEP_TIME)
+        else:
+            raise ValueError("dict length shall not be bigger than FULL_DICT_LEN")
+
+
+def create_total_dict():
+    """
+    create a dict instance according to the template
+    """
+    template_dict = {
+        "total_batch_key_num": 0,
+        "total_unique_key_num": 0,
+        "total_key_process_time_cost": 0,
+        "total_swap_size": 0,
+        "total_swap_time": 0
+    }
+    return template_dict.copy()
+
+
+def construct_ddr_message(display_dict: dict, target_dict: dict, batch_key_num: int, total_batch_key_num: int):
+    """
+    construct ddr info message to display
+
+    Arg:
+        display_dict: info dict to display
+        target_dict: total dict in which stat info is accumulated
+        batch_key_num: key num of the current batch on the current device
+        total_batch_key_num: total key num on the current device
+    """
+    swap_key_size = display_dict[RUN_MODE["swap_key_size"]]
+    target_dict["total_swap_size"] += swap_key_size
+    total_swap_size = target_dict["total_swap_size"]
+
+    swap_time_cost = display_dict[RUN_MODE["swap_time_cost"]]
+    target_dict["total_swap_time"] += swap_time_cost
+    total_swap_time = target_dict["total_swap_time"]
+
+    swap_speed = 0
+    if swap_time_cost != 0:
+        swap_speed = swap_key_size / swap_time_cost
+
+    total_swap_speed = 0
+    if total_swap_time != 0:
+        total_swap_speed = total_swap_size / total_swap_time
+
+    ddr_message = f"Current Swap Key Num:{swap_key_size} " \
+                  f"\nCurrent Swap Speed:{round(swap_speed, 3)}" \
+                  f"\nCurrent HBM Rate:{round(((batch_key_num - swap_key_size) / batch_key_num), 3)}" \
+                  f"\nTotal Swap Key Num:{total_swap_size} " \
+                  f"\nAverage Swap Speed:{round(total_swap_speed, 3)}" \
+                  f"\nAverage HBM Rate:{round(((total_batch_key_num - total_swap_size) / total_batch_key_num), 3)}\n"
+    return ddr_message
+
+
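+# Worked example for the DDR message above (illustrative numbers): with
+# batch_key_num = 10000 and swap_key_size = 1500, the HBM rate is
+# (10000 - 1500) / 10000 = 0.85, i.e. 85% of the batch's keys were served
+# from HBM and 15% had to be swapped in from DDR.
+
+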
+def display_data(display_dict: dict, channel: int, step: int, rank_id: int):
+    """
+    display stat info messages according to DISPLAY_MODE
+
+    Arg:
+        display_dict: info dict to display
+        channel: channel id, 0 for train, 1 for eval
+        step: current step num
+        rank_id: id of current device rank
+    """
+    if channel == 0:
+        target_dict = TRAIN_TOTAL_DATA
+    else:
+        target_dict = EVAL_TOTAL_DATA
+    if rank_id not in target_dict:
+        target_dict[rank_id] = create_total_dict()
+    target_dict = target_dict[rank_id]
+    batch_key_num = display_dict[RUN_MODE["batch_key_num"]]
+    target_dict["total_batch_key_num"] += batch_key_num
+    total_batch_key_num = target_dict["total_batch_key_num"]
+
+    unique_key_num = display_dict[RUN_MODE["unique_key_num"]]
+    target_dict["total_unique_key_num"] += unique_key_num
+    total_unique_key_num = target_dict["total_unique_key_num"]
+
+    key_process_time_cost = display_dict[RUN_MODE["key_process_time_cost"]]
+    target_dict["total_key_process_time_cost"] += key_process_time_cost
+    total_key_process_time_cost = target_dict["total_key_process_time_cost"]
+
+    key_process_speed = 0
+    if key_process_time_cost != 0:
+        key_process_speed = batch_key_num / key_process_time_cost
+
+    total_key_process_speed = 0
+    if total_key_process_time_cost != 0:
+        total_key_process_speed = total_batch_key_num / total_key_process_time_cost
+
+    message = f"[STATINFO]Channel:{channel} Current Step:{step} RankId:{rank_id} " \
+              f"\nCurrentTableNum:{CURRENT_TABLE_NUM}" \
+              f"\nCurrent Batch Key Num:{batch_key_num} Current Unique Key Num:{unique_key_num}" \
+              f"\nCurrent Deduplication Key Rate:{round((1 - unique_key_num / batch_key_num), 3)}" \
+              f"\nCurrent Key Process Speed:{round(key_process_speed, 3)} / ms" \
+              f"\nTotal Batch Key Num:{total_batch_key_num} Total Unique Key Num:{total_unique_key_num}" \
+              f"\nAverage Deduplication Key Rate: {round((1 - total_unique_key_num / total_batch_key_num), 3)}" \
+              f"\nAverage Key Process Speed:{round(total_key_process_speed, 3)} / ms\n"
+
+    if RUN_MODE in DDR_LIST:
+        ddr_message = construct_ddr_message(display_dict, target_dict, batch_key_num, total_batch_key_num)
+        message = message + ddr_message
+
+    if step % DISPLAY_INTERVAL == 0:
+        if DISPLAY_MODE == DISPLAY_MODE_PRINT_SCREEN:
+            logging.info(message)
+        elif DISPLAY_MODE == DISPLAY_MODE_SAVE_LOG:
+            flags = os.O_WRONLY | os.O_CREAT | os.O_APPEND
+            modes = stat.S_IWUSR | stat.S_IRUSR
+            if channel == 0:
+                log_path = TRAIN_LOG_PATH
+            elif channel == 1:
+                log_path = EVAL_LOG_PATH
+            else:
+                raise ValueError("channel num can only be 0 or 1")
+            with os.fdopen(os.open(log_path, flags, modes), mode='a') as log_out:
+                log_out.write(message + "\n")
+        else:
+            raise ValueError(f"DISPLAY_MODE can only be 'print_screen' or 'save_log' but '{DISPLAY_MODE}' is given")
+
+
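+# In-memory layout built by create_data()/update_data() below (illustrative):
+#     TRAIN_DICT = {step_id: {rank_id: {stat_name: value}}}
+# e.g. TRAIN_DICT[12][0] == {"batch_key_num": 4096, "unique_key_num": 1024, ...}
+# display_per_step()/display_per_rank() drain this structure once a rank's dict
+# reaches FULL_DICT_LEN entries.
+
+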
+def create_data(line_stat_data):
+    """
+    store stat data according to the log file
+
+    Arg:
+        line_stat_data: tokenized [StatInfo] line from file reading
+    """
+    channel_id = int(line_stat_data[1])
+    step_id = int(line_stat_data[3])
+    rank_id = int(line_stat_data[5])
+    if channel_id == 0:
+        global_dict = TRAIN_DICT
+    elif channel_id == 1:
+        global_dict = EVAL_DICT
+    else:
+        raise ValueError("channel num can only be 0 or 1")
+
+    if step_id not in global_dict:
+        global_dict[step_id] = dict()
+    if rank_id not in global_dict[step_id]:
+        global_dict[step_id][rank_id] = dict()
+    target_dict = global_dict[step_id][rank_id]
+    return target_dict
+
+
+def update_data(line_stat_data, target_dict: dict):
+    """
+    update stat data according to the log file
+
+    Arg:
+        line_stat_data: tokenized [StatInfo] line from file reading
+        target_dict: per-step, per-rank dict to fill with "name value" pairs
+    """
+    for i in range(VALUE_READ_START, len(line_stat_data), VALUE_READ_INTERVAL):
+        target_dict[line_stat_data[i]] = int(line_stat_data[i + 1])
+
+
+if __name__ == "__main__":
+    logging.basicConfig(level=logging.INFO)
+    thread1 = threading.Thread(target=read_log_by_line_loop, args=(TARGET_REC_LOG_PATH,))
+    thread2 = threading.Thread(target=start_display_data, args=(0,))
+    thread3 = threading.Thread(target=start_display_data, args=(1,))
+
+    # start the threads
+    thread1.start()
+    thread2.start()
+    thread3.start()
\ No newline at end of file
diff --git a/tools/stat_info/readme.md b/tools/stat_info/readme.md
new file mode 100644
index 00000000..84cbb5f7
--- /dev/null
+++ b/tools/stat_info/readme.md
@@ -0,0 +1,45 @@
+#### Purpose
+
+This script collects the statistics that mxRec logs on the host side at runtime and outputs the aggregated results.
+
+#### Usage notes:
+
+1. Set the STAT_ON environment variable in the mxRec training script: 0 disables statistics and 1 enables them; mxRec disables statistics by default;
+```
+#example
+export STAT_ON=1
+```
+
+2. Redirect the training log to a fixed location;
+```
+#example
+bash run.sh 2>&1 | tee ${test_mode}"_save".log
+```
+
+3. Configure the following values in the script:
+   TARGET_REC_LOG_PATH: path of the training log produced by mxRec
+   RUN_MODE: run mode to collect statistics for; the supported modes are listed in the script
+   RANK_SIZE: rank size of the training run, i.e. how many cards it uses
+   DISPLAY_MODE: how the statistics are displayed, either written to disk ("save_log") or printed to the screen ("print_screen")
+   DISPLAY_INTERVAL: interval of the statistics output, i.e. one record every this many steps
+
+
+4. The script can run alongside the training process; stop it manually after training finishes to release its resources.
+
+5. Make sure the mxRec log level is info or lower
+```
+#example
+export MXREC_LOG_LEVEL="INFO"
+```
+
+#### Supported RUN_MODE values
+
+HBM_NORMAL: training without the multi-level cache or any other feature
+HBM_FAAE: training without the multi-level cache, with feature admit-and-evict
+HBM_HOT: training without the multi-level cache, with hot embedding
+HBM_FAST: training without the multi-level cache, with fast unique
+
+DDR_NORMAL: training with the DDR cache but no other feature
+DDR_FAAE: training with the DDR cache and feature admit-and-evict
+DDR_HOT: training with the DDR cache and hot embedding
+DDR_FAST: training with the DDR cache and fast unique
\ No newline at end of file
-- 
Gitee


From 3c6fc3726dfa05d43692de4bf71528a182ef9d15 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E4=BD=95=E9=9C=96?=
Date: Wed, 13 Mar 2024 09:30:35 +0800
Subject: [PATCH 003/302] =?UTF-8?q?=E5=88=A0=E9=99=A4python=20dt=E8=84=9A?=
 =?UTF-8?q?=E6=9C=AC=E4=B8=AD=E5=AE=89=E8=A3=85setuptools=E7=9A=84?=
 =?UTF-8?q?=E4=BB=A3=E7=A0=81?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 tests/run_python_dt.sh | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/run_python_dt.sh b/tests/run_python_dt.sh
index a64a0913..f29bf7b5 100644
--- a/tests/run_python_dt.sh
+++ b/tests/run_python_dt.sh
@@ -26,7 +26,6 @@ if [ $ARCH == "aarch64" ]; then
 fi
 
 # build mxRec and get output directory
-pip3 install setuptools==65.6.3
 bash "$TOP_PATH"/build/build_tf1_with_opensource.sh
 
 # create libasc directory and copy so files into it
-- 
Gitee


From 8341c095dbab43770abb2813e69083c7c4a2cef4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E4=BD=95=E9=9C=96?=
Date: Wed, 13 Mar 2024 09:54:39 +0800
Subject: [PATCH 004/302] =?UTF-8?q?=E9=80=82=E9=85=8DCI=E6=9C=BA=E5=99=A8?=
 =?UTF-8?q?=EF=BC=9A=E7=BC=96=E8=AF=91=E6=97=B6=E4=BD=BF=E7=94=A88?=
 =?UTF-8?q?=E4=B8=AAcpu?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/build.sh   | 2 +-
 src/test_ut.sh | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/build.sh b/src/build.sh
index ed55e213..c9fdc330 100644
--- a/src/build.sh
+++ b/src/build.sh
@@ -38,5 +38,5 @@ cmake -DCMAKE_BUILD_TYPE=Release \
-DSECUREC_PATH="$2"/../opensource/securec \ -DCMAKE_INSTALL_PREFIX="$2"/output \ -DBUILD_CUST="$3" .. -make -j +make -j8 make install diff --git a/src/test_ut.sh b/src/test_ut.sh index 0517f809..156db1cc 100644 --- a/src/test_ut.sh +++ b/src/test_ut.sh @@ -140,7 +140,7 @@ cmake -DCMAKE_BUILD_TYPE=Debug \ -DSECUREC_PATH="${ROOT_DIR}"/../opensource/securec \ -DBUILD_TESTS=on -DCOVERAGE=on "$(dirname "${PWD}")" -make -j +make -j8 make install # Run Test -- Gitee From c2b53491efd6e930aa88022d50c912368b9863ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=BD=95=E9=9C=96?= Date: Tue, 19 Mar 2024 19:11:26 +0800 Subject: [PATCH 005/302] =?UTF-8?q?=E4=BF=AE=E6=94=B9littl=20demo=E8=84=9A?= =?UTF-8?q?=E6=9C=AC=E4=B8=ADCM=5FCHIEF=5FPORT=E7=AB=AF=E5=8F=A3=E5=8F=B7?= =?UTF-8?q?=EF=BC=8C=E5=B0=866000=E6=94=B9=E4=B8=BA60001=EF=BC=8C=E9=81=BF?= =?UTF-8?q?=E5=85=8D=E7=AB=AF=E5=8F=A3=E8=A2=AB=E5=8D=A0=E7=94=A8=E5=AF=BC?= =?UTF-8?q?=E8=87=B4GE=E6=8A=A5=E9=94=99?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/DCNv2/run.sh | 2 +- examples/demo/little_demo/run.sh | 2 +- examples/demo/little_demo_estimator/run.sh | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/DCNv2/run.sh b/examples/DCNv2/run.sh index 234e5e4b..f30e0ac6 100644 --- a/examples/DCNv2/run.sh +++ b/examples/DCNv2/run.sh @@ -85,7 +85,7 @@ if [ -n "$ip" ]; then # no ranktable echo "Current is no ranktable solution." export CM_CHIEF_IP=$ip # 主节点ip - export CM_CHIEF_PORT=6000 # 主节点监听端口 + export CM_CHIEF_PORT=60001 # 主节点监听端口 export CM_CHIEF_DEVICE=0 # 主节点device id export CM_WORKER_IP=$ip # 当前节点ip export CM_WORKER_SIZE=$num_process # 参与集群训练的device数量 diff --git a/examples/demo/little_demo/run.sh b/examples/demo/little_demo/run.sh index 712f6273..66c27e28 100644 --- a/examples/demo/little_demo/run.sh +++ b/examples/demo/little_demo/run.sh @@ -141,7 +141,7 @@ else echo "ip: $ip available." echo "The ranktable solution is removed." export CM_CHIEF_IP=$ip # 主节点ip - export CM_CHIEF_PORT=6000 # 主节点监听端口 + export CM_CHIEF_PORT=60001 # 主节点监听端口 export CM_CHIEF_DEVICE=0 # 主节点device id export CM_WORKER_IP=$ip # 当前节点ip export CM_WORKER_SIZE=$num_process # 参与集群训练的device数量 diff --git a/examples/demo/little_demo_estimator/run.sh b/examples/demo/little_demo_estimator/run.sh index 39e77fc8..33770e59 100644 --- a/examples/demo/little_demo_estimator/run.sh +++ b/examples/demo/little_demo_estimator/run.sh @@ -143,7 +143,7 @@ else echo "ip: $ip available." echo "The ranktable solution is removed." 
From 6466e02d0c2b53491efd6e930aa88022d50c912368b9863ca Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E4=BD=95=E9=9C=96?=
Date: Thu, 21 Mar 2024 17:00:20 +0800
Subject: [PATCH 006/302] Remove unused files
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 tools/feature_admit_tools/get_hist.py         |  16 --
 tools/feature_admit_tools/static_key_count.py |  61 ---
 ...71\346\257\224\346\226\271\346\263\225.md" |  21 --
 tools/perf/mt_1207.sh                         |  60 ---
 tools/python/images/clip_image002.jpg         | Bin 9453 -> 0 bytes
 tools/python/images/clip_image004.jpg         | Bin 8027 -> 0 bytes
 tools/python/images/clip_image006.jpg         | Bin 21733 -> 0 bytes
 tools/python/images/clip_image008.jpg         | Bin 26810 -> 0 bytes
 tools/python/images/clip_image010.jpg         | Bin 24851 -> 0 bytes
 tools/python/images/clip_image012.jpg         | Bin 17452 -> 0 bytes
 tools/python/images/clip_image014.jpg         | Bin 18658 -> 0 bytes
 tools/python/images/clip_image016.jpg         | Bin 6056 -> 0 bytes
 tools/python/images/clip_image018.gif         | Bin 70465 -> 0 bytes
 tools/python/key_2_emb_formatter.py           | 220 ------
 tools/python/optimizer_process.py             | 116 ---
 tools/python/readme.md                        | 110 ---
 16 files changed, 604 deletions(-)
 delete mode 100644 tools/feature_admit_tools/get_hist.py
 delete mode 100644 tools/feature_admit_tools/static_key_count.py
 delete mode 100644 "tools/feature_admit_tools/\347\211\271\345\276\201\345\207\206\345\205\245\347\262\276\345\272\246\345\257\271\346\257\224\346\226\271\346\263\225.md"
 delete mode 100644 tools/perf/mt_1207.sh
 delete mode 100644 tools/python/images/clip_image002.jpg
 delete mode 100644 tools/python/images/clip_image004.jpg
 delete mode 100644 tools/python/images/clip_image006.jpg
 delete mode 100644 tools/python/images/clip_image008.jpg
 delete mode 100644 tools/python/images/clip_image010.jpg
 delete mode 100644 tools/python/images/clip_image012.jpg
 delete mode 100644 tools/python/images/clip_image014.jpg
 delete mode 100644 tools/python/images/clip_image016.jpg
 delete mode 100644 tools/python/images/clip_image018.gif
 delete mode 100644 tools/python/key_2_emb_formatter.py
 delete mode 100644 tools/python/optimizer_process.py
 delete mode 100644 tools/python/readme.md

diff --git a/tools/feature_admit_tools/get_hist.py b/tools/feature_admit_tools/get_hist.py
deleted file mode 100644
index 1afe061f..00000000
--- a/tools/feature_admit_tools/get_hist.py
+++ /dev/null
@@ -1,16 +0,0 @@
-import json
-
-import numpy as np
-
-file_name = "slice_0.data"
-data = np.fromfile(file_name, dtype=np.int64)
-data = data[1:].reshape(-1, 3)
-result = {}
-
-with open("admit_hist.json", "w") as f:
-    for d in data:
-        key, count, _ = d
-        result[str(key)] = int(count)
-
-    sorted_result = dict(sorted(result.items(), key=lambda x: x[1], reverse=True))
-    json.dump(sorted_result, f, indent=4)

diff --git a/tools/feature_admit_tools/static_key_count.py b/tools/feature_admit_tools/static_key_count.py
deleted file mode 100644
index 53e5237f..00000000
--- a/tools/feature_admit_tools/static_key_count.py
+++ /dev/null
@@ -1,61 +0,0 @@
-# coding: utf-8
-import argparse
-import json
-
-import tensorflow as tf
-
-parser = argparse.ArgumentParser()
-parser.add_argument("--file_path", type=str, required=True, help='path of the '
-                    'dataset')
-
-
-def static_key_count(file_path):
-    admit_threshold = 30
-    dataset = tf.data.TFRecordDataset(file_path)
-    dataset = dataset.batch(int(1), drop_remainder=False)
-    iterator = dataset.make_one_shot_iterator()
-    next_element = iterator.get_next()
-    offset_value = 2**48
-    shift = 1
-    result = {}
-    table_list = ["history_poi_seq_id_list#vector", "poi_id_end2end#vector", "rt_day_click_event_poi_id_list"]
-    with tf.Session() as sess:
-        while True:
-            try:
-                examples = sess.run(next_element)
-                example = tf.train.Example.FromString(examples[0])
-                features = example.features
-                feature = features.feature
-
-                for name, values in feature.items():
-                    num_list = []
-                    if name in table_list:
-                        num_list = values.int64_list.value
-                    if name not in table_list:
-                        continue
-
-                    if len(num_list) == 0:
-                        print("===================")
-                        num_list = [0]
-
-                    for num in num_list:
-                        num = num % offset_value + shift * offset_value
-                        result[num] = result.get(num, 0) + 1
-
-            except tf.errors.OutOfRangeError:
-                print("EOS: OutOfRangeError")
-                break
-    temp = {}
-    for key, value in result.items():
-        if value >= admit_threshold:
-            temp[key] = value
-    sorted_result = dict(sorted(temp.items(), key=lambda x: x[1], reverse=True))
-    with open("key_count30.json", "w") as f:
-        json.dump(sorted_result, f, indent=4)
-
-    print(sorted_result)
-
-
-if __name__ == "__main__":
-    args = parser.parse_args()
-    static_key_count(args.file_path)
-

diff --git "a/tools/feature_admit_tools/\347\211\271\345\276\201\345\207\206\345\205\245\347\262\276\345\272\246\345\257\271\346\257\224\346\226\271\346\263\225.md" "b/tools/feature_admit_tools/\347\211\271\345\276\201\345\207\206\345\205\245\347\262\276\345\272\246\345\257\271\346\257\224\346\226\271\346\263\225.md"
deleted file mode 100644
index 2cee54c6..00000000
--- "a/tools/feature_admit_tools/\347\211\271\345\276\201\345\207\206\345\205\245\347\262\276\345\272\246\345\257\271\346\257\224\346\226\271\346\263\225.md"
+++ /dev/null
@@ -1,21 +0,0 @@
-## **Instructions for Comparing Feature-Admission Accuracy**
-
- ------------------
-### **Tool overview**:
-
-A tool for comparing accuracy once feature admission is enabled in mxRec. Currently only the model save format SAVE_EASY=False is supported.
-
-### **Environment dependencies**
-
-The tool was tested in a tf1 environment configured as follows, for reference:
-
-> **tf1** <br>
-tensorflow == 1.15.0 / 1.15.4
-numpy == 1.21.6
-python == 3.7.5
-
-### **Usage**:
-1) Point static_key_count.py at the chosen dataset to see how often each key above the specified threshold appears in the dataset.
-2) With admission enabled, run the get_hist.py tool on the saved HisRecord to see how often each key occurred during the run.
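The two steps above produce one key-count JSON from the dataset side (key_count30.json, written by static_key_count.py) and one from the runtime side (admit_hist.json, written by get_hist.py), which can then be compared. A hypothetical comparison step built on those file names; the dataset path is a placeholder and jq is assumed to be installed:

```
python3 static_key_count.py --file_path /path/to/dataset.tfrecord  # writes key_count30.json
python3 get_hist.py  # reads slice_0.data, writes admit_hist.json

# normalize key order in both files, then compare them
jq -S . key_count30.json > expected.json
jq -S . admit_hist.json > actual.json
diff expected.json actual.json && echo "admission counts match"
```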
diff --git a/tools/perf/mt_1207.sh b/tools/perf/mt_1207.sh
deleted file mode 100644
index fc0af5db..00000000
--- a/tools/perf/mt_1207.sh
+++ /dev/null
@@ -1,60 +0,0 @@
-#!/bin/bash
-# Copyright (c) Huawei Technologies Co., Ltd. 2021-2023. All rights reserved.
-# Description: performance analysis tool
-# Author: MindX SDK
-# Create: 2023
-# History: NA
-
-#set -x
-
-LOG_INFO() { echo -e "\033[1;4;32m$1\033[0m" ; }
-LOG_NOTICE() { echo -e "\033[1;4;45m$1\033[0m" ; }
-LOG_WARN() { echo -e "\033[1;31m[WARN]$1\033[0m" ; }
-LOG_ERROR() { echo -e "\033[1;31m[Error]$1\033[0m" ; }
-
-logfile=$1
-
-# ---------------config start---------------------
-batchsize=9600
-parallel=8
-nv_throughput=820000
-# ---------------config end---------------------
-
-validate_options()
-{
-    if [ $# -ne 1 ]; then
-        LOG_ERROR "NO log_file"
-        echo "[Usage]: bash $0 your_file.log"
-        exit 1
-    fi
-}
-
-print_throughput()
-{
-    LOG_INFO "=========Throughput====================="
-    nv_sps=$(awk 'BEGIN{printf "%.2f\n",('${nv_throughput}'/'$batchsize'/'$parallel')}')
-    LOG_NOTICE "batchsize:${batchsize}, parallel:${parallel}"
-    LOG_NOTICE "nv_throughput:${nv_throughput}, nv_sps:${nv_sps}"
-
-    grep 'tensorflow:global_step/sec' $logfile | \
-        awk -F" " '{sum+=$NF} END \
-        {printf "Throughput: avg=%0.3f, xA100:%0.3f\n", \
-        sum/NR, sum/NR/'${nv_sps}'}'
-
-    grep 'tensorflow:global_step/sec' $logfile | \
-        awk -F" " 'BEGIN {sum=0; count=0;} {if ($NF > 3) {sum+=$NF; count++;}} END \
-        {printf "Throughput: after filter(<3), avg=%0.3f, xA100:%0.3f\n", \
-        sum/count, sum/count/'${nv_sps}'}'
-
-    grep 'tensorflow:global_step/sec' $logfile | \
-        awk -F" " 'BEGIN {max=0} {if($2>max) max=$2} END \
-        {printf "Throughput: max=%0.3f, xA100:%0.3f\n", max, max/'${nv_sps}'}'
-}
-
-main()
-{
-    validate_options $@
-    print_throughput
-}
-
-main $@

diff --git a/tools/python/images/clip_image002.jpg b/tools/python/images/clip_image002.jpg
deleted file mode 100644
index fd387c49710c1d99303cce3767ff720cad502944..0000000000000000000000000000000000000000
GIT binary patch
[base85 image payload omitted; not human-readable]

diff --git a/tools/python/images/clip_image004.jpg b/tools/python/images/clip_image004.jpg
deleted file mode 100644
index bcb5cff76d9c555e3cb414a2d30b1716a902d31a..0000000000000000000000000000000000000000
GIT binary patch
[base85 image payload omitted; not human-readable]

diff --git a/tools/python/images/clip_image006.jpg b/tools/python/images/clip_image006.jpg
deleted file mode 100644
index 8759d287be51d491663043f531f5e3041284811f..0000000000000000000000000000000000000000
GIT binary patch
[base85 image payload omitted; not human-readable]

diff --git a/tools/python/images/clip_image010.jpg b/tools/python/images/clip_image010.jpg
deleted file mode 100644
index 71b63575957f04d9e844f6425eeb562470201840..0000000000000000000000000000000000000000
GIT binary patch
[base85 image payload omitted; the binary patches for the remaining deleted images are truncated here]
z#`{tKkgubbDe!r{e)GW@R|#udrTjP0oRL83|9=fA;Z9j627 zQ{!hG)Q9Y-DZg1OZtD1|K6bd^r5FG?UlxFi$ZTM!4U&sw#x`G!5djn|&D*ok5_>hZ zwF(?7?3msWW*tpYtzA9#x_NQ9-EStEMxfo0spd2>s=yFPTTQe7+_dGRp(AN`ul$KOPRn&&;dDhwJf$o<(VePj(cZ%G7E>F3aE3kiS#{6l@{91pp S3f|QI`gi^_CrIOOvwsJY%8-Kq diff --git a/tools/python/images/clip_image012.jpg b/tools/python/images/clip_image012.jpg deleted file mode 100644 index d6d1169e8ad6fa4bb7e21e764640087b4f0d3ef7..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 17452 zcmeIZ1yr0%wwTDgSOGjylmp2D5D*XmK966( z!#v<6022)j0}T}u104ek3lkfU{0SZ|E*=dDDFHbn9WxV<4#>dDDZ<0bCdAGFV1Sf{grFI`Hv- z05U!b0X?r2Dxtb58iOklUr0hOI#9Z*omgY+n33PiEffQjgp`b&f{B@h^(mWxpwKg6 zk>@XEWaZ=)K#H$5wX}6~_4LgxEUm0h9_7>mL}On4FrPnVo~JuEEzgHn+BSc27>v&Mz*n zu5W(+#)SYt`UBQKk^Ki;_>Z^{k&%&*(SGAXK=gi0NchMo^t`A9QtD`?u7nJHA?QTX z3At767(jlFV`4M6F-#IhfmNoH-_ZVn?0*he=>I2V{{;3IE*JnC3E|OrNcaE=z@;5) zUNGk9lmpJXoo2nfS}}6;kIyq)_W}3?;Yf%50PBz=Wl_;+bq#GRr{n^`mVU;vGiI0G z3jXo@2f#ZE%Jl}T@cPAJH>tS9Z$bzc#5pu;l-CJ`XY{E+Y*Z;E@o;&GD1lG51=04~ z+6w*HjqCW6f%1g}0i){DY?yuhA39Qbr7pD^gXagnVr1l=5p@A5Ma?&kCYaOj37RrB zFt}EA84SWf_;~3{He!ic`4X{CLeOHyF&SruWghKURH2lIXFsg#{ibZiKo)|L91O{f z)@NcpI3YCciTi=#rF`|Q^FCHgF0T?3wCFCInN9%uVhV1@Q7R--{%%B+j?ELT4y?r!!0IcRIy2q67FQSvexQ2#Dp)g8AvJ1V+*|Y zrJmBUs-V*y;|5{YE&<2kvHAvmNe~?y5z%0j;9f%utAhF&4rf7@DrjmzKf!Q3?s)l+ zh;18usXKAuZ!feu0uw4o_TMA-Z?XlWx?lOA&6G1PQKsy!R&ZHBZQrcHyt@Ld==;d8 zc6PKaX2!);_m{2XJ8C$aM-#%6Bc$P_RNz2mr5BC%*6w27iR}bE17&9$ACoyl8&+0m z4S(=csG^A)qVmf#a#1rzrHx6aVa1?`U98gxmpZpRD>0OJ$0n_kN1h>2lp`a-ZJ>*H zZ{Grc+ubZ(`-JnNOOn z629-`pyWlf0{%M*F{|7H22ORg0pk}c=SKDs>!?jsI&slwVK{ce8f=KCzPc6a3gLa1 zW7m%cY)Y)+kItFf0(oZ-(z~y#ih3`n)Ng^4OLV5O~bXH8KU{Nd^~4Asi3I@ z?kb2G*mLFE+8htk!dluO^+FLktVCH|2f{_&Yl$v-N`WtI8FW#)3Z6%3Ep&$5$!{=2 zC{d3W#&tWbX2R)7D?Oh^ntmkZ`ixfvlu)cGvQ4F63h(~b61pKa?eEHNQsSgw9;eY; ztimR6f@NrE$dp_R1BN3-5VG3xg|umjouZdfI5m=#Z+^?r0Abr;B!;AlC6V9LFaJ1^ zByu=c+QUkCmWDDQ{B%9Dk8=vFE9VQ}+ZG`<5~CY4Fz9u5XM(1>Q5A3zd!d@~_8L-r zU3!{eiM&JFgLp5uV!ph&Md@{P=MM5;yLNoDV_ui;1?@9^vY-_jVbW$zHW2eOU94?6 zR;*#_8UF#{4H=v}K6*J?HD6**0Fk=vza(OufuYM9&YI6EnvshU z()GfO-kU{5{KWE>8eMKgo-Za5MhtxXel7_Y5$OMFB5yeo{O~m+iLJYTD;G|Mt&aAd z+wst$J$-3yd=gb*eW4+dv1Y|ZRm{3jmxd{b_aHXx*EdVu@P-5h#?pYJZ61Wt2f&-G z00prRqPe#8wT%`r{olNSHIDPp()PW$>ENe}Hl(LYUIWLhEnR$()%@fRHT9}X^YMTS zL8}JTTU=mYcWRT84xH_7&uA1LJ--4~Py08jZdpxbQUjV|xK zmdcrof4L<$F$etgR3;R-+2@r|H|nuf{Ix8-sHn-1x9@n6)t5wK5lrfe#F(`(>F_s7C>ZJvXhRB}elm1+#7x|VhJLdRM%wY~*;)F;;2ejLa^YgXBaRIA| zBc}>IPSdksW40+F`gRsq+9i=+Pw7vvb)NTEh2Y1_Rr^M$VWAqeeZBw^Gl!SsIL#F+ zKm?FYFv6gB=y*+mu>d=DSt7unNlU3Vbu?4~&WF6%fQYc}Kh;#`0cwE8`G2n8KM)>4 z0m#<-RvgT>jTKmaATjZa@%r4^*~c{=B(}k2+0fHIHCZ8nJr!I%zx(7M4ffQnkZKDV zd!3?EBHCk{iw2dyHu`{u0L^hEpCc!vA1Pg>O)IWLniZ$|O?LG_6~>RQXCg5Gbcee} z7=Qa^x8NQ9mR!o2F~43bB4nL%8)v$W~WU(bjqirjsO}H1X9M^SumFnfHi%*xb(#h)bl5Tbc zSN!DlF2%X_*iv~@Z4InzWSO3~45d%oDHu+rcGoswjPpkK&Cb`I83V_1h$k4tfVaVp z(vc`cTh6YE(`h3k{gNrSPRTn$cI2QSer}XnMUK^&NSADlz9^w^@vAJB(61b) z`H(*HJ=MAs;>?ZncsWLVMhq7&a#uojYg*o)YUoid!gjQ!-S>{Wg;78r7uj=9pF$e(N@9{`OUGu*^ADg> z!7_$b6GA7$trQ?xZ=HHv7uV^HBWfFVcV@z6xI9eM|7-_jO&F^@l(5)!grRGpdE-Un z(wiYHj_?4W7^<7F^3Y8upC*Udq-{uB(0>p*o1i9g*P+m?+dTaxGr#Rzl+RlQ_GJLG z*m@BqTn02(C1hQ15yh2r=)8SxFrw9ac-b-kF|LI-#>92R?o0f_(hi7pj?`a8H^UJV z|5TVUBOXP|8X|5d+8uGgVs*O2T`ejH6nE*2UBumo$7sqWQQrq0Bks>k)e)K^MoNo! 
diff --git a/tools/python/images/clip_image016.jpg b/tools/python/images/clip_image016.jpg
deleted file mode 100644
index 05dc68f17f3a7f0554712a39332fa3e21cfac93f..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 6056
[... base85 binary patch data omitted ...]
diff --git a/tools/python/images/clip_image018.gif b/tools/python/images/clip_image018.gif
deleted file mode 100644
index 3995b48b3d4d0921f5da9eb4320a61e43a507aa2..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 70465
[... base85 binary patch data omitted ...]
zna^d4NV(vtupht>sSU#pZ(7R(NbzNm#zs{!mEI}x5m^F9*lOiZIy}4RYn$h`X@WFZ z;s+IzoQc9F6a_v-#c>>czv>QLJdC8CtSNZ3@+n~i7I_~{*t46snx^!fv(uS@-RA6+ z2LwBtonjoNerP=^7(Jrts2qz@_h+HqSh2zmdvmDrJ9i8E?*oLts^F%O!c@Gv5NfQ{ ztbsHbS-|)3i}V3B@}51w^ZtByLuF z-B2cx0;1HZD%Kpu*$6>T5ryTjMa4Xu%?OH;(!K_ci5Ux($UsFJCdsm)5vs)LEwn5N zJyb?%25f8w^?#RXF{pS`6&Z@HL|)cdVi%To_A7>sHf12yIlAQO|LzIVJ!$IZ?@V+6 zOgvTZwSw_?hSV_U=RuUmSC*&49XqDmNR813I^%HYv{Ea`(J)n$a8Ql{DGCB!xWDuXD_ z_7Ut*Kjn%4pN*`TS|ZoM$|dx+|+!WOszGt8uX z+V{f)A{rb4A`fRkO6U!6$HuhHgJ(wBiW)$Lp$EEUZKz6e$Y#EA4B{Y3R2^xydKhh2 zXF0AHl4xQpi6S5*6_FB(eg$I24AGH;GOFB}@{_UiAx6G8)ey?{Hr!{(6mO++mY6?7 zi^AD)k|*>fAt+(mmIJO=lkg!%#tolFeh)6BX{z1&JgI6wa(;3|as5{7T9t8U^>U%6 zNCXmwc*djBl?-|fOCpMmY@mw7QZ=3r5v|#%NCYyH1X{Dv!e7L0&~~L7Mk3%yt*ftQ zKh`+H62{-c1#*n4K=Tk=3}8!CrlNybLY^#fIv^3!4@Lp_Lr2kZK$-`MsMw5=4iDZC z5U~)KRbYtLrqMmAg7N(r21l#_gxZabj6mOoSqvR;cl?4jeS=71@)}JTWdKELjk;tf zk*9I}TBd#q2>AoZSS|~}1M&jGVQdZ{pZ1H6#v^$^;&4AitPrE%^hLk= zK7W2rSzLbiV*fO@ULuf#R|~np@6m--Y;_8sGr9|#(A<4YtTwnv|ZW_9F>D6Ja8MY7W!T*f+%!C1dMG&zZ zR2WBDdI{;jq#2lw+*nHe&~$V+i>d2jYuSj1kkpe04u!IwT81IwIr7#=jk|pJV#Cn0 zP1bvssNQTFkR#V&D7T(}>0R#qld<7?c?PK$NAL%)CT_lH9iNpIROjKi!z{431Y7C5 z6@`+ROAqo-$8FeFWM@F{wgT6a5IM;)*+>c1Z-E_Nhm4;&q5Z^MIJ`C3Y|kN=`-*(8 zK&@Bx(l+x>!5l4N2G&Cg<>{-1e-9=x&~cSsndDK3UG}~e6T~P_C^uK;CW=uSFvU!R z$N<5X%7YVTn0ry>)o|ru4j94^iz!FuiR}`W=-ynjIqhZDK@*AO$g>>;BVcW1TPqUF zvYY0p0YkHIAy-4oKhJ?cpx{F|R+|;DQIBCTP$Xv+i(HZ}ltxi39LK9ejXHO>!-d#r zZP;*xfwvAvklBbE#ookpobp%Q{x^sLv9zkgz*kO7Ff}`je3dtSb`p?!RgdaWKxuQJ zs`A)6>Gs-g#F`%}-p?V4EwjI7u;*^=eKipjFe+tMDjstpLgSWFFuB0F) zkSbRHvrTZ%>S+;%rni!To$%AaF#8#z@l|p2aHpm)Rrf%hRqQm@RV6D8JIpw5GErx= zEL)|5c_OJ*#hY6M!Pqe1`)xxsptT(dl#_y9hZ>u*|7Jly0>?u0oq9;0X-bs_R+oBz zdb}U;)f8M0UU``B#x4e#ZkEn~VoQ1e2hB3YdtKN54qw)C|N-;n~@R=%XVd9DeDQX@f6gTyb2@zh&ihUb9?am_5$Fk7NU-a3NAjD#?}Z?u&79=~6?-&bX5(IgHBc?yG_WLoZj} zU2}i}m*kuon8;aVK@#fKtihP9gd#$V$c8eGnN`;#0|x#_-=oGk}womG?}S!!f+&OzEmmleIhNnF0&xX^3#-9D1#w{K69v z2QxKHkWHfZ&E8a7b7OloV~wZ7(RhVn2mKfu2`%}|LmkHGSWy*cHgBL0ZG9B=_b?eX zNGyL(qdh7rwyM429k+NDtEqTJfr^5?CauB*heJ{20Ga|wNctDtKXvU02{{z10IffM z{gLwI<;?To{fsp4(nZM}s|5@UNiU82u}Db?Vj;G|e_0s$yP|jh_7V%`e)e)YmwZe~ zCH*JoxJ&AetDthf44O6c-s|2&1><$^fqg@$wMHBxEWrE*UjY7{{|0CHvO$9Fh~`Rj zFRuMb!N2nwU!R7k%+ZA-j2olbJZ2ITMU@L+gQAHEmqYPY{To(13Y&zYQiaK-n$kyT zwj^W&mjfkH6=WdQmWc#<3r0CqvARV@ zhuX8R>}H@6pMCFk&3W}@E4TqA{CwlfX>kRcVv!5pC#jqJr#3A#zP$dk5zH7)ICfgH z+j@Ol)nb@D^dp{U@ZvE%sx(R2R*>EhZAT@(Y*SLC9@0erIYd>oVg>CbAsJ9mQ@L8q z(XN9qTA6zP+q4r4QIN$fj<#I@+(u7_1& z*4>a{B=NB7W58UC`Ucw=}KmkzW(b^)!9bRmh@wbmW7z41!=@%q>c+x1paY6sMjSkFugykZB!n9)*g`u`0!v%+*VtNX| zeynqX(}J6zuThn#FA7-kFmX)bS5)$7@{n*POO(W*#&U%1c-7 zozCp{^9#d7G-iR-OaRf%64fgOo~pP@vjTn2whF&M>MBXHFP_W7{cjK9Ywk`_jSO}f z&^N~@1bvHe)Vf=E-_G+^M$+f%p<<$@FZVDX3)D5WAKicLlvMN#qX)&-J9}zh+xxv5 z&QR;Y9g-D`rY54=A+2XqV)lr96CZ1=+w|0KId&R6z7LR?WZPG~3dI1W()lJa&5vS8 z7i$mRkotVC5Z!!jECl`N#n^4MbjV%QmFRsUS6&S39+h^sr2!LsE?&vH632be#*|Wn z>sKqs;vPVZy4W6lBLxR2c$xfu?6QK~%kJ6X+%W#z)${Evd(S@YHov0yD{jAL`85`f z#<0F&e-%~h)ks8~ia475aBUwu`HXC2qiZCP{|<}HjkxF+Fr)YUJXbws#2RfD)Bmr# z^4-Ip+>5QN4u1Rgt3*Fvt z_NU$tCtuzE?%ZmB9$mBX{1|L+qn+uLEPUNN}Y?O_POe7uT^jwP{C(5)%$`-O6f(pWo{?_?27WN_$kX(zQaxXqLMATFANg z8kXsN;7ckVEaS7-{%2K4<+GdAU5TSsqx=CN!4kAEAIAuF|uX*M^lG~O1 zCdXlCijQAu_jQ3KPh>mGobJipT{o)d;56I5jbKzFWBN;LXx*nUmqK60tUa0Y8KF&` z^bQUPo50uuq>3W*!k7=@M$=WS>AlY`eRQ(PJ2whRT!5R`V%;}oBk|Khp=#n!hYc9!V~43`!ahTB;Bc};$z-*S|04}SCeEXwsrN9*CI zr4eJw<&UqHsf#?b-Ty`<^ZU2(+q@hR%=W6=cB zY5pZ_`>~P$^N2}l?I&4@{{2wVyIlUh$06Y3x^s0w>G~g*9^}o{S5OS{Yxh|<>Y5$< z`cio3%))HU@zPp_qY+l2Zee%&buXCQ__*WFp6=k##Cynhs;;>OyFN3n)$gioPCJ4R 
zbL=fiq9MvsOSB&+r2H(u6IhC-Tx?4JcR5$$TgSeDg}wptug>v4F~x3LyDrKHc38;D zw`7mbOf6GJ&dX@o)>;@HIVF+)s`I0V9ak!7vRAM2fq>c`eYqI(Ycp5(ZfLWwMZEM7 zX{&cidP9&v&2)u{T{~oH?2faL@jvn7`Pt4kxmpS8^PWrdzAEbmU*@N~zjw{{%<42n z>%fCF&1Cf;b*2r2fYnSda|HR*wrJ>uCenq=}RpM40F7G#ed#Qb6du7G%IA+=> z?$LVpBR5MyPR&ucMPkQ4me!7hD^syyTp!fkhgW#1%KJJKJJwk(H&AL%5Oaxjuj}4f zrkamk2>GNdK&lHA{6)4+c_pK8~poL`{bB6 z->G{>H=mAeJvquAlr47DJZ`ZNU3LG$MDNWN*U6oS4qke{@%6*E#HWujNjA^^BbS%| z3;pk2@0~k@yXAYxYbO$avKAcIl-?X)DSZ87azc1=r=wKP+4%`=1rkC!^=YkqMu55p z@8tCj1^Rs}5k-2_bg(8r22Fx3`!1k#U}iByMG{~&K_MdI##Iav1dbD-6qeC(7G}#S zu3HQcB|_nVRR&7z(WdAL5);>j;ieEF1vY5KfrRI zE$T2lZVU0>;>#)qg_99H2TFcZ=kG%ib5rIS5VB;7$;BJ-FfsvZ>@y|Qdm%)eQ)<7` z@o}{S_MVz{97vXo6pBxS=n{U(nNsZZD?*SEX@?z)*OW-TU1BbtC3c*hiGTrzDXcOX zNdn})Nq{f|;3;f|lUVd-nmR{B9t$FfX*`rqDqdV~GaEYzJ~?u5Sv1Myxm=ikW~l_# z$JWGZ33$UsDg*v@OQ|QGiR;D-8;`*4k`R$s9&(tdWd#0Kj$$$~1)DV=X}&qDB4x^) zy@3hegt))YiZgzuspTMq$>>ydbXSy$6$w_|mQQ*Rr0_;XjSagErQ)6rg@jxjj7bkrv-0C zMD7ncAD&U&bFJ{dAO0b8S^TD{Ow*j`{;b60d?ps0d~-&vD)7eJDEvde#dIHOrw{`b zM@Oef|5B|-D~@BwT&)z)Z@MsiN?{NQGzJhq;1S_lS=0j%a%Frye4GSKA`>_hKN z?*_M|DP0)VMpaW7yc#OvIFHp~<8LYgMw4Y~&qZa5Wz|_4?(8zHr&5NS1b`*fQ%XG% zsbet-8FrN!ckvygh=`IQDLPV?AtTkr$8QLw4M3#~(3A~Xv-q=sf=gX0rd@nHGZ1=I zge+`&f+W?)LdbG}uU{%@rKiA6khfysNZM*-gf;+)vJm>z3SV4R&^%wFEqIp%FX9lI zllJAovR5e1Aj(EV7JFzKR=_Pi8Qw@=8?&5f{UC3P^@I?cqU0fTLsKv5{%*kRm z%IkQ@Vk%?+@vKN9LYfE|GX(=gK-qX`XB$SkSd{DiAsxnnqlywHldQJ=uA$4x9*qcb z4&rvd+wW@8&s1$VUOojrEB70GL)MaIRc94L?oz;j0pOM7q1?mxJc;Oq&%$Y%XhyjV z9a00p`?>t0?Z{qsjR^ltDF!;#+4PbzK*nKxA9K)kFDSvsPV zRU2KcNF-McRHy!k2bo=yeVzLhcBri!cNk#~ zVAU9qU>8w`0mk>9Q+z>J3P}7_5%y?%KeXvtjo^Qi?K9SF?p_&Ika@a2S2CQKvqCn4 zlOT0YlP(E(aLwUbwd@ELB z5?27fEoJh0+d7j9JVuo#23k!aDCFXE`e;YVa$`1LDPGW(OrWPDR3;&_O}^@DiX*=2 z!Zd^uwO*jH=1!;#e z!WwfRBDquGS8$F;Yps|2mVn<5aT;w`gs!$bW{>iGtlVPb$wTgO3fb~uJY(nTu7tHSy~$eQE@Nj08X+l1p|nf%HnNk*xi3=MB(QX{63xPW>u(hAlItTv zr7-^UBtMa*!BV&mQ{{%tc6~w?i`w#5ctnk`sK<}GPHdzNkG z9U)RN!0Cm;$cP`sgT2KV;wGOtnP|u^zeJMKrh@c_R+~8$f$PodwUE6T(h85@!LP(F zD5Do+KFRlav&2fmQH zv}P3I*N!1rnG1WV6~Bc4RL!gyki=if|DLOyw@o$qckJit zHA)vWVFr}zOYuDuHD%+AYGofxNxwqdg^VM2Y#JI+(>S-pxuiSYGPl*r>fUrU8Z)p! 
zZ$GEP;`*A`)7JUBWZf`Msy62Z?N5`+mR;SB64(( zmgVxno>Mm!Zi#->A~dyclMpyY<+~9`9YwMzyHApKoK)Wc!3m8y9C4~9@ePH&qR&E-8L!G?7TEn3v`4q+(gJSSNk07j z_6PBupDkDjzxly0rqjP;{SHSU{@nW|P_u+klK+fOk<5A;;nNR&1}>k715W)%0Pa9} z8&aAi+$?F1Yw3>YIU(4`0oMA?*l zT5m8@jk_lLm#-fTMHrqFsNq2L5=pDJE()MOdS?Q?3CdFuC{M+2^|+qRB6VTOm9U6x z1@wV_5dr>ZP*;=V7m&me<%6zcChn!G3E9lqJc%@MW8$C1&Us&-q>whozBK&22WoHt z5(65!q)7iDUi#&j2Iq-uynq55574WkH6#M-mzq2bQWNVcvq1_8NA7Curlp98|-|_gBK=vL_@;KrGpS@JhH`@M;caGkui*I(HLLz96Gal$7*tBEcf&%xBQ1h}15m zJS>8vN?qs|^$^mt^DqjyTHXIu3>&Ig$k_U$4OP4lF)w&}^KqgTX9vvY*I&*)IW4Rw z3tX*La zF#U5by!nh7zgmXf(-_YbfhDt8I6RF6c7-}fq)}Kq(P+BjT|W; zAgLooLTO1IDbgX0{3vxQD5+ra+n@K3Jt=B6OxY*vUW->$0>(O%9s zu70wAp-oX8>=d%VJb+7Jt-#kBXO{LQqhZ=DOtzw|Oz@7-0T9l#uR+wb07g)AfBn=; z+0!h_&fBpuXpD)fXesK*(q_~(OFnak)#;kLTR9)Vm`>Fww-g{<)+chQ*fl4O2ksH27dVO}##0ptzz-P}gbL zO(6P#XCO`qU~q=myW5zBVPGHzM_E4+ge9RXkiYbfBp(p}Ms}YSnLe+$HF(e304q(| zzcG3W`t^4f2Wi5iNpUrS7}i&1To2t^2mW}&&vX|sv&g6KcO=U*Uj$V6>J`tsFRb@0xy+F`XxFIi9lxb{3m&j;BT z=G_nPN$u5CP)Qu>uxxrqh$XbXOD`WY21zjc$sXIS$kvayD&2+-%6_rbkb{I%8AKXt z(&YK(WgCT&6<)m^J(T`Q^9}HAuLdMq=4%xX21~g#tCTe|ye?oBr~pw99>u+R{%hpL zX4}$aEhBi5+R<9uL6+U#buyQ3Golb>o~u+HNja#OHaDfzH=547sCIbSmmliYp1w|7 zlFfZ9qv+fuHtDG4T_adNE8RCou48JbeZDfMO+5sUO1D&I^#V+mQC~Vz`o-CnsFhi{ z)1-Ru+_z<^oJ1It$^sc&_I#7`Qg<@V*I7bernU4b(U3>EPSQEAl`aEh#tU)~(q_Hc z3Ujr;Qd_E2&0eau4`n^w_f^PsaNlvBSNXz7@_B8LYs2<(XQ8*4KRu;6{zy81eLS$u z4nIef!6vh+$Gh{IFDD(01Nz z@X>5bxx&c%d6hclQJxxqL5sTO$FtEbU#gysE!P6RxouzW)mwipsj5$CKgC0pXiT@E zfErtWr_u3?)%HfmuPc-T*^%ehoptWo4XI{-l^c`J>VEBT*>U#vlxppCoxNzHY4zOX zFO+-7ezwl>=%Bp%Ny9~zpBHZ{iq09ORQ)Xev91`IsJ1Q4_~g#~jnli;LvLOc&K_)S z2x5OOUqY(>e0tyI=klHYhY>5!EFPSs)jtumYP{58+LP{m!0hyoY}b`9K4ComRq#~l zb{E^CVPq0J3*v(MnV7e}vHS+B|M1Ee7til{G`eH9=cXkm+DGNvZ$D|ScQ0Ra4v5=^ z2ty`bUYyY#k6rt11%LLbZ~TPSuJWsyk@wLHam_)W-X-qS)uXmIHUk0W5vTUIc43cX zBvOLwIP4NK_PNq?SZ-!MyNw z+ouCOWF(Y8)CTwB8tHkcn==Q(MCM0k+GMrFY2>FC*IaYoJ9~DrJ6^H}DoRC8%O7oq zjr&Ddf>Rp#$o7GhcE`H!`uuL?%xovdJe~}Z0#0e)B?kDHww_5%#;zTndh5Ru^2f@Y zZeQ`%%e$x@v>l56aJ@4`a_AlIipp9Ojdg@kIe9-rJuK=GQ0Hzg#E?>k5p%1-g*fAF!fQO9fRC~@7nV6$cc z(XgrLcz4gX;?|PEO-;!Sgi2Vxmknpr^iKPDwbc8^3*H5f6Wcvyr?t*JE~@T`51vx{ z-Fd4!TY8egN}=R6XD#@Xw*k^f0`jYzTAI~9D*I15)7{;@bM%f*?4r;0*$$W0A32T# z?|pQi$t^42mEz`q7Ni^VCvT%P*m2g*M>nkhgWzWk_D9aj0rO4sr#$7S8vRptj#YQ% z*qB&2S<^Y+D0{p4=P&dU~+yzH_WDO>sXX=E&8q?`AZoKHZZ+nh^S>NV*g zj^f~TJ6Bfw!kQdcDX&I5So+-u=U4q`VO-_ns(MiiB!_ANB66GHEUt^rY3dS0fG8sl zMtrcqOrdweEvnu;k8{H;p|35eBgti`M`3|z4G4h||J;~8)Dqo!DmEuwVm+F7v;SF_ zSA-JC^5DKC_SX54^P_-d@7%T-NIsFstJcaAYH(;Rm=n@aDKtsx+&0*Vud(85JHUlz zJmN}SH(a(J3+|E?e!fjm%KMgcoy+~^pryx{F@2Tx&sPLg>m%Z9_Yjz>u6F(MFsqCiaYh{9H`x(a5KB%^XF2!quHUzj90QYq&! 
z6q2bJDU>UMO7Hz(cv%x0!@SkQFoc&oPBmoES-@e)pAjr#foVo$U2qGXpu-dLfo<1s z8T+6W&>ktEUJ!+@*JZR@pH<7f`pEHO=e%6*a~HT6&;{sjDdHd|6yR*}OI?{!))?L{ zeS``PV1!PJ=wNj@OI`wRu$WA%R%zfTm5M|*Fv5#yFXsaS0Aye`sF?~AK*fp~H?9F} zB;M~t`0};AA8lo127bxN2B8eP*}W;F2UB%dS)F zm>?Qa5QPY$-!clq4}da`K>DHL(wh`*&@n3n1wmC&fhrIi1%#`5$Vy0nh6C;ya1IO@fnvmAiI&qm&h5nZ1I?3~ zcoE-t5)G!&51_DJ0GbHyN@T;m0_gSPHZ6{!SRSJ8@GgMEJW}&6Y|tR}J~a4K3b@1L zLTKsVDXC2}r?DYeBBLZb2ou}N572j^osEe)mik}4L1VWo&KEppCM5CJKd;Uo;D`WB zCP}%F zhLRL!nGw{-vaJm2C*6EfIG?a8p*q4W4^ezjOf!VbP*D{&=+4Sc6 z4@VvY&fx$XK0a9tY;OiYc1Z`Z{0Wwl24be4qsj7L5eG#*-!e&(l9u|%qtKy=Dgjby za{{dpF)Kja*v>lAE&cq?EUi3O2Oobmn(?j~@pBWA289AU<04fcG@3})Z|rF>AIvly z3+2I@iQ`6%7Aa+cKBS|oC94JlEN`jokkrBr~Nn)8y;>w7# zC4eDlFwPjnx7#1D^vvhBOuSo-XJ_qba7S)Lg=RrHJGLA1yWqQKvQRjPcapT%26veR zaRNQ`jRd7hkPi{z(MIa}3YZL$5QnFoMj1rb45A_mU_?VuvIawaJoqJU7dVM^7vu~j z{_dT~3ulDTp;0fWOBL)){pGXhSnbJJ20&*dJpRx2(Bf@kG0?c+_%t<~(R3`;lOYYXZ*sh6akUf%^(k#E$4(A9YqN_-D zLsxV%P)GvI9L-37t+3)ut!2Ud^$Exf5HB7En(R?rBl>0xg<;LBisnQDf&N1fC%y@U zBm3|Ycj3AS9BJp8Xw+V6w`Xn88dg13#devJV^=}BoD z!-uBeL@Wg@~s1NaU z8Qh&HL?LnF64=qR4lrVFJ*XA7m}FCd)hoMZ+B@4&cFm!5EJ=8_db;T(d^&*{?dJzI-NK@#f^oYu*cVGbc4%W?1+!z7xW zFPu5UFCP4Nh_fE>ABab_fkdhI*tQ3z6Cd!RtX>cv6WIsEY{a9Gn-(J5Ad#0C7ZdjN zjTr@So5)EBnU1jzb&f^577r08n?L~z7Q8|vEuQ{M_h97X6{jB@$_Sz`9HjQ`!Y@H{ zdoyN5Nl%YSXTHf;cHo0CJJCp-G<+?A-pv{{==S-%}J8oI_w zi(iEhxh-P^7RZ`=2FcOf0+^T^Z6FSeNm?_jIqQ_uw>Jn`y%GbEBKiss4t|^x1L@cG zSB0+7F7zA7y;kD&hq(IqV?p7J9Rts8jA_cGZ+onP|1QJVKJiOVl4witKg8c2&$k_FQa5BCx;|#` zEH&!O6+0A(9wr-Uo&(7=L{xx8b^!_6t1m=6^CT$X*i0n+waMezFd8D?(z_^c){VIe`3_*Vc*$4@+BhcYo`o}nvLmfMO))@kWAlwp#k3SgzIENr-;B3s~G(C@3 zFTUaVXEZ(W#54jFP#SW-E6q+}ypyV-NRlJxh)HLzeb-V4k7 zAT$>1Ivelp$-syMf$%W4n=$DF|49<2qS}ax#BG)~KyZr~;{>bv;mjdhQQdZxk9xfb zos_h|#xY6Mew`pj1%lR2iToKc!t^nD1)_V}<3dPqFa)52`q2jP@!`QDA$ z-k#uRzC*~s;OOy86upGpZJ)`=zl7H;N zWGn9G-prKXd{-ezK-bqeTY!w;OH4Tn zK_K?4TsSuziuec#KZat@DALhXBq$s}9IL?DNK>3Z>KmAPVuaM>u?R&e^;E5ThyjU- z`vwwv#e3>0Dnu5>j||72kn#|x{}OGnO!7_MX*5tm*zv;i*@E8;Qi! 
zAxP@|k%dWSce42by(s(L`Nk6?&HM=caH68|Dm(7?trc>&$F;d=*1Y>e+UQw+^bsrY zmoqQOcZAq;e?i#mgbN?Xuu3(o>R4ST05h#b$>!G<3x_zoo1sm=w&rJQxy zIvXn(9VksF_{aa1u-UR1jN}dgfpX=AsCXa*Xciv zpRpuNj8^+ELE_BTY4gz;gTG_zm0&nX6bz>+ejL=|(2<40ajv#RC>Wf9&mkH!B6wE2 z`7s|E5F+k@u%XEkeNm9U9z9wZki+HD;)X;4xMV7z4ucP2NW9}*k}PC|Vr48B^dcul zW06!2;z1o0ZBgBjv+PlWsjdZWsSRq}Q)}mUYx?1( zJb!0R-0DK*xkerh2P&~Z8-g6tq5_}oJ76m{=XsB*3En|$w9~@EoBs4Ty0HpN=h+*h z--%~mM(_h4KXDdbNx$F{20)&%1SZ}sL?(tktedy{9lak~1p3x}O>2tI1nqco-tK&_ z&K?d9>9=MOrc9z3*i3T^=-}FJLY@4@c7GIxYRlR4r*NL2 z<1N%PQO-{9b|iARK;#g8&+NTHPESq6d4b!?UyL@157+5@sTCCAd^<>&Qe^(3p>-uw zR4ijFo53^h%oC`q(OSVLM81&!g?^mdgR1wHYryxa)1&l4mO6&V7AnIpuU%&#sEb^Y z=4kT(3jW;U_x<)vYPF>N&Y@FjP~)Oyk>E#(bAc*WA>ivxSB)i$Zg8kf=|JTi%I=kb z9bGLhS#V*VPU&D{4Ms8x=8|gLKe_w1Rxpi&_t=Pk@ERat| zUn~+Fws(B1I4T^~^{^`EB>Z9VYJ2MU8iCH#rrrw=|2W5JmB^OfJ9}|tzfbb4^M2Qv z|9Fl@6eglRjnv#oaO)2n4^u7V=iHXe5bKO6d?WPqR6w!tz;^8Wps(Bernv{j1G^Sl zQhuSU?Efv_8UFA*bwB!6-QPPgz`1Spel5AM=6=*j_s?7Yj?ean*?y)4#n?t&_&MO@ zo}m0n{FAKz_*HwM<}cEZih`-4G@r%^$zNWJZ@9crOW!mmJ9g(kMLId=H%u8_^Jz6P zSKO_s2q7y-rRMRC@#Pa){>xJ{d?K@wYAWR)9Iu7`?eDvwZcz+t4Zd6erA-| zYIT_l4ATKM))HYz3=2DhieT&axF0iIzct~nV?IwL>dkP$W-1FJgDh@XQ7yc)7W(po zkg=bX%N~GajG#U1Gc)~t==#KKKJ`o5!zsbezuW~b*>${KXU)1HyFS-w-GTJ9dT_sd zT}UP@A1D1S;Nn-26=`Lys6~q$aB{`Y@f|h2&=U9i>S#c%#l%E*L2{NL9dpOk8y!QR&q$3kYLIFJN3yBmhY7lzpL(Ks9UR= zWF{+fKi;*hnto)lonlb&%$sINJ!B8kQ>s{F3uvVtp3h0V)g`o~q#=EF>}w71=gXlb zdhm$X!X(3nZ~{GQWh!kc7clT%YBsGYl2RplncZ?!&l7B%&aS^;eI0GzGFQZajZcsh z`lYgjZ)Y;SuhCfrlB>e??bzHGP;uX9Ft+7eJvF~~(SvqCdwO9Or}oUfHV0TZ;$@;T zhnI{lYslnE1gddm{0in061*c4ku>a(zvVdAW;1rT!X&o`I>)fAB1N-$={ev>E@Mg> zop$4J(`t6s%!zeg&P;1x{5x?Y2A|Jqayh@-jQHAjzPC3YnX&nyfa6-5;qN$TH6r;3kMuf`W9t@(dZf9$wZ_e%cRx~=zv zP!{I^5p{iw-{OruW(Q>nXRVE6R~26(=-{|k1kX*LyqCx3AB2?ijbW#0Rgk($sXq0| z7d!Z3b$=(LiO{i2(!6pV5SA{@zP{4ABiB>y*&5j^{V{^LJ*6l@%mbASj@`9VGk5>E zLOeAtsVd@p+io61!WmqHg@KCRk{3g$C7;u_D`Qy+uA~C{Q6$v}NRSxHOZy}w|0BTr zQ5`q)H(xwp35=)Ag)mI^b24q+82!jaExw*Ri!@l%b-yF#6Nu4@aGh%w7La5`@5rm% z%&Z+|Zhlf&57GDv9|O_*$jQ4uBlKoZQ?E5Tk><2PRrAl}Z~GdT{Z%S*Pf{ z^3+Jg@&{8!TQ%NPU!OKSfB*DSN&Wa~_VtucUE`6H6^RCGJGe5CT4;hf2jh1MnDmvi z%K0v5CP8#r$RfH0$0pQ95~lBOYcQ@sm5q&=acdxzCdwHi7RTmY!mJOcFtiNm68mwn z^cdn3m2b?vUTY*Y7aoZLKu{icun8>&rbfiMVF|IU*nUQ#3-9C_+pHvB3P&u|8w8nU792z8MP3mD!D9{q-j~Jp z-RS7%{HXcf%~3`2DP1k<0GUPUiR`{{{Tq1E^xNQ%j%)gRhF5at=i9%_I!Ox-TG&

K{NB~9X*g&9T1=-+_P3Q{OVk7;)LRe^5sz8aBA#!k2D@&gahmgfbv?ePBalgOujkdu!$DdrSNd z%6mw7AK)JUgSvXc4OAFaaAz(P*=}t=5o?%kDZb|M?3ztZVdFdei4DJXLxw|6{#$si z<|~v=>87ZmjkX(-?LBvU6`z6C1Ba{p*BaQCjA!f21%^y=-}q$=w20n3UAHlpax$qf zVxaai_*16g?@|;WCXW3llTTsJAhF^hkuZSwU9W};Q6f5)IfN)o2ma!K`S-Cb{y220 znbA%%3jkA~0a~v@EDm7trGD?G zBH=^`4yVwp=kL|~&Sb>HfH&*>qk;@TRg)o}{-ulb1BYue##Hn%aOZG+W1WHS0}2xW z=r(CbapHxO8`n_YdpJ&^pUBbNtA!#7t>FLyAmi1m0jI-vIJPhJ2Z6vK3t_JX1II*` z`c;4?lJO3J9#S>B^x10l6jVys@`&3=VgMNIe@ciEqaZ>`iP;{jtWbcNhGRS~7Ww<+ z4`?wr!s^_6kAoxg6sdO+=Q-qd-YGbnV-VR#M{{5L@nl_o1zj_hqswwfSE*(MGH+J4 z6ib4`+BUrMDDBBzrTKe5?8M(r^%>;T^M)3A55H*Ji~8Ynda~5St%S#x5{mIh+MhE(rG$-fg?-;icIgTl6RKFuG|a4K`4b746SigaNG)@6*rjE8!{ zd}kGCIM!N#x3-u?rT4s7ul&X!Li-k*+Qu$u*pZ!t#FnzI;JD{5U*7^LYZHYqWfON0 zZ1OH@zOI@oAYLa3XO}*chR8pxSj8!h#}rIe*$0w{?1ofVa}^OzGf@ngq{NEL4Nbo8 z4R;$>?_#A!wYv?LN@B^H3kKz!e;RK`k-QiW;-;n*Gs-P7wY z;j$2)9wQ2?NMHytA5U;}B^DVj1kIE7v2(p|;BKzsDZ)fW)VlzLn6ND9l^iyQt2wf+ zMP1_Qyi+CJ?73Mwg%^8K==HP4&bf|tbNX}du9)z^A|~~~?1SRDjCXS^Oza4E1rj2} zRN1edRBi3S4K9OnX7NkF?6sHvPf+bVw{$!P|JL}Ihz{(KVR>Cf65s2K? zLeOJs#RWf6ZJZpA%92XeKkS|Gdvedo4O~w|mO}>)!ASf!CUTFjX%8St(hvg_h~JP) zYtlt-41_*u9F*Bu&lC!5)b+wMNb<+H#YG}4e1qxFDxTvhbd(Ap<;u9LL-=qC_*m7` zNTM(rpYnx0#^j-Yl4pNMAxaPlO`cvAu!(*yd!8v1@!9$?AOx}p?fdjPHQs4!M+ObO zUQcAh;Y6{kN+Cp7u{f9*l`({hrcmKUIHNxs`KUO>E#RLe@=84&p&+JF;@q}+bl0RT z%@~jeu;TrCK!pUWf-zeTgbgnn5CGx4XH4q-F!Y`=AgZqd;)C=`fD;5y%i@~&_E`J( zOj3y;4e{$mY{zt`SOxgD1Uw*krH2ra;gBl?15qT=MhzAB_ys4@|KM_ty=f z;4ZsWq1FWkxX0rW%SN**2|;)jW==yIXiXYwAHcx&ZHw3{tPUW&ho9^6bvgtjRyoA=X@@_9Ts8*#k`J=Py55GcLrx&4HM3#8Sm%sUB|y zk-y%(P`bMvxtB)%Y??+fwcuhjoHx*?4L;vO-0}*!1YEjwd)VM~tq%>%OR|{00B{Li zZ^Prp12jw)$mIAC@{#QzfT zjw_0yNNKQ7w0-#K3(Rzkbm;jGe2U9s^B>dqvI08qUWde9KQM0<*Rk2b2HqY}i{cs8 z{?P9`Q1v~06IC3b7RBqn?(UaiMr|BBsLHq>y%j7Iws2a%6;v}hwKiQzF!wuN8qjkZ zUU*{JosMir9{UbUJ>w2t@ESv2aT;LJP_-KkMETsA8Rk--rH(FPj4c3rB52Dke|Zdd z0D8c0dBoQD>*q*O!L=m*PR*5^uBGKvW(*zf99L65Ekj(A?Piztxa(lNw@Lg@H1?_O z-GQ%x50sfd(7H{{)21)QzS*qY@lk#n2+vY5jVZH@*lu)^^L4KD zR|Moi1mPiT{GxS2=*jT&?Til^z73(iGP~Nsqko3aL`QK-%pRZ0d(nG-p<15&`1h2! z!#q%(pd8N`(0=B^d9^-SUp41;(%sFZsdF<&Y*UR3P|zy6p_C)GD^*A#@IJkjXBXhK#>u_48;URy4F*InqmOxa6`Cq%@KpHb(Aw z7|StXHhi<5IkcA<{#Uxa@XC0x^^>_PYKd2$92_C5A=!}~t;sC{ub_cBoA2YCPaMR4 z3|e<2ef$A%X6wsIs(ytgTnJ1)dNF$aMS??w+d1vUOX*%d^`f73MtHO;Qc!QEl|`J_*Y zUH!fmE)_r#;(!>&ODHaEJ%uX)N)J<1?XLI7fd%!r)&kx8Q#eG8Lf|-$9!Vi_tGK*9 zcil&lx}i3b;y8miX7*JZNlY@3p~5`u3(u5%^MKyqKwlc=0nQY9A`hbmotw!;>M9*p z1ymBaf6(*)dJN+3sc|eIvjo#ZqcqwYolFn3B+9 z*S~1_%)T-rDf}bD4W!)=&|3A0t&`oZxzU-U2LcLu!(QXG5bF! 
zM{d0Bnr1*E+A^{`E!6fj%#t){)$S9zD?Fwc;Z;jntp18wsyuO%qm*qBWE(xz2X+rZ z^(^DWCa4Q@U(OBo7+YC9&;H04ti$(m#?r9a!c0MyHzj3T2jMQjq4iI8RBAv#S3q8e ziT_Ym{me_P$v4~IX0Ux*P_Ek=z^UB`MwLm4YA7j{wp3NWl#EPjlA3Td)2p*%3Q?)` z1L(p#%XDUFZ$((1}I>b(2AHhN9V8Z|o2Yo+djO>4_FIc81Y^zx}TH5_i% z%2)9yE6Ur7r&M2!9nVr(xF9vk-UMCNlUHB~$V#q**A?7qe0lGS40c*&?1w^s7A)k| znnY88Vy16P&O#nK9$EAFm-ejP&9wc?a={A0Iq!y=myBm3ABp%_MSaploToPsZIZsV z==QGXg5olFMRB-ch-}lw40&GEH7)7cebqiwU^cM+q3Zll+~{i@!u$mRh)h1vTOCwl zeG`k3!Pb6fuu-b}RP^=RrPb5vjk*tNdm~oo)~4S*=q^ANHJ+P}?#%D|6|J^S`q_+#LwpifbmH<^O2; zR`{)InXU}Y+34h5H()j)rCRLmFT#JB#{ z)a&WL@^cuto<};{rlF?z`_$u$LTuy+1uvrbM9lk~+-9ypoJTza&-PVc650$tlvM;g z4OyF+KhWluwv;MA2d`lU{f3qAjeWXyclwDNEwRYa4~3JHvhATYmL;s2MqSdf?Ny6^ zhVBRy6D)jtg%8&4fG`xVUov@Y#MAZBCLP~@YbH&}_$5lP%KXJGt5xbj`(sjiTI`h5 za88 zs&qH%_qt|es?8V)Jo|iV(sFqQ(;|BItm&0VSZ03Ynss|)Cd$C6VCtUZr&7h|NqO$b z?A)#XH!ezPY1n5Ul9U($JUJEowZ8{7#i9GmL;}|kIo`EumFbaQAM}0GwFG-fF5;KB zHERJn!P4%9&#gLZC`kBUnI&`N7*zZJG}Fx zA`Q#2d7!mp8lvhxFWGK4Zmd-3+|`E;hOX{=tCxlod)^AmIq?sS9*)juz#a!CPl>p` zYE`ESjZ0tueQo$Ak)ztK;f5j!#Bv!CZ~eG&(h^#j`uF*?JSJV-StgDTx0!F|swj40 z(lN)~N%9@V1K@$5;>LywjI;9|D>oW8z zN85L7<1d*&MKa|-G((6ACRHiTo#*reA}TjTSbUuXJg*h-Uv5saDm1$7nUI_Cq_a5HEa6}?`0ww> zI-WyKy`3|=F8ldUC$l4twy*!u?x;(BHK|?i?FOIu@Mve_3ngt?O3kpA+td%5^nuTf zyW&T0e)Q#%gT6F))!ORS*~dKAHaD5Pv;J~< zzme#{SCfq|R_X3~S~H~7fFI6co-+ELiL{TIZm-p;i??!7#a`*WgeL`0SY;;@T)q`> z?NT3z9sLPpn-|BSPisqj+19*J% zj+T_ci_BKt>OWRF!Ih<}?X|Yf?+Uzbw&whCOxH%iKF*`(g^Gp{UUNl_x@W8wg@0ew zQGJVuUuacTB+azFF4|i0?^jk%d;7{se{Rz<$?58ov|9yZEJ{os~2^iov-0yImPus?ORz~bv6=jPp~Xv!X)1RItH&3LMcGv@!;e^mA*{y1^Kfzro1`$1}b zG5@Mv(C{NZTPcKWC0necc|(jX1Gt_7@Agqb&}^DDzmgQ>c`7T`y1# zZK?_&&1~-C`4>J$@z~gL-*F-I&K@;1%SI?B=A_PqpBY1c894uH*Z$=&dlaj4!r#WK zOHHQJ2API$byq&saCjx$N3GzkDGuzMSDR0kTmFZfXkq*0m;lk*F|EtUY-K`6H z(dyvjQ*;v_^Ve4+#XR`=)dIJ}_gNhO&Yt|@>(+|;=XYQ}oUj>e@ynsi>G53ae=2Ws z3~oPER}3q92&CU?m$-S-lca9a>aSN~QJC|` z8Y1KqVCj>B_4LnUF*N<0PDV71oBs+nxsQ|UBuHIylsK77IZtFxd)~in1YG$i9bPs1 zHcxUZJ$``C$X3kER7~#e;G5^)JjX($ODfZ(IMN2EH;I}OU$i|B;6nqH=TF_fIX6~j zgu@+>iA(smwaD{9ZG%D|4Hc>n2KkE2FhGJQ%Ij~6oHO!(CjrTEBgF6AycCH)|}WmP(h|Aa)akYre|sA=W~Uy zjzKfMucD9NzQW%+)_$r(WB%Rx?DeYWinM$I2NNPlM*AIHe%vUN@cm(jsHwjcS~T#r zNc~ynr>DGHwFW~H>*ZSDVxTX}CIYPLB4r7(L9S9~KT0#Yp+Ojac>;GSj(<577LAcsPoW%I zJTrPxRyjOiwYP&JKlMzn<6#@7?Ao{amuWm04~?1n7f4BeQ;{da!BQSsKMHD>#%pez z67Ly$uXyi{4j(<9RQmGt&DPUugYQu>H?IFFE&Il}8)IFmNpmxujv`x1NK`1UROfaf zpPF$h%EBt)-<9^ZCALk~OikUJ!S`PENNg|n`0Kq}PD#AYqjDbMc1pq3!=1!(SR&{|uSGFGf zQ~<|A!%I{~x(qp4b`Gp^@#oArNeUy5ijr;CQ!Y1B#6W$wK6ib8*ei1yV3ieu8_~vI zT4hN2S8M)y5YZuGp*HSpFQBN=#6*LSgTB2;>~xHW*^P+tDw1 zvL+WQU`_TM@n?`Rq>g$15b?jqFw1i@tPA5E+F4z8P(HT9wWP2pRb zDwpMrrq30azo$w)-x$l$yrijSvZPsS+9%iG8=UWg;@J~Z-M;}?05F3!SfDP#iE_r0 zf^b}e9b!QJYcLl8yt)q6TY~|@u*eb!9TVoU0q-kGuIh4%>2j2EfFePT4wN%`I1Zo$ zVL_yG#DZ7=Du!VQFF7NIJq7+|9X=1KWyOg&}X$3TaOklh|o-nt-)0`sLn z_|_p}x?nR5D4kw&!-4g1*UTx5zj7{IB>;y-*i?SV<2eLms{UP=l z>g8h4*Jt3y#Wtq&V0bXu`u*TJfuDxA_L#0WTex93leLYX1=6xC;;&|WVJ|VW!F{wq ztrcs0fr_oeEZ4XlDUNakHa!9?o63Lhh&zyY3KQ!AI|2*^YcO9GfG33ehn3zhVNj)t zE@X{P8>8sDGjiRY5rgM<1^myPu>zo9dKld>E=f3!=@M8Tj&qQTa*Kz@S%CxBg)R{2 z0bc1V>W)_dP8R~i?E}b{02dQxFvTIBuRAjW5KRKGx(?B#Ks`kP{2FY!1YxuW8KgiR zseD%IFg<`V(v{lss+CSj{xv_N~&G6+C${ zS1RSdTKf~;IP_da=G zqj?l~?ZJPpz!1P6uFLptUOD*WPD2U1fxXdb$vu6w`)@FMq%wp65urq7`*(b{_Dp1r z0|&ry3v*1E+aFH$dsAKm_O0ZoW3}IPpv5!Pd5K^@#ILr8 z+hwZzd}}JoUFiSfGdoA?Ab*#rNX9gM7LW-lR?9E9OT;h?F7bLb?2kj8qH3z+5PG ziU5pDLy)zfG0ut!3xIcfU|>Sz^+&Ezu1G%NTr5tAZZy+FNRRpC&Z$Ayj$jPBS3F8j z9X_~n56N8;m<}@Dzr22YQTCbX1NV&jolizB8DAjFZAQn{$_+ z?V155tn@eD$9$zB#e82!`z^qNwVDkv49Sb{hO9_KK^GfM8u=|U3OoB`Zh;#&U6cfT 
zT8>i{g$CdoUTqm(SGqd5*1hXGUNM_J$5%Rwypkvf?r)~de9l<%u;eV=qAjGWC&2!d zA$<9HeDw{la)V+@5wa`iY_eB2ZeT7*7|J;@l@oh|2)A8bq5nXj!1c4xO+k$nHMO4? zP~R?E2{4jx%1P3X>lMbY@P^P-nQ0O@By7TlY~BPs`kCpDI;r z{L6U#2kC;^VZyd0Ikj5MAdc_0YjWt+Dix z(3e<^LhXGv<2$z4oB#^jugIt~NogYg4eI~O$2GFWrA~3(jcW0HE8vc3Yf3szy0d9r zllL$3*z{4Q`cM+s^=8rZ9^dC&s@vD@<7GSXmRqd#r=`meP4U)6Y#|9|YE71_;>;ob zZx$mzwmRf2(j;sUN6_fw=wtSa-rK0y&r13LNq3bvYakr!-PB_Zp zET^o@G$)>%lgEv-g%$Q~JsUYw<;pO?t(*F6GPz9gzVDvH{-IOMq`>z`>r;X!IpBfB zsKbY+1hbJm?{k)r`M;*g2FDFCTivdWOJu3_SLk!sr*#GoTN@v=B^FwC!g@mA2BAy> zzObc^7;W~*+@6cDa}_2dHRpZmcv5K^d6H&JhR%ffux(^m!i+Zy@Huh81Z}+zUsvgV8kGqbC*ZYdhP3Omo zBHB-EiZ7)vZrK~DDaJXt{2x^is_(WD*IsJxxx}1qc;VI{fEtM=UwgsrWtn;Rk#`zi zfT<^|5yep#8X&Z zIYiY>OrAyMRZgBYWkpRLb(c2gehiItF8HdUo- z8SSYino5o~sFO)as!^qpW*TLUDyq~^j&Ocu6d}%Wi3KN<1!#}v8xge%zal2=(Uk+9zw>e=&R#i={>0^&>!6lo6MFEhQ zY>j5t*G)A#MjLjmsmNNs*m*YabpqyC*mwpGmoRaJ*+v?|dUcl@|3@YUHr9YJiYDrd z-eqXp#v)eupixpJ)>BH8p~&)z^!*m{ydE7Ipp+wPcwxsK#%Hj4kk#9sg8J@k8)zi) zJk)^AaqLn}C9m}Icr8;j6pj|p%bj&8()?Xx8#2~3U+Sd>wb-!XZ1Bb0VMY>f01YmzaLPksa`C5;KNka1Kw+-$>!LH}uDJ?H%Hd8NBs4kh@`-?Vy$a36@Eg zVGb#@ahBUvm6*$U9p?5)i4|J5a!VE=$I$&RDM)b#8ZJj zuAsRAihQ!aqLt^mrZcaiK(!A4ESd$b{^yk6_IbFTu5LBE|6p+=)$CN^jjxjB%w?bS zNrY9WC7&okTjchI(sbohuNt~lks`$jx5&gVP^w#rm{j*9rOD4=Cb@`JdIyy$afwe> za$W7XGO0(+Dp`SoAeGS9q{NH~R!O1Mp-LB)$UTlnIH=Cu>LROn35-4wid%y^v$uZT zW+>U&%y$+ylD@qSJDa21Zkls9>9p)>2cyq?5+*d;6h?&qV^)KB!kv^AWqNgKRCLy3 zGQ+WqJsOLiqcD;;f_Y7gvw_>A4rD{qXvc@g;h@)=$FsY2k276!W35C6AnpjxC2d=p z+fX!-;7G?_RD(|$&z80JaHchI^qB5KM4tMHXgGyb|4IV<<{s;?XOj~F8eULlwaqEc zZ$X5Ti461?j!jW(jik{{Rz@?r`Rh1P*0{38rfqTWEaf_vGqX3qIR&dCz)RNQ z*2y1u>grJ8!Q+v*M_;T~Uvrugg=gHuTMLl2Cy> zJXJ!^^ha^ku}ls%l9g!UzobZofLyWOMg)_(F>Ont;N;vi-?S!%@+nwX8WlmDWV2S? z51Jc9(slmWLH!vjdi6^WKt~6unnF-5Y{|g8|42zNR2HpQjyce)>c+$QAW>qTLR}si zBu23w%y0&qS-i|Mnds37k#9whYUVV@uxZFWFiOZMACeydR_#F>1Ek3G$e;QQ5orig zB779aA~3>oHjcrXAq^#>@w5^nZd@#2?>3nsnkGbvbxnt&QrKU5=OBqu3|h6_Q%CbihN_t#}tMzEotg zeu7F$LkH6U3nV7=KqSvyiPA#>7N+muCut zq8=A{9pkRAJ$f9Z2xC&_h13f_L}O591q-oIk_>T*#$n$I_UfH6^?DBTXU-U^Tf?50 zI$Z^$izHN8)_Sd#6U#Jf1`Q%3|1z#TnC9hwbaunp_^pgPoON;;O~irO251wabazC! z)!UwpmCpgApbvW7&|HYm^ODZT1lZb1CP!;?$sJ!+|1LFxD!L0;LBi(t8qb1;fpS<_9L}kzvdvwITWrK>!0P2VPV3qfRbofwjI%W- z!+Hp~M@q(d3o64-E@yw=>E2kOYr!Ok9!ilDn?YK%owG_H@x1Kc9Pj){9nT8u;J0y$ zk9Q<+R}zEZjtl-dWU8N1KIfMT*)Qy!u*bH?VlTATzE zY$^RIwMo%Y^NoM37Fc8e{}(IF5f9l`7awYoPj*et6@_fWzDR20RGFB3-ulIEn|nA^ zL@mHFeO9lPY_odFp}TSEq7$v0+NFP^$ElyrZZmr9!9Yv3U^w>BPer625P6g=`H zKH_iCbWg{UU;Nc@!eSDNR8b*TKCx0wu48s46*{2?RK1~J8P`14L_S(KJn#cx)-yZm zB63HU6$AA>2lW&j|5j3Mg(N~}IYGugMnOd-BveF)T^`{@Fo9l6qC5WcWV-??DQHxJ zrBh=8YCna8wsUqG$X5C!W$4sU4#pHabxj|Kh47?f{{mI4#u`-P}TJpwyUxS2=VO4uD**jK_6FtMl`VWJm;v0E?G zU9NaNu62pU|J5<_q&2%me1z72^Or?6A~LFV8>yFr=7S<~V>B*;W}R^?H^W%RND#pW zig{L9pF~$F@^eT7R-0CA)OJVtVUPNCYQgnChh-G|<|C1#0SaP457863<2iGOEo5j` z=%OhixrN_CMg|6ALl+W|LPjOVC?Y|IX{J6LR9SOaDLgb!dE$hm)hE!xa!-+p_v3c! zB4)3mJ1J5hdV)U}fm9B}KL+-H1e6fO(iA-raRdb|{|8hiIZantJ1EqHnPVi|Lp8=T z8nEL$1IA-~mrtRgmI;LrGj>eUlRgNCAHCyWZ&*#zh!i$cbX}-r*fIh;=h$r-0cu$>F-(JQprm~F<{oUsMBG)4BLHsAW+Di2ei5f?#}zR)v4oF; z9w>5rEQnU3my21$Mi8@(#aSM&6kOtXFGrB~5lGP~kLT@@%FsfK) zd099>6CWJIiV2b)`zbtpmI1H;3l;emk4Y`^ah8sPm9XMdF8NM5rIg3Et^? 
z|8qZeL^~6h5OR5S8AvD|h)^qGZwp6cQ1?u7wPq4{f^?Upz>{Rzl8<1PlkqiQaJtblh)Ed9W-z?#U^E$ zh6i?0I%Sd<6i3QZEkOzqwnH1AvZSUqE#Bf#E73h0frXsKhVhj>sL7P|qmk;=VjSqC zgcuZ6#X@hvM(wnC0ToN*xQDDkOJ>tcwYgcVC={d0oO=~$^Y~@GWNq|FLv-|xn&?Wl z6d<)Bpb&Lu%a>>h1bXOcIJyTO;}?M7^*7a59?k_n@CAD@lUb4XAd#0KlJ#-g|7jEy z(;pM^a^p5g&<7nYsauobHhM-P$#Sl$N1MWDieW@BWP^PDk{qwdM$%=BK|>d!(J+`n zMY-9I1vxvUwOC?BAI8@c*H)n3C~ZhuobD!8fkl8aR9m7X0D~7fY7r!sLpj~_UYSa9 z?sODFB~HW9pik1KiqT=+GgNStb^O&OVOde3LMjRcD^$izW0^F4YJI6PPXFd&I!Y}! zxKkL#NzKGMOLikWL12Y~m&r<|%)jAX_{7ptE-SR>$y9Pk0Kuy=mz2YPXlNc*atl-94a zw=&7upXnGj@Hm~yrA&y1nPk%|$ohH{%74Ym7q7K!apj$K12@dqFsYht_r`(~0)4On zHV|X6+1qX_5*fL9Fq38?6tz{c5qyTC6euG?@wSWaTU?EWEC6%B54x;l)t;)vJj0TH zs5%@TOLz_lx6e}`w}WJm@|F>FCohJzx%86Kxu`fLYirwcm2xex|CBCd=45FZmSu51 zO6HW=6D>B}Qc01+bm(JdQiD|K!%SvmaF{D2_Bozn6DRDKd^fq|qqJwoD;q^}?qgp_ zr%rs>I3spqM7m8@QnpqiIvc^2C%CtG!lgq#88A!a6MfW>kr*T@w zVI><9d1F6WImLxIeUEE{f7(y;WyW;~7WLFX^R-eH_`m_sF2(c~v}9;MH$i;5g z*lhopl$h~W8yjt`7Fa(qhoL;Wpz&DOR!E*)y4Q6>>XvAL|Iu5L+J0CSNmLUcQd4=q zOK2Ori|19Z(i};hgjoNJib!c$*GDx3Gp{6Agk6JaTF>#Hqxf5CSyr~ zW+ diff --git a/tools/python/key_2_emb_formatter.py b/tools/python/key_2_emb_formatter.py deleted file mode 100644 index 617e7f99..00000000 --- a/tools/python/key_2_emb_formatter.py +++ /dev/null @@ -1,220 +0,0 @@ -# coding: UTF-8 - -# Copyright (C) 2023. Huawei Technologies Co., Ltd. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-
-import argparse
-import json
-import os
-import sys
-import numpy as np
-import tensorflow as tf
-
-parser = argparse.ArgumentParser()
-parser.add_argument('--path', type=str, required=True, help='path of the root dir of saved file')
-parser.add_argument('--name', type=str, default="key_2_embedding", help='name of output file')
-parser.add_argument('--ddr', type=bool, default=False, help='if saved data was from ddr mode, default False')
-parser.add_argument('--step', type=int, default=0, help='the step when the data was saved, default 0')
-
-
-class Formatter:
-
-    def __init__(self, saved_file_path, out_file_name, is_ddr_mode, step):
-        self._device_dir_list = ["HashTable", "HBM"]
-        self._host_dir_list = ["HashTable", "DDR"]
-        self._device_emb_dir = "embedding"
-        self._host_emb_dir = "embedding_data"
-        self._device_hashmap_dir = "key_offset_map"
-        self._host_hashmap_dir = "embedding_hashmap"
-        self._attrib_suffix = ".attribute"
-        self._data_suffix = ".data"
-
-        self._saved_file_path = saved_file_path
-        self._out_file_name = out_file_name
-        self._sub_dirs = self._get_sub_dirs(step)
-        self._table_names = None
-        self._father_table_names = None
-        self._step = step
-
-        self._json_attrib_dtype = "data_type"
-        self._json_attrib_shape = "shape"
-        self._host_attrib_dtype = np.uint64
-        self._hashmap_dtype = np.uint64
-        self._raw_key_dtype = np.uint64
-        self._key_dtype = np.int64
-        self._raw_key_offset = np.iinfo(np.uint32).max
-        self._data_dtype = None
-
-        self._is_ddr_mode = is_ddr_mode
-
-    def process(self):
-        dev_dir = self._set_upper_dir_origin(self._sub_dirs[0], self._device_dir_list)
-
-        self._table_names = self._get_table_names(dev_dir)
-        dict_out = {}
-        for table_name in self._table_names:
-            combined_key = None
-            combined_emb = None
-            for sub_dir in self._sub_dirs:
-                dev_dir = self._set_upper_dir(sub_dir, ["HashTable", "HBM"], table_name)
-                emb_data = self._data_process(dev_dir)
-                key, offset = self._hashmap_process(dev_dir)
-                emb_data = emb_data[offset]
-                if combined_key is not None:
-                    combined_key = np.append(combined_key, key, axis=0)
-                else:
-                    combined_key = key
-                if combined_emb is not None:
-                    combined_emb = np.append(combined_emb, emb_data, axis=0)
-                else:
-                    combined_emb = emb_data
-            print(f"{table_name} has combined key {combined_key.shape} and combined emb {combined_emb.shape}")
-            transformed_data = dict(zip(combined_key[:], combined_emb[:]))
-            dict_out[table_name] = transformed_data
-        np.save("./" + self._out_file_name + ".npy", dict_out)
-
-    def fw_weight_process(self):
-        checkpoint_path = self._saved_file_path + "/model-0-" + str(self._step)
-        reader = tf.compat.v1.train.NewCheckpointReader(checkpoint_path)
-        var_to_shape_map = reader.get_variable_to_shape_map()
-        for key in var_to_shape_map:
-            if key == 'dense/fw_weight':
-                np.save('fw_weight.npy', reader.get_tensor(key))
-
-    def _data_process(self, dev_dir):
-        dev_emb_dir = os.path.join(dev_dir, self._device_emb_dir)
-        host_emb_dir = os.path.join(dev_dir, self._host_emb_dir)
-        data_file, attribute_file = self._get_file_names(dev_emb_dir)
-        dev_attribute = self._get_attribute(dev_emb_dir, attribute_file, is_json=True)
-        if not self._data_dtype:
-            self._data_dtype = dev_attribute.pop(self._json_attrib_dtype)
-
-        dev_data_shape = dev_attribute.pop(self._json_attrib_shape)
-        emb_data = self._get_data(dev_emb_dir, data_file, self._data_dtype, dev_data_shape)
-
-        if self._is_ddr_mode:
-            data_file, attribute_file = self._get_file_names(host_emb_dir)
-            host_attribute = self._get_attribute(host_emb_dir, attribute_file, is_json=False)
-            host_data_shape = [host_attribute[0], host_attribute[1]]
-            host_data = self._get_data(host_emb_dir, data_file, self._data_dtype, host_data_shape)
-            host_data = host_data[:, :dev_data_shape[1]]
-            emb_data = np.append(emb_data, host_data, axis=0)
-
-        return emb_data
-
-    def _hashmap_process(self, dev_dir):
-        dev_hashmap_dir = os.path.join(dev_dir, self._device_hashmap_dir)
-        host_hashmap_dir = os.path.join(dev_dir, self._host_hashmap_dir)
-        if self._is_ddr_mode:
-            data_file, attribute_file = self._get_file_names(host_hashmap_dir)
-        else:
-            data_file, attribute_file = self._get_file_names(dev_hashmap_dir)
-
-        attribute = self._get_attribute(dev_hashmap_dir, attribute_file, is_json=False)
-        data_shape = attribute[:2]
-        raw_hashmap = self._get_data(dev_hashmap_dir, data_file, self._hashmap_dtype, data_shape)
-        offset = raw_hashmap[:, -1]
-        raw_key = raw_hashmap[:, :2].astype(self._raw_key_dtype)
-        key = raw_key[:, 0] * self._raw_key_offset + raw_key[:, 1]
-        key = key.astype(self._key_dtype)
-
-        return key, offset
-
-    def _get_sub_dirs(self, step):
-        sub_dirs = []
-        for _, sub_dir, _ in os.walk(self._saved_file_path):
-            sub_dirs.append(sub_dir)
-
-        picked_sub_dirs = []
-        for sub_dir in sub_dirs[0]:
-            if int(sub_dir.split("-")[-1]) == step:
-                picked_sub_dirs.append(sub_dir)
-
-        if len(picked_sub_dirs) == 0:
-            raise FileExistsError("There is no sparse checkpoint for the given training step.")
-        return picked_sub_dirs
-
-    def _set_upper_dir(self, sub_dir, dir_list, table_name):
-        # copy so the caller's list is not mutated by the append below
-        dir_list_copy = list(dir_list)
-        dir_list_copy.append(table_name)
-        temp_dir = os.path.join(self._saved_file_path, sub_dir)
-        for directory in dir_list_copy:
-            temp_dir = os.path.join(temp_dir, directory)
-        father_table = []
-        for _, i, _ in os.walk(temp_dir):
-            father_table.append(i)
-
-        temp_dir = os.path.join(temp_dir, father_table[0][0])
-        return temp_dir
-
-    def _set_upper_dir_origin(self, sub_dir, dir_list):
-        temp_dir = os.path.join(self._saved_file_path, sub_dir)
-        for directory in dir_list:
-            temp_dir = os.path.join(temp_dir, directory)
-
-        return temp_dir
-
-    def _get_father_table_names(self, directory):
-        if directory:
-            table_names = []
-            for _, table_name, _ in os.walk(directory):
-                table_names.append(table_name)
-            return table_names[0]
-        else:
-            raise ValueError("directory is None, cannot search for table names")
-
-    def _get_table_names(self, directory):
-        if directory:
-            table_names = []
-            for _, table_name, _ in os.walk(directory):
-                table_names.append(table_name)
-            return table_names[0]
-        else:
-            raise ValueError("directory is None, cannot search for table names")
-
-    def _get_file_names(self, directory):
-        files = []
-        data_file = None
-        attribute_file = None
-        for _, _, file in os.walk(directory):
-            files.append(file)
-        for file in files[0]:
-            if file.find(self._data_suffix) != -1:
-                data_file = file
-            elif file.find(self._attrib_suffix) != -1:
-                attribute_file = file
-        return data_file, attribute_file
-
-    def _get_attribute(self, directory, file_name, is_json):
-        file_dir = os.path.join(directory, file_name)
-        if is_json:
-            with open(file_dir, "r") as fin:
-                attributes = json.load(fin)
-            return attributes
-        else:
-            attributes = np.fromfile(file_dir, self._host_attrib_dtype)
-            return attributes
-
-    def _get_data(self, directory, file_name, dtype, shape):
-        file_dir = os.path.join(directory, file_name)
-        data = np.fromfile(file_dir, dtype=dtype)
-        data = data.reshape(shape)
-        return data
-
-
-if __name__ == "__main__":
-    args = parser.parse_args()
-    formatter = Formatter(saved_file_path=args.path, out_file_name=args.name, is_ddr_mode=args.ddr, step=args.step)
-    formatter.process()
diff --git a/tools/python/optimizer_process.py b/tools/python/optimizer_process.py
deleted file mode 100644
index 8a658e29..00000000
--- a/tools/python/optimizer_process.py
+++ /dev/null
@@ -1,116 +0,0 @@
-# coding: UTF-8
-
-# Copyright (C) 2023. Huawei Technologies Co., Ltd. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import argparse
-import os
-import numpy as np
-import json
-from enum import Enum
-
-# each card (rank) processes its own data
-
-parser = argparse.ArgumentParser()
-parser.add_argument('--path', type=str, required=True, help='path of the model file to be converted')
-parser.add_argument('--step', type=int, required=True)
-
-sparse_file_prefix = "sparse-model.ckpt-"
-optimizer_prefix = "Optimizer"
-data_suffix = ".data"
-attribute_suffix = ".attribute"
-
-
-class DataAttr(Enum):
-    SHAPE = "shape"
-    DATATYPE = "data_type"
-
-
-def get_optimizer_name(sparse_file_path):
-    optimizer_list = []
-    for folder_name in os.listdir(sparse_file_path):
-        optimizer_list.append(folder_name)
-    return optimizer_list
-
-
-def get_table_list(table_upper_path):
-    table_list = []
-    for folder_name in os.listdir(table_upper_path):
-        table_list.append(folder_name + "/table")
-    return table_list
-
-
-def get_optimizer_param_name(table_path):
-    param_list = []
-    for folder_name in os.listdir(table_path):
-        param_list.append(folder_name)
-    return param_list
-
-
-def get_optimizer_data():
-    pass
-
-
-def get_attribute_and_data_file(table_path):
-    if not os.path.exists(table_path):
-        raise FileNotFoundError(f"the input table path {table_path} does not exist.")
-
-    attribute_file_list = []
-    data_file_list = []
-    for file_name in os.listdir(table_path):
-        if file_name.endswith(attribute_suffix):
-            attribute_file_list.append(file_name)
-        if file_name.endswith(data_suffix):
-            data_file_list.append(file_name)
-    if len(attribute_file_list) != 1:
-        raise AssertionError(f"under the table path {table_path}, there must be only one attribute file. "
-                             f"In fact, {len(attribute_file_list)} attribute file(s) exist.")
-    if len(data_file_list) != 1:
-        raise AssertionError(f"under the table path {table_path}, there must be only one data file. "
-                             f"In fact, {len(data_file_list)} data file(s) exist.")
" - f"In fact, {len(data_file_list)} data file exists.") - attribute_file = os.path.join(table_path, attribute_file_list[0]) - data_file = os.path.join(table_path, data_file_list[0]) - return attribute_file, data_file - - -def process(path, step): - save_dict = {} - sparse_file_name = sparse_file_prefix + str(step) - sparse_file_path = os.path.join(path, sparse_file_name,optimizer_prefix) - optimizer_list = get_optimizer_name(sparse_file_path) - for optimizer in optimizer_list: - table_upper_path = os.path.join(sparse_file_path, optimizer, "HBM") - table_list = get_table_list(table_upper_path) - - for table in table_list: - table_path = os.path.join(table_upper_path, table) - optimizer_param_list = get_optimizer_param_name(table_path) - optimizer_dict = {} - for param in optimizer_param_list: - data_path = os.path.join(table_path, param) - attribute_data_dir, target_data_dir = get_attribute_and_data_file(data_path) - with open(attribute_data_dir, "r") as fin: - optimizer_attributes = json.load(fin) - with open(target_data_dir, "r") as fin: - optimizer_data = np.fromfile(target_data_dir, - dtype=optimizer_attributes.pop(DataAttr.DATATYPE.value)) - data_shape = optimizer_attributes.pop(DataAttr.SHAPE.value) - optimizer_data = optimizer_data.reshape(data_shape) - optimizer_dict[param] = optimizer_data - save_dict[table] = optimizer_dict - np.save(path+"/optimizer_dict.npy", save_dict) - - -if __name__ == "__main__": - args = parser.parse_args() - process(args.path, args.step) \ No newline at end of file diff --git a/tools/python/readme.md b/tools/python/readme.md deleted file mode 100644 index 3f5e86df..00000000 --- a/tools/python/readme.md +++ /dev/null @@ -1,110 +0,0 @@ -# 模型数据转换工具(key-value)使用说明 - -### 1. 美团1207模型ckpt保存路径说明 - -#### 1.1 训练时1207模型保存参数设置:(estimator模式) - -![img](./images/clip_image002.jpg) - -![img](./images/clip_image004.jpg) - -#### 1.2 训练后模型保存路径目录展示如下: - -![img](./images/clip_image006.jpg) - -#### 1.3 下面来看单个文件夹下存储的内容,以check_ran0为例: - -![img](./images/clip_image008.jpg) - -我们的模型数据转换工具就是要对该**sparse****文件夹中的数据进行转换**,转换成key-value形式,保存格式是npy文件,详情参考3. 输出文件格式说明。 - -下面介绍**如何使用该模型数据转换工具**。 - - - -### 2. 使用工具demo说明: - -**该转换工具model_data_to_key_value.py一共需要4个参数,path、name、ddr、step** - - - -| **参数名** | **数据类型** | **必选** | **默认值** | **描述** | -| ---------- | ------------ | -------- | ---------- | ---------------------------------- | -| --path | String | 是 | | 保存模型embedding数据的根路径 | -| --name | String | 否 | | 输出文件的名称,最终输出.npy | -| --ddr | Bool | 否 | False | 保存数据是否开启ddr模式 | -| --step | Int | 否 | 0 | 保存数据所属训练步数 | - - - -#### 2.1 参数确定: - -下面是一个选择参数的示例。 - -##### **1)** path路径确定 - -我们选择1207保存下来的0卡模型文件夹下的sparse部分数据进行转换,因此路径选到目录下:/home/lff/model/check_rank0/ - -![img](./images/clip_image010.jpg) - -**--path = /home/lff/model/check_rank0** - -(多卡的目录需要转换多次,一次只能转换一张卡下面sparse的数据) - - - -##### 2) name参数: 输出文件的名字,格式为.npy; - -例如:sparse_0,经过转换后的sparse数据就保存在当前目录下的sparse_0.npy文件中; - -**--name = sparse_0** - -##### 3) ddr参数:美团模型未开启ddr模式,因此选择False - -**--ddr = False** - -##### 4)step参数:在上面1207模型存储的目录下面,存了第0步的模型。 - -**--step=0** - - - -![img](./images/clip_image012.jpg) - -#### **2.2** **执行工具命令** - -python3 model_data_to_key_value.py --path=/home/lff/model/check_rank0 --name=sparse_0 --ddr=False --step=0 - -#### **2.3** **执行结果展示** - - - -![img](./images/clip_image014.jpg) - - - -### 3. 

From 53edea92b2ddab65ed4cbe375395d63aa4e66868 Mon Sep 17 00:00:00 2001
From: gegaojian <14206008+gegaojian2@user.noreply.gitee.com>
Date: Mon, 25 Mar 2024 14:27:53 +0800
Subject: [PATCH 007/302] =?UTF-8?q?dense=E5=B1=82=E5=8F=8D=E5=90=91?=
 =?UTF-8?q?=E9=87=8D=E5=A4=8D=E8=AE=A1=E7=AE=97=E4=BF=AE=E5=A4=8D?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 examples/dlrm/model/main_mxrec.py | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/examples/dlrm/model/main_mxrec.py b/examples/dlrm/model/main_mxrec.py
index dd3e8d2d..2d0ee78e 100644
--- a/examples/dlrm/model/main_mxrec.py
+++ b/examples/dlrm/model/main_mxrec.py
@@ -24,6 +24,7 @@ import tensorflow as tf
 from sklearn.metrics import roc_auc_score
 import numpy as np
 
+from mx_rec.constants.constants import ASCEND_SPARSE_LOOKUP_LOCAL_EMB, ASCEND_SPARSE_LOOKUP_UNIQUE_KEYS
 from mx_rec.core.asc.helper import FeatureSpec, get_asc_insert_func
 from mx_rec.core.asc.manager import start_asc_pipeline
 from mx_rec.core.embedding import create_table, sparse_lookup
@@ -323,15 +324,20 @@ if __name__ == "__main__":
                                  is_train=False,
                                  modify_graph=MODIFY_GRAPH_FLAG)
     dense_variables, sparse_variables = get_dense_and_sparse_variable()
-
+    trainable_variables = []
+    trainable_variables.extend(dense_variables)
+    if use_dynamic_expansion:
+        trainable_variables.append(tf.compat.v1.get_collection(ASCEND_SPARSE_LOOKUP_LOCAL_EMB)[0])
+    else:
+        trainable_variables.extend(sparse_variables)
     rank_size = mxrec_util.communication.hccl_ops.get_rank_size()
     train_ops = []
     # multi task training
     for loss, (dense_optimizer, sparse_optimizer) in zip([train_model["loss"]], optimizer_list):
         # do dense optimization
-        grads = dense_optimizer.compute_gradients(loss, var_list=dense_variables)
+        grads = dense_optimizer.compute_gradients(loss, var_list=trainable_variables)
         avg_grads = []
-        for grad, var in grads:
+        for grad, var in grads[:-1]:
             if rank_size > 1:
                 grad = hccl_ops.allreduce(grad, "sum") if grad is not None else None
             if grad is not None:
@@ -340,17 +346,14 @@ if __name__ == "__main__":
         train_ops.append(dense_optimizer.apply_gradients(avg_grads))
 
         if use_dynamic_expansion:
-            from mx_rec.constants.constants import ASCEND_SPARSE_LOOKUP_LOCAL_EMB, ASCEND_SPARSE_LOOKUP_UNIQUE_KEYS
-
             train_address_list = tf.compat.v1.get_collection(ASCEND_SPARSE_LOOKUP_UNIQUE_KEYS)
-            train_emb_list = tf.compat.v1.get_collection(ASCEND_SPARSE_LOOKUP_LOCAL_EMB)
             # do sparse optimization by addr
-            sparse_grads = sparse_optimizer.compute_gradients(loss, train_emb_list)  # local_embedding
+            sparse_grads = list(grads[-1])  # local_embedding
             grads_and_vars = [(grad, address) for grad, address in zip(sparse_grads, train_address_list)]
             train_ops.append(sparse_optimizer.apply_gradients(grads_and_vars))
         else:
             # do sparse optimization
-            sparse_grads = sparse_optimizer.compute_gradients(loss, sparse_variables)
+            sparse_grads = list(grads[-1])
             print("sparse_grads_tensor:", sparse_grads)
             grads_and_vars = [(grad, variable) for grad, variable in zip(sparse_grads, sparse_variables)]
             train_ops.append(sparse_optimizer.apply_gradients(grads_and_vars))
-- 
Gitee
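For context on the patch above: the original code called compute_gradients twice, once for the dense variables and once for the sparse ones, so the backward graph was built and run twice. The fix differentiates a single combined variable list once and splits the resulting (gradient, variable) pairs via grads[:-1] / grads[-1]. A minimal TF1-style sketch of that pattern, using stand-in variables rather than the mxRec tables:

    import tensorflow as tf

    tf.compat.v1.disable_eager_execution()

    x = tf.compat.v1.placeholder(tf.float32, [None, 4])
    dense_w = tf.Variable(tf.ones([4, 1]), name="dense_w")        # stand-in for dense_variables
    sparse_emb = tf.Variable(tf.ones([8, 4]), name="sparse_emb")  # stand-in for a sparse table
    loss = tf.reduce_sum(tf.matmul(x * tf.reduce_mean(sparse_emb, axis=0), dense_w))

    opt = tf.compat.v1.train.GradientDescentOptimizer(0.01)

    # one backward pass over dense + sparse instead of two compute_gradients calls
    grads = opt.compute_gradients(loss, var_list=[dense_w, sparse_emb])

    dense_grads, sparse_grads = grads[:-1], [grads[-1]]  # same split as the patch
    train_op = tf.group(opt.apply_gradients(dense_grads),
                        opt.apply_gradients(sparse_grads))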
From 0c890e15fb0c7f989a2a7b4bdc78382c45deb198 Mon Sep 17 00:00:00 2001
From: yxy1684 <2270320041@qq.com>
Date: Fri, 29 Mar 2024 06:15:34 +0000
Subject: [PATCH 008/302] =?UTF-8?q?!66=20=E6=B7=BB=E5=8A=A0=E9=80=9A?=
 =?UTF-8?q?=E4=BF=A1=E7=9F=A9=E9=98=B5excel=20*=20=E6=B7=BB=E5=8A=A0?=
 =?UTF-8?q?=E9=80=9A=E4=BF=A1=E7=9F=A9=E9=98=B5excel=20*=20=E6=B7=BB?=
 =?UTF-8?q?=E5=8A=A0=E9=80=9A=E4=BF=A1=E7=9F=A9=E9=98=B5excel?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 ...32\344\277\241\347\237\251\351\230\265.xlsx" | Bin 0 -> 31412 bytes
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 "docs/MindX 6.0.RC1 \351\200\232\344\277\241\347\237\251\351\230\265.xlsx"

diff --git "a/docs/MindX 6.0.RC1 \351\200\232\344\277\241\347\237\251\351\230\265.xlsx" "b/docs/MindX 6.0.RC1 \351\200\232\344\277\241\347\237\251\351\230\265.xlsx"
new file mode 100644
index 0000000000000000000000000000000000000000..5224de2b2f5ed98b0b0d0b0c15e65ebf98ad993e
GIT binary patch
literal 31412
[base85-encoded binary contents of the .xlsx omitted; not human-readable]
zI-9&nD@Zl$FLnL|fNrFT_eNKewv2>29?EPO9{oL8s`e~3`A+gEYss=B({krr>y@WX zvICaQ`vZ{iag&?WwoE2qy?eUnV07Pf1vHqgj}dO&5sq-2q^Pko(sMC0+FHNypVkK> z(k;a`ap8O$H}zxb6+wjdSpI>03wuP^g%jl;P2LtGNV+xjBdeDw?>%f;Yv7!gb)(|? zW>_37YK9Qckd|~V0KPb~?hQPA`?75bNy*lW((!afEpDc~XwEn@X--VI^;Hddk0WH{ zfTjrCD8`Za<8!mhA93FVSSz3o4ycv7hpJPc5Hz$zm&D^MU$C=p-?`eZJ~+;&Ib#ZU~Zmfg+7kVv=Puzg<=jaay-E!BMPnEr0B!jk!uAbTVe z^C#%#ANI8~@*P6}>aL*vtpjWwA;V5Kxm={ZlPhTbOie*ntK+0sLW1D%jFx#DkK<{2l8^ zQoXIiHo0%5($vXxKLKUK+J(jFv4wu(hK8`~VYxpMi_SO$W&|8f~ zyKz>m7MWFI_R9GwxqCgXl+rWHOF-U4ee_M813;sD`MchK>^##NjX| z6T98zeOKJLvU-(Ky@kk-6l)O_Jy!L>0FEoGt{(um6n300xQB;UP3gd(Jgko zt}Lj=swXCV!Cd&lmNK|@+Y_})u|=1Whj)U0dP?&?@vzo%tiJrCkZZml`c~Y#$l{bT z4Kh5ruV{c#SQuSfmRfQir(+V)cNAfCfL=~Z9^Kgncxp;mPnNHMmHf!sQGp3;kKufR zsNvX)VbA-^gXy*D52zx3B0j(TToz7Ztcg#GdEXS+SX;r8%XKIir*yltSqix2+Adh1 z;HqsDap6u#eYo)&s4Y5>FTmjyE7+_$h(F@7%JmY-pGs4{cTqtVF8yq6iM7^@u3$-9 zw_Z*kD%joaa)4Yo-oZy0mWn8J|FTwPFWIK7U-;P?MY&bqFieKyazcg4J*r3Uks$z}N6KT3@muXE7SET+mh(EuBh@@q3~*~hezaFOgKTV8ZipsQ-2sC}h4HeN30$fs` z`9xk(0=S#)RP6eXrN4e4QBe}?UZM~4ZiI2<4Q~2nZ(q1Kydsp6BQe6ku&xo8;c63$ z9R={onQ28klY)Wa8*y?K9yoHB4=w)knh9A-KR8)EVqrtS)sZo!M_-vV;#t^)uUNuI)7hO$n_OhxYheMRiy1eT{@{Xa?7MTF0*30R62 zxF`zTHt#=5|Fvu0Z$Sr-v`0X&A;GLPt_UP7WDI+t#qD_L;53ETtyH_Pb97s(r_x@X z<2+|z+-HOTb@ypeq@(ySc~+fsRD+8)D%y)w22y0}Xc7l2@@;blU3LHs`+nm_w$OK* zT*ja4m_^x=+^pfBGN$&ak&+nX@diw@h>8=~WB7I4t0{L~4eVX+Y(+1O3nlEyNUvZf zex=AC-*Y1a+%KgLaaj9F(dQPmY4tmN1{3OfxnpQynv6VZQwtXm8?4WI-YkNoH0USM zP&EG^eE&`X_&49cJOCiy-XD*QTny9CS>?mW74X-&r4gG6WaJcI4QR_-F8k$u#(@!# z7hh)K3iVE&MCS00fZ^6eM##xo>KSYMb$V6~*=_R5-aNu$B>*YZt=Y=Yp++sq<2YPX zm4Yx?^4m*mH%yoT>}OGC>?$Z)@+mSI#f@|5XEh}vu6|ruz8sp;I3Yu;%{1& zI#-BE`k26TZqPq2c8Gt+*gR4UkF+@nGigH%F}K!B^cJgq>vvsx3lci)eFBm5e=+_$ zs2qfG$l63XhC0 z*EJ+(HMWMX^;|}E9Fx5Gv}+Rq{02@+em3KRu%Y(r!|3mBFh4G)1h2l(Uq&=5sbC`M z7DbDio>EXhf)+ZxI&A$8VpJn_5ybyrjQ;3;nA_bZ2k z6&y)!%=z1^QgFK}GhbePZ{HnXw_46_kH-jPW@#Ecxy!up%~RQz{OBZ!LBeVUW-#e5 zoTI@$#wnIT@h9^8j~RSB;C;2oTE zVs}tK?)zuxI2B5Qc&{3Rg&-7J&Cdb{HX(+X`IN5U`RQTSiA*3ojni_X&XK3y87w3T?e##a5{-*ORcL<$69V#d~1@Lo;rO%73#~FP&2du}tgi|l) zIG5gF4#}OoGI)y(R=&OLhOgFp9m)V1mA*fE`a5Ii&kFgM=l-{-*FWjTl2opH1gMib zpnfPHb@Lw-Ptmf!mGmXH&kB+aN%ScA4lpJ(rHJw|!NUnYSYP4_ykm;drOBw?^7z7) zHW&%YYe#i{{9R3B6K)}WL{4R6?7DDtU-b`B}wAn8nEVMqzeQS zJE^;yt+VAvGy~TbB0{0ZM$nnJO(Un8F-02vkhqY#IoExC2F;VzxIfC(uRkbrU!|2W zM)|$_QgYvDxw5*v$n#I1V^=XAs~#wec(}(%9X$zqI~U-YCWacG4ra~<4}Ql`l7clM z8(Pm%fM2N8^lIWsR?w;|ikug{92wndq5P}TvX>U7J$L8$bdK9F)2LGk`fg)Z=69`Q zf^IS|6s!xOB`21+_Nz!L5YkWO8!bEigl_TlD-hl}$m1Pl88e8waostwd+9>AMHCZr z7c0^EY!R6uY8f3s^3J7PT1aJ^r5}^6D=G$jKnA|;PMHG~013WjKeu71P3+WM8$b1w zQkf1H?oD4*Xmm=#Nh`#`C2SeAjM+I^pZtqb^|SOvAQv4Ekbbk3tHsD62m7G~st zI13Y8J8E6QcsjG|Z@4U8_qj4gSL}?;qyaMtV#6uAZ+>~oA)`%LlJrN_k{=nNsR-2h zA?Gz9nUkpB;T#@sSOmPY`~G#-Dx*|e0koI3#338B720_rz}$LsI)0am>;^7tGOB><+-45?n%^vRMd!I30WlU40$8QW;YzvNRIyH>dHo8hh^G zqoF*mXZ=}BL)tl!p<~nL%0`UI4TOpUI0V-38Lh{|0zq{R?EYZjU}V5g3p+5dKWP3D z;LnfSgWvFYJy^X4rvLU3Kr03HBOYiTD-HOI=J#Xvzbbod!UO3;Wp8l)ZaDuhx`)z_ z*TY?b+285@Jv#hTMxX}z1NCEbr~g3xPjelVbx@=C32Wr_f41ZS>&XNL#R9b>pRjt( z|BUryRf1xHnr2T}?G}H=dNR~Nu|PKff5Q6w4=w#ADNq2=WalS<8(=EglQ;F3ct8O_ zlWd;=x_LkWe#-2bd4^*H==0in~Qfm4;*?*XvpbUc^YxaZ_|JdGt9CG%* z8vl?W`glEX8zT6-@qbGf{il{e6Gk5>AKPCBLJ5*S`oEg+5R(3QJv8CH>>s?Z|3&`b zu0384 Date: Tue, 2 Apr 2024 16:53:23 +0800 Subject: [PATCH 009/302] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E6=BA=90=E7=A0=81?= =?UTF-8?q?=E7=BC=96=E8=AF=91=E5=AE=89=E8=A3=85mxRec=E7=9A=84README?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 
diff --git a/README.md b/README.md
index 4a2c310b..3efbe7d8 100644
--- a/README.md
+++ b/README.md
@@ -58,23 +58,17 @@ bash run.sh
 - CMake 3.20.6
 
 开源依赖:
-- pybind11 v2.10.3
-- securec
-- openmpi 4.1.1: 请参考软件文档在编译环境完成安装
+- [pybind11 v2.10.3](https://github.com/pybind/pybind11/archive/refs/tags/v2.10.3.zip)
+- [securec](https://github.com/huaweicloud/huaweicloud-sdk-c-obs/archive/refs/tags/v3.23.9.zip)
+- [openmpi 4.1.5](https://download.open-mpi.org/release/open-mpi/v4.1/openmpi-4.1.5.tar.gz): 请参考软件文档在编译环境完成安装
 - tensorflow 1.15/2.6.5:根据实际需求选择对应版本
 
-pybind11的压缩包放在与MxRec代码同级的opensource/opensource目录下,如果没有opensource目录,则需要在MxRec同级的目录下手动创建opensource/opensource目录。然后将pybind11的压缩包放在opensource/opensource目录下。解压压缩包,并且将解压之后的压缩包改名为pybind11。
-
-securec是华为开源的安全函数库。下载后:
-1. 将platform下的eSDK_LogAPI_V2.1.10文件夹删除
-2. 将platform下的huaweisecurec改名为securec
-3. 在securec文件夹下,有src、lib和include三个文件夹,删除lib文件夹下的所有文件
-4. 将platform文件夹放到MxRec代码目录下
+将pybind11和securec的压缩包放在与MxRec代码同级的opensource目录下,如果没有opensource目录,则需要在MxRec同级的目录下手动创建opensource目录,然后将pybind11和securec的压缩包放在opensource目录下。
 
 为了构建多个版本的whl包,编译脚本在python虚拟环境完成对应tensorflow版本的安装。用户可以根据实际情况调整编译脚本,指定tensorflow的安装路径。编译方法:
-- build/build.sh:执行脚本完成tf1和tf2版本whl包的构建和打包。执行脚本前,请参考build/build_tf1.sh、build/build_tf2.sh创建对应的虚拟环境,在虚拟环境中完成对应tensorflow版本的安装,并修改对应的激活命令。
-- build/build_tf1_with_opensource.sh:执行脚本完成tf1版本whl包的构建,构建成功后,whl包在tf1_whl子目录下。执行脚本前,创建tf1虚拟环境,在虚拟环境中完成tensorflow 1.15.0版本的安装,并修改对应的激活命令。
-- build/build_tf2_with_opensource.sh:执行脚本完成tf2版本whl包的构建,构建成功后,whl包在tf2_whl子目录下。执行脚本前,创建tf2虚拟环境,在虚拟环境中完成tensorflow 2.6.5版本的安装,并修改对应的激活命令。
+- build/build.sh:执行脚本完成tf1和tf2版本whl包的构建和打包。执行脚本前,请参考build/build_tf1_with_opensource.sh、build/build_tf2_with_opensource.sh创建对应的虚拟环境,在虚拟环境中完成对应tensorflow版本的安装,并修改对应的激活命令。
+- build/build_tf1_with_opensource.sh:执行脚本完成tf1版本whl包的构建,构建成功后,whl包在/build/mindxsdk-mxrec/tf1_whl子目录下。执行脚本前,创建tf1虚拟环境,在虚拟环境中完成tensorflow 1.15.0版本的安装,并修改对应的激活命令。
+- build/build_tf2_with_opensource.sh:执行脚本完成tf2版本whl包的构建,构建成功后,whl包在/build/mindxsdk-mxrec/tf2_whl子目录下。执行脚本前,创建tf2虚拟环境,在虚拟环境中完成tensorflow 2.6.5版本的安装,并修改对应的激活命令。
 
 如需使用动态扩容功能,进入“./cust_op/cust_op_by_addr”目录中。参考以下命令编译并安装动态扩容算子包。
 ```shell
--
Gitee

From d3c5f66b0e94533df66baaae3da51f9fbde8c1b4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E4=BD=95=E9=9C=96?=
Date: Wed, 3 Apr 2024 10:23:31 +0800
Subject: [PATCH 010/302] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E6=BA=90=E7=A0=81?=
 =?UTF-8?q?=E7=BC=96=E8=AF=91=E5=AE=89=E8=A3=85mxRec=E7=9A=84README?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 README.md | 42 +++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 41 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 3efbe7d8..fccc0244 100644
--- a/README.md
+++ b/README.md
@@ -63,7 +63,7 @@ bash run.sh
 - [openmpi 4.1.5](https://download.open-mpi.org/release/open-mpi/v4.1/openmpi-4.1.5.tar.gz): 请参考软件文档在编译环境完成安装
 - tensorflow 1.15/2.6.5:根据实际需求选择对应版本
 
-将pybind11和securec的压缩包放在与MxRec代码同级的opensource目录下,如果没有opensource目录,则需要在MxRec同级的目录下手动创建opensource目录,然后将pybind11和securec的压缩包放在opensource目录下。
+将pybind11和securec的压缩包放在与MxRec代码同级的opensource目录下,并且将其分别更名为pybind11-2.10.3.zip、huaweicloud-sdk-c-obs-3.23.9.zip。如果没有opensource目录,则需要在MxRec同级的目录下手动创建opensource目录,然后将pybind11和securec的压缩包放在opensource目录下。
 
 为了构建多个版本的whl包,编译脚本在python虚拟环境完成对应tensorflow版本的安装。用户可以根据实际情况调整编译脚本,指定tensorflow的安装路径。编译方法:
@@ -75,6 +75,46 @@ bash run.sh
 ```
 
+## 测试用例
+
+### Python侧测试用例
+
+运行Python测试用例所需依赖:
+
+- pytest 7.1.1
+- pytest-cov 4.1.0
+- pytest-html
+
+如需使用python测试用例,需要先安装上述依赖以及能够在tf1环境下进行源码编译,然后进入tests目录中。参考以下命令执行python侧测试用例:
+```shell
+bash run_python_dt.sh
+```
+
+### C++侧测试用例
+
+运行C++侧测试用例所需依赖:
+
+- [googletest 1.8.1](https://github.com/google/googletest/archive/refs/tags/release-1.8.1.zip)
+- [emock 0.9.0](https://github.com/ez8-co/emock/archive/refs/tags/v0.9.0.zip)
+- [pybind11 v2.10.3](https://github.com/pybind/pybind11/archive/refs/tags/v2.10.3.zip)
+- [securec](https://github.com/huaweicloud/huaweicloud-sdk-c-obs/archive/refs/tags/v3.23.9.zip)
+
+将googletest、emock、pybind11和securec的压缩包放在与MxRec代码同级的opensource目录下,并且将其分别更名为googletest-release-1.8.1.zip、
+emock-0.9.0.zip、pybind11-2.10.3.zip、huaweicloud-sdk-c-obs-3.23.9.zip。如果没有opensource目录,则需要在MxRec同级的目录下手动创建opensource目录,
+然后将前述几个压缩包放在opensource目录下。
+
+如需使用C++测试用例,需要按照上述描述准备需要的依赖,准备好之后,进入src目录中。参考以下命令执行C++测试用例:
+
+tf1环境下使用如下命令:
+```shell
+bash test_ut.sh tf1
+```
+
+tf2环境下使用如下命令:
+```shell
+bash test_ut.sh tf2
+```
+
 ## 使用指导
 
 mxRec所支持的使用环境、功能特性、API接口与使用样例请参考昇腾开源社区MindX SDK产品文档。
--
Gitee

From 0b8faaac6b4c30fc46c232db67be5780e98fe72d Mon Sep 17 00:00:00 2001
From: yangzhen_BIG
Date: Sun, 7 Apr 2024 01:11:54 +0000
Subject: [PATCH 011/302] =?UTF-8?q?!73=20=E4=BF=AE=E5=A4=8Dint=E7=B1=BB?=
 =?UTF-8?q?=E5=9E=8B=E5=8F=82=E6=95=B0=E6=A0=A1=E9=AA=8C=20*=20=E4=BF=AE?=
 =?UTF-8?q?=E5=A4=8Dint=E5=8F=82=E6=95=B0=E6=A0=A1=E9=AA=8C=20*=20?=
 =?UTF-8?q?=E4=BF=AE=E5=A4=8Dint=E7=B1=BB=E5=9E=8B=E5=8F=82=E6=95=B0?=
 =?UTF-8?q?=E6=A0=A1=E9=AA=8C?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 mx_rec/validator/validator.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/mx_rec/validator/validator.py b/mx_rec/validator/validator.py
index c9abde87..013fe565 100644
--- a/mx_rec/validator/validator.py
+++ b/mx_rec/validator/validator.py
@@ -437,7 +437,14 @@ class IntValidator(NumValidator):
     def __init__(self, name: str, value: int, min_value: int = None, max_value: int = None,
                  invalid_options: List = None, constrained_options: List = None, msg: str = ""):
         super(IntValidator, self).__init__(name, value, min_value, max_value, invalid_options,
                                            constrained_options, msg)
-        self.register_checker(lambda: isinstance(self.value, int), msg if msg else f"type of '{name}' is not int")
+
+        def check_type():
+            if isinstance(self.value, bool):
+                # bool is subclass of int
+                return False
+            return isinstance(self.value, int)
+
+        self.register_checker(check_type, msg if msg else f"type of '{name}' is not int")
 
 
 class OptionalIntValidator(IntValidator):
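
Reviewer's note: the fix in PATCH 011/302 works around a Python quirk — `bool` is a subclass of `int`, so the old one-line `isinstance(self.value, int)` check silently accepted `True`/`False` where an integer parameter was required. A minimal, self-contained sketch of the behaviour the patched `check_type()` enforces (`is_strict_int` is an invented name for illustration, not code from the repository):

```python
def is_strict_int(value) -> bool:
    """Mirror the patched check: reject bool first, then accept int."""
    if isinstance(value, bool):  # bool subclasses int, so test it first
        return False
    return isinstance(value, int)

assert isinstance(True, int)       # the surprising built-in behaviour
assert is_strict_int(3)            # plain ints still validate
assert not is_strict_int(True)     # bools are now rejected
```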
--
Gitee

From de72afa620e5abe701f1cdf4fefa5d5811f5b2ff Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E7=BD=97=E5=B9=B8=E8=BF=90?=
Date: Sun, 7 Apr 2024 11:49:40 +0800
Subject: [PATCH 012/302] =?UTF-8?q?=E3=80=90=E4=BF=AE=E6=94=B9=E8=AF=B4?=
 =?UTF-8?q?=E6=98=8E=20Modification=E3=80=91test=20first=20time?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/core/hybrid_mgmt/hybrid_mgmt.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/core/hybrid_mgmt/hybrid_mgmt.cpp b/src/core/hybrid_mgmt/hybrid_mgmt.cpp
index 894dc230..d5563ce4 100644
--- a/src/core/hybrid_mgmt/hybrid_mgmt.cpp
+++ b/src/core/hybrid_mgmt/hybrid_mgmt.cpp
@@ -663,6 +663,7 @@ void HybridMgmt::EvalTask(TaskType type)
 /// \param channelId 通道索引(训练/推理)
 /// \param batchId 已处理的batch数
 /// \return
+// lqklqk
 bool HybridMgmt::ParseKeysHBM(int channelId, int& batchId)
 {
     LOG_INFO(MGMT + "nBatch:{} channelId:{} batchId:{}, ParseKeys with HBM mode start.",
--
Gitee

From fb3c55c3a417188c6d50b38c6031784b63e8aaed Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E7=BD=97=E5=B9=B8=E8=BF=90?=
Date: Mon, 8 Apr 2024 11:26:33 +0800
Subject: [PATCH 013/302] =?UTF-8?q?=E3=80=90=E4=BF=AE=E6=94=B9=E8=AF=B4?=
 =?UTF-8?q?=E6=98=8E=20Modification=E3=80=91=E5=85=A8=E5=B1=80=E5=8E=BB?=
 =?UTF-8?q?=E9=87=8Dcpp=E6=B5=8B=E6=94=B9=E5=8A=A8?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 mx_rec/core/asc/manager.py                    |  6 +++++-
 mx_rec/optimizers/adagrad.py                  | 10 ++++++++++
 mx_rec/optimizers/ftrl.py                     |  5 +++++
 mx_rec/optimizers/gradient_descent.py         |  5 +++++
 mx_rec/optimizers/gradient_descent_by_addr.py |  5 +++++
 mx_rec/optimizers/lazy_adam.py                |  5 +++++
 mx_rec/optimizers/lazy_adam_by_addr.py        |  5 +++++
 src/core/hybrid_mgmt/hybrid_mgmt.cpp          |  7 ++-----
 src/core/key_process/key_process.cpp          |  3 +--
 src/core/utils/common.cpp                     |  1 +
 src/core/utils/common.h                       |  8 +++++---
 src/core/utils/config.cpp                     | 15 +--------------
 src/core/utils/config.h                       |  7 -------
 src/pybind/module_main.cpp                    |  2 ++
 src/tests/utils/config_test.cpp               |  4 ----
 15 files changed, 52 insertions(+), 36 deletions(-)

diff --git a/mx_rec/core/asc/manager.py b/mx_rec/core/asc/manager.py
index 2829ab98..4d822b37 100644
--- a/mx_rec/core/asc/manager.py
+++ b/mx_rec/core/asc/manager.py
@@ -18,7 +18,7 @@
 import tensorflow as tf
 
 from mxrec_pybind import InitializeInfo, ConstantInitializerInfo, NormalInitializerInfo, EmbInfo, EmbInfoParams, \
-    ThresholdValue, HybridMgmt, RankInfo, USE_STATIC, USE_HOT, USE_DYNAMIC_EXPANSION
+    ThresholdValue, HybridMgmt, RankInfo, USE_STATIC, USE_HOT, USE_DYNAMIC_EXPANSION, USE_SUM_SAME_ID_GRADIENTS
 from mx_rec.util.communication.hccl_ops import get_rank_id, get_device_id, get_rank_size
 from mx_rec.util.initialize import ConfigInitializer
@@ -205,6 +205,10 @@ def initialize_emb_cache(table_info_list, threshold_list):
     if ConfigInitializer.get_instance().use_dynamic_expansion:
         option = option | USE_DYNAMIC_EXPANSION
 
+    optimizer = ConfigInitializer.get_instance().optimizer_config.optimizer_instance
+    if optimizer.derivative == 1:
+        option = option | USE_SUM_SAME_ID_GRADIENTS
+
     # [train_steps, eval_steps, save_steps] pass step information to HybridMgmt for data process loop
     rank_info = RankInfo(rank_id, device_id, rank_size, option, [train_steps, eval_steps, save_steps])
 
diff --git a/mx_rec/optimizers/adagrad.py b/mx_rec/optimizers/adagrad.py
index d99be3b3..a5fa7975 100644
--- a/mx_rec/optimizers/adagrad.py
+++ b/mx_rec/optimizers/adagrad.py
@@ -76,6 +76,16 @@ class CustomizedAdagrad(adagrad.AdagradOptimizer, CustomizedOptimizer):
                                                 initial_accumulator_value=initial_accumulator_value,
                                                 use_locking=use_locking,
                                                 name=self.unique_name)
+        self._slot_num = 1
+        self._derivative = 2
+
+    @property
+    def slot_num(self):
+        return self._slot_num
+
+    @property
+    def derivative(self):
+        return self._derivative
 
     def initialize_slots(self, var, table_instance):
         # Create slots for the first and second moments.
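
Reviewer's note: the pieces of PATCH 013/302 meet in the option bitmask — manager.py composes it on the Python side and the C++ `RankInfo` constructor unpacks each flag with a bitwise AND. A small sketch of the round trip, assuming the flag values mirror `HybridOption` in src/core/utils/common.h after this patch (`build_option` is an invented helper; note this commit still gates on `derivative == 1`, and PATCH 014 corrects the gate to `== 2`):

```python
USE_STATIC = 0x0001
USE_HOT = 0x0001 << 1
USE_DYNAMIC_EXPANSION = 0x0001 << 2
USE_SUM_SAME_ID_GRADIENTS = 0x0001 << 3

def build_option(use_static: bool, use_hot: bool,
                 use_dynamic_expansion: bool, derivative: int) -> int:
    option = 0
    if use_static:
        option |= USE_STATIC
    if use_hot:
        option |= USE_HOT
    if use_dynamic_expansion:
        option |= USE_DYNAMIC_EXPANSION
    if derivative == 1:  # as committed here; changed to == 2 in PATCH 014
        option |= USE_SUM_SAME_ID_GRADIENTS
    return option

# The C++ side recovers each flag the same way RankInfo does, e.g.
# useSumSameIdGradients = option & USE_SUM_SAME_ID_GRADIENTS.
assert build_option(True, False, False, 1) == 0b1001
```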
diff --git a/mx_rec/optimizers/ftrl.py b/mx_rec/optimizers/ftrl.py index 5c68b929..d6ddb093 100644 --- a/mx_rec/optimizers/ftrl.py +++ b/mx_rec/optimizers/ftrl.py @@ -80,11 +80,16 @@ class CustomizedFtrl(ftrl.FtrlOptimizer, CustomizedOptimizer): l2_shrinkage_regularization_strength=kwargs.get("l2_shrinkage_regularization_strength", 0.0) ) self._slot_num = 2 + self._derivative = 2 @property def slot_num(self): return self._slot_num + @property + def derivative(self): + return self._derivative + def initialize_slots(self, var, table_instance): val = constant_op.constant( self._initial_accumulator_value, dtype=var.dtype, shape=var.get_shape()) diff --git a/mx_rec/optimizers/gradient_descent.py b/mx_rec/optimizers/gradient_descent.py index 6881d6ad..2ba72789 100644 --- a/mx_rec/optimizers/gradient_descent.py +++ b/mx_rec/optimizers/gradient_descent.py @@ -55,11 +55,16 @@ class CustomizedGradientDescent(gradient_descent.GradientDescentOptimizer, Custo super(CustomizedGradientDescent, self).__init__(learning_rate=learning_rate, use_locking=use_locking, name=self.unique_name) self._slot_num = 0 + self._derivative = 1 @property def slot_num(self): return self._slot_num + @property + def derivative(self): + return self._derivative + def initialize_slots(self, var, table_instance): return [] diff --git a/mx_rec/optimizers/gradient_descent_by_addr.py b/mx_rec/optimizers/gradient_descent_by_addr.py index 22b33852..11a9fda6 100644 --- a/mx_rec/optimizers/gradient_descent_by_addr.py +++ b/mx_rec/optimizers/gradient_descent_by_addr.py @@ -60,11 +60,16 @@ class CustomizedGradientDescentByAddr(gradient_descent.GradientDescentOptimizer, name=self.unique_name) self._slot_num = 0 + self._derivative = 1 @property def slot_num(self): return self._slot_num + @property + def derivative(self): + return self._derivative + def initialize_slots(self, var, table_instance): return [] diff --git a/mx_rec/optimizers/lazy_adam.py b/mx_rec/optimizers/lazy_adam.py index d79b6d23..1ed68556 100644 --- a/mx_rec/optimizers/lazy_adam.py +++ b/mx_rec/optimizers/lazy_adam.py @@ -72,11 +72,16 @@ class CustomizedLazyAdam(adam.AdamOptimizer, CustomizedOptimizer): super(CustomizedLazyAdam, self).__init__(learning_rate=learning_rate, beta1=beta1, beta2=beta2, epsilon=epsilon, use_locking=use_locking, name=self.unique_name) self._slot_num = 2 + self._derivative = 2 @property def slot_num(self): return self._slot_num + @property + def derivative(self): + return self._derivative + def initialize_slots(self, var, table_instance): # Create slots for the first and second moments. def creat_one_single_slot(var, op_name): diff --git a/mx_rec/optimizers/lazy_adam_by_addr.py b/mx_rec/optimizers/lazy_adam_by_addr.py index 92252824..e147c7bf 100644 --- a/mx_rec/optimizers/lazy_adam_by_addr.py +++ b/mx_rec/optimizers/lazy_adam_by_addr.py @@ -73,11 +73,16 @@ class CustomizedLazyAdamByAddress(adam.AdamOptimizer, CustomizedOptimizer): name=self.unique_name) self._slot_num = 2 + self._derivative = 2 @property def slot_num(self): return self._slot_num + @property + def derivative(self): + return self._derivative + def get_slot_init_values(self): # return state value list of adam that needs to initialize in ASC DDR. 
initial_momentum_value = 0.0 diff --git a/src/core/hybrid_mgmt/hybrid_mgmt.cpp b/src/core/hybrid_mgmt/hybrid_mgmt.cpp index d5563ce4..eb618f40 100644 --- a/src/core/hybrid_mgmt/hybrid_mgmt.cpp +++ b/src/core/hybrid_mgmt/hybrid_mgmt.cpp @@ -663,7 +663,6 @@ void HybridMgmt::EvalTask(TaskType type) /// \param channelId 通道索引(训练/推理) /// \param batchId 已处理的batch数 /// \return -// lqklqk bool HybridMgmt::ParseKeysHBM(int channelId, int& batchId) { LOG_INFO(MGMT + "nBatch:{} channelId:{} batchId:{}, ParseKeys with HBM mode start.", @@ -705,8 +704,7 @@ bool HybridMgmt::ParseKeysHBM(int channelId, int& batchId) LOG_DEBUG("channelId:{} batchId:{}, sendLookupSyncTC(ms):{}", channelId, batchId, sendLookupSyncTC.ElapsedMS()); // 训练时,使用全局去重聚合梯度,发送全局去重的key和对应的恢复向量 - if (GlobalEnv::applyGradientsStrategy == ApplyGradientsStrategyOptions::SUM_SAME_ID_GRADIENTS_AND_APPLY && - channelId == TRAIN_CHANNEL_ID) { + if (mgmtRankInfo.useSumSameIdGradients && channelId == TRAIN_CHANNEL_ID) { SendUniqKeysAndRestoreVecHBM(channelId, batchId, embInfo, infoVecs); } @@ -865,8 +863,7 @@ bool HybridMgmt::ProcessEmbInfo(const std::string& embName, int batchId, int cha LOG_DEBUG("channelId:{} batchId:{}, hostHashMapProcessTC(ms):{}", channelId, batchId, hostHashMapProcessTC.ElapsedMS()); - if (GlobalEnv::applyGradientsStrategy == ApplyGradientsStrategyOptions::SUM_SAME_ID_GRADIENTS_AND_APPLY && - channelId == TRAIN_CHANNEL_ID && remainBatchOut) { + if (mgmtRankInfo.useSumSameIdGradients && channelId == TRAIN_CHANNEL_ID && remainBatchOut) { SendUniqKeysAndRestoreVecDDR(embName, batchId, channelId, ddrParam); } diff --git a/src/core/key_process/key_process.cpp b/src/core/key_process/key_process.cpp index f76f6907..8ab030a8 100644 --- a/src/core/key_process/key_process.cpp +++ b/src/core/key_process/key_process.cpp @@ -470,8 +470,7 @@ bool KeyProcess::KeyProcessTaskHelper(unique_ptr& batch, int channel, void KeyProcess::PushGlobalUniqueTensors(const unique_ptr>& tensors, KeysT& lookupKeys, int channel) { - if (GlobalEnv::applyGradientsStrategy == ApplyGradientsStrategyOptions::SUM_SAME_ID_GRADIENTS_AND_APPLY && - channel == TRAIN_CHANNEL_ID) { + if (rankInfo.useSumSameIdGradients && channel == TRAIN_CHANNEL_ID) { KeysT uniqueKeys; vector restoreVecSec; diff --git a/src/core/utils/common.cpp b/src/core/utils/common.cpp index 38e64444..9512b181 100644 --- a/src/core/utils/common.cpp +++ b/src/core/utils/common.cpp @@ -47,6 +47,7 @@ namespace MxRec { useStatic = static_cast(option) bitand HybridOption::USE_STATIC; useHot = static_cast(option) bitand HybridOption::USE_HOT; useDynamicExpansion = static_cast(option) bitand HybridOption::USE_DYNAMIC_EXPANSION; + useSumSameIdGradients = static_cast(option) bitand HybridOption::USE_SUM_SAME_ID_GRADIENTS; } RankInfo::RankInfo(int localRankSize, int option, const vector& maxStep) diff --git a/src/core/utils/common.h b/src/core/utils/common.h index f6c3de3f..9706a699 100644 --- a/src/core/utils/common.h +++ b/src/core/utils/common.h @@ -115,9 +115,10 @@ namespace MxRec { using TensorInfoT = std::tuple>>::iterator>; namespace HybridOption { - const unsigned int USE_STATIC = 0x001; - const unsigned int USE_HOT = 0x001 << 1; - const unsigned int USE_DYNAMIC_EXPANSION = 0x001 << 2; + const unsigned int USE_STATIC = 0x0001; + const unsigned int USE_HOT = 0x0001 << 1; + const unsigned int USE_DYNAMIC_EXPANSION = 0x0001 << 2; + const unsigned int USE_SUM_SAME_ID_GRADIENTS = 0x0001 << 3; }; string GetChipName(int devID); @@ -226,6 +227,7 @@ namespace MxRec { bool isDDR { false }; bool isSSDEnabled 
{ false }; bool useDynamicExpansion {false}; + bool useSumSameIdGradients {true}; std::vector ctrlSteps; // 包含三个步数: train_steps, eval_steps, save_steps }; diff --git a/src/core/utils/config.cpp b/src/core/utils/config.cpp index 9cfec739..57478553 100644 --- a/src/core/utils/config.cpp +++ b/src/core/utils/config.cpp @@ -20,13 +20,7 @@ See the License for the specific language governing permissions and using namespace std; namespace MxRec { - namespace ApplyGradientsStrategyOptions { - const std::string DIRECT_APPLY = "direct_apply"; - const std::string SUM_SAME_ID_GRADIENTS_AND_APPLY = "sum_same_id_gradients_and_apply"; - }; - // 设置环境变量默认值 - string GlobalEnv::applyGradientsStrategy = ApplyGradientsStrategyOptions::SUM_SAME_ID_GRADIENTS_AND_APPLY; int GlobalEnv::aclTimeout = -1; // 默认阻塞方式,一直等待直到数据接收完成。 int GlobalEnv::hdChannelSize = 40; // 默认通道深度40 int GlobalEnv::keyProcessThreadNum = 6; // 默认6个线程 @@ -42,12 +36,6 @@ namespace MxRec { /// 配置环境变量,Python侧已经做了变量值校验,CPP侧直接使用即可;bool类型,1代表true,0代表false void ConfigGlobalEnv() { - // 设置梯度策略 - const char *envStrategy = getenv(RecEnvNames::APPLY_GRADIENTS_STRATEGY); - if (envStrategy != nullptr) { - GlobalEnv::applyGradientsStrategy = envStrategy; - } - // 设置ACL超时时间 const char *envAclTimeout = getenv(RecEnvNames::ACL_TIMEOUT); if (envAclTimeout != nullptr) { @@ -117,9 +105,8 @@ namespace MxRec { void LogGlobalEnv() { - LOG_DEBUG("Environment variables are: [{}: {}], [{}: {}], [{}: {}], [{}: {}], [{}: {}], [{}: {}], " + LOG_DEBUG("Environment variables are: [{}: {}], [{}: {}], [{}: {}], [{}: {}], [{}: {}], " "[{}: {}], [{}: {}], [{}: {}], [{}: {}], [{}: {}], [{}: {}], [{}: {}]", - RecEnvNames::APPLY_GRADIENTS_STRATEGY, GlobalEnv::applyGradientsStrategy, RecEnvNames::ACL_TIMEOUT, GlobalEnv::aclTimeout, RecEnvNames::HD_CHANNEL_SIZE, GlobalEnv::hdChannelSize, RecEnvNames::KEY_PROCESS_THREAD_NUM, GlobalEnv::keyProcessThreadNum, diff --git a/src/core/utils/config.h b/src/core/utils/config.h index 4c56c0d4..3ecb4c36 100644 --- a/src/core/utils/config.h +++ b/src/core/utils/config.h @@ -20,7 +20,6 @@ See the License for the specific language governing permissions and namespace MxRec { namespace RecEnvNames { - const char *const APPLY_GRADIENTS_STRATEGY = "APPLY_GRADIENTS_STRATEGY"; const char *const ACL_TIMEOUT = "AclTimeout"; const char *const HD_CHANNEL_SIZE = "HD_CHANNEL_SIZE"; const char *const KEY_PROCESS_THREAD_NUM = "KEY_PROCESS_THREAD_NUM"; @@ -34,13 +33,7 @@ namespace MxRec { const char *const RECORD_KEY_COUNT = "RECORD_KEY_COUNT"; }; - namespace ApplyGradientsStrategyOptions { - extern const std::string DIRECT_APPLY; - extern const std::string SUM_SAME_ID_GRADIENTS_AND_APPLY; - }; - struct GlobalEnv { - static std::string applyGradientsStrategy; static int aclTimeout; static int hdChannelSize; static int keyProcessThreadNum; diff --git a/src/pybind/module_main.cpp b/src/pybind/module_main.cpp index 403692fb..b0249ca6 100644 --- a/src/pybind/module_main.cpp +++ b/src/pybind/module_main.cpp @@ -69,6 +69,8 @@ namespace { m.attr("USE_DYNAMIC_EXPANSION") = py::int_(HybridOption::USE_DYNAMIC_EXPANSION); + m.attr("USE_SUM_SAME_ID_GRADIENTS") = py::int_(HybridOption::USE_SUM_SAME_ID_GRADIENTS); + GetRankInfo(m); GetEmbInfoParams(m); diff --git a/src/tests/utils/config_test.cpp b/src/tests/utils/config_test.cpp index d7e51b57..54e0ec67 100644 --- a/src/tests/utils/config_test.cpp +++ b/src/tests/utils/config_test.cpp @@ -24,7 +24,6 @@ using namespace MxRec; void SetEnvironmentVariables() { - setenv(RecEnvNames::APPLY_GRADIENTS_STRATEGY, 
"sum_same_id_gradients_and_apply", 1); setenv(RecEnvNames::ACL_TIMEOUT, "100", 1); setenv(RecEnvNames::HD_CHANNEL_SIZE, "50", 1); setenv(RecEnvNames::KEY_PROCESS_THREAD_NUM, "8", 1); @@ -40,7 +39,6 @@ void SetEnvironmentVariables() void UnsetEnvironmentVariables() { - unsetenv(RecEnvNames::APPLY_GRADIENTS_STRATEGY); unsetenv(RecEnvNames::ACL_TIMEOUT); unsetenv(RecEnvNames::HD_CHANNEL_SIZE); unsetenv(RecEnvNames::KEY_PROCESS_THREAD_NUM); @@ -56,7 +54,6 @@ void UnsetEnvironmentVariables() TEST(GlobalEnv, DefaultValues) { - ASSERT_EQ(GlobalEnv::applyGradientsStrategy, ApplyGradientsStrategyOptions::SUM_SAME_ID_GRADIENTS_AND_APPLY); ASSERT_EQ(GlobalEnv::aclTimeout, -1); ASSERT_EQ(GlobalEnv::hdChannelSize, 40); ASSERT_EQ(GlobalEnv::keyProcessThreadNum, 6); @@ -77,7 +74,6 @@ TEST(GlobalEnv, ConfigGlobalEnv) ConfigGlobalEnv(); // 验证环境变量是否已经被正确配置 - ASSERT_EQ(GlobalEnv::applyGradientsStrategy, "sum_same_id_gradients_and_apply"); ASSERT_EQ(GlobalEnv::aclTimeout, 100); ASSERT_EQ(GlobalEnv::hdChannelSize, 50); ASSERT_EQ(GlobalEnv::keyProcessThreadNum, 8); -- Gitee From 63a8f1b259325a43152e2939ac699ca3f7297997 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=BD=97=E5=B9=B8=E8=BF=90?= Date: Mon, 8 Apr 2024 11:51:05 +0800 Subject: [PATCH 014/302] =?UTF-8?q?=E3=80=90=E4=BF=AE=E6=94=B9=E8=AF=B4?= =?UTF-8?q?=E6=98=8E=20Modification=E3=80=91=E5=85=A8=E5=B1=80=E5=8E=BB?= =?UTF-8?q?=E9=87=8Dcpp=E6=B5=8B=E6=94=B9=E5=8A=A8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mx_rec/constants/constants.py | 1 - mx_rec/core/asc/manager.py | 2 +- tests/mx_rec/core/test_build_graph.py | 4 ---- 3 files changed, 1 insertion(+), 6 deletions(-) diff --git a/mx_rec/constants/constants.py b/mx_rec/constants/constants.py index 03fa28b4..a57297fa 100644 --- a/mx_rec/constants/constants.py +++ b/mx_rec/constants/constants.py @@ -166,7 +166,6 @@ class ASCAnchorAttr(Enum): MOCK_LOOKUP_RESULT = "mock_lookup_result" RESTORE_VECTOR_SECOND = "restore_vector_second" UNIQUE_KEYS = "unique_keys" - GRADIENTS_STRATEGY = "gradients_strategy" IS_GRAD = "is_grad" diff --git a/mx_rec/core/asc/manager.py b/mx_rec/core/asc/manager.py index 4d822b37..f50037ea 100644 --- a/mx_rec/core/asc/manager.py +++ b/mx_rec/core/asc/manager.py @@ -206,7 +206,7 @@ def initialize_emb_cache(table_info_list, threshold_list): option = option | USE_DYNAMIC_EXPANSION optimizer = ConfigInitializer.get_instance().optimizer_config.optimizer_instance - if optimizer.derivative == 1: + if optimizer.derivative == 2: option = option | USE_SUM_SAME_ID_GRADIENTS # [train_steps, eval_steps, save_steps] pass step information to HybridMgmt for data process loop diff --git a/tests/mx_rec/core/test_build_graph.py b/tests/mx_rec/core/test_build_graph.py index c15d851f..0b90b790 100644 --- a/tests/mx_rec/core/test_build_graph.py +++ b/tests/mx_rec/core/test_build_graph.py @@ -346,7 +346,6 @@ class TestGetPreProcessedTensorForAscFunc(unittest.TestCase): self.config = dict(table_name="test_table", channel_id=0, is_hbm=True, emb_size=8, ext_emb_size=8, feat_cnt=8, batch_size=32, rank_size=8, send_count=1, device_id=0, use_hot=True, use_dynamic_expansion=True) - global_env.apply_gradients_strategy = "direct_apply" @mock.patch.multiple("mx_rec.core.asc.build_graph", get_restore_vector=mock.MagicMock(return_value=[0, 0]), @@ -363,7 +362,6 @@ class TestGetPreProcessedTensorForAscFunc(unittest.TestCase): from mx_rec.core.asc.build_graph import get_preprocessed_tensor_for_asc - global_env.apply_gradients_strategy = "sum_same_id_gradients_and_apply" 
with tf.Graph().as_default(): mock_config_initializer = MockConfigInitializer(use_static=True) build_graph_config_initializer.get_instance = mock.Mock(return_value=mock_config_initializer) @@ -388,7 +386,6 @@ class TestGetPreProcessedTensorForAscFunc(unittest.TestCase): from mx_rec.core.asc.build_graph import get_preprocessed_tensor_for_asc - global_env.apply_gradients_strategy = "sum_same_id_gradients_and_apply" with tf.Graph().as_default(): mock_config_initializer = MockConfigInitializer() build_graph_config_initializer.get_instance = mock.Mock(return_value=mock_config_initializer) @@ -413,7 +410,6 @@ class TestGetPreProcessedTensorForAscFunc(unittest.TestCase): from mx_rec.core.asc.build_graph import get_preprocessed_tensor_for_asc - global_env.apply_gradients_strategy = "sum_same_id_gradients_and_apply" with tf.Graph().as_default(): mock_config_initializer = MockConfigInitializer() build_graph_config_initializer.get_instance = mock.Mock(return_value=mock_config_initializer) -- Gitee From 5b0cb455ad570189dacc5e1b00942af04d74f810 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=BD=97=E5=B9=B8=E8=BF=90?= Date: Mon, 8 Apr 2024 15:45:54 +0800 Subject: [PATCH 015/302] =?UTF-8?q?=E3=80=90=E4=BF=AE=E6=94=B9=E8=AF=B4?= =?UTF-8?q?=E6=98=8E=20Modification=E3=80=91=E5=85=A8=E5=B1=80=E5=8E=BB?= =?UTF-8?q?=E9=87=8D=E4=BC=98=E5=8C=96-lazyAdam=E9=80=82=E9=85=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/demo/little_demo/run_mode.py | 4 +- mx_rec/constants/constants.py | 1 - mx_rec/core/asc/build_graph.py | 50 ----------------- mx_rec/core/emb/dynamic_sparse_embedding.py | 10 ++-- mx_rec/core/emb/sparse_embedding.py | 7 +-- mx_rec/optimizers/base.py | 61 +++++++++++++++++++++ mx_rec/optimizers/lazy_adam.py | 10 ++-- 7 files changed, 75 insertions(+), 68 deletions(-) diff --git a/examples/demo/little_demo/run_mode.py b/examples/demo/little_demo/run_mode.py index 0f7a8cc4..305d9f64 100644 --- a/examples/demo/little_demo/run_mode.py +++ b/examples/demo/little_demo/run_mode.py @@ -95,11 +95,11 @@ class RunMode: self.train_ops.append(dense_optimizer.apply_gradients(avg_grads)) if bool(int(os.getenv("USE_DYNAMIC_EXPANSION", 0))): - from mx_rec.constants.constants import ASCEND_SPARSE_LOOKUP_LOCAL_EMB, ASCEND_SPARSE_LOOKUP_UNIQUE_KEYS + from mx_rec.constants.constants import ASCEND_SPARSE_LOOKUP_LOCAL_EMB, ASCEND_SPARSE_LOOKUP_ID_OFFSET train_emb_list = tf.compat.v1.get_collection(ASCEND_SPARSE_LOOKUP_LOCAL_EMB) - train_address_list = tf.compat.v1.get_collection(ASCEND_SPARSE_LOOKUP_UNIQUE_KEYS) + train_address_list = tf.compat.v1.get_collection(ASCEND_SPARSE_LOOKUP_ID_OFFSET) # do sparse optimization by addr local_grads = tf.gradients(loss, train_emb_list) # local_embedding diff --git a/mx_rec/constants/constants.py b/mx_rec/constants/constants.py index a57297fa..2c2cd2fe 100644 --- a/mx_rec/constants/constants.py +++ b/mx_rec/constants/constants.py @@ -22,7 +22,6 @@ ASCEND_GLOBAL_HASHTABLE_COLLECTION = "ASCEND_GLOBAL_HASHTABLE_COLLECTION" ASCEND_CUTTING_POINT_INITIALIZER = "ASCEND_CUTTING_POINT_INITIALIZER" ASCEND_SPARSE_LOOKUP_ENTRANCE = "ASCEND_SPARSE_LOOKUP_ENTRANCE" ASCEND_SPARSE_LOOKUP_ID_OFFSET = "ASCEND_SPARSE_LOOKUP_ID_OFFSET" -ASCEND_SPARSE_LOOKUP_UNIQUE_KEYS = "ASCEND_SPARSE_LOOKUP_UNIQUE_KEYS" ASCEND_TIMESTAMP = "ASCEND_TIMESTAMP" ASCEND_SPARSE_LOOKUP_LOCAL_EMB = "ASCEND_SPARSE_LOOKUP_LOCAL_EMB" EMPTY_STR = "" diff --git a/mx_rec/core/asc/build_graph.py b/mx_rec/core/asc/build_graph.py index 13ddad4a..2bb72621 100644 --- 
a/mx_rec/core/asc/build_graph.py +++ b/mx_rec/core/asc/build_graph.py @@ -22,7 +22,6 @@ import tensorflow as tf import mxrec_pybind from mx_rec.util.initialize import ConfigInitializer from mx_rec.util.tf_version_adapter import npu_ops -from mx_rec.constants.constants import TRAIN_CHANNEL_ID from mx_rec.util.log import logger @@ -81,46 +80,6 @@ def get_id_offsets(max_lookup_vec_size, config): return id_offsets, swap_pos, swap_len -def get_restore_vector_second(max_lookup_vec_size: int, config: dict) -> tf.Tensor: - """ - Get restore vector which is calculated after the second all2all - :param max_lookup_vec_size: the size of restore_vector_second - :param config: embedding config - :return: the restore vector calculated after the second all2all - """ - logger.debug('Channel %s_restore_second_%s was built for getnext', - config.get("table_name"), config.get("channel_id")) - with tf.compat.v1.variable_scope(config.get("table_name"), reuse=tf.compat.v1.AUTO_REUSE): - restore_vector_second = npu_ops.gen_npu_ops.get_next( - output_types=[tf.int32], - output_shapes=[[max_lookup_vec_size]], - channel_name=f'{config.get("table_name")}_restore_second_{config.get("channel_id")}')[0] - return restore_vector_second - - -def get_unique_keys(max_lookup_vec_size: int, config: dict) -> tf.Tensor: - """ - Get the global unique keys which is calculated after the second all2all - :param max_lookup_vec_size: the size of global unique keys - :param config: embedding config - :return: the global unique keys calculated after the second all2all - """ - logger.debug('Channel %s_uniquekeys_%s was built for getnext', config.get("table_name"), config.get("channel_id")) - with tf.compat.v1.variable_scope(config.get("table_name"), reuse=tf.compat.v1.AUTO_REUSE): - if config.get("use_dynamic_expansion"): - unique_keys = npu_ops.gen_npu_ops.get_next( - output_types=[tf.int64], - output_shapes=[[max_lookup_vec_size]], - channel_name=f'{config.get("table_name")}_uniquekeys_{config.get("channel_id")}')[0] - return unique_keys - - unique_keys = npu_ops.gen_npu_ops.get_next( - output_types=[tf.int32], - output_shapes=[[max_lookup_vec_size]], - channel_name=f'{config.get("table_name")}_uniquekeys_{config.get("channel_id")}')[0] - return unique_keys - - def get_all2all_args(use_static: bool, config: dict) -> Optional[list]: """ Get all2all parameters for dynamic condition @@ -211,13 +170,4 @@ def get_preprocessed_tensor_for_asc(table, config): 'all2all_args': all2all_args, } - if config.get("channel_id") != TRAIN_CHANNEL_ID: - return result - - with tf.compat.v1.variable_scope("restore_vector_second"): - restore_vector_second = get_restore_vector_second(max_lookup_vec_size, config) - - with tf.compat.v1.variable_scope("unique_keys"): - unique_keys = get_unique_keys(max_lookup_vec_size, config) - result.update({'restore_vector_second': restore_vector_second, 'unique_keys': unique_keys}) return result diff --git a/mx_rec/core/emb/dynamic_sparse_embedding.py b/mx_rec/core/emb/dynamic_sparse_embedding.py index 194b2795..c2e8d9e5 100644 --- a/mx_rec/core/emb/dynamic_sparse_embedding.py +++ b/mx_rec/core/emb/dynamic_sparse_embedding.py @@ -6,10 +6,9 @@ import abc from typing import Optional, Union, Callable import tensorflow as tf -from tensorflow.python.ops import array_ops from mx_rec.constants.constants import ASCEND_TABLE_NAME_MUST_CONTAIN, ASCEND_SPARSE_LOOKUP_LOCAL_EMB, \ - ASCEND_SPARSE_LOOKUP_UNIQUE_KEYS + ASCEND_SPARSE_LOOKUP_ID_OFFSET from mx_rec.core.asc.feature_spec import FeatureSpec from mx_rec.core.asc.build_graph 
import get_preprocessed_tensor_for_asc
 from mx_rec.core.emb.base_sparse_embedding import BaseSparseEmbedding
@@ -51,9 +50,7 @@ class DynamicSparseEmbedding(BaseSparseEmbedding):
 
     def _get_update_grad(self, local_grad: tf.Tensor, result: dict,
                          table: Union[tf.compat.v1.Variable, tf.Tensor]) -> Union[tf.IndexedSlices, tf.Tensor]:
-        return tf.compat.v1.unsorted_segment_sum(local_grad,
-                                                 result.get("restore_vector_second"),
-                                                 array_ops.shape(result.get("unique_keys"))[0])
+        return local_grad
 
     def _get_local_embeddings(self, table: Union[tf.compat.v1.Variable, tf.Tensor], result: dict,
                               feature_spec: FeatureSpec, **kwargs) -> tf.Tensor:
@@ -72,7 +69,8 @@ class DynamicSparseEmbedding(BaseSparseEmbedding):
             return sparse_forward_fn(local_embeddings)
 
         tf.compat.v1.add_to_collection(ASCEND_SPARSE_LOOKUP_LOCAL_EMB, local_embeddings)
-        tf.compat.v1.add_to_collection(ASCEND_SPARSE_LOOKUP_UNIQUE_KEYS, result.get("unique_keys"))
+        tf.compat.v1.add_to_collection(ASCEND_SPARSE_LOOKUP_ID_OFFSET, result.get("id_offsets"))
+        # tf.compat.v1.add_to_collection(ASCEND_SPARSE_LOOKUP_UNIQUE_KEYS, result.get("unique_keys"))
 
         return sparse_forward_fn(local_embeddings)
 
diff --git a/mx_rec/core/emb/sparse_embedding.py b/mx_rec/core/emb/sparse_embedding.py
index d8ce63b1..938f917d 100644
--- a/mx_rec/core/emb/sparse_embedding.py
+++ b/mx_rec/core/emb/sparse_embedding.py
@@ -53,11 +53,8 @@ class SparseEmbedding(BaseSparseEmbedding):
 
     def _get_update_grad(self, local_grad: tf.Tensor, result: dict,
                          table: Union[tf.compat.v1.Variable, tf.Tensor]) -> Union[tf.IndexedSlices, tf.Tensor]:
-        unique_local_grad = tf.compat.v1.unsorted_segment_sum(local_grad,
-                                                              result.get("restore_vector_second"),
-                                                              array_ops.shape(result.get("unique_keys"))[0])
-        return ops.IndexedSlices(values=unique_local_grad,
-                                 indices=result.get("unique_keys"),
+        return ops.IndexedSlices(values=local_grad,
+                                 indices=result.get("id_offsets"),
                                  dense_shape=tf.shape(table))
 
     def _get_local_embeddings(self, table: Union[tf.compat.v1.Variable, tf.Tensor], result: dict,
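
Reviewer's note: after this change, `SparseEmbedding._get_update_grad` no longer pre-aggregates duplicate IDs; it hands the optimizer a raw `tf.IndexedSlices` keyed by `id_offsets` (the dynamic-expansion variant returns the raw gradient tensor directly), and the summation moves into `CustomizedOptimizer.sum_same_id_gradients` in the next diff. A toy sketch of what such a sparse gradient carries — all tensor values below are invented:

```python
import tensorflow as tf

local_grad = tf.constant([[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]])
id_offsets = tf.constant([4, 7, 4])        # row 4 was looked up twice
table_shape = tf.constant([10, 2])         # hypothetical 10-row table, dim 2

sparse_grad = tf.IndexedSlices(values=local_grad,
                               indices=id_offsets,
                               dense_shape=table_shape)
# Densifying sums repeated indices, which is why deferring the
# aggregation is safe: row 4 still ends up with 0.1 + 0.3 per column.
dense_grad = tf.scatter_nd(tf.expand_dims(id_offsets, 1),
                           local_grad, table_shape)
```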
diff --git a/mx_rec/optimizers/base.py b/mx_rec/optimizers/base.py
index a5d68a70..b4115bce 100644
--- a/mx_rec/optimizers/base.py
+++ b/mx_rec/optimizers/base.py
@@ -21,9 +21,13 @@ from __future__ import print_function
 
 from collections import defaultdict
 
+import tensorflow as tf
 from tensorflow.python.framework import ops
+from tensorflow.python.ops import array_ops
 from tensorflow.python.training.optimizer import _TensorProcessor
 
+from mx_rec.util.tf_version_adapter import npu_ops
+from mx_rec.util.initialize import ConfigInitializer
 from mx_rec.util.log import logger
 
@@ -54,6 +58,63 @@ class CustomizedOptimizer:
         self.unique_name = name + "_" + str(count)
         self.base_name = name
 
+    def get_restore_vector_second(table_name) -> tf.Tensor:
+        """
+        Get restore vector which is calculated after the second all2all
+        :param table_name: embedding table_name
+        :return: the restore vector calculated after the second all2all
+        """
+        channel_id = 0
+        logger.debug('Channel %s_restore_second_%s was built for getnext',
+                     table_name, channel_id)
+        with tf.compat.v1.variable_scope(table_name, reuse=tf.compat.v1.AUTO_REUSE):
+            restore_vector_second = npu_ops.gen_npu_ops.get_next(
+                output_types=[tf.int32],
+                output_shapes=[[None]],
+                channel_name=f'{table_name}_restore_second_{channel_id}')[0]
+        return restore_vector_second
+
+    def get_unique_keys(table_name, is_expansion) -> tf.Tensor:
+        """
+        Get the global unique keys which is calculated after the second all2all
+        :param table_name: embedding table_name
+        :param is_expansion: use dynamic expansion
+        :return: the global unique keys calculated after the second all2all
+        """
+        channel_id = 0
+        logger.debug('Channel %s_uniquekeys_%s was built for getnext', table_name, channel_id)
+        with tf.compat.v1.variable_scope(table_name, reuse=tf.compat.v1.AUTO_REUSE):
+            if is_expansion:
+                unique_keys = npu_ops.gen_npu_ops.get_next(
+                    output_types=[tf.int64],
+                    output_shapes=[[None]],
+                    channel_name=f'{table_name}_uniquekeys_{channel_id}')[0]
+                return unique_keys
+
+            unique_keys = npu_ops.gen_npu_ops.get_next(
+                output_types=[tf.int32],
+                output_shapes=[[None]],
+                channel_name=f'{table_name}_uniquekeys_{channel_id}')[0]
+        return unique_keys
+
+    def sum_same_id_gradients(self, grad, var, is_expansion):
+        table_instance = ConfigInitializer.get_instance().sparse_embed_config.get_table_instance(var)
+        table_name = table_instance.table_name
+        with tf.compat.v1.variable_scope("restore_vector_second"):
+            restore_vector_second = self.get_restore_vector_second(table_name)
+
+        with tf.compat.v1.variable_scope("unique_keys"):
+            unique_keys = self.get_unique_keys(table_name, is_expansion)
+
+        unique_local_grad = tf.compat.v1.unsorted_segment_sum(grad,
+                                                              restore_vector_second,
+                                                              array_ops.shape(unique_keys)[0])
+        if is_expansion:
+            unique_local_grad = ops.IndexedSlices(values=unique_local_grad,
+                                                  indices=unique_keys,
+                                                  dense_shape=tf.shape(var))
+        return unique_local_grad, unique_keys
+
     def custom_update_op(self, opt, grad):
 
         if isinstance(grad, ops.Tensor):
diff --git a/mx_rec/optimizers/lazy_adam.py b/mx_rec/optimizers/lazy_adam.py
index 1ed68556..70549702 100644
--- a/mx_rec/optimizers/lazy_adam.py
+++ b/mx_rec/optimizers/lazy_adam.py
@@ -156,6 +156,8 @@ class CustomizedLazyAdam(adam.AdamOptimizer, CustomizedOptimizer):
                                  lambda x, i, v: tf.compat.v1.scatter_nd_add(x, i, v))
 
     def _apply_sparse_shared(self, grad, var, indices, scatter_nd_add):
+        unique_local_grad, unique_keys = self.sum_same_id_gradients(grad=grad, var=var, is_expansion=False)
+
         power_b1, power_b2 = self._get_beta_accumulators()
         power_b1 = math_ops.cast(power_b1, var.dtype.base_dtype)
         power_b2 = math_ops.cast(power_b2, var.dtype.base_dtype)
@@ -166,17 +168,17 @@ class CustomizedLazyAdam(adam.AdamOptimizer, CustomizedOptimizer):
         temp_epsilon = temp.get("temp_epsilon")
         learning_rate = tf.divide(temp_lr * math_ops.sqrt(1 - power_b2), (1 - power_b1))
 
-        abs_indices = tf.math.maximum(indices, 0)
-        nd_indices = tf.expand_dims(indices, 1)
+        abs_indices = tf.math.maximum(unique_keys, 0)
+        nd_indices = tf.expand_dims(unique_keys, 1)
 
         momentum = self.get_slot(var, "m")
         old_m_slice = tf.gather(momentum, abs_indices)
-        m_t_slice = temp_b1 * old_m_slice + (1 - temp_b1) * grad
+        m_t_slice = temp_b1 * old_m_slice + (1 - temp_b1) * unique_local_grad
         m_update_op = scatter_nd_add(momentum, nd_indices, m_t_slice - old_m_slice)
 
         velocity = self.get_slot(var, "v")
         old_v_slice = tf.gather(velocity, abs_indices)
-        v_t_slice = temp_b2 * old_v_slice + (1 - temp_b2) * math_ops.square(grad)
+        v_t_slice = temp_b2 * old_v_slice + (1 - temp_b2) * math_ops.square(unique_local_grad)
         v_update_op = scatter_nd_add(velocity, nd_indices, v_t_slice - old_v_slice)
 
         denominator_slice = math_ops.sqrt(v_t_slice) + temp_epsilon
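
Reviewer's note: the numeric core of `sum_same_id_gradients` above is `unsorted_segment_sum` — the restore vector maps every lookup row to its slot in the globally deduplicated key list, so gradients of repeated IDs collapse into one row, and the lazy-Adam update that follows touches each embedding row exactly once. A toy sketch with invented values:

```python
import tensorflow as tf

grad = tf.constant([[1.0, 1.0], [2.0, 2.0], [4.0, 4.0]])
restore_vector_second = tf.constant([0, 1, 0])  # rows 0 and 2 share a key
unique_keys = tf.constant([11, 42])             # deduplicated key list

unique_grad = tf.math.unsorted_segment_sum(
    grad, restore_vector_second, tf.shape(unique_keys)[0])
# unique_grad -> [[5.0, 5.0], [2.0, 2.0]]: one aggregated row per unique key
```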
--
Gitee

From f9de15aae3ba106aa216729f37505220d584aa96 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E7=BD=97=E5=B9=B8=E8=BF=90?=
Date: Mon, 8 Apr 2024 16:07:10 +0800
Subject: [PATCH 016/302] =?UTF-8?q?=E3=80=90=E4=BF=AE=E6=94=B9=E8=AF=B4?=
 =?UTF-8?q?=E6=98=8E=20Modification=E3=80=91=E5=85=A8=E5=B1=80=E5=8E=BB?=
 =?UTF-8?q?=E9=87=8D=E4=BC=98=E5=8C=96-lazyAdam=E9=80=82=E9=85=8D?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 mx_rec/core/emb/dynamic_sparse_embedding.py | 1 -
 mx_rec/optimizers/base.py                   | 4 ++--
 mx_rec/optimizers/lazy_adam_by_addr.py      | 2 ++
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/mx_rec/core/emb/dynamic_sparse_embedding.py b/mx_rec/core/emb/dynamic_sparse_embedding.py
index c2e8d9e5..671c593e 100644
--- a/mx_rec/core/emb/dynamic_sparse_embedding.py
+++ b/mx_rec/core/emb/dynamic_sparse_embedding.py
@@ -70,7 +70,6 @@ class DynamicSparseEmbedding(BaseSparseEmbedding):
 
         tf.compat.v1.add_to_collection(ASCEND_SPARSE_LOOKUP_LOCAL_EMB, local_embeddings)
         tf.compat.v1.add_to_collection(ASCEND_SPARSE_LOOKUP_ID_OFFSET, result.get("id_offsets"))
-        # tf.compat.v1.add_to_collection(ASCEND_SPARSE_LOOKUP_UNIQUE_KEYS, result.get("unique_keys"))
 
         return sparse_forward_fn(local_embeddings)
 
diff --git a/mx_rec/optimizers/base.py b/mx_rec/optimizers/base.py
index b4115bce..77067b58 100644
--- a/mx_rec/optimizers/base.py
+++ b/mx_rec/optimizers/base.py
@@ -58,7 +58,7 @@ class CustomizedOptimizer:
         self.unique_name = name + "_" + str(count)
         self.base_name = name
 
-    def get_restore_vector_second(table_name) -> tf.Tensor:
+    def get_restore_vector_second(self, table_name: str) -> tf.Tensor:
         """
         Get restore vector which is calculated after the second all2all
         :param table_name: embedding table_name
@@ -74,7 +74,7 @@ class CustomizedOptimizer:
                 channel_name=f'{table_name}_restore_second_{channel_id}')[0]
         return restore_vector_second
 
-    def get_unique_keys(table_name, is_expansion) -> tf.Tensor:
+    def get_unique_keys(self, table_name: str, is_expansion: bool) -> tf.Tensor:
         """
         Get the global unique keys which is calculated after the second all2all
         :param table_name: embedding table_name
diff --git a/mx_rec/optimizers/lazy_adam_by_addr.py b/mx_rec/optimizers/lazy_adam_by_addr.py
index e147c7bf..ad9d6ca8 100644
--- a/mx_rec/optimizers/lazy_adam_by_addr.py
+++ b/mx_rec/optimizers/lazy_adam_by_addr.py
@@ -119,6 +119,8 @@ class CustomizedLazyAdamByAddress(adam.AdamOptimizer, CustomizedOptimizer):
                                         addr)
 
     def _apply_sparse_shared(self, grad, addr):
+        unique_local_grad, unique_keys = self.sum_same_id_gradients(grad=grad, var=addr, is_expansion=True)
+
         power_b1, power_b2 = self._get_beta_accumulators()
         power_b1 = math_ops.cast(power_b1, grad.dtype.base_dtype)
         power_b2 = math_ops.cast(power_b2, grad.dtype.base_dtype)
--
Gitee

From 278118aa8ec96d788f090d656172fa68aeaa86f4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E7=BD=97=E5=B9=B8=E8=BF=90?=
Date: Mon, 8 Apr 2024 16:07:10 +0800
Subject: [PATCH 017/302] =?UTF-8?q?=E3=80=90=E4=BF=AE=E6=94=B9=E8=AF=B4?=
 =?UTF-8?q?=E6=98=8E=20Modification=E3=80=91=E5=85=A8=E5=B1=80=E5=8E=BB?=
 =?UTF-8?q?=E9=87=8D=E4=BC=98=E5=8C=96-lazyAdam=E9=80=82=E9=85=8D?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 mx_rec/core/emb/dynamic_sparse_embedding.py |  1 -
 mx_rec/optimizers/base.py                   | 14 +++++++++-----
 mx_rec/optimizers/lazy_adam_by_addr.py      | 18 ++++++++++--------
 3 files changed, 19 insertions(+), 14 deletions(-)

diff --git a/mx_rec/core/emb/dynamic_sparse_embedding.py b/mx_rec/core/emb/dynamic_sparse_embedding.py
index c2e8d9e5..671c593e 100644
--- a/mx_rec/core/emb/dynamic_sparse_embedding.py
+++ b/mx_rec/core/emb/dynamic_sparse_embedding.py
@@ -70,7 +70,6 @@ class DynamicSparseEmbedding(BaseSparseEmbedding):
 
         tf.compat.v1.add_to_collection(ASCEND_SPARSE_LOOKUP_LOCAL_EMB, local_embeddings)
tf.compat.v1.add_to_collection(ASCEND_SPARSE_LOOKUP_ID_OFFSET, result.get("id_offsets")) - # tf.compat.v1.add_to_collection(ASCEND_SPARSE_LOOKUP_UNIQUE_KEYS, result.get("unique_keys")) return sparse_forward_fn(local_embeddings) diff --git a/mx_rec/optimizers/base.py b/mx_rec/optimizers/base.py index b4115bce..91c72d52 100644 --- a/mx_rec/optimizers/base.py +++ b/mx_rec/optimizers/base.py @@ -58,7 +58,7 @@ class CustomizedOptimizer: self.unique_name = name + "_" + str(count) self.base_name = name - def get_restore_vector_second(table_name) -> tf.Tensor: + def get_restore_vector_second(self, table_name: str) -> tf.Tensor: """ Get restore vector which is calculated after the second all2all :param table_name: embedding table_name @@ -74,7 +74,7 @@ class CustomizedOptimizer: channel_name=f'{table_name}_restore_second_{channel_id}')[0] return restore_vector_second - def get_unique_keys(table_name, is_expansion) -> tf.Tensor: + def get_unique_keys(self, table_name: str, is_expansion: bool) -> tf.Tensor: """ Get the global unique keys which is calculated after the second all2all :param table_name: embedding table_name @@ -98,8 +98,12 @@ class CustomizedOptimizer: return unique_keys def sum_same_id_gradients(self, grad, var, is_expansion): - table_instance = ConfigInitializer.get_instance().sparse_embed_config.get_table_instance(var) - table_name = table_instance.table_name + if isinstance(var, ops.Tensor): + # 扩容模式从scope获取表名 + table_name = var.op.name.split('/')[0] + else: + table_instance = ConfigInitializer.get_instance().sparse_embed_config.get_table_instance(var) + table_name = table_instance.table_name with tf.compat.v1.variable_scope("restore_vector_second"): restore_vector_second = self.get_restore_vector_second(table_name) @@ -109,7 +113,7 @@ class CustomizedOptimizer: unique_local_grad = tf.compat.v1.unsorted_segment_sum(grad, restore_vector_second, array_ops.shape(unique_keys)[0]) - if is_expansion: + if not is_expansion: unique_local_grad = ops.IndexedSlices(values=unique_local_grad, indices=unique_keys, dense_shape=tf.shape(var)) diff --git a/mx_rec/optimizers/lazy_adam_by_addr.py b/mx_rec/optimizers/lazy_adam_by_addr.py index e147c7bf..0f7d7139 100644 --- a/mx_rec/optimizers/lazy_adam_by_addr.py +++ b/mx_rec/optimizers/lazy_adam_by_addr.py @@ -119,10 +119,12 @@ class CustomizedLazyAdamByAddress(adam.AdamOptimizer, CustomizedOptimizer): addr) def _apply_sparse_shared(self, grad, addr): + unique_local_grad, unique_addr = self.sum_same_id_gradients(grad=grad, var=addr, is_expansion=True) + power_b1, power_b2 = self._get_beta_accumulators() - power_b1 = math_ops.cast(power_b1, grad.dtype.base_dtype) - power_b2 = math_ops.cast(power_b2, grad.dtype.base_dtype) - temp = self._cast_to_base_type(grad) + power_b1 = math_ops.cast(power_b1, unique_local_grad.dtype.base_dtype) + power_b2 = math_ops.cast(power_b2, unique_local_grad.dtype.base_dtype) + temp = self._cast_to_base_type(unique_local_grad) temp_lr = temp.get("temp_lr") temp_b1 = temp.get("temp_b1") temp_b2 = temp.get("temp_b2") @@ -130,23 +132,23 @@ class CustomizedLazyAdamByAddress(adam.AdamOptimizer, CustomizedOptimizer): learning_rate = tf.divide(temp_lr * math_ops.sqrt(1 - power_b2), (1 - power_b1)) host_pipeline_ops = import_host_pipeline_ops() - dim = grad.shape.as_list()[-1] + dim = unique_local_grad.shape.as_list()[-1] combined_tensor = \ - host_pipeline_ops.embedding_lookup_by_address(addr, embedding_dim=3 * dim, embedding_type=1) + host_pipeline_ops.embedding_lookup_by_address(unique_addr, embedding_dim=3 * dim, 
embedding_type=1) split_length = [dim] + [dim] + [dim] split_tensors = tf.split(combined_tensor, split_length, axis=1) old_m_slice = split_tensors[1] - m_t_slice = temp_b1 * old_m_slice + (1 - temp_b1) * grad + m_t_slice = temp_b1 * old_m_slice + (1 - temp_b1) * unique_local_grad old_v_slice = split_tensors[2] - v_t_slice = temp_b2 * old_v_slice + (1 - temp_b2) * math_ops.square(grad) + v_t_slice = temp_b2 * old_v_slice + (1 - temp_b2) * math_ops.square(unique_local_grad) denominator_slice = math_ops.sqrt(v_t_slice) + temp_epsilon update_list = [tf.divide(-learning_rate * m_t_slice, denominator_slice)] + [m_t_slice - old_m_slice] + \ [v_t_slice - old_v_slice] update_tensor = tf.concat(update_list, axis=1) - var_update_op = host_pipeline_ops.embedding_update_by_address(addr, update_tensor, update_type=0) + var_update_op = host_pipeline_ops.embedding_update_by_address(unique_addr, update_tensor, update_type=0) return var_update_op -- Gitee From b85136384f035fcea249b4452d9a119cc633c253 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=BD=95=E9=9C=96?= Date: Mon, 8 Apr 2024 19:26:38 +0800 Subject: [PATCH 018/302] =?UTF-8?q?mxrec=E6=9E=84=E5=BB=BA=E4=BC=98?= =?UTF-8?q?=E5=8C=96=EF=BC=9A=E4=BD=BF=E7=94=A8python3.7=20setup.py=20bdis?= =?UTF-8?q?t=5Fwheel=E6=96=B9=E5=BC=8F=E6=9E=84=E5=BB=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ...ld_tf1_with_opensource.sh => build_tf1.sh} | 47 ++------- ...ld_tf2_with_opensource.sh => build_tf2.sh} | 47 ++------- build/{build.sh => gen_mxrec_tar_pkg.sh} | 96 +++++-------------- build/gen_tf1_tar_pkg.sh | 33 +++++++ build/gen_tf2_tar_pkg.sh | 33 +++++++ setup.py | 83 +++++----------- setup_tf1.py | 88 +++++++++++++++++ setup_tf2.py | 88 +++++++++++++++++ 8 files changed, 304 insertions(+), 211 deletions(-) rename build/{build_tf1_with_opensource.sh => build_tf1.sh} (75%) rename build/{build_tf2_with_opensource.sh => build_tf2.sh} (75%) rename build/{build.sh => gen_mxrec_tar_pkg.sh} (44%) create mode 100644 build/gen_tf1_tar_pkg.sh create mode 100644 build/gen_tf2_tar_pkg.sh create mode 100644 setup_tf1.py create mode 100644 setup_tf2.py diff --git a/build/build_tf1_with_opensource.sh b/build/build_tf1.sh similarity index 75% rename from build/build_tf1_with_opensource.sh rename to build/build_tf1.sh index ff59571c..fe2a78be 100644 --- a/build/build_tf1_with_opensource.sh +++ b/build/build_tf1.sh @@ -15,7 +15,7 @@ # ============================================================================== ################################################################## -# build_tf1_with_opensource.sh 编译MxRec和动态扩容算子 +# build_tf1.sh 编译MxRec和动态扩容算子 # 编译环境:Python3.7.5 GCC 7.3.0 CMake 3.20.6 # 代码主要分为四部分: # 1、准备编译MxRec所需依赖:pybind11(v2.10.3) securec @@ -64,33 +64,6 @@ source /opt/buildtools/tf1_env/bin/activate tf1_path=$(dirname "$(dirname "$(which python3.7)")")/lib/python3.7/site-packages/tensorflow_core deactivate tf1_env -project_output_path="${MxRec_DIR}"/output/ -VERSION_FILE="${MxRec_DIR}"/../mindxsdk/build/conf/config.yaml - -function get_version() { - if [ -f "$VERSION_FILE" ]; then - VERSION=$(sed '/.*mindxsdk:/!d;s/.*: //' "$VERSION_FILE") - if [[ "$VERSION" == *.[b/B]* ]] && [[ "$VERSION" != *.[RC/rc]* ]]; then - VERSION=${VERSION%.*} - fi - else - VERSION="5.0.0" - fi -} - -rm -rf "${project_output_path}" -rm -rf "${SCRIPT_DIR}/lib" - -# 获取MxRec版本信息 -get_version -export VERSION -echo "MindX SDK MxRec: ${VERSION}" >> ./version.info - -pkg_dir=mindxsdk-mxrec -rm -rf "${pkg_dir}" -mkdir "${pkg_dir}" -mv version.info 
"${pkg_dir}" - # 配置MxRec C++代码路径和AccCTR路径 src_path="${MxRec_DIR}"/src acc_ctr_path="${MxRec_DIR}"/src/AccCTR @@ -142,11 +115,6 @@ function gen_wheel_file() touch "${src_path}"/libasc/__init__.py rm -rf "${MxRec_DIR}"/mx_rec/libasc mv "${src_path}"/libasc "${MxRec_DIR}"/mx_rec - python3.7 setup.py bdist_wheel --plat-name=linux_$(arch) - mkdir -p "$1" - echo "moving whl file $1" - mv dist/mx_rec*.whl "$1" - rm -rf "${MxRec_DIR}"/mx_rec/libasc } # start to build MxRec @@ -158,13 +126,12 @@ echo "---------------- compile MxRec so files ----------------" compile_so_file "${tf1_path}" echo "---------------- collect so files and mv them to libasc ----------------" collect_so_file -echo "---------------- generate MxRec wheel package ----------------" -gen_wheel_file "$SCRIPT_DIR"/"${pkg_dir}"/tf1_whl +gen_wheel_file echo "---------------- compile MxRec success!!!! ----------------" # start to compile cust op -echo "---------------- start to compile cust op ----------------" -cd "${MxRec_DIR}"/cust_op/cust_op_by_addr -chmod u+x run.sh -./run.sh -echo "---------------- compile cust op success!!!! ----------------" \ No newline at end of file +#echo "---------------- start to compile cust op ----------------" +#cd "${MxRec_DIR}"/cust_op/cust_op_by_addr +#chmod u+x run.sh +#./run.sh +#echo "---------------- compile cust op success!!!! ----------------" \ No newline at end of file diff --git a/build/build_tf2_with_opensource.sh b/build/build_tf2.sh similarity index 75% rename from build/build_tf2_with_opensource.sh rename to build/build_tf2.sh index 08aaf164..50a6c5a0 100644 --- a/build/build_tf2_with_opensource.sh +++ b/build/build_tf2.sh @@ -15,7 +15,7 @@ # ============================================================================== ################################################################## -# build_tf2_with_opensource.sh 编译MxRec和动态扩容算子 +# build_tf2.sh 编译MxRec和动态扩容算子 # 编译环境:Python3.7.5 GCC 7.3.0 CMake 3.20.6 # 代码主要分为四部分: # 1、准备编译MxRec所需依赖:pybind11(v2.10.3) securec @@ -64,33 +64,6 @@ source /opt/buildtools/tf2_env/bin/activate tf2_path=$(dirname "$(dirname "$(which python3.7)")")/lib/python3.7/site-packages/tensorflow deactivate tf2_env -project_output_path="${MxRec_DIR}"/output/ -VERSION_FILE="${MxRec_DIR}"/../mindxsdk/build/conf/config.yaml - -function get_version() { - if [ -f "$VERSION_FILE" ]; then - VERSION=$(sed '/.*mindxsdk:/!d;s/.*: //' "$VERSION_FILE") - if [[ "$VERSION" == *.[b/B]* ]] && [[ "$VERSION" != *.[RC/rc]* ]]; then - VERSION=${VERSION%.*} - fi - else - VERSION="5.0.0" - fi -} - -rm -rf "${project_output_path}" -rm -rf "${SCRIPT_DIR}/lib" - -# 获取MxRec版本信息 -get_version -export VERSION -echo "MindX SDK MxRec: ${VERSION}" >> ./version.info - -pkg_dir=mindxsdk-mxrec -rm -rf "${pkg_dir}" -mkdir "${pkg_dir}" -mv version.info "${pkg_dir}" - # 配置MxRec C++代码路径和AccCTR路径 src_path="${MxRec_DIR}"/src acc_ctr_path="${MxRec_DIR}"/src/AccCTR @@ -142,11 +115,6 @@ function gen_wheel_file() touch "${src_path}"/libasc/__init__.py rm -rf "${MxRec_DIR}"/mx_rec/libasc mv "${src_path}"/libasc "${MxRec_DIR}"/mx_rec - python3.7 setup.py bdist_wheel --plat-name=linux_$(arch) - mkdir -p "$1" - echo "moving whl file $1" - mv dist/mx_rec*.whl "$1" - rm -rf "${MxRec_DIR}"/mx_rec/libasc } # start to build MxRec @@ -158,13 +126,12 @@ echo "---------------- compile MxRec so files ----------------" compile_so_file "${tf2_path}" echo "---------------- collect so files and mv them to libasc ----------------" collect_so_file -echo "---------------- generate MxRec wheel package ----------------" -gen_wheel_file 
"$SCRIPT_DIR"/"${pkg_dir}"/tf2_whl +gen_wheel_file echo "---------------- compile MxRec success!!!! ----------------" # start to compile cust op -echo "---------------- start to compile cust op ----------------" -cd "${MxRec_DIR}"/cust_op/cust_op_by_addr -chmod u+x run.sh -./run.sh -echo "---------------- compile cust op success!!!! ----------------" \ No newline at end of file +#echo "---------------- start to compile cust op ----------------" +#cd "${MxRec_DIR}"/cust_op/cust_op_by_addr +#chmod u+x run.sh +#./run.sh +#echo "---------------- compile cust op success!!!! ----------------" \ No newline at end of file diff --git a/build/build.sh b/build/gen_mxrec_tar_pkg.sh similarity index 44% rename from build/build.sh rename to build/gen_mxrec_tar_pkg.sh index 0eb688fd..2a53285f 100644 --- a/build/build.sh +++ b/build/gen_mxrec_tar_pkg.sh @@ -18,11 +18,9 @@ set -e warn() { echo >&2 -e "\033[1;31m[WARN ][Depend ] $1\033[1;37m" ; } ARCH="$(uname -m)" SCRIPT_DIR=$(dirname "$(readlink -f "$0")") -ROOT_DIR=$(dirname "${SCRIPT_DIR}") -cd "$SCRIPT_DIR" +MxRec_DIR=$(dirname "${SCRIPT_DIR}") - -VERSION_FILE="${ROOT_DIR}"/../mindxsdk/build/conf/config.yaml +VERSION_FILE="${MxRec_DIR}"/../mindxsdk/build/conf/config.yaml get_version() { if [ -f "$VERSION_FILE" ]; then VERSION=$(sed '/.*mindxsdk:/!d;s/.*: //' "$VERSION_FILE") @@ -30,96 +28,54 @@ get_version() { VERSION=${VERSION%.*} fi else - VERSION="5.0.0" - fi -} - -remove() -{ - if [ -d "$1" ]; then - rm -rf "$1" - elif [ -f "$1" ]; then - rm -f "$1" + VERSION="6.0.RC2" fi } -project_output_path="${ROOT_DIR}"/output/ -remove "${project_output_path}" -remove "${SCRIPT_DIR}/lib" get_version -export VERSION echo "MindX SDK mxrec: ${VERSION}" >> ./version.info pkg_dir=mindxsdk-mxrec -remove "${pkg_dir}" -mkdir "${pkg_dir}" -mv version.info "${pkg_dir}" - -src_path="${ROOT_DIR}"/src -cd "${ROOT_DIR}" - release_tar=Ascend-"${pkg_dir}"_"${VERSION}"_linux-"${ARCH}".tar.gz +mv version.info "${SCRIPT_DIR}"/"${pkg_dir}" -gen_tar_file() +function gen_tar_file() { - cd "${src_path}" - cp -r "${src_path}"/../cust_op ../build/"${pkg_dir}" - cp -r "${src_path}"/../examples ../build/"${pkg_dir}" + cd "${MxRec_DIR}" + cp -r ./cust_op ./build/"${pkg_dir}" + cp -r ./examples ./build/"${pkg_dir}" # change dirs and files 's permission - chmod 550 ../build/"${pkg_dir}"/tf1_whl - chmod 550 ../build/"${pkg_dir}"/tf1_whl/mx_rec*.whl - chmod 550 ../build/"${pkg_dir}"/tf2_whl - chmod 550 ../build/"${pkg_dir}"/tf2_whl/mx_rec*.whl - chmod 550 ../build/"${pkg_dir}"/cust_op/ - chmod 550 ../build/"${pkg_dir}"/cust_op/cust_op_by_addr - cd ../build/"${pkg_dir}"/cust_op/cust_op_by_addr + chmod 550 ./build/"${pkg_dir}"/tf1_whl + chmod 550 ./build/"${pkg_dir}"/tf1_whl/mx_rec*.whl + chmod 550 ./build/"${pkg_dir}"/tf2_whl + chmod 550 ./build/"${pkg_dir}"/tf2_whl/mx_rec*.whl + chmod 550 ./build/"${pkg_dir}"/cust_op/ + chmod 550 ./build/"${pkg_dir}"/cust_op/cust_op_by_addr + cd ./build/"${pkg_dir}"/cust_op/cust_op_by_addr chmod 550 *.sh chmod 640 *.json chmod 550 op_host op_kernel op_host/* op_kernel/* cd - - cd ../build + cd ./build tar -zvcf "${release_tar}" "${pkg_dir}" || { warn "compression failed, packages might be broken" } - mv "${release_tar}" "${SCRIPT_DIR}"/../output/ + mv "${release_tar}" ../output/ } -clean() +function clean() { - remove "${ROOT_DIR}"/dist - remove "${ROOT_DIR}"/install - remove "${ROOT_DIR}"/mx_rec.egg-info - remove "${ROOT_DIR}"/src/build - remove "${ROOT_DIR}"/build/bdist.linux-"$(arch)" - remove "${ROOT_DIR}"/build/tf2_env - remove 
"${ROOT_DIR}"/build/tf1_env - remove "${ROOT_DIR}"/build/lib - remove "${ROOT_DIR}"/build/mindxsdk-mxrec + rm -rf "${MxRec_DIR}"/dist + rm -rf "${MxRec_DIR}"/mx_rec.egg-info + rm -rf "${MxRec_DIR}"/src/build + rm -rf "${MxRec_DIR}"/mx_rec/libasc + rm -rf "${MxRec_DIR}"/build/lib + rm -rf "${MxRec_DIR}"/build/bdist.linux-${ARCH} } +gen_tar_file -if [ "$(uname -m)" = "x86_64" ] -then - echo "-----Build gen tar -----" - bash ${ROOT_DIR}/build/build_tf1_with_opensource.sh - bash ${ROOT_DIR}/build/build_tf2_with_opensource.sh - gen_tar_file - echo "-----Build gen tar finished-----" - - # clean - echo "-----Done-----" -fi - -if [ "$(uname -m)" = "aarch64" ] -then - echo "-----Build gen tar -----" - bash ${ROOT_DIR}/build/build_tf1_with_opensource.sh - bash ${ROOT_DIR}/build/build_tf2_with_opensource.sh - gen_tar_file - echo "-----Build gen tar finished-----" +clean - # clean - echo "-----Done-----" -fi \ No newline at end of file diff --git a/build/gen_tf1_tar_pkg.sh b/build/gen_tf1_tar_pkg.sh new file mode 100644 index 00000000..0464597b --- /dev/null +++ b/build/gen_tf1_tar_pkg.sh @@ -0,0 +1,33 @@ +#!/bin/bash +# Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +set -e +warn() { echo >&2 -e "\033[1;31m[WARN ][Depend ] $1\033[1;37m" ; } +ARCH="$(uname -m)" +SCRIPT_DIR=$(dirname "$(readlink -f "$0")") +MxRec_DIR=$(dirname "${SCRIPT_DIR}") +pkg_dir=mindxsdk-mxrec + +function move_whl_pkg() { + mkdir -p "$SCRIPT_DIR"/"${pkg_dir}"/tf1_whl + mv ${MxRec_DIR}/dist/mx_rec*.whl "$SCRIPT_DIR"/"${pkg_dir}"/tf1_whl + cd "$SCRIPT_DIR"/"${pkg_dir}"/tf1_whl + whl_file=$(ls .) + mv "$whl_file" "${whl_file/any/linux_${ARCH}}" + cd - +} + +move_whl_pkg \ No newline at end of file diff --git a/build/gen_tf2_tar_pkg.sh b/build/gen_tf2_tar_pkg.sh new file mode 100644 index 00000000..e9d71f48 --- /dev/null +++ b/build/gen_tf2_tar_pkg.sh @@ -0,0 +1,33 @@ +#!/bin/bash +# Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +set -e +warn() { echo >&2 -e "\033[1;31m[WARN ][Depend ] $1\033[1;37m" ; } +ARCH="$(uname -m)" +SCRIPT_DIR=$(dirname "$(readlink -f "$0")") +MxRec_DIR=$(dirname "${SCRIPT_DIR}") +pkg_dir=mindxsdk-mxrec + +function move_whl_pkg() { + mkdir -p "$SCRIPT_DIR"/"${pkg_dir}"/tf2_whl + mv ${MxRec_DIR}/dist/mx_rec*.whl "$SCRIPT_DIR"/"${pkg_dir}"/tf2_whl + cd "$SCRIPT_DIR"/"${pkg_dir}"/tf2_whl + whl_file=$(ls .) + mv "$whl_file" "${whl_file/any/linux_${ARCH}}" + cd - +} + +move_whl_pkg \ No newline at end of file diff --git a/setup.py b/setup.py index efb4c994..ead4083f 100644 --- a/setup.py +++ b/setup.py @@ -16,64 +16,25 @@ # ============================================================================== import os -import stat -from setuptools import setup, find_packages -import pkg_resources -from setuptools.extern.packaging import version as packaging_version - - -# Patch Version class to preserve original version string -class NoNormalizeVersion(packaging_version.Version): - def __init__(self, version): - self._orig_version = version - super().__init__(version) - - def __str__(self): - return self._orig_version - - -packaging_version.Version = NoNormalizeVersion -# Patch safe_version() to prevent version normalization -pkg_resources.safe_version = lambda v: v - -try: - with open("README.md") as file: - LONG_DESCRIPTION = file.read() -except IOError: - LONG_DESCRIPTION = "" - -env_version = os.getenv("VERSION") -VERSION = env_version if env_version is not None else '5.0.rc3' - -INIT_FILE = "mx_rec/__init__.py" -with open(INIT_FILE, 'r') as file: - lines = file.readlines() - -for idx, line in enumerate(lines): - if "__version__ = " not in line: - continue - lines[idx] = f"__version__ = '{VERSION}'\n" - break - -FLAG = os.O_WRONLY | os.O_TRUNC -MODE = stat.S_IWUSR | stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH -with os.fdopen(os.open(INIT_FILE, FLAG, MODE), 'w') as out: - out.writelines(lines) - -setup( - name='mx_rec', - version=VERSION, - author='HUAWEI Inc', - description='MindX SDK Recommend', - long_description=LONG_DESCRIPTION, - # include mx_rec - packages=find_packages( - where='.', - include=["mx_rec*"] - ), - package_dir={}, - # other file - package_data={'': ['tools/*', 'tools/*/*', '*.yml', '*.sh', '*.so*']}, - # dependency - python_requires='>=3.7.5' -) +import shutil +import subprocess + +# clean pkg_dir existed +pkg_dir = "./build/mindxsdk-mxrec" +if os.path.exists(pkg_dir): + shutil.rmtree(pkg_dir) + +# build tf1's wheel file +res = subprocess.run(["python3.7", "setup_tf1.py", "bdist_wheel"], shell=False) +if res.returncode: + raise RuntimeError(f"build tf1's wheel file failed!") + +# build tf2's wheel file +res = subprocess.run(["python3.7", "setup_tf2.py", "bdist_wheel"], shell=False) +if res.returncode: + raise RuntimeError(f"build tf2's wheel file failed!") + +# copy cust_op, examples files, etc. Then gen mxrec's tar pkg +res = subprocess.run(["bash", "./build/gen_mxrec_tar_pkg.sh"], shell=False) +if res.returncode: + raise RuntimeError(f"gen mxrec's tar pkg failed!") diff --git a/setup_tf1.py b/setup_tf1.py new file mode 100644 index 00000000..4ad4cf20 --- /dev/null +++ b/setup_tf1.py @@ -0,0 +1,88 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +import os +import stat +from setuptools import setup, find_packages +import pkg_resources +from setuptools.extern.packaging import version as packaging_version +import subprocess + + +# Patch Version class to preserve original version string +class NoNormalizeVersion(packaging_version.Version): + def __init__(self, version): + self._orig_version = version + super().__init__(version) + + def __str__(self): + return self._orig_version + + +packaging_version.Version = NoNormalizeVersion +# Patch safe_version() to prevent version normalization +pkg_resources.safe_version = lambda v: v + +try: + with open("README.md") as file: + LONG_DESCRIPTION = file.read() +except IOError: + LONG_DESCRIPTION = "" + +env_version = os.getenv("VERSION") +VERSION = env_version if env_version is not None else '6.0.RC2' + +INIT_FILE = "mx_rec/__init__.py" +with open(INIT_FILE, 'r') as file: + lines = file.readlines() + +for idx, line in enumerate(lines): + if "__version__ = " not in line: + continue + lines[idx] = f"__version__ = '{VERSION}'\n" + break + +FLAG = os.O_WRONLY | os.O_TRUNC +MODE = stat.S_IWUSR | stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH +with os.fdopen(os.open(INIT_FILE, FLAG, MODE), 'w') as out: + out.writelines(lines) + +# compile so files +res = subprocess.run(["bash", f"./build/build_tf1.sh"], shell=False) +if res.returncode: + raise RuntimeError("compile so files failed!") + +setup( + name='mx_rec', + version=VERSION, + author='HUAWEI Inc', + description='MindX SDK Recommend', + long_description=LONG_DESCRIPTION, + # include mx_rec + packages=find_packages( + where='.', + include=["mx_rec*"] + ), + # other file + package_data={'': ['tools/*', 'tools/*/*', '*.yml', '*.sh', '*.so*']}, + # dependency + python_requires='>=3.7.5' +) + +res = subprocess.run(["bash", f"./build/gen_tf1_tar_pkg.sh"], shell=False) +if res.returncode: + raise RuntimeError(f"gen tf1 tar pkg failed!") diff --git a/setup_tf2.py b/setup_tf2.py new file mode 100644 index 00000000..3bb52ffd --- /dev/null +++ b/setup_tf2.py @@ -0,0 +1,88 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +import os +import stat +from setuptools import setup, find_packages +import pkg_resources +from setuptools.extern.packaging import version as packaging_version +import subprocess + + +# Patch Version class to preserve original version string +class NoNormalizeVersion(packaging_version.Version): + def __init__(self, version): + self._orig_version = version + super().__init__(version) + + def __str__(self): + return self._orig_version + + +packaging_version.Version = NoNormalizeVersion +# Patch safe_version() to prevent version normalization +pkg_resources.safe_version = lambda v: v + +try: + with open("README.md") as file: + LONG_DESCRIPTION = file.read() +except IOError: + LONG_DESCRIPTION = "" + +env_version = os.getenv("VERSION") +VERSION = env_version if env_version is not None else '6.0.RC2' + +INIT_FILE = "mx_rec/__init__.py" +with open(INIT_FILE, 'r') as file: + lines = file.readlines() + +for idx, line in enumerate(lines): + if "__version__ = " not in line: + continue + lines[idx] = f"__version__ = '{VERSION}'\n" + break + +FLAG = os.O_WRONLY | os.O_TRUNC +MODE = stat.S_IWUSR | stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH +with os.fdopen(os.open(INIT_FILE, FLAG, MODE), 'w') as out: + out.writelines(lines) + +# compile so files +res = subprocess.run(["bash", f"./build/build_tf2.sh"], shell=False) +if res.returncode: + raise RuntimeError("compile so files failed!") + +setup( + name='mx_rec', + version=VERSION, + author='HUAWEI Inc', + description='MindX SDK Recommend', + long_description=LONG_DESCRIPTION, + # include mx_rec + packages=find_packages( + where='.', + include=["mx_rec*"] + ), + # other file + package_data={'': ['tools/*', 'tools/*/*', '*.yml', '*.sh', '*.so*']}, + # dependency + python_requires='>=3.7.5' +) + +res = subprocess.run(["bash", f"./build/gen_tf2_tar_pkg.sh"], shell=False) +if res.returncode: + raise RuntimeError(f"gen tf2 tar pkg failed!") -- Gitee From 3e47771661fe9eccf47d2d01fad6d4b4364cea3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=BD=95=E9=9C=96?= Date: Mon, 8 Apr 2024 19:26:38 +0800 Subject: [PATCH 019/302] =?UTF-8?q?mxrec=E6=9E=84=E5=BB=BA=E4=BC=98?= =?UTF-8?q?=E5=8C=96=EF=BC=9A=E4=BD=BF=E7=94=A8python3.7=20setup.py=20bdis?= =?UTF-8?q?t=5Fwheel=E6=96=B9=E5=BC=8F=E6=9E=84=E5=BB=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ...ld_tf1_with_opensource.sh => build_tf1.sh} | 44 +------- ...ld_tf2_with_opensource.sh => build_tf2.sh} | 44 +------- build/{build.sh => gen_mxrec_tar_pkg.sh} | 102 ++++++------------ build/gen_tf1_tar_pkg.sh | 33 ++++++ build/gen_tf2_tar_pkg.sh | 33 ++++++ setup.py | 83 ++++---------- setup_tf1.py | 88 +++++++++++++++ setup_tf2.py | 88 +++++++++++++++ 8 files changed, 300 insertions(+), 215 deletions(-) rename build/{build_tf1_with_opensource.sh => build_tf1.sh} (75%) rename build/{build_tf2_with_opensource.sh => build_tf2.sh} (75%) rename build/{build.sh => gen_mxrec_tar_pkg.sh} (44%) create mode 100644 build/gen_tf1_tar_pkg.sh create mode 100644 build/gen_tf2_tar_pkg.sh create mode 100644 setup_tf1.py create mode 100644 setup_tf2.py diff --git a/build/build_tf1_with_opensource.sh b/build/build_tf1.sh similarity index 75% rename from build/build_tf1_with_opensource.sh rename to build/build_tf1.sh index ff59571c..f59c13fa 100644 --- a/build/build_tf1_with_opensource.sh +++ b/build/build_tf1.sh @@ -15,7 +15,7 @@ # ============================================================================== 
################################################################## -# build_tf1_with_opensource.sh 编译MxRec和动态扩容算子 +# build_tf1.sh 编译MxRec和动态扩容算子 # 编译环境:Python3.7.5 GCC 7.3.0 CMake 3.20.6 # 代码主要分为四部分: # 1、准备编译MxRec所需依赖:pybind11(v2.10.3) securec @@ -64,33 +64,6 @@ source /opt/buildtools/tf1_env/bin/activate tf1_path=$(dirname "$(dirname "$(which python3.7)")")/lib/python3.7/site-packages/tensorflow_core deactivate tf1_env -project_output_path="${MxRec_DIR}"/output/ -VERSION_FILE="${MxRec_DIR}"/../mindxsdk/build/conf/config.yaml - -function get_version() { - if [ -f "$VERSION_FILE" ]; then - VERSION=$(sed '/.*mindxsdk:/!d;s/.*: //' "$VERSION_FILE") - if [[ "$VERSION" == *.[b/B]* ]] && [[ "$VERSION" != *.[RC/rc]* ]]; then - VERSION=${VERSION%.*} - fi - else - VERSION="5.0.0" - fi -} - -rm -rf "${project_output_path}" -rm -rf "${SCRIPT_DIR}/lib" - -# 获取MxRec版本信息 -get_version -export VERSION -echo "MindX SDK MxRec: ${VERSION}" >> ./version.info - -pkg_dir=mindxsdk-mxrec -rm -rf "${pkg_dir}" -mkdir "${pkg_dir}" -mv version.info "${pkg_dir}" - # 配置MxRec C++代码路径和AccCTR路径 src_path="${MxRec_DIR}"/src acc_ctr_path="${MxRec_DIR}"/src/AccCTR @@ -142,11 +115,6 @@ function gen_wheel_file() touch "${src_path}"/libasc/__init__.py rm -rf "${MxRec_DIR}"/mx_rec/libasc mv "${src_path}"/libasc "${MxRec_DIR}"/mx_rec - python3.7 setup.py bdist_wheel --plat-name=linux_$(arch) - mkdir -p "$1" - echo "moving whl file $1" - mv dist/mx_rec*.whl "$1" - rm -rf "${MxRec_DIR}"/mx_rec/libasc } # start to build MxRec @@ -158,13 +126,5 @@ echo "---------------- compile MxRec so files ----------------" compile_so_file "${tf1_path}" echo "---------------- collect so files and mv them to libasc ----------------" collect_so_file -echo "---------------- generate MxRec wheel package ----------------" -gen_wheel_file "$SCRIPT_DIR"/"${pkg_dir}"/tf1_whl +gen_wheel_file echo "---------------- compile MxRec success!!!! ----------------" - -# start to compile cust op -echo "---------------- start to compile cust op ----------------" -cd "${MxRec_DIR}"/cust_op/cust_op_by_addr -chmod u+x run.sh -./run.sh -echo "---------------- compile cust op success!!!! 
----------------" \ No newline at end of file diff --git a/build/build_tf2_with_opensource.sh b/build/build_tf2.sh similarity index 75% rename from build/build_tf2_with_opensource.sh rename to build/build_tf2.sh index 08aaf164..70acb99f 100644 --- a/build/build_tf2_with_opensource.sh +++ b/build/build_tf2.sh @@ -15,7 +15,7 @@ # ============================================================================== ################################################################## -# build_tf2_with_opensource.sh 编译MxRec和动态扩容算子 +# build_tf2.sh 编译MxRec和动态扩容算子 # 编译环境:Python3.7.5 GCC 7.3.0 CMake 3.20.6 # 代码主要分为四部分: # 1、准备编译MxRec所需依赖:pybind11(v2.10.3) securec @@ -64,33 +64,6 @@ source /opt/buildtools/tf2_env/bin/activate tf2_path=$(dirname "$(dirname "$(which python3.7)")")/lib/python3.7/site-packages/tensorflow deactivate tf2_env -project_output_path="${MxRec_DIR}"/output/ -VERSION_FILE="${MxRec_DIR}"/../mindxsdk/build/conf/config.yaml - -function get_version() { - if [ -f "$VERSION_FILE" ]; then - VERSION=$(sed '/.*mindxsdk:/!d;s/.*: //' "$VERSION_FILE") - if [[ "$VERSION" == *.[b/B]* ]] && [[ "$VERSION" != *.[RC/rc]* ]]; then - VERSION=${VERSION%.*} - fi - else - VERSION="5.0.0" - fi -} - -rm -rf "${project_output_path}" -rm -rf "${SCRIPT_DIR}/lib" - -# 获取MxRec版本信息 -get_version -export VERSION -echo "MindX SDK MxRec: ${VERSION}" >> ./version.info - -pkg_dir=mindxsdk-mxrec -rm -rf "${pkg_dir}" -mkdir "${pkg_dir}" -mv version.info "${pkg_dir}" - # 配置MxRec C++代码路径和AccCTR路径 src_path="${MxRec_DIR}"/src acc_ctr_path="${MxRec_DIR}"/src/AccCTR @@ -142,11 +115,6 @@ function gen_wheel_file() touch "${src_path}"/libasc/__init__.py rm -rf "${MxRec_DIR}"/mx_rec/libasc mv "${src_path}"/libasc "${MxRec_DIR}"/mx_rec - python3.7 setup.py bdist_wheel --plat-name=linux_$(arch) - mkdir -p "$1" - echo "moving whl file $1" - mv dist/mx_rec*.whl "$1" - rm -rf "${MxRec_DIR}"/mx_rec/libasc } # start to build MxRec @@ -158,13 +126,5 @@ echo "---------------- compile MxRec so files ----------------" compile_so_file "${tf2_path}" echo "---------------- collect so files and mv them to libasc ----------------" collect_so_file -echo "---------------- generate MxRec wheel package ----------------" -gen_wheel_file "$SCRIPT_DIR"/"${pkg_dir}"/tf2_whl +gen_wheel_file echo "---------------- compile MxRec success!!!! ----------------" - -# start to compile cust op -echo "---------------- start to compile cust op ----------------" -cd "${MxRec_DIR}"/cust_op/cust_op_by_addr -chmod u+x run.sh -./run.sh -echo "---------------- compile cust op success!!!! 
----------------" \ No newline at end of file diff --git a/build/build.sh b/build/gen_mxrec_tar_pkg.sh similarity index 44% rename from build/build.sh rename to build/gen_mxrec_tar_pkg.sh index 0eb688fd..72ccfe49 100644 --- a/build/build.sh +++ b/build/gen_mxrec_tar_pkg.sh @@ -18,11 +18,9 @@ set -e warn() { echo >&2 -e "\033[1;31m[WARN ][Depend ] $1\033[1;37m" ; } ARCH="$(uname -m)" SCRIPT_DIR=$(dirname "$(readlink -f "$0")") -ROOT_DIR=$(dirname "${SCRIPT_DIR}") -cd "$SCRIPT_DIR" +MxRec_DIR=$(dirname "${SCRIPT_DIR}") - -VERSION_FILE="${ROOT_DIR}"/../mindxsdk/build/conf/config.yaml +VERSION_FILE="${MxRec_DIR}"/../mindxsdk/build/conf/config.yaml get_version() { if [ -f "$VERSION_FILE" ]; then VERSION=$(sed '/.*mindxsdk:/!d;s/.*: //' "$VERSION_FILE") @@ -30,96 +28,60 @@ get_version() { VERSION=${VERSION%.*} fi else - VERSION="5.0.0" - fi -} - -remove() -{ - if [ -d "$1" ]; then - rm -rf "$1" - elif [ -f "$1" ]; then - rm -f "$1" + VERSION="6.0.RC2" fi } -project_output_path="${ROOT_DIR}"/output/ -remove "${project_output_path}" -remove "${SCRIPT_DIR}/lib" get_version -export VERSION echo "MindX SDK mxrec: ${VERSION}" >> ./version.info pkg_dir=mindxsdk-mxrec -remove "${pkg_dir}" -mkdir "${pkg_dir}" -mv version.info "${pkg_dir}" - -src_path="${ROOT_DIR}"/src -cd "${ROOT_DIR}" - release_tar=Ascend-"${pkg_dir}"_"${VERSION}"_linux-"${ARCH}".tar.gz +mv version.info "${SCRIPT_DIR}"/"${pkg_dir}" -gen_tar_file() +function gen_tar_file() { - cd "${src_path}" - cp -r "${src_path}"/../cust_op ../build/"${pkg_dir}" - cp -r "${src_path}"/../examples ../build/"${pkg_dir}" + cd "${MxRec_DIR}" + cp -r ./cust_op ./build/"${pkg_dir}" + cp -r ./examples ./build/"${pkg_dir}" # change dirs and files 's permission - chmod 550 ../build/"${pkg_dir}"/tf1_whl - chmod 550 ../build/"${pkg_dir}"/tf1_whl/mx_rec*.whl - chmod 550 ../build/"${pkg_dir}"/tf2_whl - chmod 550 ../build/"${pkg_dir}"/tf2_whl/mx_rec*.whl - chmod 550 ../build/"${pkg_dir}"/cust_op/ - chmod 550 ../build/"${pkg_dir}"/cust_op/cust_op_by_addr - cd ../build/"${pkg_dir}"/cust_op/cust_op_by_addr + chmod 550 ./build/"${pkg_dir}"/tf1_whl + chmod 550 ./build/"${pkg_dir}"/tf1_whl/mx_rec*.whl + chmod 550 ./build/"${pkg_dir}"/tf2_whl + chmod 550 ./build/"${pkg_dir}"/tf2_whl/mx_rec*.whl + chmod 550 ./build/"${pkg_dir}"/cust_op/ + chmod 550 ./build/"${pkg_dir}"/cust_op/cust_op_by_addr + cd ./build/"${pkg_dir}"/cust_op/cust_op_by_addr chmod 550 *.sh chmod 640 *.json chmod 550 op_host op_kernel op_host/* op_kernel/* cd - - cd ../build + cd ./build tar -zvcf "${release_tar}" "${pkg_dir}" || { warn "compression failed, packages might be broken" } - mv "${release_tar}" "${SCRIPT_DIR}"/../output/ + mv "${release_tar}" ../output/ } -clean() +function clean() { - remove "${ROOT_DIR}"/dist - remove "${ROOT_DIR}"/install - remove "${ROOT_DIR}"/mx_rec.egg-info - remove "${ROOT_DIR}"/src/build - remove "${ROOT_DIR}"/build/bdist.linux-"$(arch)" - remove "${ROOT_DIR}"/build/tf2_env - remove "${ROOT_DIR}"/build/tf1_env - remove "${ROOT_DIR}"/build/lib - remove "${ROOT_DIR}"/build/mindxsdk-mxrec + rm -rf "${MxRec_DIR}"/dist + rm -rf "${MxRec_DIR}"/mx_rec.egg-info + rm -rf "${MxRec_DIR}"/src/build + rm -rf "${MxRec_DIR}"/mx_rec/libasc + rm -rf "${MxRec_DIR}"/build/lib + rm -rf "${MxRec_DIR}"/build/bdist.linux-${ARCH} } +gen_tar_file -if [ "$(uname -m)" = "x86_64" ] -then - echo "-----Build gen tar -----" - bash ${ROOT_DIR}/build/build_tf1_with_opensource.sh - bash ${ROOT_DIR}/build/build_tf2_with_opensource.sh - gen_tar_file - echo "-----Build gen tar finished-----" - - # clean - 
echo "-----Done-----" -fi - -if [ "$(uname -m)" = "aarch64" ] -then - echo "-----Build gen tar -----" - bash ${ROOT_DIR}/build/build_tf1_with_opensource.sh - bash ${ROOT_DIR}/build/build_tf2_with_opensource.sh - gen_tar_file - echo "-----Build gen tar finished-----" +clean - # clean - echo "-----Done-----" -fi \ No newline at end of file +# compile cust op +echo "---------------- start to compile cust op ----------------" +cd "${MxRec_DIR}"/cust_op/cust_op_by_addr +chmod u+x run.sh +./run.sh +echo "---------------- compile cust op success!!!! ----------------" \ No newline at end of file diff --git a/build/gen_tf1_tar_pkg.sh b/build/gen_tf1_tar_pkg.sh new file mode 100644 index 00000000..0464597b --- /dev/null +++ b/build/gen_tf1_tar_pkg.sh @@ -0,0 +1,33 @@ +#!/bin/bash +# Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +set -e +warn() { echo >&2 -e "\033[1;31m[WARN ][Depend ] $1\033[1;37m" ; } +ARCH="$(uname -m)" +SCRIPT_DIR=$(dirname "$(readlink -f "$0")") +MxRec_DIR=$(dirname "${SCRIPT_DIR}") +pkg_dir=mindxsdk-mxrec + +function move_whl_pkg() { + mkdir -p "$SCRIPT_DIR"/"${pkg_dir}"/tf1_whl + mv ${MxRec_DIR}/dist/mx_rec*.whl "$SCRIPT_DIR"/"${pkg_dir}"/tf1_whl + cd "$SCRIPT_DIR"/"${pkg_dir}"/tf1_whl + whl_file=$(ls .) + mv "$whl_file" "${whl_file/any/linux_${ARCH}}" + cd - +} + +move_whl_pkg \ No newline at end of file diff --git a/build/gen_tf2_tar_pkg.sh b/build/gen_tf2_tar_pkg.sh new file mode 100644 index 00000000..e9d71f48 --- /dev/null +++ b/build/gen_tf2_tar_pkg.sh @@ -0,0 +1,33 @@ +#!/bin/bash +# Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +set -e +warn() { echo >&2 -e "\033[1;31m[WARN ][Depend ] $1\033[1;37m" ; } +ARCH="$(uname -m)" +SCRIPT_DIR=$(dirname "$(readlink -f "$0")") +MxRec_DIR=$(dirname "${SCRIPT_DIR}") +pkg_dir=mindxsdk-mxrec + +function move_whl_pkg() { + mkdir -p "$SCRIPT_DIR"/"${pkg_dir}"/tf2_whl + mv ${MxRec_DIR}/dist/mx_rec*.whl "$SCRIPT_DIR"/"${pkg_dir}"/tf2_whl + cd "$SCRIPT_DIR"/"${pkg_dir}"/tf2_whl + whl_file=$(ls .) 
+ mv "$whl_file" "${whl_file/any/linux_${ARCH}}" + cd - +} + +move_whl_pkg \ No newline at end of file diff --git a/setup.py b/setup.py index efb4c994..ead4083f 100644 --- a/setup.py +++ b/setup.py @@ -16,64 +16,25 @@ # ============================================================================== import os -import stat -from setuptools import setup, find_packages -import pkg_resources -from setuptools.extern.packaging import version as packaging_version - - -# Patch Version class to preserve original version string -class NoNormalizeVersion(packaging_version.Version): - def __init__(self, version): - self._orig_version = version - super().__init__(version) - - def __str__(self): - return self._orig_version - - -packaging_version.Version = NoNormalizeVersion -# Patch safe_version() to prevent version normalization -pkg_resources.safe_version = lambda v: v - -try: - with open("README.md") as file: - LONG_DESCRIPTION = file.read() -except IOError: - LONG_DESCRIPTION = "" - -env_version = os.getenv("VERSION") -VERSION = env_version if env_version is not None else '5.0.rc3' - -INIT_FILE = "mx_rec/__init__.py" -with open(INIT_FILE, 'r') as file: - lines = file.readlines() - -for idx, line in enumerate(lines): - if "__version__ = " not in line: - continue - lines[idx] = f"__version__ = '{VERSION}'\n" - break - -FLAG = os.O_WRONLY | os.O_TRUNC -MODE = stat.S_IWUSR | stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH -with os.fdopen(os.open(INIT_FILE, FLAG, MODE), 'w') as out: - out.writelines(lines) - -setup( - name='mx_rec', - version=VERSION, - author='HUAWEI Inc', - description='MindX SDK Recommend', - long_description=LONG_DESCRIPTION, - # include mx_rec - packages=find_packages( - where='.', - include=["mx_rec*"] - ), - package_dir={}, - # other file - package_data={'': ['tools/*', 'tools/*/*', '*.yml', '*.sh', '*.so*']}, - # dependency - python_requires='>=3.7.5' -) +import shutil +import subprocess + +# clean pkg_dir existed +pkg_dir = "./build/mindxsdk-mxrec" +if os.path.exists(pkg_dir): + shutil.rmtree(pkg_dir) + +# build tf1's wheel file +res = subprocess.run(["python3.7", "setup_tf1.py", "bdist_wheel"], shell=False) +if res.returncode: + raise RuntimeError(f"build tf1's wheel file failed!") + +# build tf2's wheel file +res = subprocess.run(["python3.7", "setup_tf2.py", "bdist_wheel"], shell=False) +if res.returncode: + raise RuntimeError(f"build tf2's wheel file failed!") + +# copy cust_op, examples files, etc. Then gen mxrec's tar pkg +res = subprocess.run(["bash", "./build/gen_mxrec_tar_pkg.sh"], shell=False) +if res.returncode: + raise RuntimeError(f"gen mxrec's tar pkg failed!") diff --git a/setup_tf1.py b/setup_tf1.py new file mode 100644 index 00000000..4ad4cf20 --- /dev/null +++ b/setup_tf1.py @@ -0,0 +1,88 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +import os +import stat +from setuptools import setup, find_packages +import pkg_resources +from setuptools.extern.packaging import version as packaging_version +import subprocess + + +# Patch Version class to preserve original version string +class NoNormalizeVersion(packaging_version.Version): + def __init__(self, version): + self._orig_version = version + super().__init__(version) + + def __str__(self): + return self._orig_version + + +packaging_version.Version = NoNormalizeVersion +# Patch safe_version() to prevent version normalization +pkg_resources.safe_version = lambda v: v + +try: + with open("README.md") as file: + LONG_DESCRIPTION = file.read() +except IOError: + LONG_DESCRIPTION = "" + +env_version = os.getenv("VERSION") +VERSION = env_version if env_version is not None else '6.0.RC2' + +INIT_FILE = "mx_rec/__init__.py" +with open(INIT_FILE, 'r') as file: + lines = file.readlines() + +for idx, line in enumerate(lines): + if "__version__ = " not in line: + continue + lines[idx] = f"__version__ = '{VERSION}'\n" + break + +FLAG = os.O_WRONLY | os.O_TRUNC +MODE = stat.S_IWUSR | stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH +with os.fdopen(os.open(INIT_FILE, FLAG, MODE), 'w') as out: + out.writelines(lines) + +# compile so files +res = subprocess.run(["bash", f"./build/build_tf1.sh"], shell=False) +if res.returncode: + raise RuntimeError("compile so files failed!") + +setup( + name='mx_rec', + version=VERSION, + author='HUAWEI Inc', + description='MindX SDK Recommend', + long_description=LONG_DESCRIPTION, + # include mx_rec + packages=find_packages( + where='.', + include=["mx_rec*"] + ), + # other file + package_data={'': ['tools/*', 'tools/*/*', '*.yml', '*.sh', '*.so*']}, + # dependency + python_requires='>=3.7.5' +) + +res = subprocess.run(["bash", f"./build/gen_tf1_tar_pkg.sh"], shell=False) +if res.returncode: + raise RuntimeError(f"gen tf1 tar pkg failed!") diff --git a/setup_tf2.py b/setup_tf2.py new file mode 100644 index 00000000..3bb52ffd --- /dev/null +++ b/setup_tf2.py @@ -0,0 +1,88 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +import os +import stat +from setuptools import setup, find_packages +import pkg_resources +from setuptools.extern.packaging import version as packaging_version +import subprocess + + +# Patch Version class to preserve original version string +class NoNormalizeVersion(packaging_version.Version): + def __init__(self, version): + self._orig_version = version + super().__init__(version) + + def __str__(self): + return self._orig_version + + +packaging_version.Version = NoNormalizeVersion +# Patch safe_version() to prevent version normalization +pkg_resources.safe_version = lambda v: v + +try: + with open("README.md") as file: + LONG_DESCRIPTION = file.read() +except IOError: + LONG_DESCRIPTION = "" + +env_version = os.getenv("VERSION") +VERSION = env_version if env_version is not None else '6.0.RC2' + +INIT_FILE = "mx_rec/__init__.py" +with open(INIT_FILE, 'r') as file: + lines = file.readlines() + +for idx, line in enumerate(lines): + if "__version__ = " not in line: + continue + lines[idx] = f"__version__ = '{VERSION}'\n" + break + +FLAG = os.O_WRONLY | os.O_TRUNC +MODE = stat.S_IWUSR | stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH +with os.fdopen(os.open(INIT_FILE, FLAG, MODE), 'w') as out: + out.writelines(lines) + +# compile so files +res = subprocess.run(["bash", f"./build/build_tf2.sh"], shell=False) +if res.returncode: + raise RuntimeError("compile so files failed!") + +setup( + name='mx_rec', + version=VERSION, + author='HUAWEI Inc', + description='MindX SDK Recommend', + long_description=LONG_DESCRIPTION, + # include mx_rec + packages=find_packages( + where='.', + include=["mx_rec*"] + ), + # other file + package_data={'': ['tools/*', 'tools/*/*', '*.yml', '*.sh', '*.so*']}, + # dependency + python_requires='>=3.7.5' +) + +res = subprocess.run(["bash", f"./build/gen_tf2_tar_pkg.sh"], shell=False) +if res.returncode: + raise RuntimeError(f"gen tf2 tar pkg failed!") -- Gitee From 67ca37a888d4d5f059b4d8bfeaa51d10b332d060 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=BD=97=E5=B9=B8=E8=BF=90?= Date: Mon, 8 Apr 2024 22:21:55 +0800 Subject: [PATCH 020/302] =?UTF-8?q?=E3=80=90=E4=BF=AE=E6=94=B9=E8=AF=B4?= =?UTF-8?q?=E6=98=8E=20Modification=E3=80=91=E5=85=A8=E5=B1=80=E5=8E=BB?= =?UTF-8?q?=E9=87=8D=E4=BC=98=E5=8C=96-lazyAdam=E9=80=82=E9=85=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mx_rec/optimizers/base.py | 4 ---- mx_rec/optimizers/lazy_adam_by_addr.py | 6 +++--- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/mx_rec/optimizers/base.py b/mx_rec/optimizers/base.py index 91c72d52..c5c0e601 100644 --- a/mx_rec/optimizers/base.py +++ b/mx_rec/optimizers/base.py @@ -113,10 +113,6 @@ class CustomizedOptimizer: unique_local_grad = tf.compat.v1.unsorted_segment_sum(grad, restore_vector_second, array_ops.shape(unique_keys)[0]) - if not is_expansion: - unique_local_grad = ops.IndexedSlices(values=unique_local_grad, - indices=unique_keys, - dense_shape=tf.shape(var)) return unique_local_grad, unique_keys diff --git a/mx_rec/optimizers/lazy_adam_by_addr.py b/mx_rec/optimizers/lazy_adam_by_addr.py index 0f7d7139..22b8af33 100644 --- a/mx_rec/optimizers/lazy_adam_by_addr.py +++ b/mx_rec/optimizers/lazy_adam_by_addr.py @@ -122,9 +122,9 @@ class CustomizedLazyAdamByAddress(adam.AdamOptimizer, CustomizedOptimizer): unique_local_grad, unique_addr = self.sum_same_id_gradients(grad=grad, var=addr, is_expansion=True) power_b1, power_b2 = 
self._get_beta_accumulators() - power_b1 = math_ops.cast(power_b1, unique_local_grad.dtype.base_dtype) - power_b2 = math_ops.cast(power_b2, unique_local_grad.dtype.base_dtype) - temp = self._cast_to_base_type(unique_local_grad) + power_b1 = math_ops.cast(power_b1, grad.dtype.base_dtype) + power_b2 = math_ops.cast(power_b2, grad.dtype.base_dtype) + temp = self._cast_to_base_type(grad) temp_lr = temp.get("temp_lr") temp_b1 = temp.get("temp_b1") temp_b2 = temp.get("temp_b2") -- Gitee From 8f6ff1ba4fa6eb332ad1a2bbf60fb0f1a735176f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=BD=97=E5=B9=B8=E8=BF=90?= Date: Mon, 8 Apr 2024 22:23:50 +0800 Subject: [PATCH 021/302] =?UTF-8?q?=E3=80=90=E4=BF=AE=E6=94=B9=E8=AF=B4?= =?UTF-8?q?=E6=98=8E=20Modification=E3=80=91=E5=85=A8=E5=B1=80=E5=8E=BB?= =?UTF-8?q?=E9=87=8D=E4=BC=98=E5=8C=96-lazyAdam=E9=80=82=E9=85=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core/key_process/key_process.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/core/key_process/key_process.cpp b/src/core/key_process/key_process.cpp index 8ab030a8..b72f3c8e 100644 --- a/src/core/key_process/key_process.cpp +++ b/src/core/key_process/key_process.cpp @@ -470,6 +470,8 @@ bool KeyProcess::KeyProcessTaskHelper(unique_ptr& batch, int channel, void KeyProcess::PushGlobalUniqueTensors(const unique_ptr>& tensors, KeysT& lookupKeys, int channel) { + LOG_INFO(KEY_PROCESS "rank:{}, channel:{}, useSumSameIdGradients:{} ...", + rankInfo.rankId, channel, rankInfo.useSumSameIdGradients); if (rankInfo.useSumSameIdGradients && channel == TRAIN_CHANNEL_ID) { KeysT uniqueKeys; vector restoreVecSec; -- Gitee From 9c253c4d1a36b017190da6cec3be27b3d99786f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=BD=97=E5=B9=B8=E8=BF=90?= Date: Tue, 9 Apr 2024 10:54:44 +0800 Subject: [PATCH 022/302] =?UTF-8?q?=E3=80=90=E4=BF=AE=E6=94=B9=E8=AF=B4?= =?UTF-8?q?=E6=98=8E=20Modification=E3=80=91=E5=85=A8=E5=B1=80=E5=8E=BB?= =?UTF-8?q?=E9=87=8D=E4=BC=98=E5=8C=96-=E5=85=A8=E9=80=82=E9=85=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mx_rec/optimizers/base.py | 12 ++++++------ mx_rec/optimizers/ftrl.py | 10 ++++++---- mx_rec/optimizers/lazy_adam.py | 15 +++++++-------- mx_rec/optimizers/lazy_adam_by_addr.py | 17 ++++++++--------- 4 files changed, 27 insertions(+), 27 deletions(-) diff --git a/mx_rec/optimizers/base.py b/mx_rec/optimizers/base.py index c5c0e601..395e60eb 100644 --- a/mx_rec/optimizers/base.py +++ b/mx_rec/optimizers/base.py @@ -58,7 +58,7 @@ class CustomizedOptimizer: self.unique_name = name + "_" + str(count) self.base_name = name - def get_restore_vector_second(self, table_name: str) -> tf.Tensor: + def _get_restore_vector_second(self, table_name: str) -> tf.Tensor: """ Get restore vector which is calculated after the second all2all :param table_name: embedding table_name @@ -74,7 +74,7 @@ class CustomizedOptimizer: channel_name=f'{table_name}_restore_second_{channel_id}')[0] return restore_vector_second - def get_unique_keys(self, table_name: str, is_expansion: bool) -> tf.Tensor: + def _get_unique_keys(self, table_name: str, is_expansion: bool) -> tf.Tensor: """ Get the global unique keys which is calculated after the second all2all :param table_name: embedding table_name @@ -99,16 +99,16 @@ class CustomizedOptimizer: def sum_same_id_gradients(self, grad, var, is_expansion): if isinstance(var, ops.Tensor): - # 扩容模式从scope获取表名 - table_name = var.op.name.split('/')[0] + # 扩容模式从scope获取表名,偏移是-2 + 
table_name = var.op.name.split('/')[-2] else: table_instance = ConfigInitializer.get_instance().sparse_embed_config.get_table_instance(var) table_name = table_instance.table_name with tf.compat.v1.variable_scope("restore_vector_second"): - restore_vector_second = self.get_restore_vector_second(table_name) + restore_vector_second = self._get_restore_vector_second(table_name) with tf.compat.v1.variable_scope("unique_keys"): - unique_keys = self.get_unique_keys(table_name, is_expansion) + unique_keys = self._get_unique_keys(table_name, is_expansion) unique_local_grad = tf.compat.v1.unsorted_segment_sum(grad, restore_vector_second, diff --git a/mx_rec/optimizers/ftrl.py b/mx_rec/optimizers/ftrl.py index d6ddb093..3659ffcd 100644 --- a/mx_rec/optimizers/ftrl.py +++ b/mx_rec/optimizers/ftrl.py @@ -140,17 +140,19 @@ class CustomizedFtrl(ftrl.FtrlOptimizer, CustomizedOptimizer): self._resource_scatter_nd_update) def _apply_sparse(self, grad, var): + unique_local_grad, unique_keys = self.sum_same_id_gradients(grad=grad.values, var=var, is_expansion=False) + if self._l2_shrinkage_regularization_strength <= 0.0: return self._apply_sparse_shared( - grad.values, + unique_local_grad, var, - grad.indices, + unique_keys, lambda x, i, v: tf.compat.v1.scatter_nd_update(x, i, v)) else: return self._apply_sparse_shared_v2( - grad.values, + unique_local_grad, var, - grad.indices, + unique_keys, lambda x, i, v: tf.compat.v1.scatter_nd_update(x, i, v)) def _apply_sparse_shared(self, grad, var, indices, scatter_nd_update): diff --git a/mx_rec/optimizers/lazy_adam.py b/mx_rec/optimizers/lazy_adam.py index 70549702..bab8245f 100644 --- a/mx_rec/optimizers/lazy_adam.py +++ b/mx_rec/optimizers/lazy_adam.py @@ -149,15 +149,14 @@ class CustomizedLazyAdam(adam.AdamOptimizer, CustomizedOptimizer): self._resource_scatter_nd_add) def _apply_sparse(self, grad, var): + unique_local_grad, unique_keys = self.sum_same_id_gradients(grad=grad.values, var=var, is_expansion=False) return self._apply_sparse_shared( - grad.values, + unique_local_grad, var, - grad.indices, + unique_keys, lambda x, i, v: tf.compat.v1.scatter_nd_add(x, i, v)) def _apply_sparse_shared(self, grad, var, indices, scatter_nd_add): - unique_local_grad, unique_keys = self.sum_same_id_gradients(grad=grad, var=var, is_expansion=False) - power_b1, power_b2 = self._get_beta_accumulators() power_b1 = math_ops.cast(power_b1, var.dtype.base_dtype) power_b2 = math_ops.cast(power_b2, var.dtype.base_dtype) @@ -168,17 +167,17 @@ class CustomizedLazyAdam(adam.AdamOptimizer, CustomizedOptimizer): temp_epsilon = temp.get("temp_epsilon") learning_rate = tf.divide(temp_lr * math_ops.sqrt(1 - power_b2), (1 - power_b1)) - abs_indices = tf.math.maximum(unique_keys, 0) - nd_indices = tf.expand_dims(unique_keys, 1) + abs_indices = tf.math.maximum(indices, 0) + nd_indices = tf.expand_dims(indices, 1) momentum = self.get_slot(var, "m") old_m_slice = tf.gather(momentum, abs_indices) - m_t_slice = temp_b1 * old_m_slice + (1 - temp_b1) * unique_local_grad + m_t_slice = temp_b1 * old_m_slice + (1 - temp_b1) * grad m_update_op = scatter_nd_add(momentum, nd_indices, m_t_slice - old_m_slice) velocity = self.get_slot(var, "v") old_v_slice = tf.gather(velocity, abs_indices) - v_t_slice = temp_b2 * old_v_slice + (1 - temp_b2) * math_ops.square(unique_local_grad) + v_t_slice = temp_b2 * old_v_slice + (1 - temp_b2) * math_ops.square(grad) v_update_op = scatter_nd_add(velocity, nd_indices, v_t_slice - old_v_slice) denominator_slice = math_ops.sqrt(v_t_slice) + temp_epsilon diff --git 
a/mx_rec/optimizers/lazy_adam_by_addr.py b/mx_rec/optimizers/lazy_adam_by_addr.py index 22b8af33..cd4ee878 100644 --- a/mx_rec/optimizers/lazy_adam_by_addr.py +++ b/mx_rec/optimizers/lazy_adam_by_addr.py @@ -114,13 +114,12 @@ class CustomizedLazyAdamByAddress(adam.AdamOptimizer, CustomizedOptimizer): return temp def _apply_sparse(self, grad, addr): + unique_local_grad, unique_addr = self.sum_same_id_gradients(grad=grad, var=addr, is_expansion=True) return self._apply_sparse_shared( - grad, - addr) + unique_local_grad, + unique_addr) def _apply_sparse_shared(self, grad, addr): - unique_local_grad, unique_addr = self.sum_same_id_gradients(grad=grad, var=addr, is_expansion=True) - power_b1, power_b2 = self._get_beta_accumulators() power_b1 = math_ops.cast(power_b1, grad.dtype.base_dtype) power_b2 = math_ops.cast(power_b2, grad.dtype.base_dtype) @@ -132,23 +131,23 @@ class CustomizedLazyAdamByAddress(adam.AdamOptimizer, CustomizedOptimizer): learning_rate = tf.divide(temp_lr * math_ops.sqrt(1 - power_b2), (1 - power_b1)) host_pipeline_ops = import_host_pipeline_ops() - dim = unique_local_grad.shape.as_list()[-1] + dim = grad.shape.as_list()[-1] combined_tensor = \ - host_pipeline_ops.embedding_lookup_by_address(unique_addr, embedding_dim=3 * dim, embedding_type=1) + host_pipeline_ops.embedding_lookup_by_address(addr, embedding_dim=3 * dim, embedding_type=1) split_length = [dim] + [dim] + [dim] split_tensors = tf.split(combined_tensor, split_length, axis=1) old_m_slice = split_tensors[1] - m_t_slice = temp_b1 * old_m_slice + (1 - temp_b1) * unique_local_grad + m_t_slice = temp_b1 * old_m_slice + (1 - temp_b1) * grad old_v_slice = split_tensors[2] - v_t_slice = temp_b2 * old_v_slice + (1 - temp_b2) * math_ops.square(unique_local_grad) + v_t_slice = temp_b2 * old_v_slice + (1 - temp_b2) * math_ops.square(grad) denominator_slice = math_ops.sqrt(v_t_slice) + temp_epsilon update_list = [tf.divide(-learning_rate * m_t_slice, denominator_slice)] + [m_t_slice - old_m_slice] + \ [v_t_slice - old_v_slice] update_tensor = tf.concat(update_list, axis=1) - var_update_op = host_pipeline_ops.embedding_update_by_address(unique_addr, update_tensor, update_type=0) + var_update_op = host_pipeline_ops.embedding_update_by_address(addr, update_tensor, update_type=0) return var_update_op -- Gitee From fcc359ff92c87f95a5e746745662e012a71c9b6d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=BD=97=E5=B9=B8=E8=BF=90?= Date: Tue, 9 Apr 2024 11:45:15 +0800 Subject: [PATCH 023/302] =?UTF-8?q?=E3=80=90=E4=BF=AE=E6=94=B9=E8=AF=B4?= =?UTF-8?q?=E6=98=8E=20Modification=E3=80=91=E5=85=A8=E5=B1=80=E5=8E=BB?= =?UTF-8?q?=E9=87=8D=E4=BC=98=E5=8C=96-=E6=A8=A1=E5=9E=8B=E9=80=82?= =?UTF-8?q?=E9=85=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/DCNv2/main_mxrec.py | 4 ++-- examples/demo/little_demo_estimator/nn_optim.py | 4 ++-- examples/dlrm/model/gradient_descent_w.py | 10 ++++++++++ examples/dlrm/model/main_mxrec.py | 4 ++-- mx_rec/optimizers/adagrad.py | 5 +++-- 5 files changed, 19 insertions(+), 8 deletions(-) diff --git a/examples/DCNv2/main_mxrec.py b/examples/DCNv2/main_mxrec.py index 540445e8..a47590c2 100644 --- a/examples/DCNv2/main_mxrec.py +++ b/examples/DCNv2/main_mxrec.py @@ -336,9 +336,9 @@ if __name__ == "__main__": train_ops.append(dense_optimizer.apply_gradients(avg_grads)) if use_dynamic_expansion: - from mx_rec.constants.constants import ASCEND_SPARSE_LOOKUP_LOCAL_EMB, ASCEND_SPARSE_LOOKUP_UNIQUE_KEYS + from mx_rec.constants.constants import 
ASCEND_SPARSE_LOOKUP_LOCAL_EMB, ASCEND_SPARSE_LOOKUP_ID_OFFSET - train_address_list = tf.compat.v1.get_collection(ASCEND_SPARSE_LOOKUP_UNIQUE_KEYS) + train_address_list = tf.compat.v1.get_collection(ASCEND_SPARSE_LOOKUP_ID_OFFSET) train_emb_list = tf.compat.v1.get_collection(ASCEND_SPARSE_LOOKUP_LOCAL_EMB) # do sparse optimization by addr sparse_grads = sparse_optimizer.compute_gradients(loss, train_emb_list) # local_embedding diff --git a/examples/demo/little_demo_estimator/nn_optim.py b/examples/demo/little_demo_estimator/nn_optim.py index 4438627d..3be3c7ed 100644 --- a/examples/demo/little_demo_estimator/nn_optim.py +++ b/examples/demo/little_demo_estimator/nn_optim.py @@ -73,11 +73,11 @@ def get_train_op_list(losses, learning_rate): # do sparse optimization if use_dynamic_expansion: - from mx_rec.constants.constants import ASCEND_SPARSE_LOOKUP_LOCAL_EMB, ASCEND_SPARSE_LOOKUP_UNIQUE_KEYS + from mx_rec.constants.constants import ASCEND_SPARSE_LOOKUP_LOCAL_EMB, ASCEND_SPARSE_LOOKUP_ID_OFFSET train_emb_list = tf.compat.v1.get_collection(ASCEND_SPARSE_LOOKUP_LOCAL_EMB) - train_address_list = tf.compat.v1.get_collection(ASCEND_SPARSE_LOOKUP_UNIQUE_KEYS) + train_address_list = tf.compat.v1.get_collection(ASCEND_SPARSE_LOOKUP_ID_OFFSET) local_grads = tf.gradients(loss, train_emb_list) # local_embedding grads_and_vars = [(grad, address) for grad, address in zip(local_grads, train_address_list)] diff --git a/examples/dlrm/model/gradient_descent_w.py b/examples/dlrm/model/gradient_descent_w.py index f3ae78d7..6c34b726 100644 --- a/examples/dlrm/model/gradient_descent_w.py +++ b/examples/dlrm/model/gradient_descent_w.py @@ -47,6 +47,16 @@ class CustomizedGradientDescentWithWeighDecay(gradient_descent.GradientDescentOp super(CustomizedGradientDescentWithWeighDecay, self).__init__( learning_rate=learning_rate, use_locking=use_locking, name=self.unique_name ) + self._slot_num = 0 + self._derivative = 1 + + @property + def slot_num(self): + return self._slot_num + + @property + def derivative(self): + return self._derivative def initialize_slots(self, var, table_instance): logger.info("no slot for gradient descent") diff --git a/examples/dlrm/model/main_mxrec.py b/examples/dlrm/model/main_mxrec.py index dd3e8d2d..627b6c8f 100644 --- a/examples/dlrm/model/main_mxrec.py +++ b/examples/dlrm/model/main_mxrec.py @@ -340,9 +340,9 @@ if __name__ == "__main__": train_ops.append(dense_optimizer.apply_gradients(avg_grads)) if use_dynamic_expansion: - from mx_rec.constants.constants import ASCEND_SPARSE_LOOKUP_LOCAL_EMB, ASCEND_SPARSE_LOOKUP_UNIQUE_KEYS + from mx_rec.constants.constants import ASCEND_SPARSE_LOOKUP_LOCAL_EMB, ASCEND_SPARSE_LOOKUP_ID_OFFSET - train_address_list = tf.compat.v1.get_collection(ASCEND_SPARSE_LOOKUP_UNIQUE_KEYS) + train_address_list = tf.compat.v1.get_collection(ASCEND_SPARSE_LOOKUP_ID_OFFSET) train_emb_list = tf.compat.v1.get_collection(ASCEND_SPARSE_LOOKUP_LOCAL_EMB) # do sparse optimization by addr sparse_grads = sparse_optimizer.compute_gradients(loss, train_emb_list) # local_embedding diff --git a/mx_rec/optimizers/adagrad.py b/mx_rec/optimizers/adagrad.py index a5fa7975..4ba444a6 100644 --- a/mx_rec/optimizers/adagrad.py +++ b/mx_rec/optimizers/adagrad.py @@ -131,10 +131,11 @@ class CustomizedAdagrad(adagrad.AdagradOptimizer, CustomizedOptimizer): def _apply_sparse(self, grad, var): acc = self.get_slot(var, "acc") + unique_local_grad, unique_keys = self.sum_same_id_gradients(grad=grad.values, var=var, is_expansion=False) return training_ops.sparse_apply_adagrad( var, acc, 
math_ops.cast(self._learning_rate_tensor, var.dtype.base_dtype), - grad.values, - grad.indices, + unique_local_grad, + unique_keys, use_locking=self._use_locking) def _resource_apply_sparse(self, grad, var, indices): -- Gitee From dd259e7f25a7251e901c8e6a4b0e9e9a2f0b8d3f Mon Sep 17 00:00:00 2001 From: yxy1684 <2270320041@qq.com> Date: Wed, 10 Apr 2024 10:09:50 +0800 Subject: [PATCH 024/302] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E5=85=AC=E7=BD=91?= =?UTF-8?q?=E5=9C=B0=E5=9D=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ...56\347\256\261\345\234\260\345\235\200.xlsx" | Bin 0 -> 19596 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 "docs/MindX\342\200\242SDK\342\200\2426.0.RC1\342\200\242mxRec\342\200\242\345\205\254\347\275\221\345\234\260\345\235\200\345\222\214\351\202\256\347\256\261\345\234\260\345\235\200.xlsx" diff --git "a/docs/MindX\342\200\242SDK\342\200\2426.0.RC1\342\200\242mxRec\342\200\242\345\205\254\347\275\221\345\234\260\345\235\200\345\222\214\351\202\256\347\256\261\345\234\260\345\235\200.xlsx" "b/docs/MindX\342\200\242SDK\342\200\2426.0.RC1\342\200\242mxRec\342\200\242\345\205\254\347\275\221\345\234\260\345\235\200\345\222\214\351\202\256\347\256\261\345\234\260\345\235\200.xlsx" new file mode 100644 index 0000000000000000000000000000000000000000..2fa2165b6f608106df678d1afd31562884a3029d GIT binary patch literal 19596 zcmZ^KV|XA@lXYy{p4fIKwkEc1+s?$cF|lpT#I|kQ$(PKF-FLs;{?WQsbx)mh`}Wh- z-Exw^AW#5bONh+&_pj~07Rb*XLmLA*dmCE^I=N3Xl+P0of6Z!Zz!ts(0szmkDktidgkDzLBnD*zOppIU8SbK)bsYrsSXi~=*D|F50S^)wq zF@y(TSRqJ*>Q&C6%vRVdFS`m$lvuL6(riWms2|PL_SMBEl@gL|ioF<+) z2Hh`&%nv}TY&HD^PC^%&^lk{hu}urEVa!Ny(!d7qbI!v>j!Q<%wDa+GcB}`9vO{}& z%nw@Em)*r;u=o+&2RQMMAVzrgw*0|)6;2Wh=>mvu?S>Wnxw1VvFHE@B2;D5GrJDS{ zCkt3OUIo53;*r5OxcFd#sPms}-~av%s`$ya$tT;0|DA0E8+)TKx+CNGW%}q5gdfCK z39or;;?Y2q<0O1r^N^5)37nI7q-l`IYPFVefcqT{+)cun_)GpLR4yy?0B~M8CS2#>!QxmuA7n&CJC%Hj(>3cka05qE;}; zeEqU@D=*H9I;F-cIC5CfGA&@nl9}^TM%Y8c?x;KbEoyq>5t%JkbR?l=oNHd~XN@aW z53(-$uNp41&Y^Iyy$&^|Z2kOhI9eN{#pID<%=bf!@Ix7aj*cRbZq`|R$2R)TLHhXH z+5Mq$(43t(2=Ko3V_vT;(>DJpLEbxACgrCDh@TQ5|Az!l4vscfUm_I7N=x>AM+m;~ z{~$c&S&Al=ZX}@?6XoYtt-hz!t1|;$TB~j=LA&Z|D^$MP6pm)PxxO5gYV|}7aM@kH zl`sYXhj7(ooV&Nr_A5SHmi0BUBwjI=i!<5mhgIEgVmlHki?vvB*x?9d#$;FJq9tHH z?G`owVoS1~rl7$(k?061J#qg-^_#S(FbE`g{u@=T(y671^UBPj z93oEdVlA^LwAW`;g{VhXpkgN&wy}UmHg;+~b*)y)|4u_O48TOWP7nMtt zQ~ggpPX^@!(BJjpCWgX4={3>G#kWgEDm6s)v=kpPcTl_s6kO*XiziPh9X}p7?=8uU zf0p1c<1X;MLWGA#i*IIOZMuc-Y*=nSsVD2~<9|{6zfU>9pVYcq{+(?8cLTV87`WKj zTR4~+89DyXiOII?^~@Or03habR>JuIZ2!72{JmmQB;(WB5qvK+8b18C@#q)-MAv49 zkKI%&l~kHw5nX9XxQ)tcxiX*Rvld)l9Pd zAh}ILd&qyk;CnBwsGO75)ZB?c+x_)H(D43rdw?I3q+E3u5F-K%E$lUmFRgGcT_=M_ z5^WKXRN$M1U2)y)w{UDdZzQ};r83MkqfL#&6g?w=j&?tHv6$1Ok%o{=I+1}AN&h33 z+PeGWIX0=G_TKxA#$$ofnoxOfqFY$y*73d^^_osLK79Um9s_brdJ8Mc-sD`wZXc47 zIli2`QagK(j9yc~md-4RmCv<#m~i^mdAX5gxt29fz@0%Xrh!4+!EGLPY^^!$ZpLq- zdtsOxx01pzR=Ptuc18X}|BQHLJ~!Z}r=++Bd=?Aq0bN38ZPcOrEcz(MkXUbR)$hsn zSm+_fYD1eG>vv@3EljDj%(1Py+KU^`Cv`Iw`=8lr4!KJYx69m7&oU~fHp{^z9eh~i zvxPOrBFJfeppR916lFSEBRekZjRGD=80ig_-?+WarsC2B29h`@C2%c_EcxWrCBB&^ zPG9HyDgOXpmFL9ntMxg{Bu>mxl4#uW%mHaRF!NQW@H)gTcc=uenD%4viDgScuLP+0 z@o2U##UR@bq>7jYhib*5nnSPW$w>(5gjr|RzkHEXiZuTO>hAY_UH6Ygj!p6J9?yVl z@q&IeF<)mgy~uC0s=auU&8qqwNtf0A=G0pTVRx!T-ALoAMEywVH9{mL<$BF4GW~%G z&Tc{aSwm#`qH9~Jj;CiNp>@aS@hvCiH;^s5kQR)@l?Y3h49XTBW<9ml3**+HJ_Z)+ z<%fUlPJi1YweaCQ8dV|zed{UhwdyAph3cgU1*yjAe1NkEI2(1XV1O#KnQC!M5;{#h}7N#_E@9t?}lG!$mJ~uyf*VqHV%$Vk812 zA|!GDwGg1xo9rDs04P|Z?!n)KeS-t!-Q@k`KTJsx8dJA9Yj^RuRKcNOfn$I0*w-Y4 
zhkw2ad)wC!$^s8u?j6PmD2=iM;GPFw94J^ii8yOxw8~C#p@vNQ`OhN)GizIIhDz6h zDN!ec410_aGLW390T$h*Bv{KZ^3&`dKKk=WPCeKGSUlSo336^Qn#*AebcI{2Bav?A z#Ya+U?(8iYxSICic4y=hBgm-evbk+1Bf;hT@BuuJ2`hVTER9Vr8~nZ)_yG{e?t(+i zCmN|1ip=xa;0r9QYdgT-hQP$dR2o?7NMd5Lvh>7$<2gHPCkXEEl7qEg)mZQ%4>fWK zgF({FbL^+!stZP&ElR`~yU^S;aVZTCE~o<8c1}mv<>i*hVr5?;r;1fyo}W)OF2~@Q z43zr1W5fOxKv{qv&e}#W2B~(BllQF42 z@iJRjy2qjX$0O*82FC=M;#a_LV1aqSz7h&1a76=tYh^BHib=aa>x7rp0_#@to&heD z#805Gw3}q*+71lb4+w>A?IuMYp#XfjOu0r(>c%31GaY%!iknNywbxoAuMF zy&9K>IUBfUY^WRYeAfPw;1-JJw44*dh6A#Aw(1bsSnC$Po z=T~s>|0sWb{h6$9V`K64X+xr-md!Ffir43ldogP5{>Z!%{DDwdc}nJGP^xV_gz>As zN&``r3crfp9<)NDi%ytp$PTnOJjPoFoWWGuG+pU(U{x4N`1{E4289YWD+R8t@D}L8 zOsL~2kR=4fw0mO8whm+iDyAnS3$u7YVn|6WEPnD9lBx-gO9`S9*pVr4nxF}GPi91Dne#U`GG~)IX`<9MVuohs7Fq-FCw;x2<$NvYE`<(1rRerS& z$LRv(3WUiy&K}xKOdX??ru`dd|4h1m`xcOk2Thmw;U)8Kz@KjEmq9&?^=sPOM$IDg z7t}{{tEpb=rk|?RHLsximY8u!ValzN&~g7m-2JZ1YSxpwz8%>rohK37ytEqT%e(xd?Cm5bgK+ zN2M4^8h5Lu(KD1uRqvZYo$JTy3125K|3bU9u&dz7K`nBr{}2pxPzj9+z*DWX;6K=g z&!VIn56O!MKWiy;>_mSY%enz}zMo&Tzhg}c**$1>;JDZ5Fz&zwx9h;YeSLQIoT_xm zYo0p-o4PAN{@~O;$`l;sT|LPlQ^!nP63Me~?V85d7PS>0vDS6YznX#t_o(k2`4e(V zk$Q#jPe zdz5S=8s)Ys_y@OxC&WN}@xaIGCENe~O(xjqP}8wDvizSvzk9Gdw|{)LZF~+%=zrVL zepyz;O-S_9BmBAWzd+dUly*TyTymr%I+Zr`@=cNYm?TAxQ#)X>*8rbBJf1+$(;D?%uh`UW5YxZ7ct_&$S#IpOs|%%?rpo^~vqne7OK$o1^!%I;wZG z6h-w?wlfzypEuTi5;AdaY0d=Ep5~gxH}(sBb!B6-JYQ65clv#~SNoc^ zX}Y|wF|9L#yY4)V*m{m zD(y_;J&Hc*t}e$NmeBRB*n(5x$2-a2z+k)rJe5ESNCOAAA3oPClIlK7XV2 zU-OJw-P@VYoD(-|w&vXa!rWq8ClGBk>*eA4`v<#sv^~i4F>9LX_EozaKFs@e)R*PPhI==T>6G_bV4`Iq;n&bozo*v1-!iXEIoRm-{XqahvRuqq|JI2A;EDxiTjnwmq&m0Z__`m zc3gaZec6j2U9HP*vFY3co@%0U$SmXpTiRQX4#)WR$|%vc_OQ6(MG~)7e;+q}8)$@*`|{FYAu3$PY54h>N;tfkD0f@%ihFGsxzJ(GnhW(%NzZ){ip?EekH3? zG4PlSSzrjs+EpTc@1PI|MqbWa=;eiQ7dD16HomV(PY= zrW^EV7+v~SLOJ;>US~rY>St!o5Wh7v-V_!{@YC~JzG|p zT<)p$iL?5yb5Nm2XsYRijiL(DyzAGqO`gi^v*K35u`Yo-%+EErFE9naK5)S&Z?9Nm zTivO-6O0X6@4YX3zhjG91Lxd;>5b_2IXp{` zhw|OnQW@u^(7SUPN6?Dsumx6qDhJzz>TKjwy>G8rrc|wR%{=$uxUSQ!+EtnNzpk6OYrn>w3-139HU7DJ{B8a32OkUN z>wt4Mj7%cn`5DORJ?OCnj3@?11_dB#?}jzbAO>JRkpom1K>+9=pqb8yq6yVf^uh_G z2V&`7xE^q3!e<#Mw?Y8OG7vyIpa|7I^dpKuiXrx?BWr^mScD!#=tu4#bWa5)Z@8tf z19;H2qaPQ8&@KTiOc2`PhsWUaG*AZ=fIM9cJlzCX65-Q;8cI)HkcuvJhoYBpAd4WO z#N&#j4Wq_1(yBW}(jNbf17@+3}wih|r@HrZEstp8yVt5K9hC!O)XWXE=9Q~9VkX}fv z9l+3wIP(VG`RM0QAWX>trXXOjA-Mo-3ivb%bW_mx>my8Q;=yVPc=vvQH!HZ{5C&Eg MlYwr2><-cg0N2vEpa1{> literal 0 HcmV?d00001 -- Gitee From 1c02c0909830740a7b1c75ebb8a6ca3f0f98fac8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=BD=95=E9=9C=96?= Date: Wed, 10 Apr 2024 10:28:32 +0800 Subject: [PATCH 025/302] =?UTF-8?q?mxrec=E6=9E=84=E5=BB=BA=E4=BC=98?= =?UTF-8?q?=E5=8C=96=EF=BC=9A=E4=BD=BF=E7=94=A8python3.7=20setup.py=20bdis?= =?UTF-8?q?t=5Fwheel=E6=96=B9=E5=BC=8F=E6=9E=84=E5=BB=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 8 +++++--- build/move_whl_file_2_pkg_dir.sh | 1 + 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index fccc0244..eb9bbb2a 100644 --- a/README.md +++ b/README.md @@ -66,9 +66,11 @@ bash run.sh 将pybind11和securec的压缩包放在与MxRec代码同级的opensource目录下,并且将其分别更名为pybind11-2.10.3.zip、huaweicloud-sdk-c-obs-3.23.9.zip。如果没有opensource目录,则需要在MxRec同级的目录下手动创建opensource目录,然后将pybind11和securec的压缩包放在opensource目录下。 为了构建多个版本的whl包,编译脚本在python虚拟环境完成对应tensorflow版本的安装。用户可以根据实际情况调整编译脚本,指定tensorflow的安装路径。编译方法: -- build/build.sh:执行脚本完成tf1和tf2版本whl包的构建和打包。执行脚本前,请参考build/build_tf1_with_opensource.sh、build/build_tf2_with_opensource.sh创建对应的虚拟环境,在虚拟环境中完成对应tensorflow版本的安装,并修改对应的激活命令。 -- 
build/build_tf1_with_opensource.sh:执行脚本完成tf1版本whl包的构建,构建成功后,whl包在/build/mindxsdk-mxrec/tf1_whl子目录下。执行脚本前,创建tf1虚拟环境,在虚拟环境中完成tensorflow 1.15.0版本的安装,并修改对应的激活命令。 -- build/build_tf2_with_opensource.sh:执行脚本完成tf2版本whl包的构建,构建成功后,whl包在/build/mindxsdk-mxrec/tf2_whl子目录下。执行脚本前,创建tf2虚拟环境,在虚拟环境中完成tensorflow 2.6.5版本的安装,并修改对应的激活命令。 + +进入mxrec代码目录: +- setup.py:执行脚本setup.py,比如:**python3.7 setup.py**完成tf1和tf2版本whl包的构建和打包,构建成功后,whl包在/build/mindxsdk-mxrec/目录下,其中tf1_whl和tf2_whl目录下存在对应的whl包。执行脚本前,请参考build/build_tf1.sh、build/build_tf2.sh创建对应的虚拟环境,在虚拟环境中完成对应tensorflow版本的安装,并修改对应的激活命令。 +- setup_tf1.py:执行脚本setup_tf1.py,比如:**python3.7 setup_tf1.py bdist_wheel**完成tf1版本whl包的构建,构建成功后,whl包在/build/mindxsdk-mxrec/tf1_whl子目录下。执行脚本前,请参考build/build_tf1.sh创建tf1虚拟环境,在虚拟环境中完成tensorflow 1.15.0版本的安装,并修改对应的激活命令。 +- setup_tf2.py:执行脚本setup_tf2.py,比如:**python3.7 setup_tf2.py bdist_wheel**完成tf2版本whl包的构建,构建成功后,whl包在/build/mindxsdk-mxrec/tf2_whl子目录下。执行脚本前,请参考build/build_tf2.sh创建tf2虚拟环境,在虚拟环境中完成tensorflow 2.6.5版本的安装,并修改对应的激活命令。 如需使用动态扩容功能,进入“./cust_op/cust_op_by_addr”目录中。参考以下命令编译并安装动态扩容算子包。 ```shell diff --git a/build/move_whl_file_2_pkg_dir.sh b/build/move_whl_file_2_pkg_dir.sh index 824ac52a..d489c2fb 100644 --- a/build/move_whl_file_2_pkg_dir.sh +++ b/build/move_whl_file_2_pkg_dir.sh @@ -24,6 +24,7 @@ tf_version=$1 function move_whl_file_2_pkg_dir() { mkdir -p "$SCRIPT_DIR"/"${pkg_dir}"/"${tf_version}"_whl + rm -rf "$SCRIPT_DIR"/"${pkg_dir}"/"${tf_version}"_whl/* mv ${MxRec_DIR}/dist/mx_rec*.whl "$SCRIPT_DIR"/"${pkg_dir}"/"${tf_version}"_whl cd "$SCRIPT_DIR"/"${pkg_dir}"/"${tf_version}"_whl whl_file=$(ls .) -- Gitee From 63c1be723d0e076af36e121c4e69711de5cb78f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=BD=97=E5=B9=B8=E8=BF=90?= Date: Wed, 10 Apr 2024 11:34:38 +0800 Subject: [PATCH 026/302] =?UTF-8?q?=E3=80=90=E4=BF=AE=E6=94=B9=E8=AF=B4?= =?UTF-8?q?=E6=98=8E=20Modification=E3=80=91=E5=85=A8=E5=B1=80=E5=8E=BB?= =?UTF-8?q?=E9=87=8D=E4=BC=98=E5=8C=96-=E6=A8=A1=E5=9E=8B=E9=80=82?= =?UTF-8?q?=E9=85=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mx_rec/optimizers/base.py | 117 +++++++++++++------------- tests/mx_rec/core/mock_class.py | 1 + tests/mx_rec/core/test_build_graph.py | 95 +-------------------- tests/mx_rec/core/test_manager.py | 8 ++ 4 files changed, 72 insertions(+), 149 deletions(-) diff --git a/mx_rec/optimizers/base.py b/mx_rec/optimizers/base.py index 395e60eb..49594d40 100644 --- a/mx_rec/optimizers/base.py +++ b/mx_rec/optimizers/base.py @@ -31,6 +31,47 @@ from mx_rec.util.initialize import ConfigInitializer from mx_rec.util.log import logger +def get_restore_vector_second(table_name: str) -> tf.Tensor: + """ + Get restore vector which is calculated after the second all2all + :param table_name: embedding table_name + :return: the restore vector calculated after the second all2all + """ + channel_id = 0 + logger.debug('Channel %s_restore_second_%s was built for getnext', + table_name, channel_id) + with tf.compat.v1.variable_scope(table_name, reuse=tf.compat.v1.AUTO_REUSE): + restore_vector_second = npu_ops.gen_npu_ops.get_next( + output_types=[tf.int32], + output_shapes=[[None]], + channel_name=f'{table_name}_restore_second_{channel_id}')[0] + return restore_vector_second + + +def get_unique_keys(table_name: str, is_expansion: bool) -> tf.Tensor: + """ + Get the global unique keys which is calculated after the second all2all + :param table_name: embedding table_name + :param is_expansion: use dynamic expansion + :return: the global unique keys calculated after the second 
all2all + """ + channel_id = 0 + logger.debug('Channel %s_uniquekeys_%s was built for getnext', table_name, channel_id) + with tf.compat.v1.variable_scope(table_name, reuse=tf.compat.v1.AUTO_REUSE): + if is_expansion: + unique_keys = npu_ops.gen_npu_ops.get_next( + output_types=[tf.int64], + output_shapes=[[None]], + channel_name=f'{table_name}_uniquekeys_{channel_id}')[0] + return unique_keys + + unique_keys = npu_ops.gen_npu_ops.get_next( + output_types=[tf.int32], + output_shapes=[[None]], + channel_name=f'{table_name}_uniquekeys_{channel_id}')[0] + return unique_keys + + class CustomizedOptimizer: name_counter = defaultdict(int) @@ -39,6 +80,25 @@ class CustomizedOptimizer: self.unique_name = "" self.base_name = "" + @staticmethod + def sum_same_id_gradients(grad, var, is_expansion): + if isinstance(var, ops.Tensor): + # 扩容模式从scope获取表名,偏移是-2 + table_name = var.op.name.split('/')[-2] + else: + table_instance = ConfigInitializer.get_instance().sparse_embed_config.get_table_instance(var) + table_name = table_instance.table_name + with tf.compat.v1.variable_scope("restore_vector_second"): + restore_vector_second = get_restore_vector_second(table_name) + + with tf.compat.v1.variable_scope("unique_keys"): + unique_keys = get_unique_keys(table_name, is_expansion) + + unique_local_grad = tf.compat.v1.unsorted_segment_sum(grad, + restore_vector_second, + array_ops.shape(unique_keys)[0]) + return unique_local_grad, unique_keys + def initialize_slots(self, var, table_instance): raise NotImplementedError(f"Please define a specific realization on {self.__class__.__name__}") @@ -58,63 +118,6 @@ class CustomizedOptimizer: self.unique_name = name + "_" + str(count) self.base_name = name - def _get_restore_vector_second(self, table_name: str) -> tf.Tensor: - """ - Get restore vector which is calculated after the second all2all - :param table_name: embedding table_name - :return: the restore vector calculated after the second all2all - """ - channel_id = 0 - logger.debug('Channel %s_restore_second_%s was built for getnext', - table_name, channel_id) - with tf.compat.v1.variable_scope(table_name, reuse=tf.compat.v1.AUTO_REUSE): - restore_vector_second = npu_ops.gen_npu_ops.get_next( - output_types=[tf.int32], - output_shapes=[[None]], - channel_name=f'{table_name}_restore_second_{channel_id}')[0] - return restore_vector_second - - def _get_unique_keys(self, table_name: str, is_expansion: bool) -> tf.Tensor: - """ - Get the global unique keys which is calculated after the second all2all - :param table_name: embedding table_name - :param is_expansion: use dynamic expansion - :return: the global unique keys calculated after the second all2all - """ - channel_id = 0 - logger.debug('Channel %s_uniquekeys_%s was built for getnext', table_name, channel_id) - with tf.compat.v1.variable_scope(table_name, reuse=tf.compat.v1.AUTO_REUSE): - if is_expansion: - unique_keys = npu_ops.gen_npu_ops.get_next( - output_types=[tf.int64], - output_shapes=[[None]], - channel_name=f'{table_name}_uniquekeys_{channel_id}')[0] - return unique_keys - - unique_keys = npu_ops.gen_npu_ops.get_next( - output_types=[tf.int32], - output_shapes=[[None]], - channel_name=f'{table_name}_uniquekeys_{channel_id}')[0] - return unique_keys - - def sum_same_id_gradients(self, grad, var, is_expansion): - if isinstance(var, ops.Tensor): - # 扩容模式从scope获取表名,偏移是-2 - table_name = var.op.name.split('/')[-2] - else: - table_instance = ConfigInitializer.get_instance().sparse_embed_config.get_table_instance(var) - table_name = table_instance.table_name - 
with tf.compat.v1.variable_scope("restore_vector_second"): - restore_vector_second = self._get_restore_vector_second(table_name) - - with tf.compat.v1.variable_scope("unique_keys"): - unique_keys = self._get_unique_keys(table_name, is_expansion) - - unique_local_grad = tf.compat.v1.unsorted_segment_sum(grad, - restore_vector_second, - array_ops.shape(unique_keys)[0]) - return unique_local_grad, unique_keys - def custom_update_op(self, opt, grad): if isinstance(grad, ops.Tensor): diff --git a/tests/mx_rec/core/mock_class.py b/tests/mx_rec/core/mock_class.py index 7566aa1a..04c9ae56 100644 --- a/tests/mx_rec/core/mock_class.py +++ b/tests/mx_rec/core/mock_class.py @@ -208,6 +208,7 @@ class MockOptimizer: def __init__(self): self.slot_num = 2 + self.derivative = 2 def initialize_slots(self, var, table_instance): # Create slots for the first and second moments. diff --git a/tests/mx_rec/core/test_build_graph.py b/tests/mx_rec/core/test_build_graph.py index 0b90b790..14913cf7 100644 --- a/tests/mx_rec/core/test_build_graph.py +++ b/tests/mx_rec/core/test_build_graph.py @@ -156,84 +156,6 @@ class TestGetIdOffsetsFunc(unittest.TestCase): self.assertEqual(swap_len, 0) -class TestGetRestoreVectorSecondFunc(unittest.TestCase): - """ - Test for 'mx_rec.core.asc.build_graph.get_restore_vector_second'. - """ - - def setUp(self): - # 默认动态扩容、hot emb、HBM - self.config = dict(table_name="test_table", channel_id=0, is_hbm=True, emb_size=8, ext_emb_size=8, - feat_cnt=8, batch_size=32, rank_size=8, send_count=1, device_id=0, - use_hot=True, use_dynamic_expansion=True) - self.max_lookup_vec_size = self.config.get("send_count") * self.config.get("rank_size") - - def tearDown(self): - # 恢复config - self.config = dict(table_name="test_table", channel_id=0, is_hbm=True, emb_size=8, ext_emb_size=8, - feat_cnt=8, batch_size=32, rank_size=8, send_count=1, device_id=0, - use_hot=True, use_dynamic_expansion=True) - - @mock.patch("mx_rec.core.asc.build_graph.npu_ops.gen_npu_ops.get_next") - def test_get_restore_vector_second(self, mock_get_next): - """ - case: test get_restore_vector_second - """ - - from mx_rec.core.asc.build_graph import get_restore_vector_second - - with tf.Graph().as_default(): - mock_get_next.return_value = [0] - restore_vector_second = get_restore_vector_second(self.max_lookup_vec_size, self.config) - self.assertEqual(restore_vector_second, 0) - - -class TestGetUniqueKeysFunc(unittest.TestCase): - """ - Test for 'mx_rec.core.asc.build_graph.get_unique_keys'. 
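An aside on PATCH 026's core change, shown above in mx_rec/optimizers/base.py: `sum_same_id_gradients` becomes a static helper on CustomizedOptimizer, the per-table getnext channels move into module-level `get_restore_vector_second`/`get_unique_keys`, and the reduction itself is a single `unsorted_segment_sum` keyed by the second-all2all restore vector. A self-contained illustration with hand-made tensors (the restore vector normally arrives over the `<table>_restore_second_0` channel, so the values here are invented):

```python
import tensorflow as tf

tf.compat.v1.disable_eager_execution()  # no-op on TF1, needed under TF2

# Four gradient rows produced by four lookups that hit only three unique
# keys: lookups 0 and 2 both touched unique key 0.
grad = tf.constant([[1., 1.], [2., 2.], [3., 3.], [4., 4.]])
restore_vector_second = tf.constant([0, 1, 0, 2])
num_unique_keys = 3  # in base.py this is array_ops.shape(unique_keys)[0]

unique_local_grad = tf.compat.v1.unsorted_segment_sum(
    grad, restore_vector_second, num_unique_keys)

with tf.compat.v1.Session() as sess:
    print(sess.run(unique_local_grad))
# [[4. 4.]  <- rows 0 and 2 merged into unique key 0
#  [2. 2.]
#  [4. 4.]]
```

The test deletions that continue below follow directly from this move: the channel helpers no longer live in build_graph, so their build_graph test classes go away.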
- """ - - def setUp(self): - # 默认动态扩容、hot emb、HBM - self.config = dict(table_name="test_table", channel_id=0, is_hbm=True, emb_size=8, ext_emb_size=8, - feat_cnt=8, batch_size=32, rank_size=8, send_count=1, device_id=0, - use_hot=True, use_dynamic_expansion=True) - self.max_lookup_vec_size = self.config.get("send_count") * self.config.get("rank_size") - - def tearDown(self): - # 恢复config - self.config = dict(table_name="test_table", channel_id=0, is_hbm=True, emb_size=8, ext_emb_size=8, - feat_cnt=8, batch_size=32, rank_size=8, send_count=1, device_id=0, - use_hot=True, use_dynamic_expansion=True) - - @mock.patch("mx_rec.core.asc.build_graph.npu_ops.gen_npu_ops.get_next") - def test_get_unique_keys_case1(self, mock_get_next): - """ - case1: 动态扩容 - """ - - from mx_rec.core.asc.build_graph import get_unique_keys - - with tf.Graph().as_default(): - mock_get_next.return_value = [0] - unique_keys = get_unique_keys(self.max_lookup_vec_size, self.config) - self.assertEqual(unique_keys, 0) - - @mock.patch("mx_rec.core.asc.build_graph.npu_ops.gen_npu_ops.get_next") - def test_get_unique_keys_case2(self, mock_get_next): - """ - case2: 非动态扩容 - """ - - from mx_rec.core.asc.build_graph import get_unique_keys - - with tf.Graph().as_default(): - self.config["use_dynamic_expansion"] = False - mock_get_next.return_value = [1] - unique_keys = get_unique_keys(self.max_lookup_vec_size, self.config) - self.assertEqual(unique_keys, 1) - - class TestGetAll2allArgsFunc(unittest.TestCase): """ Test for 'mx_rec.core.asc.build_graph.get_all2all_args'. @@ -351,9 +273,7 @@ class TestGetPreProcessedTensorForAscFunc(unittest.TestCase): get_restore_vector=mock.MagicMock(return_value=[0, 0]), get_id_offsets=mock.MagicMock(return_value=[0, 0, 0]), get_all2all_args=mock.MagicMock(return_value=0), - get_swap_info=mock.MagicMock(return_value=0), - get_restore_vector_second=mock.MagicMock(return_value=0), - get_unique_keys=mock.MagicMock(return_value=0)) + get_swap_info=mock.MagicMock(return_value=0)) @mock.patch("mx_rec.core.asc.build_graph.ConfigInitializer") def test_get_preprocessed_tensor_for_asc_case1(self, build_graph_config_initializer): """ @@ -368,16 +288,12 @@ class TestGetPreProcessedTensorForAscFunc(unittest.TestCase): result = get_preprocessed_tensor_for_asc(None, self.config) self.assertIsNotNone(result.get("restore_vector")) - self.assertIsNotNone(result.get("restore_vector_second")) - self.assertIsNotNone(result.get("unique_keys")) @mock.patch.multiple("mx_rec.core.asc.build_graph", get_restore_vector=mock.MagicMock(return_value=[0, 0]), get_id_offsets=mock.MagicMock(return_value=[0, 0, 0]), get_all2all_args=mock.MagicMock(return_value=0), - get_swap_info=mock.MagicMock(return_value=0), - get_restore_vector_second=mock.MagicMock(return_value=0), - get_unique_keys=mock.MagicMock(return_value=0)) + get_swap_info=mock.MagicMock(return_value=0)) @mock.patch("mx_rec.core.asc.build_graph.ConfigInitializer") def test_get_preprocessed_tensor_for_asc_case2(self, build_graph_config_initializer): """ @@ -392,16 +308,12 @@ class TestGetPreProcessedTensorForAscFunc(unittest.TestCase): result = get_preprocessed_tensor_for_asc(None, self.config) self.assertIsNotNone(result.get("restore_vector")) - self.assertIsNotNone(result.get("restore_vector_second")) - self.assertIsNotNone(result.get("unique_keys")) @mock.patch.multiple("mx_rec.core.asc.build_graph", get_restore_vector=mock.MagicMock(return_value=[0, 0]), get_id_offsets=mock.MagicMock(return_value=[0, 0, 0]), get_all2all_args=mock.MagicMock(return_value=0), - 
get_swap_info=mock.MagicMock(return_value=0), - get_restore_vector_second=mock.MagicMock(return_value=0), - get_unique_keys=mock.MagicMock(return_value=0)) + get_swap_info=mock.MagicMock(return_value=0)) @mock.patch("mx_rec.core.asc.build_graph.ConfigInitializer") def test_get_preprocessed_tensor_for_asc_case3(self, build_graph_config_initializer): """ @@ -417,7 +329,6 @@ class TestGetPreProcessedTensorForAscFunc(unittest.TestCase): self.config["channel_id"] = 1 result = get_preprocessed_tensor_for_asc(None, self.config) self.assertIsNotNone(result.get("restore_vector")) - self.assertIsNone(result.get("restore_vector_second")) if __name__ == '__main__': diff --git a/tests/mx_rec/core/test_manager.py b/tests/mx_rec/core/test_manager.py index 815ad843..ffa8b09e 100644 --- a/tests/mx_rec/core/test_manager.py +++ b/tests/mx_rec/core/test_manager.py @@ -385,6 +385,7 @@ class TestInitializeEmbCacheFunc(unittest.TestCase): USE_STATIC=mock.MagicMock(return_value=0), USE_HOT=mock.MagicMock(return_value=1), USE_DYNAMIC_EXPANSION=mock.MagicMock(return_value=2), + USE_SUM_SAME_ID_GRADIENTS=mock.MagicMock(return_value=4), RankInfo=mock.MagicMock(return_value="mock_info"), HybridMgmt=mock.MagicMock(return_value=MockHybridMgmt(is_initialized=False))) @mock.patch("mx_rec.core.asc.manager.ConfigInitializer") @@ -398,6 +399,9 @@ class TestInitializeEmbCacheFunc(unittest.TestCase): mock_config_initializer = MockConfigInitializer(use_static=True, use_dynamic_expansion=True) manager_config_initializer.get_instance = mock.Mock(return_value=mock_config_initializer) + mock_opt = MockOptimizer() + manager_config_initializer.get_instance().optimizer_config.optimizer_instance = mock_opt + with self.assertRaises(RuntimeError): initialize_emb_cache([], []) @@ -408,6 +412,7 @@ class TestInitializeEmbCacheFunc(unittest.TestCase): USE_STATIC=mock.MagicMock(return_value=0), USE_HOT=mock.MagicMock(return_value=1), USE_DYNAMIC_EXPANSION=mock.MagicMock(return_value=2), + USE_SUM_SAME_ID_GRADIENTS=mock.MagicMock(return_value=4), RankInfo=mock.MagicMock(return_value="mock_info")) @mock.patch("mx_rec.core.asc.manager.ConfigInitializer") @mock.patch("mx_rec.core.asc.manager.HybridMgmt") @@ -421,6 +426,9 @@ class TestInitializeEmbCacheFunc(unittest.TestCase): mock_config_initializer = MockConfigInitializer(use_static=True, use_dynamic_expansion=True) manager_config_initializer.get_instance = mock.Mock(return_value=mock_config_initializer) + mock_opt = MockOptimizer() + manager_config_initializer.get_instance().optimizer_config.optimizer_instance = mock_opt + mock_mgmt = MockHybridMgmt(is_initialized=True) mock_hybrid_mgmt.return_value = mock_mgmt initialize_emb_cache([], []) -- Gitee From 29683ecef60b80c034f19dcfc833f1583c7ea7bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=BD=95=E9=9C=96?= Date: Wed, 10 Apr 2024 10:28:32 +0800 Subject: [PATCH 027/302] =?UTF-8?q?mxrec=E6=9E=84=E5=BB=BA=E4=BC=98?= =?UTF-8?q?=E5=8C=96=EF=BC=9A=E4=BD=BF=E7=94=A8python3.7=20setup.py=20bdis?= =?UTF-8?q?t=5Fwheel=E6=96=B9=E5=BC=8F=E6=9E=84=E5=BB=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 20 +++++++++++--------- build/move_whl_file_2_pkg_dir.sh | 1 + 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index fccc0244..ae3ec0a9 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ mxRec作为面向互联网市场搜索推荐广告的应用使能SDK产品,对 ## 安装方式 -安装前,请参考《CANN 软件安装指南CANN 软件安装指南》安装CANN开发套件软件包和TensorFlow适配昇腾插件。 +安装前,请参考《CANN 软件安装指南》安装CANN开发套件软件包和TensorFlow适配昇腾插件。 
CANN软件提供进程级环境变量设置脚本,供用户在进程中引用,以自动完成环境变量设置。用户进程结束后自动失效。可在程序启动的Shell脚本中使用如下命令设置CANN的相关环境变量,也可通过命令行执行如下命令(以root用户默认安装路径“/usr/local/Ascend”为例): ```shell @@ -63,12 +63,14 @@ bash run.sh - [openmpi 4.1.5](https://download.open-mpi.org/release/open-mpi/v4.1/openmpi-4.1.5.tar.gz): 请参考软件文档在编译环境完成安装 - tensorflow 1.15/2.6.5:根据实际需求选择对应版本 -将pybind11和securec的压缩包放在与MxRec代码同级的opensource目录下,并且将其分别更名为pybind11-2.10.3.zip、huaweicloud-sdk-c-obs-3.23.9.zip。如果没有opensource目录,则需要在MxRec同级的目录下手动创建opensource目录,然后将pybind11和securec的压缩包放在opensource目录下。 +将pybind11和securec的压缩包放在与mxRec代码同级的opensource目录下,并且将其分别更名为pybind11-2.10.3.zip、huaweicloud-sdk-c-obs-3.23.9.zip。如果没有opensource目录,则需要在mxRec同级的目录下手动创建opensource目录,然后将pybind11和securec的压缩包放在opensource目录下。 为了构建多个版本的whl包,编译脚本在python虚拟环境完成对应tensorflow版本的安装。用户可以根据实际情况调整编译脚本,指定tensorflow的安装路径。编译方法: -- build/build.sh:执行脚本完成tf1和tf2版本whl包的构建和打包。执行脚本前,请参考build/build_tf1_with_opensource.sh、build/build_tf2_with_opensource.sh创建对应的虚拟环境,在虚拟环境中完成对应tensorflow版本的安装,并修改对应的激活命令。 -- build/build_tf1_with_opensource.sh:执行脚本完成tf1版本whl包的构建,构建成功后,whl包在/build/mindxsdk-mxrec/tf1_whl子目录下。执行脚本前,创建tf1虚拟环境,在虚拟环境中完成tensorflow 1.15.0版本的安装,并修改对应的激活命令。 -- build/build_tf2_with_opensource.sh:执行脚本完成tf2版本whl包的构建,构建成功后,whl包在/build/mindxsdk-mxrec/tf2_whl子目录下。执行脚本前,创建tf2虚拟环境,在虚拟环境中完成tensorflow 2.6.5版本的安装,并修改对应的激活命令。 + +进入mxRec代码目录: +- setup.py:执行脚本setup.py,比如:**python3.7 setup.py**完成tf1和tf2版本whl包的构建和打包,构建成功后,whl包在/build/mindxsdk-mxrec/目录下,其中tf1_whl和tf2_whl目录下存在对应的whl包。执行脚本前,请参考build/build_tf1.sh、build/build_tf2.sh创建对应的虚拟环境,在虚拟环境中完成对应tensorflow版本的安装,并修改对应的激活命令。 +- setup_tf1.py:执行脚本setup_tf1.py,比如:**python3.7 setup_tf1.py bdist_wheel**完成tf1版本whl包的构建,构建成功后,whl包在/build/mindxsdk-mxrec/tf1_whl子目录下。执行脚本前,请参考build/build_tf1.sh创建tf1虚拟环境,在虚拟环境中完成tensorflow 1.15.0版本的安装,并修改对应的激活命令。 +- setup_tf2.py:执行脚本setup_tf2.py,比如:**python3.7 setup_tf2.py bdist_wheel**完成tf2版本whl包的构建,构建成功后,whl包在/build/mindxsdk-mxrec/tf2_whl子目录下。执行脚本前,请参考build/build_tf2.sh创建tf2虚拟环境,在虚拟环境中完成tensorflow 2.6.5版本的安装,并修改对应的激活命令。 如需使用动态扩容功能,进入“./cust_op/cust_op_by_addr”目录中。参考以下命令编译并安装动态扩容算子包。 ```shell @@ -99,8 +101,8 @@ bash run_python_dt.sh - [pybind11 v2.10.3](https://github.com/pybind/pybind11/archive/refs/tags/v2.10.3.zip) - [securec](https://github.com/huaweicloud/huaweicloud-sdk-c-obs/archive/refs/tags/v3.23.9.zip) -将googletest、emock、pybind11和securec的压缩包放在与MxRec代码同级的opensource目录下,并且将其分别更名为googletest-release-1.8.1.zip、 -emock-0.9.0.zip、pybind11-2.10.3.zip、 huaweicloud-sdk-c-obs-3.23.9.zip。如果没有opensource目录,则需要在MxRec同级的目录下手动创建opensource目录, +将googletest、emock、pybind11和securec的压缩包放在与mxRec代码同级的opensource目录下,并且将其分别更名为googletest-release-1.8.1.zip、 +emock-0.9.0.zip、pybind11-2.10.3.zip、 huaweicloud-sdk-c-obs-3.23.9.zip。如果没有opensource目录,则需要在mxRec同级的目录下手动创建opensource目录, 然后将前述几个压缩包放在opensource目录下。 如需使用C++测试用例,需要按照上述描述准备需要的依赖,准备好之后,进入src目录中。参考以下命令执行C++测试用例: @@ -117,11 +119,11 @@ bash test_ut.sh tf2 ## 使用指导 -mxRec所支持的使用环境、功能特性、API接口与使用样例请参考昇腾开源社区MindX SDK产品文档。 +mxRec所支持的使用环境、功能特性、API接口与使用样例请参考mxRec用户指南。 ## 参考设计 -mxrec框架基础镜像,基于TensorFlow 1.15.0、tensorflow2.6.5制作的基础镜像,安装mxrec后即可开始训练,以及样例使用介绍。 +mxRec框架基础镜像,基于TensorFlow 1.15.0、tensorflow2.6.5制作的基础镜像,安装mxRec后即可开始训练,以及样例使用介绍。 1. 
https://ascendhub.huawei.com/#/detail/mxrec-tf1 diff --git a/build/move_whl_file_2_pkg_dir.sh b/build/move_whl_file_2_pkg_dir.sh index 824ac52a..d489c2fb 100644 --- a/build/move_whl_file_2_pkg_dir.sh +++ b/build/move_whl_file_2_pkg_dir.sh @@ -24,6 +24,7 @@ tf_version=$1 function move_whl_file_2_pkg_dir() { mkdir -p "$SCRIPT_DIR"/"${pkg_dir}"/"${tf_version}"_whl + rm -rf "$SCRIPT_DIR"/"${pkg_dir}"/"${tf_version}"_whl/* mv ${MxRec_DIR}/dist/mx_rec*.whl "$SCRIPT_DIR"/"${pkg_dir}"/"${tf_version}"_whl cd "$SCRIPT_DIR"/"${pkg_dir}"/"${tf_version}"_whl whl_file=$(ls .) -- Gitee From 7259761ec4ea349ca9a54212bf85b8eea934d961 Mon Sep 17 00:00:00 2001 From: yxy1684 <2270320041@qq.com> Date: Thu, 11 Apr 2024 11:30:56 +0000 Subject: [PATCH 028/302] =?UTF-8?q?!80=20=E4=BF=AE=E6=94=B9=E4=B8=80?= =?UTF-8?q?=E4=BA=9B=E6=97=A5=E5=BF=97=E6=8B=BC=E5=86=99=E9=94=99=E8=AF=AF?= =?UTF-8?q?=E5=A6=82deivce=E5=8F=8A=E6=89=93=E5=8D=B0f"",raise=E9=94=99?= =?UTF-8?q?=E8=AF=AF=E6=97=A5=E5=BF=97=E5=BC=80=E5=A4=B4=E5=B0=8F=E5=86=99?= =?UTF-8?q?=EF=BC=8Clogger=E6=97=A5=E5=BF=97=E5=BC=80=E5=A4=B4=E5=A4=A7?= =?UTF-8?q?=E5=86=99=20*=20=E4=BF=AE=E6=94=B9=E4=B8=80=E4=BA=9B=E6=97=A5?= =?UTF-8?q?=E5=BF=97=E6=8B=BC=E5=86=99=E9=94=99=E8=AF=AF=E5=A6=82deivce?= =?UTF-8?q?=E5=8F=8A=E6=89=93=E5=8D=B0f""=20*=20=E4=BF=AE=E6=94=B9?= =?UTF-8?q?=E4=B8=80=E4=BA=9B=E6=8B=BC=E5=86=99=E5=8F=8A=E6=89=93=E5=8D=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/DCNv2/main_mxrec.py | 10 +++++----- examples/demo/little_demo/main.py | 8 ++++---- examples/demo/little_demo/run_mode.py | 8 ++++---- examples/demo/little_demo_estimator/main.py | 8 ++++---- .../demo/little_demo_estimator/nn_model_input.py | 6 +++--- examples/dlrm/model/main_mxrec.py | 6 +++--- mx_rec/core/asc/helper.py | 2 +- mx_rec/core/feature_process.py | 4 ++-- mx_rec/graph/acg_push_ops.py | 14 +++++++------- mx_rec/graph/merge_lookup.py | 2 +- mx_rec/graph/modifier.py | 16 ++++++++-------- mx_rec/optimizers/emb_optimizer.py | 4 ++-- mx_rec/saver/patch.py | 2 +- mx_rec/util/config_utils/feature_spec_utils.py | 2 +- mx_rec/util/cpu.py | 6 +++--- mx_rec/util/normalization.py | 4 ++-- mx_rec/util/perf.py | 2 +- tests/mx_rec/saver/sparse_embedding_mock.py | 2 +- tools/model_convert/model_convert.py | 4 ++-- 19 files changed, 55 insertions(+), 55 deletions(-) diff --git a/examples/DCNv2/main_mxrec.py b/examples/DCNv2/main_mxrec.py index 540445e8..d5a51312 100644 --- a/examples/DCNv2/main_mxrec.py +++ b/examples/DCNv2/main_mxrec.py @@ -125,7 +125,7 @@ def model_forward(feature_list, hash_table_list, batch, is_train, modify_graph): elif len(embedding_list) > 1: emb = tf.reduce_sum(embedding_list, axis=0, keepdims=False) else: - raise ValueError("The length of embedding_list must be greater than or equal to 1.") + raise ValueError("the length of embedding_list must be greater than or equal to 1.") my_model = MyModel() model_output = my_model.build_model(embedding=emb, dense_feature=batch["dense_feature"], @@ -261,8 +261,8 @@ if __name__ == "__main__": MODIFY_GRAPH_FLAG = bool(int(os.getenv("USE_MODIFY_GRAPH", 0))) use_faae = bool(int(os.getenv("USE_FAAE", 0))) except ValueError as err: - raise ValueError(f"please correctly config USE_DYNAMIC_EXPANSION or USE_MULTI_LOOKUP or USE_FAAE " - f"or USE_MODIFY_GRAPH only 0 or 1 is supported.") from err + raise ValueError("please correctly config USE_DYNAMIC_EXPANSION or USE_MULTI_LOOKUP or USE_FAAE " + "or USE_MODIFY_GRAPH only 0 or 1 is supported.") from err use_dynamic = 
bool(int(os.getenv("USE_DYNAMIC", 0))) logger.info(f"USE_DYNAMIC: {use_dynamic}") @@ -270,7 +270,7 @@ if __name__ == "__main__": use_dynamic=use_dynamic, use_dynamic_expansion=use_dynamic_expansion) IF_LOAD = False rank_id = mxrec_util.communication.hccl_ops.get_rank_id() - filelist = glob(f"./saved-model/sparse-model-0") + filelist = glob("./saved-model/sparse-model-0") if filelist: IF_LOAD = True ConfigInitializer.get_instance().if_load = IF_LOAD @@ -409,7 +409,7 @@ if __name__ == "__main__": lr = sess.run(cfg.learning_rate) global_step = sess.run(cfg.global_step) except tf.errors.OutOfRangeError: - logger.info(f"Encounter the end of Sequence for training.") + logger.info("Encounter the end of Sequence for training.") break end_time = time.time() diff --git a/examples/demo/little_demo/main.py b/examples/demo/little_demo/main.py index 8813de44..05d6896f 100644 --- a/examples/demo/little_demo/main.py +++ b/examples/demo/little_demo/main.py @@ -194,14 +194,14 @@ if __name__ == "__main__": USE_TIMESTAMP = bool(int(os.getenv("USE_TIMESTAMP", 0))) USE_ONE_SHOT = bool(int(os.getenv("USE_ONE_SHOT", 0))) except ValueError as err: - raise ValueError(f"please correctly config USE_MPI or USE_DYNAMIC or USE_HOT or USE_DYNAMIC_EXPANSION or " - f"USE_MULTI_LOOKUP or USE_MODIFY_GRAPH or USE_TIMESTAMP or USE_ONE_SHOT " - f"only 0 or 1 is supported.") from err + raise ValueError("please correctly config USE_MPI or USE_DYNAMIC or USE_HOT or USE_DYNAMIC_EXPANSION or " + "USE_MULTI_LOOKUP or USE_MODIFY_GRAPH or USE_TIMESTAMP or USE_ONE_SHOT " + "only 0 or 1 is supported.") from err try: MULTI_LOOKUP_TIMES = int(os.getenv("MULTI_LOOKUP_TIMES", 2)) except ValueError as err: - raise ValueError(f"please correctly config MULTI_LOOKUP_TIMES only int is supported.") from err + raise ValueError("please correctly config MULTI_LOOKUP_TIMES only int is supported.") from err if_load = False save_path = "./saved-model" diff --git a/examples/demo/little_demo/run_mode.py b/examples/demo/little_demo/run_mode.py index 0f7a8cc4..e750ceb5 100644 --- a/examples/demo/little_demo/run_mode.py +++ b/examples/demo/little_demo/run_mode.py @@ -75,7 +75,7 @@ class RunMode: try: self.session.run(self.eval_model.loss_list) except tf.errors.OutOfRangeError: - logger.info(f"Encounter the end of Sequence for eval.") + logger.info("Encounter the end of Sequence for eval.") break def set_train_ops(self): @@ -140,7 +140,7 @@ class RunMode: try: self.session.run([self.train_ops, self.train_model.loss_list]) except tf.errors.OutOfRangeError: - logger.info(f"Encounter the end of Sequence for training.") + logger.info("Encounter the end of Sequence for training.") break else: for t in self.table_list: @@ -170,14 +170,14 @@ class RunMode: self.epoch += 1 def predict(self, model_file: List[str]): - logger.info(f"############### start predict ################") + logger.info("############### start predict ################") # get the latest model latest_step = get_load_step(model_file) self.saver = tf.compat.v1.train.Saver() self.saver.restore(self.session, f"./saved-model/model-{latest_step}") self._infer() - logger.info(f"############### predict end ################") + logger.info("############### predict end ################") def change_threshold(self): thres_tensor = tf.constant(60, dtype=tf.int32) diff --git a/examples/demo/little_demo_estimator/main.py b/examples/demo/little_demo_estimator/main.py index 901bf23a..5c3c94d1 100644 --- a/examples/demo/little_demo_estimator/main.py +++ b/examples/demo/little_demo_estimator/main.py @@ -166,14 
+166,14 @@ if __name__ == '__main__': args.use_one_shot = bool(int(os.getenv("USE_ONE_SHOT", 0))) args.enable_push_ops_test = bool(int(os.getenv("ENABLE_PUSH_OPS_TEST", 0))) except ValueError as err: - raise ValueError(f"please correctly config USE_MPI or USE_DYNAMIC or USE_HOT or USE_DYNAMIC_EXPANSION or " - f"USE_MULTI_LOOKUP or USE_MODIFY_GRAPH or USE_TIMESTAMP or USE_ONE_SHOT " - f"only 0 or 1 is supported.") from err + raise ValueError("please correctly config USE_MPI or USE_DYNAMIC or USE_HOT or USE_DYNAMIC_EXPANSION or " + "USE_MULTI_LOOKUP or USE_MODIFY_GRAPH or USE_TIMESTAMP or USE_ONE_SHOT " + "only 0 or 1 is supported.") from err try: MULTI_LOOKUP_TIMES = int(os.getenv("MULTI_LOOKUP_TIMES", 2)) except ValueError as err: - raise ValueError(f"please correctly config MULTI_LOOKUP_TIMES only int is supported.") from err + raise ValueError("please correctly config MULTI_LOOKUP_TIMES only int is supported.") from err if args.run_mode == 'train': args.train_steps = -1 diff --git a/examples/demo/little_demo_estimator/nn_model_input.py b/examples/demo/little_demo_estimator/nn_model_input.py index 2ce70d41..d763c058 100644 --- a/examples/demo/little_demo_estimator/nn_model_input.py +++ b/examples/demo/little_demo_estimator/nn_model_input.py @@ -39,19 +39,19 @@ def get_model_fn(create_fs_params, cfg, access_and_evict_config_dict=None): loss_dict = {} if mode == tf.estimator.ModeKeys.TRAIN: - logger.info(f"use estimator train mode") + logger.info("Use estimator train mode") loss_dict['loss'] = [['train_loss', loss]] return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=get_train_op(params, loss_dict.get('loss'))) if mode == tf.estimator.ModeKeys.EVAL: - logger.info("use estimator eval mode") + logger.info("Use estimator eval mode") return tf.estimator.EstimatorSpec(mode=mode, loss=loss) if mode == tf.estimator.ModeKeys.PREDICT: - logger.info("use estimator predict mode") + logger.info("Use estimator predict mode") loss_dict['task_1'] = prediction[0] loss_dict['task_2'] = prediction[1] diff --git a/examples/dlrm/model/main_mxrec.py b/examples/dlrm/model/main_mxrec.py index 2d0ee78e..4bbd16de 100644 --- a/examples/dlrm/model/main_mxrec.py +++ b/examples/dlrm/model/main_mxrec.py @@ -129,7 +129,7 @@ def model_forward(feature_list, hash_table_list, batch, is_train, modify_graph): elif len(embedding_list) > 1: emb = tf.reduce_sum(embedding_list, axis=0, keepdims=False) else: - raise ValueError("The length of embedding_list must be greater than or equal to 1.") + raise ValueError("the length of embedding_list must be greater than or equal to 1.") my_model = MyModel() model_output = my_model.build_model(embedding=emb, dense_feature=batch["dense_feature"], @@ -266,8 +266,8 @@ if __name__ == "__main__": MODIFY_GRAPH_FLAG = bool(int(os.getenv("USE_MODIFY_GRAPH", 0))) use_faae = bool(int(os.getenv("USE_FAAE", 0))) except ValueError as err: - raise ValueError(f"please correctly config USE_DYNAMIC_EXPANSION or USE_MULTI_LOOKUP or USE_FAAE " - f"or USE_MODIFY_GRAPH only 0 or 1 is supported.") from err + raise ValueError("please correctly config USE_DYNAMIC_EXPANSION or USE_MULTI_LOOKUP or USE_FAAE " + "or USE_MODIFY_GRAPH only 0 or 1 is supported.") from err use_dynamic = bool(int(os.getenv("USE_DYNAMIC", 0))) logger.info(f"USE_DYNAMIC:{use_dynamic}") diff --git a/mx_rec/core/asc/helper.py b/mx_rec/core/asc/helper.py index 771f359f..aaa97017 100644 --- a/mx_rec/core/asc/helper.py +++ b/mx_rec/core/asc/helper.py @@ -281,7 +281,7 @@ def do_insert(args, insert_tensors, splits, table_names, 
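A note on the logging convention PATCH 028 enforces, visible in the EvictHook hunk being rewritten here: dropping the `f` prefix and passing `%`-style arguments defers string formatting to the logging framework, so records filtered out by the level cost nothing. A standalone sketch of the two styles:

```python
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("mx_rec_demo")

interval = 60

# f-string: the message is formatted eagerly, even if INFO were disabled.
logger.info(f"evict_time_interval: {interval}")

# %-style: formatting happens lazily, only when the record is handled.
# This is the form the patch converges on.
logger.info("evict_time_interval: %d", interval)
```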
input_dict): def export_read_emb_key_v2_op(args, pipeline_op): origin_batch = list(args) if len(origin_batch) < 1: - raise ValueError("The length of args is less than 1.") + raise ValueError("the length of args is less than 1.") if isinstance(origin_batch[0], dict): output_batch = origin_batch[0] valid_key = get_valid_op_key(output_batch) diff --git a/mx_rec/core/feature_process.py b/mx_rec/core/feature_process.py index 3963f6d5..7a90e78b 100644 --- a/mx_rec/core/feature_process.py +++ b/mx_rec/core/feature_process.py @@ -50,9 +50,9 @@ class EvictHook(tf.compat.v1.train.SessionRunHook): self._global_step_tensor = None if evict_step_interval is None: - logger.info(f"_EvictHook - > evict_time_interval: %d", self._evict_time_interval) + logger.info("_EvictHook - > evict_time_interval: %d", self._evict_time_interval) else: - logger.info(f"_EvictHook - > evict_time_interval: %d, evict_step_interval: %d", + logger.info("_EvictHook - > evict_time_interval: %d, evict_step_interval: %d", self._evict_time_interval, self._evict_step_interval) def begin(self): diff --git a/mx_rec/graph/acg_push_ops.py b/mx_rec/graph/acg_push_ops.py index 625ef92f..ed3e18e6 100644 --- a/mx_rec/graph/acg_push_ops.py +++ b/mx_rec/graph/acg_push_ops.py @@ -71,7 +71,7 @@ class ACGPushOpsToDatasetHook(tf.estimator.SessionRunHook): def after_create_session(self, session, coord): logger.info("[ACGPushOpsToDatasetHook] Trigger after create session!") initializers = tf.compat.v1.get_collection(_ACG_NEW_INITIALIZER) - logger.info(f"[ACGPushOpsToDatasetHook] Got new initialzers: %s.", initializers) + logger.info("[ACGPushOpsToDatasetHook] Got new initialzers: %s.", initializers) session.run(initializers) def end(self, session): @@ -185,12 +185,12 @@ def _find_op_from_base_op(base_ops: tf.Operation, target_op_type: str) -> tf.Ope for base_op in base_ops: parent_ops.extend(modifier.find_parent_op(base_op)) if not parent_ops: - raise ValueError(f"Op {target_op_type} was not found.") + raise ValueError(f"op {target_op_type} was not found.") def _get_dataset_op(graph: tf.Graph, get_next_op: Operation) -> Operation: if get_next_op.type != AnchorIteratorOp.ITERATOR_GET_NEXT.value: - raise TypeError("Op '{get_next_op}' must be one instance of IteratorGetNext.") + raise TypeError(f"op '{get_next_op}' must be one instance of IteratorGetNext.") # looking for the MakeIterator operator which corresponds to given batch_tensor base_op = modifier.find_make_iterator_op(get_next_op.outputs[0]) # looking for the op which is the one before OptimizeDataset operator @@ -198,9 +198,9 @@ def _get_dataset_op(graph: tf.Graph, get_next_op: Operation) -> Operation: optimize_dataset_op = _find_op_from_base_op(base_op, "ModelDataset") target_op = modifier.find_parent_op(optimize_dataset_op) if not target_op: - raise RuntimeError(f"The parent op for 'ModelDataset' op was not found.") + raise RuntimeError("the parent op for 'ModelDataset' op was not found.") if target_op[0].type != "OptimizeDataset": - raise TypeError(f"Op OptimizeDataset was not found.") + raise TypeError("op OptimizeDataset was not found.") target_op = target_op[0] else: # 'OptimizeDataset' is not available in TensorFlow2.X @@ -225,7 +225,7 @@ def _add_sorted_additional_tensors(addition_funcgraph_output_tensor, k_inputs, n def _get_tensor_consumers_unsafe(tensor: tf.Tensor) -> List[tf.Operation]: if isinstance(tensor, tf.Operation): - raise RuntimeError("not support type: {node}") + raise RuntimeError(f"not support type: {node}") from tensorflow.python import pywrap_tensorflow as c_api @@ 
-502,7 +502,7 @@ def _update_iterator_getnext( subgraph_to_push: Set[tf.Operation], ): if not get_next_op.outputs: - raise RuntimeError("There is no tensor in the dataset. Please check the dataset and data processing.") + raise RuntimeError("there is no tensor in the dataset. Please check the dataset and data processing.") iterator_type = "" if get_next_op.inputs: iterator_type = get_next_op.inputs[0].op.type diff --git a/mx_rec/graph/merge_lookup.py b/mx_rec/graph/merge_lookup.py index 8a11e515..b28872e4 100644 --- a/mx_rec/graph/merge_lookup.py +++ b/mx_rec/graph/merge_lookup.py @@ -50,7 +50,7 @@ def do_merge_lookup(is_train: bool = True): # get anchor ids cutting_point_list = tf.compat.v1.get_collection(ASCEND_SPARSE_LOOKUP_ENTRANCE) if not cutting_point_list: - raise RuntimeError("The sparse table does not have sparse lookup.") + raise RuntimeError("the sparse table does not have sparse lookup.") check_cutting_points(cutting_point_list) # get lookup info diff --git a/mx_rec/graph/modifier.py b/mx_rec/graph/modifier.py index a5843e02..8338e870 100644 --- a/mx_rec/graph/modifier.py +++ b/mx_rec/graph/modifier.py @@ -174,7 +174,7 @@ def find_make_iterator_op(batch_tensor: Tensor) -> Operation: logger.debug("Op MakeIterator '%s' was found.", each_op.name) return each_op - raise ValueError(f"Op MakeIterator was not found.") + raise ValueError(f"op MakeIterator was not found.") @performance("find_target_dataset_op") @@ -198,7 +198,7 @@ def find_target_dataset_op(base_ops: Operation, op_type: str) -> Operation: parent_ops.extend(find_parent_op(base_op)) if not parent_ops: - raise ValueError(f"Op {op_type} was not found.") + raise ValueError(f"op {op_type} was not found.") def get_dataset_op(get_next_op: Operation) -> Operation: @@ -214,7 +214,7 @@ def get_dataset_op(get_next_op: Operation) -> Operation: """ if get_next_op.type != AnchorIteratorOp.ITERATOR_GET_NEXT.value: - raise TypeError("Op '{get_next_op}' must be one instance of IteratorGetNext.") + raise TypeError(f"op '{get_next_op}' must be one instance of IteratorGetNext.") # looking for the MakeIterator operator which corresponds to given batch_tensor base_op = find_make_iterator_op(get_next_op.outputs[0]) @@ -223,9 +223,9 @@ def get_dataset_op(get_next_op: Operation) -> Operation: optimize_dataset_op = find_target_dataset_op(base_op, AnchorDatasetOp.MODEL_DATASET.value) target_op = find_parent_op(optimize_dataset_op) if not target_op: - raise RuntimeError(f"The parent op for 'ModelDataset' op was not found.") + raise RuntimeError("the parent op for 'ModelDataset' op was not found.") if target_op[0].type != AnchorDatasetOp.OPTIMIZE_DATASET.value: - raise TypeError(f"Op OptimizeDataset was not found.") + raise TypeError("op OptimizeDataset was not found.") target_op = target_op[0] else: # 'OptimizeDataset' is not available in TensorFlow2.X @@ -283,7 +283,7 @@ def find_target_instance_dataset(variant_tensor: Tensor) -> DatasetV1Adapter: if not isinstance(ins.element_spec, dict) and not ( isinstance(ins.element_spec, (list, tuple)) and len(ins.element_spec) == 2 and isinstance( ins.element_spec[0], dict)): - raise NotImplementedError("The found dataset does not return a valid layout.") + raise NotImplementedError("the found dataset does not return a valid layout.") return ins @@ -517,7 +517,7 @@ def update_iterator_getnext(get_next_op: Operation, """ if not get_next_op.outputs: - raise RuntimeError("There is no tensor in the dataset. Please check the dataset and data processing.") + raise RuntimeError("there is no tensor in the dataset. 
Please check the dataset and data processing.") iterator_type = "" if get_next_op.outputs[0].op.inputs: iterator_type = get_next_op.outputs[0].op.inputs[0].op.type @@ -640,7 +640,7 @@ class GraphModifierHook(tf.estimator.SessionRunHook): self._iterator_type = ConfigInitializer.get_instance().train_params_config.iterator_type if self._modify_graph and self._iterator_type not in (AnchorIteratorOp.MAKE_ITERATOR.value, AnchorIteratorOp.ONE_SHOT_ITERATOR.value): - raise ValueError("The value of iterator type should be like `MakeIterator` or `OneShotIterator`.") + raise ValueError("the value of iterator type should be like `MakeIterator` or `OneShotIterator`.") logger.debug("In GraphModifierHook, iterator type is `%s`.", self._iterator_type) def after_create_session(self, session, coord): diff --git a/mx_rec/optimizers/emb_optimizer.py b/mx_rec/optimizers/emb_optimizer.py index c7f1b64a..9e6a80e1 100644 --- a/mx_rec/optimizers/emb_optimizer.py +++ b/mx_rec/optimizers/emb_optimizer.py @@ -57,7 +57,7 @@ class EmbOptimizer: Returns: None """ if key in self._optimizer: - raise ValueError(f"Optimizer {key} has been set for hash table {table_name}.") + raise ValueError(f"optimizer {key} has been set for hash table {table_name}.") self._optimizer[key] = state_dict def check_optimizer_instance_list(self): @@ -73,4 +73,4 @@ class EmbOptimizer: optimizer_instance = getattr(optimizer_instance, '_opt') if not isinstance(optimizer_instance, CustomizedOptimizer): - raise TypeError("The optimizer instance must be an instance of CustomizedOptimizer.") + raise TypeError("the optimizer instance must be an instance of CustomizedOptimizer.") diff --git a/mx_rec/saver/patch.py b/mx_rec/saver/patch.py index fcf1134f..6cffcc18 100644 --- a/mx_rec/saver/patch.py +++ b/mx_rec/saver/patch.py @@ -289,7 +289,7 @@ def restore(self, sess, save_path): if self._is_empty: return if not checkpoint_management.checkpoint_exists_internal(checkpoint_prefix): - raise ValueError("The passed save_path is not a valid checkpoint: " + + raise ValueError("the passed save_path is not a valid checkpoint: " + checkpoint_prefix) tf_logging.info("Restoring parameters from %s", checkpoint_prefix) diff --git a/mx_rec/util/config_utils/feature_spec_utils.py b/mx_rec/util/config_utils/feature_spec_utils.py index 4c40996c..f244bb39 100644 --- a/mx_rec/util/config_utils/feature_spec_utils.py +++ b/mx_rec/util/config_utils/feature_spec_utils.py @@ -25,7 +25,7 @@ class FeatureSpecConfig: def clear_same_table_feature_spec(self, table_name: Optional[str], is_training: bool) -> None: if self.table_name_to_feature_spec.get(table_name) is None or \ self.table_name_to_feature_spec.get(table_name).get(is_training) is None: - raise KeyError("The table name `%s` does not exist in table_name_to_feature_spec, " + raise KeyError("the table name `%s` does not exist in table_name_to_feature_spec, " "please check whether the insert_feature_spec(...) 
is invoked.", table_name) self.table_name_to_feature_spec.get(table_name)[is_training] = [] logger.debug("The feature spec of the table name `%s` has been cleared.", table_name) diff --git a/mx_rec/util/cpu.py b/mx_rec/util/cpu.py index f4d299ed..69700262 100644 --- a/mx_rec/util/cpu.py +++ b/mx_rec/util/cpu.py @@ -26,7 +26,7 @@ class PcieInfo(ctypes.Structure): ] -def get_card_and_deivce(logic_id): +def get_card_and_device(logic_id): """ 通过芯片逻辑id获取芯片的卡id和device id 一张卡可能有多个芯片,对应多个device_id,但每个芯片的逻辑ID @@ -52,7 +52,7 @@ def get_pcie_id(card_id, device_id): dev = ctypes.c_int(device_id) ret = g_dcmi.dcmi_get_device_pcie_info_v2(card, dev, ctypes.pointer(info)) if ret != 0: - raise OSError("cant get pcie info of device {card_id}:{deivce_id}") + raise OSError(f"cant get pcie info of device {card_id}:{device_id}") pcie_id = f'{info.domain:04X}:{info.bdf_busid:02x}:' pcie_id += f'{info.bdf_deviceid:02x}.{info.bdf_funcid}' return pcie_id @@ -87,7 +87,7 @@ def bind_cpu_by_device_logic_id(logic_id): logger.error(e) return False try: - card_id, device_id = get_card_and_deivce(logic_id) + card_id, device_id = get_card_and_device(logic_id) pcie_id = get_pcie_id(card_id, device_id) numa = get_numa_by_pcie(pcie_id) cpu_list = get_cpu_list_by_numa(numa) diff --git a/mx_rec/util/normalization.py b/mx_rec/util/normalization.py index dc9dd2c1..a9b25132 100644 --- a/mx_rec/util/normalization.py +++ b/mx_rec/util/normalization.py @@ -33,6 +33,6 @@ def fix_invalid_table_name(name): if not fix_name: raise ValueError(f"The table name '{name}' doesn't contain valid character, " f"according to the rule '{pattern}'") - logger.warning(f"The table name '%s' contains invalid characters. The system automatically " - f"remove invalid characters. The table name was changed to '%s'", name, fix_name) + logger.warning("The table name '%s' contains invalid characters. The system automatically " + "remove invalid characters. The table name was changed to '%s'", name, fix_name) return fix_name diff --git a/mx_rec/util/perf.py b/mx_rec/util/perf.py index 3feb7332..81089f63 100644 --- a/mx_rec/util/perf.py +++ b/mx_rec/util/perf.py @@ -26,7 +26,7 @@ def performance(method_name): start = time.perf_counter() result = func(*args, **kwargs) span = time.perf_counter() - start - logger.debug(f"%s method consume %s (s).", method_name, round(span, 6)) + logger.debug("%s method consume %s (s).", method_name, round(span, 6)) return result return wrapper return decorator diff --git a/tests/mx_rec/saver/sparse_embedding_mock.py b/tests/mx_rec/saver/sparse_embedding_mock.py index 03df1466..83507e63 100644 --- a/tests/mx_rec/saver/sparse_embedding_mock.py +++ b/tests/mx_rec/saver/sparse_embedding_mock.py @@ -34,6 +34,6 @@ class SparseEmbeddingMock: def set_optimizer(self, key, state_dict): if key in self.optimizer: - raise ValueError(f"Optimizer {key} has been set for hash table {self.table_name}") + raise ValueError(f"optimizer {key} has been set for hash table {self.table_name}") self.optimizer[key] = state_dict diff --git a/tools/model_convert/model_convert.py b/tools/model_convert/model_convert.py index 7608917a..eb2432db 100644 --- a/tools/model_convert/model_convert.py +++ b/tools/model_convert/model_convert.py @@ -222,9 +222,9 @@ class ModelConverter: for _, dirs, _ in os.walk(check_dir): model_dirs.append(dirs) if not self._is_ddr and "DDR" in model_dirs[0]: - raise ValueError(f"wrong mode choose! you choose hbm mode, however ddr dir exists. ") + raise ValueError("wrong mode choose! you choose hbm mode, however ddr dir exists. 
") if self._is_ddr and "DDR" not in model_dirs[0]: - raise ValueError(f"wrong mode choose! you choose ddr mode, however ddr dir not exists. ") + raise ValueError("wrong mode choose! you choose ddr mode, however ddr dir not exists. ") def get_attribute_and_data_file(table_path): -- Gitee From 7d1fdf6042854ac79cab9a4cc33282ea39100437 Mon Sep 17 00:00:00 2001 From: longfeifei <962977793@qq.com> Date: Thu, 11 Apr 2024 11:40:33 +0000 Subject: [PATCH 029/302] =?UTF-8?q?!78=20=E5=88=86=E5=B8=83=E5=BC=8F?= =?UTF-8?q?=E8=AE=AD=E7=BB=83=E8=B5=84=E6=BA=90=E9=85=8D=E7=BD=AE=E6=96=B9?= =?UTF-8?q?=E6=A1=88=E9=80=82=E9=85=8D=20*=20=E9=9B=86=E5=90=88=E9=80=9A?= =?UTF-8?q?=E4=BF=A1=E4=B8=8E=E5=88=86=E5=B8=83=E5=BC=8F=E8=AE=AD=E7=BB=83?= =?UTF-8?q?=E8=B5=84=E6=BA=90=E9=85=8D=E7=BD=AE=E6=96=B9=E6=A1=88=E9=80=82?= =?UTF-8?q?=E9=85=8D=20*=20=E9=9B=86=E5=90=88=E9=80=9A=E4=BF=A1=E4=B8=8E?= =?UTF-8?q?=E5=88=86=E5=B8=83=E5=BC=8F=E8=AE=AD=E7=BB=83=E8=B5=84=E6=BA=90?= =?UTF-8?q?=E9=85=8D=E7=BD=AE=E6=96=B9=E6=A1=88=E9=80=82=E9=85=8D=20*=20?= =?UTF-8?q?=E9=9B=86=E5=90=88=E9=80=9A=E4=BF=A1=E4=B8=8E=E5=88=86=E5=B8=83?= =?UTF-8?q?=E5=BC=8F=E8=AE=AD=E7=BB=83=E8=B5=84=E6=BA=90=E9=85=8D=E7=BD=AE?= =?UTF-8?q?=E6=96=B9=E6=A1=88=E9=80=82=E9=85=8D=20*=20=E9=9B=86=E5=90=88?= =?UTF-8?q?=E9=80=9A=E4=BF=A1=E4=B8=8E=E5=88=86=E5=B8=83=E5=BC=8F=E8=AE=AD?= =?UTF-8?q?=E7=BB=83=E8=B5=84=E6=BA=90=E9=85=8D=E7=BD=AE=E6=96=B9=E6=A1=88?= =?UTF-8?q?=E9=80=82=E9=85=8D=20*=20=E9=9B=86=E5=90=88=E9=80=9A=E4=BF=A1?= =?UTF-8?q?=E4=B8=8E=E5=88=86=E5=B8=83=E5=BC=8F=E8=AE=AD=E7=BB=83=E8=B5=84?= =?UTF-8?q?=E6=BA=90=E9=85=8D=E7=BD=AE=E6=96=B9=E6=A1=88=E9=80=82=E9=85=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mx_rec/util/communication/hccl_mgmt.py | 95 +++++++------------ mx_rec/util/communication/hccl_ops.py | 4 +- mx_rec/util/framework_npu_env/tfa_env.py | 4 +- src/pybind/module_main.cpp | 12 +++ .../util/communication/test_hccl_mgmt.py | 10 -- 5 files changed, 50 insertions(+), 75 deletions(-) diff --git a/mx_rec/util/communication/hccl_mgmt.py b/mx_rec/util/communication/hccl_mgmt.py index 6eb5a70f..2f50e832 100644 --- a/mx_rec/util/communication/hccl_mgmt.py +++ b/mx_rec/util/communication/hccl_mgmt.py @@ -16,19 +16,20 @@ # ============================================================================== import json -import os -import re +from typing import Dict, List -from mx_rec.constants.constants import VALID_DEVICE_ID_LIST, MIN_SIZE, MAX_CONFIG_SIZE, MAX_DEVICE_ID, \ - MIN_RANK_SIZE, MAX_RANK_SIZE -from mx_rec.validator.validator import FileValidator, para_checker_decorator, StringValidator, \ - Convert2intValidator +from mx_rec.constants.constants import MIN_SIZE, MAX_CONFIG_SIZE, MAX_DEVICE_ID +from mx_rec.validator.validator import FileValidator from mx_rec.util.global_env_conf import global_env -def parse_hccl_json(): +def parse_hccl_json() -> Dict[int, int]: + """ + Used for rank table file configured training situation. + :return: rank_id to logic_id mapping dictionary. 
+ """ rank_table_path = global_env.rank_table_file - with open(rank_table_path, "r", encoding="utf-8"): + with open(rank_table_path, "r", encoding="utf-8") as file: # check whether json file is valid file_validator = FileValidator("RANK_TABLE_FILE", rank_table_path) # 1.check whether rank_table_path is soft link @@ -37,14 +38,13 @@ def parse_hccl_json(): file_validator.check_file_size(MAX_CONFIG_SIZE, MIN_SIZE) file_validator.check() - rank_table_path = os.path.realpath(global_env.rank_table_file) - with open(rank_table_path, "r", encoding="utf-8") as file: try: table_hccl = json.load(file) except FileNotFoundError as e: raise ValueError("rank table file not found") from e except json.JSONDecodeError as e: raise ValueError("rank table file is unable to parse as json") from e + if "server_list" not in table_hccl: raise AttributeError(f"Lack of attribute server_list.") if not table_hccl.get("server_list"): @@ -62,76 +62,51 @@ def parse_hccl_json(): if "rank_id" not in device or not device.get("rank_id").isdigit(): raise ValueError(f"hccl_json rank_id wrong.") rank_id = int(device.get("rank_id")) + if "device_id" not in device or not device.get("device_id").isdigit(): raise ValueError(f"hccl_json device_id wrong.") import mxrec_pybind - res = mxrec_pybind.get_logic_id(int(device.get("device_id"))) - if res < 0: - raise RuntimeError( - f"get logic id from physic id fail, error code is {res}, please check if dsmi api is functional.") - if res > MAX_DEVICE_ID: + logic_id = mxrec_pybind.get_logic_id(int(device.get("device_id"))) + if logic_id < 0: + raise RuntimeError(f"get logic id from physic id fail, error code is {logic_id}, " + f"please check if dsmi api is functional.") + if logic_id > MAX_DEVICE_ID: raise ValueError(f"get logic id from physic id fail, the device id is invalid.") - rank_to_device_dict[rank_id] = res - + rank_to_device_dict[rank_id] = logic_id return rank_to_device_dict -def set_hccl_info_without_json() -> dict: +def set_hccl_info_without_json() -> Dict[int, int]: """ Used for no rank table file configured training situation. - :return: device_id and logic_id mapping. + :return: rank_id to logic_id mapping dictionary. """ - visible_devices = global_env.ascend_visible_devices rank_size = global_env.cm_worker_size chief_device = global_env.cm_chief_device - device_list = get_device_list(visible_devices) + device_list = get_device_list() chief_device = int(chief_device) rank_size = int(rank_size) - sorted_device_list = sorted(device_list) - - if chief_device not in sorted_device_list: + if chief_device not in device_list: raise ValueError(f"The environment variable CM_CHIEF_DEVICE {chief_device} is not in the local device list. ") rank_to_device_dict = {} - chief_index = sorted_device_list.index(chief_device) - sorted_device_list = sorted_device_list[chief_index:] + sorted_device_list[0: chief_index] - sorted_device_list = sorted_device_list[:rank_size] - - for device_idx in sorted_device_list: - import mxrec_pybind - res = mxrec_pybind.get_logic_id(int(device_idx)) - if res < 0: - raise RuntimeError( - f"get logic id from physic id fail, error code is {res}, please check if dsmi api is functional.") - - if res > MAX_DEVICE_ID: - raise ValueError(f"get logic id from physic id fail. 
res: {res}, chief_device: {chief_device}, " - f"device_idx: {device_idx}") - index = sorted_device_list.index(device_idx) - rank_to_device_dict[index] = res + chief_index = device_list.index(chief_device) + device_list = device_list[chief_index:] + device_list[:chief_index] + device_list = device_list[:rank_size] + + for rank_id, device_id in enumerate(device_list): + rank_to_device_dict[rank_id] = device_id return rank_to_device_dict -def get_device_list(ascend_visible_devices): - device_list = [] - try: - nums = re.findall(r'\d+', ascend_visible_devices) - # eg1:4-11, 则nums=['4', '11'] eg2:0-3,8-11 则nums['0', '3', '8', '11'] - if not all(int(i) <= MAX_DEVICE_ID for i in nums): - raise ValueError("invalid env variable ascend_visible_devices.") - ranges = re.findall(r'\d+-\d+', ascend_visible_devices) - # eg1:4-11, 则ranges=['4-11'] eg2:0-3,8-11 则ranges['0-3', '8-11'] - for r in ranges: - start, end = map(int, r.split('-')) # '4-11', 则start 4, end 11. ['0-3', '8-11'] - if start >= end: - raise ValueError("invalid env variable ascend_visible_devices.") - nums.extend(range(start, end + 1)) - device_list = sorted(list(set(map(int, nums)))) - except ValueError as error: - raise ValueError("Invalid env variable ascend_visible_devices, no valid device id is configured.") from error - - if not device_list: - raise ValueError("No device is available in the environment.") +def get_device_list() -> List[int]: + """ + Obtain the logic ids of all visible Ascend devices in the environment. + :return: the logic id list of visible Ascend devices. + """ + import mxrec_pybind + device_count = mxrec_pybind.get_device_count() + device_list = [i for i in range(device_count)] return device_list \ No newline at end of file diff --git a/mx_rec/util/communication/hccl_ops.py b/mx_rec/util/communication/hccl_ops.py index 52fbf74c..d4ea6136 100644 --- a/mx_rec/util/communication/hccl_ops.py +++ b/mx_rec/util/communication/hccl_ops.py @@ -29,9 +29,9 @@ def get_rank_id() -> Optional[int]: def get_device_id() -> Optional[int]: """ - Get the device id of the calling process + Get the device logic id of the calling process Note: this method should be used after mpi init - :return: int or None, the device id of the calling process + :return: int or None, the device logic id of the calling process """ if global_env.rank_table_file: rank_to_device_dict = parse_hccl_json() diff --git a/mx_rec/util/framework_npu_env/tfa_env.py b/mx_rec/util/framework_npu_env/tfa_env.py index a00fd7ce..bcd0b0ee 100644 --- a/mx_rec/util/framework_npu_env/tfa_env.py +++ b/mx_rec/util/framework_npu_env/tfa_env.py @@ -13,14 +13,12 @@ def set_ascend_env(): 配置昇腾相关的参数和环境变量 """ logger.debug("Ascend env set start.") - os.environ["RANK_ID"] = str(get_rank_id()) device_id = str(get_device_id()) - os.environ["DEVICE_ID"] = device_id os.environ["ASCEND_DEVICE_ID"] = device_id - os.environ["DEVICE_INDEX"] = device_id if global_env.rank_table_file: + os.environ["RANK_ID"] = str(get_rank_id()) rank_size = get_rank_size() os.environ["RANK_SIZE"] = str(rank_size) diff --git a/src/pybind/module_main.cpp b/src/pybind/module_main.cpp index 403692fb..4a08f992 100644 --- a/src/pybind/module_main.cpp +++ b/src/pybind/module_main.cpp @@ -57,12 +57,24 @@ namespace { return logicId; } + uint32_t GetDeviceCount() + { + uint32_t count; + aclError ec = aclrtGetDeviceCount(&count); + if (ec != 0) { + throw runtime_error("failed to get device count. 
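The rotation in set_hccl_info_without_json() above can be traced with concrete values; a sketch assuming eight visible logic ids, CM_CHIEF_DEVICE=2 and CM_WORKER_SIZE=4:

    device_list = list(range(8))   # what get_device_list() would report
    chief_device, rank_size = 2, 4

    chief_index = device_list.index(chief_device)
    # Rotate so the chief device maps to rank 0, then keep rank_size devices.
    device_list = device_list[chief_index:] + device_list[:chief_index]
    device_list = device_list[:rank_size]

    rank_to_device_dict = dict(enumerate(device_list))
    print(rank_to_device_dict)  # {0: 2, 1: 3, 2: 4, 3: 5}
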
"); + } + return count; + } + PYBIND11_MODULE(mxrec_pybind, m) { m.def("get_ub_hot_size", &GetUBHotSize, py::arg("device_id")); m.def("get_logic_id", &GetLogicID, py::arg("physic_id")); + m.def("get_device_count", &GetDeviceCount); + m.attr("USE_STATIC") = py::int_(HybridOption::USE_STATIC); m.attr("USE_HOT") = py::int_(HybridOption::USE_HOT); diff --git a/tests/mx_rec/util/communication/test_hccl_mgmt.py b/tests/mx_rec/util/communication/test_hccl_mgmt.py index f0257022..870f8a3a 100644 --- a/tests/mx_rec/util/communication/test_hccl_mgmt.py +++ b/tests/mx_rec/util/communication/test_hccl_mgmt.py @@ -104,16 +104,6 @@ class HCCLMGMTTest(unittest.TestCase): with self.assertRaises(ValueError): rank_to_device_dict, local_rank_size = parse_hccl_json() - def test_get_device_list(self): - device_list = get_device_list("0-7") - self.assertEqual([0, 1, 2, 3, 4, 5, 6, 7], device_list) - device_list = get_device_list("0-3, 8-11") - self.assertEqual([0, 1, 2, 3, 8, 9, 10, 11], device_list) - with self.assertRaises(ValueError): - device_list = get_device_list("7-5, 9, 10") - with self.assertRaises(ValueError): - device_list = get_device_list("17") - if __name__ == '__main__': unittest.main() -- Gitee From ff3bb82d00a67b56aa91d4a017b33787b49ac585 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=90=B4=E6=B4=AA=E5=8F=91?= <1660398197@qq.com> Date: Thu, 11 Apr 2024 21:33:43 +0800 Subject: [PATCH 030/302] =?UTF-8?q?=E6=89=80=E6=9C=89=E5=88=A4=E6=96=ADHot?= =?UTF-8?q?=20embed=E7=9A=84=E4=BB=A3=E7=A0=81=EF=BC=8C=E9=BB=98=E8=AE=A4?= =?UTF-8?q?=E5=BC=80=E5=90=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core/key_process/key_process.cpp | 36 +++++++++------------- src/tests/key_process/key_process_test.cpp | 3 -- 2 files changed, 15 insertions(+), 24 deletions(-) diff --git a/src/core/key_process/key_process.cpp b/src/core/key_process/key_process.cpp index f76f6907..eebd70a3 100644 --- a/src/core/key_process/key_process.cpp +++ b/src/core/key_process/key_process.cpp @@ -45,17 +45,15 @@ bool KeyProcess::Initialize(const RankInfo& rInfo, const vector& eInfos int seed) { this->rankInfo = rInfo; - if (rankInfo.useHot) { - SetupHotEmbUpdateStep(); - } + + SetupHotEmbUpdateStep(); + map scInfo; for (const auto& info: eInfos) { embInfos[info.name] = info; scInfo[info.name] = info.sendCount; - if (rankInfo.useHot) { - InitHotEmbTotCount(info, rInfo); - } + InitHotEmbTotCount(info, rInfo); if (rankInfo.useDynamicExpansion) { // 动态扩容 embeddingTableMap[info.name].Init(info, rInfo, seed); @@ -89,8 +87,8 @@ bool KeyProcess::Initialize(const RankInfo& rInfo, const vector& eInfos } } - LOG_INFO(KEY_PROCESS "scInfo:{}, localRankSize:{}, rankSize:{}, useStatic:{}, useHot:{}", - MapToString(scInfo), rInfo.localRankSize, rInfo.rankSize, rInfo.useStatic, rInfo.useHot); + LOG_INFO(KEY_PROCESS "scInfo:{}, localRankSize:{}, rankSize:{}, useStatic:{}", + MapToString(scInfo), rInfo.localRankSize, rInfo.rankSize, rInfo.useStatic); #ifndef GTEST Start(); #endif @@ -342,11 +340,7 @@ void KeyProcess::HashSplitHelper(const unique_ptr & batch, vector name] != SingleEmbTableStatus::SETS_NONE) { tie(splitKeys, restore, keyCount) = HashSplitWithFAAE(batch); // 按存储dev id切分并去重 } else { - if (rankInfo.useHot) { - tie(splitKeys, restore, hotPos) = HotHashSplit(batch); // 按存储dev id切分并去重 - } else { - tie(splitKeys, restore) = HashSplit(batch); // 按存储dev id切分并去重 - } + tie(splitKeys, restore, hotPos) = HotHashSplit(batch); // 按存储dev id切分并去重 } LOG_DEBUG("uniqueTc(ms):{}", uniqueTc.ElapsedMS()); } @@ 
-387,10 +381,10 @@ bool KeyProcess::KeyProcessTaskHelperWithFastUnique(unique_ptr& batch auto tensors = make_unique>(); tensors->push_back(Vec2TensorI32(uniqueInfo.restore)); - if (rankInfo.useHot) { - uniqueInfo.hotPos.resize(hotEmbTotCount[batch->name], -1); - tensors->push_back(Vec2TensorI32(uniqueInfo.hotPos)); - } + + uniqueInfo.hotPos.resize(hotEmbTotCount[batch->name], -1); + tensors->push_back(Vec2TensorI32(uniqueInfo.hotPos)); + if (!rankInfo.isDDR) { PushGlobalUniqueTensors(move(tensors), uniqueInfo.all2AllInfo.keyRecv, channel); @@ -449,10 +443,10 @@ bool KeyProcess::KeyProcessTaskHelper(unique_ptr& batch, int channel, TimeCost pushResultTC; auto tensors = make_unique>(); tensors->push_back(Vec2TensorI32(restore)); - if (rankInfo.useHot) { - hotPos.resize(hotEmbTotCount[batch->name], 0); - tensors->push_back(Vec2TensorI32(hotPos)); - } + + hotPos.resize(hotEmbTotCount[batch->name], 0); + tensors->push_back(Vec2TensorI32(hotPos)); + if (!rankInfo.isDDR) { PushGlobalUniqueTensors(tensors, lookupKeys, channel); diff --git a/src/tests/key_process/key_process_test.cpp b/src/tests/key_process/key_process_test.cpp index a5e618cd..6b06dc30 100644 --- a/src/tests/key_process/key_process_test.cpp +++ b/src/tests/key_process/key_process_test.cpp @@ -76,7 +76,6 @@ protected: rankInfo.isDDR = false; rankInfo.useDynamicExpansion = false; rankInfo.ctrlSteps = { 1, -1 }; - rankInfo.useHot = false; // 初始化emb信息 GenEmbInfos(embNum, embInfos, fieldNums); splits = fieldNums; @@ -639,7 +638,6 @@ TEST_F(KeyProcessTest, KeyProcessTaskHelper) { rankInfo.isDDR = false; rankInfo.useStatic = false; - rankInfo.useHot = false; rankInfo.useDynamicExpansion = false; EmbeddingMgmt::Instance()->Init(rankInfo, embInfos); ASSERT_EQ(process.Initialize(rankInfo, embInfos), true); @@ -688,7 +686,6 @@ TEST_F(KeyProcessTest, KeyProcessTaskHelperDDR) { rankInfo.isDDR = true; rankInfo.useStatic = true; - rankInfo.useHot = false; rankInfo.useDynamicExpansion = false; EmbeddingMgmt::Instance()->Init(rankInfo, embInfos); ASSERT_EQ(process.Initialize(rankInfo, embInfos), true); -- Gitee From d1b1a871fe9f1dc9336db45bfbe6e2bd2ceb637d Mon Sep 17 00:00:00 2001 From: wuhongfa <1660398197@qq.com> Date: Thu, 11 Apr 2024 22:11:46 +0800 Subject: [PATCH 031/302] =?UTF-8?q?=E6=89=80=E6=9C=89=E5=88=A4=E6=96=ADHot?= =?UTF-8?q?=20embed=E7=9A=84=E4=BB=A3=E7=A0=81=EF=BC=8C=E9=BB=98=E8=AE=A4?= =?UTF-8?q?=E5=BC=80=E5=90=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mx_rec/core/asc/manager.py | 2 +- src/core/key_process/key_process.cpp | 20 +++++++++----------- src/core/utils/common.cpp | 2 -- src/core/utils/common.h | 2 -- src/pybind/module_main.cpp | 2 -- 5 files changed, 10 insertions(+), 18 deletions(-) diff --git a/mx_rec/core/asc/manager.py b/mx_rec/core/asc/manager.py index 2829ab98..5f8eeb5d 100644 --- a/mx_rec/core/asc/manager.py +++ b/mx_rec/core/asc/manager.py @@ -18,7 +18,7 @@ import tensorflow as tf from mxrec_pybind import InitializeInfo, ConstantInitializerInfo, NormalInitializerInfo, EmbInfo, EmbInfoParams, \ - ThresholdValue, HybridMgmt, RankInfo, USE_STATIC, USE_HOT, USE_DYNAMIC_EXPANSION + ThresholdValue, HybridMgmt, RankInfo, USE_STATIC, USE_DYNAMIC_EXPANSION from mx_rec.util.communication.hccl_ops import get_rank_id, get_device_id, get_rank_size from mx_rec.util.initialize import ConfigInitializer diff --git a/src/core/key_process/key_process.cpp b/src/core/key_process/key_process.cpp index eebd70a3..1d922cee 100644 --- a/src/core/key_process/key_process.cpp +++ 
b/src/core/key_process/key_process.cpp @@ -645,17 +645,15 @@ void KeyProcess::HandleHotAndSendCount(const unique_ptr &batch, Uniqu absl::flat_hash_map hotMap = hotKey[batch->name]; lock.unlock(); - if (rankInfo.useHot) { - int hotOffset = 0; - uniqueInfoOut.hotPos.resize(hotEmbTotCount[batch->name]); - hotOffset = hotEmbTotCount[batch->name]; - - TimeCost computeHotTc; - ComputeHotPos(batch, hotMap, uniqueInfoOut.hotPos, uniqueInfoOut.restore, hotOffset); - LOG_DEBUG("ComputeHot TimeCost(ms):{}", computeHotTc.ElapsedMS()); - UpdateHotMapForUnique(keySendInfo.keySend, keySendInfo.keyCount, - hotOffset, batch->batchId % hotEmbUpdateStep == 0, batch->name); - } + int hotOffset = 0; + uniqueInfoOut.hotPos.resize(hotEmbTotCount[batch->name]); + hotOffset = hotEmbTotCount[batch->name]; + + TimeCost computeHotTc; + ComputeHotPos(batch, hotMap, uniqueInfoOut.hotPos, uniqueInfoOut.restore, hotOffset); + LOG_DEBUG("ComputeHot TimeCost(ms):{}", computeHotTc.ElapsedMS()); + UpdateHotMapForUnique(keySendInfo.keySend, keySendInfo.keyCount, + hotOffset, batch->batchId % hotEmbUpdateStep == 0, batch->name); if (rankInfo.useStatic) { sc.resize(rankInfo.rankSize, embInfos[batch->name].sendCount); diff --git a/src/core/utils/common.cpp b/src/core/utils/common.cpp index 38e64444..839d3790 100644 --- a/src/core/utils/common.cpp +++ b/src/core/utils/common.cpp @@ -45,7 +45,6 @@ namespace MxRec { localRankId = rankId % localRankSize; } useStatic = static_cast(option) bitand HybridOption::USE_STATIC; - useHot = static_cast(option) bitand HybridOption::USE_HOT; useDynamicExpansion = static_cast(option) bitand HybridOption::USE_DYNAMIC_EXPANSION; } @@ -58,7 +57,6 @@ namespace MxRec { localRankId = rankId % localRankSize; } useStatic = static_cast(option) & HybridOption::USE_STATIC; - useHot = static_cast(option) & HybridOption::USE_HOT; } RandomInfo::RandomInfo(int start, int len, float constantVal, float randomMin, float randomMax) diff --git a/src/core/utils/common.h b/src/core/utils/common.h index f6c3de3f..9ce80073 100644 --- a/src/core/utils/common.h +++ b/src/core/utils/common.h @@ -116,7 +116,6 @@ namespace MxRec { namespace HybridOption { const unsigned int USE_STATIC = 0x001; - const unsigned int USE_HOT = 0x001 << 1; const unsigned int USE_DYNAMIC_EXPANSION = 0x001 << 2; }; @@ -220,7 +219,6 @@ namespace MxRec { int localRankId {}; int localRankSize {}; bool useStatic { false }; - bool useHot {}; uint32_t option {}; int nBatch {}; bool isDDR { false }; diff --git a/src/pybind/module_main.cpp b/src/pybind/module_main.cpp index 4a08f992..0df47092 100644 --- a/src/pybind/module_main.cpp +++ b/src/pybind/module_main.cpp @@ -77,8 +77,6 @@ namespace { m.attr("USE_STATIC") = py::int_(HybridOption::USE_STATIC); - m.attr("USE_HOT") = py::int_(HybridOption::USE_HOT); - m.attr("USE_DYNAMIC_EXPANSION") = py::int_(HybridOption::USE_DYNAMIC_EXPANSION); GetRankInfo(m); -- Gitee From 1976ba3979a7040010755053852e83fdf9caa2ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=BD=97=E5=B9=B8=E8=BF=90?= Date: Fri, 12 Apr 2024 01:44:23 +0000 Subject: [PATCH 032/302] =?UTF-8?q?!75=20mxrec=20=E9=9C=80=E6=B1=82?= =?UTF-8?q?=EF=BC=9A=E6=A0=B9=E6=8D=AE=E4=BC=98=E5=8C=96=E5=99=A8=E7=B1=BB?= =?UTF-8?q?=E5=9E=8B=E8=87=AA=E5=8A=A8=E5=88=A4=E6=96=AD=E6=98=AF=E5=90=A6?= =?UTF-8?q?=E5=BC=80=E5=90=AF=E5=85=A8=E5=B1=80=E5=8E=BB=E9=87=8D=E7=89=B9?= =?UTF-8?q?=E6=80=A7=E3=80=82=20*=20Merge=20remote-tracking=20branch=20'up?= =?UTF-8?q?stream/develop'=20into=20develop-global-unique=20*=20Merge=20re?= 
=?UTF-8?q?mote-tracking=20branch=20'upstream/develop'=20into=20develop-gl?= =?UTF-8?q?obal-unique=20*=20=E3=80=90=E4=BF=AE=E6=94=B9=E8=AF=B4=E6=98=8E?= =?UTF-8?q?=20Modification=E3=80=91=E5=85=A8=E5=B1=80=E5=8E=BB=E9=87=8D?= =?UTF-8?q?=E4=BC=98=E5=8C=96-=E6=A8=A1=E5=9E=8B=E9=80=82=E9=85=8D=20*=20?= =?UTF-8?q?=E3=80=90=E4=BF=AE=E6=94=B9=E8=AF=B4=E6=98=8E=20Modification?= =?UTF-8?q?=E3=80=91=E5=85=A8=E5=B1=80=E5=8E=BB=E9=87=8D=E4=BC=98=E5=8C=96?= =?UTF-8?q?-=E6=A8=A1=E5=9E=8B=E9=80=82=E9=85=8D=20*=20=E3=80=90=E4=BF=AE?= =?UTF-8?q?=E6=94=B9=E8=AF=B4=E6=98=8E=20Modification=E3=80=91=E5=85=A8?= =?UTF-8?q?=E5=B1=80=E5=8E=BB=E9=87=8D=E4=BC=98=E5=8C=96-=E5=85=A8?= =?UTF-8?q?=E9=80=82=E9=85=8D=20*=20=E3=80=90=E4=BF=AE=E6=94=B9=E8=AF=B4?= =?UTF-8?q?=E6=98=8E=20Modification=E3=80=91=E5=85=A8=E5=B1=80=E5=8E=BB?= =?UTF-8?q?=E9=87=8D=E4=BC=98=E5=8C=96-lazyAdam=E9=80=82=E9=85=8D=20*=20?= =?UTF-8?q?=E3=80=90=E4=BF=AE=E6=94=B9=E8=AF=B4=E6=98=8E=20Modification?= =?UTF-8?q?=E3=80=91=E5=85=A8=E5=B1=80=E5=8E=BB=E9=87=8D=E4=BC=98=E5=8C=96?= =?UTF-8?q?-lazyAdam=E9=80=82=E9=85=8D=20*=20Merge=20remote-tracking=20bra?= =?UTF-8?q?nch=20'origin/develop-global-unique'=20into=20devel=E2=80=A6=20?= =?UTF-8?q?*=20=E3=80=90=E4=BF=AE=E6=94=B9=E8=AF=B4=E6=98=8E=20Modificatio?= =?UTF-8?q?n=E3=80=91=E5=85=A8=E5=B1=80=E5=8E=BB=E9=87=8D=E4=BC=98?= =?UTF-8?q?=E5=8C=96-lazyAdam=E9=80=82=E9=85=8D=20*=20=E3=80=90=E4=BF=AE?= =?UTF-8?q?=E6=94=B9=E8=AF=B4=E6=98=8E=20Modification=E3=80=91=E5=85=A8?= =?UTF-8?q?=E5=B1=80=E5=8E=BB=E9=87=8D=E4=BC=98=E5=8C=96-lazyAdam=E9=80=82?= =?UTF-8?q?=E9=85=8D=20*=20=E3=80=90=E4=BF=AE=E6=94=B9=E8=AF=B4=E6=98=8E?= =?UTF-8?q?=20Modification=E3=80=91=E5=85=A8=E5=B1=80=E5=8E=BB=E9=87=8D?= =?UTF-8?q?=E4=BC=98=E5=8C=96-lazyAdam=E9=80=82=E9=85=8D=20*=20=E3=80=90?= =?UTF-8?q?=E4=BF=AE=E6=94=B9=E8=AF=B4=E6=98=8E=20Modification=E3=80=91?= =?UTF-8?q?=E5=85=A8=E5=B1=80=E5=8E=BB=E9=87=8Dcpp=E6=B5=8B=E6=94=B9?= =?UTF-8?q?=E5=8A=A8=20*=20=E3=80=90=E4=BF=AE=E6=94=B9=E8=AF=B4=E6=98=8E?= =?UTF-8?q?=20Modification=E3=80=91=E5=85=A8=E5=B1=80=E5=8E=BB=E9=87=8Dcpp?= =?UTF-8?q?=E6=B5=8B=E6=94=B9=E5=8A=A8=20*=20=E3=80=90=E4=BF=AE=E6=94=B9?= =?UTF-8?q?=E8=AF=B4=E6=98=8E=20Modification=E3=80=91test=20first=20time?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/DCNv2/main_mxrec.py | 4 +- examples/demo/little_demo/run_mode.py | 4 +- .../demo/little_demo_estimator/nn_optim.py | 4 +- examples/dlrm/model/gradient_descent_w.py | 10 ++ examples/dlrm/model/main_mxrec.py | 4 +- mx_rec/constants/constants.py | 2 - mx_rec/core/asc/build_graph.py | 50 ---------- mx_rec/core/asc/manager.py | 6 +- mx_rec/core/emb/dynamic_sparse_embedding.py | 9 +- mx_rec/core/emb/sparse_embedding.py | 7 +- mx_rec/optimizers/adagrad.py | 15 ++- mx_rec/optimizers/base.py | 64 ++++++++++++ mx_rec/optimizers/ftrl.py | 15 ++- mx_rec/optimizers/gradient_descent.py | 5 + mx_rec/optimizers/gradient_descent_by_addr.py | 5 + mx_rec/optimizers/lazy_adam.py | 10 +- mx_rec/optimizers/lazy_adam_by_addr.py | 10 +- src/core/hybrid_mgmt/hybrid_mgmt.cpp | 6 +- src/core/key_process/key_process.cpp | 5 +- src/core/utils/common.cpp | 1 + src/core/utils/common.h | 8 +- src/core/utils/config.cpp | 15 +-- src/core/utils/config.h | 7 -- src/pybind/module_main.cpp | 2 + src/tests/utils/config_test.cpp | 4 - tests/mx_rec/core/mock_class.py | 1 + tests/mx_rec/core/test_build_graph.py | 99 +------------------ tests/mx_rec/core/test_manager.py | 8 ++ 28 files changed, 168 insertions(+), 212 deletions(-) diff --git 
a/examples/DCNv2/main_mxrec.py b/examples/DCNv2/main_mxrec.py index d5a51312..0a9462bc 100644 --- a/examples/DCNv2/main_mxrec.py +++ b/examples/DCNv2/main_mxrec.py @@ -336,9 +336,9 @@ if __name__ == "__main__": train_ops.append(dense_optimizer.apply_gradients(avg_grads)) if use_dynamic_expansion: - from mx_rec.constants.constants import ASCEND_SPARSE_LOOKUP_LOCAL_EMB, ASCEND_SPARSE_LOOKUP_UNIQUE_KEYS + from mx_rec.constants.constants import ASCEND_SPARSE_LOOKUP_LOCAL_EMB, ASCEND_SPARSE_LOOKUP_ID_OFFSET - train_address_list = tf.compat.v1.get_collection(ASCEND_SPARSE_LOOKUP_UNIQUE_KEYS) + train_address_list = tf.compat.v1.get_collection(ASCEND_SPARSE_LOOKUP_ID_OFFSET) train_emb_list = tf.compat.v1.get_collection(ASCEND_SPARSE_LOOKUP_LOCAL_EMB) # do sparse optimization by addr sparse_grads = sparse_optimizer.compute_gradients(loss, train_emb_list) # local_embedding diff --git a/examples/demo/little_demo/run_mode.py b/examples/demo/little_demo/run_mode.py index e750ceb5..6a3301c4 100644 --- a/examples/demo/little_demo/run_mode.py +++ b/examples/demo/little_demo/run_mode.py @@ -95,11 +95,11 @@ class RunMode: self.train_ops.append(dense_optimizer.apply_gradients(avg_grads)) if bool(int(os.getenv("USE_DYNAMIC_EXPANSION", 0))): - from mx_rec.constants.constants import ASCEND_SPARSE_LOOKUP_LOCAL_EMB, ASCEND_SPARSE_LOOKUP_UNIQUE_KEYS + from mx_rec.constants.constants import ASCEND_SPARSE_LOOKUP_LOCAL_EMB, ASCEND_SPARSE_LOOKUP_ID_OFFSET train_emb_list = tf.compat.v1.get_collection(ASCEND_SPARSE_LOOKUP_LOCAL_EMB) - train_address_list = tf.compat.v1.get_collection(ASCEND_SPARSE_LOOKUP_UNIQUE_KEYS) + train_address_list = tf.compat.v1.get_collection(ASCEND_SPARSE_LOOKUP_ID_OFFSET) # do sparse optimization by addr local_grads = tf.gradients(loss, train_emb_list) # local_embedding diff --git a/examples/demo/little_demo_estimator/nn_optim.py b/examples/demo/little_demo_estimator/nn_optim.py index 4438627d..3be3c7ed 100644 --- a/examples/demo/little_demo_estimator/nn_optim.py +++ b/examples/demo/little_demo_estimator/nn_optim.py @@ -73,11 +73,11 @@ def get_train_op_list(losses, learning_rate): # do sparse optimization if use_dynamic_expansion: - from mx_rec.constants.constants import ASCEND_SPARSE_LOOKUP_LOCAL_EMB, ASCEND_SPARSE_LOOKUP_UNIQUE_KEYS + from mx_rec.constants.constants import ASCEND_SPARSE_LOOKUP_LOCAL_EMB, ASCEND_SPARSE_LOOKUP_ID_OFFSET train_emb_list = tf.compat.v1.get_collection(ASCEND_SPARSE_LOOKUP_LOCAL_EMB) - train_address_list = tf.compat.v1.get_collection(ASCEND_SPARSE_LOOKUP_UNIQUE_KEYS) + train_address_list = tf.compat.v1.get_collection(ASCEND_SPARSE_LOOKUP_ID_OFFSET) local_grads = tf.gradients(loss, train_emb_list) # local_embedding grads_and_vars = [(grad, address) for grad, address in zip(local_grads, train_address_list)] diff --git a/examples/dlrm/model/gradient_descent_w.py b/examples/dlrm/model/gradient_descent_w.py index f3ae78d7..6c34b726 100644 --- a/examples/dlrm/model/gradient_descent_w.py +++ b/examples/dlrm/model/gradient_descent_w.py @@ -47,6 +47,16 @@ class CustomizedGradientDescentWithWeighDecay(gradient_descent.GradientDescentOp super(CustomizedGradientDescentWithWeighDecay, self).__init__( learning_rate=learning_rate, use_locking=use_locking, name=self.unique_name ) + self._slot_num = 0 + self._derivative = 1 + + @property + def slot_num(self): + return self._slot_num + + @property + def derivative(self): + return self._derivative def initialize_slots(self, var, table_instance): logger.info("no slot for gradient descent") diff --git a/examples/dlrm/model/main_mxrec.py 
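The example updates in this commit all follow the same pattern: the lookup registers its local embeddings and id offsets in graph collections, and the training script pairs gradients with those offsets for the by-address sparse optimizer. A runnable sketch of that handshake with stand-in tensors, assuming TF1-style graph mode as used in these examples:

    import tensorflow as tf

    ASCEND_SPARSE_LOOKUP_LOCAL_EMB = "ASCEND_SPARSE_LOOKUP_LOCAL_EMB"
    ASCEND_SPARSE_LOOKUP_ID_OFFSET = "ASCEND_SPARSE_LOOKUP_ID_OFFSET"

    with tf.Graph().as_default():
        # Stand-ins for what sparse_lookup() registers while building the graph.
        local_emb = tf.compat.v1.get_variable("local_emb_stub", shape=[4, 8])
        id_offset = tf.constant([0, 1, 2, 3], dtype=tf.int64)
        tf.compat.v1.add_to_collection(ASCEND_SPARSE_LOOKUP_LOCAL_EMB, local_emb)
        tf.compat.v1.add_to_collection(ASCEND_SPARSE_LOOKUP_ID_OFFSET, id_offset)

        # Training-script side: fetch both lists and zip gradients with the
        # id offsets (addresses), as the DCNv2/demo/dlrm mains do.
        train_emb_list = tf.compat.v1.get_collection(ASCEND_SPARSE_LOOKUP_LOCAL_EMB)
        train_address_list = tf.compat.v1.get_collection(ASCEND_SPARSE_LOOKUP_ID_OFFSET)
        loss = tf.reduce_sum(train_emb_list[0])
        local_grads = tf.gradients(loss, train_emb_list)
        grads_and_vars = list(zip(local_grads, train_address_list))
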
b/examples/dlrm/model/main_mxrec.py index 4bbd16de..ab2eb04c 100644 --- a/examples/dlrm/model/main_mxrec.py +++ b/examples/dlrm/model/main_mxrec.py @@ -24,7 +24,7 @@ import tensorflow as tf from sklearn.metrics import roc_auc_score import numpy as np -from mx_rec.constants.constants import ASCEND_SPARSE_LOOKUP_LOCAL_EMB, ASCEND_SPARSE_LOOKUP_UNIQUE_KEYS +from mx_rec.constants.constants import ASCEND_SPARSE_LOOKUP_LOCAL_EMB, ASCEND_SPARSE_LOOKUP_ID_OFFSET from mx_rec.core.asc.helper import FeatureSpec, get_asc_insert_func from mx_rec.core.asc.manager import start_asc_pipeline from mx_rec.core.embedding import create_table, sparse_lookup @@ -346,7 +346,7 @@ if __name__ == "__main__": train_ops.append(dense_optimizer.apply_gradients(avg_grads)) if use_dynamic_expansion: - train_address_list = tf.compat.v1.get_collection(ASCEND_SPARSE_LOOKUP_UNIQUE_KEYS) + train_address_list = tf.compat.v1.get_collection(ASCEND_SPARSE_LOOKUP_ID_OFFSET) # do sparse optimization by addr sparse_grads = list(grads[-1]) # local_embedding grads_and_vars = [(grad, address) for grad, address in zip(sparse_grads, train_address_list)] diff --git a/mx_rec/constants/constants.py b/mx_rec/constants/constants.py index 03fa28b4..2c2cd2fe 100644 --- a/mx_rec/constants/constants.py +++ b/mx_rec/constants/constants.py @@ -22,7 +22,6 @@ ASCEND_GLOBAL_HASHTABLE_COLLECTION = "ASCEND_GLOBAL_HASHTABLE_COLLECTION" ASCEND_CUTTING_POINT_INITIALIZER = "ASCEND_CUTTING_POINT_INITIALIZER" ASCEND_SPARSE_LOOKUP_ENTRANCE = "ASCEND_SPARSE_LOOKUP_ENTRANCE" ASCEND_SPARSE_LOOKUP_ID_OFFSET = "ASCEND_SPARSE_LOOKUP_ID_OFFSET" -ASCEND_SPARSE_LOOKUP_UNIQUE_KEYS = "ASCEND_SPARSE_LOOKUP_UNIQUE_KEYS" ASCEND_TIMESTAMP = "ASCEND_TIMESTAMP" ASCEND_SPARSE_LOOKUP_LOCAL_EMB = "ASCEND_SPARSE_LOOKUP_LOCAL_EMB" EMPTY_STR = "" @@ -166,7 +165,6 @@ class ASCAnchorAttr(Enum): MOCK_LOOKUP_RESULT = "mock_lookup_result" RESTORE_VECTOR_SECOND = "restore_vector_second" UNIQUE_KEYS = "unique_keys" - GRADIENTS_STRATEGY = "gradients_strategy" IS_GRAD = "is_grad" diff --git a/mx_rec/core/asc/build_graph.py b/mx_rec/core/asc/build_graph.py index 13ddad4a..2bb72621 100644 --- a/mx_rec/core/asc/build_graph.py +++ b/mx_rec/core/asc/build_graph.py @@ -22,7 +22,6 @@ import tensorflow as tf import mxrec_pybind from mx_rec.util.initialize import ConfigInitializer from mx_rec.util.tf_version_adapter import npu_ops -from mx_rec.constants.constants import TRAIN_CHANNEL_ID from mx_rec.util.log import logger @@ -81,46 +80,6 @@ def get_id_offsets(max_lookup_vec_size, config): return id_offsets, swap_pos, swap_len -def get_restore_vector_second(max_lookup_vec_size: int, config: dict) -> tf.Tensor: - """ - Get restore vector which is calculated after the second all2all - :param max_lookup_vec_size: the size of restore_vector_second - :param config: embedding config - :return: the restore vector calculated after the second all2all - """ - logger.debug('Channel %s_restore_second_%s was built for getnext', - config.get("table_name"), config.get("channel_id")) - with tf.compat.v1.variable_scope(config.get("table_name"), reuse=tf.compat.v1.AUTO_REUSE): - restore_vector_second = npu_ops.gen_npu_ops.get_next( - output_types=[tf.int32], - output_shapes=[[max_lookup_vec_size]], - channel_name=f'{config.get("table_name")}_restore_second_{config.get("channel_id")}')[0] - return restore_vector_second - - -def get_unique_keys(max_lookup_vec_size: int, config: dict) -> tf.Tensor: - """ - Get the global unique keys which is calculated after the second all2all - :param max_lookup_vec_size: the size of 
global unique keys - :param config: embedding config - :return: the global unique keys calculated after the second all2all - """ - logger.debug('Channel %s_uniquekeys_%s was built for getnext', config.get("table_name"), config.get("channel_id")) - with tf.compat.v1.variable_scope(config.get("table_name"), reuse=tf.compat.v1.AUTO_REUSE): - if config.get("use_dynamic_expansion"): - unique_keys = npu_ops.gen_npu_ops.get_next( - output_types=[tf.int64], - output_shapes=[[max_lookup_vec_size]], - channel_name=f'{config.get("table_name")}_uniquekeys_{config.get("channel_id")}')[0] - return unique_keys - - unique_keys = npu_ops.gen_npu_ops.get_next( - output_types=[tf.int32], - output_shapes=[[max_lookup_vec_size]], - channel_name=f'{config.get("table_name")}_uniquekeys_{config.get("channel_id")}')[0] - return unique_keys - - def get_all2all_args(use_static: bool, config: dict) -> Optional[list]: """ Get all2all parameters for dynamic condition @@ -211,13 +170,4 @@ def get_preprocessed_tensor_for_asc(table, config): 'all2all_args': all2all_args, } - if config.get("channel_id") != TRAIN_CHANNEL_ID: - return result - - with tf.compat.v1.variable_scope("restore_vector_second"): - restore_vector_second = get_restore_vector_second(max_lookup_vec_size, config) - - with tf.compat.v1.variable_scope("unique_keys"): - unique_keys = get_unique_keys(max_lookup_vec_size, config) - result.update({'restore_vector_second': restore_vector_second, 'unique_keys': unique_keys}) return result diff --git a/mx_rec/core/asc/manager.py b/mx_rec/core/asc/manager.py index 2829ab98..f50037ea 100644 --- a/mx_rec/core/asc/manager.py +++ b/mx_rec/core/asc/manager.py @@ -18,7 +18,7 @@ import tensorflow as tf from mxrec_pybind import InitializeInfo, ConstantInitializerInfo, NormalInitializerInfo, EmbInfo, EmbInfoParams, \ - ThresholdValue, HybridMgmt, RankInfo, USE_STATIC, USE_HOT, USE_DYNAMIC_EXPANSION + ThresholdValue, HybridMgmt, RankInfo, USE_STATIC, USE_HOT, USE_DYNAMIC_EXPANSION, USE_SUM_SAME_ID_GRADIENTS from mx_rec.util.communication.hccl_ops import get_rank_id, get_device_id, get_rank_size from mx_rec.util.initialize import ConfigInitializer @@ -205,6 +205,10 @@ def initialize_emb_cache(table_info_list, threshold_list): if ConfigInitializer.get_instance().use_dynamic_expansion: option = option | USE_DYNAMIC_EXPANSION + optimizer = ConfigInitializer.get_instance().optimizer_config.optimizer_instance + if optimizer.derivative == 2: + option = option | USE_SUM_SAME_ID_GRADIENTS + # [train_steps, eval_steps, save_steps] pass step information to HybridMgmt for data process loop rank_info = RankInfo(rank_id, device_id, rank_size, option, [train_steps, eval_steps, save_steps]) diff --git a/mx_rec/core/emb/dynamic_sparse_embedding.py b/mx_rec/core/emb/dynamic_sparse_embedding.py index 194b2795..671c593e 100644 --- a/mx_rec/core/emb/dynamic_sparse_embedding.py +++ b/mx_rec/core/emb/dynamic_sparse_embedding.py @@ -6,10 +6,9 @@ import abc from typing import Optional, Union, Callable import tensorflow as tf -from tensorflow.python.ops import array_ops from mx_rec.constants.constants import ASCEND_TABLE_NAME_MUST_CONTAIN, ASCEND_SPARSE_LOOKUP_LOCAL_EMB, \ - ASCEND_SPARSE_LOOKUP_UNIQUE_KEYS + ASCEND_SPARSE_LOOKUP_ID_OFFSET from mx_rec.core.asc.feature_spec import FeatureSpec from mx_rec.core.asc.build_graph import get_preprocessed_tensor_for_asc from mx_rec.core.emb.base_sparse_embedding import BaseSparseEmbedding @@ -51,9 +50,7 @@ class DynamicSparseEmbedding(BaseSparseEmbedding): def _get_update_grad(self, local_grad: tf.Tensor, 
result: dict, table: Union[tf.compat.v1.Variable, tf.Tensor]) -> Union[tf.IndexedSlices, tf.Tensor]: - return tf.compat.v1.unsorted_segment_sum(local_grad, - result.get("restore_vector_second"), - array_ops.shape(result.get("unique_keys"))[0]) + return local_grad def _get_local_embeddings(self, table: Union[tf.compat.v1.Variable, tf.Tensor], result: dict, feature_spec: FeatureSpec, **kwargs) -> tf.Tensor: @@ -72,7 +69,7 @@ class DynamicSparseEmbedding(BaseSparseEmbedding): return sparse_forward_fn(local_embeddings) tf.compat.v1.add_to_collection(ASCEND_SPARSE_LOOKUP_LOCAL_EMB, local_embeddings) - tf.compat.v1.add_to_collection(ASCEND_SPARSE_LOOKUP_UNIQUE_KEYS, result.get("unique_keys")) + tf.compat.v1.add_to_collection(ASCEND_SPARSE_LOOKUP_ID_OFFSET, result.get("id_offsets")) return sparse_forward_fn(local_embeddings) diff --git a/mx_rec/core/emb/sparse_embedding.py b/mx_rec/core/emb/sparse_embedding.py index d8ce63b1..938f917d 100644 --- a/mx_rec/core/emb/sparse_embedding.py +++ b/mx_rec/core/emb/sparse_embedding.py @@ -53,11 +53,8 @@ class SparseEmbedding(BaseSparseEmbedding): def _get_update_grad(self, local_grad: tf.Tensor, result: dict, table: Union[tf.compat.v1.Variable, tf.Tensor]) -> Union[tf.IndexedSlices, tf.Tensor]: - unique_local_grad = tf.compat.v1.unsorted_segment_sum(local_grad, - result.get("restore_vector_second"), - array_ops.shape(result.get("unique_keys"))[0]) - return ops.IndexedSlices(values=unique_local_grad, - indices=result.get("unique_keys"), + return ops.IndexedSlices(values=local_grad, + indices=result.get("id_offsets"), dense_shape=tf.shape(table)) def _get_local_embeddings(self, table: Union[tf.compat.v1.Variable, tf.Tensor], result: dict, diff --git a/mx_rec/optimizers/adagrad.py b/mx_rec/optimizers/adagrad.py index d99be3b3..4ba444a6 100644 --- a/mx_rec/optimizers/adagrad.py +++ b/mx_rec/optimizers/adagrad.py @@ -76,6 +76,16 @@ class CustomizedAdagrad(adagrad.AdagradOptimizer, CustomizedOptimizer): initial_accumulator_value=initial_accumulator_value, use_locking=use_locking, name=self.unique_name) + self._slot_num = 1 + self._derivative = 2 + + @property + def slot_num(self): + return self._slot_num + + @property + def derivative(self): + return self._derivative def initialize_slots(self, var, table_instance): # Create slots for the first and second moments. 
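manager.py now inspects the optimizer's derivative property (1 for plain gradient descent, 2 for the slot-based optimizers in this series) to decide whether to OR USE_SUM_SAME_ID_GRADIENTS into the option word. A small sketch of the composition and the RankInfo-style decoding; the flag values mirror HybridOption in src/core/utils/common.h for this change set:

    USE_STATIC = 0x0001
    USE_HOT = 0x0001 << 1
    USE_DYNAMIC_EXPANSION = 0x0001 << 2
    USE_SUM_SAME_ID_GRADIENTS = 0x0001 << 3

    class StubOptimizer:
        derivative = 2  # e.g. adagrad/ftrl/lazy adam in this patch

    option = USE_STATIC | USE_DYNAMIC_EXPANSION
    if StubOptimizer.derivative == 2:
        option |= USE_SUM_SAME_ID_GRADIENTS

    # RankInfo decodes the word with bitwise AND (`option bitand FLAG` in C++).
    print(bool(option & USE_STATIC))                 # True
    print(bool(option & USE_HOT))                    # False
    print(bool(option & USE_SUM_SAME_ID_GRADIENTS))  # True
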
@@ -121,10 +131,11 @@ class CustomizedAdagrad(adagrad.AdagradOptimizer, CustomizedOptimizer): def _apply_sparse(self, grad, var): acc = self.get_slot(var, "acc") + unique_local_grad, unique_keys = self.sum_same_id_gradients(grad=grad.values, var=var, is_expansion=False) return training_ops.sparse_apply_adagrad( var, acc, math_ops.cast(self._learning_rate_tensor, var.dtype.base_dtype), - grad.values, - grad.indices, + unique_local_grad, + unique_keys, use_locking=self._use_locking) def _resource_apply_sparse(self, grad, var, indices): diff --git a/mx_rec/optimizers/base.py b/mx_rec/optimizers/base.py index a5d68a70..49594d40 100644 --- a/mx_rec/optimizers/base.py +++ b/mx_rec/optimizers/base.py @@ -21,12 +21,57 @@ from __future__ import print_function from collections import defaultdict +import tensorflow as tf from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops from tensorflow.python.training.optimizer import _TensorProcessor +from mx_rec.util.tf_version_adapter import npu_ops +from mx_rec.util.initialize import ConfigInitializer from mx_rec.util.log import logger +def get_restore_vector_second(table_name: str) -> tf.Tensor: + """ + Get restore vector which is calculated after the second all2all + :param table_name: embedding table_name + :return: the restore vector calculated after the second all2all + """ + channel_id = 0 + logger.debug('Channel %s_restore_second_%s was built for getnext', + table_name, channel_id) + with tf.compat.v1.variable_scope(table_name, reuse=tf.compat.v1.AUTO_REUSE): + restore_vector_second = npu_ops.gen_npu_ops.get_next( + output_types=[tf.int32], + output_shapes=[[None]], + channel_name=f'{table_name}_restore_second_{channel_id}')[0] + return restore_vector_second + + +def get_unique_keys(table_name: str, is_expansion: bool) -> tf.Tensor: + """ + Get the global unique keys which is calculated after the second all2all + :param table_name: embedding table_name + :param is_expansion: use dynamic expansion + :return: the global unique keys calculated after the second all2all + """ + channel_id = 0 + logger.debug('Channel %s_uniquekeys_%s was built for getnext', table_name, channel_id) + with tf.compat.v1.variable_scope(table_name, reuse=tf.compat.v1.AUTO_REUSE): + if is_expansion: + unique_keys = npu_ops.gen_npu_ops.get_next( + output_types=[tf.int64], + output_shapes=[[None]], + channel_name=f'{table_name}_uniquekeys_{channel_id}')[0] + return unique_keys + + unique_keys = npu_ops.gen_npu_ops.get_next( + output_types=[tf.int32], + output_shapes=[[None]], + channel_name=f'{table_name}_uniquekeys_{channel_id}')[0] + return unique_keys + + class CustomizedOptimizer: name_counter = defaultdict(int) @@ -35,6 +80,25 @@ class CustomizedOptimizer: self.unique_name = "" self.base_name = "" + @staticmethod + def sum_same_id_gradients(grad, var, is_expansion): + if isinstance(var, ops.Tensor): + # 扩容模式从scope获取表名,偏移是-2 + table_name = var.op.name.split('/')[-2] + else: + table_instance = ConfigInitializer.get_instance().sparse_embed_config.get_table_instance(var) + table_name = table_instance.table_name + with tf.compat.v1.variable_scope("restore_vector_second"): + restore_vector_second = get_restore_vector_second(table_name) + + with tf.compat.v1.variable_scope("unique_keys"): + unique_keys = get_unique_keys(table_name, is_expansion) + + unique_local_grad = tf.compat.v1.unsorted_segment_sum(grad, + restore_vector_second, + array_ops.shape(unique_keys)[0]) + return unique_local_grad, unique_keys + def initialize_slots(self, var, 
table_instance): raise NotImplementedError(f"Please define a specific realization on {self.__class__.__name__}") diff --git a/mx_rec/optimizers/ftrl.py b/mx_rec/optimizers/ftrl.py index 5c68b929..3659ffcd 100644 --- a/mx_rec/optimizers/ftrl.py +++ b/mx_rec/optimizers/ftrl.py @@ -80,11 +80,16 @@ class CustomizedFtrl(ftrl.FtrlOptimizer, CustomizedOptimizer): l2_shrinkage_regularization_strength=kwargs.get("l2_shrinkage_regularization_strength", 0.0) ) self._slot_num = 2 + self._derivative = 2 @property def slot_num(self): return self._slot_num + @property + def derivative(self): + return self._derivative + def initialize_slots(self, var, table_instance): val = constant_op.constant( self._initial_accumulator_value, dtype=var.dtype, shape=var.get_shape()) @@ -135,17 +140,19 @@ class CustomizedFtrl(ftrl.FtrlOptimizer, CustomizedOptimizer): self._resource_scatter_nd_update) def _apply_sparse(self, grad, var): + unique_local_grad, unique_keys = self.sum_same_id_gradients(grad=grad.values, var=var, is_expansion=False) + if self._l2_shrinkage_regularization_strength <= 0.0: return self._apply_sparse_shared( - grad.values, + unique_local_grad, var, - grad.indices, + unique_keys, lambda x, i, v: tf.compat.v1.scatter_nd_update(x, i, v)) else: return self._apply_sparse_shared_v2( - grad.values, + unique_local_grad, var, - grad.indices, + unique_keys, lambda x, i, v: tf.compat.v1.scatter_nd_update(x, i, v)) def _apply_sparse_shared(self, grad, var, indices, scatter_nd_update): diff --git a/mx_rec/optimizers/gradient_descent.py b/mx_rec/optimizers/gradient_descent.py index 6881d6ad..2ba72789 100644 --- a/mx_rec/optimizers/gradient_descent.py +++ b/mx_rec/optimizers/gradient_descent.py @@ -55,11 +55,16 @@ class CustomizedGradientDescent(gradient_descent.GradientDescentOptimizer, Custo super(CustomizedGradientDescent, self).__init__(learning_rate=learning_rate, use_locking=use_locking, name=self.unique_name) self._slot_num = 0 + self._derivative = 1 @property def slot_num(self): return self._slot_num + @property + def derivative(self): + return self._derivative + def initialize_slots(self, var, table_instance): return [] diff --git a/mx_rec/optimizers/gradient_descent_by_addr.py b/mx_rec/optimizers/gradient_descent_by_addr.py index 22b33852..11a9fda6 100644 --- a/mx_rec/optimizers/gradient_descent_by_addr.py +++ b/mx_rec/optimizers/gradient_descent_by_addr.py @@ -60,11 +60,16 @@ class CustomizedGradientDescentByAddr(gradient_descent.GradientDescentOptimizer, name=self.unique_name) self._slot_num = 0 + self._derivative = 1 @property def slot_num(self): return self._slot_num + @property + def derivative(self): + return self._derivative + def initialize_slots(self, var, table_instance): return [] diff --git a/mx_rec/optimizers/lazy_adam.py b/mx_rec/optimizers/lazy_adam.py index d79b6d23..bab8245f 100644 --- a/mx_rec/optimizers/lazy_adam.py +++ b/mx_rec/optimizers/lazy_adam.py @@ -72,11 +72,16 @@ class CustomizedLazyAdam(adam.AdamOptimizer, CustomizedOptimizer): super(CustomizedLazyAdam, self).__init__(learning_rate=learning_rate, beta1=beta1, beta2=beta2, epsilon=epsilon, use_locking=use_locking, name=self.unique_name) self._slot_num = 2 + self._derivative = 2 @property def slot_num(self): return self._slot_num + @property + def derivative(self): + return self._derivative + def initialize_slots(self, var, table_instance): # Create slots for the first and second moments. 
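The _apply_sparse overrides in this commit funnel their gradients through sum_same_id_gradients() from base.py, whose core is the unsorted_segment_sum call. A numeric sketch with invented values, where restore_vector_second maps each incoming gradient row to its slot in the globally unique key list:

    import tensorflow as tf

    with tf.Graph().as_default():
        grad = tf.constant([[1.0], [2.0], [4.0]])       # rows for keys [7, 9, 7]
        restore_vector_second = tf.constant([0, 1, 0])  # unique slot per row
        unique_keys = tf.constant([7, 9])

        # Duplicate-id rows collapse into one row per unique key.
        unique_local_grad = tf.compat.v1.unsorted_segment_sum(
            grad, restore_vector_second, tf.shape(unique_keys)[0])

        with tf.compat.v1.Session() as sess:
            print(sess.run(unique_local_grad))  # [[5.] [2.]]
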
def creat_one_single_slot(var, op_name): @@ -144,10 +149,11 @@ class CustomizedLazyAdam(adam.AdamOptimizer, CustomizedOptimizer): self._resource_scatter_nd_add) def _apply_sparse(self, grad, var): + unique_local_grad, unique_keys = self.sum_same_id_gradients(grad=grad.values, var=var, is_expansion=False) return self._apply_sparse_shared( - grad.values, + unique_local_grad, var, - grad.indices, + unique_keys, lambda x, i, v: tf.compat.v1.scatter_nd_add(x, i, v)) def _apply_sparse_shared(self, grad, var, indices, scatter_nd_add): diff --git a/mx_rec/optimizers/lazy_adam_by_addr.py b/mx_rec/optimizers/lazy_adam_by_addr.py index 92252824..cd4ee878 100644 --- a/mx_rec/optimizers/lazy_adam_by_addr.py +++ b/mx_rec/optimizers/lazy_adam_by_addr.py @@ -73,11 +73,16 @@ class CustomizedLazyAdamByAddress(adam.AdamOptimizer, CustomizedOptimizer): name=self.unique_name) self._slot_num = 2 + self._derivative = 2 @property def slot_num(self): return self._slot_num + @property + def derivative(self): + return self._derivative + def get_slot_init_values(self): # return state value list of adam that needs to initialize in ASC DDR. initial_momentum_value = 0.0 @@ -109,9 +114,10 @@ class CustomizedLazyAdamByAddress(adam.AdamOptimizer, CustomizedOptimizer): return temp def _apply_sparse(self, grad, addr): + unique_local_grad, unique_addr = self.sum_same_id_gradients(grad=grad, var=addr, is_expansion=True) return self._apply_sparse_shared( - grad, - addr) + unique_local_grad, + unique_addr) def _apply_sparse_shared(self, grad, addr): power_b1, power_b2 = self._get_beta_accumulators() diff --git a/src/core/hybrid_mgmt/hybrid_mgmt.cpp b/src/core/hybrid_mgmt/hybrid_mgmt.cpp index 894dc230..eb618f40 100644 --- a/src/core/hybrid_mgmt/hybrid_mgmt.cpp +++ b/src/core/hybrid_mgmt/hybrid_mgmt.cpp @@ -704,8 +704,7 @@ bool HybridMgmt::ParseKeysHBM(int channelId, int& batchId) LOG_DEBUG("channelId:{} batchId:{}, sendLookupSyncTC(ms):{}", channelId, batchId, sendLookupSyncTC.ElapsedMS()); // 训练时,使用全局去重聚合梯度,发送全局去重的key和对应的恢复向量 - if (GlobalEnv::applyGradientsStrategy == ApplyGradientsStrategyOptions::SUM_SAME_ID_GRADIENTS_AND_APPLY && - channelId == TRAIN_CHANNEL_ID) { + if (mgmtRankInfo.useSumSameIdGradients && channelId == TRAIN_CHANNEL_ID) { SendUniqKeysAndRestoreVecHBM(channelId, batchId, embInfo, infoVecs); } @@ -864,8 +863,7 @@ bool HybridMgmt::ProcessEmbInfo(const std::string& embName, int batchId, int cha LOG_DEBUG("channelId:{} batchId:{}, hostHashMapProcessTC(ms):{}", channelId, batchId, hostHashMapProcessTC.ElapsedMS()); - if (GlobalEnv::applyGradientsStrategy == ApplyGradientsStrategyOptions::SUM_SAME_ID_GRADIENTS_AND_APPLY && - channelId == TRAIN_CHANNEL_ID && remainBatchOut) { + if (mgmtRankInfo.useSumSameIdGradients && channelId == TRAIN_CHANNEL_ID && remainBatchOut) { SendUniqKeysAndRestoreVecDDR(embName, batchId, channelId, ddrParam); } diff --git a/src/core/key_process/key_process.cpp b/src/core/key_process/key_process.cpp index f76f6907..b72f3c8e 100644 --- a/src/core/key_process/key_process.cpp +++ b/src/core/key_process/key_process.cpp @@ -470,8 +470,9 @@ bool KeyProcess::KeyProcessTaskHelper(unique_ptr& batch, int channel, void KeyProcess::PushGlobalUniqueTensors(const unique_ptr>& tensors, KeysT& lookupKeys, int channel) { - if (GlobalEnv::applyGradientsStrategy == ApplyGradientsStrategyOptions::SUM_SAME_ID_GRADIENTS_AND_APPLY && - channel == TRAIN_CHANNEL_ID) { + LOG_INFO(KEY_PROCESS "rank:{}, channel:{}, useSumSameIdGradients:{} ...", + rankInfo.rankId, channel, rankInfo.useSumSameIdGradients); + if 
(rankInfo.useSumSameIdGradients && channel == TRAIN_CHANNEL_ID) { KeysT uniqueKeys; vector restoreVecSec; diff --git a/src/core/utils/common.cpp b/src/core/utils/common.cpp index 38e64444..9512b181 100644 --- a/src/core/utils/common.cpp +++ b/src/core/utils/common.cpp @@ -47,6 +47,7 @@ namespace MxRec { useStatic = static_cast(option) bitand HybridOption::USE_STATIC; useHot = static_cast(option) bitand HybridOption::USE_HOT; useDynamicExpansion = static_cast(option) bitand HybridOption::USE_DYNAMIC_EXPANSION; + useSumSameIdGradients = static_cast(option) bitand HybridOption::USE_SUM_SAME_ID_GRADIENTS; } RankInfo::RankInfo(int localRankSize, int option, const vector& maxStep) diff --git a/src/core/utils/common.h b/src/core/utils/common.h index f6c3de3f..9706a699 100644 --- a/src/core/utils/common.h +++ b/src/core/utils/common.h @@ -115,9 +115,10 @@ namespace MxRec { using TensorInfoT = std::tuple>>::iterator>; namespace HybridOption { - const unsigned int USE_STATIC = 0x001; - const unsigned int USE_HOT = 0x001 << 1; - const unsigned int USE_DYNAMIC_EXPANSION = 0x001 << 2; + const unsigned int USE_STATIC = 0x0001; + const unsigned int USE_HOT = 0x0001 << 1; + const unsigned int USE_DYNAMIC_EXPANSION = 0x0001 << 2; + const unsigned int USE_SUM_SAME_ID_GRADIENTS = 0x0001 << 3; }; string GetChipName(int devID); @@ -226,6 +227,7 @@ namespace MxRec { bool isDDR { false }; bool isSSDEnabled { false }; bool useDynamicExpansion {false}; + bool useSumSameIdGradients {true}; std::vector ctrlSteps; // 包含三个步数: train_steps, eval_steps, save_steps }; diff --git a/src/core/utils/config.cpp b/src/core/utils/config.cpp index 9cfec739..57478553 100644 --- a/src/core/utils/config.cpp +++ b/src/core/utils/config.cpp @@ -20,13 +20,7 @@ See the License for the specific language governing permissions and using namespace std; namespace MxRec { - namespace ApplyGradientsStrategyOptions { - const std::string DIRECT_APPLY = "direct_apply"; - const std::string SUM_SAME_ID_GRADIENTS_AND_APPLY = "sum_same_id_gradients_and_apply"; - }; - // 设置环境变量默认值 - string GlobalEnv::applyGradientsStrategy = ApplyGradientsStrategyOptions::SUM_SAME_ID_GRADIENTS_AND_APPLY; int GlobalEnv::aclTimeout = -1; // 默认阻塞方式,一直等待直到数据接收完成。 int GlobalEnv::hdChannelSize = 40; // 默认通道深度40 int GlobalEnv::keyProcessThreadNum = 6; // 默认6个线程 @@ -42,12 +36,6 @@ namespace MxRec { /// 配置环境变量,Python侧已经做了变量值校验,CPP侧直接使用即可;bool类型,1代表true,0代表false void ConfigGlobalEnv() { - // 设置梯度策略 - const char *envStrategy = getenv(RecEnvNames::APPLY_GRADIENTS_STRATEGY); - if (envStrategy != nullptr) { - GlobalEnv::applyGradientsStrategy = envStrategy; - } - // 设置ACL超时时间 const char *envAclTimeout = getenv(RecEnvNames::ACL_TIMEOUT); if (envAclTimeout != nullptr) { @@ -117,9 +105,8 @@ namespace MxRec { void LogGlobalEnv() { - LOG_DEBUG("Environment variables are: [{}: {}], [{}: {}], [{}: {}], [{}: {}], [{}: {}], [{}: {}], " + LOG_DEBUG("Environment variables are: [{}: {}], [{}: {}], [{}: {}], [{}: {}], [{}: {}], " "[{}: {}], [{}: {}], [{}: {}], [{}: {}], [{}: {}], [{}: {}], [{}: {}]", - RecEnvNames::APPLY_GRADIENTS_STRATEGY, GlobalEnv::applyGradientsStrategy, RecEnvNames::ACL_TIMEOUT, GlobalEnv::aclTimeout, RecEnvNames::HD_CHANNEL_SIZE, GlobalEnv::hdChannelSize, RecEnvNames::KEY_PROCESS_THREAD_NUM, GlobalEnv::keyProcessThreadNum, diff --git a/src/core/utils/config.h b/src/core/utils/config.h index 4c56c0d4..3ecb4c36 100644 --- a/src/core/utils/config.h +++ b/src/core/utils/config.h @@ -20,7 +20,6 @@ See the License for the specific language governing permissions and namespace 
MxRec { namespace RecEnvNames { - const char *const APPLY_GRADIENTS_STRATEGY = "APPLY_GRADIENTS_STRATEGY"; const char *const ACL_TIMEOUT = "AclTimeout"; const char *const HD_CHANNEL_SIZE = "HD_CHANNEL_SIZE"; const char *const KEY_PROCESS_THREAD_NUM = "KEY_PROCESS_THREAD_NUM"; @@ -34,13 +33,7 @@ namespace MxRec { const char *const RECORD_KEY_COUNT = "RECORD_KEY_COUNT"; }; - namespace ApplyGradientsStrategyOptions { - extern const std::string DIRECT_APPLY; - extern const std::string SUM_SAME_ID_GRADIENTS_AND_APPLY; - }; - struct GlobalEnv { - static std::string applyGradientsStrategy; static int aclTimeout; static int hdChannelSize; static int keyProcessThreadNum; diff --git a/src/pybind/module_main.cpp b/src/pybind/module_main.cpp index 4a08f992..cb128a15 100644 --- a/src/pybind/module_main.cpp +++ b/src/pybind/module_main.cpp @@ -81,6 +81,8 @@ namespace { m.attr("USE_DYNAMIC_EXPANSION") = py::int_(HybridOption::USE_DYNAMIC_EXPANSION); + m.attr("USE_SUM_SAME_ID_GRADIENTS") = py::int_(HybridOption::USE_SUM_SAME_ID_GRADIENTS); + GetRankInfo(m); GetEmbInfoParams(m); diff --git a/src/tests/utils/config_test.cpp b/src/tests/utils/config_test.cpp index d7e51b57..54e0ec67 100644 --- a/src/tests/utils/config_test.cpp +++ b/src/tests/utils/config_test.cpp @@ -24,7 +24,6 @@ using namespace MxRec; void SetEnvironmentVariables() { - setenv(RecEnvNames::APPLY_GRADIENTS_STRATEGY, "sum_same_id_gradients_and_apply", 1); setenv(RecEnvNames::ACL_TIMEOUT, "100", 1); setenv(RecEnvNames::HD_CHANNEL_SIZE, "50", 1); setenv(RecEnvNames::KEY_PROCESS_THREAD_NUM, "8", 1); @@ -40,7 +39,6 @@ void SetEnvironmentVariables() void UnsetEnvironmentVariables() { - unsetenv(RecEnvNames::APPLY_GRADIENTS_STRATEGY); unsetenv(RecEnvNames::ACL_TIMEOUT); unsetenv(RecEnvNames::HD_CHANNEL_SIZE); unsetenv(RecEnvNames::KEY_PROCESS_THREAD_NUM); @@ -56,7 +54,6 @@ void UnsetEnvironmentVariables() TEST(GlobalEnv, DefaultValues) { - ASSERT_EQ(GlobalEnv::applyGradientsStrategy, ApplyGradientsStrategyOptions::SUM_SAME_ID_GRADIENTS_AND_APPLY); ASSERT_EQ(GlobalEnv::aclTimeout, -1); ASSERT_EQ(GlobalEnv::hdChannelSize, 40); ASSERT_EQ(GlobalEnv::keyProcessThreadNum, 6); @@ -77,7 +74,6 @@ TEST(GlobalEnv, ConfigGlobalEnv) ConfigGlobalEnv(); // 验证环境变量是否已经被正确配置 - ASSERT_EQ(GlobalEnv::applyGradientsStrategy, "sum_same_id_gradients_and_apply"); ASSERT_EQ(GlobalEnv::aclTimeout, 100); ASSERT_EQ(GlobalEnv::hdChannelSize, 50); ASSERT_EQ(GlobalEnv::keyProcessThreadNum, 8); diff --git a/tests/mx_rec/core/mock_class.py b/tests/mx_rec/core/mock_class.py index 7566aa1a..04c9ae56 100644 --- a/tests/mx_rec/core/mock_class.py +++ b/tests/mx_rec/core/mock_class.py @@ -208,6 +208,7 @@ class MockOptimizer: def __init__(self): self.slot_num = 2 + self.derivative = 2 def initialize_slots(self, var, table_instance): # Create slots for the first and second moments. diff --git a/tests/mx_rec/core/test_build_graph.py b/tests/mx_rec/core/test_build_graph.py index c15d851f..14913cf7 100644 --- a/tests/mx_rec/core/test_build_graph.py +++ b/tests/mx_rec/core/test_build_graph.py @@ -156,84 +156,6 @@ class TestGetIdOffsetsFunc(unittest.TestCase): self.assertEqual(swap_len, 0) -class TestGetRestoreVectorSecondFunc(unittest.TestCase): - """ - Test for 'mx_rec.core.asc.build_graph.get_restore_vector_second'. 
- """ - - def setUp(self): - # 默认动态扩容、hot emb、HBM - self.config = dict(table_name="test_table", channel_id=0, is_hbm=True, emb_size=8, ext_emb_size=8, - feat_cnt=8, batch_size=32, rank_size=8, send_count=1, device_id=0, - use_hot=True, use_dynamic_expansion=True) - self.max_lookup_vec_size = self.config.get("send_count") * self.config.get("rank_size") - - def tearDown(self): - # 恢复config - self.config = dict(table_name="test_table", channel_id=0, is_hbm=True, emb_size=8, ext_emb_size=8, - feat_cnt=8, batch_size=32, rank_size=8, send_count=1, device_id=0, - use_hot=True, use_dynamic_expansion=True) - - @mock.patch("mx_rec.core.asc.build_graph.npu_ops.gen_npu_ops.get_next") - def test_get_restore_vector_second(self, mock_get_next): - """ - case: test get_restore_vector_second - """ - - from mx_rec.core.asc.build_graph import get_restore_vector_second - - with tf.Graph().as_default(): - mock_get_next.return_value = [0] - restore_vector_second = get_restore_vector_second(self.max_lookup_vec_size, self.config) - self.assertEqual(restore_vector_second, 0) - - -class TestGetUniqueKeysFunc(unittest.TestCase): - """ - Test for 'mx_rec.core.asc.build_graph.get_unique_keys'. - """ - - def setUp(self): - # 默认动态扩容、hot emb、HBM - self.config = dict(table_name="test_table", channel_id=0, is_hbm=True, emb_size=8, ext_emb_size=8, - feat_cnt=8, batch_size=32, rank_size=8, send_count=1, device_id=0, - use_hot=True, use_dynamic_expansion=True) - self.max_lookup_vec_size = self.config.get("send_count") * self.config.get("rank_size") - - def tearDown(self): - # 恢复config - self.config = dict(table_name="test_table", channel_id=0, is_hbm=True, emb_size=8, ext_emb_size=8, - feat_cnt=8, batch_size=32, rank_size=8, send_count=1, device_id=0, - use_hot=True, use_dynamic_expansion=True) - - @mock.patch("mx_rec.core.asc.build_graph.npu_ops.gen_npu_ops.get_next") - def test_get_unique_keys_case1(self, mock_get_next): - """ - case1: 动态扩容 - """ - - from mx_rec.core.asc.build_graph import get_unique_keys - - with tf.Graph().as_default(): - mock_get_next.return_value = [0] - unique_keys = get_unique_keys(self.max_lookup_vec_size, self.config) - self.assertEqual(unique_keys, 0) - - @mock.patch("mx_rec.core.asc.build_graph.npu_ops.gen_npu_ops.get_next") - def test_get_unique_keys_case2(self, mock_get_next): - """ - case2: 非动态扩容 - """ - - from mx_rec.core.asc.build_graph import get_unique_keys - - with tf.Graph().as_default(): - self.config["use_dynamic_expansion"] = False - mock_get_next.return_value = [1] - unique_keys = get_unique_keys(self.max_lookup_vec_size, self.config) - self.assertEqual(unique_keys, 1) - - class TestGetAll2allArgsFunc(unittest.TestCase): """ Test for 'mx_rec.core.asc.build_graph.get_all2all_args'. 
@@ -346,15 +268,12 @@ class TestGetPreProcessedTensorForAscFunc(unittest.TestCase): self.config = dict(table_name="test_table", channel_id=0, is_hbm=True, emb_size=8, ext_emb_size=8, feat_cnt=8, batch_size=32, rank_size=8, send_count=1, device_id=0, use_hot=True, use_dynamic_expansion=True) - global_env.apply_gradients_strategy = "direct_apply" @mock.patch.multiple("mx_rec.core.asc.build_graph", get_restore_vector=mock.MagicMock(return_value=[0, 0]), get_id_offsets=mock.MagicMock(return_value=[0, 0, 0]), get_all2all_args=mock.MagicMock(return_value=0), - get_swap_info=mock.MagicMock(return_value=0), - get_restore_vector_second=mock.MagicMock(return_value=0), - get_unique_keys=mock.MagicMock(return_value=0)) + get_swap_info=mock.MagicMock(return_value=0)) @mock.patch("mx_rec.core.asc.build_graph.ConfigInitializer") def test_get_preprocessed_tensor_for_asc_case1(self, build_graph_config_initializer): """ @@ -363,23 +282,18 @@ class TestGetPreProcessedTensorForAscFunc(unittest.TestCase): from mx_rec.core.asc.build_graph import get_preprocessed_tensor_for_asc - global_env.apply_gradients_strategy = "sum_same_id_gradients_and_apply" with tf.Graph().as_default(): mock_config_initializer = MockConfigInitializer(use_static=True) build_graph_config_initializer.get_instance = mock.Mock(return_value=mock_config_initializer) result = get_preprocessed_tensor_for_asc(None, self.config) self.assertIsNotNone(result.get("restore_vector")) - self.assertIsNotNone(result.get("restore_vector_second")) - self.assertIsNotNone(result.get("unique_keys")) @mock.patch.multiple("mx_rec.core.asc.build_graph", get_restore_vector=mock.MagicMock(return_value=[0, 0]), get_id_offsets=mock.MagicMock(return_value=[0, 0, 0]), get_all2all_args=mock.MagicMock(return_value=0), - get_swap_info=mock.MagicMock(return_value=0), - get_restore_vector_second=mock.MagicMock(return_value=0), - get_unique_keys=mock.MagicMock(return_value=0)) + get_swap_info=mock.MagicMock(return_value=0)) @mock.patch("mx_rec.core.asc.build_graph.ConfigInitializer") def test_get_preprocessed_tensor_for_asc_case2(self, build_graph_config_initializer): """ @@ -388,23 +302,18 @@ class TestGetPreProcessedTensorForAscFunc(unittest.TestCase): from mx_rec.core.asc.build_graph import get_preprocessed_tensor_for_asc - global_env.apply_gradients_strategy = "sum_same_id_gradients_and_apply" with tf.Graph().as_default(): mock_config_initializer = MockConfigInitializer() build_graph_config_initializer.get_instance = mock.Mock(return_value=mock_config_initializer) result = get_preprocessed_tensor_for_asc(None, self.config) self.assertIsNotNone(result.get("restore_vector")) - self.assertIsNotNone(result.get("restore_vector_second")) - self.assertIsNotNone(result.get("unique_keys")) @mock.patch.multiple("mx_rec.core.asc.build_graph", get_restore_vector=mock.MagicMock(return_value=[0, 0]), get_id_offsets=mock.MagicMock(return_value=[0, 0, 0]), get_all2all_args=mock.MagicMock(return_value=0), - get_swap_info=mock.MagicMock(return_value=0), - get_restore_vector_second=mock.MagicMock(return_value=0), - get_unique_keys=mock.MagicMock(return_value=0)) + get_swap_info=mock.MagicMock(return_value=0)) @mock.patch("mx_rec.core.asc.build_graph.ConfigInitializer") def test_get_preprocessed_tensor_for_asc_case3(self, build_graph_config_initializer): """ @@ -413,7 +322,6 @@ class TestGetPreProcessedTensorForAscFunc(unittest.TestCase): from mx_rec.core.asc.build_graph import get_preprocessed_tensor_for_asc - global_env.apply_gradients_strategy = "sum_same_id_gradients_and_apply" with 
tf.Graph().as_default(): mock_config_initializer = MockConfigInitializer() build_graph_config_initializer.get_instance = mock.Mock(return_value=mock_config_initializer) @@ -421,7 +329,6 @@ class TestGetPreProcessedTensorForAscFunc(unittest.TestCase): self.config["channel_id"] = 1 result = get_preprocessed_tensor_for_asc(None, self.config) self.assertIsNotNone(result.get("restore_vector")) - self.assertIsNone(result.get("restore_vector_second")) if __name__ == '__main__': diff --git a/tests/mx_rec/core/test_manager.py b/tests/mx_rec/core/test_manager.py index 815ad843..ffa8b09e 100644 --- a/tests/mx_rec/core/test_manager.py +++ b/tests/mx_rec/core/test_manager.py @@ -385,6 +385,7 @@ class TestInitializeEmbCacheFunc(unittest.TestCase): USE_STATIC=mock.MagicMock(return_value=0), USE_HOT=mock.MagicMock(return_value=1), USE_DYNAMIC_EXPANSION=mock.MagicMock(return_value=2), + USE_SUM_SAME_ID_GRADIENTS=mock.MagicMock(return_value=4), RankInfo=mock.MagicMock(return_value="mock_info"), HybridMgmt=mock.MagicMock(return_value=MockHybridMgmt(is_initialized=False))) @mock.patch("mx_rec.core.asc.manager.ConfigInitializer") @@ -398,6 +399,9 @@ class TestInitializeEmbCacheFunc(unittest.TestCase): mock_config_initializer = MockConfigInitializer(use_static=True, use_dynamic_expansion=True) manager_config_initializer.get_instance = mock.Mock(return_value=mock_config_initializer) + mock_opt = MockOptimizer() + manager_config_initializer.get_instance().optimizer_config.optimizer_instance = mock_opt + with self.assertRaises(RuntimeError): initialize_emb_cache([], []) @@ -408,6 +412,7 @@ class TestInitializeEmbCacheFunc(unittest.TestCase): USE_STATIC=mock.MagicMock(return_value=0), USE_HOT=mock.MagicMock(return_value=1), USE_DYNAMIC_EXPANSION=mock.MagicMock(return_value=2), + USE_SUM_SAME_ID_GRADIENTS=mock.MagicMock(return_value=4), RankInfo=mock.MagicMock(return_value="mock_info")) @mock.patch("mx_rec.core.asc.manager.ConfigInitializer") @mock.patch("mx_rec.core.asc.manager.HybridMgmt") @@ -421,6 +426,9 @@ class TestInitializeEmbCacheFunc(unittest.TestCase): mock_config_initializer = MockConfigInitializer(use_static=True, use_dynamic_expansion=True) manager_config_initializer.get_instance = mock.Mock(return_value=mock_config_initializer) + mock_opt = MockOptimizer() + manager_config_initializer.get_instance().optimizer_config.optimizer_instance = mock_opt + mock_mgmt = MockHybridMgmt(is_initialized=True) mock_hybrid_mgmt.return_value = mock_mgmt initialize_emb_cache([], []) -- Gitee From 05b163e6bd6c3a6ee8c8e2c8dad88537b215b8dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=BD=95=E9=9C=96?= Date: Fri, 12 Apr 2024 02:31:16 +0000 Subject: [PATCH 033/302] =?UTF-8?q?!74=20mxrec=E6=9E=84=E5=BB=BA=E4=BC=98?= =?UTF-8?q?=E5=8C=96=EF=BC=9A=E4=BD=BF=E7=94=A8python3.7=20setup.py=20bdis?= =?UTF-8?q?t=5Fwheel=E6=96=B9=E5=BC=8F=E6=9E=84=E5=BB=BA=20*=20mxrec?= =?UTF-8?q?=E6=9E=84=E5=BB=BA=E4=BC=98=E5=8C=96=EF=BC=9A=E4=BD=BF=E7=94=A8?= =?UTF-8?q?python3.7=20setup.py=20bdist=5Fwheel=E6=96=B9=E5=BC=8F=E6=9E=84?= =?UTF-8?q?=E5=BB=BA=20*=20mxrec=E6=9E=84=E5=BB=BA=E4=BC=98=E5=8C=96?= =?UTF-8?q?=EF=BC=9A=E4=BD=BF=E7=94=A8python3.7=20setup.py=20bdist=5Fwheel?= =?UTF-8?q?=E6=96=B9=E5=BC=8F=E6=9E=84=E5=BB=BA=20*=20mxrec=E6=9E=84?= =?UTF-8?q?=E5=BB=BA=E4=BC=98=E5=8C=96=EF=BC=9A=E4=BD=BF=E7=94=A8python3.7?= =?UTF-8?q?=20setup.py=20bdist=5Fwheel=E6=96=B9=E5=BC=8F=E6=9E=84=E5=BB=BA?= =?UTF-8?q?=20*=20mxrec=E6=9E=84=E5=BB=BA=E4=BC=98=E5=8C=96=EF=BC=9A?= =?UTF-8?q?=E4=BD=BF=E7=94=A8python3.7=20setup.py=20bdist=5Fwheel=E6=96=B9?= 
=?UTF-8?q?=E5=BC=8F=E6=9E=84=E5=BB=BA=20*=20mxrec=E6=9E=84=E5=BB=BA?= =?UTF-8?q?=E4=BC=98=E5=8C=96=EF=BC=9A=E4=BD=BF=E7=94=A8python3.7=20setup.?= =?UTF-8?q?py=20bdist=5Fwheel=E6=96=B9=E5=BC=8F=E6=9E=84=E5=BB=BA=20*=20mx?= =?UTF-8?q?rec=E6=9E=84=E5=BB=BA=E4=BC=98=E5=8C=96=EF=BC=9A=E4=BD=BF?= =?UTF-8?q?=E7=94=A8python3.7=20setup.py=20bdist=5Fwheel=E6=96=B9=E5=BC=8F?= =?UTF-8?q?=E6=9E=84=E5=BB=BA=20*=20Merge=20remote-tracking=20branch=20'or?= =?UTF-8?q?igin/develop'=20into=20develop=20*=20Merge=20remote-tracking=20?= =?UTF-8?q?branch=20'origin/develop'=20into=20develop=20*=20Merge=20remote?= =?UTF-8?q?-tracking=20branch=20'origin/develop'=20into=20develop=20*=20Me?= =?UTF-8?q?rge=20remote-tracking=20branch=20'origin/develop'=20into=20deve?= =?UTF-8?q?lop=20*=20Merge=20remote-tracking=20branch=20'origin/develop'?= =?UTF-8?q?=20into=20develop=20*=20Merge=20remote-tracking=20branch=20'ori?= =?UTF-8?q?gin/develop'=20into=20develop=20*=20Merge=20remote-tracking=20b?= =?UTF-8?q?ranch=20'origin/develop'=20into=20develop=20*=20mxrec=E6=9E=84?= =?UTF-8?q?=E5=BB=BA=E4=BC=98=E5=8C=96=EF=BC=9A=E4=BD=BF=E7=94=A8python3.7?= =?UTF-8?q?=20setup.py=20bdist=5Fwheel=E6=96=B9=E5=BC=8F=E6=9E=84=E5=BB=BA?= =?UTF-8?q?=20*=20mxrec=E6=9E=84=E5=BB=BA=E4=BC=98=E5=8C=96=EF=BC=9A?= =?UTF-8?q?=E4=BD=BF=E7=94=A8python3.7=20setup.py=20bdist=5Fwheel=E6=96=B9?= =?UTF-8?q?=E5=BC=8F=E6=9E=84=E5=BB=BA=20*=20mxrec=E6=9E=84=E5=BB=BA?= =?UTF-8?q?=E4=BC=98=E5=8C=96=EF=BC=9A=E4=BD=BF=E7=94=A8python3.7=20setup.?= =?UTF-8?q?py=20bdist=5Fwheel=E6=96=B9=E5=BC=8F=E6=9E=84=E5=BB=BA=20*=20mx?= =?UTF-8?q?rec=E6=9E=84=E5=BB=BA=E4=BC=98=E5=8C=96=EF=BC=9A=E4=BD=BF?= =?UTF-8?q?=E7=94=A8python3.7=20setup.py=20bdist=5Fwheel=E6=96=B9=E5=BC=8F?= =?UTF-8?q?=E6=9E=84=E5=BB=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 20 ++-- ...ld_tf1_with_opensource.sh => build_tf1.sh} | 51 +-------- ...ld_tf2_with_opensource.sh => build_tf2.sh} | 51 +-------- build/{build.sh => gen_mxrec_tar_pkg.sh} | 102 ++++++------------ build/move_whl_file_2_pkg_dir.sh | 35 ++++++ setup.py | 95 ++++++---------- setup_tf1.py | 96 +++++++++++++++++ setup_tf2.py | 96 +++++++++++++++++ tests/run_python_dt.sh | 2 +- 9 files changed, 310 insertions(+), 238 deletions(-) rename build/{build_tf1_with_opensource.sh => build_tf1.sh} (71%) rename build/{build_tf2_with_opensource.sh => build_tf2.sh} (71%) rename build/{build.sh => gen_mxrec_tar_pkg.sh} (44%) create mode 100644 build/move_whl_file_2_pkg_dir.sh create mode 100644 setup_tf1.py create mode 100644 setup_tf2.py diff --git a/README.md b/README.md index fccc0244..6f49f4ba 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ mxRec作为面向互联网市场搜索推荐广告的应用使能SDK产品,对 ## 安装方式 -安装前,请参考《CANN 软件安装指南CANN 软件安装指南》安装CANN开发套件软件包和TensorFlow适配昇腾插件。 +安装前,请参考《CANN 软件安装指南》安装CANN开发套件软件包和TensorFlow适配昇腾插件。 CANN软件提供进程级环境变量设置脚本,供用户在进程中引用,以自动完成环境变量设置。用户进程结束后自动失效。可在程序启动的Shell脚本中使用如下命令设置CANN的相关环境变量,也可通过命令行执行如下命令(以root用户默认安装路径“/usr/local/Ascend”为例): ```shell @@ -63,12 +63,14 @@ bash run.sh - [openmpi 4.1.5](https://download.open-mpi.org/release/open-mpi/v4.1/openmpi-4.1.5.tar.gz): 请参考软件文档在编译环境完成安装 - tensorflow 1.15/2.6.5:根据实际需求选择对应版本 -将pybind11和securec的压缩包放在与MxRec代码同级的opensource目录下,并且将其分别更名为pybind11-2.10.3.zip、huaweicloud-sdk-c-obs-3.23.9.zip。如果没有opensource目录,则需要在MxRec同级的目录下手动创建opensource目录,然后将pybind11和securec的压缩包放在opensource目录下。 
+将pybind11和securec的压缩包放在与mxRec代码同级的opensource目录下,并且将其分别更名为pybind11-2.10.3.zip、huaweicloud-sdk-c-obs-3.23.9.zip。如果没有opensource目录,则需要在mxRec同级的目录下手动创建opensource目录,然后将pybind11和securec的压缩包放在opensource目录下。 为了构建多个版本的whl包,编译脚本在python虚拟环境完成对应tensorflow版本的安装。用户可以根据实际情况调整编译脚本,指定tensorflow的安装路径。编译方法: -- build/build.sh:执行脚本完成tf1和tf2版本whl包的构建和打包。执行脚本前,请参考build/build_tf1_with_opensource.sh、build/build_tf2_with_opensource.sh创建对应的虚拟环境,在虚拟环境中完成对应tensorflow版本的安装,并修改对应的激活命令。 -- build/build_tf1_with_opensource.sh:执行脚本完成tf1版本whl包的构建,构建成功后,whl包在/build/mindxsdk-mxrec/tf1_whl子目录下。执行脚本前,创建tf1虚拟环境,在虚拟环境中完成tensorflow 1.15.0版本的安装,并修改对应的激活命令。 -- build/build_tf2_with_opensource.sh:执行脚本完成tf2版本whl包的构建,构建成功后,whl包在/build/mindxsdk-mxrec/tf2_whl子目录下。执行脚本前,创建tf2虚拟环境,在虚拟环境中完成tensorflow 2.6.5版本的安装,并修改对应的激活命令。 + +进入mxRec代码目录: +- setup.py:执行脚本setup.py,比如:**python3.7 setup.py**完成tf1和tf2版本whl包的构建和打包,构建成功后,whl包在build/mindxsdk-mxrec/目录下,其中tf1_whl和tf2_whl目录下存在对应的whl包。执行脚本前,请参考build/build_tf1.sh、build/build_tf2.sh创建对应的虚拟环境,在虚拟环境中完成对应tensorflow版本的安装,并修改对应的激活命令。 +- setup_tf1.py:执行脚本setup_tf1.py,比如:**python3.7 setup_tf1.py bdist_wheel**完成tf1版本whl包的构建,构建成功后,whl包在build/mindxsdk-mxrec/tf1_whl子目录下。执行脚本前,请参考build/build_tf1.sh创建tf1虚拟环境,在虚拟环境中完成tensorflow 1.15.0版本的安装,并修改对应的激活命令。 +- setup_tf2.py:执行脚本setup_tf2.py,比如:**python3.7 setup_tf2.py bdist_wheel**完成tf2版本whl包的构建,构建成功后,whl包在build/mindxsdk-mxrec/tf2_whl子目录下。执行脚本前,请参考build/build_tf2.sh创建tf2虚拟环境,在虚拟环境中完成tensorflow 2.6.5版本的安装,并修改对应的激活命令。 如需使用动态扩容功能,进入“./cust_op/cust_op_by_addr”目录中。参考以下命令编译并安装动态扩容算子包。 ```shell @@ -99,8 +101,8 @@ bash run_python_dt.sh - [pybind11 v2.10.3](https://github.com/pybind/pybind11/archive/refs/tags/v2.10.3.zip) - [securec](https://github.com/huaweicloud/huaweicloud-sdk-c-obs/archive/refs/tags/v3.23.9.zip) -将googletest、emock、pybind11和securec的压缩包放在与MxRec代码同级的opensource目录下,并且将其分别更名为googletest-release-1.8.1.zip、 -emock-0.9.0.zip、pybind11-2.10.3.zip、 huaweicloud-sdk-c-obs-3.23.9.zip。如果没有opensource目录,则需要在MxRec同级的目录下手动创建opensource目录, +将googletest、emock、pybind11和securec的压缩包放在与mxRec代码同级的opensource目录下,并且将其分别更名为googletest-release-1.8.1.zip、 +emock-0.9.0.zip、pybind11-2.10.3.zip、 huaweicloud-sdk-c-obs-3.23.9.zip。如果没有opensource目录,则需要在mxRec同级的目录下手动创建opensource目录, 然后将前述几个压缩包放在opensource目录下。 如需使用C++测试用例,需要按照上述描述准备需要的依赖,准备好之后,进入src目录中。参考以下命令执行C++测试用例: @@ -117,11 +119,11 @@ bash test_ut.sh tf2 ## 使用指导 -mxRec所支持的使用环境、功能特性、API接口与使用样例请参考昇腾开源社区MindX SDK产品文档。 +mxRec所支持的使用环境、功能特性、API接口与使用样例请参考mxRec用户指南。 ## 参考设计 -mxrec框架基础镜像,基于TensorFlow 1.15.0、tensorflow2.6.5制作的基础镜像,安装mxrec后即可开始训练,以及样例使用介绍。 +mxRec框架基础镜像,基于TensorFlow 1.15.0、tensorflow2.6.5制作的基础镜像,安装mxRec后即可开始训练,以及样例使用介绍。 1. 
https://ascendhub.huawei.com/#/detail/mxrec-tf1 diff --git a/build/build_tf1_with_opensource.sh b/build/build_tf1.sh similarity index 71% rename from build/build_tf1_with_opensource.sh rename to build/build_tf1.sh index ff59571c..5d6632d6 100644 --- a/build/build_tf1_with_opensource.sh +++ b/build/build_tf1.sh @@ -15,13 +15,11 @@ # ============================================================================== ################################################################## -# build_tf1_with_opensource.sh 编译MxRec和动态扩容算子 +# build_tf1.sh 编译MxRec # 编译环境:Python3.7.5 GCC 7.3.0 CMake 3.20.6 -# 代码主要分为四部分: +# 代码主要分为两部分: # 1、准备编译MxRec所需依赖:pybind11(v2.10.3) securec # 2、编译securec、AccCTR以及MxRec -# 3、生成MxRec Wheel包,生成的whl包在当前目录下的mindxsdk-mxrec/tf1_whl -# 4、编译动态扩容算子 ################################################################## set -e @@ -64,33 +62,6 @@ source /opt/buildtools/tf1_env/bin/activate tf1_path=$(dirname "$(dirname "$(which python3.7)")")/lib/python3.7/site-packages/tensorflow_core deactivate tf1_env -project_output_path="${MxRec_DIR}"/output/ -VERSION_FILE="${MxRec_DIR}"/../mindxsdk/build/conf/config.yaml - -function get_version() { - if [ -f "$VERSION_FILE" ]; then - VERSION=$(sed '/.*mindxsdk:/!d;s/.*: //' "$VERSION_FILE") - if [[ "$VERSION" == *.[b/B]* ]] && [[ "$VERSION" != *.[RC/rc]* ]]; then - VERSION=${VERSION%.*} - fi - else - VERSION="5.0.0" - fi -} - -rm -rf "${project_output_path}" -rm -rf "${SCRIPT_DIR}/lib" - -# 获取MxRec版本信息 -get_version -export VERSION -echo "MindX SDK MxRec: ${VERSION}" >> ./version.info - -pkg_dir=mindxsdk-mxrec -rm -rf "${pkg_dir}" -mkdir "${pkg_dir}" -mv version.info "${pkg_dir}" - # 配置MxRec C++代码路径和AccCTR路径 src_path="${MxRec_DIR}"/src acc_ctr_path="${MxRec_DIR}"/src/AccCTR @@ -134,19 +105,10 @@ function collect_so_file() cp ${acc_ctr_path}/output/ock_ctr_common/lib/* libasc cp -df "${MxRec_DIR}"/output/*.so* libasc cp "${opensource_path}"/securec/lib/libsecurec.so libasc -} - -function gen_wheel_file() -{ cd "${MxRec_DIR}" touch "${src_path}"/libasc/__init__.py rm -rf "${MxRec_DIR}"/mx_rec/libasc mv "${src_path}"/libasc "${MxRec_DIR}"/mx_rec - python3.7 setup.py bdist_wheel --plat-name=linux_$(arch) - mkdir -p "$1" - echo "moving whl file $1" - mv dist/mx_rec*.whl "$1" - rm -rf "${MxRec_DIR}"/mx_rec/libasc } # start to build MxRec @@ -158,13 +120,4 @@ echo "---------------- compile MxRec so files ----------------" compile_so_file "${tf1_path}" echo "---------------- collect so files and mv them to libasc ----------------" collect_so_file -echo "---------------- generate MxRec wheel package ----------------" -gen_wheel_file "$SCRIPT_DIR"/"${pkg_dir}"/tf1_whl echo "---------------- compile MxRec success!!!! ----------------" - -# start to compile cust op -echo "---------------- start to compile cust op ----------------" -cd "${MxRec_DIR}"/cust_op/cust_op_by_addr -chmod u+x run.sh -./run.sh -echo "---------------- compile cust op success!!!! 
----------------" \ No newline at end of file diff --git a/build/build_tf2_with_opensource.sh b/build/build_tf2.sh similarity index 71% rename from build/build_tf2_with_opensource.sh rename to build/build_tf2.sh index 08aaf164..639024ff 100644 --- a/build/build_tf2_with_opensource.sh +++ b/build/build_tf2.sh @@ -15,13 +15,11 @@ # ============================================================================== ################################################################## -# build_tf2_with_opensource.sh 编译MxRec和动态扩容算子 +# build_tf2.sh 编译MxRec # 编译环境:Python3.7.5 GCC 7.3.0 CMake 3.20.6 -# 代码主要分为四部分: +# 代码主要分为两部分: # 1、准备编译MxRec所需依赖:pybind11(v2.10.3) securec # 2、编译securec、AccCTR以及MxRec -# 3、生成MxRec Wheel包,生成的whl包在当前目录下的mindxsdk-mxrec/tf2_whl -# 4、编译动态扩容算子 ################################################################## set -e @@ -64,33 +62,6 @@ source /opt/buildtools/tf2_env/bin/activate tf2_path=$(dirname "$(dirname "$(which python3.7)")")/lib/python3.7/site-packages/tensorflow deactivate tf2_env -project_output_path="${MxRec_DIR}"/output/ -VERSION_FILE="${MxRec_DIR}"/../mindxsdk/build/conf/config.yaml - -function get_version() { - if [ -f "$VERSION_FILE" ]; then - VERSION=$(sed '/.*mindxsdk:/!d;s/.*: //' "$VERSION_FILE") - if [[ "$VERSION" == *.[b/B]* ]] && [[ "$VERSION" != *.[RC/rc]* ]]; then - VERSION=${VERSION%.*} - fi - else - VERSION="5.0.0" - fi -} - -rm -rf "${project_output_path}" -rm -rf "${SCRIPT_DIR}/lib" - -# 获取MxRec版本信息 -get_version -export VERSION -echo "MindX SDK MxRec: ${VERSION}" >> ./version.info - -pkg_dir=mindxsdk-mxrec -rm -rf "${pkg_dir}" -mkdir "${pkg_dir}" -mv version.info "${pkg_dir}" - # 配置MxRec C++代码路径和AccCTR路径 src_path="${MxRec_DIR}"/src acc_ctr_path="${MxRec_DIR}"/src/AccCTR @@ -134,19 +105,10 @@ function collect_so_file() cp ${acc_ctr_path}/output/ock_ctr_common/lib/* libasc cp -df "${MxRec_DIR}"/output/*.so* libasc cp "${opensource_path}"/securec/lib/libsecurec.so libasc -} - -function gen_wheel_file() -{ cd "${MxRec_DIR}" touch "${src_path}"/libasc/__init__.py rm -rf "${MxRec_DIR}"/mx_rec/libasc mv "${src_path}"/libasc "${MxRec_DIR}"/mx_rec - python3.7 setup.py bdist_wheel --plat-name=linux_$(arch) - mkdir -p "$1" - echo "moving whl file $1" - mv dist/mx_rec*.whl "$1" - rm -rf "${MxRec_DIR}"/mx_rec/libasc } # start to build MxRec @@ -158,13 +120,4 @@ echo "---------------- compile MxRec so files ----------------" compile_so_file "${tf2_path}" echo "---------------- collect so files and mv them to libasc ----------------" collect_so_file -echo "---------------- generate MxRec wheel package ----------------" -gen_wheel_file "$SCRIPT_DIR"/"${pkg_dir}"/tf2_whl echo "---------------- compile MxRec success!!!! ----------------" - -# start to compile cust op -echo "---------------- start to compile cust op ----------------" -cd "${MxRec_DIR}"/cust_op/cust_op_by_addr -chmod u+x run.sh -./run.sh -echo "---------------- compile cust op success!!!! 
----------------" \ No newline at end of file diff --git a/build/build.sh b/build/gen_mxrec_tar_pkg.sh similarity index 44% rename from build/build.sh rename to build/gen_mxrec_tar_pkg.sh index 0eb688fd..72ccfe49 100644 --- a/build/build.sh +++ b/build/gen_mxrec_tar_pkg.sh @@ -18,11 +18,9 @@ set -e warn() { echo >&2 -e "\033[1;31m[WARN ][Depend ] $1\033[1;37m" ; } ARCH="$(uname -m)" SCRIPT_DIR=$(dirname "$(readlink -f "$0")") -ROOT_DIR=$(dirname "${SCRIPT_DIR}") -cd "$SCRIPT_DIR" +MxRec_DIR=$(dirname "${SCRIPT_DIR}") - -VERSION_FILE="${ROOT_DIR}"/../mindxsdk/build/conf/config.yaml +VERSION_FILE="${MxRec_DIR}"/../mindxsdk/build/conf/config.yaml get_version() { if [ -f "$VERSION_FILE" ]; then VERSION=$(sed '/.*mindxsdk:/!d;s/.*: //' "$VERSION_FILE") @@ -30,96 +28,60 @@ get_version() { VERSION=${VERSION%.*} fi else - VERSION="5.0.0" - fi -} - -remove() -{ - if [ -d "$1" ]; then - rm -rf "$1" - elif [ -f "$1" ]; then - rm -f "$1" + VERSION="6.0.RC2" fi } -project_output_path="${ROOT_DIR}"/output/ -remove "${project_output_path}" -remove "${SCRIPT_DIR}/lib" get_version -export VERSION echo "MindX SDK mxrec: ${VERSION}" >> ./version.info pkg_dir=mindxsdk-mxrec -remove "${pkg_dir}" -mkdir "${pkg_dir}" -mv version.info "${pkg_dir}" - -src_path="${ROOT_DIR}"/src -cd "${ROOT_DIR}" - release_tar=Ascend-"${pkg_dir}"_"${VERSION}"_linux-"${ARCH}".tar.gz +mv version.info "${SCRIPT_DIR}"/"${pkg_dir}" -gen_tar_file() +function gen_tar_file() { - cd "${src_path}" - cp -r "${src_path}"/../cust_op ../build/"${pkg_dir}" - cp -r "${src_path}"/../examples ../build/"${pkg_dir}" + cd "${MxRec_DIR}" + cp -r ./cust_op ./build/"${pkg_dir}" + cp -r ./examples ./build/"${pkg_dir}" # change dirs and files 's permission - chmod 550 ../build/"${pkg_dir}"/tf1_whl - chmod 550 ../build/"${pkg_dir}"/tf1_whl/mx_rec*.whl - chmod 550 ../build/"${pkg_dir}"/tf2_whl - chmod 550 ../build/"${pkg_dir}"/tf2_whl/mx_rec*.whl - chmod 550 ../build/"${pkg_dir}"/cust_op/ - chmod 550 ../build/"${pkg_dir}"/cust_op/cust_op_by_addr - cd ../build/"${pkg_dir}"/cust_op/cust_op_by_addr + chmod 550 ./build/"${pkg_dir}"/tf1_whl + chmod 550 ./build/"${pkg_dir}"/tf1_whl/mx_rec*.whl + chmod 550 ./build/"${pkg_dir}"/tf2_whl + chmod 550 ./build/"${pkg_dir}"/tf2_whl/mx_rec*.whl + chmod 550 ./build/"${pkg_dir}"/cust_op/ + chmod 550 ./build/"${pkg_dir}"/cust_op/cust_op_by_addr + cd ./build/"${pkg_dir}"/cust_op/cust_op_by_addr chmod 550 *.sh chmod 640 *.json chmod 550 op_host op_kernel op_host/* op_kernel/* cd - - cd ../build + cd ./build tar -zvcf "${release_tar}" "${pkg_dir}" || { warn "compression failed, packages might be broken" } - mv "${release_tar}" "${SCRIPT_DIR}"/../output/ + mv "${release_tar}" ../output/ } -clean() +function clean() { - remove "${ROOT_DIR}"/dist - remove "${ROOT_DIR}"/install - remove "${ROOT_DIR}"/mx_rec.egg-info - remove "${ROOT_DIR}"/src/build - remove "${ROOT_DIR}"/build/bdist.linux-"$(arch)" - remove "${ROOT_DIR}"/build/tf2_env - remove "${ROOT_DIR}"/build/tf1_env - remove "${ROOT_DIR}"/build/lib - remove "${ROOT_DIR}"/build/mindxsdk-mxrec + rm -rf "${MxRec_DIR}"/dist + rm -rf "${MxRec_DIR}"/mx_rec.egg-info + rm -rf "${MxRec_DIR}"/src/build + rm -rf "${MxRec_DIR}"/mx_rec/libasc + rm -rf "${MxRec_DIR}"/build/lib + rm -rf "${MxRec_DIR}"/build/bdist.linux-${ARCH} } +gen_tar_file -if [ "$(uname -m)" = "x86_64" ] -then - echo "-----Build gen tar -----" - bash ${ROOT_DIR}/build/build_tf1_with_opensource.sh - bash ${ROOT_DIR}/build/build_tf2_with_opensource.sh - gen_tar_file - echo "-----Build gen tar finished-----" - - # clean - 
echo "-----Done-----" -fi - -if [ "$(uname -m)" = "aarch64" ] -then - echo "-----Build gen tar -----" - bash ${ROOT_DIR}/build/build_tf1_with_opensource.sh - bash ${ROOT_DIR}/build/build_tf2_with_opensource.sh - gen_tar_file - echo "-----Build gen tar finished-----" +clean - # clean - echo "-----Done-----" -fi \ No newline at end of file +# compile cust op +echo "---------------- start to compile cust op ----------------" +cd "${MxRec_DIR}"/cust_op/cust_op_by_addr +chmod u+x run.sh +./run.sh +echo "---------------- compile cust op success!!!! ----------------" \ No newline at end of file diff --git a/build/move_whl_file_2_pkg_dir.sh b/build/move_whl_file_2_pkg_dir.sh new file mode 100644 index 00000000..d489c2fb --- /dev/null +++ b/build/move_whl_file_2_pkg_dir.sh @@ -0,0 +1,35 @@ +#!/bin/bash +# Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +set -e +warn() { echo >&2 -e "\033[1;31m[WARN ][Depend ] $1\033[1;37m" ; } +ARCH="$(uname -m)" +SCRIPT_DIR=$(dirname "$(readlink -f "$0")") +MxRec_DIR=$(dirname "${SCRIPT_DIR}") +pkg_dir=mindxsdk-mxrec +tf_version=$1 + +function move_whl_file_2_pkg_dir() { + mkdir -p "$SCRIPT_DIR"/"${pkg_dir}"/"${tf_version}"_whl + rm -rf "$SCRIPT_DIR"/"${pkg_dir}"/"${tf_version}"_whl/* + mv ${MxRec_DIR}/dist/mx_rec*.whl "$SCRIPT_DIR"/"${pkg_dir}"/"${tf_version}"_whl + cd "$SCRIPT_DIR"/"${pkg_dir}"/"${tf_version}"_whl + whl_file=$(ls .) 
+ mv "$whl_file" "${whl_file/any/linux_${ARCH}}" + cd - +} + +move_whl_file_2_pkg_dir \ No newline at end of file diff --git a/setup.py b/setup.py index efb4c994..87454130 100644 --- a/setup.py +++ b/setup.py @@ -16,64 +16,39 @@ # ============================================================================== import os +import glob import stat -from setuptools import setup, find_packages -import pkg_resources -from setuptools.extern.packaging import version as packaging_version - - -# Patch Version class to preserve original version string -class NoNormalizeVersion(packaging_version.Version): - def __init__(self, version): - self._orig_version = version - super().__init__(version) - - def __str__(self): - return self._orig_version - - -packaging_version.Version = NoNormalizeVersion -# Patch safe_version() to prevent version normalization -pkg_resources.safe_version = lambda v: v - -try: - with open("README.md") as file: - LONG_DESCRIPTION = file.read() -except IOError: - LONG_DESCRIPTION = "" - -env_version = os.getenv("VERSION") -VERSION = env_version if env_version is not None else '5.0.rc3' - -INIT_FILE = "mx_rec/__init__.py" -with open(INIT_FILE, 'r') as file: - lines = file.readlines() - -for idx, line in enumerate(lines): - if "__version__ = " not in line: - continue - lines[idx] = f"__version__ = '{VERSION}'\n" - break - -FLAG = os.O_WRONLY | os.O_TRUNC -MODE = stat.S_IWUSR | stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH -with os.fdopen(os.open(INIT_FILE, FLAG, MODE), 'w') as out: - out.writelines(lines) - -setup( - name='mx_rec', - version=VERSION, - author='HUAWEI Inc', - description='MindX SDK Recommend', - long_description=LONG_DESCRIPTION, - # include mx_rec - packages=find_packages( - where='.', - include=["mx_rec*"] - ), - package_dir={}, - # other file - package_data={'': ['tools/*', 'tools/*/*', '*.yml', '*.sh', '*.so*']}, - # dependency - python_requires='>=3.7.5' -) +import shutil +import subprocess + +# get the absolute path of the Python 3.7 program +res = subprocess.run(["/usr/bin/which", "python3.7"], stdout=subprocess.PIPE, text=True, shell=False) +if res.returncode: + raise RuntimeError("get the absolute path of the Python 3.7 program failed!") +python37_path = res.stdout.strip() + +# add execution permission to the file with the .sh suffix +scripts = glob.glob(os.path.join(os.getcwd(), "build/*.sh")) +for script in scripts: + if os.path.isfile(script): + os.chmod(script, os.stat(script).st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH) + +# clean pkg_dir existed +PKG_DIR = "./build/mindxsdk-mxrec" +if os.path.exists(PKG_DIR): + shutil.rmtree(PKG_DIR) + +# build tf1's wheel file +res = subprocess.run([python37_path, "setup_tf1.py", "bdist_wheel"], shell=False) +if res.returncode: + raise RuntimeError(f"build tf1's wheel file failed!") + +# build tf2's wheel file +res = subprocess.run([python37_path, "setup_tf2.py", "bdist_wheel"], shell=False) +if res.returncode: + raise RuntimeError(f"build tf2's wheel file failed!") + +# copy cust_op, examples files, etc. Then gen mxrec's tar pkg +res = subprocess.run(["./build/gen_mxrec_tar_pkg.sh"], shell=False) +if res.returncode: + raise RuntimeError(f"gen mxrec's tar pkg failed!") diff --git a/setup_tf1.py b/setup_tf1.py new file mode 100644 index 00000000..df8c731e --- /dev/null +++ b/setup_tf1.py @@ -0,0 +1,96 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +import os +import stat +import subprocess +from setuptools import setup, find_packages +import pkg_resources +from setuptools.extern.packaging import version as packaging_version + +script_path = os.getcwd() + + +# Patch Version class to preserve original version string +class NoNormalizeVersion(packaging_version.Version): + def __init__(self, version): + self._orig_version = version + super().__init__(version) + + def __str__(self): + return self._orig_version + + +def safe_version(v): + return v + + +packaging_version.Version = NoNormalizeVersion +# Patch safe_version() to prevent version normalization +pkg_resources.safe_version = safe_version + +try: + with open("README.md") as file: + LONG_DESCRIPTION = file.read() +except IOError: + LONG_DESCRIPTION = "" + +env_version = os.getenv("VERSION") +VERSION = env_version if env_version is not None else '6.0.RC2' + +INIT_FILE = "mx_rec/__init__.py" +with open(INIT_FILE, 'r') as file: + lines = file.readlines() + +for idx, line in enumerate(lines): + if "__version__ = " not in line: + continue + lines[idx] = f"__version__ = '{VERSION}'\n" + break + +FLAG = os.O_WRONLY | os.O_TRUNC +MODE = stat.S_IWUSR | stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH +with os.fdopen(os.open(INIT_FILE, FLAG, MODE), 'w') as out: + out.writelines(lines) + +# compile so files +tf1_script = os.path.join(script_path, "./build/build_tf1.sh") +res = subprocess.run([tf1_script], shell=False) +if res.returncode: + raise RuntimeError("compile so files failed!") + +setup( + name='mx_rec', + version=VERSION, + author='HUAWEI Inc', + description='MindX SDK Recommend', + long_description=LONG_DESCRIPTION, + # include mx_rec + packages=find_packages( + where='.', + include=["mx_rec*"] + ), + # other file + package_data={'': ['tools/*', 'tools/*/*', '*.yml', '*.sh', '*.so*']}, + # dependency + python_requires='>=3.7.5' +) + +move_whl_script = os.path.join(script_path, "./build/move_whl_file_2_pkg_dir.sh") +res = subprocess.run([move_whl_script, "tf1"], shell=False) +if res.returncode: + raise RuntimeError(f"move tf1 whl file to pkg dir failed!") diff --git a/setup_tf2.py b/setup_tf2.py new file mode 100644 index 00000000..31e61a99 --- /dev/null +++ b/setup_tf2.py @@ -0,0 +1,96 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +import os +import stat +import subprocess +from setuptools import setup, find_packages +import pkg_resources +from setuptools.extern.packaging import version as packaging_version + +script_path = os.getcwd() + + +# Patch Version class to preserve original version string +class NoNormalizeVersion(packaging_version.Version): + def __init__(self, version): + self._orig_version = version + super().__init__(version) + + def __str__(self): + return self._orig_version + + +def safe_version(v): + return v + + +packaging_version.Version = NoNormalizeVersion +# Patch safe_version() to prevent version normalization +pkg_resources.safe_version = safe_version + +try: + with open("README.md") as file: + LONG_DESCRIPTION = file.read() +except IOError: + LONG_DESCRIPTION = "" + +env_version = os.getenv("VERSION") +VERSION = env_version if env_version is not None else '6.0.RC2' + +INIT_FILE = "mx_rec/__init__.py" +with open(INIT_FILE, 'r') as file: + lines = file.readlines() + +for idx, line in enumerate(lines): + if "__version__ = " not in line: + continue + lines[idx] = f"__version__ = '{VERSION}'\n" + break + +FLAG = os.O_WRONLY | os.O_TRUNC +MODE = stat.S_IWUSR | stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH +with os.fdopen(os.open(INIT_FILE, FLAG, MODE), 'w') as out: + out.writelines(lines) + +# compile so files +tf2_script = os.path.join(script_path, "./build/build_tf2.sh") +res = subprocess.run([tf2_script], shell=False) +if res.returncode: + raise RuntimeError("compile so files failed!") + +setup( + name='mx_rec', + version=VERSION, + author='HUAWEI Inc', + description='MindX SDK Recommend', + long_description=LONG_DESCRIPTION, + # include mx_rec + packages=find_packages( + where='.', + include=["mx_rec*"] + ), + # other file + package_data={'': ['tools/*', 'tools/*/*', '*.yml', '*.sh', '*.so*']}, + # dependency + python_requires='>=3.7.5' +) + +move_whl_script = os.path.join(script_path, "./build/move_whl_file_2_pkg_dir.sh") +res = subprocess.run([move_whl_script, "tf2"], shell=False) +if res.returncode: + raise RuntimeError(f"move tf2 whl file to pkg dir failed!") diff --git a/tests/run_python_dt.sh b/tests/run_python_dt.sh index f29bf7b5..139e7ff7 100644 --- a/tests/run_python_dt.sh +++ b/tests/run_python_dt.sh @@ -26,7 +26,7 @@ if [ $ARCH == "aarch64" ]; then fi # build mxRec and get output directory -bash "$TOP_PATH"/build/build_tf1_with_opensource.sh +bash "$TOP_PATH"/build/build_tf1.sh # create libasc directory and copy so files into it cd "$TOP_PATH"/mx_rec -- Gitee From 27252d274752efc6d96268d8b8476934b8861aa2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=BD=95=E9=9C=96?= Date: Sat, 13 Apr 2024 19:22:39 +0800 Subject: [PATCH 034/302] =?UTF-8?q?=E5=86=92=E7=83=9F=E5=A4=B1=E8=B4=A5?= =?UTF-8?q?=EF=BC=8C=E5=9B=9E=E9=80=80=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/DCNv2/main_mxrec.py | 4 +- examples/demo/little_demo/run_mode.py | 4 +- .../demo/little_demo_estimator/nn_optim.py | 4 +- examples/dlrm/model/gradient_descent_w.py | 10 -- examples/dlrm/model/main_mxrec.py | 4 +- mx_rec/constants/constants.py | 2 + mx_rec/core/asc/build_graph.py | 51 ++++++++++ mx_rec/core/asc/manager.py | 6 +- mx_rec/core/emb/dynamic_sparse_embedding.py | 9 +- mx_rec/core/emb/sparse_embedding.py | 7 +- mx_rec/optimizers/adagrad.py | 15 +-- mx_rec/optimizers/base.py | 64 ------------ mx_rec/optimizers/ftrl.py | 15 +-- mx_rec/optimizers/gradient_descent.py | 
5 - mx_rec/optimizers/gradient_descent_by_addr.py | 5 - mx_rec/optimizers/lazy_adam.py | 10 +- mx_rec/optimizers/lazy_adam_by_addr.py | 10 +- src/core/hybrid_mgmt/hybrid_mgmt.cpp | 6 +- src/core/key_process/key_process.cpp | 5 +- src/core/utils/common.cpp | 1 - src/core/utils/common.h | 8 +- src/core/utils/config.cpp | 15 ++- src/core/utils/config.h | 7 ++ src/pybind/module_main.cpp | 2 - src/tests/utils/config_test.cpp | 4 + tests/mx_rec/core/mock_class.py | 1 - tests/mx_rec/core/test_build_graph.py | 99 ++++++++++++++++++- tests/mx_rec/core/test_manager.py | 8 -- 28 files changed, 213 insertions(+), 168 deletions(-) diff --git a/examples/DCNv2/main_mxrec.py b/examples/DCNv2/main_mxrec.py index 0a9462bc..d5a51312 100644 --- a/examples/DCNv2/main_mxrec.py +++ b/examples/DCNv2/main_mxrec.py @@ -336,9 +336,9 @@ if __name__ == "__main__": train_ops.append(dense_optimizer.apply_gradients(avg_grads)) if use_dynamic_expansion: - from mx_rec.constants.constants import ASCEND_SPARSE_LOOKUP_LOCAL_EMB, ASCEND_SPARSE_LOOKUP_ID_OFFSET + from mx_rec.constants.constants import ASCEND_SPARSE_LOOKUP_LOCAL_EMB, ASCEND_SPARSE_LOOKUP_UNIQUE_KEYS - train_address_list = tf.compat.v1.get_collection(ASCEND_SPARSE_LOOKUP_ID_OFFSET) + train_address_list = tf.compat.v1.get_collection(ASCEND_SPARSE_LOOKUP_UNIQUE_KEYS) train_emb_list = tf.compat.v1.get_collection(ASCEND_SPARSE_LOOKUP_LOCAL_EMB) # do sparse optimization by addr sparse_grads = sparse_optimizer.compute_gradients(loss, train_emb_list) # local_embedding diff --git a/examples/demo/little_demo/run_mode.py b/examples/demo/little_demo/run_mode.py index 6a3301c4..e750ceb5 100644 --- a/examples/demo/little_demo/run_mode.py +++ b/examples/demo/little_demo/run_mode.py @@ -95,11 +95,11 @@ class RunMode: self.train_ops.append(dense_optimizer.apply_gradients(avg_grads)) if bool(int(os.getenv("USE_DYNAMIC_EXPANSION", 0))): - from mx_rec.constants.constants import ASCEND_SPARSE_LOOKUP_LOCAL_EMB, ASCEND_SPARSE_LOOKUP_ID_OFFSET + from mx_rec.constants.constants import ASCEND_SPARSE_LOOKUP_LOCAL_EMB, ASCEND_SPARSE_LOOKUP_UNIQUE_KEYS train_emb_list = tf.compat.v1.get_collection(ASCEND_SPARSE_LOOKUP_LOCAL_EMB) - train_address_list = tf.compat.v1.get_collection(ASCEND_SPARSE_LOOKUP_ID_OFFSET) + train_address_list = tf.compat.v1.get_collection(ASCEND_SPARSE_LOOKUP_UNIQUE_KEYS) # do sparse optimization by addr local_grads = tf.gradients(loss, train_emb_list) # local_embedding diff --git a/examples/demo/little_demo_estimator/nn_optim.py b/examples/demo/little_demo_estimator/nn_optim.py index 3be3c7ed..4438627d 100644 --- a/examples/demo/little_demo_estimator/nn_optim.py +++ b/examples/demo/little_demo_estimator/nn_optim.py @@ -73,11 +73,11 @@ def get_train_op_list(losses, learning_rate): # do sparse optimization if use_dynamic_expansion: - from mx_rec.constants.constants import ASCEND_SPARSE_LOOKUP_LOCAL_EMB, ASCEND_SPARSE_LOOKUP_ID_OFFSET + from mx_rec.constants.constants import ASCEND_SPARSE_LOOKUP_LOCAL_EMB, ASCEND_SPARSE_LOOKUP_UNIQUE_KEYS train_emb_list = tf.compat.v1.get_collection(ASCEND_SPARSE_LOOKUP_LOCAL_EMB) - train_address_list = tf.compat.v1.get_collection(ASCEND_SPARSE_LOOKUP_ID_OFFSET) + train_address_list = tf.compat.v1.get_collection(ASCEND_SPARSE_LOOKUP_UNIQUE_KEYS) local_grads = tf.gradients(loss, train_emb_list) # local_embedding grads_and_vars = [(grad, address) for grad, address in zip(local_grads, train_address_list)] diff --git a/examples/dlrm/model/gradient_descent_w.py b/examples/dlrm/model/gradient_descent_w.py index 6c34b726..f3ae78d7 100644 --- 
a/examples/dlrm/model/gradient_descent_w.py +++ b/examples/dlrm/model/gradient_descent_w.py @@ -47,16 +47,6 @@ class CustomizedGradientDescentWithWeighDecay(gradient_descent.GradientDescentOp super(CustomizedGradientDescentWithWeighDecay, self).__init__( learning_rate=learning_rate, use_locking=use_locking, name=self.unique_name ) - self._slot_num = 0 - self._derivative = 1 - - @property - def slot_num(self): - return self._slot_num - - @property - def derivative(self): - return self._derivative def initialize_slots(self, var, table_instance): logger.info("no slot for gradient descent") diff --git a/examples/dlrm/model/main_mxrec.py b/examples/dlrm/model/main_mxrec.py index ab2eb04c..4bbd16de 100644 --- a/examples/dlrm/model/main_mxrec.py +++ b/examples/dlrm/model/main_mxrec.py @@ -24,7 +24,7 @@ import tensorflow as tf from sklearn.metrics import roc_auc_score import numpy as np -from mx_rec.constants.constants import ASCEND_SPARSE_LOOKUP_LOCAL_EMB, ASCEND_SPARSE_LOOKUP_ID_OFFSET +from mx_rec.constants.constants import ASCEND_SPARSE_LOOKUP_LOCAL_EMB, ASCEND_SPARSE_LOOKUP_UNIQUE_KEYS from mx_rec.core.asc.helper import FeatureSpec, get_asc_insert_func from mx_rec.core.asc.manager import start_asc_pipeline from mx_rec.core.embedding import create_table, sparse_lookup @@ -346,7 +346,7 @@ if __name__ == "__main__": train_ops.append(dense_optimizer.apply_gradients(avg_grads)) if use_dynamic_expansion: - train_address_list = tf.compat.v1.get_collection(ASCEND_SPARSE_LOOKUP_ID_OFFSET) + train_address_list = tf.compat.v1.get_collection(ASCEND_SPARSE_LOOKUP_UNIQUE_KEYS) # do sparse optimization by addr sparse_grads = list(grads[-1]) # local_embedding grads_and_vars = [(grad, address) for grad, address in zip(sparse_grads, train_address_list)] diff --git a/mx_rec/constants/constants.py b/mx_rec/constants/constants.py index 2c2cd2fe..03fa28b4 100644 --- a/mx_rec/constants/constants.py +++ b/mx_rec/constants/constants.py @@ -22,6 +22,7 @@ ASCEND_GLOBAL_HASHTABLE_COLLECTION = "ASCEND_GLOBAL_HASHTABLE_COLLECTION" ASCEND_CUTTING_POINT_INITIALIZER = "ASCEND_CUTTING_POINT_INITIALIZER" ASCEND_SPARSE_LOOKUP_ENTRANCE = "ASCEND_SPARSE_LOOKUP_ENTRANCE" ASCEND_SPARSE_LOOKUP_ID_OFFSET = "ASCEND_SPARSE_LOOKUP_ID_OFFSET" +ASCEND_SPARSE_LOOKUP_UNIQUE_KEYS = "ASCEND_SPARSE_LOOKUP_UNIQUE_KEYS" ASCEND_TIMESTAMP = "ASCEND_TIMESTAMP" ASCEND_SPARSE_LOOKUP_LOCAL_EMB = "ASCEND_SPARSE_LOOKUP_LOCAL_EMB" EMPTY_STR = "" @@ -165,6 +166,7 @@ class ASCAnchorAttr(Enum): MOCK_LOOKUP_RESULT = "mock_lookup_result" RESTORE_VECTOR_SECOND = "restore_vector_second" UNIQUE_KEYS = "unique_keys" + GRADIENTS_STRATEGY = "gradients_strategy" IS_GRAD = "is_grad" diff --git a/mx_rec/core/asc/build_graph.py b/mx_rec/core/asc/build_graph.py index 2bb72621..5e9fea58 100644 --- a/mx_rec/core/asc/build_graph.py +++ b/mx_rec/core/asc/build_graph.py @@ -22,6 +22,7 @@ import tensorflow as tf import mxrec_pybind from mx_rec.util.initialize import ConfigInitializer from mx_rec.util.tf_version_adapter import npu_ops +from mx_rec.constants.constants import TRAIN_CHANNEL_ID from mx_rec.util.log import logger @@ -80,6 +81,46 @@ def get_id_offsets(max_lookup_vec_size, config): return id_offsets, swap_pos, swap_len +def get_restore_vector_second(max_lookup_vec_size: int, config: dict) -> tf.Tensor: + """ + Get restore vector which is calculated after the second all2all + :param max_lookup_vec_size: the size of restore_vector_second + :param config: embedding config + :return: the restore vector calculated after the second all2all + """ + logger.debug('Channel 
%s_restore_second_%s was built for getnext', + config.get("table_name"), config.get("channel_id")) + with tf.compat.v1.variable_scope(config.get("table_name"), reuse=tf.compat.v1.AUTO_REUSE): + restore_vector_second = npu_ops.gen_npu_ops.get_next( + output_types=[tf.int32], + output_shapes=[[max_lookup_vec_size]], + channel_name=f'{config.get("table_name")}_restore_second_{config.get("channel_id")}')[0] + return restore_vector_second + + +def get_unique_keys(max_lookup_vec_size: int, config: dict) -> tf.Tensor: + """ + Get the global unique keys which is calculated after the second all2all + :param max_lookup_vec_size: the size of global unique keys + :param config: embedding config + :return: the global unique keys calculated after the second all2all + """ + logger.debug('Channel %s_uniquekeys_%s was built for getnext', config.get("table_name"), config.get("channel_id")) + with tf.compat.v1.variable_scope(config.get("table_name"), reuse=tf.compat.v1.AUTO_REUSE): + if config.get("use_dynamic_expansion"): + unique_keys = npu_ops.gen_npu_ops.get_next( + output_types=[tf.int64], + output_shapes=[[max_lookup_vec_size]], + channel_name=f'{config.get("table_name")}_uniquekeys_{config.get("channel_id")}')[0] + return unique_keys + + unique_keys = npu_ops.gen_npu_ops.get_next( + output_types=[tf.int32], + output_shapes=[[max_lookup_vec_size]], + channel_name=f'{config.get("table_name")}_uniquekeys_{config.get("channel_id")}')[0] + return unique_keys + + def get_all2all_args(use_static: bool, config: dict) -> Optional[list]: """ Get all2all parameters for dynamic condition @@ -170,4 +211,14 @@ def get_preprocessed_tensor_for_asc(table, config): 'all2all_args': all2all_args, } + if config.get("channel_id") != TRAIN_CHANNEL_ID: + return result + + with tf.compat.v1.variable_scope("restore_vector_second"): + restore_vector_second = get_restore_vector_second(max_lookup_vec_size, config) + + with tf.compat.v1.variable_scope("unique_keys"): + unique_keys = get_unique_keys(max_lookup_vec_size, config) + result.update({'restore_vector_second': restore_vector_second, 'unique_keys': unique_keys}) + return result diff --git a/mx_rec/core/asc/manager.py b/mx_rec/core/asc/manager.py index f50037ea..2829ab98 100644 --- a/mx_rec/core/asc/manager.py +++ b/mx_rec/core/asc/manager.py @@ -18,7 +18,7 @@ import tensorflow as tf from mxrec_pybind import InitializeInfo, ConstantInitializerInfo, NormalInitializerInfo, EmbInfo, EmbInfoParams, \ - ThresholdValue, HybridMgmt, RankInfo, USE_STATIC, USE_HOT, USE_DYNAMIC_EXPANSION, USE_SUM_SAME_ID_GRADIENTS + ThresholdValue, HybridMgmt, RankInfo, USE_STATIC, USE_HOT, USE_DYNAMIC_EXPANSION from mx_rec.util.communication.hccl_ops import get_rank_id, get_device_id, get_rank_size from mx_rec.util.initialize import ConfigInitializer @@ -205,10 +205,6 @@ def initialize_emb_cache(table_info_list, threshold_list): if ConfigInitializer.get_instance().use_dynamic_expansion: option = option | USE_DYNAMIC_EXPANSION - optimizer = ConfigInitializer.get_instance().optimizer_config.optimizer_instance - if optimizer.derivative == 2: - option = option | USE_SUM_SAME_ID_GRADIENTS - # [train_steps, eval_steps, save_steps] pass step information to HybridMgmt for data process loop rank_info = RankInfo(rank_id, device_id, rank_size, option, [train_steps, eval_steps, save_steps]) diff --git a/mx_rec/core/emb/dynamic_sparse_embedding.py b/mx_rec/core/emb/dynamic_sparse_embedding.py index 671c593e..bf1c6569 100644 --- a/mx_rec/core/emb/dynamic_sparse_embedding.py +++ 
b/mx_rec/core/emb/dynamic_sparse_embedding.py @@ -6,9 +6,10 @@ import abc from typing import Optional, Union, Callable import tensorflow as tf +from tensorflow.python.ops import array_ops from mx_rec.constants.constants import ASCEND_TABLE_NAME_MUST_CONTAIN, ASCEND_SPARSE_LOOKUP_LOCAL_EMB, \ - ASCEND_SPARSE_LOOKUP_ID_OFFSET + ASCEND_SPARSE_LOOKUP_UNIQUE_KEYS from mx_rec.core.asc.feature_spec import FeatureSpec from mx_rec.core.asc.build_graph import get_preprocessed_tensor_for_asc from mx_rec.core.emb.base_sparse_embedding import BaseSparseEmbedding @@ -50,7 +51,9 @@ class DynamicSparseEmbedding(BaseSparseEmbedding): def _get_update_grad(self, local_grad: tf.Tensor, result: dict, table: Union[tf.compat.v1.Variable, tf.Tensor]) -> Union[tf.IndexedSlices, tf.Tensor]: - return local_grad + return tf.compat.v1.unsorted_segment_sum(local_grad, + result.get("restore_vector_second"), + array_ops.shape(result.get("unique_keys"))[0]) def _get_local_embeddings(self, table: Union[tf.compat.v1.Variable, tf.Tensor], result: dict, feature_spec: FeatureSpec, **kwargs) -> tf.Tensor: @@ -69,7 +72,7 @@ class DynamicSparseEmbedding(BaseSparseEmbedding): return sparse_forward_fn(local_embeddings) tf.compat.v1.add_to_collection(ASCEND_SPARSE_LOOKUP_LOCAL_EMB, local_embeddings) - tf.compat.v1.add_to_collection(ASCEND_SPARSE_LOOKUP_ID_OFFSET, result.get("id_offsets")) + tf.compat.v1.add_to_collection(ASCEND_SPARSE_LOOKUP_UNIQUE_KEYS, result.get("unique_keys")) return sparse_forward_fn(local_embeddings) diff --git a/mx_rec/core/emb/sparse_embedding.py b/mx_rec/core/emb/sparse_embedding.py index 938f917d..d8ce63b1 100644 --- a/mx_rec/core/emb/sparse_embedding.py +++ b/mx_rec/core/emb/sparse_embedding.py @@ -53,8 +53,11 @@ class SparseEmbedding(BaseSparseEmbedding): def _get_update_grad(self, local_grad: tf.Tensor, result: dict, table: Union[tf.compat.v1.Variable, tf.Tensor]) -> Union[tf.IndexedSlices, tf.Tensor]: - return ops.IndexedSlices(values=local_grad, - indices=result.get("id_offsets"), + unique_local_grad = tf.compat.v1.unsorted_segment_sum(local_grad, + result.get("restore_vector_second"), + array_ops.shape(result.get("unique_keys"))[0]) + return ops.IndexedSlices(values=unique_local_grad, + indices=result.get("unique_keys"), dense_shape=tf.shape(table)) def _get_local_embeddings(self, table: Union[tf.compat.v1.Variable, tf.Tensor], result: dict, diff --git a/mx_rec/optimizers/adagrad.py b/mx_rec/optimizers/adagrad.py index 4ba444a6..d99be3b3 100644 --- a/mx_rec/optimizers/adagrad.py +++ b/mx_rec/optimizers/adagrad.py @@ -76,16 +76,6 @@ class CustomizedAdagrad(adagrad.AdagradOptimizer, CustomizedOptimizer): initial_accumulator_value=initial_accumulator_value, use_locking=use_locking, name=self.unique_name) - self._slot_num = 1 - self._derivative = 2 - - @property - def slot_num(self): - return self._slot_num - - @property - def derivative(self): - return self._derivative def initialize_slots(self, var, table_instance): # Create slots for the first and second moments. 
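With this revert, duplicate-ID gradients are summed inside the embedding layer again (the unsorted_segment_sum hunks above), so the optimizer's _apply_sparse consumes grad.values and grad.indices directly, as the next hunk shows. A minimal sketch of that hand-off, with hypothetical shapes:

```python
import tensorflow as tf

# Hypothetical values: one gradient row per unique key, already summed.
unique_grads = tf.constant([[4.0], [7.0], [4.0]])
unique_keys = tf.constant([7, 3, 9])      # row indices into the embedding table
table = tf.zeros([16, 1])

# sparse_embedding._get_update_grad packages the update like this; the
# optimizer then reads it back as grad.values / grad.indices.
sparse_grad = tf.IndexedSlices(values=unique_grads,
                               indices=unique_keys,
                               dense_shape=tf.shape(table))
```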
@@ -131,11 +121,10 @@ class CustomizedAdagrad(adagrad.AdagradOptimizer, CustomizedOptimizer): def _apply_sparse(self, grad, var): acc = self.get_slot(var, "acc") - unique_local_grad, unique_keys = self.sum_same_id_gradients(grad=grad.values, var=var, is_expansion=False) return training_ops.sparse_apply_adagrad( var, acc, math_ops.cast(self._learning_rate_tensor, var.dtype.base_dtype), - unique_local_grad, - unique_keys, + grad.values, + grad.indices, use_locking=self._use_locking) def _resource_apply_sparse(self, grad, var, indices): diff --git a/mx_rec/optimizers/base.py b/mx_rec/optimizers/base.py index 49594d40..a5d68a70 100644 --- a/mx_rec/optimizers/base.py +++ b/mx_rec/optimizers/base.py @@ -21,57 +21,12 @@ from __future__ import print_function from collections import defaultdict -import tensorflow as tf from tensorflow.python.framework import ops -from tensorflow.python.ops import array_ops from tensorflow.python.training.optimizer import _TensorProcessor -from mx_rec.util.tf_version_adapter import npu_ops -from mx_rec.util.initialize import ConfigInitializer from mx_rec.util.log import logger -def get_restore_vector_second(table_name: str) -> tf.Tensor: - """ - Get restore vector which is calculated after the second all2all - :param table_name: embedding table_name - :return: the restore vector calculated after the second all2all - """ - channel_id = 0 - logger.debug('Channel %s_restore_second_%s was built for getnext', - table_name, channel_id) - with tf.compat.v1.variable_scope(table_name, reuse=tf.compat.v1.AUTO_REUSE): - restore_vector_second = npu_ops.gen_npu_ops.get_next( - output_types=[tf.int32], - output_shapes=[[None]], - channel_name=f'{table_name}_restore_second_{channel_id}')[0] - return restore_vector_second - - -def get_unique_keys(table_name: str, is_expansion: bool) -> tf.Tensor: - """ - Get the global unique keys which is calculated after the second all2all - :param table_name: embedding table_name - :param is_expansion: use dynamic expansion - :return: the global unique keys calculated after the second all2all - """ - channel_id = 0 - logger.debug('Channel %s_uniquekeys_%s was built for getnext', table_name, channel_id) - with tf.compat.v1.variable_scope(table_name, reuse=tf.compat.v1.AUTO_REUSE): - if is_expansion: - unique_keys = npu_ops.gen_npu_ops.get_next( - output_types=[tf.int64], - output_shapes=[[None]], - channel_name=f'{table_name}_uniquekeys_{channel_id}')[0] - return unique_keys - - unique_keys = npu_ops.gen_npu_ops.get_next( - output_types=[tf.int32], - output_shapes=[[None]], - channel_name=f'{table_name}_uniquekeys_{channel_id}')[0] - return unique_keys - - class CustomizedOptimizer: name_counter = defaultdict(int) @@ -80,25 +35,6 @@ class CustomizedOptimizer: self.unique_name = "" self.base_name = "" - @staticmethod - def sum_same_id_gradients(grad, var, is_expansion): - if isinstance(var, ops.Tensor): - # 扩容模式从scope获取表名,偏移是-2 - table_name = var.op.name.split('/')[-2] - else: - table_instance = ConfigInitializer.get_instance().sparse_embed_config.get_table_instance(var) - table_name = table_instance.table_name - with tf.compat.v1.variable_scope("restore_vector_second"): - restore_vector_second = get_restore_vector_second(table_name) - - with tf.compat.v1.variable_scope("unique_keys"): - unique_keys = get_unique_keys(table_name, is_expansion) - - unique_local_grad = tf.compat.v1.unsorted_segment_sum(grad, - restore_vector_second, - array_ops.shape(unique_keys)[0]) - return unique_local_grad, unique_keys - def initialize_slots(self, var, 
table_instance): raise NotImplementedError(f"Please define a specific realization on {self.__class__.__name__}") diff --git a/mx_rec/optimizers/ftrl.py b/mx_rec/optimizers/ftrl.py index 3659ffcd..5c68b929 100644 --- a/mx_rec/optimizers/ftrl.py +++ b/mx_rec/optimizers/ftrl.py @@ -80,16 +80,11 @@ class CustomizedFtrl(ftrl.FtrlOptimizer, CustomizedOptimizer): l2_shrinkage_regularization_strength=kwargs.get("l2_shrinkage_regularization_strength", 0.0) ) self._slot_num = 2 - self._derivative = 2 @property def slot_num(self): return self._slot_num - @property - def derivative(self): - return self._derivative - def initialize_slots(self, var, table_instance): val = constant_op.constant( self._initial_accumulator_value, dtype=var.dtype, shape=var.get_shape()) @@ -140,19 +135,17 @@ class CustomizedFtrl(ftrl.FtrlOptimizer, CustomizedOptimizer): self._resource_scatter_nd_update) def _apply_sparse(self, grad, var): - unique_local_grad, unique_keys = self.sum_same_id_gradients(grad=grad.values, var=var, is_expansion=False) - if self._l2_shrinkage_regularization_strength <= 0.0: return self._apply_sparse_shared( - unique_local_grad, + grad.values, var, - unique_keys, + grad.indices, lambda x, i, v: tf.compat.v1.scatter_nd_update(x, i, v)) else: return self._apply_sparse_shared_v2( - unique_local_grad, + grad.values, var, - unique_keys, + grad.indices, lambda x, i, v: tf.compat.v1.scatter_nd_update(x, i, v)) def _apply_sparse_shared(self, grad, var, indices, scatter_nd_update): diff --git a/mx_rec/optimizers/gradient_descent.py b/mx_rec/optimizers/gradient_descent.py index 2ba72789..6881d6ad 100644 --- a/mx_rec/optimizers/gradient_descent.py +++ b/mx_rec/optimizers/gradient_descent.py @@ -55,16 +55,11 @@ class CustomizedGradientDescent(gradient_descent.GradientDescentOptimizer, Custo super(CustomizedGradientDescent, self).__init__(learning_rate=learning_rate, use_locking=use_locking, name=self.unique_name) self._slot_num = 0 - self._derivative = 1 @property def slot_num(self): return self._slot_num - @property - def derivative(self): - return self._derivative - def initialize_slots(self, var, table_instance): return [] diff --git a/mx_rec/optimizers/gradient_descent_by_addr.py b/mx_rec/optimizers/gradient_descent_by_addr.py index 11a9fda6..22b33852 100644 --- a/mx_rec/optimizers/gradient_descent_by_addr.py +++ b/mx_rec/optimizers/gradient_descent_by_addr.py @@ -60,16 +60,11 @@ class CustomizedGradientDescentByAddr(gradient_descent.GradientDescentOptimizer, name=self.unique_name) self._slot_num = 0 - self._derivative = 1 @property def slot_num(self): return self._slot_num - @property - def derivative(self): - return self._derivative - def initialize_slots(self, var, table_instance): return [] diff --git a/mx_rec/optimizers/lazy_adam.py b/mx_rec/optimizers/lazy_adam.py index bab8245f..d79b6d23 100644 --- a/mx_rec/optimizers/lazy_adam.py +++ b/mx_rec/optimizers/lazy_adam.py @@ -72,16 +72,11 @@ class CustomizedLazyAdam(adam.AdamOptimizer, CustomizedOptimizer): super(CustomizedLazyAdam, self).__init__(learning_rate=learning_rate, beta1=beta1, beta2=beta2, epsilon=epsilon, use_locking=use_locking, name=self.unique_name) self._slot_num = 2 - self._derivative = 2 @property def slot_num(self): return self._slot_num - @property - def derivative(self): - return self._derivative - def initialize_slots(self, var, table_instance): # Create slots for the first and second moments. 
def creat_one_single_slot(var, op_name): @@ -149,11 +144,10 @@ class CustomizedLazyAdam(adam.AdamOptimizer, CustomizedOptimizer): self._resource_scatter_nd_add) def _apply_sparse(self, grad, var): - unique_local_grad, unique_keys = self.sum_same_id_gradients(grad=grad.values, var=var, is_expansion=False) return self._apply_sparse_shared( - unique_local_grad, + grad.values, var, - unique_keys, + grad.indices, lambda x, i, v: tf.compat.v1.scatter_nd_add(x, i, v)) def _apply_sparse_shared(self, grad, var, indices, scatter_nd_add): diff --git a/mx_rec/optimizers/lazy_adam_by_addr.py b/mx_rec/optimizers/lazy_adam_by_addr.py index cd4ee878..92252824 100644 --- a/mx_rec/optimizers/lazy_adam_by_addr.py +++ b/mx_rec/optimizers/lazy_adam_by_addr.py @@ -73,16 +73,11 @@ class CustomizedLazyAdamByAddress(adam.AdamOptimizer, CustomizedOptimizer): name=self.unique_name) self._slot_num = 2 - self._derivative = 2 @property def slot_num(self): return self._slot_num - @property - def derivative(self): - return self._derivative - def get_slot_init_values(self): # return state value list of adam that needs to initialize in ASC DDR. initial_momentum_value = 0.0 @@ -114,10 +109,9 @@ class CustomizedLazyAdamByAddress(adam.AdamOptimizer, CustomizedOptimizer): return temp def _apply_sparse(self, grad, addr): - unique_local_grad, unique_addr = self.sum_same_id_gradients(grad=grad, var=addr, is_expansion=True) return self._apply_sparse_shared( - unique_local_grad, - unique_addr) + grad, + addr) def _apply_sparse_shared(self, grad, addr): power_b1, power_b2 = self._get_beta_accumulators() diff --git a/src/core/hybrid_mgmt/hybrid_mgmt.cpp b/src/core/hybrid_mgmt/hybrid_mgmt.cpp index eb618f40..894dc230 100644 --- a/src/core/hybrid_mgmt/hybrid_mgmt.cpp +++ b/src/core/hybrid_mgmt/hybrid_mgmt.cpp @@ -704,7 +704,8 @@ bool HybridMgmt::ParseKeysHBM(int channelId, int& batchId) LOG_DEBUG("channelId:{} batchId:{}, sendLookupSyncTC(ms):{}", channelId, batchId, sendLookupSyncTC.ElapsedMS()); // 训练时,使用全局去重聚合梯度,发送全局去重的key和对应的恢复向量 - if (mgmtRankInfo.useSumSameIdGradients && channelId == TRAIN_CHANNEL_ID) { + if (GlobalEnv::applyGradientsStrategy == ApplyGradientsStrategyOptions::SUM_SAME_ID_GRADIENTS_AND_APPLY && + channelId == TRAIN_CHANNEL_ID) { SendUniqKeysAndRestoreVecHBM(channelId, batchId, embInfo, infoVecs); } @@ -863,7 +864,8 @@ bool HybridMgmt::ProcessEmbInfo(const std::string& embName, int batchId, int cha LOG_DEBUG("channelId:{} batchId:{}, hostHashMapProcessTC(ms):{}", channelId, batchId, hostHashMapProcessTC.ElapsedMS()); - if (mgmtRankInfo.useSumSameIdGradients && channelId == TRAIN_CHANNEL_ID && remainBatchOut) { + if (GlobalEnv::applyGradientsStrategy == ApplyGradientsStrategyOptions::SUM_SAME_ID_GRADIENTS_AND_APPLY && + channelId == TRAIN_CHANNEL_ID && remainBatchOut) { SendUniqKeysAndRestoreVecDDR(embName, batchId, channelId, ddrParam); } diff --git a/src/core/key_process/key_process.cpp b/src/core/key_process/key_process.cpp index b72f3c8e..f76f6907 100644 --- a/src/core/key_process/key_process.cpp +++ b/src/core/key_process/key_process.cpp @@ -470,9 +470,8 @@ bool KeyProcess::KeyProcessTaskHelper(unique_ptr& batch, int channel, void KeyProcess::PushGlobalUniqueTensors(const unique_ptr>& tensors, KeysT& lookupKeys, int channel) { - LOG_INFO(KEY_PROCESS "rank:{}, channel:{}, useSumSameIdGradients:{} ...", - rankInfo.rankId, channel, rankInfo.useSumSameIdGradients); - if (rankInfo.useSumSameIdGradients && channel == TRAIN_CHANNEL_ID) { + if (GlobalEnv::applyGradientsStrategy == 
ApplyGradientsStrategyOptions::SUM_SAME_ID_GRADIENTS_AND_APPLY && + channel == TRAIN_CHANNEL_ID) { KeysT uniqueKeys; vector restoreVecSec; diff --git a/src/core/utils/common.cpp b/src/core/utils/common.cpp index 9512b181..38e64444 100644 --- a/src/core/utils/common.cpp +++ b/src/core/utils/common.cpp @@ -47,7 +47,6 @@ namespace MxRec { useStatic = static_cast(option) bitand HybridOption::USE_STATIC; useHot = static_cast(option) bitand HybridOption::USE_HOT; useDynamicExpansion = static_cast(option) bitand HybridOption::USE_DYNAMIC_EXPANSION; - useSumSameIdGradients = static_cast(option) bitand HybridOption::USE_SUM_SAME_ID_GRADIENTS; } RankInfo::RankInfo(int localRankSize, int option, const vector& maxStep) diff --git a/src/core/utils/common.h b/src/core/utils/common.h index 9706a699..f6c3de3f 100644 --- a/src/core/utils/common.h +++ b/src/core/utils/common.h @@ -115,10 +115,9 @@ namespace MxRec { using TensorInfoT = std::tuple>>::iterator>; namespace HybridOption { - const unsigned int USE_STATIC = 0x0001; - const unsigned int USE_HOT = 0x0001 << 1; - const unsigned int USE_DYNAMIC_EXPANSION = 0x0001 << 2; - const unsigned int USE_SUM_SAME_ID_GRADIENTS = 0x0001 << 3; + const unsigned int USE_STATIC = 0x001; + const unsigned int USE_HOT = 0x001 << 1; + const unsigned int USE_DYNAMIC_EXPANSION = 0x001 << 2; }; string GetChipName(int devID); @@ -227,7 +226,6 @@ namespace MxRec { bool isDDR { false }; bool isSSDEnabled { false }; bool useDynamicExpansion {false}; - bool useSumSameIdGradients {true}; std::vector ctrlSteps; // 包含三个步数: train_steps, eval_steps, save_steps }; diff --git a/src/core/utils/config.cpp b/src/core/utils/config.cpp index 57478553..9cfec739 100644 --- a/src/core/utils/config.cpp +++ b/src/core/utils/config.cpp @@ -20,7 +20,13 @@ See the License for the specific language governing permissions and using namespace std; namespace MxRec { + namespace ApplyGradientsStrategyOptions { + const std::string DIRECT_APPLY = "direct_apply"; + const std::string SUM_SAME_ID_GRADIENTS_AND_APPLY = "sum_same_id_gradients_and_apply"; + }; + // 设置环境变量默认值 + string GlobalEnv::applyGradientsStrategy = ApplyGradientsStrategyOptions::SUM_SAME_ID_GRADIENTS_AND_APPLY; int GlobalEnv::aclTimeout = -1; // 默认阻塞方式,一直等待直到数据接收完成。 int GlobalEnv::hdChannelSize = 40; // 默认通道深度40 int GlobalEnv::keyProcessThreadNum = 6; // 默认6个线程 @@ -36,6 +42,12 @@ namespace MxRec { /// 配置环境变量,Python侧已经做了变量值校验,CPP侧直接使用即可;bool类型,1代表true,0代表false void ConfigGlobalEnv() { + // 设置梯度策略 + const char *envStrategy = getenv(RecEnvNames::APPLY_GRADIENTS_STRATEGY); + if (envStrategy != nullptr) { + GlobalEnv::applyGradientsStrategy = envStrategy; + } + // 设置ACL超时时间 const char *envAclTimeout = getenv(RecEnvNames::ACL_TIMEOUT); if (envAclTimeout != nullptr) { @@ -105,8 +117,9 @@ namespace MxRec { void LogGlobalEnv() { - LOG_DEBUG("Environment variables are: [{}: {}], [{}: {}], [{}: {}], [{}: {}], [{}: {}], " + LOG_DEBUG("Environment variables are: [{}: {}], [{}: {}], [{}: {}], [{}: {}], [{}: {}], [{}: {}], " "[{}: {}], [{}: {}], [{}: {}], [{}: {}], [{}: {}], [{}: {}], [{}: {}]", + RecEnvNames::APPLY_GRADIENTS_STRATEGY, GlobalEnv::applyGradientsStrategy, RecEnvNames::ACL_TIMEOUT, GlobalEnv::aclTimeout, RecEnvNames::HD_CHANNEL_SIZE, GlobalEnv::hdChannelSize, RecEnvNames::KEY_PROCESS_THREAD_NUM, GlobalEnv::keyProcessThreadNum, diff --git a/src/core/utils/config.h b/src/core/utils/config.h index 3ecb4c36..4c56c0d4 100644 --- a/src/core/utils/config.h +++ b/src/core/utils/config.h @@ -20,6 +20,7 @@ See the License for the specific language 
governing permissions and namespace MxRec { namespace RecEnvNames { + const char *const APPLY_GRADIENTS_STRATEGY = "APPLY_GRADIENTS_STRATEGY"; const char *const ACL_TIMEOUT = "AclTimeout"; const char *const HD_CHANNEL_SIZE = "HD_CHANNEL_SIZE"; const char *const KEY_PROCESS_THREAD_NUM = "KEY_PROCESS_THREAD_NUM"; @@ -33,7 +34,13 @@ namespace MxRec { const char *const RECORD_KEY_COUNT = "RECORD_KEY_COUNT"; }; + namespace ApplyGradientsStrategyOptions { + extern const std::string DIRECT_APPLY; + extern const std::string SUM_SAME_ID_GRADIENTS_AND_APPLY; + }; + struct GlobalEnv { + static std::string applyGradientsStrategy; static int aclTimeout; static int hdChannelSize; static int keyProcessThreadNum; diff --git a/src/pybind/module_main.cpp b/src/pybind/module_main.cpp index cb128a15..4a08f992 100644 --- a/src/pybind/module_main.cpp +++ b/src/pybind/module_main.cpp @@ -81,8 +81,6 @@ namespace { m.attr("USE_DYNAMIC_EXPANSION") = py::int_(HybridOption::USE_DYNAMIC_EXPANSION); - m.attr("USE_SUM_SAME_ID_GRADIENTS") = py::int_(HybridOption::USE_SUM_SAME_ID_GRADIENTS); - GetRankInfo(m); GetEmbInfoParams(m); diff --git a/src/tests/utils/config_test.cpp b/src/tests/utils/config_test.cpp index 54e0ec67..d7e51b57 100644 --- a/src/tests/utils/config_test.cpp +++ b/src/tests/utils/config_test.cpp @@ -24,6 +24,7 @@ using namespace MxRec; void SetEnvironmentVariables() { + setenv(RecEnvNames::APPLY_GRADIENTS_STRATEGY, "sum_same_id_gradients_and_apply", 1); setenv(RecEnvNames::ACL_TIMEOUT, "100", 1); setenv(RecEnvNames::HD_CHANNEL_SIZE, "50", 1); setenv(RecEnvNames::KEY_PROCESS_THREAD_NUM, "8", 1); @@ -39,6 +40,7 @@ void SetEnvironmentVariables() void UnsetEnvironmentVariables() { + unsetenv(RecEnvNames::APPLY_GRADIENTS_STRATEGY); unsetenv(RecEnvNames::ACL_TIMEOUT); unsetenv(RecEnvNames::HD_CHANNEL_SIZE); unsetenv(RecEnvNames::KEY_PROCESS_THREAD_NUM); @@ -54,6 +56,7 @@ void UnsetEnvironmentVariables() TEST(GlobalEnv, DefaultValues) { + ASSERT_EQ(GlobalEnv::applyGradientsStrategy, ApplyGradientsStrategyOptions::SUM_SAME_ID_GRADIENTS_AND_APPLY); ASSERT_EQ(GlobalEnv::aclTimeout, -1); ASSERT_EQ(GlobalEnv::hdChannelSize, 40); ASSERT_EQ(GlobalEnv::keyProcessThreadNum, 6); @@ -74,6 +77,7 @@ TEST(GlobalEnv, ConfigGlobalEnv) ConfigGlobalEnv(); // 验证环境变量是否已经被正确配置 + ASSERT_EQ(GlobalEnv::applyGradientsStrategy, "sum_same_id_gradients_and_apply"); ASSERT_EQ(GlobalEnv::aclTimeout, 100); ASSERT_EQ(GlobalEnv::hdChannelSize, 50); ASSERT_EQ(GlobalEnv::keyProcessThreadNum, 8); diff --git a/tests/mx_rec/core/mock_class.py b/tests/mx_rec/core/mock_class.py index 04c9ae56..7566aa1a 100644 --- a/tests/mx_rec/core/mock_class.py +++ b/tests/mx_rec/core/mock_class.py @@ -208,7 +208,6 @@ class MockOptimizer: def __init__(self): self.slot_num = 2 - self.derivative = 2 def initialize_slots(self, var, table_instance): # Create slots for the first and second moments. diff --git a/tests/mx_rec/core/test_build_graph.py b/tests/mx_rec/core/test_build_graph.py index 14913cf7..c15d851f 100644 --- a/tests/mx_rec/core/test_build_graph.py +++ b/tests/mx_rec/core/test_build_graph.py @@ -156,6 +156,84 @@ class TestGetIdOffsetsFunc(unittest.TestCase): self.assertEqual(swap_len, 0) +class TestGetRestoreVectorSecondFunc(unittest.TestCase): + """ + Test for 'mx_rec.core.asc.build_graph.get_restore_vector_second'. 
+ """ + + def setUp(self): + # 默认动态扩容、hot emb、HBM + self.config = dict(table_name="test_table", channel_id=0, is_hbm=True, emb_size=8, ext_emb_size=8, + feat_cnt=8, batch_size=32, rank_size=8, send_count=1, device_id=0, + use_hot=True, use_dynamic_expansion=True) + self.max_lookup_vec_size = self.config.get("send_count") * self.config.get("rank_size") + + def tearDown(self): + # 恢复config + self.config = dict(table_name="test_table", channel_id=0, is_hbm=True, emb_size=8, ext_emb_size=8, + feat_cnt=8, batch_size=32, rank_size=8, send_count=1, device_id=0, + use_hot=True, use_dynamic_expansion=True) + + @mock.patch("mx_rec.core.asc.build_graph.npu_ops.gen_npu_ops.get_next") + def test_get_restore_vector_second(self, mock_get_next): + """ + case: test get_restore_vector_second + """ + + from mx_rec.core.asc.build_graph import get_restore_vector_second + + with tf.Graph().as_default(): + mock_get_next.return_value = [0] + restore_vector_second = get_restore_vector_second(self.max_lookup_vec_size, self.config) + self.assertEqual(restore_vector_second, 0) + + +class TestGetUniqueKeysFunc(unittest.TestCase): + """ + Test for 'mx_rec.core.asc.build_graph.get_unique_keys'. + """ + + def setUp(self): + # 默认动态扩容、hot emb、HBM + self.config = dict(table_name="test_table", channel_id=0, is_hbm=True, emb_size=8, ext_emb_size=8, + feat_cnt=8, batch_size=32, rank_size=8, send_count=1, device_id=0, + use_hot=True, use_dynamic_expansion=True) + self.max_lookup_vec_size = self.config.get("send_count") * self.config.get("rank_size") + + def tearDown(self): + # 恢复config + self.config = dict(table_name="test_table", channel_id=0, is_hbm=True, emb_size=8, ext_emb_size=8, + feat_cnt=8, batch_size=32, rank_size=8, send_count=1, device_id=0, + use_hot=True, use_dynamic_expansion=True) + + @mock.patch("mx_rec.core.asc.build_graph.npu_ops.gen_npu_ops.get_next") + def test_get_unique_keys_case1(self, mock_get_next): + """ + case1: 动态扩容 + """ + + from mx_rec.core.asc.build_graph import get_unique_keys + + with tf.Graph().as_default(): + mock_get_next.return_value = [0] + unique_keys = get_unique_keys(self.max_lookup_vec_size, self.config) + self.assertEqual(unique_keys, 0) + + @mock.patch("mx_rec.core.asc.build_graph.npu_ops.gen_npu_ops.get_next") + def test_get_unique_keys_case2(self, mock_get_next): + """ + case2: 非动态扩容 + """ + + from mx_rec.core.asc.build_graph import get_unique_keys + + with tf.Graph().as_default(): + self.config["use_dynamic_expansion"] = False + mock_get_next.return_value = [1] + unique_keys = get_unique_keys(self.max_lookup_vec_size, self.config) + self.assertEqual(unique_keys, 1) + + class TestGetAll2allArgsFunc(unittest.TestCase): """ Test for 'mx_rec.core.asc.build_graph.get_all2all_args'. 
@@ -268,12 +346,15 @@ class TestGetPreProcessedTensorForAscFunc(unittest.TestCase): self.config = dict(table_name="test_table", channel_id=0, is_hbm=True, emb_size=8, ext_emb_size=8, feat_cnt=8, batch_size=32, rank_size=8, send_count=1, device_id=0, use_hot=True, use_dynamic_expansion=True) + global_env.apply_gradients_strategy = "direct_apply" @mock.patch.multiple("mx_rec.core.asc.build_graph", get_restore_vector=mock.MagicMock(return_value=[0, 0]), get_id_offsets=mock.MagicMock(return_value=[0, 0, 0]), get_all2all_args=mock.MagicMock(return_value=0), - get_swap_info=mock.MagicMock(return_value=0)) + get_swap_info=mock.MagicMock(return_value=0), + get_restore_vector_second=mock.MagicMock(return_value=0), + get_unique_keys=mock.MagicMock(return_value=0)) @mock.patch("mx_rec.core.asc.build_graph.ConfigInitializer") def test_get_preprocessed_tensor_for_asc_case1(self, build_graph_config_initializer): """ @@ -282,18 +363,23 @@ class TestGetPreProcessedTensorForAscFunc(unittest.TestCase): from mx_rec.core.asc.build_graph import get_preprocessed_tensor_for_asc + global_env.apply_gradients_strategy = "sum_same_id_gradients_and_apply" with tf.Graph().as_default(): mock_config_initializer = MockConfigInitializer(use_static=True) build_graph_config_initializer.get_instance = mock.Mock(return_value=mock_config_initializer) result = get_preprocessed_tensor_for_asc(None, self.config) self.assertIsNotNone(result.get("restore_vector")) + self.assertIsNotNone(result.get("restore_vector_second")) + self.assertIsNotNone(result.get("unique_keys")) @mock.patch.multiple("mx_rec.core.asc.build_graph", get_restore_vector=mock.MagicMock(return_value=[0, 0]), get_id_offsets=mock.MagicMock(return_value=[0, 0, 0]), get_all2all_args=mock.MagicMock(return_value=0), - get_swap_info=mock.MagicMock(return_value=0)) + get_swap_info=mock.MagicMock(return_value=0), + get_restore_vector_second=mock.MagicMock(return_value=0), + get_unique_keys=mock.MagicMock(return_value=0)) @mock.patch("mx_rec.core.asc.build_graph.ConfigInitializer") def test_get_preprocessed_tensor_for_asc_case2(self, build_graph_config_initializer): """ @@ -302,18 +388,23 @@ class TestGetPreProcessedTensorForAscFunc(unittest.TestCase): from mx_rec.core.asc.build_graph import get_preprocessed_tensor_for_asc + global_env.apply_gradients_strategy = "sum_same_id_gradients_and_apply" with tf.Graph().as_default(): mock_config_initializer = MockConfigInitializer() build_graph_config_initializer.get_instance = mock.Mock(return_value=mock_config_initializer) result = get_preprocessed_tensor_for_asc(None, self.config) self.assertIsNotNone(result.get("restore_vector")) + self.assertIsNotNone(result.get("restore_vector_second")) + self.assertIsNotNone(result.get("unique_keys")) @mock.patch.multiple("mx_rec.core.asc.build_graph", get_restore_vector=mock.MagicMock(return_value=[0, 0]), get_id_offsets=mock.MagicMock(return_value=[0, 0, 0]), get_all2all_args=mock.MagicMock(return_value=0), - get_swap_info=mock.MagicMock(return_value=0)) + get_swap_info=mock.MagicMock(return_value=0), + get_restore_vector_second=mock.MagicMock(return_value=0), + get_unique_keys=mock.MagicMock(return_value=0)) @mock.patch("mx_rec.core.asc.build_graph.ConfigInitializer") def test_get_preprocessed_tensor_for_asc_case3(self, build_graph_config_initializer): """ @@ -322,6 +413,7 @@ class TestGetPreProcessedTensorForAscFunc(unittest.TestCase): from mx_rec.core.asc.build_graph import get_preprocessed_tensor_for_asc + global_env.apply_gradients_strategy = "sum_same_id_gradients_and_apply" with 
tf.Graph().as_default(): mock_config_initializer = MockConfigInitializer() build_graph_config_initializer.get_instance = mock.Mock(return_value=mock_config_initializer) @@ -329,6 +421,7 @@ class TestGetPreProcessedTensorForAscFunc(unittest.TestCase): self.config["channel_id"] = 1 result = get_preprocessed_tensor_for_asc(None, self.config) self.assertIsNotNone(result.get("restore_vector")) + self.assertIsNone(result.get("restore_vector_second")) if __name__ == '__main__': diff --git a/tests/mx_rec/core/test_manager.py b/tests/mx_rec/core/test_manager.py index ffa8b09e..815ad843 100644 --- a/tests/mx_rec/core/test_manager.py +++ b/tests/mx_rec/core/test_manager.py @@ -385,7 +385,6 @@ class TestInitializeEmbCacheFunc(unittest.TestCase): USE_STATIC=mock.MagicMock(return_value=0), USE_HOT=mock.MagicMock(return_value=1), USE_DYNAMIC_EXPANSION=mock.MagicMock(return_value=2), - USE_SUM_SAME_ID_GRADIENTS=mock.MagicMock(return_value=4), RankInfo=mock.MagicMock(return_value="mock_info"), HybridMgmt=mock.MagicMock(return_value=MockHybridMgmt(is_initialized=False))) @mock.patch("mx_rec.core.asc.manager.ConfigInitializer") @@ -399,9 +398,6 @@ class TestInitializeEmbCacheFunc(unittest.TestCase): mock_config_initializer = MockConfigInitializer(use_static=True, use_dynamic_expansion=True) manager_config_initializer.get_instance = mock.Mock(return_value=mock_config_initializer) - mock_opt = MockOptimizer() - manager_config_initializer.get_instance().optimizer_config.optimizer_instance = mock_opt - with self.assertRaises(RuntimeError): initialize_emb_cache([], []) @@ -412,7 +408,6 @@ class TestInitializeEmbCacheFunc(unittest.TestCase): USE_STATIC=mock.MagicMock(return_value=0), USE_HOT=mock.MagicMock(return_value=1), USE_DYNAMIC_EXPANSION=mock.MagicMock(return_value=2), - USE_SUM_SAME_ID_GRADIENTS=mock.MagicMock(return_value=4), RankInfo=mock.MagicMock(return_value="mock_info")) @mock.patch("mx_rec.core.asc.manager.ConfigInitializer") @mock.patch("mx_rec.core.asc.manager.HybridMgmt") @@ -426,9 +421,6 @@ class TestInitializeEmbCacheFunc(unittest.TestCase): mock_config_initializer = MockConfigInitializer(use_static=True, use_dynamic_expansion=True) manager_config_initializer.get_instance = mock.Mock(return_value=mock_config_initializer) - mock_opt = MockOptimizer() - manager_config_initializer.get_instance().optimizer_config.optimizer_instance = mock_opt - mock_mgmt = MockHybridMgmt(is_initialized=True) mock_hybrid_mgmt.return_value = mock_mgmt initialize_emb_cache([], []) -- Gitee From 1dffba4b0fd93598d9b0cd903b3dcacc009e732a Mon Sep 17 00:00:00 2001 From: wuhongfa <1660398197@qq.com> Date: Tue, 16 Apr 2024 15:11:35 +0800 Subject: [PATCH 035/302] =?UTF-8?q?=E5=88=A0=E9=99=A4=E6=89=80=E6=9C=89?= =?UTF-8?q?=E5=88=A4=E6=96=ADHot=20embed=E7=9A=84=E4=BB=A3=E7=A0=81?= =?UTF-8?q?=EF=BC=8C=E9=BB=98=E8=AE=A4=E5=BC=80=E5=90=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core/key_process/key_process.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/core/key_process/key_process.cpp b/src/core/key_process/key_process.cpp index 1d922cee..b6e4d5fb 100644 --- a/src/core/key_process/key_process.cpp +++ b/src/core/key_process/key_process.cpp @@ -48,7 +48,6 @@ bool KeyProcess::Initialize(const RankInfo& rInfo, const vector& eInfos SetupHotEmbUpdateStep(); - map scInfo; for (const auto& info: eInfos) { embInfos[info.name] = info; @@ -385,7 +384,6 @@ bool KeyProcess::KeyProcessTaskHelperWithFastUnique(unique_ptr& batch 
uniqueInfo.hotPos.resize(hotEmbTotCount[batch->name], -1); tensors->push_back(Vec2TensorI32(uniqueInfo.hotPos)); - if (!rankInfo.isDDR) { PushGlobalUniqueTensors(move(tensors), uniqueInfo.all2AllInfo.keyRecv, channel); tensors->push_back(rankInfo.useDynamicExpansion ? Vec2TensorI64(uniqueInfo.all2AllInfo.keyRecv) : @@ -447,7 +445,6 @@ bool KeyProcess::KeyProcessTaskHelper(unique_ptr& batch, int channel, hotPos.resize(hotEmbTotCount[batch->name], 0); tensors->push_back(Vec2TensorI32(hotPos)); - if (!rankInfo.isDDR) { PushGlobalUniqueTensors(tensors, lookupKeys, channel); tensors->push_back(rankInfo.useDynamicExpansion ? Vec2TensorI64(lookupKeys) : Vec2TensorI32(lookupKeys)); -- Gitee From 0f38f2118d4fcafe2f9707b4e288cf849ba3c256 Mon Sep 17 00:00:00 2001 From: wuhongfa <1660398197@qq.com> Date: Tue, 16 Apr 2024 15:29:15 +0800 Subject: [PATCH 036/302] =?UTF-8?q?=E5=88=A0=E9=99=A4=E6=89=80=E6=9C=89?= =?UTF-8?q?=E5=88=A4=E6=96=ADHot=20embed=E7=9A=84=E4=BB=A3=E7=A0=81?= =?UTF-8?q?=EF=BC=8C=E9=BB=98=E8=AE=A4=E5=BC=80=E5=90=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core/key_process/key_process.cpp | 2 +- src/core/utils/common.h | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/core/key_process/key_process.cpp b/src/core/key_process/key_process.cpp index b6e4d5fb..98df97ed 100644 --- a/src/core/key_process/key_process.cpp +++ b/src/core/key_process/key_process.cpp @@ -650,7 +650,7 @@ void KeyProcess::HandleHotAndSendCount(const unique_ptr &batch, Uniqu ComputeHotPos(batch, hotMap, uniqueInfoOut.hotPos, uniqueInfoOut.restore, hotOffset); LOG_DEBUG("ComputeHot TimeCost(ms):{}", computeHotTc.ElapsedMS()); UpdateHotMapForUnique(keySendInfo.keySend, keySendInfo.keyCount, - hotOffset, batch->batchId % hotEmbUpdateStep == 0, batch->name); + hotOffset, batch->batchId % hotEmbUpdateStep == 0, batch->name); if (rankInfo.useStatic) { sc.resize(rankInfo.rankSize, embInfos[batch->name].sendCount); diff --git a/src/core/utils/common.h b/src/core/utils/common.h index 9ce80073..99184fed 100644 --- a/src/core/utils/common.h +++ b/src/core/utils/common.h @@ -148,7 +148,11 @@ namespace MxRec { {"910B2", UBSize::ASCEND910_B2}, {"910B3", UBSize::ASCEND910_B3}, {"910B4", UBSize::ASCEND910_B4}, - {"910B2C", UBSize::ASCEND910_B2C}}; + {"910B2C", UBSize::ASCEND910_B2C}, + {"910C1", UBSize::ASCEND910_C1}, + {"910C2", UBSize::ASCEND910_C1}, + {"910C3", UBSize::ASCEND910_C3} + }; auto it = chipUbSizeList.find(GetChipName(devID)); if (it != chipUbSizeList.end()) { return it->second; -- Gitee From 660c945582872750c98b99b494834d83155bb914 Mon Sep 17 00:00:00 2001 From: wuhongfa <1660398197@qq.com> Date: Tue, 16 Apr 2024 20:23:36 +0800 Subject: [PATCH 037/302] =?UTF-8?q?=E5=88=A0=E9=99=A4=E6=89=80=E6=9C=89?= =?UTF-8?q?=E5=88=A4=E6=96=ADHot=20embed=E7=9A=84=E4=BB=A3=E7=A0=81?= =?UTF-8?q?=EF=BC=8C=E9=BB=98=E8=AE=A4=E5=BC=80=E5=90=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/demo/little_demo/main.py | 4 +-- examples/demo/little_demo/run.sh | 1 - examples/demo/little_demo_estimator/main.py | 4 +-- examples/demo/little_demo_estimator/run.sh | 1 - src/core/utils/common.h | 3 ++- src/tests/key_process/key_process_test.cpp | 8 +++++- tests/mx_rec/core/mock_class.py | 1 - tests/mx_rec/core/test_build_graph.py | 28 ++++++++++----------- tests/mx_rec/core/test_manager.py | 2 -- tools/atomic/sparse_lookup.py | 3 +-- tools/atomic/sparse_lookup_with_grad.py | 3 +-- 11 files changed, 27 
insertions(+), 31 deletions(-) diff --git a/examples/demo/little_demo/main.py b/examples/demo/little_demo/main.py index 05d6896f..5d5e151e 100644 --- a/examples/demo/little_demo/main.py +++ b/examples/demo/little_demo/main.py @@ -187,14 +187,13 @@ if __name__ == "__main__": # get init configuration try: use_dynamic = bool(int(os.getenv("USE_DYNAMIC", 0))) - use_hot = bool(int(os.getenv("USE_HOT", 0))) use_dynamic_expansion = bool(int(os.getenv("USE_DYNAMIC_EXPANSION", 0))) use_multi_lookup = bool(int(os.getenv("USE_MULTI_LOOKUP", 1))) MODIFY_GRAPH_FLAG = bool(int(os.getenv("USE_MODIFY_GRAPH", 0))) USE_TIMESTAMP = bool(int(os.getenv("USE_TIMESTAMP", 0))) USE_ONE_SHOT = bool(int(os.getenv("USE_ONE_SHOT", 0))) except ValueError as err: - raise ValueError("please correctly config USE_MPI or USE_DYNAMIC or USE_HOT or USE_DYNAMIC_EXPANSION or " + raise ValueError("please correctly config USE_MPI or USE_DYNAMIC or USE_DYNAMIC_EXPANSION or " "USE_MULTI_LOOKUP or USE_MODIFY_GRAPH or USE_TIMESTAMP or USE_ONE_SHOT " "only 0 or 1 is supported.") from err @@ -218,7 +217,6 @@ if __name__ == "__main__": eval_steps=EVAL_STEPS, save_steps=SAVING_INTERVAL, use_dynamic=use_dynamic, - use_hot=use_hot, use_dynamic_expansion=use_dynamic_expansion, if_load=if_load) diff --git a/examples/demo/little_demo/run.sh b/examples/demo/little_demo/run.sh index ab74adb2..e0d1766b 100644 --- a/examples/demo/little_demo/run.sh +++ b/examples/demo/little_demo/run.sh @@ -104,7 +104,6 @@ export USE_MPI=1 ################# 参数配置 ###################### export USE_DYNAMIC=1 # 0:静态shape;1:动态shape -export USE_HOT=0 # 0:关闭hot emb;1: 开启hot emb export USE_DYNAMIC_EXPANSION=0 # 0:关闭动态扩容;1: 开启动态扩容 export USE_MULTI_LOOKUP=1 # 0:一表一查;1:一表多查 export MULTI_LOOKUP_TIMES=2 # 一表多查次数:默认2,上限127(因为一表已经有一查);仅当export USE_MULTI_LOOKUP=1时生效 diff --git a/examples/demo/little_demo_estimator/main.py b/examples/demo/little_demo_estimator/main.py index 5c3c94d1..8df1420c 100644 --- a/examples/demo/little_demo_estimator/main.py +++ b/examples/demo/little_demo_estimator/main.py @@ -158,7 +158,6 @@ if __name__ == '__main__': # get init configuration try: use_dynamic = bool(int(os.getenv("USE_DYNAMIC", 0))) - use_hot = bool(int(os.getenv("USE_HOT", 0))) use_dynamic_expansion = bool(int(os.getenv("USE_DYNAMIC_EXPANSION", 0))) use_multi_lookup = bool(int(os.getenv("USE_MULTI_LOOKUP", 1))) MODIFY_GRAPH_FLAG = bool(int(os.getenv("USE_MODIFY_GRAPH", 0))) @@ -166,7 +165,7 @@ if __name__ == '__main__': args.use_one_shot = bool(int(os.getenv("USE_ONE_SHOT", 0))) args.enable_push_ops_test = bool(int(os.getenv("ENABLE_PUSH_OPS_TEST", 0))) except ValueError as err: - raise ValueError("please correctly config USE_MPI or USE_DYNAMIC or USE_HOT or USE_DYNAMIC_EXPANSION or " + raise ValueError("please correctly config USE_MPI or USE_DYNAMIC or USE_DYNAMIC_EXPANSION or " "USE_MULTI_LOOKUP or USE_MODIFY_GRAPH or USE_TIMESTAMP or USE_ONE_SHOT " "only 0 or 1 is supported.") from err @@ -187,7 +186,6 @@ if __name__ == '__main__': init(train_steps=args.train_steps, eval_steps=args.eval_steps, use_dynamic=use_dynamic, - use_hot=use_hot, use_dynamic_expansion=use_dynamic_expansion) args.model_dir = f"{args.model_ckpt_dir}_rank" diff --git a/examples/demo/little_demo_estimator/run.sh b/examples/demo/little_demo_estimator/run.sh index 33770e59..373b3535 100644 --- a/examples/demo/little_demo_estimator/run.sh +++ b/examples/demo/little_demo_estimator/run.sh @@ -93,7 +93,6 @@ fi ################# 参数配置 ###################### export USE_DYNAMIC=1 # 0:静态shape;1:动态shape -export USE_HOT=1 # 
0:关闭hot emb;1: 开启hot emb export USE_DYNAMIC_EXPANSION=0 # 0:关闭动态扩容;1: 开启动态扩容 export USE_MULTI_LOOKUP=1 # 0:一表一查;1:一表多查 export MULTI_LOOKUP_TIMES=2 # 一表多查次数:默认2,上限127(因为一表已经有一查);仅当export USE_MULTI_LOOKUP=1时生效 diff --git a/src/core/utils/common.h b/src/core/utils/common.h index 99184fed..0861cdfc 100644 --- a/src/core/utils/common.h +++ b/src/core/utils/common.h @@ -116,7 +116,8 @@ namespace MxRec { namespace HybridOption { const unsigned int USE_STATIC = 0x001; - const unsigned int USE_DYNAMIC_EXPANSION = 0x001 << 2; + const unsigned int USE_DYNAMIC_EXPANSION = 0x001 << 1 + ; }; string GetChipName(int devID); diff --git a/src/tests/key_process/key_process_test.cpp b/src/tests/key_process/key_process_test.cpp index 6b06dc30..e2d289f4 100644 --- a/src/tests/key_process/key_process_test.cpp +++ b/src/tests/key_process/key_process_test.cpp @@ -658,6 +658,11 @@ TEST_F(KeyProcessTest, KeyProcessTaskHelper) ASSERT_EQ(CheckMatrixTensor(*all2all, allExpectAll2all), true); ASSERT_EQ(CheckFlatTensor({infoVecs->back()}, allExpectOffset[worldRank]), true); infoVecs->pop_back(); + int64_t hotPosition = process.hotEmbTotCount[batch->name]; + vector expectRestore(allExpectRestore[worldRank].size()); + for(int i=0; ibatchId); // 测试batchId错误 @@ -711,9 +716,10 @@ TEST_F(KeyProcessTest, KeyProcessTaskHelperDDR) auto tmpTensor = (*infoVecs).at(0); auto tmpData = tmpTensor.flat(); + int64_t hotPosition = process.hotEmbTotCount[batch->name]; vector actualGetRestore(col); for (int j = 0; j < col; j++) { - actualGetRestore[j] = tmpData(j); + actualGetRestore[j] = tmpData(j)-hotPosition; } LOG_INFO("KeyProcessTaskHelperDDR, rankid: {}, batchid: {}, Restore: {}", rankInfo.rankId, batch->batchId, VectorToString(actualGetRestore)); diff --git a/tests/mx_rec/core/mock_class.py b/tests/mx_rec/core/mock_class.py index 7566aa1a..1e3e7ba1 100644 --- a/tests/mx_rec/core/mock_class.py +++ b/tests/mx_rec/core/mock_class.py @@ -121,7 +121,6 @@ class MockConfigInitializer: def __init__(self, **kwargs): self.use_dynamic_expansion = kwargs.get("use_dynamic_expansion", False) self.use_static = kwargs.get("use_static", False) - self.use_hot = kwargs.get("use_static", True) self.modify_graph = kwargs.get("modify_graph", True) self.max_steps = kwargs.get("max_steps", -1) self.train_steps = kwargs.get("get_train_steps", -1) diff --git a/tests/mx_rec/core/test_build_graph.py b/tests/mx_rec/core/test_build_graph.py index c15d851f..08b66c55 100644 --- a/tests/mx_rec/core/test_build_graph.py +++ b/tests/mx_rec/core/test_build_graph.py @@ -33,13 +33,13 @@ class TestGetRestoreVectorFunc(unittest.TestCase): # 默认动态扩容、hot emb、HBM self.config = dict(table_name="test_table", channel_id=0, is_hbm=True, emb_size=8, ext_emb_size=8, feat_cnt=8, batch_size=32, rank_size=8, send_count=1, device_id=0, - use_hot=True, use_dynamic_expansion=True) + use_dynamic_expansion=True) def tearDown(self): # 恢复config self.config = dict(table_name="test_table", channel_id=0, is_hbm=True, emb_size=8, ext_emb_size=8, feat_cnt=8, batch_size=32, rank_size=8, send_count=1, device_id=0, - use_hot=True, use_dynamic_expansion=True) + use_dynamic_expansion=True) def test_get_restore_vector_case1(self): """ @@ -115,14 +115,14 @@ class TestGetIdOffsetsFunc(unittest.TestCase): # 默认动态扩容、hot emb、HBM self.config = dict(table_name="test_table", channel_id=0, is_hbm=True, emb_size=8, ext_emb_size=8, feat_cnt=8, batch_size=32, rank_size=8, send_count=1, device_id=0, - use_hot=True, use_dynamic_expansion=True) + use_dynamic_expansion=True) self.max_lookup_vec_size = 
self.config.get("send_count") * self.config.get("rank_size") def tearDown(self): # 恢复config self.config = dict(table_name="test_table", channel_id=0, is_hbm=True, emb_size=8, ext_emb_size=8, feat_cnt=8, batch_size=32, rank_size=8, send_count=1, device_id=0, - use_hot=True, use_dynamic_expansion=True) + use_dynamic_expansion=True) @mock.patch("mx_rec.core.asc.build_graph.npu_ops.gen_npu_ops.get_next") def test_get_id_offsets_case1(self, mock_get_next): @@ -165,14 +165,14 @@ class TestGetRestoreVectorSecondFunc(unittest.TestCase): # 默认动态扩容、hot emb、HBM self.config = dict(table_name="test_table", channel_id=0, is_hbm=True, emb_size=8, ext_emb_size=8, feat_cnt=8, batch_size=32, rank_size=8, send_count=1, device_id=0, - use_hot=True, use_dynamic_expansion=True) + use_dynamic_expansion=True) self.max_lookup_vec_size = self.config.get("send_count") * self.config.get("rank_size") def tearDown(self): # 恢复config self.config = dict(table_name="test_table", channel_id=0, is_hbm=True, emb_size=8, ext_emb_size=8, feat_cnt=8, batch_size=32, rank_size=8, send_count=1, device_id=0, - use_hot=True, use_dynamic_expansion=True) + use_dynamic_expansion=True) @mock.patch("mx_rec.core.asc.build_graph.npu_ops.gen_npu_ops.get_next") def test_get_restore_vector_second(self, mock_get_next): @@ -197,14 +197,14 @@ class TestGetUniqueKeysFunc(unittest.TestCase): # 默认动态扩容、hot emb、HBM self.config = dict(table_name="test_table", channel_id=0, is_hbm=True, emb_size=8, ext_emb_size=8, feat_cnt=8, batch_size=32, rank_size=8, send_count=1, device_id=0, - use_hot=True, use_dynamic_expansion=True) + use_dynamic_expansion=True) self.max_lookup_vec_size = self.config.get("send_count") * self.config.get("rank_size") def tearDown(self): # 恢复config self.config = dict(table_name="test_table", channel_id=0, is_hbm=True, emb_size=8, ext_emb_size=8, feat_cnt=8, batch_size=32, rank_size=8, send_count=1, device_id=0, - use_hot=True, use_dynamic_expansion=True) + use_dynamic_expansion=True) @mock.patch("mx_rec.core.asc.build_graph.npu_ops.gen_npu_ops.get_next") def test_get_unique_keys_case1(self, mock_get_next): @@ -243,13 +243,13 @@ class TestGetAll2allArgsFunc(unittest.TestCase): # 默认动态扩容、hot emb、HBM self.config = dict(table_name="test_table", channel_id=0, is_hbm=True, emb_size=8, ext_emb_size=8, feat_cnt=8, batch_size=32, rank_size=8, send_count=1, device_id=0, - use_hot=True, use_dynamic_expansion=True) + use_dynamic_expansion=True) def tearDown(self): # 恢复config self.config = dict(table_name="test_table", channel_id=0, is_hbm=True, emb_size=8, ext_emb_size=8, feat_cnt=8, batch_size=32, rank_size=8, send_count=1, device_id=0, - use_hot=True, use_dynamic_expansion=True) + use_dynamic_expansion=True) def test_get_all2all_args_case1(self): """ @@ -285,13 +285,13 @@ class TestGetSwapInfoFunc(unittest.TestCase): # 默认动态扩容、hot emb、HBM self.config = dict(table_name="test_table", channel_id=0, is_hbm=True, emb_size=8, ext_emb_size=8, feat_cnt=8, batch_size=32, rank_size=8, send_count=1, device_id=0, - use_hot=True, use_dynamic_expansion=True) + use_dynamic_expansion=True) def tearDown(self): # 恢复config self.config = dict(table_name="test_table", channel_id=0, is_hbm=True, emb_size=8, ext_emb_size=8, feat_cnt=8, batch_size=32, rank_size=8, send_count=1, device_id=0, - use_hot=True, use_dynamic_expansion=True) + use_dynamic_expansion=True) @mock.patch("mx_rec.core.asc.build_graph.ConfigInitializer") def test_get_swap_info_case1(self, build_graph_config_initializer): @@ -339,13 +339,13 @@ class 
TestGetPreProcessedTensorForAscFunc(unittest.TestCase): # 默认动态扩容、hot emb、HBM self.config = dict(table_name="test_table", channel_id=0, is_hbm=True, emb_size=8, ext_emb_size=8, feat_cnt=8, batch_size=32, rank_size=8, send_count=1, device_id=0, - use_hot=True, use_dynamic_expansion=True) + use_dynamic_expansion=True) def tearDown(self): # 恢复config self.config = dict(table_name="test_table", channel_id=0, is_hbm=True, emb_size=8, ext_emb_size=8, feat_cnt=8, batch_size=32, rank_size=8, send_count=1, device_id=0, - use_hot=True, use_dynamic_expansion=True) + use_dynamic_expansion=True) global_env.apply_gradients_strategy = "direct_apply" @mock.patch.multiple("mx_rec.core.asc.build_graph", diff --git a/tests/mx_rec/core/test_manager.py b/tests/mx_rec/core/test_manager.py index 815ad843..a14db367 100644 --- a/tests/mx_rec/core/test_manager.py +++ b/tests/mx_rec/core/test_manager.py @@ -383,7 +383,6 @@ class TestInitializeEmbCacheFunc(unittest.TestCase): get_device_id=mock.MagicMock(return_value=0), get_rank_size=mock.MagicMock(return_value=0), USE_STATIC=mock.MagicMock(return_value=0), - USE_HOT=mock.MagicMock(return_value=1), USE_DYNAMIC_EXPANSION=mock.MagicMock(return_value=2), RankInfo=mock.MagicMock(return_value="mock_info"), HybridMgmt=mock.MagicMock(return_value=MockHybridMgmt(is_initialized=False))) @@ -406,7 +405,6 @@ class TestInitializeEmbCacheFunc(unittest.TestCase): get_device_id=mock.MagicMock(return_value=0), get_rank_size=mock.MagicMock(return_value=0), USE_STATIC=mock.MagicMock(return_value=0), - USE_HOT=mock.MagicMock(return_value=1), USE_DYNAMIC_EXPANSION=mock.MagicMock(return_value=2), RankInfo=mock.MagicMock(return_value="mock_info")) @mock.patch("mx_rec.core.asc.manager.ConfigInitializer") diff --git a/tools/atomic/sparse_lookup.py b/tools/atomic/sparse_lookup.py index 570c683e..73ff7f33 100644 --- a/tools/atomic/sparse_lookup.py +++ b/tools/atomic/sparse_lookup.py @@ -28,7 +28,6 @@ from sparse_ops.config import set_ascend_env USE_PIPELINE_TEST = False USE_STATIC = False -USE_HOT = False USE_EXPANSION = False from mx_rec.constants.constants import ASCEND_SPARSE_LOOKUP_LOCAL_EMB, ASCEND_SPARSE_LOOKUP_ID_OFFSET @@ -171,7 +170,7 @@ if __name__ == '__main__': host_vocab_size = 0 init(True, rank_id=rank_id, rank_size=local_rank_size, train_interval=100, eval_steps=-1, - prefetch_batch_number=1, use_dynamic=0, use_hot=1, use_dynamic_expansion=0) + prefetch_batch_number=1, use_dynamic=0, use_dynamic_expansion=0) tf.disable_eager_execution() ###################################### diff --git a/tools/atomic/sparse_lookup_with_grad.py b/tools/atomic/sparse_lookup_with_grad.py index 3d7d37e5..26633abe 100644 --- a/tools/atomic/sparse_lookup_with_grad.py +++ b/tools/atomic/sparse_lookup_with_grad.py @@ -28,7 +28,6 @@ from sparse_ops.config import set_ascend_env USE_PIPELINE_TEST = False USE_STATIC = False -USE_HOT = False USE_EXPANSION = False @@ -173,7 +172,7 @@ if __name__ == '__main__': host_vocab_size = 0 init(True, rank_id=rank_id, rank_size=local_rank_size, train_interval=100, eval_steps=-1, - prefetch_batch_number=1, use_dynamic=0, use_hot=1, use_dynamic_expansion=0) + prefetch_batch_number=1, use_dynamic=0, use_dynamic_expansion=0) tf.disable_eager_execution() ###################################### -- Gitee From e99ae38e3a0503b0720a31e6d3490fa9e4a2e827 Mon Sep 17 00:00:00 2001 From: wuhongfa <1660398197@qq.com> Date: Tue, 16 Apr 2024 20:27:23 +0800 Subject: [PATCH 038/302] =?UTF-8?q?=E5=88=A0=E9=99=A4=E6=89=80=E6=9C=89?= 
=?UTF-8?q?=E5=88=A4=E6=96=ADHot=20embed=E7=9A=84=E4=BB=A3=E7=A0=81?= =?UTF-8?q?=EF=BC=8C=E9=BB=98=E8=AE=A4=E5=BC=80=E5=90=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/tests/key_process/key_process_test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tests/key_process/key_process_test.cpp b/src/tests/key_process/key_process_test.cpp index e2d289f4..5cc4b90b 100644 --- a/src/tests/key_process/key_process_test.cpp +++ b/src/tests/key_process/key_process_test.cpp @@ -663,7 +663,7 @@ TEST_F(KeyProcessTest, KeyProcessTaskHelper) for(int i=0; ibatchId); // 测试batchId错误 HybridMgmtBlock* hybridMgmtBlock = Singleton::GetInstance(); -- Gitee From bc26150ce17f739c7479e2f0b55fa84464fb7247 Mon Sep 17 00:00:00 2001 From: wuhongfa <1660398197@qq.com> Date: Tue, 16 Apr 2024 20:32:24 +0800 Subject: [PATCH 039/302] =?UTF-8?q?=E5=88=A0=E9=99=A4=E6=89=80=E6=9C=89?= =?UTF-8?q?=E5=88=A4=E6=96=ADHot=20embed=E7=9A=84=E4=BB=A3=E7=A0=81?= =?UTF-8?q?=EF=BC=8C=E9=BB=98=E8=AE=A4=E5=BC=80=E5=90=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/tests/key_process/key_process_test.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tests/key_process/key_process_test.cpp b/src/tests/key_process/key_process_test.cpp index 5cc4b90b..f84dfba9 100644 --- a/src/tests/key_process/key_process_test.cpp +++ b/src/tests/key_process/key_process_test.cpp @@ -660,8 +660,8 @@ TEST_F(KeyProcessTest, KeyProcessTaskHelper) infoVecs->pop_back(); int64_t hotPosition = process.hotEmbTotCount[batch->name]; vector expectRestore(allExpectRestore[worldRank].size()); - for(int i=0; ibatchId); -- Gitee From a19a1e699423da0bc3ade7cc2a6594d31bfd0103 Mon Sep 17 00:00:00 2001 From: wuhongfa <1660398197@qq.com> Date: Tue, 16 Apr 2024 20:54:23 +0800 Subject: [PATCH 040/302] =?UTF-8?q?=E5=88=A0=E9=99=A4=E6=89=80=E6=9C=89?= =?UTF-8?q?=E5=88=A4=E6=96=ADHot=20embed=E7=9A=84=E4=BB=A3=E7=A0=81?= =?UTF-8?q?=EF=BC=8C=E9=BB=98=E8=AE=A4=E5=BC=80=E5=90=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/tests/key_process/key_process_test.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/tests/key_process/key_process_test.cpp b/src/tests/key_process/key_process_test.cpp index f84dfba9..8bb21dcd 100644 --- a/src/tests/key_process/key_process_test.cpp +++ b/src/tests/key_process/key_process_test.cpp @@ -23,6 +23,7 @@ See the License for the specific language governing permissions and #include "ock_ctr_common/include/unique.h" #include "ock_ctr_common/include/error_code.h" #include "emb_table/embedding_mgmt.h" +#include "emock/emock.hpp" using namespace std; using namespace MxRec; @@ -60,6 +61,9 @@ class KeyProcessTest : public testing::Test { protected: void SetUp() { + int defaultUBSize = 196608; + EMOCK(GetUBSize).stubs().with(any()).will(returnValue(defaultUBSize)); + int claimed; MPI_Query_thread(&claimed); ASSERT_EQ(claimed, MPI_THREAD_MULTIPLE); @@ -660,7 +664,7 @@ TEST_F(KeyProcessTest, KeyProcessTaskHelper) infoVecs->pop_back(); int64_t hotPosition = process.hotEmbTotCount[batch->name]; vector expectRestore(allExpectRestore[worldRank].size()); - for(int i=0; i Date: Tue, 16 Apr 2024 21:05:41 +0800 Subject: [PATCH 041/302] =?UTF-8?q?=E5=88=A0=E9=99=A4=E6=89=80=E6=9C=89?= =?UTF-8?q?=E5=88=A4=E6=96=ADHot=20embed=E7=9A=84=E4=BB=A3=E7=A0=81?= =?UTF-8?q?=EF=BC=8C=E9=BB=98=E8=AE=A4=E5=BC=80=E5=90=AF?= MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/tests/key_process/key_process_test.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/tests/key_process/key_process_test.cpp b/src/tests/key_process/key_process_test.cpp index 8bb21dcd..86ec3f80 100644 --- a/src/tests/key_process/key_process_test.cpp +++ b/src/tests/key_process/key_process_test.cpp @@ -321,6 +321,7 @@ protected: void TearDown() { // delete + GlobalMockObject::reset(); } }; @@ -664,7 +665,7 @@ TEST_F(KeyProcessTest, KeyProcessTaskHelper) infoVecs->pop_back(); int64_t hotPosition = process.hotEmbTotCount[batch->name]; vector expectRestore(allExpectRestore[worldRank].size()); - for(int i = 0; i < expectRestore.size(); i++) { + for (int i = 0; i < expectRestore.size(); i++) { expectRestore[i] = allExpectRestore[worldRank][i] + hotPosition; } ASSERT_EQ(CheckFlatTensor(*infoVecs, expectRestore), true); -- Gitee From 9239843a278b5d34bf8f457f259239b7feae6b75 Mon Sep 17 00:00:00 2001 From: wuhongfa <1660398197@qq.com> Date: Wed, 17 Apr 2024 12:53:29 +0800 Subject: [PATCH 042/302] =?UTF-8?q?=E5=88=A0=E9=99=A4=E6=89=80=E6=9C=89?= =?UTF-8?q?=E5=88=A4=E6=96=ADHot=20embed=E7=9A=84=E4=BB=A3=E7=A0=81?= =?UTF-8?q?=EF=BC=8C=E9=BB=98=E8=AE=A4=E5=BC=80=E5=90=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mx_rec/core/asc/manager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mx_rec/core/asc/manager.py b/mx_rec/core/asc/manager.py index 5f8eeb5d..f1b2df69 100644 --- a/mx_rec/core/asc/manager.py +++ b/mx_rec/core/asc/manager.py @@ -201,7 +201,7 @@ def initialize_emb_cache(table_info_list, threshold_list): if ConfigInitializer.get_instance().use_static: option = option | USE_STATIC # use hot always True - option = option | USE_STATIC << 1 + # option = option | USE_STATIC << 1 if ConfigInitializer.get_instance().use_dynamic_expansion: option = option | USE_DYNAMIC_EXPANSION -- Gitee From e4b6f672ca6596b362929fd68c05025cfb210c69 Mon Sep 17 00:00:00 2001 From: wuhongfa <1660398197@qq.com> Date: Wed, 17 Apr 2024 14:33:21 +0800 Subject: [PATCH 043/302] =?UTF-8?q?=E5=88=A0=E9=99=A4=E6=89=80=E6=9C=89?= =?UTF-8?q?=E5=88=A4=E6=96=ADHot=20embed=E7=9A=84=E4=BB=A3=E7=A0=81?= =?UTF-8?q?=EF=BC=8C=E9=BB=98=E8=AE=A4=E5=BC=80=E5=90=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mx_rec/core/asc/manager.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/mx_rec/core/asc/manager.py b/mx_rec/core/asc/manager.py index f1b2df69..c006f645 100644 --- a/mx_rec/core/asc/manager.py +++ b/mx_rec/core/asc/manager.py @@ -200,8 +200,6 @@ def initialize_emb_cache(table_info_list, threshold_list): option = 0 if ConfigInitializer.get_instance().use_static: option = option | USE_STATIC - # use hot always True - # option = option | USE_STATIC << 1 if ConfigInitializer.get_instance().use_dynamic_expansion: option = option | USE_DYNAMIC_EXPANSION -- Gitee From 5798ba29a7fbb995c69d8f7eba1b40f786fce438 Mon Sep 17 00:00:00 2001 From: wuhongfa <1660398197@qq.com> Date: Thu, 18 Apr 2024 09:04:15 +0800 Subject: [PATCH 044/302] =?UTF-8?q?=E5=88=A0=E9=99=A4=E6=89=80=E6=9C=89?= =?UTF-8?q?=E5=88=A4=E6=96=ADHot=20embed=E7=9A=84=E4=BB=A3=E7=A0=81?= =?UTF-8?q?=EF=BC=8C=E9=BB=98=E8=AE=A4=E5=BC=80=E5=90=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core/utils/common.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/utils/common.h 
b/src/core/utils/common.h index 0861cdfc..3ef0bc65 100644 --- a/src/core/utils/common.h +++ b/src/core/utils/common.h @@ -153,7 +153,7 @@ namespace MxRec { {"910C1", UBSize::ASCEND910_C1}, {"910C2", UBSize::ASCEND910_C1}, {"910C3", UBSize::ASCEND910_C3} - }; + }; auto it = chipUbSizeList.find(GetChipName(devID)); if (it != chipUbSizeList.end()) { return it->second; -- Gitee From 8d95edca3bbef48e368ba766538d798f6bb35be1 Mon Sep 17 00:00:00 2001 From: wuhongfa <1660398197@qq.com> Date: Thu, 18 Apr 2024 14:11:01 +0800 Subject: [PATCH 045/302] =?UTF-8?q?=E5=88=A0=E9=99=A4=E6=89=80=E6=9C=89?= =?UTF-8?q?=E5=88=A4=E6=96=ADHot=20embed=E7=9A=84=E4=BB=A3=E7=A0=81?= =?UTF-8?q?=EF=BC=8C=E9=BB=98=E8=AE=A4=E5=BC=80=E5=90=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core/utils/common.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/core/utils/common.h b/src/core/utils/common.h index 3ef0bc65..95a76ca5 100644 --- a/src/core/utils/common.h +++ b/src/core/utils/common.h @@ -116,8 +116,7 @@ namespace MxRec { namespace HybridOption { const unsigned int USE_STATIC = 0x001; - const unsigned int USE_DYNAMIC_EXPANSION = 0x001 << 1 - ; + const unsigned int USE_DYNAMIC_EXPANSION = 0x001 << 1; }; string GetChipName(int devID); -- Gitee From 42400d51205c3c50492333b8ad25e3843b0bd989 Mon Sep 17 00:00:00 2001 From: sihaixianyu Date: Wed, 17 Apr 2024 07:21:35 +0000 Subject: [PATCH 046/302] =?UTF-8?q?=E5=88=A0=E9=99=A4=E7=A4=BA=E4=BE=8B?= =?UTF-8?q?=E6=A8=A1=E5=9E=8B=E7=9A=84=20USE=5FMPI=20=E9=85=8D=E7=BD=AE?= =?UTF-8?q?=E9=80=89=E9=A1=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: sihaixianyu --- examples/DCNv2/run.sh | 33 +++----------------- examples/demo/little_demo/run.sh | 1 - examples/demo/little_demo_estimator/run.sh | 1 - examples/dlrm/model/run.sh | 36 ++++------------------ 4 files changed, 11 insertions(+), 60 deletions(-) diff --git a/examples/DCNv2/run.sh b/examples/DCNv2/run.sh index f30e0ac6..1709959c 100644 --- a/examples/DCNv2/run.sh +++ b/examples/DCNv2/run.sh @@ -75,8 +75,6 @@ RANK_ID_START=0 export MXREC_MODE="ASC" echo "MXREC_MODE is $MXREC_MODE" -export USE_MPI=1 -echo "USE_MPI is $USE_MPI" export py=main_mxrec.py echo "py is $py" @@ -103,30 +101,9 @@ else export RANK_TABLE_FILE=${hccl_cfg_json} fi -if [ $USE_MPI -eq 0 ]; then - echo "use for loop to start tasks" - for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)); - do - #设置环境变量,不需要修改 - echo "Device ID: $RANK_ID" - export RANK_ID=$RANK_ID - export ASCEND_DEVICE_ID=$RANK_ID - ASCEND_DEVICE_ID=$RANK_ID - if [ -d $cur_path/output/${ASCEND_DEVICE_ID} ];then - rm -rf $cur_path/output/${ASCEND_DEVICE_ID} - mkdir -p $cur_path/output/${ASCEND_DEVICE_ID} - else - mkdir -p $cur_path/output/${ASCEND_DEVICE_ID} - fi - nohup python3 ${py} > $cur_path/output/$ASCEND_DEVICE_ID/test_$ASCEND_DEVICE_ID.log 2>&1 & - done -else - echo "use horovod to start tasks" - # GLOG_stderrthreshold -2:TRACE -1:DEBUG 0:INFO 1:WARN 2.ERROR, 默认为INFO - mpi_args='-x BIND_INFO="0:12 12:48 60:48" -x GLOG_stderrthreshold=2 -x GLOG_logtostderr=true -bind-to none -x NCCL_SOCKET_IFNAME=docker0 -mca btl_tcp_if_exclude docker0' - - horovodrun --network-interface ${interface} -np ${num_process} --mpi-args "${mpi_args}" --mpi -H localhost:${local_rank_size} \ - python3.7 ${py} 2>&1 | tee temp_${CACHE_MODE}_${num_process}p_$(date +%Y%m%d_%H%M%S).log -fi - +echo "use horovod to start tasks" +# GLOG_stderrthreshold -2:TRACE 
-1:DEBUG 0:INFO 1:WARN 2.ERROR, 默认为INFO +mpi_args='-x BIND_INFO="0:12 12:48 60:48" -x GLOG_stderrthreshold=2 -x GLOG_logtostderr=true -bind-to none -x NCCL_SOCKET_IFNAME=docker0 -mca btl_tcp_if_exclude docker0' +horovodrun --network-interface ${interface} -np ${num_process} --mpi-args "${mpi_args}" --mpi -H localhost:${local_rank_size} \ +python3.7 ${py} 2>&1 | tee temp_${CACHE_MODE}_${num_process}p_$(date +%Y%m%d_%H%M%S).log diff --git a/examples/demo/little_demo/run.sh b/examples/demo/little_demo/run.sh index ab74adb2..394ce1f6 100644 --- a/examples/demo/little_demo/run.sh +++ b/examples/demo/little_demo/run.sh @@ -100,7 +100,6 @@ export TF_CPP_MIN_LOG_LEVEL=3 # tensorflow日志级别,3对应FATAL # 设置应用类日志的全局日志级别及各模块日志级别,具体请参考昇腾官网CANN文档 export ASCEND_GLOBAL_LOG_LEVEL=3 # “设置日志级别”章节0:debug, 1:info, 2:warning, 3:error, 4:NULL export MXREC_MODE="ASC" -export USE_MPI=1 ################# 参数配置 ###################### export USE_DYNAMIC=1 # 0:静态shape;1:动态shape diff --git a/examples/demo/little_demo_estimator/run.sh b/examples/demo/little_demo_estimator/run.sh index 33770e59..30b5e0c9 100644 --- a/examples/demo/little_demo_estimator/run.sh +++ b/examples/demo/little_demo_estimator/run.sh @@ -83,7 +83,6 @@ export TF_CPP_MIN_LOG_LEVEL=3 # tensorflow日志级别,3对应FATAL # 设置应用类日志的全局日志级别及各模块日志级别,具体请参考昇腾官网CANN文档 export ASCEND_GLOBAL_LOG_LEVEL=3 # “设置日志级别”章节0:debug, 1:info, 2:warning, 3:error, 4:NULL export MXREC_MODE="ASC" -export USE_MPI=1 export USE_MODE="train_and_evaluate" # 支持[train, predict, train_and_evaluate] if [ $USE_MODE = "train" ] || [ $USE_MODE = "train_and_evaluate" ];then diff --git a/examples/dlrm/model/run.sh b/examples/dlrm/model/run.sh index 919f0f98..f5cb4449 100644 --- a/examples/dlrm/model/run.sh +++ b/examples/dlrm/model/run.sh @@ -75,37 +75,13 @@ RANK_ID_START=0 export MXREC_MODE="ASC" echo "MXREC_MODE is $MXREC_MODE" -export USE_MPI=1 -echo "USE_MPI is $USE_MPI" export py=main_mxrec.py echo "py is $py" +echo "use horovod to start tasks" +# GLOG_stderrthreshold -2:TRACE -1:DEBUG 0:INFO 1:WARN 2.ERROR, 默认为INFO +mpi_args='-x BIND_INFO="0:12 12:48 60:48" -x GLOG_stderrthreshold=2 -x GLOG_logtostderr=true -bind-to none -x NCCL_SOCKET_IFNAME=docker0 -mca btl_tcp_if_exclude docker0' +interface="lo" -if [ $USE_MPI -eq 0 ]; then - echo "use for loop to start tasks" - for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)); - do - #设置环境变量,不需要修改 - echo "Device ID: $RANK_ID" - export RANK_ID=$RANK_ID - export ASCEND_DEVICE_ID=$RANK_ID - ASCEND_DEVICE_ID=$RANK_ID - if [ -d $cur_path/output/${ASCEND_DEVICE_ID} ];then - rm -rf $cur_path/output/${ASCEND_DEVICE_ID} - mkdir -p $cur_path/output/${ASCEND_DEVICE_ID} - else - mkdir -p $cur_path/output/${ASCEND_DEVICE_ID} - fi - nohup python3 ${py} > $cur_path/output/$ASCEND_DEVICE_ID/test_$ASCEND_DEVICE_ID.log 2>&1 & - done -else - echo "use horovod to start tasks" - # GLOG_stderrthreshold -2:TRACE -1:DEBUG 0:INFO 1:WARN 2.ERROR, 默认为INFO - mpi_args='-x BIND_INFO="0:12 12:48 60:48" -x GLOG_stderrthreshold=2 -x GLOG_logtostderr=true -bind-to none -x NCCL_SOCKET_IFNAME=docker0 -mca btl_tcp_if_exclude docker0' - interface="lo" - - horovodrun --network-interface ${interface} -np ${RANK_SIZE} --mpi-args "${mpi_args}" --mpi -H localhost:${RANK_SIZE} \ - python3.7 ${py} 2>&1 | tee temp_${CACHE_MODE}_${RANK_SIZE}p.log -fi - - +horovodrun --network-interface ${interface} -np ${RANK_SIZE} --mpi-args "${mpi_args}" --mpi -H localhost:${RANK_SIZE} \ +python3.7 ${py} 2>&1 | tee temp_${CACHE_MODE}_${RANK_SIZE}p.log -- Gitee From 
d47551445874f9208cb722a5e516701ba4635eea Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E4=BD=95=E9=9C=96?=
Date: Fri, 19 Apr 2024 14:32:28 +0800
Subject: [PATCH 047/302] =?UTF-8?q?README=E4=B8=AD=E6=B7=BB=E5=8A=A0mxRec?=
 =?UTF-8?q?=E7=94=A8=E6=88=B7=E6=8C=87=E5=8D=97=E7=A4=BE=E5=8C=BA=E9=93=BE?=
 =?UTF-8?q?=E6=8E=A5?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 6f49f4ba..fd3b0691 100644
--- a/README.md
+++ b/README.md
@@ -119,7 +119,7 @@ bash test_ut.sh tf2

 ## 使用指导

-mxRec所支持的使用环境、功能特性、API接口与使用样例请参考mxRec用户指南。
+mxRec所支持的使用环境、功能特性、API接口与使用样例请参考[mxRec用户指南](https://www.hiascend.com/document/detail/zh/mind-sdk/60rc1/mxRec/mxrecug/mxrecug_0001.html)。

 ## 参考设计

--
Gitee

From 3a07cefc4a4ee873a07a408389a383e361034b2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E4=BD=95=E9=9C=96?=
Date: Fri, 19 Apr 2024 08:39:53 +0000
Subject: [PATCH 048/302] =?UTF-8?q?!89=20README=E4=B8=AD=E6=B7=BB=E5=8A=A0?=
 =?UTF-8?q?mxRec=E7=94=A8=E6=88=B7=E6=8C=87=E5=8D=97=E7=A4=BE=E5=8C=BA?=
 =?UTF-8?q?=E9=93=BE=E6=8E=A5=20*=20Merge=20remote-tracking=20branch=20'or?=
 =?UTF-8?q?igin/develop'=20into=20develop=20*=20README=E4=B8=AD=E6=B7=BB?=
 =?UTF-8?q?=E5=8A=A0mxRec=E7=94=A8=E6=88=B7=E6=8C=87=E5=8D=97=E7=A4=BE?=
 =?UTF-8?q?=E5=8C=BA=E9=93=BE=E6=8E=A5=E4=BB=A5=E5=8F=8A=E6=9B=B4=E6=96=B0?=
 =?UTF-8?q?=E5=85=AC=E7=BD=91=E5=9C=B0=E5=9D=80=20*=20README=E4=B8=AD?=
 =?UTF-8?q?=E6=B7=BB=E5=8A=A0mxRec=E7=94=A8=E6=88=B7=E6=8C=87=E5=8D=97?=
 =?UTF-8?q?=E7=A4=BE=E5=8C=BA=E9=93=BE=E6=8E=A5=20*=20README=E4=B8=AD?=
 =?UTF-8?q?=E6=B7=BB=E5=8A=A0mxRec=E7=94=A8=E6=88=B7=E6=8C=87=E5=8D=97?=
 =?UTF-8?q?=E7=A4=BE=E5=8C=BA=E9=93=BE=E6=8E=A5?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 README.md                                     |   2 +-
 ...\347\256\261\345\234\260\345\235\200.xlsx" | Bin 19596 -> 14736 bytes
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 6f49f4ba..fd3b0691 100644
--- a/README.md
+++ b/README.md
@@ -119,7 +119,7 @@ bash test_ut.sh tf2

 ## 使用指导

-mxRec所支持的使用环境、功能特性、API接口与使用样例请参考mxRec用户指南。
+mxRec所支持的使用环境、功能特性、API接口与使用样例请参考[mxRec用户指南](https://www.hiascend.com/document/detail/zh/mind-sdk/60rc1/mxRec/mxrecug/mxrecug_0001.html)。

 ## 参考设计

diff --git "a/docs/MindX\342\200\242SDK\342\200\2426.0.RC1\342\200\242mxRec\342\200\242\345\205\254\347\275\221\345\234\260\345\235\200\345\222\214\351\202\256\347\256\261\345\234\260\345\235\200.xlsx" "b/docs/MindX\342\200\242SDK\342\200\2426.0.RC1\342\200\242mxRec\342\200\242\345\205\254\347\275\221\345\234\260\345\235\200\345\222\214\351\202\256\347\256\261\345\234\260\345\235\200.xlsx"
index 2fa2165b6f608106df678d1afd31562884a3029d..de085d900879882e548b9fa5cd539bb2f698cc0f 100644
GIT binary patch
literal 14736
[base85-encoded binary data for the updated .xlsx omitted]

literal 19596
[base85-encoded binary data for the previous .xlsx omitted]
zgWjNJ#_*FFLJ=qTL!SeX-1z@(4T_cC4srp|Br}|ZOeHE>wLNbKu;D|7N5(k`e2S16 zL{a8-Bcx5~It+y2Bvk0nz@DYJvF8R@UquAVfMEKG$?AA*&)xSL#EA%&X`7K6o0^)u z1895lWzF<+iK{$5c?WZ7L-po-SpdVa*UnfZ%0xQ~yt@LK@i1_7bnt3)tJBK=)yd(@iQ@~7!XGKjU4`>K*MFunJ$SYH z8JHm zfW?vJ!GQ;`0Ezj@>p%VeqL9LzNYq1f<;XI4-^LRCFTYQH822w2pud>jXP^UfqWsL_ z`!;j5+QMImOl;RBm!F-ND2yiN|4nCg(7*TJ95@g|_iCm7e4zvs%iQMhEdPfE`vufz zjxp>#<4k&!kOQgM2U=U7Ng@?<%zgng#+%sKay!^_^?UmHHwQIUwavdre|i0xLeapzk%dhF z5-0Wkli8{N?nhwPd-nySj=7TNzvQQmv>5rXkuiVaQuvAB@*e=bZ7BXN(5F|=SCd#q zk{KETSmq$RNL2DC8UnjvgGxuW?_WbM>0hVR|Ge`29bD8q6Rd+*)1O%v$e0-;{dF@= z`gansC5UN-=(~Vo1%_BN=j=YX51km7Q-XgNK7dS zP=xgYB1IxZqaDof08+Z#iEWT12MOWN-v*zb}ZUEhsS)-ywp?J9=&2{virH8VWE+Rj#NZ0AOe*w35YS#v>U+* zGWqKTK7quJK6*A>*75-S1NN;teK6CZ(Tl$jX;?ugiJSVz@rD_Oy4zAG zL2SfdoN<_L4Y!}}=dDCvWY%JMe!}dqg#T_1>LyH3PXw&C!LH-Jga@$k5BG=DkcR78 zucuf`G6eNlKX;ek!OGf=bmAm^XW&4&sMv#IQ-#TAfMO=q3xm^InUI0Xh6E?^qtl_#EJd+ ztDe{e_A)Tlnw5j~Qawm{`RxOwh#y8k|7m=xJ75);vf)@h=8t|D!Xu|O5|$eQZ5L=h z!(bE!L9tQZtP=FwR-;hk4zvvGGp6vAnq2*-_%t`Ys+C|Z69!+);3Dj0tVoZYgUVpdb`eVBhAg zwd6Q0cJsX9DB~Y7=CtzDgR(G>fD^S1MyDd=OhpK4p4A*3=2(b(B1?LnC3dXs%X~KQ zsI_X9Reo9d}% zb|s9w$U@w*cB00S2t%PC9HUx6ili=;X(VEwo3Bqj^MOdMJC1$3m{ri$*I5zKsZ*^f`0=bXT^yy@-S}U#Xi7+vyiy?ky zVV*qYW$D5yZi~$H#qP94r?~mPHn7+UB^NOLdKjjFe#6b7ccTUq(rSV6mq)?F-n`z} zDy=f%Suu$jWRNikQrVi=LqkVetg*6j@RiJ0R!3s1QS(V+aS}P%!SekPn!x4r#C9+0 z8r#6e7#CkDV8!&GSzCp{r|A9V(p+%*W<$$_r1=W#ySiQ&Cg!lY=@hQ(LIkMGtMut( zY7%ol5))&>+KbYKNAsk70@w=zla-zE|8o5F$}(jGD|5;Rk}4>`MP-bgh~HDrJ{4s z?ykUuJl9?rw#$s2SJVF!%-A_kY;9sGS+D@0|Ibtt1y&m8tSWFBe7?Y z(sbz#FwNoeO7)mX!vM!*pk61I}}n4Nu1cE-Om$jmJ~s84=hMu7+G~W3UaPBqT!onla6G-@g|sZ~u5t3nC)_ z@d283Hqz=dgi)vH9+s5z65+B{CLm8Djy32OJYWh@Z$&5p zmH{Gah$P7ZzT7wXY#kh|ZzU#;9!f}-Y8i-c3<0HAayNH#q0Qri)$abj`?^(k6T+9p z=kEA^37$WK$Sri#kLLL_R_x<>Cx7`eQ>DZEz8a0r`kwq*7k?_Ac>PYES13%^>3V%q zM@HBAy4z%f@39Gv18eD;O`;0IcBy9j^V?=RT!aM&-sM94;Xo)m5;@o!-W)C_S2wnU zkXJLQbAU^T;KwCk)z~>#=G?V5+VG<|;8iGNO&x zv*9L!8HJ-5(%{Id2|>Yf*w|HO&d{%k6INcQSUH4H_lOIFyGhci{ z%$JDMv8TyDPJw|^vh+s$CV@ehL#w-+Rf)GJlf18Hd__0?!uH}tyqTh*4A@J?Gs+Qh zqakOp%#Ahpp%ii`pkz|dn**a22zPL9-;e${ZV)btKirGhVsjQ-O~(-&k3@LbyTLMr zbaimx5|_8ZI64*Ukddq{w~xVGEISi(Z}w|&hfs~VBcJ;$=B=0}W$FG3zx#Hq72>^+ z+t<`7Xj8i9(@96C!Qg1tfXTdK)$t_r>T$u4TTga?f)Cm8hF1L?(Mta-@RlDL9lVA* zSXAjx&RUN&JEb8h(E>9f{~r(>_`b-}S#VBC>!^W!SG7`=d{YKL-niaC_tr4p5?aNi z(J+U6ZG*fxKuPETU1Egp`&`7N6A?Ta@X+m8Muz0-x9V)2nPS&q=xo}AGFz07=;lJx znmF6E<)dOVTQC>y>P-Zbo*v!`8hY~N(p+U2eNl4;?uTq8oJ zjf+_Fnsv2rl>3aor)zU_LW4Q}u_4Scr=Xle2@w_QzUsmP%$$YRR8gXGyQ2%ku_e7g z!hFzA=t=BLiV8ACBNo(3(HAsACp}_To%b)(R;jPD$g<-uLV#%I2&C!--V04bIENR# zw5RalmbX&&Xt-=D&(OS68O7cTYIoXK;`Vh|EWIe&USHhqy)vRLz5CH1CVXUXsI5(g zdOw#L4!~Y4U}OU1FQ4G+cQS~+Z#fc>o}8f?5T*`gg~fTIEpHJFi@h*#CNPZM{17KH zWt3<Ap5>QJJE_wLe;YNKI8jbCiNQvZc?k*6Tb{DjceJ*Hpon0kYif z8X>~|z|Ug415AA-iBB|F(azg!(LPn0Nb3yboS*{Km@&#At=0}vV6<#M%VM=fVA}&* zWwG3NHXuP_4&6Xy_i|zdxF}apy4Zr!VR17Kx~r@PEx7ahUbas7qeAxsv>WUpccgx6 z*h!?29`}2|{RhZ@mh?n>>*uRJOM0|6005txuN#Gfqno9X!&e<|P0J!0TLtqys^15L z!yg`E0MElS)zPJ|(nYisB{vi#5QH3SWoHf1Aesx0tW7u($P5Anv;+!)JdwOY;PHE- z975zW=^Il`-AhuiQ#g3THiYgx~gIzqeKMlZrw%>FaFs#tv zG+kUv6tSG(B070nP2nQ}U#IIbJI{MnOuCq{ID^-55%W5gN)txze(11)gGToidXquN zcb>mGa#K5>zRS6cHJYWKBkn21JZDN)UBFw4+Gyk)=jMZDY%%t>+BoBQ7(T52>@GEY za5o+ZH6>q=jIx^{S(k^_zz(p}IcXy#^f(S%A_pa;*O{o+i8uAp>A9<=%MhDgJv$h^ z9w{ZEXp(qYs7*bwIACqNN~a^k`XhA@Up~p5ZWHfmu8HaX0mMkc+p&q&{oWU}C zJeLM`K3idWE9jgMDdO>B(wP`-4sM>*1ay$Nuj+@cLQ5^N5@{*FdG9)qJIX zx+ONi>RscZ_^r1M(n3Z_i=(6^=TUY$oP$$MvWaNfEHBUJgSO)AE)VPZac_^hoSrVm zS7OcmgM7P)XwO)ooFXG6aa2ioiSTYx2utHShWzjH<>Tj*5xvWcnbKIcFqh_}vcqbC 
z*7|0@IG^@en6Hz?cil$IXqMYqS)*H{M$jCbNy%V2W+tQ^l9GIfOO@xa?t+3~Q}=fQ zAuQalpOw3uC+!=f556y>fU^V>d8$fA#G%fh^ zs@<)wlAgBm{KTw!bO<$XfKZV#w~Jk0#v38`l_L&bDN&{i3;+wCKE@`hPJx@ zBo2IculytZ-)87*zwi5CPRRI+y}#*+T&o}%llXko)Bb@Dkd&s3_4h#JT8ClO5dt>g zzVXaaPwU6^!tyRDOiG7`26obvgWlwE$_x1JOhbqvghd-|`L30fqqNrs-XWT15>WVN zyj-t8X}Oj)+A6TB2h6jt2>Z7mp*vX;fLIy)fO0S;ye9i4r$qBK^DcxBioe?d ziUy)xHeR2KDZ{x_)RVx{1ehDhf#l7X>YgbRx1Mf64vOa*3Div{31E)=mx@ah=0*fW z-f>oBf?i{Jzl~F2QvAq6LX8d1B^Or44q)#k8*lO>^vbu0deP&A72WSY2mJ}5p&caU zO%7YQDrcV9dT>)-3(VCl(({0(5jlz_AQVdz7)oFOf%u!-(vZqb%}NMm$x;Xe^K)y` zP-79Thb|W?$`==!OAuB(Ep#UJ4N)~sB7SZ@fs!IA->h2RuaYUNP8-SsN@ZilYCoJT z^~7Z}hi{RA`ULB!`K=n=849(Q;-osnS-j_+r8XegvY#lqx^_p$9rydi$UK;3$`bx< zB;e@?1J1%}j1~{>qq(bw9h?V~C2H#hMPyY0e8_w(*ma5H1@^D*&a+7wnwL)8!yaob zC(DJyW=B44%u%koQb*`H^;47@dN znL092lu<3$QlU>{80RlY86+U|tWgsVUjmOQG05y+F99*4*;9_bRi-NFVL6O32V=Id z%%s#K=j{T#*dqIl7=`=>N}kPKJrToV$i>ec5|aQJxi!d$qTmQuu!HQNPN*}S-h8$W z5&_&(I%FnocIxpZ;ZfG18Y~r~8hk68foiyESEjl{H6J0&Cf4bJFS}9FG$8MThg3Jy z)KaA%-cIAg^d9^f^{Xd95^Zfap|Oxb49g_a4@a4Xe#C=dR=3h^)bei&@`H&<7|EK*nHn@Lq+BZM+o1=IM_ab38;#&w zarJ^6V?wGUuZ=tq$%&uqzq{<)PgWk)NuMj*{+pG1U7F9;X6QlFS&#aD(-}Wv)#tiL zz1QhoV~|qF2ey9HW+i~Q^SZG(Q01`W&b?j{eqwvHyhKoVxcb>+k?SIjXFK|p8M?){ zi%ny%VolJF(`9e0VCXh_DOylTghsgQ+8!Nq}{vxIO2j-Hy~JVrP@J8CJvcHK18b|Lj)YxDfh2gJ$9A8^>-S8B|h z>1}&)+s6ln;+TjL3$f;N{YWfl?f$|*7g@Q+xSWQ7pT$mbN-iEDCuTwKOfUzBu;oFG zJV+FT`Z823>f`zPQBSPb00;~YDDFtKso2Mjx#iJCWasKG2>s;E#2QQTPL}`&wKuQ{q2YTd2EuSc`e|Ki5nmal;909ht$>(bOC_W}| zKtMP&Xb3$!^YsQ!*YkRJ>nP+R`}G+aOoN>#yiQy-jB3wDOqe1THX1dh3`Iyl+@x=( zn6h7%ClfZBBIOL(l$f5ZJQ)>FRf(`#zXPc@SD_KcU>z~BE|GSgDgri|i(OPoSEgJA z1G`z^vLbv7$0OY>_NdF6f1xkyh3|{N~n&AB|&aK^H zcnjFg8|;DaG|+ zESKBvQRG}bR|1a2L&5YdH_Ro}!e8)F7$KNRfd(uFH6Ygk4h`&?LS$MZ%qRUET($tI z46;6^N|)U957g|%s<-@`kG1~AdOOhI6TP*VABJOn>g4QBh5acx@p9E4T%DpxIZu2SB3Cdqb=Sk4frPiQM9pi87x7e551ApVX;}JHEK}PUC=&~P{sOr zOBREy3J)p!EY;yNixnZd86=+khZHC8sxGayO~FQ4{dZa(rLd8WOI5QL9ox@6d&#vj zGb8B=#SQ-&5GMNxf41%JAuL)RiMM$_D@;XBj4jx%!$XG3SHO0{VcSlV4XB?p3>KPp z3&GNe&CIw&1BvIU9&XoqkzIVo{6hHZ$D=g!>J;JJ>xCRhuU@O%Zb5+T0H3WoLvnqLf# z%SHXa1~(Z-^V7Fks>@fLoeId9ot>VW zo45S1?f~5+p^IyCB&Ry?P}5MODOrc!(=E;|*Tho0lwcc>ZTAN1_;YTBHknMdOD=9x z0B0T`?1xw=|CrZS@d5mwPj(;?h_qu!008x3003WKhkrfUnd;da87es1n^~LueZc#z zY2|=D;N`QC>J6SEL9JiV2K@a}Toy%Ip3y==UuX~>k>zm98W6`$wsj2{A}m8bgFI=5 z+}y;l;l4mRqlPDIw&uDh%&O%s1d?C8>KXNUMGF8Vn*H*--*E&8-nsKf(?`e~AKmz< z6}xk(43N3o`BF`J8m1)T!ll$mMFwu0cJr^%h(huiErlkYUnoGF6#5XapAL;^>R3p`Ql^c^?3MuAo~u^)a!m= z-XvRheq0pZ#ruludw0S-`bF2{+1f$Zlf}uF!8@n#^Ch1P9`9<)3ktDEQZU;o_ahcl z7GKZeL-xj7izXIYkRNFDk@JM9Q(qCb{hUwROXv4`xo;GN))eUadPPwWFnn3Hrj!NE zBiu08SQNoiF-rMq`Uv1@*TWQfJJ{c78&Z_WO2CqNMt+Np(gYA5|G=E#k#b14pBMKn zpr9=UObOz^DNRv<;}-S&*DIM=y<=vX# zBo43!eD*2#yTb|m$5m4spT*h-k1JIu#lSV;#=A_^^81!Wckc}}2>ar1>frYL(TS$> zkCXeE35{$Xu+907E=xYI)#tsA`GpO8!p8$T^#{JBS4*C!PN$2T%$I0F=ZTNo-#7TY zuJ?O)#!wB8<%{F9xJ z_W`TPBteJ!cTFPC6vhmB=)J<78~P7X|fk%5ok&sS)cjOQxAnQ^z8?$!#$ zD2!G`Y&_|F_bVH0Z^12U%+&aKsq6hRVP3|HVe)+nqX#?Fo_RZ0Y+*y`IQs6$oOBpI z$nK-iX-6Eqqy#*`musd;z$0@Nx-(AXV3$GG>p6+(flg?Now2PH{rV?=mK4|q3Ue&+ zWGDPs*XR)9M~6~r=$lNwCs0oCz8x<;L}$Ofn^cf>^_$!>xRB)B7uH1ao}R+hY~{mK zF!cJOPyqaDqUrk8(D`2NbRlWhSztc%YCL2=W~m%xkgIHs{;<0ERz#yYZ()eZoKLx; zJH@sZgTWT+k-m69(^QV6+);t0%8u+ge`&w9k0!^f(-`S| zXaGXb!&1275JLkFLVkpQbWt9Pns`xoh;_Er+Vxw5kmiKG`MqZlmD1d0hD!Btl>THO zIeJWn#45T4Ud*BAbO0Csfy(CQr?R>F=&)}rv*CVIPyC?Wa7`2v(2J9vA z-7$X<46{5p*CD ztO7AD;QUr)cygYilz4!KXyKq#pq`av>0DKa9OdzFCRfVr=|;jllP4nPrq4pizm4GU zU)KXLOz?*rP~QW!Aza8@UnsUwkw(=q?4VYK^7mOmET>6taS|{5#C`9UJPq6N4GeuV zVk1F0F>fYO=x(({d8g=QUs#c=fppr0&`MP{K*LJi(|8Bl?Omv`o2iOQ|JYPJhf*;F7R=_AvBL*V 
zg^?GFlowQHCtuMP90WASO|aX`k=`pNtz%luy42HThldSv3m3;I#d=hLq6+`T{e8)R zn9L({G6*S$yxSHX1JB;KzH6WG7T{bU2IDvs32_VRFO4#$js`QpiN1^(U4S2!7isJ2-lp*341xzLl+5Sh$5-%Fp48_0L&g$ zJSt55x8nEktr*ZU5Zv7G8Zi>VhTB}g-B~jyfdDhzEQ2-DI9)MK2N7 zN>pytZ89s+5+umJ1c*4yAY*|8i;eE;M1uI@t~F3xk(qtkRebINxitE|bN?x&0}@WAw@1UOK+*p_f77u;cA7Ge#XW3hSBA<_ zJ#&Rs)10@tY_BfWGon1gd(3IAGUr{lv7t*s&_@T;lmLu=D2-4W{PQmi$fp9eGe%91 z3j9NZk(Lq)BM!FApFQ-~mHcT0T##Bk92FeoWZ=@E`bvG{F-n4kvx==3Ay!OQ+J)iB zrSfq~hbzw060oM4Z+X!dvv7Qz;X+V?wAra%9lbIi1`P9WKV!;8yXER0tfKWyaxx1% zmKdDkQ;$GOY0~aiu^q`}U?B!B?RO2Wz|n0%w8LxTvh4642UWDDOM(Vpbkz`KN(?Xu zm)wj<>-lOS2!Fp{vuu$GcqEDYf{_@Ch!d=~a>`eWPx_#wD3V|{Cl2_wvE$A&E@Lk@kGpp6@I)02f*aW9e%C= z;mpS$wr{ik24k>IC~_ zi%_%j*Wx92(eL_H3J1~!QiD@oiOx}8xgA^lr+rIsTS!(?_i+SxyQm>4kZ1=8O43I; z#-~?ZLX4+Yex?n9xAQ$u(OmGu)rvZ_x2yhk{0syiSZpKZ8;~DT{eRBz5A10l5@j4u z5StxLV-NC{oCI30Zobmg1IWVNrZdeiIS@%nEvSxWK=ZLz^~s3$01d^kQm~RBe#%*Wb1oQ7 zwZv_&CsMeKJ!&&++cGc=UIVH)c)a9wJtfshH9%t?u4v6J3}(UM`Ua0^cb!>7hGpJ1 ziIP}p@S}t(-2Zy^171B(4;r_)Pp@gc`VP!Q0$W-bMF$)W&I1L}QG-Nbl$g81SInrz z)=5sg0~|uO_!JGUKk<)3XC{=h7Cf~)pIS5b$GpDX)|$)DvY;m_^!+NW7#j@I+ahfY z3?W5jH_~Nz$+ZH_2Z5)lm79^|N*b*eIZuzP<>To&DuIwBYqHVFGx=~2=|nN`#(<*^ z*aCX6_AMDCqQ+`x6b%D2r@LqUnrc@!Be%}#P{z8`SIKGd1tm8R1>FuW7%a{n1J-^7 zMgl=Et0w2^z9-e>TU3OfD(Xi5EIp0!QS_}H56;;U-A{?JGS>;;v(KXf%mdf|%pB^+thK#SmxFnRV(BFahavf|_9=0Mwyl`12 zRePS(y8N`ogc9N_IDM1ih5UscwQIE)zqbr7((&;;)y9N>!zF`^BO0A+x^UnDFIl8o z=AjMHz7}8&6h7>6NF?Ju)Ck2q5Zv#mbELrZX8(4?(6yAEx@|bs8IOp_dd!QQ!5@ur zVTw*fTbU59BX3?H-XKYpoRD~8LvBOUSxl`(K}}0lc0o?HP^sPADW@8Qu}`YVEz`K^ zUDXGvek4KWsuy8Gb`a17 z@sY97-J%=hfWFJYQp(_5q*mkrff8kDD`-DO5~l^Z{oJlXQRKQhMuXwY%Rm< zhkw2ad)wC!$^s8u?j6PmD2=iM;GPFw94J^ii8yOxw8~C#p@vNQ`OhN)GizIIhDz6h zDN!ec410_aGLW390T$h*Bv{KZ^3&`dKKk=WPCeKGSUlSo336^Qn#*AebcI{2Bav?A z#Ya+U?(8iYxSICic4y=hBgm-evbk+1Bf;hT@BuuJ2`hVTER9Vr8~nZ)_yG{e?t(+i zCmN|1ip=xa;0r9QYdgT-hQP$dR2o?7NMd5Lvh>7$<2gHPCkXEEl7qEg)mZQ%4>fWK zgF({FbL^+!stZP&ElR`~yU^S;aVZTCE~o<8c1}mv<>i*hVr5?;r;1fyo}W)OF2~@Q z43zr1W5fOxKv{qv&e}#W2B~(BllQF42 z@iJRjy2qjX$0O*82FC=M;#a_LV1aqSz7h&1a76=tYh^BHib=aa>x7rp0_#@to&heD z#805Gw3}q*+71lb4+w>A?IuMYp#XfjOu0r(>c%31GaY%!iknNywbxoAuMF zy&9K>IUBfUY^WRYeAfPw;1-JJw44*dh6A#Aw(1bsSnC$Po z=T~s>|0sWb{h6$9V`K64X+xr-md!Ffir43ldogP5{>Z!%{DDwdc}nJGP^xV_gz>As zN&``r3crfp9<)NDi%ytp$PTnOJjPoFoWWGuG+pU(U{x4N`1{E4289YWD+R8t@D}L8 zOsL~2kR=4fw0mO8whm+iDyAnS3$u7YVn|6WEPnD9lBx-gO9`S9*pVr4nxF}GPi91Dne#U`GG~)IX`<9MVuohs7Fq-FCw;x2<$NvYE`<(1rRerS& z$LRv(3WUiy&K}xKOdX??ru`dd|4h1m`xcOk2Thmw;U)8Kz@KjEmq9&?^=sPOM$IDg z7t}{{tEpb=rk|?RHLsximY8u!ValzN&~g7m-2JZ1YSxpwz8%>rohK37ytEqT%e(xd?Cm5bgK+ zN2M4^8h5Lu(KD1uRqvZYo$JTy3125K|3bU9u&dz7K`nBr{}2pxPzj9+z*DWX;6K=g z&!VIn56O!MKWiy;>_mSY%enz}zMo&Tzhg}c**$1>;JDZ5Fz&zwx9h;YeSLQIoT_xm zYo0p-o4PAN{@~O;$`l;sT|LPlQ^!nP63Me~?V85d7PS>0vDS6YznX#t_o(k2`4e(V zk$Q#jPe zdz5S=8s)Ys_y@OxC&WN}@xaIGCENe~O(xjqP}8wDvizSvzk9Gdw|{)LZF~+%=zrVL zepyz;O-S_9BmBAWzd+dUly*TyTymr%I+Zr`@=cNYm?TAxQ#)X>*8rbBJf1+$(;D?%uh`UW5YxZ7ct_&$S#IpOs|%%?rpo^~vqne7OK$o1^!%I;wZG z6h-w?wlfzypEuTi5;AdaY0d=Ep5~gxH}(sBb!B6-JYQ65clv#~SNoc^ zX}Y|wF|9L#yY4)V*m{m zD(y_;J&Hc*t}e$NmeBRB*n(5x$2-a2z+k)rJe5ESNCOAAA3oPClIlK7XV2 zU-OJw-P@VYoD(-|w&vXa!rWq8ClGBk>*eA4`v<#sv^~i4F>9LX_EozaKFs@e)R*PPhI==T>6G_bV4`Iq;n&bozo*v1-!iXEIoRm-{XqahvRuqq|JI2A;EDxiTjnwmq&m0Z__`m zc3gaZec6j2U9HP*vFY3co@%0U$SmXpTiRQX4#)WR$|%vc_OQ6(MG~)7e;+q}8)$@*`|{FYAu3$PY54h>N;tfkD0f@%ihFGsxzJ(GnhW(%NzZ){ip?EekH3? 
zG4PlSSzrjs+EpTc@1PI|MqbWa=;eiQ7dD16HomV(PY= zrW^EV7+v~SLOJ;>US~rY>St!o5Wh7v-V_!{@YC~JzG|p zT<)p$iL?5yb5Nm2XsYRijiL(DyzAGqO`gi^v*K35u`Yo-%+EErFE9naK5)S&Z?9Nm zTivO-6O0X6@4YX3zhjG91Lxd;>5b_2IXp{` zhw|OnQW@u^(7SUPN6?Dsumx6qDhJzz>TKjwy>G8rrc|wR%{=$uxUSQ!+EtnNzpk6OYrn>w3-139HU7DJ{B8a32OkUN z>wt4Mj7%cn`5DORJ?OCnj3@?11_dB#?}jzbAO>JRkpom1K>+9=pqb8yq6yVf^uh_G z2V&`7xE^q3!e<#Mw?Y8OG7vyIpa|7I^dpKuiXrx?BWr^mScD!#=tu4#bWa5)Z@8tf z19;H2qaPQ8&@KTiOc2`PhsWUaG*AZ=fIM9cJlzCX65-Q;8cI)HkcuvJhoYBpAd4WO z#N&#j4Wq_1(yBW}(jNbf17@+3}wih|r@HrZEstp8yVt5K9hC!O)XWXE=9Q~9VkX}fv z9l+3wIP(VG`RM0QAWX>trXXOjA-Mo-3ivb%bW_mx>my8Q;=yVPc=vvQH!HZ{5C&Eg MlYwr2><-cg0N2vEpa1{> -- Gitee From 76a83b83526607fdb2325dd9271b4ee84522ab72 Mon Sep 17 00:00:00 2001 From: longfeifei <962977793@qq.com> Date: Thu, 18 Apr 2024 15:46:10 +0800 Subject: [PATCH 049/302] =?UTF-8?q?warm=20start=E5=8A=9F=E8=83=BD=E5=AE=9E?= =?UTF-8?q?=E7=8E=B0=EF=BC=8C=E5=AE=9E=E7=8E=B0=E4=BB=8E=E5=A4=9A=E4=B8=AA?= =?UTF-8?q?=E6=A8=A1=E5=9E=8B=E8=B7=AF=E5=BE=84=E5=8A=A0=E8=BD=BD=E6=A8=A1?= =?UTF-8?q?=E5=9E=8B=E5=8F=82=E6=95=B0=E3=80=81=E7=A8=80=E7=96=8F=E8=A1=A8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/demo/little_demo_estimator/main.py | 1 + mx_rec/__init__.py | 2 + mx_rec/saver/saver.py | 43 +++- mx_rec/saver/warm_start.py | 272 ++++++++++++++++++++ 4 files changed, 307 insertions(+), 11 deletions(-) create mode 100644 mx_rec/saver/warm_start.py diff --git a/examples/demo/little_demo_estimator/main.py b/examples/demo/little_demo_estimator/main.py index 901bf23a..d8e801b0 100644 --- a/examples/demo/little_demo_estimator/main.py +++ b/examples/demo/little_demo_estimator/main.py @@ -68,6 +68,7 @@ def main(params, cfg): config_for_item_table = dict(access_threshold=cfg.access_threshold, eviction_threshold=cfg.eviction_threshold) access_and_evict = dict(user_table=config_for_user_table, item_table=config_for_item_table) + evict_hook = EvictHook(evict_enable=True, evict_time_interval=10) hooks_list.append(evict_hook) create_fs_params = dict(cfg=cfg, use_timestamp=params.use_timestamp, diff --git a/mx_rec/__init__.py b/mx_rec/__init__.py index bdb85131..d7f4ae82 100644 --- a/mx_rec/__init__.py +++ b/mx_rec/__init__.py @@ -24,6 +24,7 @@ from mx_rec.graph.patch import patch_for_dataset, patch_for_chief_session_creato patch_for_assert_eval_spec, patch_for_scale_loss, patch_for_session from mx_rec.data.patch import patch_for_dataset_eos_map from mx_rec.optimizers.base import patch_for_optimizer +from mx_rec.saver.warm_start import patch_for_warm_start patch_for_saver() patch_for_dataset() @@ -34,6 +35,7 @@ patch_for_assert_eval_spec() patch_for_bool_gauge() patch_for_optimizer() patch_for_session() +patch_for_warm_start() __version__ = "5.0.RC2" diff --git a/mx_rec/saver/saver.py b/mx_rec/saver/saver.py index d776b699..dc545822 100644 --- a/mx_rec/saver/saver.py +++ b/mx_rec/saver/saver.py @@ -67,7 +67,7 @@ class Saver(object): ("prefix_name", ClassValidator, {"classes": (str, type(None))}), ("prefix_name", OptionalStringValidator, {"min_len": 1, "max_len": 50}, ["check_string_length"]), ]) - def __init__(self, var_list=None, max_to_keep=3, prefix_name="checkpoint"): + def __init__(self, var_list=None, max_to_keep=3, prefix_name="checkpoint", warm_start_tables = None): self.max_to_keep = max_to_keep self._prefix_name = prefix_name self.var_list = var_list @@ -75,11 +75,12 @@ class Saver(object): self.local_rank_size = get_local_rank_size() self.local_rank_id = self.rank_id % self.local_rank_size self.save_op_dict 
diff --git a/mx_rec/saver/saver.py b/mx_rec/saver/saver.py
index d776b699..dc545822 100644
--- a/mx_rec/saver/saver.py
+++ b/mx_rec/saver/saver.py
@@ -67,7 +67,7 @@ class Saver(object):
         ("prefix_name", ClassValidator, {"classes": (str, type(None))}),
         ("prefix_name", OptionalStringValidator, {"min_len": 1, "max_len": 50}, ["check_string_length"]),
     ])
-    def __init__(self, var_list=None, max_to_keep=3, prefix_name="checkpoint"):
+    def __init__(self, var_list=None, max_to_keep=3, prefix_name="checkpoint", warm_start_tables=None):
         self.max_to_keep = max_to_keep
         self._prefix_name = prefix_name
         self.var_list = var_list
         self.rank_id = get_rank_id()
         self.local_rank_size = get_local_rank_size()
         self.local_rank_id = self.rank_id % self.local_rank_size
         self.save_op_dict = defaultdict(dict)
-        self.restore_fetch_list = []
+        self.restore_fetch_dict = defaultdict()
         self.placeholder_dict = defaultdict(dict)
         self._last_checkponts = []
         self.config_instance = ConfigInitializer.get_instance()
         self.build()
+        self.warm_start_tables = warm_start_tables

     def build(self):
         if self.var_list is None:
@@ -175,7 +176,7 @@ class Saver(object):
         logger.info("======== Saving finished for rank id %s ========", self.rank_id)

     @performance("Restore")
-    def restore(self, sess, reading_path):
+    def restore(self, sess, reading_path, warm_start_tables=None):
         logger.debug("======== Start restoring ========")
         if not check_file_system_is_valid(reading_path):
             raise ValueError("the path to save sparse embedding table data belong to invalid file system, "
@@ -185,11 +186,10 @@ class Saver(object):
         ckpt_name = f"sparse-{base_name}"
         reading_path = os.path.join(directory, ckpt_name)

-        self.config_instance.train_params_config.sparse_dir = reading_path
         if not tf.io.gfile.exists(reading_path):
             raise FileExistsError(f"Given dir {reading_path} does not exist, please double check.")

-        self._restore(sess, reading_path)
+        self._restore(sess, reading_path, warm_start_tables)
         logger.info("sparse model was restored from dir '%s' .", reading_path)
         logger.debug("======== Restoring finished ========")
@@ -283,6 +283,7 @@ class Saver(object):
                 sub_dict["optimizer"] = optimizer

     def _build_restore(self):
+        # The op construction below is unchanged; restore ops are now grouped per table name.
         for var in self.var_list:
             if global_env.tf_device == TFDevice.NPU.value and "merged" not in var.name:
                 continue
@@ -294,7 +295,7 @@ class Saver(object):
                                                 table_instance.emb_size],
                                          name=DataName.EMBEDDING.value)
             assign_op = var.assign(variable)
-            self.restore_fetch_list.append(assign_op)
+            self.restore_fetch_dict[table_instance.table_name] = [assign_op]
             optimizer = ConfigInitializer.get_instance().optimizer_config.get_optimizer_by_table_name(
                 table_instance.table_name)
             if optimizer:
@@ -313,10 +314,30 @@ class Saver(object):
                     if sub_optimizer_placeholder_dict.get(key_state).graph is not state.graph:
                         continue
                     assign_op = state.assign(sub_optimizer_placeholder_dict.get(key_state))
-                    self.restore_fetch_list.append(assign_op)
+                    self.restore_fetch_dict[table_instance.table_name].append(assign_op)
+
+    def get_warm_start_dict(self, table_list):
+        placeholder_dict = defaultdict(dict)
+        restore_fetch_list = []
+        for table_name, v in self.placeholder_dict.items():
+            if table_name in table_list:
+                placeholder_dict[table_name] = v
+                restore_fetch_list.append(self.restore_fetch_dict.get(table_name))
+
+        if not restore_fetch_list:
+            logger.warning("no tables can be warm start restored.")
+        return placeholder_dict, restore_fetch_list
+
+    def _restore(self, sess, reading_path, warm_start_tables=None):
+        # When warm_start_tables is given, restore only those tables; otherwise restore all of them.
+        if warm_start_tables:
+            placeholder_dict, restore_fetch_list = self.get_warm_start_dict(warm_start_tables)
+        else:
+            placeholder_dict, restore_fetch_list = self.placeholder_dict, self.restore_fetch_dict

-    def _restore(self, sess, reading_path):
-        for table_name in self.placeholder_dict:
+        for table_name in placeholder_dict:
             optimizer_instance = ConfigInitializer.get_instance().optimizer_config.optimizer_instance
             if optimizer_instance:
                 set_optimizer_info(optimizer_instance, table_name)

         restore_feed_dict = defaultdict(dict)

-        for table_name, sub_placeholder_dict in self.placeholder_dict.items():
+        for table_name, sub_placeholder_dict in placeholder_dict.items():
             load_offset = self.config_instance.hybrid_manager_config.get_load_offset(table_name)
             fill_placeholder(reading_path, sub_placeholder_dict, restore_feed_dict,
                              NameDescriptor(table_name, DataName.EMBEDDING.value), load_offset)
@@ -341,7 +362,7 @@ class Saver(object):
             _fill_placeholder_for_optimizer(optimizer_state_placeholder_dict_group, reading_path, restore_feed_dict,
                                             table_name, load_offset)

-        sess.run(self.restore_fetch_list, feed_dict=restore_feed_dict)
+        sess.run(restore_fetch_list, feed_dict=restore_feed_dict)


 class NameDescriptor:
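With warm_start_tables in place, a restore can be scoped to a subset of tables instead of the whole model. A minimal sketch of the extended interface, assuming an already-built mx_rec graph, an open session, and illustrative checkpoint paths (direct Saver use outside the estimator hooks is shown here only to make the new parameter concrete):

    saver = Saver()
    with tf.compat.v1.Session() as sess:
        # Full restore, exactly as before this patch.
        saver.restore(sess, "/ckpt/model_a/model.ckpt-1000")
        # Scoped restore: only the named sparse tables are fed and assigned.
        saver.restore(sess, "/ckpt/model_b/model.ckpt-500", warm_start_tables=["item_table"])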
diff --git a/mx_rec/saver/warm_start.py b/mx_rec/saver/warm_start.py
new file mode 100644
index 00000000..53324b06
--- /dev/null
+++ b/mx_rec/saver/warm_start.py
@@ -0,0 +1,272 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+import logging
+import os
+import re
+from typing import List
+
+import six
+import tensorflow as tf
+from tensorflow.python.estimator import estimator as estimator_lib
+from tensorflow.python.training import warm_starting_util
+
+from mx_rec.util.log import logger
+from mx_rec.saver.saver import Saver
+
+if tf.__version__.startswith("1"):
+    from npu_bridge.npu_init import NPUEstimator
+else:
+    from npu_device.compat.v1.npu_init import NPUEstimator
+
+
+class WarmStartController:
+    _instance = None  # class attribute holding the unique singleton instance
+
+    def __new__(cls):
+        if cls._instance is None:
+            cls._instance = super(WarmStartController, cls).__new__(cls)
+            cls._instance._warm_start_dict = {}
+            cls._instance.table_name_to_prev_table_name = {}
+        return cls._instance
+
+    def __init__(self):
+        logging.info("start to build WarmStartController.")
+
+    def add_element(self, path: str, table_list: List[str]):
+        """Register the sparse tables to be restored from a checkpoint path."""
+        if path not in self._warm_start_dict:
+            self._warm_start_dict[path] = table_list
+        else:
+            self._warm_start_dict[path] += table_list
+
+    def add_table_to_prev_table(self, table: str, prev_table: str):
+        self.table_name_to_prev_table_name[table] = prev_table
+
+    def get_elements(self):
+        """Return all registered path-to-table-list mappings."""
+        return self._warm_start_dict
+
+
+def patch_for_warm_start():
+    estimator_lib.Estimator.__init__ = patch_estimator_init(estimator_lib.Estimator.__init__)
+    warm_starting_util.warm_start = patch_for_func_warm_start(warm_starting_util.warm_start)
+    NPUEstimator.train = patch_for_estimator_train(NPUEstimator.train)
+
+
+def patch_estimator_init(func):
+    def wrapper(*args, **kwargs):
+        warm_start_from = kwargs.get('warm_start_from', None)
+        if warm_start_from:
+            kwargs['warm_start_from'] = warm_settings_filter(warm_start_from)
+        return func(*args, **kwargs)
+    return wrapper
+
+
+def patch_for_func_warm_start(func):
+    def wrapper(*args, **kwargs):
+        ckpt_to_initialize_from = args[0]
+        if isinstance(ckpt_to_initialize_from, (list, tuple)):
+            # Pop the per-path lists so they are not passed twice to the wrapped function.
+            vars_to_warm_start_list = kwargs.pop('vars_to_warm_start')
+            var_name_to_prev_var_name_list = kwargs.pop('var_name_to_prev_var_name')
+            results = []
+            for i in range(len(ckpt_to_initialize_from)):
+                results.append(
+                    func(ckpt_to_initialize_from[i], vars_to_warm_start_list[i],
+                         var_name_to_prev_var_name_list[i], *args[3:], **kwargs))
+            return results
+        else:
+            return func(*args, **kwargs)
+    return wrapper
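WarmStartController is a process-wide singleton: every construction returns the same object, and add_element accumulates table lists per checkpoint path, so the filters below can register tables from several WarmStartSettings before the restore hook consumes them. A short sketch of that accumulation, with illustrative path and table names:

    controller_a = WarmStartController()
    controller_b = WarmStartController()
    assert controller_a is controller_b  # same singleton instance

    controller_a.add_element("/ckpt/model_a", ["user_table"])
    controller_b.add_element("/ckpt/model_a", ["item_table"])  # merged under the same path
    print(controller_b.get_elements())
    # {'/ckpt/model_a': ['user_table', 'item_table']}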
+
+def patch_for_estimator_train(func):
+    def wrapper(*args, **kwargs):
+        hooks = kwargs.get('hooks') or []
+        if WarmStartController().get_elements():
+            hooks.append(SparseRestoreHook())
+            kwargs['hooks'] = hooks
+        return func(*args, **kwargs)
+    return wrapper
+
+
+def warm_settings_filter(warm_start_from):
+    # condition 1: the caller passed a WarmStartSettings object
+    if isinstance(warm_start_from, estimator_lib.WarmStartSettings):
+        # mx_rec-specific usage: a list of checkpoint paths, which needs a different
+        # filtering strategy from the native single-path warm start.
+        if isinstance(warm_start_from.ckpt_to_initialize_from, (list, tuple)):
+            out_setting_list = []
+            logger.info("According to warm_start_settings, warm start will load from more than one checkpoint path.")
+            warm_start_settings_list = _build_warm_settings_list(warm_start_from)
+            for setting in warm_start_settings_list:
+                filter_setting = _warm_settings_filter(setting)
+                if filter_setting:
+                    out_setting_list.append(filter_setting)
+            # The surviving settings must be recovered into one WarmStartSettings before returning.
+            if out_setting_list:
+                warm_start_from = recover_warm_settings(out_setting_list)
+                return warm_start_from
+        # native single-path usage
+        elif isinstance(warm_start_from.ckpt_to_initialize_from, (six.string_types, six.binary_type)):
+            logger.info("According to warm_start_settings, warm start will load from only one checkpoint path.")
+            filter_setting = _warm_settings_filter(warm_start_from)
+            if filter_setting:
+                return filter_setting
+        return None
+    # condition 2: the caller passed a plain checkpoint path string
+    elif isinstance(warm_start_from, (six.string_types, six.binary_type)):
+        # A plain string can be passed through unchanged, but the controller still has
+        # to record the sparse checkpoint path and its table names.
+        table_name_list = get_table_name_set_by_ckpt_path(warm_start_from)
+        WarmStartController().add_element(warm_start_from, table_name_list)
+        return warm_start_from
+    else:
+        return None
+
+
+def recover_warm_settings(setting_list):
+    ckpt_to_initialize_from_list = []
+    vars_to_warm_start_list = []
+    var_name_to_prev_var_name_list = []
+    for setting in setting_list:
+        ckpt_to_initialize_from_list.append(setting.ckpt_to_initialize_from)
+        vars_to_warm_start_list.append(setting.vars_to_warm_start)
+        var_name_to_prev_var_name_list.append(setting.var_name_to_prev_var_name)
+
+    return estimator_lib.WarmStartSettings(
+        ckpt_to_initialize_from=ckpt_to_initialize_from_list,
+        vars_to_warm_start=vars_to_warm_start_list,
+        var_name_to_prev_var_name=var_name_to_prev_var_name_list)
+
+
+# Validate the customized (list-based) warm settings and split them into per-path settings.
+def _build_warm_settings_list(warm_start_from):
+    ckpt_to_initialize_from = warm_start_from.ckpt_to_initialize_from
+    vars_to_warm_start = warm_start_from.vars_to_warm_start
+    var_name_to_prev_var_name = warm_start_from.var_name_to_prev_var_name
+    # type check
+    for params in [vars_to_warm_start, var_name_to_prev_var_name]:
+        if not isinstance(params, (list, tuple)):
+            raise ValueError("If you choose to load from multiple model paths through the warm start option, "
+                             "then the parameter type in the warm settings should be a list.")
+    # length check
+    if not (len(ckpt_to_initialize_from) == len(vars_to_warm_start) == len(var_name_to_prev_var_name)):
+        raise ValueError("If you choose to load from multiple model paths through the warm start option, "
+                         "then the parameter lists should all have the same length.")
+    warm_start_settings_count = len(ckpt_to_initialize_from)
+
+    warm_start_settings_list = []
+    for i in range(warm_start_settings_count):
+        tmp_settings = estimator_lib.WarmStartSettings(
+            ckpt_to_initialize_from=ckpt_to_initialize_from[i],
+            vars_to_warm_start=vars_to_warm_start[i],
+            var_name_to_prev_var_name=var_name_to_prev_var_name[i])
+        warm_start_settings_list.append(tmp_settings)
+    return warm_start_settings_list
+
+
+def _warm_settings_filter(warm_start_setting):
+    # Split the sparse tables out of the settings; table names may map to previous names.
+    vars_to_warm_start = warm_start_setting.vars_to_warm_start
+    var_name_to_prev_var_name = warm_start_setting.var_name_to_prev_var_name
+    vars_to_warm_start_res = []
+    # The candidate table names are read from the sparse checkpoint path.
+    ckpt_path = warm_start_setting.ckpt_to_initialize_from
+    table_name_list = get_table_name_set_by_ckpt_path(ckpt_path)
+    # For sparse tables, vars_to_warm_start supports: 1. str (regex or table name); 2. list of str.
+    if isinstance(vars_to_warm_start, str):
+        # condition 1: vars_to_warm_start is a str (regex or table name)
+        matching_tables = [table for table in table_name_list if re.match(vars_to_warm_start, table)]
+        # If any sparse table matches, this WarmStartSettings no longer applies to the dense part.
+        if matching_tables:
+            warm_start_setting = None
+            # register the sparse path and its matched table names with the controller
+            WarmStartController().add_element(ckpt_path, matching_tables)
+            if vars_to_warm_start != ".*":
+                return None
+        return warm_start_setting
+    elif all(isinstance(v, str) for v in vars_to_warm_start):
+        sparse_vars = []
+        for v in vars_to_warm_start:
+            matching_tables = [table for table in table_name_list if re.match(v, table)]
+            if matching_tables:
+                sparse_vars.append(v)
+                WarmStartController().add_element(ckpt_path, matching_tables)
+        vars_to_warm_start_res = [v for v in vars_to_warm_start if v not in sparse_vars]
+        if not vars_to_warm_start_res:
+            warm_start_setting = None
+        else:
+            # WarmStartSettings is a namedtuple, so build a modified copy instead of assigning.
+            warm_start_setting = warm_start_setting._replace(vars_to_warm_start=vars_to_warm_start_res)
+        return warm_start_setting
+    else:
+        raise ValueError("vars_to_warm_start must be list or str!")
+
+
+def get_table_name_set_by_ckpt_path(warm_start_path: str) -> List[str]:
+    '''
+    Get the list of sparse table names saved under the path 'warm_start_path'.
+    '''
+    table_name_list = []
+    if tf.io.gfile.isdir(warm_start_path):
+        restore_path = get_latest_ckpt(warm_start_path)
+    else:
+        restore_path = warm_start_path
+    directory, base_name = os.path.split(restore_path)
+    ckpt_name = f"sparse-{base_name}"
+    sparse_path = os.path.join(directory, ckpt_name)
+    # If sparse_path does not exist, this may be a GPU checkpoint; do not raise an error,
+    # just return an empty table name list.
+    if not tf.io.gfile.isdir(sparse_path):
+        logger.info(f"under the warm start path {warm_start_path}, sparse directory {sparse_path} does not exist.")
+    else:
+        for dirname in tf.io.gfile.listdir(sparse_path):
+            table_name_list.append(dirname)
+    return table_name_list
+def get_latest_ckpt(warm_start_path) -> str:
+    ckpt_path = os.path.join(warm_start_path, "checkpoint")
+    if not tf.io.gfile.exists(ckpt_path):
+        raise FileNotFoundError(f"Checkpoint file is missing under the warm start model path {warm_start_path}")
+    with tf.io.gfile.GFile(ckpt_path, "r") as f:
+        latest_ckpt = f.readline().rstrip()
+        latest_ckpt = latest_ckpt.split(":")[1].strip(' ').replace('"', '')
+        latest_ckpt = latest_ckpt.split("/")[-1]
+
+    path = os.path.join(warm_start_path, latest_ckpt)
+    return path
+
+
+class SparseRestoreHook(tf.estimator.SessionRunHook):
+    def __init__(self):
+        logging.info("In warm start mode, SparseRestoreHook has been initialized.")
+
+    def begin(self):
+        self._saver = Saver()
+        logging.info("In warm start mode, begin SparseRestoreHook.")
+
+    def after_create_session(self, session, coord):
+        # mx_rec adapts the new restore interface here: restore is called once per
+        # registered checkpoint path, scoped to that path's tables.
+        self._warm_start_dict = WarmStartController().get_elements()
+        for path, restore_tables in self._warm_start_dict.items():
+            restore_path = get_latest_ckpt(path)
+            self._saver.restore(session, restore_path, restore_tables)
--
Gitee

From d7ed2aa49e8c464e6dc61c3e6216eb18f4e8ae42 Mon Sep 17 00:00:00 2001
From: steepcurve
Date: Mon, 22 Apr 2024 14:32:49 +0800
Subject: [PATCH 050/302] add .clang-format

---
 .clang-format                        |  49 +++
 src/core/key_process/key_process.cpp | 535 +++++++++++++++------------
 src/core/key_process/key_process.h   | 382 +++++++++----------
 3 files changed, 550 insertions(+), 416 deletions(-)
 create mode 100644 .clang-format

diff --git a/.clang-format b/.clang-format
new file mode 100644
index 00000000..f1f5b0d0
--- /dev/null
+++ b/.clang-format
@@ -0,0 +1,49 @@
+Language: Cpp
+BasedOnStyle: Google
+AccessModifierOffset: -4
+ColumnLimit: 100
+IndentWidth: 4
+UseTab: Never
+AlignOperands: Align
+AlignAfterOpenBracket: Align
+AlignTrailingComments: true
+DerivePointerAlignment: false
+PointerAlignment: Left
+AllowAllParametersOfDeclarationOnNextLine: false
+AllowShortBlocksOnASingleLine: Empty
+AllowShortCaseLabelsOnASingleLine: false
+AllowShortEnumsOnASingleLine: false
+AllowShortFunctionsOnASingleLine: Empty
+AllowShortIfStatementsOnASingleLine: false
+AllowShortLoopsOnASingleLine: false
+AllowShortLambdasOnASingleLine: Inline
+AlwaysBreakAfterDefinitionReturnType: None
+AlwaysBreakBeforeMultilineStrings: false
+BinPackArguments: true
+BinPackParameters: true
+BreakBeforeBraces: Custom
+BraceWrapping:
+    AfterClass: false
+    AfterControlStatement: false
+    AfterEnum: false
+    AfterFunction: true
+    AfterNamespace: false
+    AfterStruct: false
+    AfterUnion: false
+    AfterExternBlock: false
+    BeforeCatch: false
+    BeforeElse: false
+    IndentBraces: false
+BreakBeforeBinaryOperators: None
+BreakBeforeTernaryOperators: true
+BreakConstructorInitializers: BeforeColon
+BreakStringLiterals: true
+CompactNamespaces: false
+PackConstructorInitializers: CurrentLine
+ConstructorInitializerIndentWidth: 4 +ContinuationIndentWidth: 4 +Cpp11BracedListStyle: true +DisableFormat: false +FixNamespaceComments: true +IndentWrappedFunctionNames: false +Standard: Latest diff --git a/src/core/key_process/key_process.cpp b/src/core/key_process/key_process.cpp index f76f6907..58312ca1 100644 --- a/src/core/key_process/key_process.cpp +++ b/src/core/key_process/key_process.cpp @@ -15,19 +15,21 @@ See the License for the specific language governing permissions and #include "key_process.h" +#include + #include #include -#include + +#include "emb_table/embedding_mgmt.h" +#include "hd_transfer/hd_transfer.h" +#include "host_emb/host_emb.h" +#include "ock_ctr_common/include/error_code.h" #include "utils/common.h" +#include "utils/config.h" #include "utils/logger.h" #include "utils/safe_queue.h" #include "utils/singleton.h" #include "utils/time_cost.h" -#include "utils/config.h" -#include "host_emb/host_emb.h" -#include "emb_table/embedding_mgmt.h" -#include "hd_transfer/hd_transfer.h" -#include "ock_ctr_common/include/error_code.h" using namespace std; using namespace chrono; @@ -41,8 +43,7 @@ void KeyProcess::SetupHotEmbUpdateStep() } bool KeyProcess::Initialize(const RankInfo& rInfo, const vector& eInfos, - const vector& thresholdValues, - int seed) + const vector& thresholdValues, int seed) { this->rankInfo = rInfo; if (rankInfo.useHot) { @@ -50,7 +51,7 @@ bool KeyProcess::Initialize(const RankInfo& rInfo, const vector& eInfos } map scInfo; - for (const auto& info: eInfos) { + for (const auto& info : eInfos) { embInfos[info.name] = info; scInfo[info.name] = info.sendCount; if (rankInfo.useHot) { @@ -66,8 +67,8 @@ bool KeyProcess::Initialize(const RankInfo& rInfo, const vector& eInfos LOG_INFO(KEY_PROCESS "hot emb count info:{}", MapToString(hotEmbTotCount)); MPI_Group worldGroup; MPI_Comm_group(MPI_COMM_WORLD, &worldGroup); - for (auto& i: comm) { - for (auto& j: i) { + for (auto& i : comm) { + for (auto& j : i) { MPI_Comm_create(MPI_COMM_WORLD, worldGroup, &j); } } @@ -85,12 +86,14 @@ bool KeyProcess::Initialize(const RankInfo& rInfo, const vector& eInfos if (GlobalEnv::fastUnique) { int result = ock::ctr::Factory::Create(factory); if (result != 0) { - throw runtime_error(Logger::Format("create fast factory failed, error code:{}", result)); + throw runtime_error( + Logger::Format("create fast factory failed, error code:{}", result)); } } LOG_INFO(KEY_PROCESS "scInfo:{}, localRankSize:{}, rankSize:{}, useStatic:{}, useHot:{}", - MapToString(scInfo), rInfo.localRankSize, rInfo.rankSize, rInfo.useStatic, rInfo.useHot); + MapToString(scInfo), rInfo.localRankSize, rInfo.rankSize, rInfo.useStatic, + rInfo.useHot); #ifndef GTEST Start(); #endif @@ -103,8 +106,9 @@ int KeyProcess::Start() // bind like: // 0 1 2 3 4 5 0 1 2 3 4 5 // | rank0 | | rank1 | - // each rank creates KEY_PROCESS_THREAD threads, each thread process one batchdata - LOG_INFO("CPU Core Num: {}", sysconf(_SC_NPROCESSORS_CONF)); // 查看CPU核数 + // each rank creates KEY_PROCESS_THREAD threads, each thread process one + // batchdata + LOG_INFO("CPU Core Num: {}", sysconf(_SC_NPROCESSORS_CONF)); // 查看CPU核数 auto fn = [this](int channel, int threadId) { #ifndef GTEST auto ret = aclrtSetDevice(static_cast(rankInfo.deviceId)); @@ -118,7 +122,7 @@ int KeyProcess::Start() } else { KeyProcessTask(channel, threadId); } - }; // for clean code + }; // for clean code int threadNum = GetThreadNumEnv(); for (int channel = 0; channel < MAX_CHANNEL_NUM; ++channel) { LOG_INFO(KEY_PROCESS "key process thread num: {}", threadNum); 
@@ -136,8 +140,9 @@ void KeyProcess::InitHotEmbTotCount(const EmbInfo& info, const RankInfo& rInfo) if (rankInfo.useDynamicExpansion) { embeddingSize = info.embeddingSize; } - hotEmbTotCount[info.name] = static_cast(static_cast(GetUBSize(rInfo.deviceId) / sizeof(float)) * - HOT_EMB_CACHE_PCT / static_cast(embeddingSize)); + hotEmbTotCount[info.name] = + static_cast(static_cast(GetUBSize(rInfo.deviceId) / sizeof(float)) * + HOT_EMB_CACHE_PCT / static_cast(embeddingSize)); } OffsetMemT KeyProcess::GetMaxOffset() @@ -182,7 +187,7 @@ void KeyProcess::Destroy() { isRunning = false; LOG_INFO(KEY_PROCESS "rankId:{} KeyProcess begin destroy.", rankInfo.rankId); - for (auto& i: procThreads) { + for (auto& i : procThreads) { i->join(); } procThreads.clear(); @@ -192,8 +197,8 @@ void KeyProcess::Destroy() /// 每个数据通道的所有数据处理线程上锁 void KeyProcess::LoadSaveLock() { - for (int channelId { 0 }; channelId < MAX_CHANNEL_NUM; ++channelId) { - for (int threadId { 0 }; threadId < MAX_KEY_PROCESS_THREAD; ++threadId) { + for (int channelId{0}; channelId < MAX_CHANNEL_NUM; ++channelId) { + for (int threadId{0}; threadId < MAX_KEY_PROCESS_THREAD; ++threadId) { loadSaveMut[channelId][threadId].lock(); } } @@ -202,8 +207,8 @@ void KeyProcess::LoadSaveLock() /// 每个数据通道的所有数据处理线程释放锁 void KeyProcess::LoadSaveUnlock() { - for (int channelId { 0 }; channelId < MAX_CHANNEL_NUM; ++channelId) { - for (int threadId { 0 }; threadId < MAX_KEY_PROCESS_THREAD; ++threadId) { + for (int channelId{0}; channelId < MAX_CHANNEL_NUM; ++channelId) { + for (int threadId{0}; threadId < MAX_KEY_PROCESS_THREAD; ++threadId) { loadSaveMut[channelId][threadId].unlock(); } } @@ -229,8 +234,9 @@ void KeyProcess::GetUniqueConfig(ock::ctr::UniqueConf& uniqueConf) uniqueConf.maxThreadNum = GlobalEnv::maxUniqueThreadNum; } -void KeyProcess::InitializeUnique(ock::ctr::UniqueConf& uniqueConf, size_t& preBatchSize, bool& uniqueInitialize, - const unique_ptr & batch, ock::ctr::UniquePtr& unique) +void KeyProcess::InitializeUnique(ock::ctr::UniqueConf& uniqueConf, size_t& preBatchSize, + bool& uniqueInitialize, const unique_ptr& batch, + ock::ctr::UniquePtr& unique) { uniqueConf.desiredSize = static_cast(batch->Size()); if (preBatchSize != batch->Size()) { @@ -272,7 +278,8 @@ void KeyProcess::KeyProcessTaskWithFastUnique(int channel, int threadId) while (true) { TimeCost getAndProcessTC; TimeCost getBatchDataTC; - batch = GetBatchData(channel, threadId); // get batch data from SingletonQueue + batch = + GetBatchData(channel, threadId); // get batch data from SingletonQueue LOG_DEBUG("getBatchDataTC(ms):{}", getBatchDataTC.ElapsedMS()); if (batch == nullptr) { break; @@ -285,7 +292,8 @@ void KeyProcess::KeyProcessTaskWithFastUnique(int channel, int threadId) break; } LOG_INFO(KEY_PROCESS "getAndProcessTC(ms):{}, key process with fast unique cost:{}," - " get data time(ms):{}, batch name:{}, channelId:{}, threadId:{}, batchId:{}", + " get data time(ms):{}, batch name:{}, channelId:{}, " + "threadId:{}, batchId:{}", getAndProcessTC.ElapsedMS(), processDataTime.ElapsedMS(), getBatchTime, batch->name, batch->channel, threadId, batch->batchId); int queueIndex = threadId + (MAX_KEY_PROCESS_THREAD * batch->channel); @@ -293,14 +301,13 @@ void KeyProcess::KeyProcessTaskWithFastUnique(int channel, int threadId) batchQueue->PutDirty(move(batch)); } unique->UnInitialize(); - } catch (const EndRunExit &e) { + } catch (const EndRunExit& e) { LOG_INFO(KEY_PROCESS "channel: {}, thread: {}, abort run: {}", channel, threadId, e.what()); } LOG_INFO(KEY_PROCESS 
"KeyProcessTaskWithFastUnique exit. rank:{} channelId:{}, threadId:{}", - rankInfo.rankId, channel, threadId); + rankInfo.rankId, channel, threadId); } - void KeyProcess::KeyProcessTask(int channel, int threadId) { unique_ptr batch; @@ -308,7 +315,8 @@ void KeyProcess::KeyProcessTask(int channel, int threadId) while (true) { TimeCost getAndProcessTC; TimeCost getBatchDataTC; - batch = GetBatchData(channel, threadId); // get batch data from SingletonQueue + batch = + GetBatchData(channel, threadId); // get batch data from SingletonQueue LOG_DEBUG("getBatchDataTC(ms):{}", getBatchDataTC.ElapsedMS()); if (batch == nullptr) { break; @@ -320,43 +328,46 @@ void KeyProcess::KeyProcessTask(int channel, int threadId) break; } LOG_INFO(KEY_PROCESS "getAndProcessTC(ms):{}, key process cost:{}," - " get data time(ms):{}, batch name:{}, channelId:{}, threadId:{}, batchId:{}", + " get data time(ms):{}, batch name:{}, " + "channelId:{}, threadId:{}, batchId:{}", getAndProcessTC.ElapsedMS(), processDataTime.ElapsedMS(), getBatchTime, batch->name, batch->channel, threadId, batch->batchId); int queueIndex = threadId + (MAX_KEY_PROCESS_THREAD * batch->channel); auto batchQueue = SingletonQueue::GetInstances(queueIndex); batchQueue->PutDirty(move(batch)); } - } catch (const EndRunExit &e) { + } catch (const EndRunExit& e) { LOG_INFO(KEY_PROCESS "channel: {}, thread: {}, abort run: {}", channel, threadId, e.what()); } - LOG_INFO(KEY_PROCESS "KeyProcessTask exit. rank:{} channelId:{}, threadId:{}", rankInfo.rankId, channel, threadId); + LOG_INFO(KEY_PROCESS "KeyProcessTask exit. rank:{} channelId:{}, threadId:{}", rankInfo.rankId, + channel, threadId); } -void KeyProcess::HashSplitHelper(const unique_ptr & batch, vector & splitKeys, - vector & restore, vector & hotPos, - vector >& keyCount) +void KeyProcess::HashSplitHelper(const unique_ptr& batch, vector& splitKeys, + vector& restore, vector& hotPos, + vector>& keyCount) { TimeCost uniqueTc; if (m_featureAdmitAndEvict.GetFunctionSwitch() && FeatureAdmitAndEvict::m_embStatus[batch->name] != SingleEmbTableStatus::SETS_NONE) { - tie(splitKeys, restore, keyCount) = HashSplitWithFAAE(batch); // 按存储dev id切分并去重 + tie(splitKeys, restore, keyCount) = HashSplitWithFAAE(batch); // 按存储dev id切分并去重 } else { if (rankInfo.useHot) { - tie(splitKeys, restore, hotPos) = HotHashSplit(batch); // 按存储dev id切分并去重 + tie(splitKeys, restore, hotPos) = HotHashSplit(batch); // 按存储dev id切分并去重 } else { - tie(splitKeys, restore) = HashSplit(batch); // 按存储dev id切分并去重 + tie(splitKeys, restore) = HashSplit(batch); // 按存储dev id切分并去重 } } LOG_DEBUG("uniqueTc(ms):{}", uniqueTc.ElapsedMS()); } -bool KeyProcess::KeyProcessTaskHelperWithFastUnique(unique_ptr& batch, ock::ctr::UniquePtr& unique, - int channel, int threadId) +bool KeyProcess::KeyProcessTaskHelperWithFastUnique(unique_ptr& batch, + ock::ctr::UniquePtr& unique, int channel, + int threadId) { // tuple for keyRec restore hotPos scAll countRecv isWithFAAE = m_featureAdmitAndEvict.GetFunctionSwitch() && - FeatureAdmitAndEvict::m_embStatus[batch->name] != SingleEmbTableStatus::SETS_NONE; + FeatureAdmitAndEvict::m_embStatus[batch->name] != SingleEmbTableStatus::SETS_NONE; TimeCost totalTimeCost = TimeCost(); TimeCost fastUniqueTC; UniqueInfo uniqueInfo; @@ -365,11 +376,11 @@ bool KeyProcess::KeyProcessTaskHelperWithFastUnique(unique_ptr& batch // 特征准入&淘汰 if (isWithFAAE && - (m_featureAdmitAndEvict.FeatureAdmit( - channel, batch, uniqueInfo.all2AllInfo.keyRecv, uniqueInfo.all2AllInfo.countRecv) == - 
FeatureAdmitReturnType::FEATURE_ADMIT_RETURN_ERROR)) { + (m_featureAdmitAndEvict.FeatureAdmit(channel, batch, uniqueInfo.all2AllInfo.keyRecv, + uniqueInfo.all2AllInfo.countRecv) == + FeatureAdmitReturnType::FEATURE_ADMIT_RETURN_ERROR)) { LOG_ERROR(KEY_PROCESS "rank:{} thread:{}, channel:{}, Feature-admit-and-evict error ...", - rankInfo.rankId, threadId, channel); + rankInfo.rankId, threadId, channel); return false; } std::lock_guard lock(loadSaveMut[channel][threadId]); @@ -383,7 +394,9 @@ bool KeyProcess::KeyProcessTaskHelperWithFastUnique(unique_ptr& batch LOG_DEBUG("key2OffsetTC(ms):{}", key2OffsetTC.ElapsedMS()); } // Static all2all,need send count - if (!rankInfo.useStatic) { SendA2A(uniqueInfo.all2AllInfo.scAll, batch->name, batch->channel, batch->batchId); } + if (!rankInfo.useStatic) { + SendA2A(uniqueInfo.all2AllInfo.scAll, batch->name, batch->channel, batch->batchId); + } auto tensors = make_unique>(); tensors->push_back(Vec2TensorI32(uniqueInfo.restore)); @@ -394,15 +407,17 @@ bool KeyProcess::KeyProcessTaskHelperWithFastUnique(unique_ptr& batch if (!rankInfo.isDDR) { PushGlobalUniqueTensors(move(tensors), uniqueInfo.all2AllInfo.keyRecv, channel); - tensors->push_back(rankInfo.useDynamicExpansion ? Vec2TensorI64(uniqueInfo.all2AllInfo.keyRecv) : - Vec2TensorI32(uniqueInfo.all2AllInfo.keyRecv)); + tensors->push_back(rankInfo.useDynamicExpansion + ? Vec2TensorI64(uniqueInfo.all2AllInfo.keyRecv) + : Vec2TensorI32(uniqueInfo.all2AllInfo.keyRecv)); } TimeCost pushResultTC; PushResult(batch, move(tensors), uniqueInfo.all2AllInfo.keyRecv); if (GlogConfig::gStatOn) { - LOG_INFO(STAT_INFO "channel_id {} batch_id {} rank_id {} key_process_time_cost_with_fast_unique {}", - channel, batch->batchId, rankInfo.rankId, totalTimeCost.ElapsedMS()); + LOG_INFO(STAT_INFO "channel_id {} batch_id {} rank_id {} " + "key_process_time_cost_with_fast_unique {}", + channel, batch->batchId, rankInfo.rankId, totalTimeCost.ElapsedMS()); } LOG_DEBUG("pushResultTC(ms):{}", pushResultTC.ElapsedMS()); return true; @@ -430,8 +445,8 @@ bool KeyProcess::KeyProcessTaskHelper(unique_ptr& batch, int channel, // 特征准入&淘汰 if (m_featureAdmitAndEvict.GetFunctionSwitch() && FeatureAdmitAndEvict::m_embStatus[batch->name] != SingleEmbTableStatus::SETS_NONE && - (m_featureAdmitAndEvict.FeatureAdmit(channel, batch, lookupKeys, - countRecv) == FeatureAdmitReturnType::FEATURE_ADMIT_RETURN_ERROR)) { + (m_featureAdmitAndEvict.FeatureAdmit(channel, batch, lookupKeys, countRecv) == + FeatureAdmitReturnType::FEATURE_ADMIT_RETURN_ERROR)) { LOG_ERROR(KEY_PROCESS "rank:{} thread:{}, channel:{}, Feature-admit-and-evict error ...", rankInfo.rankId, threadId, channel); return false; @@ -444,7 +459,9 @@ bool KeyProcess::KeyProcessTaskHelper(unique_ptr& batch, int channel, } // Static all2all,need send count - if (!rankInfo.useStatic) { SendA2A(scAll, batch->name, batch->channel, batch->batchId); } + if (!rankInfo.useStatic) { + SendA2A(scAll, batch->name, batch->channel, batch->batchId); + } TimeCost pushResultTC; auto tensors = make_unique>(); @@ -456,21 +473,24 @@ bool KeyProcess::KeyProcessTaskHelper(unique_ptr& batch, int channel, if (!rankInfo.isDDR) { PushGlobalUniqueTensors(tensors, lookupKeys, channel); - tensors->push_back(rankInfo.useDynamicExpansion ? Vec2TensorI64(lookupKeys) : Vec2TensorI32(lookupKeys)); + tensors->push_back(rankInfo.useDynamicExpansion ? 
Vec2TensorI64(lookupKeys) + : Vec2TensorI32(lookupKeys)); } PushResult(batch, move(tensors), lookupKeys); LOG_DEBUG("pushResultTC(ms):{}", pushResultTC.ElapsedMS()); if (GlogConfig::gStatOn) { - LOG_INFO(STAT_INFO "channel_id {} batch_id {} rank_id {} key_process_time_cost {}", - channel, batch->batchId, rankInfo.rankId, totalTimeCost.ElapsedMS()); + LOG_INFO(STAT_INFO "channel_id {} batch_id {} rank_id {} key_process_time_cost {}", channel, + batch->batchId, rankInfo.rankId, totalTimeCost.ElapsedMS()); } return true; } -void KeyProcess::PushGlobalUniqueTensors(const unique_ptr>& tensors, KeysT& lookupKeys, int channel) +void KeyProcess::PushGlobalUniqueTensors(const unique_ptr>& tensors, + KeysT& lookupKeys, int channel) { - if (GlobalEnv::applyGradientsStrategy == ApplyGradientsStrategyOptions::SUM_SAME_ID_GRADIENTS_AND_APPLY && + if (GlobalEnv::applyGradientsStrategy == + ApplyGradientsStrategyOptions::SUM_SAME_ID_GRADIENTS_AND_APPLY && channel == TRAIN_CHANNEL_ID) { KeysT uniqueKeys; vector restoreVecSec; @@ -479,36 +499,39 @@ void KeyProcess::PushGlobalUniqueTensors(const unique_ptr>& tenso GlobalUnique(lookupKeys, uniqueKeys, restoreVecSec); LOG_DEBUG("globalUniqueSyncTC(ms):{}", globalUniqueSyncTC.ElapsedMS()); tensors->push_back(Vec2TensorI32(restoreVecSec)); - tensors->push_back(rankInfo.useDynamicExpansion ? Vec2TensorI64(uniqueKeys) : Vec2TensorI32(uniqueKeys)); + tensors->push_back(rankInfo.useDynamicExpansion ? Vec2TensorI64(uniqueKeys) + : Vec2TensorI32(uniqueKeys)); } } vector KeyProcess::GetCountRecv(const unique_ptr& batch, int id, - vector>& keyCount, vector scAll, vector ss) + vector>& keyCount, vector scAll, + vector ss) { TimeCost getCountRecvTC; if (rankInfo.useStatic) { - for (auto& cnt: keyCount) { + for (auto& cnt : keyCount) { cnt.resize(embInfos[batch->name].sendCount, 0); } } vector countSend; - for (auto& cnt: keyCount) { + for (auto& cnt : keyCount) { countSend.insert(countSend.cend(), cnt.cbegin(), cnt.cend()); } vector sc; for (int i = 0; i < rankInfo.rankSize; ++i) { sc.push_back(scAll.at(rankInfo.rankSize * rankInfo.rankId + i)); } - vector rc; // receive count + vector rc; // receive count for (int i = 0; i < rankInfo.rankSize; ++i) { rc.push_back(scAll.at(i * rankInfo.rankSize + rankInfo.rankId)); } - vector rs = Count2Start(rc); // receive displays/offset 接受数据的起始偏移量 + vector rs = Count2Start(rc); // receive displays/offset 接受数据的起始偏移量 vector countRecv; countRecv.resize(rs.back() + rc.back()); - int retCode = MPI_Alltoallv(countSend.data(), sc.data(), ss.data(), MPI_UINT32_T, countRecv.data(), - rc.data(), rs.data(), MPI_UINT32_T, comm[batch->channel][id]); + int retCode = + MPI_Alltoallv(countSend.data(), sc.data(), ss.data(), MPI_UINT32_T, countRecv.data(), + rc.data(), rs.data(), MPI_UINT32_T, comm[batch->channel][id]); if (retCode != MPI_SUCCESS) { LOG_ERROR("rank {}, MPI_Alltoallv failed:{}", rankInfo.rankId, retCode); } @@ -521,16 +544,19 @@ void KeyProcess::PushResult(unique_ptr& batch, unique_ptr lockGuard(mut); storage.push_front(move(tensors)); - infoList[batch->name][batch->channel].push(make_tuple(batch->batchId, batch->name, storage.begin())); + infoList[batch->name][batch->channel].push( + make_tuple(batch->batchId, batch->name, storage.begin())); if (rankInfo.isDDR) { - lookupKeysList[batch->name][batch->channel].push(make_tuple(batch->batchId, batch->name, move(lookupKeys))); + lookupKeysList[batch->name][batch->channel].push( + make_tuple(batch->batchId, batch->name, move(lookupKeys))); } lockGuard.unlock(); } /* - * 
从共享队列SingletonQueue中读取batch数据并返回。batch数据由 ReadEmbKeyV2 写入。 - * commID为线程标识[0, KEY_PROCESS_THREAD-1],不同线程、训练或推理数据用不同的共享队列通信 + * 从共享队列SingletonQueue中读取batch数据并返回。batch数据由 + * ReadEmbKeyV2 写入。 commID为线程标识[0, + * KEY_PROCESS_THREAD-1],不同线程、训练或推理数据用不同的共享队列通信 */ unique_ptr KeyProcess::GetBatchData(int channel, int commId) const { @@ -551,32 +577,37 @@ unique_ptr KeyProcess::GetBatchData(int channel, int commId) const this_thread::sleep_for(100us); if (tc.ElapsedSec() > GET_BATCH_TIMEOUT) { if (commId == 0) { - LOG_WARN(KEY_PROCESS "getting batch timeout! 1. check last 'read batch cost' print. " - "channel[{}] commId[{}]", channel, commId); + LOG_WARN(KEY_PROCESS + "getting batch timeout! 1. check last 'read batch cost' print. " + "channel[{}] commId[{}]", + channel, commId); } this_thread::sleep_for(seconds(1)); tc = TimeCost(); } if (!isRunning) { - LOG_WARN("channelId:{} threadId:{}, isRunning is false when GetBatchData", channel, commId); + LOG_WARN("channelId:{} threadId:{}, isRunning is false when GetBatchData", channel, + commId); throw EndRunExit("GetBatchData end run."); } } EASY_END_BLOCK - LOG_DEBUG(KEY_PROCESS "channelId:{} threadId:{} batchId:{}, get batch data done, batchName:{}. bs:{} sample:[{}]", + LOG_DEBUG(KEY_PROCESS "channelId:{} threadId:{} batchId:{}, get batch data " + "done, batchName:{}. bs:{} sample:[{}]", batch->channel, commId, batch->batchId, batch->name, batch->Size(), batch->UnParse()); #if defined(PROFILING) && defined(BUILD_WITH_EASY_PROFILER) if (batch->batchId == PROFILING_START_BATCH_ID) { EASY_PROFILER_ENABLE } else if (batch->batchId == PROFILING_END_BATCH_ID) { - ::profiler::dumpBlocksToFile(StringFormat("/home/MX_REC-profile-%d.prof", rankInfo.rankId).c_str()); + ::profiler::dumpBlocksToFile( + StringFormat("/home/MX_REC-profile-%d.prof", rankInfo.rankId).c_str()); } #endif return batch; } -size_t KeyProcess::GetKeySize(const unique_ptr &batch) +size_t KeyProcess::GetKeySize(const unique_ptr& batch) { size_t size = rankInfo.rankSize * embInfos[batch->name].sendCount; if (!rankInfo.useStatic) { @@ -585,8 +616,9 @@ size_t KeyProcess::GetKeySize(const unique_ptr &batch) return size; } -void KeyProcess::ProcessBatchWithFastUnique(const unique_ptr &batch, ock::ctr::UniquePtr& unique, - int id, UniqueInfo& uniqueInfoOut) +void KeyProcess::ProcessBatchWithFastUnique(const unique_ptr& batch, + ock::ctr::UniquePtr& unique, int id, + UniqueInfo& uniqueInfoOut) { EASY_FUNCTION(profiler::colors::Purple) EASY_VALUE("batchId", batch->batchId) @@ -605,10 +637,10 @@ void KeyProcess::ProcessBatchWithFastUnique(const unique_ptr &batch, ock::ctr::UniqueIn uniqueIn; uniqueIn.inputIdCnt = static_cast(batch->Size()); - uniqueIn.inputId = reinterpret_cast(batch->sample.data()); + uniqueIn.inputId = reinterpret_cast(batch->sample.data()); ock::ctr::EnhancedUniqueOut uniqueOut; - uniqueOut.uniqueId = reinterpret_cast(keySendInfo.keySend.data()); + uniqueOut.uniqueId = reinterpret_cast(keySendInfo.keySend.data()); uniqueOut.index = reinterpret_cast(uniqueInfoOut.restore.data()); if (rankInfo.useStatic) { uniqueOut.idCnt = idCount.data(); @@ -617,7 +649,7 @@ void KeyProcess::ProcessBatchWithFastUnique(const unique_ptr &batch, uniqueOut.idCnt = keySendInfo.keyCount.data(); } uniqueOut.uniqueIdCntInBucket = splitSize.data(); - uniqueOut.uniqueIdInBucket = reinterpret_cast(uniqueVector.data()); + uniqueOut.uniqueIdInBucket = reinterpret_cast(uniqueVector.data()); uniqueOut.uniqueIdCnt = 0; int ret = unique->DoEnhancedUnique(uniqueIn, uniqueOut); @@ -633,19 +665,21 @@ void 
KeyProcess::ProcessBatchWithFastUnique(const unique_ptr &batch, All2All(sc, id, batch, keySendInfo, uniqueInfoOut.all2AllInfo); LOG_DEBUG(KEY_PROCESS "ProcessBatchWithFastUnique get batchId:{}, batchSize:{}," - " channel:{}, name:{}, restore:{}, keyCount:{}", - batch->batchId, batch->Size(), batch->channel, batch->name, - uniqueInfoOut.restore.size(), keySendInfo.keyCount.size()); + " channel:{}, name:{}, restore:{}, keyCount:{}", + batch->batchId, batch->Size(), batch->channel, batch->name, + uniqueInfoOut.restore.size(), keySendInfo.keyCount.size()); if (GlogConfig::gStatOn) { LOG_INFO(STAT_INFO "channel_id {} batch_id {} rank_id {} " - "batch_key_num_with_fast_unique {} unique_key_num_with_fast_unique {}", - batch->channel, batch->batchId, rankInfo.rankId, batch->Size(), uniqueOut.uniqueIdCnt); + "batch_key_num_with_fast_unique {} unique_key_num_with_fast_unique {}", + batch->channel, batch->batchId, rankInfo.rankId, batch->Size(), + uniqueOut.uniqueIdCnt); } } -void KeyProcess::HandleHotAndSendCount(const unique_ptr &batch, UniqueInfo& uniqueInfoOut, - KeySendInfo& keySendInfo, vector& sc, vector& splitSize) +void KeyProcess::HandleHotAndSendCount(const unique_ptr& batch, + UniqueInfo& uniqueInfoOut, KeySendInfo& keySendInfo, + vector& sc, vector& splitSize) { std::shared_lock lock(g_smut); absl::flat_hash_map hotMap = hotKey[batch->name]; @@ -659,8 +693,8 @@ void KeyProcess::HandleHotAndSendCount(const unique_ptr &batch, Uniqu TimeCost computeHotTc; ComputeHotPos(batch, hotMap, uniqueInfoOut.hotPos, uniqueInfoOut.restore, hotOffset); LOG_DEBUG("ComputeHot TimeCost(ms):{}", computeHotTc.ElapsedMS()); - UpdateHotMapForUnique(keySendInfo.keySend, keySendInfo.keyCount, - hotOffset, batch->batchId % hotEmbUpdateStep == 0, batch->name); + UpdateHotMapForUnique(keySendInfo.keySend, keySendInfo.keyCount, hotOffset, + batch->batchId % hotEmbUpdateStep == 0, batch->name); } if (rankInfo.useStatic) { @@ -673,8 +707,9 @@ void KeyProcess::HandleHotAndSendCount(const unique_ptr &batch, Uniqu } } -void KeyProcess::ComputeHotPos(const unique_ptr &batch, absl::flat_hash_map &hotMap, - vector &hotPos, vector &restore, const int hotOffset) const +void KeyProcess::ComputeHotPos(const unique_ptr& batch, + absl::flat_hash_map& hotMap, vector& hotPos, + vector& restore, const int hotOffset) const { emb_key_t* inputData = batch->sample.data(); size_t miniBs = batch->Size(); @@ -697,48 +732,52 @@ void KeyProcess::ComputeHotPos(const unique_ptr &batch, absl::flat_ha } } -void KeyProcess::All2All(vector& sc, int id, const unique_ptr &batch, KeySendInfo& keySendInfo, - All2AllInfo& all2AllInfoOut) +void KeyProcess::All2All(vector& sc, int id, const unique_ptr& batch, + KeySendInfo& keySendInfo, All2AllInfo& all2AllInfoOut) { TimeCost getScAllTC; int channel = batch->channel; - GetScAllForUnique(sc, id, batch, all2AllInfoOut.scAll); // Allgather通信获取所有(不同rank相同thread id的) + GetScAllForUnique(sc, id, batch, + all2AllInfoOut.scAll); // Allgather通信获取所有(不同rank相同thread id的) LOG_DEBUG("GetScAll TimeCost(ms):{}", getScAllTC.ElapsedMS()); TimeCost all2allTC; - vector ss = Count2Start(sc); // send displays/offset 发送数据的起始偏移量 - vector rc(rankInfo.rankSize); // receive count + vector ss = Count2Start(sc); // send displays/offset 发送数据的起始偏移量 + vector rc(rankInfo.rankSize); // receive count for (int i = 0; i < rankInfo.rankSize; ++i) { // 通信量矩阵某一列的和即为本地要从其他设备接受的key数据量 rc[i] = all2AllInfoOut.scAll.at(i * rankInfo.rankSize + rankInfo.rankId); } - vector rs = Count2Start(rc); // receive displays/offset 接受数据的起始偏移量 + vector rs = 
Count2Start(rc); // receive displays/offset 接受数据的起始偏移量 all2AllInfoOut.keyRecv.resize(rs.back() + rc.back()); EASY_BLOCK("all2all") int retCode = MPI_Alltoallv(keySendInfo.keySend.data(), sc.data(), ss.data(), MPI_INT64_T, - all2AllInfoOut.keyRecv.data(), rc.data(), rs.data(), - MPI_INT64_T, comm[channel][id]); + all2AllInfoOut.keyRecv.data(), rc.data(), rs.data(), MPI_INT64_T, + comm[channel][id]); if (retCode != MPI_SUCCESS) { LOG_ERROR("rank {}, MPI_Alltoallv failed:{}", rankInfo.rankId, retCode); } - LOG_DEBUG("channelId:{} threadId:{} batchId:{}, All2All MPI_Alltoallv end.", channel, id, batch->batchId); + LOG_DEBUG("channelId:{} threadId:{} batchId:{}, All2All MPI_Alltoallv end.", channel, id, + batch->batchId); all2AllInfoOut.countRecv.resize(rs.back() + rc.back()); if (isWithFAAE) { retCode = MPI_Alltoallv(keySendInfo.keyCount.data(), sc.data(), ss.data(), MPI_UINT32_T, - all2AllInfoOut.countRecv.data(), rc.data(), - rs.data(), MPI_UINT32_T, comm[channel][id]); + all2AllInfoOut.countRecv.data(), rc.data(), rs.data(), MPI_UINT32_T, + comm[channel][id]); if (retCode != MPI_SUCCESS) { - LOG_ERROR("channelId:{} threadId:{} batchId:{}, MPI_Alltoallv failed:{}", - channel, id, batch->batchId, retCode); + LOG_ERROR("channelId:{} threadId:{} batchId:{}, MPI_Alltoallv failed:{}", channel, id, + batch->batchId, retCode); } } - LOG_DEBUG("channelId:{} threadId:{} batchId:{}, All2All end, all2allTC TimeCost(ms):{}", + LOG_DEBUG("channelId:{} threadId:{} batchId:{}, All2All end, all2allTC " + "TimeCost(ms):{}", channel, id, batch->batchId, all2allTC.ElapsedMS()); EASY_END_BLOCK } auto KeyProcess::ProcessSplitKeys(const unique_ptr& batch, int id, - vector& splitKeys) -> tuple, vector> + vector& splitKeys) + -> tuple, vector> { TimeCost processSplitKeysTC; EASY_FUNCTION(profiler::colors::Purple) @@ -746,44 +785,47 @@ auto KeyProcess::ProcessSplitKeys(const unique_ptr& batch, int id, LOG_INFO(KEY_PROCESS "channelId:{} threadId:{} batchId:{}, ProcessSplitKeys start.", batch->channel, id, batch->batchId); - // 使用静态all2all通信:发送或接受量为预置固定值 scInfo[batch->name] = 65536 / rankSize 经验值 - if (rankInfo.useStatic) { // maybe move after all2all - for (KeysT& i: splitKeys) { + // 使用静态all2all通信:发送或接受量为预置固定值 scInfo[batch->name] = 65536 / + // rankSize 经验值 + if (rankInfo.useStatic) { // maybe move after all2all + for (KeysT& i : splitKeys) { if (static_cast(i.size()) > embInfos[batch->name].sendCount) { - LOG_ERROR("{}[{}]:{} overflow! set send count bigger than {}", - batch->name, batch->channel, batch->batchId, i.size()); + LOG_ERROR("{}[{}]:{} overflow! set send count bigger than {}", batch->name, + batch->channel, batch->batchId, i.size()); throw runtime_error( StringFormat("%s[%d]:%d overflow! 
set send count bigger than %d", - batch->name.c_str(), batch->channel, batch->batchId, i.size()).c_str()); + batch->name.c_str(), batch->channel, batch->batchId, i.size()) + .c_str()); } i.resize(embInfos[batch->name].sendCount, -1); } } KeysT keySend; - vector sc; // send count - for (const auto& i: splitKeys) { + vector sc; // send count + for (const auto& i : splitKeys) { sc.push_back(static_cast(i.size())); keySend.insert(keySend.cend(), i.cbegin(), i.cend()); } KeysT keyRecv; TimeCost getScAllTC; - vector scAll = GetScAll(sc, id, batch); // Allgather通信获取所有(不同rank相同thread id的)线程间通信量矩阵 + vector scAll = GetScAll(sc, id, batch); // Allgather通信获取所有(不同rank相同thread + // id的)线程间通信量矩阵 LOG_DEBUG("getScAllTC(ms)(AllReduce-AllGather):{}", getScAllTC.ElapsedMS()); vector ss = Count2Start(sc); // send displays/offset 发送数据的起始偏移量 - vector rc; // receive count + vector rc; // receive count for (int i = 0; i < rankInfo.rankSize; ++i) { // 通信量矩阵某一列的和即为本地要从其他设备接受的key数据量 rc.push_back(scAll.at(i * rankInfo.rankSize + rankInfo.rankId)); } - vector rs = Count2Start(rc); // receive displays/offset 接受数据的起始偏移量 + vector rs = Count2Start(rc); // receive displays/offset 接受数据的起始偏移量 keyRecv.resize(rs.back() + rc.back()); EASY_BLOCK("all2all") TimeCost uniqueAll2AllTC; - int retCode = MPI_Alltoallv(keySend.data(), sc.data(), ss.data(), MPI_INT64_T, - keyRecv.data(), rc.data(), rs.data(), MPI_INT64_T, comm[batch->channel][id]); + int retCode = MPI_Alltoallv(keySend.data(), sc.data(), ss.data(), MPI_INT64_T, keyRecv.data(), + rc.data(), rs.data(), MPI_INT64_T, comm[batch->channel][id]); if (retCode != MPI_SUCCESS) { LOG_ERROR("rank {}, MPI_Alltoallv failed:{}", rankInfo.rankId, retCode); } @@ -792,8 +834,8 @@ auto KeyProcess::ProcessSplitKeys(const unique_ptr& batch, int id, EASY_END_BLOCK LOG_DEBUG(KEY_PROCESS "channelId:{} threadId:{} batchId:{}, batchName:{}, MPI_Alltoallv finish." 
" processSplitKeysTC(ms):{}", - batch->channel, id, batch->batchId, batch->name, processSplitKeysTC.ElapsedMS()); - return { keyRecv, scAll, ss }; + batch->channel, id, batch->batchId, batch->name, processSplitKeysTC.ElapsedMS()); + return {keyRecv, scAll, ss}; } /* @@ -801,15 +843,16 @@ auto KeyProcess::ProcessSplitKeys(const unique_ptr& batch, int id, * splitKeys返回:将数据的key切分到其所在dev id对应的桶中,并去重。 * restore返回:去重后key在桶内偏移量(用于计算恢复向量) */ -tuple, vector> KeyProcess::HashSplit(const unique_ptr& batch) const +tuple, vector> KeyProcess::HashSplit( + const unique_ptr& batch) const { EASY_FUNCTION(profiler::colors::Gold) emb_key_t* batchData = batch->sample.data(); size_t miniBs = batch->Size(); vector splitKeys(rankInfo.rankSize); vector restore(batch->Size()); - vector hashSplitLens(rankInfo.rankSize); // 初始化全0,记录每个桶的长度 - absl::flat_hash_map uKey; // 用于去重查询 + vector hashSplitLens(rankInfo.rankSize); // 初始化全0,记录每个桶的长度 + absl::flat_hash_map uKey; // 用于去重查询 EASY_BLOCK("split push back") for (size_t i = 0; i < miniBs; i++) { const emb_key_t& key = batchData[i]; @@ -817,9 +860,10 @@ tuple, vector> KeyProcess::HashSplit(const unique_ptrsecond; } } @@ -832,10 +876,11 @@ tuple, vector> KeyProcess::HashSplit(const unique_ptrchannel, batch->batchId, rankInfo.rankId, batch->Size(), uniqueKeyNum); + LOG_INFO(STAT_INFO "channel_id {} batch_id {} rank_id {} batch_key_num {} " + "unique_key_num {}", + batch->channel, batch->batchId, rankInfo.rankId, batch->Size(), uniqueKeyNum); } - return { splitKeys, restore }; + return {splitKeys, restore}; } void KeyProcess::PaddingAlltoallVC(vector& splitKeys) const @@ -857,10 +902,10 @@ tuple, vector, vector>> KeyProcess::Hash emb_key_t* batchData = batch->sample.data(); size_t miniBs = batch->Size(); vector splitKeys(rankInfo.rankSize); - vector> keyCount(rankInfo.rankSize); // splitKeys在原始batch中对应的频次 + vector> keyCount(rankInfo.rankSize); // splitKeys在原始batch中对应的频次 vector restore(batch->Size()); - vector hashSplitLens(rankInfo.rankSize); // 初始化全0,记录每个桶的长度 - absl::flat_hash_map> uKey; // 用于去重查询 + vector hashSplitLens(rankInfo.rankSize); // 初始化全0,记录每个桶的长度 + absl::flat_hash_map> uKey; // 用于去重查询 EASY_BLOCK("split push back") for (size_t i = 0; i < miniBs; i++) { const emb_key_t& key = batchData[i]; @@ -868,10 +913,11 @@ tuple, vector, vector>> KeyProcess::Hash auto result = uKey.find(key); if (result == uKey.end()) { splitKeys[devId].push_back(key); - restore[i] = hashSplitLens[devId]++; // restore记录去重后key在桶内偏移量(用于计算恢复向量) + restore[i] = + hashSplitLens[devId]++; // restore记录去重后key在桶内偏移量(用于计算恢复向量) uKey[key].first = restore[i]; uKey[key].second = 1; - } else { // 去重 + } else { // 去重 restore[i] = result->second.first; uKey[key].second++; } @@ -897,20 +943,22 @@ tuple, vector, vector>> KeyProcess::Hash for (int devId = 0; devId < rankInfo.rankSize; ++devId) { uniqueKeyNum += splitKeys[devId].size(); } - LOG_INFO(STAT_INFO "channel_id {} batch_id {} rank_id {} batch_key_num {} faae_unique_key_num {}", - batch->channel, batch->batchId, rankInfo.rankId, batch->Size(), uniqueKeyNum); + LOG_INFO(STAT_INFO "channel_id {} batch_id {} rank_id {} batch_key_num {} " + "faae_unique_key_num {}", + batch->channel, batch->batchId, rankInfo.rankId, batch->Size(), uniqueKeyNum); } - return { splitKeys, restore, keyCount }; + return {splitKeys, restore, keyCount}; } -tuple, vector, vector> KeyProcess::HotHashSplit(const unique_ptr& batch) +tuple, vector, vector> KeyProcess::HotHashSplit( + const unique_ptr& batch) { EASY_FUNCTION(profiler::colors::Gold) emb_key_t* batchData = 
batch->sample.data(); size_t miniBs = batch->Size(); vector splitKeys(rankInfo.rankSize); vector restore(batch->Size()); - absl::flat_hash_map uKey; // 用于去重查询 + absl::flat_hash_map uKey; // 用于去重查询 absl::flat_hash_map keyCountMapByEmbName; std::shared_lock lock(g_smut); auto hotMap = hotKey[batch->name]; @@ -919,31 +967,31 @@ tuple, vector, vector> KeyProcess::HotHashSplit(cons vector hotPosDev(hotEmbTotCount[batch->name]); int hotCount = 0; int hotOffset = hotEmbTotCount[batch->name]; - for (size_t i = 0; i < miniBs; i++) { // for mini batch + for (size_t i = 0; i < miniBs; i++) { // for mini batch const emb_key_t& key = batchData[i]; if (batch->batchId % hotEmbUpdateStep == 0) { keyCountMapByEmbName[key]++; } emb_key_t devId = abs(key % static_cast(rankInfo.rankSize)); auto result = uKey.find(key); - if (result != uKey.end()) { // // already in splitKeys + if (result != uKey.end()) { // // already in splitKeys restore[i] = result->second; continue; } // new key in current batch - splitKeys[devId].push_back(key); // push to bucket + splitKeys[devId].push_back(key); // push to bucket auto hot = hotMap.find(key); - if (hot != hotMap.end()) { // is hot key - if (hot->second == -1) { // is new hot key in this batch + if (hot != hotMap.end()) { // is hot key + if (hot->second == -1) { // is new hot key in this batch // pos in lookup vec (need add ss) for hot-gather hotPos[hotCount] = static_cast(splitKeys[devId].size()) - 1; - hotPosDev[hotCount] = devId; // which dev, for get ss + hotPosDev[hotCount] = devId; // which dev, for get ss hot->second = hotCount; - restore[i] = hotCount++; // get pos of hot emb + restore[i] = hotCount++; // get pos of hot emb } else { restore[i] = hot->second; } - } else { // is not hot key + } else { // is not hot key // restore记录去重后key在桶内偏移量(用于计算恢复向量) restore[i] = static_cast(splitKeys[devId].size() + (hotOffset - 1)); } @@ -955,22 +1003,25 @@ tuple, vector, vector> KeyProcess::HotHashSplit(cons for (int devId = 0; devId < rankInfo.rankSize; ++devId) { uniqueKeyNum += splitKeys[devId].size(); } - LOG_INFO(STAT_INFO "channel_id {} batch_id {} rank_id {} batch_key_num {} hot_unique_key_num {}", - batch->channel, batch->batchId, rankInfo.rankId, batch->Size(), uniqueKeyNum); + LOG_INFO(STAT_INFO "channel_id {} batch_id {} rank_id {} batch_key_num {} " + "hot_unique_key_num {}", + batch->channel, batch->batchId, rankInfo.rankId, batch->Size(), uniqueKeyNum); } - UpdateHotMap(keyCountMapByEmbName, hotEmbTotCount[batch->name], batch->batchId % hotEmbUpdateStep == 0, - batch->name); + UpdateHotMap(keyCountMapByEmbName, hotEmbTotCount[batch->name], + batch->batchId % hotEmbUpdateStep == 0, batch->name); AddCountStartToHotPos(splitKeys, hotPos, hotPosDev, batch); - return { splitKeys, restore, hotPos }; + return {splitKeys, restore, hotPos}; } -void KeyProcess::AddCountStartToHotPos(vector& splitKeys, vector& hotPos, const vector& hotPosDev, +void KeyProcess::AddCountStartToHotPos(vector& splitKeys, vector& hotPos, + const vector& hotPosDev, const unique_ptr& batch) { vector splitKeysSize; - for (auto& splitKey: splitKeys) { - int tmp = rankInfo.useStatic ? embInfos[batch->name].sendCount : static_cast(splitKey.size()); + for (auto& splitKey : splitKeys) { + int tmp = rankInfo.useStatic ? 
embInfos[batch->name].sendCount + : static_cast(splitKey.size()); splitKeysSize.push_back(tmp); } @@ -980,13 +1031,13 @@ void KeyProcess::AddCountStartToHotPos(vector& splitKeys, vector& ho } } -void KeyProcess::UpdateHotMapForUnique(const KeysT &keySend, const vector &keyCount, +void KeyProcess::UpdateHotMapForUnique(const KeysT& keySend, const vector& keyCount, uint32_t count, bool refresh, const string& embName) { auto& hotMap = hotKey[embName]; if (refresh) { priority_queue> pq; - for (size_t i = 0;i < keySend.size(); ++i) { + for (size_t i = 0; i < keySend.size(); ++i) { if (keySend[i] == -1) { continue; } @@ -1005,15 +1056,15 @@ void KeyProcess::UpdateHotMapForUnique(const KeysT &keySend, const vector& keyCountMapByEmbName, uint32_t count, bool refresh, - const string& embName) +void KeyProcess::UpdateHotMap(absl::flat_hash_map& keyCountMapByEmbName, + uint32_t count, bool refresh, const string& embName) { if (!refresh) { return; } auto& hotMap = hotKey[embName]; - priority_queue> pq; // top k key - for (auto& p: keyCountMapByEmbName) { + priority_queue> pq; // top k key + for (auto& p : keyCountMapByEmbName) { pq.push(pair(-p.second, p.first)); if (pq.size() > count) { pq.pop(); @@ -1029,43 +1080,46 @@ void KeyProcess::UpdateHotMap(absl::flat_hash_map& keyCountMapBy } /* - * 将本地(rank)batch要发送的key数据量进行Allgather通信,获取所有(不同rank相同thread id的)线程间的通信量矩阵 + * 将本地(rank)batch要发送的key数据量进行Allgather通信,获取所有(不同rank相同thread + * id的)线程间的通信量矩阵 * scAll返回:所有线程间的通信量矩阵(按行平铺的一维向量) */ -vector KeyProcess::GetScAll(const vector& keyScLocal, int commId, const unique_ptr& batch) +vector KeyProcess::GetScAll(const vector& keyScLocal, int commId, + const unique_ptr& batch) { EASY_FUNCTION() vector scAll; scAll.resize(rankInfo.rankSize * rankInfo.rankSize); - LOG_DEBUG("channelId:{} threadId:{} batchId:{}, GetScAll start.", batch->channel, commId, batch->batchId); + LOG_DEBUG("channelId:{} threadId:{} batchId:{}, GetScAll start.", batch->channel, commId, + batch->batchId); // allgather keyScLocal(key all2all keyScLocal = device all2all rc) - auto retCode = MPI_Allgather(keyScLocal.data(), rankInfo.rankSize, MPI_INT, - scAll.data(), rankInfo.rankSize, MPI_INT, - comm[batch->channel][commId]); + auto retCode = MPI_Allgather(keyScLocal.data(), rankInfo.rankSize, MPI_INT, scAll.data(), + rankInfo.rankSize, MPI_INT, comm[batch->channel][commId]); if (retCode != MPI_SUCCESS) { LOG_ERROR("rank {} commId {}, MPI_Allgather failed:{}", rankInfo.rankId, commId, retCode); } - LOG_DEBUG("channelId:{} threadId:{} batchId:{}, GetScAll MPI_Allgather end, key scAll matrix:\n{}", + LOG_DEBUG("channelId:{} threadId:{} batchId:{}, GetScAll MPI_Allgather end, " + "key scAll matrix:\n{}", batch->channel, commId, batch->batchId, VectorToString(scAll)); return scAll; } -void KeyProcess::GetScAllForUnique(const vector& keyScLocal, int commId, const unique_ptr &batch, - vector &scAllOut) +void KeyProcess::GetScAllForUnique(const vector& keyScLocal, int commId, + const unique_ptr& batch, vector& scAllOut) { EASY_FUNCTION() int channel = batch->channel; scAllOut.resize(rankInfo.rankSize * rankInfo.rankSize); // allgather keyScLocal(key all2all keyScLocal = device all2all rc) - auto retCode = MPI_Allgather(keyScLocal.data(), rankInfo.rankSize, MPI_INT, - scAllOut.data(), rankInfo.rankSize, MPI_INT, - comm[channel][commId]); + auto retCode = MPI_Allgather(keyScLocal.data(), rankInfo.rankSize, MPI_INT, scAllOut.data(), + rankInfo.rankSize, MPI_INT, comm[channel][commId]); if (retCode != MPI_SUCCESS) { LOG_ERROR("rank {}, MPI_Allgather 
failed:{}", rankInfo.rankId, retCode); } - LOG_DEBUG("channelId:{} threadId:{} batchId:{}, GetScAllForUnique end, key scAllOut matrix:\n{}", + LOG_DEBUG("channelId:{} threadId:{} batchId:{}, GetScAllForUnique end, key " + "scAllOut matrix:\n{}", channel, commId, batch->batchId, VectorToString(scAllOut)); } @@ -1073,9 +1127,9 @@ void KeyProcess::Key2Offset(const EmbNameT& embName, KeysT& splitKey, int channe { TimeCost key2OffsetTC; EASY_FUNCTION(profiler::colors::Blue600) - std::lock_guard lk(mut); // lock for PROCESS_THREAD + std::lock_guard lk(mut); // lock for PROCESS_THREAD auto& key2Offset = keyOffsetMap[embName]; - auto& maxOffsetTmp = maxOffset[embName]; + auto& maxOffsetTmp = maxOffset[embName]; auto& evictPos = evictPosMap[embName]; for (long& key : splitKey) { if (key == -1) { @@ -1088,8 +1142,9 @@ void KeyProcess::Key2Offset(const EmbNameT& embName, KeysT& splitKey, int channe size_t offset; // 新值, emb有pos可复用 offset = evictPos.back(); - LOG_TRACE("HBM mode, evictPos is not null, name[{}] key [{}] reuse offset [{}], evictSize [{}]!!!", - embName, key, offset, evictPos.size()); + LOG_TRACE("HBM mode, evictPos is not null, name[{}] key [{}] reuse " + "offset [{}], evictSize [{}]!!!", + embName, key, offset, evictPos.size()); key2Offset[key] = offset; key = offset; evictPos.pop_back(); @@ -1107,18 +1162,18 @@ void KeyProcess::Key2Offset(const EmbNameT& embName, KeysT& splitKey, int channe LOG_ERROR("dev cache overflow {} > {}", maxOffsetTmp, embInfos[embName].devVocabSize); throw std::runtime_error("dev cache overflow!"); } - LOG_DEBUG("current hbm emb:{}, usage:{}/{} key2OffsetTC({} ms)", - embName, maxOffsetTmp, embInfos[embName].devVocabSize, key2OffsetTC.ElapsedMS()); + LOG_DEBUG("current hbm emb:{}, usage:{}/{} key2OffsetTC({} ms)", embName, maxOffsetTmp, + embInfos[embName].devVocabSize, key2OffsetTC.ElapsedMS()); } void KeyProcess::Key2OffsetDynamicExpansion(const EmbNameT& embName, KeysT& splitKey, int channel) { TimeCost key2OffsetTC; EASY_FUNCTION(profiler::colors::Blue600) - std::lock_guard lk(mut); // lock for PROCESS_THREAD + std::lock_guard lk(mut); // lock for PROCESS_THREAD auto& key2Offset = keyOffsetMap[embName]; - auto& maxOffsetTmp = maxOffset[embName]; - auto& curEmbTable = embeddingTableMap[embName]; // empty when not use dynamic expansion + auto& maxOffsetTmp = maxOffset[embName]; + auto& curEmbTable = embeddingTableMap[embName]; // empty when not use dynamic expansion for (long& key : splitKey) { if (key == -1) { key = 0; @@ -1141,8 +1196,8 @@ void KeyProcess::Key2OffsetDynamicExpansion(const EmbNameT& embName, KeysT& spli key = 0; } } - LOG_DEBUG("current expansion emb:{}, usage:{}/{}, key2OffsetTC({} ms)", - embName, maxOffsetTmp, embInfos[embName].devVocabSize, key2OffsetTC.ElapsedMS()); + LOG_DEBUG("current expansion emb:{}, usage:{}/{}, key2OffsetTC({} ms)", embName, maxOffsetTmp, + embInfos[embName].devVocabSize, key2OffsetTC.ElapsedMS()); } /* @@ -1150,7 +1205,8 @@ void KeyProcess::Key2OffsetDynamicExpansion(const EmbNameT& embName, KeysT& spli * 输入接收到emb块的偏移blockOffset,batch内每个key在块内的偏移restoreVec * 输出恢复向量restoreVec,即batch到keySend(平铺的splitKeys)的映射 * 实现方案2:用map记录keySend中key和表内index/offset的映射,在恢复emb时直接根据batch的key查询该map即可找到receive - * emb中的 位置,时间复杂度:O(map构建keySend.size + map查询),空间复杂度:O(map) + * emb中的 位置,时间复杂度:O(map构建keySend.size + + * map查询),空间复杂度:O(map) */ void KeyProcess::BuildRestoreVec(const unique_ptr& batch, const vector& blockOffset, vector& restoreVec, int hotPosSize) const @@ -1167,11 +1223,11 @@ void KeyProcess::BuildRestoreVec(const unique_ptr& 
batch, const vecto hotNum += 1; } } - LOG_DEBUG("hot num in all:{}/{} buildRestoreVecTC(ms):{}", - hotNum, batch->Size(), buildRestoreVecTC.ElapsedMS()); + LOG_DEBUG("hot num in all:{}/{} buildRestoreVecTC(ms):{}", hotNum, batch->Size(), + buildRestoreVecTC.ElapsedMS()); } -template +template T KeyProcess::GetInfo(info_list_t& list, int batch, const string& embName, int channel) { std::lock_guard lockGuard(mut); @@ -1181,7 +1237,8 @@ T KeyProcess::GetInfo(info_list_t& list, int batch, const string& embName, in } auto topBatch = get(list[embName][channel].top()); if (topBatch < batch) { - LOG_ERROR("wrong batch id, top:{} getting:{}, channel:{}, may not clear channel", topBatch, batch, channel); + LOG_ERROR("wrong batch id, top:{} getting:{}, channel:{}, may not clear channel", topBatch, + batch, channel); this_thread::sleep_for(1s); } if (topBatch != batch) { @@ -1201,7 +1258,8 @@ T KeyProcess::GetInfo(info_list_t& list, int batch, const string& embName, in KeysT KeyProcess::GetLookupKeys(int batch, const string& embName, int channel) { TimeCost tc = TimeCost(); - // 循环尝试获取list中的数据;如果key process线程退出或者处理数据超时,返回空vector + // 循环尝试获取list中的数据;如果key + // process线程退出或者处理数据超时,返回空vector while (true) { if (!isRunning) { return {}; @@ -1209,8 +1267,9 @@ KeysT KeyProcess::GetLookupKeys(int batch, const string& embName, int channel) // 判断此时的batch id是否已经过期,即通道已经刷新 HybridMgmtBlock* hybridMgmtBlock = Singleton::GetInstance(); if (batch != hybridMgmtBlock->hybridBatchId[channel]) { - LOG_DEBUG(KEY_PROCESS "Detected that the batch has expired at this time, exiting the loop! {}[{}]:{}", - embName, channel, batch); + LOG_DEBUG(KEY_PROCESS "Detected that the batch has expired at this time, " + "exiting the loop! {}[{}]:{}", + embName, channel, batch); return {}; } if (batch != 0 && channel != 0 && tc.ElapsedSec() > KEY_PROCESS_TIMEOUT) { @@ -1231,8 +1290,9 @@ KeysT KeyProcess::GetLookupKeys(int batch, const string& embName, int channel) SendEos(batch, channel); return {}; } - LOG_TRACE("getting info failed {}[{}], list is empty, and mgmt batchId: {}, readEmbKey batchId: {}.", - embName, channel, batch, readEmbKeyBatchId); + LOG_TRACE("getting info failed {}[{}], list is empty, and mgmt batchId: " + "{}, readEmbKey batchId: {}.", + embName, channel, batch, readEmbKeyBatchId); this_thread::sleep_for(1ms); } catch (WrongListTop&) { LOG_TRACE("getting info failed {}[{}]:{} wrong top", embName, channel, batch); @@ -1256,22 +1316,28 @@ void KeyProcess::SendEos(int batchId, int channel) vector tensors; bool isNeedResend = true; - for (const auto& emb: as_const(embInfos)) { // 一个表触发以后,其余表都发送eos,最后外层接收null退出此次循环 - LOG_INFO("channelId:{} batchId:{}, the embName:{} related channel SendEos start.", channel, batchId, emb.first); + for (const auto& emb : + as_const(embInfos)) { // 一个表触发以后,其余表都发送eos,最后外层接收null退出此次循环 + LOG_INFO("channelId:{} batchId:{}, the embName:{} related channel SendEos " + "start.", + channel, batchId, emb.first); if (!isRunning) { throw EndRunExit("SendEos end run, isRunning is false after lock destroyMutex."); } for (const string& transName : usedChannelNames) { - string sendName = StringFormat("%s_%s_%d", emb.first.c_str(), transName.c_str(), channel); + string sendName = + StringFormat("%s_%s_%d", emb.first.c_str(), transName.c_str(), channel); size_t channelSize = 0; - + acltdtQueryChannelSize(transChannels[sendName], &channelSize); LOG_INFO("[EOS] Before send eos, {} contains {}.", sendName, channelSize); - SendTensorsByAcl(transChannels[sendName], ACL_TENSOR_DATA_END_OF_SEQUENCE, tensors, 
isNeedResend); + SendTensorsByAcl(transChannels[sendName], ACL_TENSOR_DATA_END_OF_SEQUENCE, tensors, + isNeedResend); acltdtQueryChannelSize(transChannels[sendName], &channelSize); LOG_INFO("[EOS] After send eos, {} contains {}.", sendName, channelSize); } - LOG_INFO("channelId:{} batchId:{}, the embName:{} related channel SendEos end.", channel, batchId, emb.first); + LOG_INFO("channelId:{} batchId:{}, the embName:{} related channel SendEos end.", channel, + batchId, emb.first); } LOG_INFO("channelId:{} batchId:{}, SendEos end.", channel, batchId); @@ -1285,7 +1351,8 @@ void KeyProcess::SendEos(int batchId, int channel) /// \param channel 通道索引(训练/推理) /// \param type 数据类型 /// \return -unique_ptr> KeyProcess::GetInfoVec(int batch, const string& embName, int channel, ProcessedInfo type) +unique_ptr> KeyProcess::GetInfoVec(int batch, const string& embName, int channel, + ProcessedInfo type) { TimeCost tc = TimeCost(); info_list_t* list; @@ -1302,7 +1369,8 @@ unique_ptr> KeyProcess::GetInfoVec(int batch, const string& embNa throw std::invalid_argument("Invalid ProcessedInfo Type."); } - // 循环尝试获取list中的数据;如果key process线程退出或者处理数据超时,返回空指针 + // 循环尝试获取list中的数据;如果key + // process线程退出或者处理数据超时,返回空指针 while (true) { if (!isRunning) { return nullptr; @@ -1310,8 +1378,9 @@ unique_ptr> KeyProcess::GetInfoVec(int batch, const string& embNa // 判断此时的batch id是否已经过期,即通道已经刷新 HybridMgmtBlock* hybridMgmtBlock = Singleton::GetInstance(); if (batch != hybridMgmtBlock->hybridBatchId[channel]) { - LOG_DEBUG(KEY_PROCESS "Detected that the batch has expired at this time, exiting the loop! {}[{}]:{}", - embName, channel, batch); + LOG_DEBUG(KEY_PROCESS "Detected that the batch has expired at this time, " + "exiting the loop! {}[{}]:{}", + embName, channel, batch); return nullptr; } if (batch != 0 && channel != 0 && tc.ElapsedSec() > KEY_PROCESS_TIMEOUT) { @@ -1328,15 +1397,18 @@ unique_ptr> KeyProcess::GetInfoVec(int batch, const string& embNa return uTensor; } catch (EmptyList&) { unique_lock lockEosGuard(eosMutex); - // 避免eos在keyProcess还未处理完数据时插队到通道前面, readEmbKey真实的次数是readEmbedBatchId减1 - if (isNeedSendEos[channel] && (hybridMgmtBlock->readEmbedBatchId[channel] - 1) < batch) { + // 避免eos在keyProcess还未处理完数据时插队到通道前面, + // readEmbKey真实的次数是readEmbedBatchId减1 + if (isNeedSendEos[channel] && + (hybridMgmtBlock->readEmbedBatchId[channel] - 1) < batch) { LOG_INFO("channelId:{} batchId:{}, GetInfoVec eos.", channel, batch); unique_lock lockDestroyGuard(destroyMutex); SendEos(batch, channel); return nullptr; } - LOG_TRACE("getting info failed {}[{}], list is empty, and mgmt batchId: {}, readEmbKey batchId: {}.", - embName, channel, batch, (hybridMgmtBlock->readEmbedBatchId[channel] - 1)); + LOG_TRACE("getting info failed {}[{}], list is empty, and mgmt batchId: " + "{}, readEmbKey batchId: {}.", + embName, channel, batch, (hybridMgmtBlock->readEmbedBatchId[channel] - 1)); this_thread::sleep_for(1ms); } catch (WrongListTop&) { LOG_TRACE("getting info failed {}[{}]:{} wrong top", embName, channel, batch); @@ -1349,7 +1421,7 @@ void KeyProcess::SendA2A(const vector& a2aInfo, const string& embName, int { // 数据放到队列里,在mgmt里面发送(检查发送数据量) auto tensors = make_unique>(); - Tensor tmpTensor(tensorflow::DT_INT64, { rankInfo.rankSize, rankInfo.rankSize }); + Tensor tmpTensor(tensorflow::DT_INT64, {rankInfo.rankSize, rankInfo.rankSize}); auto tmpData = tmpTensor.matrix(); for (int i = 0; i < rankInfo.rankSize; ++i) { for (int j = 0; j < rankInfo.rankSize; ++j) { @@ -1369,13 +1441,14 @@ int KeyProcess::GetMaxStep(int channelId) const return 
rankInfo.ctrlSteps.at(channelId); } -void KeyProcess::EvictKeys(const string& embName, const vector& keys) // hbm +void KeyProcess::EvictKeys(const string& embName, + const vector& keys) // hbm { LOG_INFO(KEY_PROCESS "hbm funEvictCall: [{}]! keySize:{}", embName, keys.size()); EmbeddingMgmt::Instance()->EvictKeys(embName, keys); } -void KeyProcess::EvictKeysCombine(const vector& keys) // hbm +void KeyProcess::EvictKeysCombine(const vector& keys) // hbm { LOG_INFO(KEY_PROCESS "hbm combine funEvictCall, keySize:{}", keys.size()); EmbeddingMgmt::Instance()->EvictKeysCombine(keys); @@ -1384,7 +1457,7 @@ void KeyProcess::EvictKeysCombine(const vector& keys) // hbm void KeyProcess::EvictDeleteDeviceEmb(const string& embName, const vector& keys) { EASY_FUNCTION(profiler::colors::Blue600) - std::lock_guard lk(mut); // lock for PROCESS_THREAD + std::lock_guard lk(mut); // lock for PROCESS_THREAD size_t keySize = keys.size(); auto& devHashMap = keyOffsetMap.at(embName); @@ -1398,7 +1471,7 @@ void KeyProcess::EvictDeleteDeviceEmb(const string& embName, const vectorsecond; @@ -1406,24 +1479,26 @@ void KeyProcess::EvictDeleteDeviceEmb(const string& embName, const vector offset) { if (offset.size() > embInfos[embName].devVocabSize) { - LOG_ERROR("{} overflow! init evict dev, evictOffset size {} bigger than dev vocabSize {}", - embName, offset.size(), embInfos[embName].devVocabSize); - throw runtime_error( - Logger::Format("{} overflow! init evict dev, evictOffset size {} bigger than dev vocabSize {}", - embName, offset.size(), embInfos[embName].devVocabSize - ).c_str()); + LOG_ERROR("{} overflow! init evict dev, evictOffset size {} bigger than " + "dev vocabSize {}", + embName, offset.size(), embInfos[embName].devVocabSize); + throw runtime_error(Logger::Format("{} overflow! init evict dev, evictOffset size {} " + "bigger than dev vocabSize {}", + embName, offset.size(), embInfos[embName].devVocabSize) + .c_str()); } vector tmpDataOut; Tensor tmpData = Vec2TensorI32(offset); tmpDataOut.emplace_back(tmpData); - tmpDataOut.emplace_back(Tensor(tensorflow::DT_INT32, { 1 })); + tmpDataOut.emplace_back(Tensor(tensorflow::DT_INT32, {1})); auto evictLen = tmpDataOut.back().flat(); int evictSize = static_cast(offset.size()); @@ -1433,15 +1508,16 @@ void KeyProcess::EvictInitDeviceEmb(const string& embName, vector offset auto trans = Singleton::GetInstance(); trans->Send(TransferChannel::EVICT, tmpDataOut, TRAIN_CHANNEL_ID, embName); - LOG_INFO(KEY_PROCESS "hbm EvictInitDeviceEmb: [{}]! send offsetSize:{}", embName, offset.size()); + LOG_INFO(KEY_PROCESS "hbm EvictInitDeviceEmb: [{}]! 
send offsetSize:{}", embName, + offset.size()); } -string KeyProcess::DumpSplitKeys(vector> &splitKeys) const +string KeyProcess::DumpSplitKeys(vector>& splitKeys) const { stringstream ssTrace; for (int devId = 0; devId < rankInfo.rankSize; ++devId) { ssTrace << '|' << devId << ":"; - for (auto key: splitKeys[devId]) { + for (auto key : splitKeys[devId]) { ssTrace << key << ','; } ssTrace << '|'; @@ -1480,7 +1556,8 @@ void KeyProcess::RecordKeyCountMap(const unique_ptr& batch) void KeyProcess::SetEos(int status, int channelId) { unique_lock lockGuard(eosMutex); - LOG_INFO("isNeedSendEos status is changed, before status:[{}], input status:{}, channel:[{}], ", + LOG_INFO("isNeedSendEos status is changed, before status:[{}], input " + "status:{}, channel:[{}], ", isNeedSendEos[channelId], status, channelId); isNeedSendEos[channelId] = (status == 1); } diff --git a/src/core/key_process/key_process.h b/src/core/key_process/key_process.h index 8bd7b8d0..d6a0b80b 100644 --- a/src/core/key_process/key_process.h +++ b/src/core/key_process/key_process.h @@ -16,283 +16,291 @@ See the License for the specific language governing permissions and #ifndef MX_REC_KEY_PROCESS_H #define MX_REC_KEY_PROCESS_H -#include +#include +#include + #include #include +#include #include #include -#include - -#include -#include -#include "ock_ctr_common/include/factory.h" +#include -#include "utils/common.h" #include "emb_table/emb_table.h" #include "feature_admit_and_evict.h" #include "hybrid_mgmt/hybrid_mgmt_block.h" +#include "ock_ctr_common/include/factory.h" +#include "utils/common.h" #include "utils/singleton.h" namespace MxRec { - using namespace std; +using namespace std; - template - struct Cmp { - bool operator()(const T& a, const T& b) const - { - return get(a) > get(b); // batch id order - } - }; +template +struct Cmp { + bool operator()(const T& a, const T& b) const + { + return get(a) > get(b); // batch id order + } +}; - template - using heap_t = priority_queue, Cmp>; +template +using heap_t = priority_queue, Cmp>; - template - using info_list_t = map, MAX_QUEUE_NUM>>; +template +using info_list_t = map, MAX_QUEUE_NUM>>; - enum class ProcessedInfo { - RESTORE, - ALL2ALL, - INVALID - }; +enum class ProcessedInfo { + RESTORE, + ALL2ALL, + INVALID +}; - class EndRunExit : public std::exception { - public: - explicit EndRunExit(const char* message) : errorMessage(message) {} +class EndRunExit : public std::exception { +public: + explicit EndRunExit(const char* message) : errorMessage(message) {} - const char* what() const noexcept override - { - return errorMessage; - } + const char* what() const noexcept override + { + return errorMessage; + } - private: - const char* errorMessage; - }; +private: + const char* errorMessage; +}; - constexpr int MPI_ABNORMAL_SEND_VALUE = 0; // MPI异常通信时发送0 - constexpr int MPI_NORMAL_SEND_VALUE = 1; // MPI正常通信时发送1 +constexpr int MPI_ABNORMAL_SEND_VALUE = 0; // MPI异常通信时发送0 +constexpr int MPI_NORMAL_SEND_VALUE = 1; // MPI正常通信时发送1 - class EmptyList : public std::exception { - }; +class EmptyList : public std::exception {}; - class WrongListTop : public std::exception { - }; +class WrongListTop : public std::exception {}; - class KeyProcess { - public: - bool Initialize(const RankInfo& rInfo, const vector& eInfos, - const vector& thresholdValues = {}, int seed = 0); +class KeyProcess { +public: + bool Initialize(const RankInfo& rInfo, const vector& eInfos, + const vector& thresholdValues = {}, int seed = 0); - unique_ptr> GetInfoVec(int batch, const string& embName, int channel, 
ProcessedInfo type); + unique_ptr> GetInfoVec(int batch, const string& embName, int channel, + ProcessedInfo type); - KeysT GetLookupKeys(int batch, const string& embName, int channel); + KeysT GetLookupKeys(int batch, const string& embName, int channel); - int GetMaxStep(int channelId) const; + int GetMaxStep(int channelId) const; - OffsetMemT GetMaxOffset(); + OffsetMemT GetMaxOffset(); - KeyOffsetMemT GetKeyOffsetMap(); + KeyOffsetMemT GetKeyOffsetMap(); - KeyCountMemT GetKeyCountMap(); + KeyCountMemT GetKeyCountMap(); - FeatureAdmitAndEvict& GetFeatAdmitAndEvict(); + FeatureAdmitAndEvict& GetFeatAdmitAndEvict(); - void LoadMaxOffset(OffsetMemT& loadData); + void LoadMaxOffset(OffsetMemT& loadData); - void LoadKeyOffsetMap(KeyOffsetMemT& loadData); + void LoadKeyOffsetMap(KeyOffsetMemT& loadData); - void LoadKeyCountMap(KeyCountMemT& loadData); + void LoadKeyCountMap(KeyCountMemT& loadData); - void Destroy(); + void Destroy(); - void LoadSaveLock(); + void LoadSaveLock(); - void LoadSaveUnlock(); + void LoadSaveUnlock(); - void EvictKeys(const string& embName, const vector& keys); + void EvictKeys(const string& embName, const vector& keys); - void EvictKeysCombine(const vector& keys); + void EvictKeysCombine(const vector& keys); - void SetupHotEmbUpdateStep(); + void SetupHotEmbUpdateStep(); - int64_t GetExpansionTableSize(const string& embName); + int64_t GetExpansionTableSize(const string& embName); - int64_t GetExpansionTableCapacity(const string& embName); + int64_t GetExpansionTableCapacity(const string& embName); - void RecordKeyCountMap(const unique_ptr& batch); + void RecordKeyCountMap(const unique_ptr& batch); - template - void GlobalUnique(T& lookupKeys, T& uniqueKeys, vector& restoreVecSec) - { - absl::flat_hash_map umap; - restoreVecSec.resize(lookupKeys.size(), -1); - int32_t length = 0; + template + void GlobalUnique(T& lookupKeys, T& uniqueKeys, vector& restoreVecSec) + { + absl::flat_hash_map umap; + restoreVecSec.resize(lookupKeys.size(), -1); + int32_t length = 0; - for (size_t i = 0; i < lookupKeys.size(); ++i) { - int64_t key = lookupKeys[i]; - if (rankInfo.useStatic && ( - (!rankInfo.useDynamicExpansion && key == -1) || (rankInfo.useDynamicExpansion && key == 0))) { - continue; - } + for (size_t i = 0; i < lookupKeys.size(); ++i) { + int64_t key = lookupKeys[i]; + if (rankInfo.useStatic && ((!rankInfo.useDynamicExpansion && key == -1) || + (rankInfo.useDynamicExpansion && key == 0))) { + continue; + } - auto result = umap.find(key); - if (result == umap.end()) { - uniqueKeys.push_back(lookupKeys[i]); - umap[key] = length; - restoreVecSec[i] = length; - length++; - } else { - restoreVecSec[i] = result->second; - } + auto result = umap.find(key); + if (result == umap.end()) { + uniqueKeys.push_back(lookupKeys[i]); + umap[key] = length; + restoreVecSec[i] = length; + length++; + } else { + restoreVecSec[i] = result->second; } + } - if (rankInfo.useStatic) { - if (rankInfo.useDynamicExpansion) { - uniqueKeys.resize(lookupKeys.size(), 0); - } else { - uniqueKeys.resize(lookupKeys.size(), -1); - } + if (rankInfo.useStatic) { + if (rankInfo.useDynamicExpansion) { + uniqueKeys.resize(lookupKeys.size(), 0); + } else { + uniqueKeys.resize(lookupKeys.size(), -1); } } + } + + void SetEos(int status, int channelId); - void SetEos(int status, int channelId); + void SendEos(int batchId, int channel); - void SendEos(int batchId, int channel); + bool isRunning{false}; - bool isRunning { false }; + std::mutex destroyMutex; + std::mutex eosMutex; + inline bool HasEmbName(const string& 
embName) + { + return embInfos.find(embName) != embInfos.end(); + }; + GTEST_PRIVATE : - std::mutex destroyMutex; - std::mutex eosMutex; - inline bool HasEmbName(const string& embName) - { - return embInfos.find(embName) != embInfos.end(); - }; - GTEST_PRIVATE: + int + Start(); - int Start(); + template + T GetInfo(info_list_t& list, int batch, const string& embName, int channel); - template - T GetInfo(info_list_t& list, int batch, const string& embName, int channel); + RankInfo rankInfo; + map embInfos; + MPI_Comm comm[MAX_CHANNEL_NUM][MAX_KEY_PROCESS_THREAD]; + std::mutex mut{}; + vector> procThreads{}; + std::mutex loadSaveMut[MAX_CHANNEL_NUM][MAX_KEY_PROCESS_THREAD]{}; + info_list_t lookupKeysList; + list>> storage; + info_list_t infoList; + info_list_t all2AllList; + map maxOffset{}; + map> keyOffsetMap{}; + map> keyCountMap{}; + FeatureAdmitAndEvict m_featureAdmitAndEvict{}; + map> evictPosMap{}; + map> hotKey{}; + map hotEmbTotCount; + map embeddingTableMap{}; + ock::ctr::FactoryPtr factory{}; + int hotEmbUpdateStep = HOT_EMB_UPDATE_STEP_DEFAULT; + bool isWithFAAE; + bool isNeedSendEos[2] = {0, 0}; // 分别代表通道0、1的eos状态 - RankInfo rankInfo; - map embInfos; - MPI_Comm comm[MAX_CHANNEL_NUM][MAX_KEY_PROCESS_THREAD]; - std::mutex mut {}; - vector> procThreads {}; - std::mutex loadSaveMut[MAX_CHANNEL_NUM][MAX_KEY_PROCESS_THREAD] {}; - info_list_t lookupKeysList; - list>> storage; - info_list_t infoList; - info_list_t all2AllList; - map maxOffset {}; - map> keyOffsetMap {}; - map> keyCountMap {}; - FeatureAdmitAndEvict m_featureAdmitAndEvict {}; - map> evictPosMap {}; - map> hotKey {}; - map hotEmbTotCount; - map embeddingTableMap {}; - ock::ctr::FactoryPtr factory {}; - int hotEmbUpdateStep = HOT_EMB_UPDATE_STEP_DEFAULT; - bool isWithFAAE; - bool isNeedSendEos[2] = { 0, 0 }; // 分别代表通道0、1的eos状态 + void InitHotEmbTotCount(const EmbInfo& info, const RankInfo& rInfo); - void InitHotEmbTotCount(const EmbInfo& info, const RankInfo& rInfo); + void KeyProcessTask(int channel, int threadId); - void KeyProcessTask(int channel, int threadId); + void KeyProcessTaskWithFastUnique(int channel, int threadId); - void KeyProcessTaskWithFastUnique(int channel, int threadId); + bool KeyProcessTaskHelper(unique_ptr& batch, int channel, int threadId); - bool KeyProcessTaskHelper(unique_ptr& batch, int channel, int threadId); + bool KeyProcessTaskHelperWithFastUnique(unique_ptr& batch, + ock::ctr::UniquePtr& unique, int channel, int threadId); - bool KeyProcessTaskHelperWithFastUnique(unique_ptr &batch, ock::ctr::UniquePtr& unique, - int channel, int threadId); + tuple, vector> ProcessSplitKeys(const unique_ptr& batch, + int id, vector& splitKeys); - tuple, vector> ProcessSplitKeys(const unique_ptr& batch, - int id, vector& splitKeys); + void GetUniqueConfig(ock::ctr::UniqueConf& uniqueConf); - void GetUniqueConfig(ock::ctr::UniqueConf& uniqueConf); + void InitializeUnique(ock::ctr::UniqueConf& uniqueConf, size_t& preBatchSize, + bool& uniqueInitialize, const unique_ptr& batch, + ock::ctr::UniquePtr& unique); - void InitializeUnique(ock::ctr::UniqueConf& uniqueConf, size_t& preBatchSize, bool& uniqueInitialize, - const unique_ptr & batch, ock::ctr::UniquePtr& unique); + void ProcessBatchWithFastUnique(const unique_ptr& batch, ock::ctr::UniquePtr& unique, + int id, UniqueInfo& uniqueInfoOut); - void ProcessBatchWithFastUnique(const unique_ptr &batch, ock::ctr::UniquePtr& unique, - int id, UniqueInfo& uniqueInfoOut); + size_t GetKeySize(const unique_ptr& batch); - size_t GetKeySize(const unique_ptr &batch); + void 
All2All(vector& sc, int id, const unique_ptr& batch, + KeySendInfo& keySendInfo, All2AllInfo& all2AllInfoOut); - void All2All(vector& sc, int id, const unique_ptr &batch, KeySendInfo& keySendInfo, - All2AllInfo& all2AllInfoOut); + auto HashSplit(const unique_ptr& batch) const + -> tuple, vector>; - auto HashSplit(const unique_ptr& batch) const -> tuple, vector>; + auto HotHashSplit(const unique_ptr& batch) + -> tuple, vector, vector>; - auto HotHashSplit(const unique_ptr& batch) -> tuple, vector, vector>; + void PaddingAlltoallVC(vector& splitKeys) const; - void PaddingAlltoallVC(vector& splitKeys) const; + tuple, vector, vector>> HashSplitWithFAAE( + const unique_ptr& batch) const; - tuple, vector, vector>> - HashSplitWithFAAE(const unique_ptr& batch) const; + vector GetScAll(const vector& keyScLocal, int commId, + const unique_ptr& batch); - vector GetScAll(const vector& keyScLocal, int commId, const unique_ptr& batch); + void GetScAllForUnique(const vector& keyScLocal, int commId, + const unique_ptr& batch, vector& scAllOut); - void GetScAllForUnique(const vector& keyScLocal, int commId, const unique_ptr &batch, - vector &scAllOut); + void Key2Offset(const EmbNameT& embName, KeysT& splitKey, int channel); - void Key2Offset(const EmbNameT& embName, KeysT& splitKey, int channel); + void Key2OffsetDynamicExpansion(const EmbNameT& embName, KeysT& splitKey, int channel); - void Key2OffsetDynamicExpansion(const EmbNameT& embName, KeysT& splitKey, int channel); + unique_ptr GetBatchData(int channel, int commId) const; - unique_ptr GetBatchData(int channel, int commId) const; + void BuildRestoreVec(const unique_ptr& batch, const vector& blockOffset, + vector& restoreVec, int hotPosSize = 0) const; - void BuildRestoreVec(const unique_ptr& batch, const vector& blockOffset, - vector& restoreVec, int hotPosSize = 0) const; - - void SendA2A(const vector& a2aInfo, const string& embName, int channel, int batch); + void SendA2A(const vector& a2aInfo, const string& embName, int channel, int batch); - void EvictDeleteDeviceEmb(const string& embName, const vector& keys); + void EvictDeleteDeviceEmb(const string& embName, const vector& keys); - void EvictInitDeviceEmb(const string& embName, vector offset); + void EvictInitDeviceEmb(const string& embName, vector offset); - void UpdateHotMap(absl::flat_hash_map& keyCountMapByEmbName, uint32_t count, bool refresh, - const string& embName); + void UpdateHotMap(absl::flat_hash_map& keyCountMapByEmbName, uint32_t count, + bool refresh, const string& embName); - void UpdateHotMapForUnique(const KeysT &keySend, const vector &keyCount, - uint32_t count, bool refresh, const string& embName); + void UpdateHotMapForUnique(const KeysT& keySend, const vector& keyCount, + uint32_t count, bool refresh, const string& embName); - void HandleHotAndSendCount(const unique_ptr &batch, UniqueInfo& uniqueInfoOut, - KeySendInfo& keySendInfo, vector& sc, vector& splitSize); + void HandleHotAndSendCount(const unique_ptr& batch, UniqueInfo& uniqueInfoOut, + KeySendInfo& keySendInfo, vector& sc, vector& splitSize); - void PushResult(unique_ptr& batch, unique_ptr> tensors, KeysT& lookupKeys); + void PushResult(unique_ptr& batch, unique_ptr> tensors, + KeysT& lookupKeys); - void PushGlobalUniqueTensors(const unique_ptr>& tensors, KeysT& lookupKeys, int channel); + void PushGlobalUniqueTensors(const unique_ptr>& tensors, KeysT& lookupKeys, + int channel); - void AddCountStartToHotPos(vector& splitKeys, vector& hotPos, const vector& hotPosDev, - const unique_ptr& batch); + void 
AddCountStartToHotPos(vector& splitKeys, vector& hotPos, + const vector& hotPosDev, const unique_ptr& batch); - void ComputeHotPos(const unique_ptr &batch, absl::flat_hash_map &hotMap, - vector &hotPos, vector &restore, const int hotOffset) const; + void ComputeHotPos(const unique_ptr& batch, + absl::flat_hash_map& hotMap, vector& hotPos, + vector& restore, const int hotOffset) const; - vector GetCountRecv(const unique_ptr& batch, int id, - vector>& keyCount, vector scAll, vector ss); + vector GetCountRecv(const unique_ptr& batch, int id, + vector>& keyCount, vector scAll, + vector ss); - void HashSplitHelper(const unique_ptr & batch, vector & splitKeys, - vector & restore, vector & hotPos, - vector >& keyCount); + void HashSplitHelper(const unique_ptr& batch, vector& splitKeys, + vector& restore, vector& hotPos, + vector>& keyCount); - template - inline vector Count2Start(const vector& count) const - { - vector start = { 0 }; - for (size_t i = 0; i < count.size() - 1; ++i) { - start.push_back(count[i] + start.back()); - } - return start; + template + inline vector Count2Start(const vector& count) const + { + vector start = {0}; + for (size_t i = 0; i < count.size() - 1; ++i) { + start.push_back(count[i] + start.back()); } + return start; + } - string DumpSplitKeys(vector>& splitKeys) const; - }; + string DumpSplitKeys(vector>& splitKeys) const; +}; #define KEY_PROCESS_INSTANCE Singleton::GetInstance() -} // end namespace MxRec +} // end namespace MxRec -#endif // MX_REC_KEY_PROCESS_H +#endif // MX_REC_KEY_PROCESS_H -- Gitee From fa9bb8d73b5a87972bd4dfcd846941eb9e23a8a3 Mon Sep 17 00:00:00 2001 From: steepcurve Date: Mon, 22 Apr 2024 14:50:22 +0800 Subject: [PATCH 051/302] Revert "add .clang-format" This reverts commit d7ed2aa49e8c464e6dc61c3e6216eb18f4e8ae42. 
--- src/core/key_process/key_process.cpp | 535 ++++++++++++--------------- src/core/key_process/key_process.h | 382 ++++++++++--------- 2 files changed, 416 insertions(+), 501 deletions(-) diff --git a/src/core/key_process/key_process.cpp b/src/core/key_process/key_process.cpp index 58312ca1..f76f6907 100644 --- a/src/core/key_process/key_process.cpp +++ b/src/core/key_process/key_process.cpp @@ -15,21 +15,19 @@ See the License for the specific language governing permissions and #include "key_process.h" -#include - #include #include - -#include "emb_table/embedding_mgmt.h" -#include "hd_transfer/hd_transfer.h" -#include "host_emb/host_emb.h" -#include "ock_ctr_common/include/error_code.h" +#include #include "utils/common.h" -#include "utils/config.h" #include "utils/logger.h" #include "utils/safe_queue.h" #include "utils/singleton.h" #include "utils/time_cost.h" +#include "utils/config.h" +#include "host_emb/host_emb.h" +#include "emb_table/embedding_mgmt.h" +#include "hd_transfer/hd_transfer.h" +#include "ock_ctr_common/include/error_code.h" using namespace std; using namespace chrono; @@ -43,7 +41,8 @@ void KeyProcess::SetupHotEmbUpdateStep() } bool KeyProcess::Initialize(const RankInfo& rInfo, const vector& eInfos, - const vector& thresholdValues, int seed) + const vector& thresholdValues, + int seed) { this->rankInfo = rInfo; if (rankInfo.useHot) { @@ -51,7 +50,7 @@ bool KeyProcess::Initialize(const RankInfo& rInfo, const vector& eInfos } map scInfo; - for (const auto& info : eInfos) { + for (const auto& info: eInfos) { embInfos[info.name] = info; scInfo[info.name] = info.sendCount; if (rankInfo.useHot) { @@ -67,8 +66,8 @@ bool KeyProcess::Initialize(const RankInfo& rInfo, const vector& eInfos LOG_INFO(KEY_PROCESS "hot emb count info:{}", MapToString(hotEmbTotCount)); MPI_Group worldGroup; MPI_Comm_group(MPI_COMM_WORLD, &worldGroup); - for (auto& i : comm) { - for (auto& j : i) { + for (auto& i: comm) { + for (auto& j: i) { MPI_Comm_create(MPI_COMM_WORLD, worldGroup, &j); } } @@ -86,14 +85,12 @@ bool KeyProcess::Initialize(const RankInfo& rInfo, const vector& eInfos if (GlobalEnv::fastUnique) { int result = ock::ctr::Factory::Create(factory); if (result != 0) { - throw runtime_error( - Logger::Format("create fast factory failed, error code:{}", result)); + throw runtime_error(Logger::Format("create fast factory failed, error code:{}", result)); } } LOG_INFO(KEY_PROCESS "scInfo:{}, localRankSize:{}, rankSize:{}, useStatic:{}, useHot:{}", - MapToString(scInfo), rInfo.localRankSize, rInfo.rankSize, rInfo.useStatic, - rInfo.useHot); + MapToString(scInfo), rInfo.localRankSize, rInfo.rankSize, rInfo.useStatic, rInfo.useHot); #ifndef GTEST Start(); #endif @@ -106,9 +103,8 @@ int KeyProcess::Start() // bind like: // 0 1 2 3 4 5 0 1 2 3 4 5 // | rank0 | | rank1 | - // each rank creates KEY_PROCESS_THREAD threads, each thread process one - // batchdata - LOG_INFO("CPU Core Num: {}", sysconf(_SC_NPROCESSORS_CONF)); // 查看CPU核数 + // each rank creates KEY_PROCESS_THREAD threads, each thread process one batchdata + LOG_INFO("CPU Core Num: {}", sysconf(_SC_NPROCESSORS_CONF)); // 查看CPU核数 auto fn = [this](int channel, int threadId) { #ifndef GTEST auto ret = aclrtSetDevice(static_cast(rankInfo.deviceId)); @@ -122,7 +118,7 @@ int KeyProcess::Start() } else { KeyProcessTask(channel, threadId); } - }; // for clean code + }; // for clean code int threadNum = GetThreadNumEnv(); for (int channel = 0; channel < MAX_CHANNEL_NUM; ++channel) { LOG_INFO(KEY_PROCESS "key process thread num: {}", threadNum); @@ 
-140,9 +136,8 @@ void KeyProcess::InitHotEmbTotCount(const EmbInfo& info, const RankInfo& rInfo) if (rankInfo.useDynamicExpansion) { embeddingSize = info.embeddingSize; } - hotEmbTotCount[info.name] = - static_cast(static_cast(GetUBSize(rInfo.deviceId) / sizeof(float)) * - HOT_EMB_CACHE_PCT / static_cast(embeddingSize)); + hotEmbTotCount[info.name] = static_cast(static_cast(GetUBSize(rInfo.deviceId) / sizeof(float)) * + HOT_EMB_CACHE_PCT / static_cast(embeddingSize)); } OffsetMemT KeyProcess::GetMaxOffset() @@ -187,7 +182,7 @@ void KeyProcess::Destroy() { isRunning = false; LOG_INFO(KEY_PROCESS "rankId:{} KeyProcess begin destroy.", rankInfo.rankId); - for (auto& i : procThreads) { + for (auto& i: procThreads) { i->join(); } procThreads.clear(); @@ -197,8 +192,8 @@ void KeyProcess::Destroy() /// 每个数据通道的所有数据处理线程上锁 void KeyProcess::LoadSaveLock() { - for (int channelId{0}; channelId < MAX_CHANNEL_NUM; ++channelId) { - for (int threadId{0}; threadId < MAX_KEY_PROCESS_THREAD; ++threadId) { + for (int channelId { 0 }; channelId < MAX_CHANNEL_NUM; ++channelId) { + for (int threadId { 0 }; threadId < MAX_KEY_PROCESS_THREAD; ++threadId) { loadSaveMut[channelId][threadId].lock(); } } @@ -207,8 +202,8 @@ void KeyProcess::LoadSaveLock() /// 每个数据通道的所有数据处理线程释放锁 void KeyProcess::LoadSaveUnlock() { - for (int channelId{0}; channelId < MAX_CHANNEL_NUM; ++channelId) { - for (int threadId{0}; threadId < MAX_KEY_PROCESS_THREAD; ++threadId) { + for (int channelId { 0 }; channelId < MAX_CHANNEL_NUM; ++channelId) { + for (int threadId { 0 }; threadId < MAX_KEY_PROCESS_THREAD; ++threadId) { loadSaveMut[channelId][threadId].unlock(); } } @@ -234,9 +229,8 @@ void KeyProcess::GetUniqueConfig(ock::ctr::UniqueConf& uniqueConf) uniqueConf.maxThreadNum = GlobalEnv::maxUniqueThreadNum; } -void KeyProcess::InitializeUnique(ock::ctr::UniqueConf& uniqueConf, size_t& preBatchSize, - bool& uniqueInitialize, const unique_ptr& batch, - ock::ctr::UniquePtr& unique) +void KeyProcess::InitializeUnique(ock::ctr::UniqueConf& uniqueConf, size_t& preBatchSize, bool& uniqueInitialize, + const unique_ptr & batch, ock::ctr::UniquePtr& unique) { uniqueConf.desiredSize = static_cast(batch->Size()); if (preBatchSize != batch->Size()) { @@ -278,8 +272,7 @@ void KeyProcess::KeyProcessTaskWithFastUnique(int channel, int threadId) while (true) { TimeCost getAndProcessTC; TimeCost getBatchDataTC; - batch = - GetBatchData(channel, threadId); // get batch data from SingletonQueue + batch = GetBatchData(channel, threadId); // get batch data from SingletonQueue LOG_DEBUG("getBatchDataTC(ms):{}", getBatchDataTC.ElapsedMS()); if (batch == nullptr) { break; @@ -292,8 +285,7 @@ void KeyProcess::KeyProcessTaskWithFastUnique(int channel, int threadId) break; } LOG_INFO(KEY_PROCESS "getAndProcessTC(ms):{}, key process with fast unique cost:{}," - " get data time(ms):{}, batch name:{}, channelId:{}, " - "threadId:{}, batchId:{}", + " get data time(ms):{}, batch name:{}, channelId:{}, threadId:{}, batchId:{}", getAndProcessTC.ElapsedMS(), processDataTime.ElapsedMS(), getBatchTime, batch->name, batch->channel, threadId, batch->batchId); int queueIndex = threadId + (MAX_KEY_PROCESS_THREAD * batch->channel); @@ -301,13 +293,14 @@ void KeyProcess::KeyProcessTaskWithFastUnique(int channel, int threadId) batchQueue->PutDirty(move(batch)); } unique->UnInitialize(); - } catch (const EndRunExit& e) { + } catch (const EndRunExit &e) { LOG_INFO(KEY_PROCESS "channel: {}, thread: {}, abort run: {}", channel, threadId, e.what()); } LOG_INFO(KEY_PROCESS 
"KeyProcessTaskWithFastUnique exit. rank:{} channelId:{}, threadId:{}", - rankInfo.rankId, channel, threadId); + rankInfo.rankId, channel, threadId); } + void KeyProcess::KeyProcessTask(int channel, int threadId) { unique_ptr batch; @@ -315,8 +308,7 @@ void KeyProcess::KeyProcessTask(int channel, int threadId) while (true) { TimeCost getAndProcessTC; TimeCost getBatchDataTC; - batch = - GetBatchData(channel, threadId); // get batch data from SingletonQueue + batch = GetBatchData(channel, threadId); // get batch data from SingletonQueue LOG_DEBUG("getBatchDataTC(ms):{}", getBatchDataTC.ElapsedMS()); if (batch == nullptr) { break; @@ -328,46 +320,43 @@ void KeyProcess::KeyProcessTask(int channel, int threadId) break; } LOG_INFO(KEY_PROCESS "getAndProcessTC(ms):{}, key process cost:{}," - " get data time(ms):{}, batch name:{}, " - "channelId:{}, threadId:{}, batchId:{}", + " get data time(ms):{}, batch name:{}, channelId:{}, threadId:{}, batchId:{}", getAndProcessTC.ElapsedMS(), processDataTime.ElapsedMS(), getBatchTime, batch->name, batch->channel, threadId, batch->batchId); int queueIndex = threadId + (MAX_KEY_PROCESS_THREAD * batch->channel); auto batchQueue = SingletonQueue::GetInstances(queueIndex); batchQueue->PutDirty(move(batch)); } - } catch (const EndRunExit& e) { + } catch (const EndRunExit &e) { LOG_INFO(KEY_PROCESS "channel: {}, thread: {}, abort run: {}", channel, threadId, e.what()); } - LOG_INFO(KEY_PROCESS "KeyProcessTask exit. rank:{} channelId:{}, threadId:{}", rankInfo.rankId, - channel, threadId); + LOG_INFO(KEY_PROCESS "KeyProcessTask exit. rank:{} channelId:{}, threadId:{}", rankInfo.rankId, channel, threadId); } -void KeyProcess::HashSplitHelper(const unique_ptr& batch, vector& splitKeys, - vector& restore, vector& hotPos, - vector>& keyCount) +void KeyProcess::HashSplitHelper(const unique_ptr & batch, vector & splitKeys, + vector & restore, vector & hotPos, + vector >& keyCount) { TimeCost uniqueTc; if (m_featureAdmitAndEvict.GetFunctionSwitch() && FeatureAdmitAndEvict::m_embStatus[batch->name] != SingleEmbTableStatus::SETS_NONE) { - tie(splitKeys, restore, keyCount) = HashSplitWithFAAE(batch); // 按存储dev id切分并去重 + tie(splitKeys, restore, keyCount) = HashSplitWithFAAE(batch); // 按存储dev id切分并去重 } else { if (rankInfo.useHot) { - tie(splitKeys, restore, hotPos) = HotHashSplit(batch); // 按存储dev id切分并去重 + tie(splitKeys, restore, hotPos) = HotHashSplit(batch); // 按存储dev id切分并去重 } else { - tie(splitKeys, restore) = HashSplit(batch); // 按存储dev id切分并去重 + tie(splitKeys, restore) = HashSplit(batch); // 按存储dev id切分并去重 } } LOG_DEBUG("uniqueTc(ms):{}", uniqueTc.ElapsedMS()); } -bool KeyProcess::KeyProcessTaskHelperWithFastUnique(unique_ptr& batch, - ock::ctr::UniquePtr& unique, int channel, - int threadId) +bool KeyProcess::KeyProcessTaskHelperWithFastUnique(unique_ptr& batch, ock::ctr::UniquePtr& unique, + int channel, int threadId) { // tuple for keyRec restore hotPos scAll countRecv isWithFAAE = m_featureAdmitAndEvict.GetFunctionSwitch() && - FeatureAdmitAndEvict::m_embStatus[batch->name] != SingleEmbTableStatus::SETS_NONE; + FeatureAdmitAndEvict::m_embStatus[batch->name] != SingleEmbTableStatus::SETS_NONE; TimeCost totalTimeCost = TimeCost(); TimeCost fastUniqueTC; UniqueInfo uniqueInfo; @@ -376,11 +365,11 @@ bool KeyProcess::KeyProcessTaskHelperWithFastUnique(unique_ptr& batch // 特征准入&淘汰 if (isWithFAAE && - (m_featureAdmitAndEvict.FeatureAdmit(channel, batch, uniqueInfo.all2AllInfo.keyRecv, - uniqueInfo.all2AllInfo.countRecv) == - 
FeatureAdmitReturnType::FEATURE_ADMIT_RETURN_ERROR)) { + (m_featureAdmitAndEvict.FeatureAdmit( + channel, batch, uniqueInfo.all2AllInfo.keyRecv, uniqueInfo.all2AllInfo.countRecv) == + FeatureAdmitReturnType::FEATURE_ADMIT_RETURN_ERROR)) { LOG_ERROR(KEY_PROCESS "rank:{} thread:{}, channel:{}, Feature-admit-and-evict error ...", - rankInfo.rankId, threadId, channel); + rankInfo.rankId, threadId, channel); return false; } std::lock_guard lock(loadSaveMut[channel][threadId]); @@ -394,9 +383,7 @@ bool KeyProcess::KeyProcessTaskHelperWithFastUnique(unique_ptr& batch LOG_DEBUG("key2OffsetTC(ms):{}", key2OffsetTC.ElapsedMS()); } // Static all2all,need send count - if (!rankInfo.useStatic) { - SendA2A(uniqueInfo.all2AllInfo.scAll, batch->name, batch->channel, batch->batchId); - } + if (!rankInfo.useStatic) { SendA2A(uniqueInfo.all2AllInfo.scAll, batch->name, batch->channel, batch->batchId); } auto tensors = make_unique>(); tensors->push_back(Vec2TensorI32(uniqueInfo.restore)); @@ -407,17 +394,15 @@ bool KeyProcess::KeyProcessTaskHelperWithFastUnique(unique_ptr& batch if (!rankInfo.isDDR) { PushGlobalUniqueTensors(move(tensors), uniqueInfo.all2AllInfo.keyRecv, channel); - tensors->push_back(rankInfo.useDynamicExpansion - ? Vec2TensorI64(uniqueInfo.all2AllInfo.keyRecv) - : Vec2TensorI32(uniqueInfo.all2AllInfo.keyRecv)); + tensors->push_back(rankInfo.useDynamicExpansion ? Vec2TensorI64(uniqueInfo.all2AllInfo.keyRecv) : + Vec2TensorI32(uniqueInfo.all2AllInfo.keyRecv)); } TimeCost pushResultTC; PushResult(batch, move(tensors), uniqueInfo.all2AllInfo.keyRecv); if (GlogConfig::gStatOn) { - LOG_INFO(STAT_INFO "channel_id {} batch_id {} rank_id {} " - "key_process_time_cost_with_fast_unique {}", - channel, batch->batchId, rankInfo.rankId, totalTimeCost.ElapsedMS()); + LOG_INFO(STAT_INFO "channel_id {} batch_id {} rank_id {} key_process_time_cost_with_fast_unique {}", + channel, batch->batchId, rankInfo.rankId, totalTimeCost.ElapsedMS()); } LOG_DEBUG("pushResultTC(ms):{}", pushResultTC.ElapsedMS()); return true; @@ -445,8 +430,8 @@ bool KeyProcess::KeyProcessTaskHelper(unique_ptr& batch, int channel, // 特征准入&淘汰 if (m_featureAdmitAndEvict.GetFunctionSwitch() && FeatureAdmitAndEvict::m_embStatus[batch->name] != SingleEmbTableStatus::SETS_NONE && - (m_featureAdmitAndEvict.FeatureAdmit(channel, batch, lookupKeys, countRecv) == - FeatureAdmitReturnType::FEATURE_ADMIT_RETURN_ERROR)) { + (m_featureAdmitAndEvict.FeatureAdmit(channel, batch, lookupKeys, + countRecv) == FeatureAdmitReturnType::FEATURE_ADMIT_RETURN_ERROR)) { LOG_ERROR(KEY_PROCESS "rank:{} thread:{}, channel:{}, Feature-admit-and-evict error ...", rankInfo.rankId, threadId, channel); return false; @@ -459,9 +444,7 @@ bool KeyProcess::KeyProcessTaskHelper(unique_ptr& batch, int channel, } // Static all2all,need send count - if (!rankInfo.useStatic) { - SendA2A(scAll, batch->name, batch->channel, batch->batchId); - } + if (!rankInfo.useStatic) { SendA2A(scAll, batch->name, batch->channel, batch->batchId); } TimeCost pushResultTC; auto tensors = make_unique>(); @@ -473,24 +456,21 @@ bool KeyProcess::KeyProcessTaskHelper(unique_ptr& batch, int channel, if (!rankInfo.isDDR) { PushGlobalUniqueTensors(tensors, lookupKeys, channel); - tensors->push_back(rankInfo.useDynamicExpansion ? Vec2TensorI64(lookupKeys) - : Vec2TensorI32(lookupKeys)); + tensors->push_back(rankInfo.useDynamicExpansion ? 
Vec2TensorI64(lookupKeys) : Vec2TensorI32(lookupKeys)); } PushResult(batch, move(tensors), lookupKeys); LOG_DEBUG("pushResultTC(ms):{}", pushResultTC.ElapsedMS()); if (GlogConfig::gStatOn) { - LOG_INFO(STAT_INFO "channel_id {} batch_id {} rank_id {} key_process_time_cost {}", channel, - batch->batchId, rankInfo.rankId, totalTimeCost.ElapsedMS()); + LOG_INFO(STAT_INFO "channel_id {} batch_id {} rank_id {} key_process_time_cost {}", + channel, batch->batchId, rankInfo.rankId, totalTimeCost.ElapsedMS()); } return true; } -void KeyProcess::PushGlobalUniqueTensors(const unique_ptr>& tensors, - KeysT& lookupKeys, int channel) +void KeyProcess::PushGlobalUniqueTensors(const unique_ptr>& tensors, KeysT& lookupKeys, int channel) { - if (GlobalEnv::applyGradientsStrategy == - ApplyGradientsStrategyOptions::SUM_SAME_ID_GRADIENTS_AND_APPLY && + if (GlobalEnv::applyGradientsStrategy == ApplyGradientsStrategyOptions::SUM_SAME_ID_GRADIENTS_AND_APPLY && channel == TRAIN_CHANNEL_ID) { KeysT uniqueKeys; vector restoreVecSec; @@ -499,39 +479,36 @@ void KeyProcess::PushGlobalUniqueTensors(const unique_ptr>& tenso GlobalUnique(lookupKeys, uniqueKeys, restoreVecSec); LOG_DEBUG("globalUniqueSyncTC(ms):{}", globalUniqueSyncTC.ElapsedMS()); tensors->push_back(Vec2TensorI32(restoreVecSec)); - tensors->push_back(rankInfo.useDynamicExpansion ? Vec2TensorI64(uniqueKeys) - : Vec2TensorI32(uniqueKeys)); + tensors->push_back(rankInfo.useDynamicExpansion ? Vec2TensorI64(uniqueKeys) : Vec2TensorI32(uniqueKeys)); } } vector KeyProcess::GetCountRecv(const unique_ptr& batch, int id, - vector>& keyCount, vector scAll, - vector ss) + vector>& keyCount, vector scAll, vector ss) { TimeCost getCountRecvTC; if (rankInfo.useStatic) { - for (auto& cnt : keyCount) { + for (auto& cnt: keyCount) { cnt.resize(embInfos[batch->name].sendCount, 0); } } vector countSend; - for (auto& cnt : keyCount) { + for (auto& cnt: keyCount) { countSend.insert(countSend.cend(), cnt.cbegin(), cnt.cend()); } vector sc; for (int i = 0; i < rankInfo.rankSize; ++i) { sc.push_back(scAll.at(rankInfo.rankSize * rankInfo.rankId + i)); } - vector rc; // receive count + vector rc; // receive count for (int i = 0; i < rankInfo.rankSize; ++i) { rc.push_back(scAll.at(i * rankInfo.rankSize + rankInfo.rankId)); } - vector rs = Count2Start(rc); // receive displays/offset 接受数据的起始偏移量 + vector rs = Count2Start(rc); // receive displays/offset 接受数据的起始偏移量 vector countRecv; countRecv.resize(rs.back() + rc.back()); - int retCode = - MPI_Alltoallv(countSend.data(), sc.data(), ss.data(), MPI_UINT32_T, countRecv.data(), - rc.data(), rs.data(), MPI_UINT32_T, comm[batch->channel][id]); + int retCode = MPI_Alltoallv(countSend.data(), sc.data(), ss.data(), MPI_UINT32_T, countRecv.data(), + rc.data(), rs.data(), MPI_UINT32_T, comm[batch->channel][id]); if (retCode != MPI_SUCCESS) { LOG_ERROR("rank {}, MPI_Alltoallv failed:{}", rankInfo.rankId, retCode); } @@ -544,19 +521,16 @@ void KeyProcess::PushResult(unique_ptr& batch, unique_ptr lockGuard(mut); storage.push_front(move(tensors)); - infoList[batch->name][batch->channel].push( - make_tuple(batch->batchId, batch->name, storage.begin())); + infoList[batch->name][batch->channel].push(make_tuple(batch->batchId, batch->name, storage.begin())); if (rankInfo.isDDR) { - lookupKeysList[batch->name][batch->channel].push( - make_tuple(batch->batchId, batch->name, move(lookupKeys))); + lookupKeysList[batch->name][batch->channel].push(make_tuple(batch->batchId, batch->name, move(lookupKeys))); } lockGuard.unlock(); } /* - * 
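// GetCountRecv above reads one column of the flattened rankSize x rankSize
// traffic matrix to learn how much each peer sends to this rank, then turns
// counts into displacements. A self-contained sketch of those two steps,
// assuming the row-major layout scAll[i * rankSize + j] = "rank i sends to
// rank j" used throughout this file.
#include <cstddef>
#include <vector>

std::vector<int> ReceiveCounts(const std::vector<int>& scAll, int rankSize, int rankId)
{
    std::vector<int> rc(rankSize);
    for (int i = 0; i < rankSize; ++i) {
        rc[i] = scAll.at(i * rankSize + rankId);  // column rankId = inbound traffic
    }
    return rc;
}

std::vector<int> ExclusivePrefixSum(const std::vector<int>& count)  // cf. Count2Start
{
    std::vector<int> start = {0};
    for (std::size_t i = 0; i + 1 < count.size(); ++i) {
        start.push_back(count[i] + start.back());
    }
    return start;
}
// With rankSize = 2, rankId = 1 and scAll = {3, 1, 2, 4}: rc = {1, 4},
// rs = {0, 1}, and the receive buffer needs rs.back() + rc.back() = 5 slots.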
从共享队列SingletonQueue中读取batch数据并返回。batch数据由 - * ReadEmbKeyV2 写入。 commID为线程标识[0, - * KEY_PROCESS_THREAD-1],不同线程、训练或推理数据用不同的共享队列通信 + * 从共享队列SingletonQueue中读取batch数据并返回。batch数据由 ReadEmbKeyV2 写入。 + * commID为线程标识[0, KEY_PROCESS_THREAD-1],不同线程、训练或推理数据用不同的共享队列通信 */ unique_ptr KeyProcess::GetBatchData(int channel, int commId) const { @@ -577,37 +551,32 @@ unique_ptr KeyProcess::GetBatchData(int channel, int commId) const this_thread::sleep_for(100us); if (tc.ElapsedSec() > GET_BATCH_TIMEOUT) { if (commId == 0) { - LOG_WARN(KEY_PROCESS - "getting batch timeout! 1. check last 'read batch cost' print. " - "channel[{}] commId[{}]", - channel, commId); + LOG_WARN(KEY_PROCESS "getting batch timeout! 1. check last 'read batch cost' print. " + "channel[{}] commId[{}]", channel, commId); } this_thread::sleep_for(seconds(1)); tc = TimeCost(); } if (!isRunning) { - LOG_WARN("channelId:{} threadId:{}, isRunning is false when GetBatchData", channel, - commId); + LOG_WARN("channelId:{} threadId:{}, isRunning is false when GetBatchData", channel, commId); throw EndRunExit("GetBatchData end run."); } } EASY_END_BLOCK - LOG_DEBUG(KEY_PROCESS "channelId:{} threadId:{} batchId:{}, get batch data " - "done, batchName:{}. bs:{} sample:[{}]", + LOG_DEBUG(KEY_PROCESS "channelId:{} threadId:{} batchId:{}, get batch data done, batchName:{}. bs:{} sample:[{}]", batch->channel, commId, batch->batchId, batch->name, batch->Size(), batch->UnParse()); #if defined(PROFILING) && defined(BUILD_WITH_EASY_PROFILER) if (batch->batchId == PROFILING_START_BATCH_ID) { EASY_PROFILER_ENABLE } else if (batch->batchId == PROFILING_END_BATCH_ID) { - ::profiler::dumpBlocksToFile( - StringFormat("/home/MX_REC-profile-%d.prof", rankInfo.rankId).c_str()); + ::profiler::dumpBlocksToFile(StringFormat("/home/MX_REC-profile-%d.prof", rankInfo.rankId).c_str()); } #endif return batch; } -size_t KeyProcess::GetKeySize(const unique_ptr& batch) +size_t KeyProcess::GetKeySize(const unique_ptr &batch) { size_t size = rankInfo.rankSize * embInfos[batch->name].sendCount; if (!rankInfo.useStatic) { @@ -616,9 +585,8 @@ size_t KeyProcess::GetKeySize(const unique_ptr& batch) return size; } -void KeyProcess::ProcessBatchWithFastUnique(const unique_ptr& batch, - ock::ctr::UniquePtr& unique, int id, - UniqueInfo& uniqueInfoOut) +void KeyProcess::ProcessBatchWithFastUnique(const unique_ptr &batch, ock::ctr::UniquePtr& unique, + int id, UniqueInfo& uniqueInfoOut) { EASY_FUNCTION(profiler::colors::Purple) EASY_VALUE("batchId", batch->batchId) @@ -637,10 +605,10 @@ void KeyProcess::ProcessBatchWithFastUnique(const unique_ptr& batch, ock::ctr::UniqueIn uniqueIn; uniqueIn.inputIdCnt = static_cast(batch->Size()); - uniqueIn.inputId = reinterpret_cast(batch->sample.data()); + uniqueIn.inputId = reinterpret_cast(batch->sample.data()); ock::ctr::EnhancedUniqueOut uniqueOut; - uniqueOut.uniqueId = reinterpret_cast(keySendInfo.keySend.data()); + uniqueOut.uniqueId = reinterpret_cast(keySendInfo.keySend.data()); uniqueOut.index = reinterpret_cast(uniqueInfoOut.restore.data()); if (rankInfo.useStatic) { uniqueOut.idCnt = idCount.data(); @@ -649,7 +617,7 @@ void KeyProcess::ProcessBatchWithFastUnique(const unique_ptr& batch, uniqueOut.idCnt = keySendInfo.keyCount.data(); } uniqueOut.uniqueIdCntInBucket = splitSize.data(); - uniqueOut.uniqueIdInBucket = reinterpret_cast(uniqueVector.data()); + uniqueOut.uniqueIdInBucket = reinterpret_cast(uniqueVector.data()); uniqueOut.uniqueIdCnt = 0; int ret = unique->DoEnhancedUnique(uniqueIn, uniqueOut); @@ -665,21 +633,19 @@ void 
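// The wait in GetBatchData above is a poll-with-watchdog: sleep briefly
// between queue checks, warn every GET_BATCH_TIMEOUT seconds while no data
// arrives, and abort via an exception once the processor stops. A sketch of
// that shape; TryGetBatch() is a hypothetical non-blocking accessor standing
// in for the SingletonQueue API, and plain runtime_error stands in for
// EndRunExit.
#include <atomic>
#include <chrono>
#include <functional>
#include <memory>
#include <stdexcept>
#include <thread>

struct Batch {
    int id;
};

std::unique_ptr<Batch> WaitForBatch(const std::function<std::unique_ptr<Batch>()>& tryGetBatch,
                                    const std::atomic<bool>& isRunning,
                                    std::chrono::seconds timeout)
{
    auto deadline = std::chrono::steady_clock::now() + timeout;
    while (true) {
        if (auto batch = tryGetBatch()) {
            return batch;
        }
        std::this_thread::sleep_for(std::chrono::microseconds(100));
        if (std::chrono::steady_clock::now() > deadline) {
            // here the original logs "getting batch timeout" and keeps waiting
            deadline = std::chrono::steady_clock::now() + timeout;
        }
        if (!isRunning) {
            throw std::runtime_error("GetBatchData end run.");
        }
    }
}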
KeyProcess::ProcessBatchWithFastUnique(const unique_ptr& batch, All2All(sc, id, batch, keySendInfo, uniqueInfoOut.all2AllInfo); LOG_DEBUG(KEY_PROCESS "ProcessBatchWithFastUnique get batchId:{}, batchSize:{}," - " channel:{}, name:{}, restore:{}, keyCount:{}", - batch->batchId, batch->Size(), batch->channel, batch->name, - uniqueInfoOut.restore.size(), keySendInfo.keyCount.size()); + " channel:{}, name:{}, restore:{}, keyCount:{}", + batch->batchId, batch->Size(), batch->channel, batch->name, + uniqueInfoOut.restore.size(), keySendInfo.keyCount.size()); if (GlogConfig::gStatOn) { LOG_INFO(STAT_INFO "channel_id {} batch_id {} rank_id {} " - "batch_key_num_with_fast_unique {} unique_key_num_with_fast_unique {}", - batch->channel, batch->batchId, rankInfo.rankId, batch->Size(), - uniqueOut.uniqueIdCnt); + "batch_key_num_with_fast_unique {} unique_key_num_with_fast_unique {}", + batch->channel, batch->batchId, rankInfo.rankId, batch->Size(), uniqueOut.uniqueIdCnt); } } -void KeyProcess::HandleHotAndSendCount(const unique_ptr& batch, - UniqueInfo& uniqueInfoOut, KeySendInfo& keySendInfo, - vector& sc, vector& splitSize) +void KeyProcess::HandleHotAndSendCount(const unique_ptr &batch, UniqueInfo& uniqueInfoOut, + KeySendInfo& keySendInfo, vector& sc, vector& splitSize) { std::shared_lock lock(g_smut); absl::flat_hash_map hotMap = hotKey[batch->name]; @@ -693,8 +659,8 @@ void KeyProcess::HandleHotAndSendCount(const unique_ptr& batch, TimeCost computeHotTc; ComputeHotPos(batch, hotMap, uniqueInfoOut.hotPos, uniqueInfoOut.restore, hotOffset); LOG_DEBUG("ComputeHot TimeCost(ms):{}", computeHotTc.ElapsedMS()); - UpdateHotMapForUnique(keySendInfo.keySend, keySendInfo.keyCount, hotOffset, - batch->batchId % hotEmbUpdateStep == 0, batch->name); + UpdateHotMapForUnique(keySendInfo.keySend, keySendInfo.keyCount, + hotOffset, batch->batchId % hotEmbUpdateStep == 0, batch->name); } if (rankInfo.useStatic) { @@ -707,9 +673,8 @@ void KeyProcess::HandleHotAndSendCount(const unique_ptr& batch, } } -void KeyProcess::ComputeHotPos(const unique_ptr& batch, - absl::flat_hash_map& hotMap, vector& hotPos, - vector& restore, const int hotOffset) const +void KeyProcess::ComputeHotPos(const unique_ptr &batch, absl::flat_hash_map &hotMap, + vector &hotPos, vector &restore, const int hotOffset) const { emb_key_t* inputData = batch->sample.data(); size_t miniBs = batch->Size(); @@ -732,52 +697,48 @@ void KeyProcess::ComputeHotPos(const unique_ptr& batch, } } -void KeyProcess::All2All(vector& sc, int id, const unique_ptr& batch, - KeySendInfo& keySendInfo, All2AllInfo& all2AllInfoOut) +void KeyProcess::All2All(vector& sc, int id, const unique_ptr &batch, KeySendInfo& keySendInfo, + All2AllInfo& all2AllInfoOut) { TimeCost getScAllTC; int channel = batch->channel; - GetScAllForUnique(sc, id, batch, - all2AllInfoOut.scAll); // Allgather通信获取所有(不同rank相同thread id的) + GetScAllForUnique(sc, id, batch, all2AllInfoOut.scAll); // Allgather通信获取所有(不同rank相同thread id的) LOG_DEBUG("GetScAll TimeCost(ms):{}", getScAllTC.ElapsedMS()); TimeCost all2allTC; - vector ss = Count2Start(sc); // send displays/offset 发送数据的起始偏移量 - vector rc(rankInfo.rankSize); // receive count + vector ss = Count2Start(sc); // send displays/offset 发送数据的起始偏移量 + vector rc(rankInfo.rankSize); // receive count for (int i = 0; i < rankInfo.rankSize; ++i) { // 通信量矩阵某一列的和即为本地要从其他设备接受的key数据量 rc[i] = all2AllInfoOut.scAll.at(i * rankInfo.rankSize + rankInfo.rankId); } - vector rs = Count2Start(rc); // receive displays/offset 接受数据的起始偏移量 + vector rs = Count2Start(rc); // receive 
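// The exchange in All2All above is the standard variable-count MPI pattern:
// allgather the per-peer send counts, derive receive counts and displacements
// from the gathered matrix, then move the keys with one MPI_Alltoallv. A
// minimal sketch of that pattern, assuming MPI is initialized and all ranks
// call it collectively.
#include <mpi.h>

#include <cstdint>
#include <vector>

std::vector<int64_t> ExchangeKeys(const std::vector<std::vector<int64_t>>& buckets, MPI_Comm comm)
{
    int rankSize = 0;
    int rankId = 0;
    MPI_Comm_size(comm, &rankSize);
    MPI_Comm_rank(comm, &rankId);

    std::vector<int> sc(rankSize);  // send count per peer
    std::vector<int64_t> keySend;   // buckets flattened in peer order
    for (int i = 0; i < rankSize; ++i) {
        sc[i] = static_cast<int>(buckets[i].size());
        keySend.insert(keySend.end(), buckets[i].begin(), buckets[i].end());
    }

    std::vector<int> scAll(rankSize * rankSize);  // full traffic matrix
    MPI_Allgather(sc.data(), rankSize, MPI_INT, scAll.data(), rankSize, MPI_INT, comm);

    std::vector<int> ss(rankSize, 0), rc(rankSize, 0), rs(rankSize, 0);
    for (int i = 1; i < rankSize; ++i) {
        ss[i] = ss[i - 1] + sc[i - 1];         // send displacements
    }
    for (int i = 0; i < rankSize; ++i) {
        rc[i] = scAll[i * rankSize + rankId];  // my column = receive counts
    }
    for (int i = 1; i < rankSize; ++i) {
        rs[i] = rs[i - 1] + rc[i - 1];         // receive displacements
    }

    std::vector<int64_t> keyRecv(rs.back() + rc.back());
    MPI_Alltoallv(keySend.data(), sc.data(), ss.data(), MPI_INT64_T,
                  keyRecv.data(), rc.data(), rs.data(), MPI_INT64_T, comm);
    return keyRecv;
}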
displays/offset 接受数据的起始偏移量 all2AllInfoOut.keyRecv.resize(rs.back() + rc.back()); EASY_BLOCK("all2all") int retCode = MPI_Alltoallv(keySendInfo.keySend.data(), sc.data(), ss.data(), MPI_INT64_T, - all2AllInfoOut.keyRecv.data(), rc.data(), rs.data(), MPI_INT64_T, - comm[channel][id]); + all2AllInfoOut.keyRecv.data(), rc.data(), rs.data(), + MPI_INT64_T, comm[channel][id]); if (retCode != MPI_SUCCESS) { LOG_ERROR("rank {}, MPI_Alltoallv failed:{}", rankInfo.rankId, retCode); } - LOG_DEBUG("channelId:{} threadId:{} batchId:{}, All2All MPI_Alltoallv end.", channel, id, - batch->batchId); + LOG_DEBUG("channelId:{} threadId:{} batchId:{}, All2All MPI_Alltoallv end.", channel, id, batch->batchId); all2AllInfoOut.countRecv.resize(rs.back() + rc.back()); if (isWithFAAE) { retCode = MPI_Alltoallv(keySendInfo.keyCount.data(), sc.data(), ss.data(), MPI_UINT32_T, - all2AllInfoOut.countRecv.data(), rc.data(), rs.data(), MPI_UINT32_T, - comm[channel][id]); + all2AllInfoOut.countRecv.data(), rc.data(), + rs.data(), MPI_UINT32_T, comm[channel][id]); if (retCode != MPI_SUCCESS) { - LOG_ERROR("channelId:{} threadId:{} batchId:{}, MPI_Alltoallv failed:{}", channel, id, - batch->batchId, retCode); + LOG_ERROR("channelId:{} threadId:{} batchId:{}, MPI_Alltoallv failed:{}", + channel, id, batch->batchId, retCode); } } - LOG_DEBUG("channelId:{} threadId:{} batchId:{}, All2All end, all2allTC " - "TimeCost(ms):{}", + LOG_DEBUG("channelId:{} threadId:{} batchId:{}, All2All end, all2allTC TimeCost(ms):{}", channel, id, batch->batchId, all2allTC.ElapsedMS()); EASY_END_BLOCK } auto KeyProcess::ProcessSplitKeys(const unique_ptr& batch, int id, - vector& splitKeys) - -> tuple, vector> + vector& splitKeys) -> tuple, vector> { TimeCost processSplitKeysTC; EASY_FUNCTION(profiler::colors::Purple) @@ -785,47 +746,44 @@ auto KeyProcess::ProcessSplitKeys(const unique_ptr& batch, int id, LOG_INFO(KEY_PROCESS "channelId:{} threadId:{} batchId:{}, ProcessSplitKeys start.", batch->channel, id, batch->batchId); - // 使用静态all2all通信:发送或接受量为预置固定值 scInfo[batch->name] = 65536 / - // rankSize 经验值 - if (rankInfo.useStatic) { // maybe move after all2all - for (KeysT& i : splitKeys) { + // 使用静态all2all通信:发送或接受量为预置固定值 scInfo[batch->name] = 65536 / rankSize 经验值 + if (rankInfo.useStatic) { // maybe move after all2all + for (KeysT& i: splitKeys) { if (static_cast(i.size()) > embInfos[batch->name].sendCount) { - LOG_ERROR("{}[{}]:{} overflow! set send count bigger than {}", batch->name, - batch->channel, batch->batchId, i.size()); + LOG_ERROR("{}[{}]:{} overflow! set send count bigger than {}", + batch->name, batch->channel, batch->batchId, i.size()); throw runtime_error( StringFormat("%s[%d]:%d overflow! 
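// Under useStatic, ProcessSplitKeys above pads every bucket to the preset
// sendCount with the sentinel -1, so the communication plan (sc/ss/rc/rs)
// never changes between steps, and a bucket larger than sendCount is a hard
// error. A small sketch of that invariant.
#include <cstdint>
#include <stdexcept>
#include <vector>

void PadBucketsForStaticA2A(std::vector<std::vector<int64_t>>& buckets, int sendCount)
{
    for (auto& bucket : buckets) {
        if (static_cast<int>(bucket.size()) > sendCount) {
            throw std::runtime_error("bucket overflow: set send count bigger");
        }
        bucket.resize(sendCount, -1);  // -1 slots are ignored downstream
    }
}
// The trade-off: fixed-size exchanges spend bandwidth on padding, but counts
// and displacements can be computed once instead of per batch.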
set send count bigger than %d", - batch->name.c_str(), batch->channel, batch->batchId, i.size()) - .c_str()); + batch->name.c_str(), batch->channel, batch->batchId, i.size()).c_str()); } i.resize(embInfos[batch->name].sendCount, -1); } } KeysT keySend; - vector sc; // send count - for (const auto& i : splitKeys) { + vector sc; // send count + for (const auto& i: splitKeys) { sc.push_back(static_cast(i.size())); keySend.insert(keySend.cend(), i.cbegin(), i.cend()); } KeysT keyRecv; TimeCost getScAllTC; - vector scAll = GetScAll(sc, id, batch); // Allgather通信获取所有(不同rank相同thread - // id的)线程间通信量矩阵 + vector scAll = GetScAll(sc, id, batch); // Allgather通信获取所有(不同rank相同thread id的)线程间通信量矩阵 LOG_DEBUG("getScAllTC(ms)(AllReduce-AllGather):{}", getScAllTC.ElapsedMS()); vector ss = Count2Start(sc); // send displays/offset 发送数据的起始偏移量 - vector rc; // receive count + vector rc; // receive count for (int i = 0; i < rankInfo.rankSize; ++i) { // 通信量矩阵某一列的和即为本地要从其他设备接受的key数据量 rc.push_back(scAll.at(i * rankInfo.rankSize + rankInfo.rankId)); } - vector rs = Count2Start(rc); // receive displays/offset 接受数据的起始偏移量 + vector rs = Count2Start(rc); // receive displays/offset 接受数据的起始偏移量 keyRecv.resize(rs.back() + rc.back()); EASY_BLOCK("all2all") TimeCost uniqueAll2AllTC; - int retCode = MPI_Alltoallv(keySend.data(), sc.data(), ss.data(), MPI_INT64_T, keyRecv.data(), - rc.data(), rs.data(), MPI_INT64_T, comm[batch->channel][id]); + int retCode = MPI_Alltoallv(keySend.data(), sc.data(), ss.data(), MPI_INT64_T, + keyRecv.data(), rc.data(), rs.data(), MPI_INT64_T, comm[batch->channel][id]); if (retCode != MPI_SUCCESS) { LOG_ERROR("rank {}, MPI_Alltoallv failed:{}", rankInfo.rankId, retCode); } @@ -834,8 +792,8 @@ auto KeyProcess::ProcessSplitKeys(const unique_ptr& batch, int id, EASY_END_BLOCK LOG_DEBUG(KEY_PROCESS "channelId:{} threadId:{} batchId:{}, batchName:{}, MPI_Alltoallv finish." 
" processSplitKeysTC(ms):{}", - batch->channel, id, batch->batchId, batch->name, processSplitKeysTC.ElapsedMS()); - return {keyRecv, scAll, ss}; + batch->channel, id, batch->batchId, batch->name, processSplitKeysTC.ElapsedMS()); + return { keyRecv, scAll, ss }; } /* @@ -843,16 +801,15 @@ auto KeyProcess::ProcessSplitKeys(const unique_ptr& batch, int id, * splitKeys返回:将数据的key切分到其所在dev id对应的桶中,并去重。 * restore返回:去重后key在桶内偏移量(用于计算恢复向量) */ -tuple, vector> KeyProcess::HashSplit( - const unique_ptr& batch) const +tuple, vector> KeyProcess::HashSplit(const unique_ptr& batch) const { EASY_FUNCTION(profiler::colors::Gold) emb_key_t* batchData = batch->sample.data(); size_t miniBs = batch->Size(); vector splitKeys(rankInfo.rankSize); vector restore(batch->Size()); - vector hashSplitLens(rankInfo.rankSize); // 初始化全0,记录每个桶的长度 - absl::flat_hash_map uKey; // 用于去重查询 + vector hashSplitLens(rankInfo.rankSize); // 初始化全0,记录每个桶的长度 + absl::flat_hash_map uKey; // 用于去重查询 EASY_BLOCK("split push back") for (size_t i = 0; i < miniBs; i++) { const emb_key_t& key = batchData[i]; @@ -860,10 +817,9 @@ tuple, vector> KeyProcess::HashSplit( auto result = uKey.find(key); if (result == uKey.end()) { splitKeys[devId].push_back(key); - restore[i] = - hashSplitLens[devId]++; // restore记录去重后key在桶内偏移量(用于计算恢复向量) + restore[i] = hashSplitLens[devId]++; // restore记录去重后key在桶内偏移量(用于计算恢复向量) uKey[key] = restore[i]; - } else { // 去重 + } else { // 去重 restore[i] = result->second; } } @@ -876,11 +832,10 @@ tuple, vector> KeyProcess::HashSplit( for (int devId = 0; devId < rankInfo.rankSize; ++devId) { uniqueKeyNum += splitKeys[devId].size(); } - LOG_INFO(STAT_INFO "channel_id {} batch_id {} rank_id {} batch_key_num {} " - "unique_key_num {}", - batch->channel, batch->batchId, rankInfo.rankId, batch->Size(), uniqueKeyNum); + LOG_INFO(STAT_INFO "channel_id {} batch_id {} rank_id {} batch_key_num {} unique_key_num {}", + batch->channel, batch->batchId, rankInfo.rankId, batch->Size(), uniqueKeyNum); } - return {splitKeys, restore}; + return { splitKeys, restore }; } void KeyProcess::PaddingAlltoallVC(vector& splitKeys) const @@ -902,10 +857,10 @@ tuple, vector, vector>> KeyProcess::Hash emb_key_t* batchData = batch->sample.data(); size_t miniBs = batch->Size(); vector splitKeys(rankInfo.rankSize); - vector> keyCount(rankInfo.rankSize); // splitKeys在原始batch中对应的频次 + vector> keyCount(rankInfo.rankSize); // splitKeys在原始batch中对应的频次 vector restore(batch->Size()); - vector hashSplitLens(rankInfo.rankSize); // 初始化全0,记录每个桶的长度 - absl::flat_hash_map> uKey; // 用于去重查询 + vector hashSplitLens(rankInfo.rankSize); // 初始化全0,记录每个桶的长度 + absl::flat_hash_map> uKey; // 用于去重查询 EASY_BLOCK("split push back") for (size_t i = 0; i < miniBs; i++) { const emb_key_t& key = batchData[i]; @@ -913,11 +868,10 @@ tuple, vector, vector>> KeyProcess::Hash auto result = uKey.find(key); if (result == uKey.end()) { splitKeys[devId].push_back(key); - restore[i] = - hashSplitLens[devId]++; // restore记录去重后key在桶内偏移量(用于计算恢复向量) + restore[i] = hashSplitLens[devId]++; // restore记录去重后key在桶内偏移量(用于计算恢复向量) uKey[key].first = restore[i]; uKey[key].second = 1; - } else { // 去重 + } else { // 去重 restore[i] = result->second.first; uKey[key].second++; } @@ -943,22 +897,20 @@ tuple, vector, vector>> KeyProcess::Hash for (int devId = 0; devId < rankInfo.rankSize; ++devId) { uniqueKeyNum += splitKeys[devId].size(); } - LOG_INFO(STAT_INFO "channel_id {} batch_id {} rank_id {} batch_key_num {} " - "faae_unique_key_num {}", - batch->channel, batch->batchId, rankInfo.rankId, batch->Size(), uniqueKeyNum); + 
LOG_INFO(STAT_INFO "channel_id {} batch_id {} rank_id {} batch_key_num {} faae_unique_key_num {}", + batch->channel, batch->batchId, rankInfo.rankId, batch->Size(), uniqueKeyNum); } - return {splitKeys, restore, keyCount}; + return { splitKeys, restore, keyCount }; } -tuple, vector, vector> KeyProcess::HotHashSplit( - const unique_ptr& batch) +tuple, vector, vector> KeyProcess::HotHashSplit(const unique_ptr& batch) { EASY_FUNCTION(profiler::colors::Gold) emb_key_t* batchData = batch->sample.data(); size_t miniBs = batch->Size(); vector splitKeys(rankInfo.rankSize); vector restore(batch->Size()); - absl::flat_hash_map uKey; // 用于去重查询 + absl::flat_hash_map uKey; // 用于去重查询 absl::flat_hash_map keyCountMapByEmbName; std::shared_lock lock(g_smut); auto hotMap = hotKey[batch->name]; @@ -967,31 +919,31 @@ tuple, vector, vector> KeyProcess::HotHashSplit( vector hotPosDev(hotEmbTotCount[batch->name]); int hotCount = 0; int hotOffset = hotEmbTotCount[batch->name]; - for (size_t i = 0; i < miniBs; i++) { // for mini batch + for (size_t i = 0; i < miniBs; i++) { // for mini batch const emb_key_t& key = batchData[i]; if (batch->batchId % hotEmbUpdateStep == 0) { keyCountMapByEmbName[key]++; } emb_key_t devId = abs(key % static_cast(rankInfo.rankSize)); auto result = uKey.find(key); - if (result != uKey.end()) { // // already in splitKeys + if (result != uKey.end()) { // // already in splitKeys restore[i] = result->second; continue; } // new key in current batch - splitKeys[devId].push_back(key); // push to bucket + splitKeys[devId].push_back(key); // push to bucket auto hot = hotMap.find(key); - if (hot != hotMap.end()) { // is hot key - if (hot->second == -1) { // is new hot key in this batch + if (hot != hotMap.end()) { // is hot key + if (hot->second == -1) { // is new hot key in this batch // pos in lookup vec (need add ss) for hot-gather hotPos[hotCount] = static_cast(splitKeys[devId].size()) - 1; - hotPosDev[hotCount] = devId; // which dev, for get ss + hotPosDev[hotCount] = devId; // which dev, for get ss hot->second = hotCount; - restore[i] = hotCount++; // get pos of hot emb + restore[i] = hotCount++; // get pos of hot emb } else { restore[i] = hot->second; } - } else { // is not hot key + } else { // is not hot key // restore记录去重后key在桶内偏移量(用于计算恢复向量) restore[i] = static_cast(splitKeys[devId].size() + (hotOffset - 1)); } @@ -1003,25 +955,22 @@ tuple, vector, vector> KeyProcess::HotHashSplit( for (int devId = 0; devId < rankInfo.rankSize; ++devId) { uniqueKeyNum += splitKeys[devId].size(); } - LOG_INFO(STAT_INFO "channel_id {} batch_id {} rank_id {} batch_key_num {} " - "hot_unique_key_num {}", - batch->channel, batch->batchId, rankInfo.rankId, batch->Size(), uniqueKeyNum); + LOG_INFO(STAT_INFO "channel_id {} batch_id {} rank_id {} batch_key_num {} hot_unique_key_num {}", + batch->channel, batch->batchId, rankInfo.rankId, batch->Size(), uniqueKeyNum); } - UpdateHotMap(keyCountMapByEmbName, hotEmbTotCount[batch->name], - batch->batchId % hotEmbUpdateStep == 0, batch->name); + UpdateHotMap(keyCountMapByEmbName, hotEmbTotCount[batch->name], batch->batchId % hotEmbUpdateStep == 0, + batch->name); AddCountStartToHotPos(splitKeys, hotPos, hotPosDev, batch); - return {splitKeys, restore, hotPos}; + return { splitKeys, restore, hotPos }; } -void KeyProcess::AddCountStartToHotPos(vector& splitKeys, vector& hotPos, - const vector& hotPosDev, +void KeyProcess::AddCountStartToHotPos(vector& splitKeys, vector& hotPos, const vector& hotPosDev, const unique_ptr& batch) { vector splitKeysSize; - for (auto& 
splitKey : splitKeys) { - int tmp = rankInfo.useStatic ? embInfos[batch->name].sendCount - : static_cast(splitKey.size()); + for (auto& splitKey: splitKeys) { + int tmp = rankInfo.useStatic ? embInfos[batch->name].sendCount : static_cast(splitKey.size()); splitKeysSize.push_back(tmp); } @@ -1031,13 +980,13 @@ void KeyProcess::AddCountStartToHotPos(vector& splitKeys, vector& ho } } -void KeyProcess::UpdateHotMapForUnique(const KeysT& keySend, const vector& keyCount, +void KeyProcess::UpdateHotMapForUnique(const KeysT &keySend, const vector &keyCount, uint32_t count, bool refresh, const string& embName) { auto& hotMap = hotKey[embName]; if (refresh) { priority_queue> pq; - for (size_t i = 0; i < keySend.size(); ++i) { + for (size_t i = 0;i < keySend.size(); ++i) { if (keySend[i] == -1) { continue; } @@ -1056,15 +1005,15 @@ void KeyProcess::UpdateHotMapForUnique(const KeysT& keySend, const vector& keyCountMapByEmbName, - uint32_t count, bool refresh, const string& embName) +void KeyProcess::UpdateHotMap(absl::flat_hash_map& keyCountMapByEmbName, uint32_t count, bool refresh, + const string& embName) { if (!refresh) { return; } auto& hotMap = hotKey[embName]; - priority_queue> pq; // top k key - for (auto& p : keyCountMapByEmbName) { + priority_queue> pq; // top k key + for (auto& p: keyCountMapByEmbName) { pq.push(pair(-p.second, p.first)); if (pq.size() > count) { pq.pop(); @@ -1080,46 +1029,43 @@ void KeyProcess::UpdateHotMap(absl::flat_hash_map& keyCountMapBy } /* - * 将本地(rank)batch要发送的key数据量进行Allgather通信,获取所有(不同rank相同thread - * id的)线程间的通信量矩阵 + * 将本地(rank)batch要发送的key数据量进行Allgather通信,获取所有(不同rank相同thread id的)线程间的通信量矩阵 * scAll返回:所有线程间的通信量矩阵(按行平铺的一维向量) */ -vector KeyProcess::GetScAll(const vector& keyScLocal, int commId, - const unique_ptr& batch) +vector KeyProcess::GetScAll(const vector& keyScLocal, int commId, const unique_ptr& batch) { EASY_FUNCTION() vector scAll; scAll.resize(rankInfo.rankSize * rankInfo.rankSize); - LOG_DEBUG("channelId:{} threadId:{} batchId:{}, GetScAll start.", batch->channel, commId, - batch->batchId); + LOG_DEBUG("channelId:{} threadId:{} batchId:{}, GetScAll start.", batch->channel, commId, batch->batchId); // allgather keyScLocal(key all2all keyScLocal = device all2all rc) - auto retCode = MPI_Allgather(keyScLocal.data(), rankInfo.rankSize, MPI_INT, scAll.data(), - rankInfo.rankSize, MPI_INT, comm[batch->channel][commId]); + auto retCode = MPI_Allgather(keyScLocal.data(), rankInfo.rankSize, MPI_INT, + scAll.data(), rankInfo.rankSize, MPI_INT, + comm[batch->channel][commId]); if (retCode != MPI_SUCCESS) { LOG_ERROR("rank {} commId {}, MPI_Allgather failed:{}", rankInfo.rankId, commId, retCode); } - LOG_DEBUG("channelId:{} threadId:{} batchId:{}, GetScAll MPI_Allgather end, " - "key scAll matrix:\n{}", + LOG_DEBUG("channelId:{} threadId:{} batchId:{}, GetScAll MPI_Allgather end, key scAll matrix:\n{}", batch->channel, commId, batch->batchId, VectorToString(scAll)); return scAll; } -void KeyProcess::GetScAllForUnique(const vector& keyScLocal, int commId, - const unique_ptr& batch, vector& scAllOut) +void KeyProcess::GetScAllForUnique(const vector& keyScLocal, int commId, const unique_ptr &batch, + vector &scAllOut) { EASY_FUNCTION() int channel = batch->channel; scAllOut.resize(rankInfo.rankSize * rankInfo.rankSize); // allgather keyScLocal(key all2all keyScLocal = device all2all rc) - auto retCode = MPI_Allgather(keyScLocal.data(), rankInfo.rankSize, MPI_INT, scAllOut.data(), - rankInfo.rankSize, MPI_INT, comm[channel][commId]); + auto retCode = 
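// Both UpdateHotMap variants above keep the k most frequent keys with a
// bounded priority_queue: counts are pushed negated, so the queue's largest
// element is the *least* frequent survivor and is popped first once the size
// exceeds k. A minimal sketch of that selection step.
#include <cstdint>
#include <queue>
#include <unordered_map>
#include <utility>
#include <vector>

std::vector<int64_t> TopKKeys(const std::unordered_map<int64_t, uint32_t>& freq, std::size_t k)
{
    // pair ordering compares .first, i.e. the negated count
    std::priority_queue<std::pair<int64_t, int64_t>> pq;
    for (const auto& p : freq) {
        pq.push({-static_cast<int64_t>(p.second), p.first});
        if (pq.size() > k) {
            pq.pop();  // evict the currently least frequent key
        }
    }
    std::vector<int64_t> hot;
    while (!pq.empty()) {
        hot.push_back(pq.top().second);
        pq.pop();
    }
    return hot;  // ascending frequency; order is irrelevant for a hot set
}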
MPI_Allgather(keyScLocal.data(), rankInfo.rankSize, MPI_INT, + scAllOut.data(), rankInfo.rankSize, MPI_INT, + comm[channel][commId]); if (retCode != MPI_SUCCESS) { LOG_ERROR("rank {}, MPI_Allgather failed:{}", rankInfo.rankId, retCode); } - LOG_DEBUG("channelId:{} threadId:{} batchId:{}, GetScAllForUnique end, key " - "scAllOut matrix:\n{}", + LOG_DEBUG("channelId:{} threadId:{} batchId:{}, GetScAllForUnique end, key scAllOut matrix:\n{}", channel, commId, batch->batchId, VectorToString(scAllOut)); } @@ -1127,9 +1073,9 @@ void KeyProcess::Key2Offset(const EmbNameT& embName, KeysT& splitKey, int channe { TimeCost key2OffsetTC; EASY_FUNCTION(profiler::colors::Blue600) - std::lock_guard lk(mut); // lock for PROCESS_THREAD + std::lock_guard lk(mut); // lock for PROCESS_THREAD auto& key2Offset = keyOffsetMap[embName]; - auto& maxOffsetTmp = maxOffset[embName]; + auto& maxOffsetTmp = maxOffset[embName]; auto& evictPos = evictPosMap[embName]; for (long& key : splitKey) { if (key == -1) { @@ -1142,9 +1088,8 @@ void KeyProcess::Key2Offset(const EmbNameT& embName, KeysT& splitKey, int channe size_t offset; // 新值, emb有pos可复用 offset = evictPos.back(); - LOG_TRACE("HBM mode, evictPos is not null, name[{}] key [{}] reuse " - "offset [{}], evictSize [{}]!!!", - embName, key, offset, evictPos.size()); + LOG_TRACE("HBM mode, evictPos is not null, name[{}] key [{}] reuse offset [{}], evictSize [{}]!!!", + embName, key, offset, evictPos.size()); key2Offset[key] = offset; key = offset; evictPos.pop_back(); @@ -1162,18 +1107,18 @@ void KeyProcess::Key2Offset(const EmbNameT& embName, KeysT& splitKey, int channe LOG_ERROR("dev cache overflow {} > {}", maxOffsetTmp, embInfos[embName].devVocabSize); throw std::runtime_error("dev cache overflow!"); } - LOG_DEBUG("current hbm emb:{}, usage:{}/{} key2OffsetTC({} ms)", embName, maxOffsetTmp, - embInfos[embName].devVocabSize, key2OffsetTC.ElapsedMS()); + LOG_DEBUG("current hbm emb:{}, usage:{}/{} key2OffsetTC({} ms)", + embName, maxOffsetTmp, embInfos[embName].devVocabSize, key2OffsetTC.ElapsedMS()); } void KeyProcess::Key2OffsetDynamicExpansion(const EmbNameT& embName, KeysT& splitKey, int channel) { TimeCost key2OffsetTC; EASY_FUNCTION(profiler::colors::Blue600) - std::lock_guard lk(mut); // lock for PROCESS_THREAD + std::lock_guard lk(mut); // lock for PROCESS_THREAD auto& key2Offset = keyOffsetMap[embName]; - auto& maxOffsetTmp = maxOffset[embName]; - auto& curEmbTable = embeddingTableMap[embName]; // empty when not use dynamic expansion + auto& maxOffsetTmp = maxOffset[embName]; + auto& curEmbTable = embeddingTableMap[embName]; // empty when not use dynamic expansion for (long& key : splitKey) { if (key == -1) { key = 0; @@ -1196,8 +1141,8 @@ void KeyProcess::Key2OffsetDynamicExpansion(const EmbNameT& embName, KeysT& spli key = 0; } } - LOG_DEBUG("current expansion emb:{}, usage:{}/{}, key2OffsetTC({} ms)", embName, maxOffsetTmp, - embInfos[embName].devVocabSize, key2OffsetTC.ElapsedMS()); + LOG_DEBUG("current expansion emb:{}, usage:{}/{}, key2OffsetTC({} ms)", + embName, maxOffsetTmp, embInfos[embName].devVocabSize, key2OffsetTC.ElapsedMS()); } /* @@ -1205,8 +1150,7 @@ void KeyProcess::Key2OffsetDynamicExpansion(const EmbNameT& embName, KeysT& spli * 输入接收到emb块的偏移blockOffset,batch内每个key在块内的偏移restoreVec * 输出恢复向量restoreVec,即batch到keySend(平铺的splitKeys)的映射 * 实现方案2:用map记录keySend中key和表内index/offset的映射,在恢复emb时直接根据batch的key查询该map即可找到receive - * emb中的 位置,时间复杂度:O(map构建keySend.size + - * map查询),空间复杂度:O(map) + * emb中的 位置,时间复杂度:O(map构建keySend.size + map查询),空间复杂度:O(map) */ 
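// Key2Offset above rewrites each key in place into its embedding-table row:
// known keys map through key2Offset, new keys first reuse a slot freed by
// eviction (evictPos) and only then grow maxOffset, and exceeding devVocabSize
// is fatal. A compact sketch of that allocation policy (restructured into a
// standalone struct; the original works on per-table maps under a mutex).
#include <cstdint>
#include <stdexcept>
#include <unordered_map>
#include <vector>

struct OffsetAllocator {
    std::unordered_map<int64_t, std::size_t> key2Offset;
    std::vector<std::size_t> evictPos;  // row indices freed by eviction
    std::size_t maxOffset = 0;
    std::size_t devVocabSize = 0;

    std::size_t Lookup(int64_t key)
    {
        auto it = key2Offset.find(key);
        if (it != key2Offset.end()) {
            return it->second;          // existing key: stable row
        }
        std::size_t offset;
        if (!evictPos.empty()) {
            offset = evictPos.back();   // prefer recycled rows
            evictPos.pop_back();
        } else {
            offset = maxOffset++;       // otherwise grow the table tail
        }
        if (maxOffset > devVocabSize) {
            throw std::runtime_error("dev cache overflow");
        }
        key2Offset.emplace(key, offset);
        return offset;
    }
};
// Usage: with devVocabSize = 2, Lookup(7) -> 0, Lookup(9) -> 1, Lookup(7) -> 0,
// and a third distinct key throws.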
void KeyProcess::BuildRestoreVec(const unique_ptr& batch, const vector& blockOffset, vector& restoreVec, int hotPosSize) const @@ -1223,11 +1167,11 @@ void KeyProcess::BuildRestoreVec(const unique_ptr& batch, const vecto hotNum += 1; } } - LOG_DEBUG("hot num in all:{}/{} buildRestoreVecTC(ms):{}", hotNum, batch->Size(), - buildRestoreVecTC.ElapsedMS()); + LOG_DEBUG("hot num in all:{}/{} buildRestoreVecTC(ms):{}", + hotNum, batch->Size(), buildRestoreVecTC.ElapsedMS()); } -template +template T KeyProcess::GetInfo(info_list_t& list, int batch, const string& embName, int channel) { std::lock_guard lockGuard(mut); @@ -1237,8 +1181,7 @@ T KeyProcess::GetInfo(info_list_t& list, int batch, const string& embName, in } auto topBatch = get(list[embName][channel].top()); if (topBatch < batch) { - LOG_ERROR("wrong batch id, top:{} getting:{}, channel:{}, may not clear channel", topBatch, - batch, channel); + LOG_ERROR("wrong batch id, top:{} getting:{}, channel:{}, may not clear channel", topBatch, batch, channel); this_thread::sleep_for(1s); } if (topBatch != batch) { @@ -1258,8 +1201,7 @@ T KeyProcess::GetInfo(info_list_t& list, int batch, const string& embName, in KeysT KeyProcess::GetLookupKeys(int batch, const string& embName, int channel) { TimeCost tc = TimeCost(); - // 循环尝试获取list中的数据;如果key - // process线程退出或者处理数据超时,返回空vector + // 循环尝试获取list中的数据;如果key process线程退出或者处理数据超时,返回空vector while (true) { if (!isRunning) { return {}; @@ -1267,9 +1209,8 @@ KeysT KeyProcess::GetLookupKeys(int batch, const string& embName, int channel) // 判断此时的batch id是否已经过期,即通道已经刷新 HybridMgmtBlock* hybridMgmtBlock = Singleton::GetInstance(); if (batch != hybridMgmtBlock->hybridBatchId[channel]) { - LOG_DEBUG(KEY_PROCESS "Detected that the batch has expired at this time, " - "exiting the loop! {}[{}]:{}", - embName, channel, batch); + LOG_DEBUG(KEY_PROCESS "Detected that the batch has expired at this time, exiting the loop! 
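// BuildRestoreVec, per the comment above it, turns per-key in-block offsets
// into positions in the received embedding block by adding each block's start
// offset. A hedged sketch of the cold-key composition only, assuming
// restore[i] arrives holding the in-bucket offset produced by HashSplit (the
// original also threads hot-cache positions through hotPosSize).
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <vector>

void ComposeRestore(const std::vector<int64_t>& batchKeys, const std::vector<int>& blockOffset,
                    int rankSize, std::vector<int>& restore)
{
    for (std::size_t i = 0; i < batchKeys.size(); ++i) {
        int devId = static_cast<int>(std::abs(batchKeys[i] % rankSize));
        restore[i] += blockOffset[devId];  // block start + in-bucket offset
    }
}
// Continuing the HashSplit example (buckets {{4}, {7, 9}}, blockOffset {0, 1}):
// restore {0, 0, 0, 1} becomes {0, 1, 0, 2}, i.e. indices into the
// concatenated lookup result.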
{}[{}]:{}", + embName, channel, batch); return {}; } if (batch != 0 && channel != 0 && tc.ElapsedSec() > KEY_PROCESS_TIMEOUT) { @@ -1290,9 +1231,8 @@ KeysT KeyProcess::GetLookupKeys(int batch, const string& embName, int channel) SendEos(batch, channel); return {}; } - LOG_TRACE("getting info failed {}[{}], list is empty, and mgmt batchId: " - "{}, readEmbKey batchId: {}.", - embName, channel, batch, readEmbKeyBatchId); + LOG_TRACE("getting info failed {}[{}], list is empty, and mgmt batchId: {}, readEmbKey batchId: {}.", + embName, channel, batch, readEmbKeyBatchId); this_thread::sleep_for(1ms); } catch (WrongListTop&) { LOG_TRACE("getting info failed {}[{}]:{} wrong top", embName, channel, batch); @@ -1316,28 +1256,22 @@ void KeyProcess::SendEos(int batchId, int channel) vector tensors; bool isNeedResend = true; - for (const auto& emb : - as_const(embInfos)) { // 一个表触发以后,其余表都发送eos,最后外层接收null退出此次循环 - LOG_INFO("channelId:{} batchId:{}, the embName:{} related channel SendEos " - "start.", - channel, batchId, emb.first); + for (const auto& emb: as_const(embInfos)) { // 一个表触发以后,其余表都发送eos,最后外层接收null退出此次循环 + LOG_INFO("channelId:{} batchId:{}, the embName:{} related channel SendEos start.", channel, batchId, emb.first); if (!isRunning) { throw EndRunExit("SendEos end run, isRunning is false after lock destroyMutex."); } for (const string& transName : usedChannelNames) { - string sendName = - StringFormat("%s_%s_%d", emb.first.c_str(), transName.c_str(), channel); + string sendName = StringFormat("%s_%s_%d", emb.first.c_str(), transName.c_str(), channel); size_t channelSize = 0; - + acltdtQueryChannelSize(transChannels[sendName], &channelSize); LOG_INFO("[EOS] Before send eos, {} contains {}.", sendName, channelSize); - SendTensorsByAcl(transChannels[sendName], ACL_TENSOR_DATA_END_OF_SEQUENCE, tensors, - isNeedResend); + SendTensorsByAcl(transChannels[sendName], ACL_TENSOR_DATA_END_OF_SEQUENCE, tensors, isNeedResend); acltdtQueryChannelSize(transChannels[sendName], &channelSize); LOG_INFO("[EOS] After send eos, {} contains {}.", sendName, channelSize); } - LOG_INFO("channelId:{} batchId:{}, the embName:{} related channel SendEos end.", channel, - batchId, emb.first); + LOG_INFO("channelId:{} batchId:{}, the embName:{} related channel SendEos end.", channel, batchId, emb.first); } LOG_INFO("channelId:{} batchId:{}, SendEos end.", channel, batchId); @@ -1351,8 +1285,7 @@ void KeyProcess::SendEos(int batchId, int channel) /// \param channel 通道索引(训练/推理) /// \param type 数据类型 /// \return -unique_ptr> KeyProcess::GetInfoVec(int batch, const string& embName, int channel, - ProcessedInfo type) +unique_ptr> KeyProcess::GetInfoVec(int batch, const string& embName, int channel, ProcessedInfo type) { TimeCost tc = TimeCost(); info_list_t* list; @@ -1369,8 +1302,7 @@ unique_ptr> KeyProcess::GetInfoVec(int batch, const string& embNa throw std::invalid_argument("Invalid ProcessedInfo Type."); } - // 循环尝试获取list中的数据;如果key - // process线程退出或者处理数据超时,返回空指针 + // 循环尝试获取list中的数据;如果key process线程退出或者处理数据超时,返回空指针 while (true) { if (!isRunning) { return nullptr; @@ -1378,9 +1310,8 @@ unique_ptr> KeyProcess::GetInfoVec(int batch, const string& embNa // 判断此时的batch id是否已经过期,即通道已经刷新 HybridMgmtBlock* hybridMgmtBlock = Singleton::GetInstance(); if (batch != hybridMgmtBlock->hybridBatchId[channel]) { - LOG_DEBUG(KEY_PROCESS "Detected that the batch has expired at this time, " - "exiting the loop! {}[{}]:{}", - embName, channel, batch); + LOG_DEBUG(KEY_PROCESS "Detected that the batch has expired at this time, exiting the loop! 
{}[{}]:{}", + embName, channel, batch); return nullptr; } if (batch != 0 && channel != 0 && tc.ElapsedSec() > KEY_PROCESS_TIMEOUT) { @@ -1397,18 +1328,15 @@ unique_ptr> KeyProcess::GetInfoVec(int batch, const string& embNa return uTensor; } catch (EmptyList&) { unique_lock lockEosGuard(eosMutex); - // 避免eos在keyProcess还未处理完数据时插队到通道前面, - // readEmbKey真实的次数是readEmbedBatchId减1 - if (isNeedSendEos[channel] && - (hybridMgmtBlock->readEmbedBatchId[channel] - 1) < batch) { + // 避免eos在keyProcess还未处理完数据时插队到通道前面, readEmbKey真实的次数是readEmbedBatchId减1 + if (isNeedSendEos[channel] && (hybridMgmtBlock->readEmbedBatchId[channel] - 1) < batch) { LOG_INFO("channelId:{} batchId:{}, GetInfoVec eos.", channel, batch); unique_lock lockDestroyGuard(destroyMutex); SendEos(batch, channel); return nullptr; } - LOG_TRACE("getting info failed {}[{}], list is empty, and mgmt batchId: " - "{}, readEmbKey batchId: {}.", - embName, channel, batch, (hybridMgmtBlock->readEmbedBatchId[channel] - 1)); + LOG_TRACE("getting info failed {}[{}], list is empty, and mgmt batchId: {}, readEmbKey batchId: {}.", + embName, channel, batch, (hybridMgmtBlock->readEmbedBatchId[channel] - 1)); this_thread::sleep_for(1ms); } catch (WrongListTop&) { LOG_TRACE("getting info failed {}[{}]:{} wrong top", embName, channel, batch); @@ -1421,7 +1349,7 @@ void KeyProcess::SendA2A(const vector& a2aInfo, const string& embName, int { // 数据放到队列里,在mgmt里面发送(检查发送数据量) auto tensors = make_unique>(); - Tensor tmpTensor(tensorflow::DT_INT64, {rankInfo.rankSize, rankInfo.rankSize}); + Tensor tmpTensor(tensorflow::DT_INT64, { rankInfo.rankSize, rankInfo.rankSize }); auto tmpData = tmpTensor.matrix(); for (int i = 0; i < rankInfo.rankSize; ++i) { for (int j = 0; j < rankInfo.rankSize; ++j) { @@ -1441,14 +1369,13 @@ int KeyProcess::GetMaxStep(int channelId) const return rankInfo.ctrlSteps.at(channelId); } -void KeyProcess::EvictKeys(const string& embName, - const vector& keys) // hbm +void KeyProcess::EvictKeys(const string& embName, const vector& keys) // hbm { LOG_INFO(KEY_PROCESS "hbm funEvictCall: [{}]! keySize:{}", embName, keys.size()); EmbeddingMgmt::Instance()->EvictKeys(embName, keys); } -void KeyProcess::EvictKeysCombine(const vector& keys) // hbm +void KeyProcess::EvictKeysCombine(const vector& keys) // hbm { LOG_INFO(KEY_PROCESS "hbm combine funEvictCall, keySize:{}", keys.size()); EmbeddingMgmt::Instance()->EvictKeysCombine(keys); @@ -1457,7 +1384,7 @@ void KeyProcess::EvictKeysCombine(const vector& keys) // hbm void KeyProcess::EvictDeleteDeviceEmb(const string& embName, const vector& keys) { EASY_FUNCTION(profiler::colors::Blue600) - std::lock_guard lk(mut); // lock for PROCESS_THREAD + std::lock_guard lk(mut); // lock for PROCESS_THREAD size_t keySize = keys.size(); auto& devHashMap = keyOffsetMap.at(embName); @@ -1471,7 +1398,7 @@ void KeyProcess::EvictDeleteDeviceEmb(const string& embName, const vectorsecond; @@ -1479,26 +1406,24 @@ void KeyProcess::EvictDeleteDeviceEmb(const string& embName, const vector offset) { if (offset.size() > embInfos[embName].devVocabSize) { - LOG_ERROR("{} overflow! init evict dev, evictOffset size {} bigger than " - "dev vocabSize {}", - embName, offset.size(), embInfos[embName].devVocabSize); - throw runtime_error(Logger::Format("{} overflow! init evict dev, evictOffset size {} " - "bigger than dev vocabSize {}", - embName, offset.size(), embInfos[embName].devVocabSize) - .c_str()); + LOG_ERROR("{} overflow! 
init evict dev, evictOffset size {} bigger than dev vocabSize {}", + embName, offset.size(), embInfos[embName].devVocabSize); + throw runtime_error( + Logger::Format("{} overflow! init evict dev, evictOffset size {} bigger than dev vocabSize {}", + embName, offset.size(), embInfos[embName].devVocabSize + ).c_str()); } vector tmpDataOut; Tensor tmpData = Vec2TensorI32(offset); tmpDataOut.emplace_back(tmpData); - tmpDataOut.emplace_back(Tensor(tensorflow::DT_INT32, {1})); + tmpDataOut.emplace_back(Tensor(tensorflow::DT_INT32, { 1 })); auto evictLen = tmpDataOut.back().flat(); int evictSize = static_cast(offset.size()); @@ -1508,16 +1433,15 @@ void KeyProcess::EvictInitDeviceEmb(const string& embName, vector offset auto trans = Singleton::GetInstance(); trans->Send(TransferChannel::EVICT, tmpDataOut, TRAIN_CHANNEL_ID, embName); - LOG_INFO(KEY_PROCESS "hbm EvictInitDeviceEmb: [{}]! send offsetSize:{}", embName, - offset.size()); + LOG_INFO(KEY_PROCESS "hbm EvictInitDeviceEmb: [{}]! send offsetSize:{}", embName, offset.size()); } -string KeyProcess::DumpSplitKeys(vector>& splitKeys) const +string KeyProcess::DumpSplitKeys(vector> &splitKeys) const { stringstream ssTrace; for (int devId = 0; devId < rankInfo.rankSize; ++devId) { ssTrace << '|' << devId << ":"; - for (auto key : splitKeys[devId]) { + for (auto key: splitKeys[devId]) { ssTrace << key << ','; } ssTrace << '|'; @@ -1556,8 +1480,7 @@ void KeyProcess::RecordKeyCountMap(const unique_ptr& batch) void KeyProcess::SetEos(int status, int channelId) { unique_lock lockGuard(eosMutex); - LOG_INFO("isNeedSendEos status is changed, before status:[{}], input " - "status:{}, channel:[{}], ", + LOG_INFO("isNeedSendEos status is changed, before status:[{}], input status:{}, channel:[{}], ", isNeedSendEos[channelId], status, channelId); isNeedSendEos[channelId] = (status == 1); } diff --git a/src/core/key_process/key_process.h b/src/core/key_process/key_process.h index d6a0b80b..8bd7b8d0 100644 --- a/src/core/key_process/key_process.h +++ b/src/core/key_process/key_process.h @@ -16,291 +16,283 @@ See the License for the specific language governing permissions and #ifndef MX_REC_KEY_PROCESS_H #define MX_REC_KEY_PROCESS_H -#include -#include - +#include #include #include -#include #include #include -#include +#include +#include +#include +#include "ock_ctr_common/include/factory.h" + +#include "utils/common.h" #include "emb_table/emb_table.h" #include "feature_admit_and_evict.h" #include "hybrid_mgmt/hybrid_mgmt_block.h" -#include "ock_ctr_common/include/factory.h" -#include "utils/common.h" #include "utils/singleton.h" namespace MxRec { -using namespace std; + using namespace std; -template -struct Cmp { - bool operator()(const T& a, const T& b) const - { - return get(a) > get(b); // batch id order - } -}; + template + struct Cmp { + bool operator()(const T& a, const T& b) const + { + return get(a) > get(b); // batch id order + } + }; -template -using heap_t = priority_queue, Cmp>; + template + using heap_t = priority_queue, Cmp>; -template -using info_list_t = map, MAX_QUEUE_NUM>>; + template + using info_list_t = map, MAX_QUEUE_NUM>>; -enum class ProcessedInfo { - RESTORE, - ALL2ALL, - INVALID -}; + enum class ProcessedInfo { + RESTORE, + ALL2ALL, + INVALID + }; -class EndRunExit : public std::exception { -public: - explicit EndRunExit(const char* message) : errorMessage(message) {} + class EndRunExit : public std::exception { + public: + explicit EndRunExit(const char* message) : errorMessage(message) {} - const char* what() const noexcept 
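// The Cmp functor declared in this header inverts priority_queue's default
// max-heap into a min-heap on the tuple's batch-id field, so consumers always
// pop the oldest pending batch first. A stand-alone sketch of the same idiom.
#include <queue>
#include <string>
#include <tuple>
#include <vector>

using Info = std::tuple<int, std::string>;  // (batchId, payload)

struct CmpByBatchId {
    bool operator()(const Info& a, const Info& b) const
    {
        return std::get<0>(a) > std::get<0>(b);  // ">" turns the heap into a min-heap
    }
};

using InfoHeap = std::priority_queue<Info, std::vector<Info>, CmpByBatchId>;

// InfoHeap h; h.push({7, "late"}); h.push({3, "early"});
// h.top() is {3, "early"}: the smallest batch id surfaces first.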
override - { - return errorMessage; - } + const char* what() const noexcept override + { + return errorMessage; + } -private: - const char* errorMessage; -}; + private: + const char* errorMessage; + }; -constexpr int MPI_ABNORMAL_SEND_VALUE = 0; // MPI异常通信时发送0 -constexpr int MPI_NORMAL_SEND_VALUE = 1; // MPI正常通信时发送1 + constexpr int MPI_ABNORMAL_SEND_VALUE = 0; // MPI异常通信时发送0 + constexpr int MPI_NORMAL_SEND_VALUE = 1; // MPI正常通信时发送1 -class EmptyList : public std::exception {}; + class EmptyList : public std::exception { + }; -class WrongListTop : public std::exception {}; + class WrongListTop : public std::exception { + }; -class KeyProcess { -public: - bool Initialize(const RankInfo& rInfo, const vector& eInfos, - const vector& thresholdValues = {}, int seed = 0); + class KeyProcess { + public: + bool Initialize(const RankInfo& rInfo, const vector& eInfos, + const vector& thresholdValues = {}, int seed = 0); - unique_ptr> GetInfoVec(int batch, const string& embName, int channel, - ProcessedInfo type); + unique_ptr> GetInfoVec(int batch, const string& embName, int channel, ProcessedInfo type); - KeysT GetLookupKeys(int batch, const string& embName, int channel); + KeysT GetLookupKeys(int batch, const string& embName, int channel); - int GetMaxStep(int channelId) const; + int GetMaxStep(int channelId) const; - OffsetMemT GetMaxOffset(); + OffsetMemT GetMaxOffset(); - KeyOffsetMemT GetKeyOffsetMap(); + KeyOffsetMemT GetKeyOffsetMap(); - KeyCountMemT GetKeyCountMap(); + KeyCountMemT GetKeyCountMap(); - FeatureAdmitAndEvict& GetFeatAdmitAndEvict(); + FeatureAdmitAndEvict& GetFeatAdmitAndEvict(); - void LoadMaxOffset(OffsetMemT& loadData); + void LoadMaxOffset(OffsetMemT& loadData); - void LoadKeyOffsetMap(KeyOffsetMemT& loadData); + void LoadKeyOffsetMap(KeyOffsetMemT& loadData); - void LoadKeyCountMap(KeyCountMemT& loadData); + void LoadKeyCountMap(KeyCountMemT& loadData); - void Destroy(); + void Destroy(); - void LoadSaveLock(); + void LoadSaveLock(); - void LoadSaveUnlock(); + void LoadSaveUnlock(); - void EvictKeys(const string& embName, const vector& keys); + void EvictKeys(const string& embName, const vector& keys); - void EvictKeysCombine(const vector& keys); + void EvictKeysCombine(const vector& keys); - void SetupHotEmbUpdateStep(); + void SetupHotEmbUpdateStep(); - int64_t GetExpansionTableSize(const string& embName); + int64_t GetExpansionTableSize(const string& embName); - int64_t GetExpansionTableCapacity(const string& embName); + int64_t GetExpansionTableCapacity(const string& embName); - void RecordKeyCountMap(const unique_ptr& batch); + void RecordKeyCountMap(const unique_ptr& batch); - template - void GlobalUnique(T& lookupKeys, T& uniqueKeys, vector& restoreVecSec) - { - absl::flat_hash_map umap; - restoreVecSec.resize(lookupKeys.size(), -1); - int32_t length = 0; + template + void GlobalUnique(T& lookupKeys, T& uniqueKeys, vector& restoreVecSec) + { + absl::flat_hash_map umap; + restoreVecSec.resize(lookupKeys.size(), -1); + int32_t length = 0; - for (size_t i = 0; i < lookupKeys.size(); ++i) { - int64_t key = lookupKeys[i]; - if (rankInfo.useStatic && ((!rankInfo.useDynamicExpansion && key == -1) || - (rankInfo.useDynamicExpansion && key == 0))) { - continue; - } + for (size_t i = 0; i < lookupKeys.size(); ++i) { + int64_t key = lookupKeys[i]; + if (rankInfo.useStatic && ( + (!rankInfo.useDynamicExpansion && key == -1) || (rankInfo.useDynamicExpansion && key == 0))) { + continue; + } - auto result = umap.find(key); - if (result == umap.end()) { - 
uniqueKeys.push_back(lookupKeys[i]); - umap[key] = length; - restoreVecSec[i] = length; - length++; - } else { - restoreVecSec[i] = result->second; + auto result = umap.find(key); + if (result == umap.end()) { + uniqueKeys.push_back(lookupKeys[i]); + umap[key] = length; + restoreVecSec[i] = length; + length++; + } else { + restoreVecSec[i] = result->second; + } } - } - if (rankInfo.useStatic) { - if (rankInfo.useDynamicExpansion) { - uniqueKeys.resize(lookupKeys.size(), 0); - } else { - uniqueKeys.resize(lookupKeys.size(), -1); + if (rankInfo.useStatic) { + if (rankInfo.useDynamicExpansion) { + uniqueKeys.resize(lookupKeys.size(), 0); + } else { + uniqueKeys.resize(lookupKeys.size(), -1); + } } } - } - - void SetEos(int status, int channelId); - void SendEos(int batchId, int channel); + void SetEos(int status, int channelId); - bool isRunning{false}; + void SendEos(int batchId, int channel); - std::mutex destroyMutex; - std::mutex eosMutex; - inline bool HasEmbName(const string& embName) - { - return embInfos.find(embName) != embInfos.end(); - }; - GTEST_PRIVATE : + bool isRunning { false }; - int - Start(); + std::mutex destroyMutex; + std::mutex eosMutex; + inline bool HasEmbName(const string& embName) + { + return embInfos.find(embName) != embInfos.end(); + }; + GTEST_PRIVATE: - template - T GetInfo(info_list_t& list, int batch, const string& embName, int channel); + int Start(); - RankInfo rankInfo; - map embInfos; - MPI_Comm comm[MAX_CHANNEL_NUM][MAX_KEY_PROCESS_THREAD]; - std::mutex mut{}; - vector> procThreads{}; - std::mutex loadSaveMut[MAX_CHANNEL_NUM][MAX_KEY_PROCESS_THREAD]{}; - info_list_t lookupKeysList; - list>> storage; - info_list_t infoList; - info_list_t all2AllList; - map maxOffset{}; - map> keyOffsetMap{}; - map> keyCountMap{}; - FeatureAdmitAndEvict m_featureAdmitAndEvict{}; - map> evictPosMap{}; - map> hotKey{}; - map hotEmbTotCount; - map embeddingTableMap{}; - ock::ctr::FactoryPtr factory{}; - int hotEmbUpdateStep = HOT_EMB_UPDATE_STEP_DEFAULT; - bool isWithFAAE; - bool isNeedSendEos[2] = {0, 0}; // 分别代表通道0、1的eos状态 + template + T GetInfo(info_list_t& list, int batch, const string& embName, int channel); - void InitHotEmbTotCount(const EmbInfo& info, const RankInfo& rInfo); + RankInfo rankInfo; + map embInfos; + MPI_Comm comm[MAX_CHANNEL_NUM][MAX_KEY_PROCESS_THREAD]; + std::mutex mut {}; + vector> procThreads {}; + std::mutex loadSaveMut[MAX_CHANNEL_NUM][MAX_KEY_PROCESS_THREAD] {}; + info_list_t lookupKeysList; + list>> storage; + info_list_t infoList; + info_list_t all2AllList; + map maxOffset {}; + map> keyOffsetMap {}; + map> keyCountMap {}; + FeatureAdmitAndEvict m_featureAdmitAndEvict {}; + map> evictPosMap {}; + map> hotKey {}; + map hotEmbTotCount; + map embeddingTableMap {}; + ock::ctr::FactoryPtr factory {}; + int hotEmbUpdateStep = HOT_EMB_UPDATE_STEP_DEFAULT; + bool isWithFAAE; + bool isNeedSendEos[2] = { 0, 0 }; // 分别代表通道0、1的eos状态 - void KeyProcessTask(int channel, int threadId); + void InitHotEmbTotCount(const EmbInfo& info, const RankInfo& rInfo); - void KeyProcessTaskWithFastUnique(int channel, int threadId); + void KeyProcessTask(int channel, int threadId); - bool KeyProcessTaskHelper(unique_ptr& batch, int channel, int threadId); + void KeyProcessTaskWithFastUnique(int channel, int threadId); - bool KeyProcessTaskHelperWithFastUnique(unique_ptr& batch, - ock::ctr::UniquePtr& unique, int channel, int threadId); + bool KeyProcessTaskHelper(unique_ptr& batch, int channel, int threadId); - tuple, vector> ProcessSplitKeys(const unique_ptr& batch, - int 
id, vector& splitKeys); + bool KeyProcessTaskHelperWithFastUnique(unique_ptr &batch, ock::ctr::UniquePtr& unique, + int channel, int threadId); - void GetUniqueConfig(ock::ctr::UniqueConf& uniqueConf); + tuple, vector> ProcessSplitKeys(const unique_ptr& batch, + int id, vector& splitKeys); - void InitializeUnique(ock::ctr::UniqueConf& uniqueConf, size_t& preBatchSize, - bool& uniqueInitialize, const unique_ptr& batch, - ock::ctr::UniquePtr& unique); + void GetUniqueConfig(ock::ctr::UniqueConf& uniqueConf); - void ProcessBatchWithFastUnique(const unique_ptr& batch, ock::ctr::UniquePtr& unique, - int id, UniqueInfo& uniqueInfoOut); + void InitializeUnique(ock::ctr::UniqueConf& uniqueConf, size_t& preBatchSize, bool& uniqueInitialize, + const unique_ptr & batch, ock::ctr::UniquePtr& unique); - size_t GetKeySize(const unique_ptr& batch); + void ProcessBatchWithFastUnique(const unique_ptr &batch, ock::ctr::UniquePtr& unique, + int id, UniqueInfo& uniqueInfoOut); - void All2All(vector& sc, int id, const unique_ptr& batch, - KeySendInfo& keySendInfo, All2AllInfo& all2AllInfoOut); + size_t GetKeySize(const unique_ptr &batch); - auto HashSplit(const unique_ptr& batch) const - -> tuple, vector>; + void All2All(vector& sc, int id, const unique_ptr &batch, KeySendInfo& keySendInfo, + All2AllInfo& all2AllInfoOut); - auto HotHashSplit(const unique_ptr& batch) - -> tuple, vector, vector>; + auto HashSplit(const unique_ptr& batch) const -> tuple, vector>; - void PaddingAlltoallVC(vector& splitKeys) const; + auto HotHashSplit(const unique_ptr& batch) -> tuple, vector, vector>; - tuple, vector, vector>> HashSplitWithFAAE( - const unique_ptr& batch) const; + void PaddingAlltoallVC(vector& splitKeys) const; - vector GetScAll(const vector& keyScLocal, int commId, - const unique_ptr& batch); + tuple, vector, vector>> + HashSplitWithFAAE(const unique_ptr& batch) const; - void GetScAllForUnique(const vector& keyScLocal, int commId, - const unique_ptr& batch, vector& scAllOut); + vector GetScAll(const vector& keyScLocal, int commId, const unique_ptr& batch); - void Key2Offset(const EmbNameT& embName, KeysT& splitKey, int channel); + void GetScAllForUnique(const vector& keyScLocal, int commId, const unique_ptr &batch, + vector &scAllOut); - void Key2OffsetDynamicExpansion(const EmbNameT& embName, KeysT& splitKey, int channel); + void Key2Offset(const EmbNameT& embName, KeysT& splitKey, int channel); - unique_ptr GetBatchData(int channel, int commId) const; + void Key2OffsetDynamicExpansion(const EmbNameT& embName, KeysT& splitKey, int channel); - void BuildRestoreVec(const unique_ptr& batch, const vector& blockOffset, - vector& restoreVec, int hotPosSize = 0) const; + unique_ptr GetBatchData(int channel, int commId) const; - void SendA2A(const vector& a2aInfo, const string& embName, int channel, int batch); + void BuildRestoreVec(const unique_ptr& batch, const vector& blockOffset, + vector& restoreVec, int hotPosSize = 0) const; + + void SendA2A(const vector& a2aInfo, const string& embName, int channel, int batch); - void EvictDeleteDeviceEmb(const string& embName, const vector& keys); + void EvictDeleteDeviceEmb(const string& embName, const vector& keys); - void EvictInitDeviceEmb(const string& embName, vector offset); + void EvictInitDeviceEmb(const string& embName, vector offset); - void UpdateHotMap(absl::flat_hash_map& keyCountMapByEmbName, uint32_t count, - bool refresh, const string& embName); + void UpdateHotMap(absl::flat_hash_map& keyCountMapByEmbName, uint32_t count, bool refresh, + const string& 
embName); - void UpdateHotMapForUnique(const KeysT& keySend, const vector& keyCount, - uint32_t count, bool refresh, const string& embName); + void UpdateHotMapForUnique(const KeysT &keySend, const vector &keyCount, + uint32_t count, bool refresh, const string& embName); - void HandleHotAndSendCount(const unique_ptr& batch, UniqueInfo& uniqueInfoOut, - KeySendInfo& keySendInfo, vector& sc, vector& splitSize); + void HandleHotAndSendCount(const unique_ptr &batch, UniqueInfo& uniqueInfoOut, + KeySendInfo& keySendInfo, vector& sc, vector& splitSize); - void PushResult(unique_ptr& batch, unique_ptr> tensors, - KeysT& lookupKeys); + void PushResult(unique_ptr& batch, unique_ptr> tensors, KeysT& lookupKeys); - void PushGlobalUniqueTensors(const unique_ptr>& tensors, KeysT& lookupKeys, - int channel); + void PushGlobalUniqueTensors(const unique_ptr>& tensors, KeysT& lookupKeys, int channel); - void AddCountStartToHotPos(vector& splitKeys, vector& hotPos, - const vector& hotPosDev, const unique_ptr& batch); + void AddCountStartToHotPos(vector& splitKeys, vector& hotPos, const vector& hotPosDev, + const unique_ptr& batch); - void ComputeHotPos(const unique_ptr& batch, - absl::flat_hash_map& hotMap, vector& hotPos, - vector& restore, const int hotOffset) const; + void ComputeHotPos(const unique_ptr &batch, absl::flat_hash_map &hotMap, + vector &hotPos, vector &restore, const int hotOffset) const; - vector GetCountRecv(const unique_ptr& batch, int id, - vector>& keyCount, vector scAll, - vector ss); + vector GetCountRecv(const unique_ptr& batch, int id, + vector>& keyCount, vector scAll, vector ss); - void HashSplitHelper(const unique_ptr& batch, vector& splitKeys, - vector& restore, vector& hotPos, - vector>& keyCount); + void HashSplitHelper(const unique_ptr & batch, vector & splitKeys, + vector & restore, vector & hotPos, + vector >& keyCount); - template - inline vector Count2Start(const vector& count) const - { - vector start = {0}; - for (size_t i = 0; i < count.size() - 1; ++i) { - start.push_back(count[i] + start.back()); + template + inline vector Count2Start(const vector& count) const + { + vector start = { 0 }; + for (size_t i = 0; i < count.size() - 1; ++i) { + start.push_back(count[i] + start.back()); + } + return start; } - return start; - } - string DumpSplitKeys(vector>& splitKeys) const; -}; + string DumpSplitKeys(vector>& splitKeys) const; + }; #define KEY_PROCESS_INSTANCE Singleton::GetInstance() -} // end namespace MxRec +} // end namespace MxRec -#endif // MX_REC_KEY_PROCESS_H +#endif // MX_REC_KEY_PROCESS_H -- Gitee From f3db56ec0161daa8159ba38d0bdf7949d81ba993 Mon Sep 17 00:00:00 2001 From: steepcurve Date: Mon, 22 Apr 2024 14:57:24 +0800 Subject: [PATCH 052/302] =?UTF-8?q?=E6=A0=BC=E5=BC=8F=E5=8C=96cpp=E6=BA=90?= =?UTF-8?q?=E6=96=87=E4=BB=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core/key_process/key_process.cpp | 520 +++++++++++++++------------ src/core/key_process/key_process.h | 382 ++++++++++---------- 2 files changed, 489 insertions(+), 413 deletions(-) diff --git a/src/core/key_process/key_process.cpp b/src/core/key_process/key_process.cpp index 98df97ed..85b17bbb 100644 --- a/src/core/key_process/key_process.cpp +++ b/src/core/key_process/key_process.cpp @@ -15,19 +15,21 @@ See the License for the specific language governing permissions and #include "key_process.h" +#include + #include #include -#include + +#include "emb_table/embedding_mgmt.h" +#include "hd_transfer/hd_transfer.h" +#include 
"host_emb/host_emb.h" +#include "ock_ctr_common/include/error_code.h" #include "utils/common.h" +#include "utils/config.h" #include "utils/logger.h" #include "utils/safe_queue.h" #include "utils/singleton.h" #include "utils/time_cost.h" -#include "utils/config.h" -#include "host_emb/host_emb.h" -#include "emb_table/embedding_mgmt.h" -#include "hd_transfer/hd_transfer.h" -#include "ock_ctr_common/include/error_code.h" using namespace std; using namespace chrono; @@ -41,15 +43,14 @@ void KeyProcess::SetupHotEmbUpdateStep() } bool KeyProcess::Initialize(const RankInfo& rInfo, const vector& eInfos, - const vector& thresholdValues, - int seed) + const vector& thresholdValues, int seed) { this->rankInfo = rInfo; - + SetupHotEmbUpdateStep(); - + map scInfo; - for (const auto& info: eInfos) { + for (const auto& info : eInfos) { embInfos[info.name] = info; scInfo[info.name] = info.sendCount; InitHotEmbTotCount(info, rInfo); @@ -63,8 +64,8 @@ bool KeyProcess::Initialize(const RankInfo& rInfo, const vector& eInfos LOG_INFO(KEY_PROCESS "hot emb count info:{}", MapToString(hotEmbTotCount)); MPI_Group worldGroup; MPI_Comm_group(MPI_COMM_WORLD, &worldGroup); - for (auto& i: comm) { - for (auto& j: i) { + for (auto& i : comm) { + for (auto& j : i) { MPI_Comm_create(MPI_COMM_WORLD, worldGroup, &j); } } @@ -82,12 +83,13 @@ bool KeyProcess::Initialize(const RankInfo& rInfo, const vector& eInfos if (GlobalEnv::fastUnique) { int result = ock::ctr::Factory::Create(factory); if (result != 0) { - throw runtime_error(Logger::Format("create fast factory failed, error code:{}", result)); + throw runtime_error( + Logger::Format("create fast factory failed, error code:{}", result)); } } LOG_INFO(KEY_PROCESS "scInfo:{}, localRankSize:{}, rankSize:{}, useStatic:{}", - MapToString(scInfo), rInfo.localRankSize, rInfo.rankSize, rInfo.useStatic); + MapToString(scInfo), rInfo.localRankSize, rInfo.rankSize, rInfo.useStatic); #ifndef GTEST Start(); #endif @@ -101,7 +103,7 @@ int KeyProcess::Start() // 0 1 2 3 4 5 0 1 2 3 4 5 // | rank0 | | rank1 | // each rank creates KEY_PROCESS_THREAD threads, each thread process one batchdata - LOG_INFO("CPU Core Num: {}", sysconf(_SC_NPROCESSORS_CONF)); // 查看CPU核数 + LOG_INFO("CPU Core Num: {}", sysconf(_SC_NPROCESSORS_CONF)); // 查看CPU核数 auto fn = [this](int channel, int threadId) { #ifndef GTEST auto ret = aclrtSetDevice(static_cast(rankInfo.deviceId)); @@ -115,7 +117,7 @@ int KeyProcess::Start() } else { KeyProcessTask(channel, threadId); } - }; // for clean code + }; // for clean code int threadNum = GetThreadNumEnv(); for (int channel = 0; channel < MAX_CHANNEL_NUM; ++channel) { LOG_INFO(KEY_PROCESS "key process thread num: {}", threadNum); @@ -133,8 +135,9 @@ void KeyProcess::InitHotEmbTotCount(const EmbInfo& info, const RankInfo& rInfo) if (rankInfo.useDynamicExpansion) { embeddingSize = info.embeddingSize; } - hotEmbTotCount[info.name] = static_cast(static_cast(GetUBSize(rInfo.deviceId) / sizeof(float)) * - HOT_EMB_CACHE_PCT / static_cast(embeddingSize)); + hotEmbTotCount[info.name] = + static_cast(static_cast(GetUBSize(rInfo.deviceId) / sizeof(float)) * + HOT_EMB_CACHE_PCT / static_cast(embeddingSize)); } OffsetMemT KeyProcess::GetMaxOffset() @@ -179,7 +182,7 @@ void KeyProcess::Destroy() { isRunning = false; LOG_INFO(KEY_PROCESS "rankId:{} KeyProcess begin destroy.", rankInfo.rankId); - for (auto& i: procThreads) { + for (auto& i : procThreads) { i->join(); } procThreads.clear(); @@ -189,8 +192,8 @@ void KeyProcess::Destroy() /// 每个数据通道的所有数据处理线程上锁 void KeyProcess::LoadSaveLock() { 
- for (int channelId { 0 }; channelId < MAX_CHANNEL_NUM; ++channelId) { - for (int threadId { 0 }; threadId < MAX_KEY_PROCESS_THREAD; ++threadId) { + for (int channelId{0}; channelId < MAX_CHANNEL_NUM; ++channelId) { + for (int threadId{0}; threadId < MAX_KEY_PROCESS_THREAD; ++threadId) { loadSaveMut[channelId][threadId].lock(); } } @@ -199,8 +202,8 @@ void KeyProcess::LoadSaveLock() /// 每个数据通道的所有数据处理线程释放锁 void KeyProcess::LoadSaveUnlock() { - for (int channelId { 0 }; channelId < MAX_CHANNEL_NUM; ++channelId) { - for (int threadId { 0 }; threadId < MAX_KEY_PROCESS_THREAD; ++threadId) { + for (int channelId{0}; channelId < MAX_CHANNEL_NUM; ++channelId) { + for (int threadId{0}; threadId < MAX_KEY_PROCESS_THREAD; ++threadId) { loadSaveMut[channelId][threadId].unlock(); } } @@ -226,8 +229,9 @@ void KeyProcess::GetUniqueConfig(ock::ctr::UniqueConf& uniqueConf) uniqueConf.maxThreadNum = GlobalEnv::maxUniqueThreadNum; } -void KeyProcess::InitializeUnique(ock::ctr::UniqueConf& uniqueConf, size_t& preBatchSize, bool& uniqueInitialize, - const unique_ptr & batch, ock::ctr::UniquePtr& unique) +void KeyProcess::InitializeUnique(ock::ctr::UniqueConf& uniqueConf, size_t& preBatchSize, + bool& uniqueInitialize, const unique_ptr& batch, + ock::ctr::UniquePtr& unique) { uniqueConf.desiredSize = static_cast(batch->Size()); if (preBatchSize != batch->Size()) { @@ -269,7 +273,8 @@ void KeyProcess::KeyProcessTaskWithFastUnique(int channel, int threadId) while (true) { TimeCost getAndProcessTC; TimeCost getBatchDataTC; - batch = GetBatchData(channel, threadId); // get batch data from SingletonQueue + batch = + GetBatchData(channel, threadId); // get batch data from SingletonQueue LOG_DEBUG("getBatchDataTC(ms):{}", getBatchDataTC.ElapsedMS()); if (batch == nullptr) { break; @@ -281,8 +286,9 @@ void KeyProcess::KeyProcessTaskWithFastUnique(int channel, int threadId) if (!KeyProcessTaskHelperWithFastUnique(batch, unique, channel, threadId)) { break; } - LOG_INFO(KEY_PROCESS "getAndProcessTC(ms):{}, key process with fast unique cost:{}," - " get data time(ms):{}, batch name:{}, channelId:{}, threadId:{}, batchId:{}", + LOG_INFO(KEY_PROCESS + "getAndProcessTC(ms):{}, key process with fast unique cost:{}," + " get data time(ms):{}, batch name:{}, channelId:{}, threadId:{}, batchId:{}", getAndProcessTC.ElapsedMS(), processDataTime.ElapsedMS(), getBatchTime, batch->name, batch->channel, threadId, batch->batchId); int queueIndex = threadId + (MAX_KEY_PROCESS_THREAD * batch->channel); @@ -290,14 +296,13 @@ void KeyProcess::KeyProcessTaskWithFastUnique(int channel, int threadId) batchQueue->PutDirty(move(batch)); } unique->UnInitialize(); - } catch (const EndRunExit &e) { + } catch (const EndRunExit& e) { LOG_INFO(KEY_PROCESS "channel: {}, thread: {}, abort run: {}", channel, threadId, e.what()); } LOG_INFO(KEY_PROCESS "KeyProcessTaskWithFastUnique exit. 
rank:{} channelId:{}, threadId:{}", - rankInfo.rankId, channel, threadId); + rankInfo.rankId, channel, threadId); } - void KeyProcess::KeyProcessTask(int channel, int threadId) { unique_ptr batch; @@ -305,7 +310,8 @@ void KeyProcess::KeyProcessTask(int channel, int threadId) while (true) { TimeCost getAndProcessTC; TimeCost getBatchDataTC; - batch = GetBatchData(channel, threadId); // get batch data from SingletonQueue + batch = + GetBatchData(channel, threadId); // get batch data from SingletonQueue LOG_DEBUG("getBatchDataTC(ms):{}", getBatchDataTC.ElapsedMS()); if (batch == nullptr) { break; @@ -316,40 +322,43 @@ void KeyProcess::KeyProcessTask(int channel, int threadId) if (!KeyProcessTaskHelper(batch, channel, threadId)) { break; } - LOG_INFO(KEY_PROCESS "getAndProcessTC(ms):{}, key process cost:{}," - " get data time(ms):{}, batch name:{}, channelId:{}, threadId:{}, batchId:{}", + LOG_INFO(KEY_PROCESS + "getAndProcessTC(ms):{}, key process cost:{}," + " get data time(ms):{}, batch name:{}, channelId:{}, threadId:{}, batchId:{}", getAndProcessTC.ElapsedMS(), processDataTime.ElapsedMS(), getBatchTime, batch->name, batch->channel, threadId, batch->batchId); int queueIndex = threadId + (MAX_KEY_PROCESS_THREAD * batch->channel); auto batchQueue = SingletonQueue::GetInstances(queueIndex); batchQueue->PutDirty(move(batch)); } - } catch (const EndRunExit &e) { + } catch (const EndRunExit& e) { LOG_INFO(KEY_PROCESS "channel: {}, thread: {}, abort run: {}", channel, threadId, e.what()); } - LOG_INFO(KEY_PROCESS "KeyProcessTask exit. rank:{} channelId:{}, threadId:{}", rankInfo.rankId, channel, threadId); + LOG_INFO(KEY_PROCESS "KeyProcessTask exit. rank:{} channelId:{}, threadId:{}", rankInfo.rankId, + channel, threadId); } -void KeyProcess::HashSplitHelper(const unique_ptr & batch, vector & splitKeys, - vector & restore, vector & hotPos, - vector >& keyCount) +void KeyProcess::HashSplitHelper(const unique_ptr& batch, vector& splitKeys, + vector& restore, vector& hotPos, + vector>& keyCount) { TimeCost uniqueTc; if (m_featureAdmitAndEvict.GetFunctionSwitch() && FeatureAdmitAndEvict::m_embStatus[batch->name] != SingleEmbTableStatus::SETS_NONE) { - tie(splitKeys, restore, keyCount) = HashSplitWithFAAE(batch); // 按存储dev id切分并去重 + tie(splitKeys, restore, keyCount) = HashSplitWithFAAE(batch); // 按存储dev id切分并去重 } else { - tie(splitKeys, restore, hotPos) = HotHashSplit(batch); // 按存储dev id切分并去重 + tie(splitKeys, restore, hotPos) = HotHashSplit(batch); // 按存储dev id切分并去重 } LOG_DEBUG("uniqueTc(ms):{}", uniqueTc.ElapsedMS()); } -bool KeyProcess::KeyProcessTaskHelperWithFastUnique(unique_ptr& batch, ock::ctr::UniquePtr& unique, - int channel, int threadId) +bool KeyProcess::KeyProcessTaskHelperWithFastUnique(unique_ptr& batch, + ock::ctr::UniquePtr& unique, int channel, + int threadId) { // tuple for keyRec restore hotPos scAll countRecv isWithFAAE = m_featureAdmitAndEvict.GetFunctionSwitch() && - FeatureAdmitAndEvict::m_embStatus[batch->name] != SingleEmbTableStatus::SETS_NONE; + FeatureAdmitAndEvict::m_embStatus[batch->name] != SingleEmbTableStatus::SETS_NONE; TimeCost totalTimeCost = TimeCost(); TimeCost fastUniqueTC; UniqueInfo uniqueInfo; @@ -358,11 +367,11 @@ bool KeyProcess::KeyProcessTaskHelperWithFastUnique(unique_ptr& batch // 特征准入&淘汰 if (isWithFAAE && - (m_featureAdmitAndEvict.FeatureAdmit( - channel, batch, uniqueInfo.all2AllInfo.keyRecv, uniqueInfo.all2AllInfo.countRecv) == - FeatureAdmitReturnType::FEATURE_ADMIT_RETURN_ERROR)) { + (m_featureAdmitAndEvict.FeatureAdmit(channel, batch, 
uniqueInfo.all2AllInfo.keyRecv, + uniqueInfo.all2AllInfo.countRecv) == + FeatureAdmitReturnType::FEATURE_ADMIT_RETURN_ERROR)) { LOG_ERROR(KEY_PROCESS "rank:{} thread:{}, channel:{}, Feature-admit-and-evict error ...", - rankInfo.rankId, threadId, channel); + rankInfo.rankId, threadId, channel); return false; } std::lock_guard lock(loadSaveMut[channel][threadId]); @@ -376,25 +385,29 @@ bool KeyProcess::KeyProcessTaskHelperWithFastUnique(unique_ptr& batch LOG_DEBUG("key2OffsetTC(ms):{}", key2OffsetTC.ElapsedMS()); } // Static all2all,need send count - if (!rankInfo.useStatic) { SendA2A(uniqueInfo.all2AllInfo.scAll, batch->name, batch->channel, batch->batchId); } + if (!rankInfo.useStatic) { + SendA2A(uniqueInfo.all2AllInfo.scAll, batch->name, batch->channel, batch->batchId); + } auto tensors = make_unique>(); tensors->push_back(Vec2TensorI32(uniqueInfo.restore)); uniqueInfo.hotPos.resize(hotEmbTotCount[batch->name], -1); tensors->push_back(Vec2TensorI32(uniqueInfo.hotPos)); - + if (!rankInfo.isDDR) { PushGlobalUniqueTensors(move(tensors), uniqueInfo.all2AllInfo.keyRecv, channel); - tensors->push_back(rankInfo.useDynamicExpansion ? Vec2TensorI64(uniqueInfo.all2AllInfo.keyRecv) : - Vec2TensorI32(uniqueInfo.all2AllInfo.keyRecv)); + tensors->push_back(rankInfo.useDynamicExpansion + ? Vec2TensorI64(uniqueInfo.all2AllInfo.keyRecv) + : Vec2TensorI32(uniqueInfo.all2AllInfo.keyRecv)); } TimeCost pushResultTC; PushResult(batch, move(tensors), uniqueInfo.all2AllInfo.keyRecv); if (GlogConfig::gStatOn) { - LOG_INFO(STAT_INFO "channel_id {} batch_id {} rank_id {} key_process_time_cost_with_fast_unique {}", - channel, batch->batchId, rankInfo.rankId, totalTimeCost.ElapsedMS()); + LOG_INFO(STAT_INFO + "channel_id {} batch_id {} rank_id {} key_process_time_cost_with_fast_unique {}", + channel, batch->batchId, rankInfo.rankId, totalTimeCost.ElapsedMS()); } LOG_DEBUG("pushResultTC(ms):{}", pushResultTC.ElapsedMS()); return true; @@ -422,8 +435,8 @@ bool KeyProcess::KeyProcessTaskHelper(unique_ptr& batch, int channel, // 特征准入&淘汰 if (m_featureAdmitAndEvict.GetFunctionSwitch() && FeatureAdmitAndEvict::m_embStatus[batch->name] != SingleEmbTableStatus::SETS_NONE && - (m_featureAdmitAndEvict.FeatureAdmit(channel, batch, lookupKeys, - countRecv) == FeatureAdmitReturnType::FEATURE_ADMIT_RETURN_ERROR)) { + (m_featureAdmitAndEvict.FeatureAdmit(channel, batch, lookupKeys, countRecv) == + FeatureAdmitReturnType::FEATURE_ADMIT_RETURN_ERROR)) { LOG_ERROR(KEY_PROCESS "rank:{} thread:{}, channel:{}, Feature-admit-and-evict error ...", rankInfo.rankId, threadId, channel); return false; @@ -436,7 +449,9 @@ bool KeyProcess::KeyProcessTaskHelper(unique_ptr& batch, int channel, } // Static all2all,need send count - if (!rankInfo.useStatic) { SendA2A(scAll, batch->name, batch->channel, batch->batchId); } + if (!rankInfo.useStatic) { + SendA2A(scAll, batch->name, batch->channel, batch->batchId); + } TimeCost pushResultTC; auto tensors = make_unique>(); @@ -444,24 +459,27 @@ bool KeyProcess::KeyProcessTaskHelper(unique_ptr& batch, int channel, hotPos.resize(hotEmbTotCount[batch->name], 0); tensors->push_back(Vec2TensorI32(hotPos)); - + if (!rankInfo.isDDR) { PushGlobalUniqueTensors(tensors, lookupKeys, channel); - tensors->push_back(rankInfo.useDynamicExpansion ? Vec2TensorI64(lookupKeys) : Vec2TensorI32(lookupKeys)); + tensors->push_back(rankInfo.useDynamicExpansion ? 
Vec2TensorI64(lookupKeys) + : Vec2TensorI32(lookupKeys)); } PushResult(batch, move(tensors), lookupKeys); LOG_DEBUG("pushResultTC(ms):{}", pushResultTC.ElapsedMS()); if (GlogConfig::gStatOn) { - LOG_INFO(STAT_INFO "channel_id {} batch_id {} rank_id {} key_process_time_cost {}", - channel, batch->batchId, rankInfo.rankId, totalTimeCost.ElapsedMS()); + LOG_INFO(STAT_INFO "channel_id {} batch_id {} rank_id {} key_process_time_cost {}", channel, + batch->batchId, rankInfo.rankId, totalTimeCost.ElapsedMS()); } return true; } -void KeyProcess::PushGlobalUniqueTensors(const unique_ptr>& tensors, KeysT& lookupKeys, int channel) +void KeyProcess::PushGlobalUniqueTensors(const unique_ptr>& tensors, + KeysT& lookupKeys, int channel) { - if (GlobalEnv::applyGradientsStrategy == ApplyGradientsStrategyOptions::SUM_SAME_ID_GRADIENTS_AND_APPLY && + if (GlobalEnv::applyGradientsStrategy == + ApplyGradientsStrategyOptions::SUM_SAME_ID_GRADIENTS_AND_APPLY && channel == TRAIN_CHANNEL_ID) { KeysT uniqueKeys; vector restoreVecSec; @@ -470,36 +488,39 @@ void KeyProcess::PushGlobalUniqueTensors(const unique_ptr>& tenso GlobalUnique(lookupKeys, uniqueKeys, restoreVecSec); LOG_DEBUG("globalUniqueSyncTC(ms):{}", globalUniqueSyncTC.ElapsedMS()); tensors->push_back(Vec2TensorI32(restoreVecSec)); - tensors->push_back(rankInfo.useDynamicExpansion ? Vec2TensorI64(uniqueKeys) : Vec2TensorI32(uniqueKeys)); + tensors->push_back(rankInfo.useDynamicExpansion ? Vec2TensorI64(uniqueKeys) + : Vec2TensorI32(uniqueKeys)); } } vector KeyProcess::GetCountRecv(const unique_ptr& batch, int id, - vector>& keyCount, vector scAll, vector ss) + vector>& keyCount, vector scAll, + vector ss) { TimeCost getCountRecvTC; if (rankInfo.useStatic) { - for (auto& cnt: keyCount) { + for (auto& cnt : keyCount) { cnt.resize(embInfos[batch->name].sendCount, 0); } } vector countSend; - for (auto& cnt: keyCount) { + for (auto& cnt : keyCount) { countSend.insert(countSend.cend(), cnt.cbegin(), cnt.cend()); } vector sc; for (int i = 0; i < rankInfo.rankSize; ++i) { sc.push_back(scAll.at(rankInfo.rankSize * rankInfo.rankId + i)); } - vector rc; // receive count + vector rc; // receive count for (int i = 0; i < rankInfo.rankSize; ++i) { rc.push_back(scAll.at(i * rankInfo.rankSize + rankInfo.rankId)); } - vector rs = Count2Start(rc); // receive displays/offset 接受数据的起始偏移量 + vector rs = Count2Start(rc); // receive displays/offset 接受数据的起始偏移量 vector countRecv; countRecv.resize(rs.back() + rc.back()); - int retCode = MPI_Alltoallv(countSend.data(), sc.data(), ss.data(), MPI_UINT32_T, countRecv.data(), - rc.data(), rs.data(), MPI_UINT32_T, comm[batch->channel][id]); + int retCode = + MPI_Alltoallv(countSend.data(), sc.data(), ss.data(), MPI_UINT32_T, countRecv.data(), + rc.data(), rs.data(), MPI_UINT32_T, comm[batch->channel][id]); if (retCode != MPI_SUCCESS) { LOG_ERROR("rank {}, MPI_Alltoallv failed:{}", rankInfo.rankId, retCode); } @@ -512,9 +533,11 @@ void KeyProcess::PushResult(unique_ptr& batch, unique_ptr lockGuard(mut); storage.push_front(move(tensors)); - infoList[batch->name][batch->channel].push(make_tuple(batch->batchId, batch->name, storage.begin())); + infoList[batch->name][batch->channel].push( + make_tuple(batch->batchId, batch->name, storage.begin())); if (rankInfo.isDDR) { - lookupKeysList[batch->name][batch->channel].push(make_tuple(batch->batchId, batch->name, move(lookupKeys))); + lookupKeysList[batch->name][batch->channel].push( + make_tuple(batch->batchId, batch->name, move(lookupKeys))); } lockGuard.unlock(); } @@ -542,32 +565,38 @@ 
unique_ptr KeyProcess::GetBatchData(int channel, int commId) const this_thread::sleep_for(100us); if (tc.ElapsedSec() > GET_BATCH_TIMEOUT) { if (commId == 0) { - LOG_WARN(KEY_PROCESS "getting batch timeout! 1. check last 'read batch cost' print. " - "channel[{}] commId[{}]", channel, commId); + LOG_WARN(KEY_PROCESS + "getting batch timeout! 1. check last 'read batch cost' print. " + "channel[{}] commId[{}]", + channel, commId); } this_thread::sleep_for(seconds(1)); tc = TimeCost(); } if (!isRunning) { - LOG_WARN("channelId:{} threadId:{}, isRunning is false when GetBatchData", channel, commId); + LOG_WARN("channelId:{} threadId:{}, isRunning is false when GetBatchData", channel, + commId); throw EndRunExit("GetBatchData end run."); } } EASY_END_BLOCK - LOG_DEBUG(KEY_PROCESS "channelId:{} threadId:{} batchId:{}, get batch data done, batchName:{}. bs:{} sample:[{}]", - batch->channel, commId, batch->batchId, batch->name, batch->Size(), batch->UnParse()); + LOG_DEBUG( + KEY_PROCESS + "channelId:{} threadId:{} batchId:{}, get batch data done, batchName:{}. bs:{} sample:[{}]", + batch->channel, commId, batch->batchId, batch->name, batch->Size(), batch->UnParse()); #if defined(PROFILING) && defined(BUILD_WITH_EASY_PROFILER) if (batch->batchId == PROFILING_START_BATCH_ID) { EASY_PROFILER_ENABLE } else if (batch->batchId == PROFILING_END_BATCH_ID) { - ::profiler::dumpBlocksToFile(StringFormat("/home/MX_REC-profile-%d.prof", rankInfo.rankId).c_str()); + ::profiler::dumpBlocksToFile( + StringFormat("/home/MX_REC-profile-%d.prof", rankInfo.rankId).c_str()); } #endif return batch; } -size_t KeyProcess::GetKeySize(const unique_ptr &batch) +size_t KeyProcess::GetKeySize(const unique_ptr& batch) { size_t size = rankInfo.rankSize * embInfos[batch->name].sendCount; if (!rankInfo.useStatic) { @@ -576,8 +605,9 @@ size_t KeyProcess::GetKeySize(const unique_ptr &batch) return size; } -void KeyProcess::ProcessBatchWithFastUnique(const unique_ptr &batch, ock::ctr::UniquePtr& unique, - int id, UniqueInfo& uniqueInfoOut) +void KeyProcess::ProcessBatchWithFastUnique(const unique_ptr& batch, + ock::ctr::UniquePtr& unique, int id, + UniqueInfo& uniqueInfoOut) { EASY_FUNCTION(profiler::colors::Purple) EASY_VALUE("batchId", batch->batchId) @@ -596,10 +626,10 @@ void KeyProcess::ProcessBatchWithFastUnique(const unique_ptr &batch, ock::ctr::UniqueIn uniqueIn; uniqueIn.inputIdCnt = static_cast(batch->Size()); - uniqueIn.inputId = reinterpret_cast(batch->sample.data()); + uniqueIn.inputId = reinterpret_cast(batch->sample.data()); ock::ctr::EnhancedUniqueOut uniqueOut; - uniqueOut.uniqueId = reinterpret_cast(keySendInfo.keySend.data()); + uniqueOut.uniqueId = reinterpret_cast(keySendInfo.keySend.data()); uniqueOut.index = reinterpret_cast(uniqueInfoOut.restore.data()); if (rankInfo.useStatic) { uniqueOut.idCnt = idCount.data(); @@ -608,7 +638,7 @@ void KeyProcess::ProcessBatchWithFastUnique(const unique_ptr &batch, uniqueOut.idCnt = keySendInfo.keyCount.data(); } uniqueOut.uniqueIdCntInBucket = splitSize.data(); - uniqueOut.uniqueIdInBucket = reinterpret_cast(uniqueVector.data()); + uniqueOut.uniqueIdInBucket = reinterpret_cast(uniqueVector.data()); uniqueOut.uniqueIdCnt = 0; int ret = unique->DoEnhancedUnique(uniqueIn, uniqueOut); @@ -624,19 +654,21 @@ void KeyProcess::ProcessBatchWithFastUnique(const unique_ptr &batch, All2All(sc, id, batch, keySendInfo, uniqueInfoOut.all2AllInfo); LOG_DEBUG(KEY_PROCESS "ProcessBatchWithFastUnique get batchId:{}, batchSize:{}," - " channel:{}, name:{}, restore:{}, keyCount:{}", - 
batch->batchId, batch->Size(), batch->channel, batch->name, - uniqueInfoOut.restore.size(), keySendInfo.keyCount.size()); + " channel:{}, name:{}, restore:{}, keyCount:{}", + batch->batchId, batch->Size(), batch->channel, batch->name, + uniqueInfoOut.restore.size(), keySendInfo.keyCount.size()); if (GlogConfig::gStatOn) { LOG_INFO(STAT_INFO "channel_id {} batch_id {} rank_id {} " - "batch_key_num_with_fast_unique {} unique_key_num_with_fast_unique {}", - batch->channel, batch->batchId, rankInfo.rankId, batch->Size(), uniqueOut.uniqueIdCnt); + "batch_key_num_with_fast_unique {} unique_key_num_with_fast_unique {}", + batch->channel, batch->batchId, rankInfo.rankId, batch->Size(), + uniqueOut.uniqueIdCnt); } } -void KeyProcess::HandleHotAndSendCount(const unique_ptr &batch, UniqueInfo& uniqueInfoOut, - KeySendInfo& keySendInfo, vector& sc, vector& splitSize) +void KeyProcess::HandleHotAndSendCount(const unique_ptr& batch, + UniqueInfo& uniqueInfoOut, KeySendInfo& keySendInfo, + vector& sc, vector& splitSize) { std::shared_lock lock(g_smut); absl::flat_hash_map hotMap = hotKey[batch->name]; @@ -649,8 +681,8 @@ void KeyProcess::HandleHotAndSendCount(const unique_ptr &batch, Uniqu TimeCost computeHotTc; ComputeHotPos(batch, hotMap, uniqueInfoOut.hotPos, uniqueInfoOut.restore, hotOffset); LOG_DEBUG("ComputeHot TimeCost(ms):{}", computeHotTc.ElapsedMS()); - UpdateHotMapForUnique(keySendInfo.keySend, keySendInfo.keyCount, - hotOffset, batch->batchId % hotEmbUpdateStep == 0, batch->name); + UpdateHotMapForUnique(keySendInfo.keySend, keySendInfo.keyCount, hotOffset, + batch->batchId % hotEmbUpdateStep == 0, batch->name); if (rankInfo.useStatic) { sc.resize(rankInfo.rankSize, embInfos[batch->name].sendCount); @@ -662,8 +694,9 @@ void KeyProcess::HandleHotAndSendCount(const unique_ptr &batch, Uniqu } } -void KeyProcess::ComputeHotPos(const unique_ptr &batch, absl::flat_hash_map &hotMap, - vector &hotPos, vector &restore, const int hotOffset) const +void KeyProcess::ComputeHotPos(const unique_ptr& batch, + absl::flat_hash_map& hotMap, vector& hotPos, + vector& restore, const int hotOffset) const { emb_key_t* inputData = batch->sample.data(); size_t miniBs = batch->Size(); @@ -686,39 +719,41 @@ void KeyProcess::ComputeHotPos(const unique_ptr &batch, absl::flat_ha } } -void KeyProcess::All2All(vector& sc, int id, const unique_ptr &batch, KeySendInfo& keySendInfo, - All2AllInfo& all2AllInfoOut) +void KeyProcess::All2All(vector& sc, int id, const unique_ptr& batch, + KeySendInfo& keySendInfo, All2AllInfo& all2AllInfoOut) { TimeCost getScAllTC; int channel = batch->channel; - GetScAllForUnique(sc, id, batch, all2AllInfoOut.scAll); // Allgather通信获取所有(不同rank相同thread id的) + GetScAllForUnique(sc, id, batch, + all2AllInfoOut.scAll); // Allgather通信获取所有(不同rank相同thread id的) LOG_DEBUG("GetScAll TimeCost(ms):{}", getScAllTC.ElapsedMS()); TimeCost all2allTC; - vector ss = Count2Start(sc); // send displays/offset 发送数据的起始偏移量 - vector rc(rankInfo.rankSize); // receive count + vector ss = Count2Start(sc); // send displays/offset 发送数据的起始偏移量 + vector rc(rankInfo.rankSize); // receive count for (int i = 0; i < rankInfo.rankSize; ++i) { // 通信量矩阵某一列的和即为本地要从其他设备接受的key数据量 rc[i] = all2AllInfoOut.scAll.at(i * rankInfo.rankSize + rankInfo.rankId); } - vector rs = Count2Start(rc); // receive displays/offset 接受数据的起始偏移量 + vector rs = Count2Start(rc); // receive displays/offset 接受数据的起始偏移量 all2AllInfoOut.keyRecv.resize(rs.back() + rc.back()); EASY_BLOCK("all2all") int retCode = MPI_Alltoallv(keySendInfo.keySend.data(), sc.data(), 
ss.data(), MPI_INT64_T, - all2AllInfoOut.keyRecv.data(), rc.data(), rs.data(), - MPI_INT64_T, comm[channel][id]); + all2AllInfoOut.keyRecv.data(), rc.data(), rs.data(), MPI_INT64_T, + comm[channel][id]); if (retCode != MPI_SUCCESS) { LOG_ERROR("rank {}, MPI_Alltoallv failed:{}", rankInfo.rankId, retCode); } - LOG_DEBUG("channelId:{} threadId:{} batchId:{}, All2All MPI_Alltoallv end.", channel, id, batch->batchId); + LOG_DEBUG("channelId:{} threadId:{} batchId:{}, All2All MPI_Alltoallv end.", channel, id, + batch->batchId); all2AllInfoOut.countRecv.resize(rs.back() + rc.back()); if (isWithFAAE) { retCode = MPI_Alltoallv(keySendInfo.keyCount.data(), sc.data(), ss.data(), MPI_UINT32_T, - all2AllInfoOut.countRecv.data(), rc.data(), - rs.data(), MPI_UINT32_T, comm[channel][id]); + all2AllInfoOut.countRecv.data(), rc.data(), rs.data(), MPI_UINT32_T, + comm[channel][id]); if (retCode != MPI_SUCCESS) { - LOG_ERROR("channelId:{} threadId:{} batchId:{}, MPI_Alltoallv failed:{}", - channel, id, batch->batchId, retCode); + LOG_ERROR("channelId:{} threadId:{} batchId:{}, MPI_Alltoallv failed:{}", channel, id, + batch->batchId, retCode); } } LOG_DEBUG("channelId:{} threadId:{} batchId:{}, All2All end, all2allTC TimeCost(ms):{}", @@ -727,7 +762,8 @@ void KeyProcess::All2All(vector& sc, int id, const unique_ptr &b } auto KeyProcess::ProcessSplitKeys(const unique_ptr& batch, int id, - vector& splitKeys) -> tuple, vector> + vector& splitKeys) + -> tuple, vector> { TimeCost processSplitKeysTC; EASY_FUNCTION(profiler::colors::Purple) @@ -736,43 +772,45 @@ auto KeyProcess::ProcessSplitKeys(const unique_ptr& batch, int id, batch->channel, id, batch->batchId); // 使用静态all2all通信:发送或接受量为预置固定值 scInfo[batch->name] = 65536 / rankSize 经验值 - if (rankInfo.useStatic) { // maybe move after all2all - for (KeysT& i: splitKeys) { + if (rankInfo.useStatic) { // maybe move after all2all + for (KeysT& i : splitKeys) { if (static_cast(i.size()) > embInfos[batch->name].sendCount) { - LOG_ERROR("{}[{}]:{} overflow! set send count bigger than {}", - batch->name, batch->channel, batch->batchId, i.size()); + LOG_ERROR("{}[{}]:{} overflow! set send count bigger than {}", batch->name, + batch->channel, batch->batchId, i.size()); throw runtime_error( StringFormat("%s[%d]:%d overflow! 
set send count bigger than %d", - batch->name.c_str(), batch->channel, batch->batchId, i.size()).c_str()); + batch->name.c_str(), batch->channel, batch->batchId, i.size()) + .c_str()); } i.resize(embInfos[batch->name].sendCount, -1); } } KeysT keySend; - vector sc; // send count - for (const auto& i: splitKeys) { + vector sc; // send count + for (const auto& i : splitKeys) { sc.push_back(static_cast(i.size())); keySend.insert(keySend.cend(), i.cbegin(), i.cend()); } KeysT keyRecv; TimeCost getScAllTC; - vector scAll = GetScAll(sc, id, batch); // Allgather通信获取所有(不同rank相同thread id的)线程间通信量矩阵 + vector scAll = GetScAll( + sc, id, batch); // Allgather通信获取所有(不同rank相同thread id的)线程间通信量矩阵 LOG_DEBUG("getScAllTC(ms)(AllReduce-AllGather):{}", getScAllTC.ElapsedMS()); vector ss = Count2Start(sc); // send displays/offset 发送数据的起始偏移量 - vector rc; // receive count + vector rc; // receive count for (int i = 0; i < rankInfo.rankSize; ++i) { // 通信量矩阵某一列的和即为本地要从其他设备接受的key数据量 rc.push_back(scAll.at(i * rankInfo.rankSize + rankInfo.rankId)); } - vector rs = Count2Start(rc); // receive displays/offset 接受数据的起始偏移量 + vector rs = Count2Start(rc); // receive displays/offset 接受数据的起始偏移量 keyRecv.resize(rs.back() + rc.back()); EASY_BLOCK("all2all") TimeCost uniqueAll2AllTC; - int retCode = MPI_Alltoallv(keySend.data(), sc.data(), ss.data(), MPI_INT64_T, - keyRecv.data(), rc.data(), rs.data(), MPI_INT64_T, comm[batch->channel][id]); + int retCode = MPI_Alltoallv(keySend.data(), sc.data(), ss.data(), MPI_INT64_T, keyRecv.data(), + rc.data(), rs.data(), MPI_INT64_T, comm[batch->channel][id]); if (retCode != MPI_SUCCESS) { LOG_ERROR("rank {}, MPI_Alltoallv failed:{}", rankInfo.rankId, retCode); } @@ -781,8 +819,8 @@ auto KeyProcess::ProcessSplitKeys(const unique_ptr& batch, int id, EASY_END_BLOCK LOG_DEBUG(KEY_PROCESS "channelId:{} threadId:{} batchId:{}, batchName:{}, MPI_Alltoallv finish." 
" processSplitKeysTC(ms):{}", - batch->channel, id, batch->batchId, batch->name, processSplitKeysTC.ElapsedMS()); - return { keyRecv, scAll, ss }; + batch->channel, id, batch->batchId, batch->name, processSplitKeysTC.ElapsedMS()); + return {keyRecv, scAll, ss}; } /* @@ -790,15 +828,16 @@ auto KeyProcess::ProcessSplitKeys(const unique_ptr& batch, int id, * splitKeys返回:将数据的key切分到其所在dev id对应的桶中,并去重。 * restore返回:去重后key在桶内偏移量(用于计算恢复向量) */ -tuple, vector> KeyProcess::HashSplit(const unique_ptr& batch) const +tuple, vector> KeyProcess::HashSplit( + const unique_ptr& batch) const { EASY_FUNCTION(profiler::colors::Gold) emb_key_t* batchData = batch->sample.data(); size_t miniBs = batch->Size(); vector splitKeys(rankInfo.rankSize); vector restore(batch->Size()); - vector hashSplitLens(rankInfo.rankSize); // 初始化全0,记录每个桶的长度 - absl::flat_hash_map uKey; // 用于去重查询 + vector hashSplitLens(rankInfo.rankSize); // 初始化全0,记录每个桶的长度 + absl::flat_hash_map uKey; // 用于去重查询 EASY_BLOCK("split push back") for (size_t i = 0; i < miniBs; i++) { const emb_key_t& key = batchData[i]; @@ -806,9 +845,10 @@ tuple, vector> KeyProcess::HashSplit(const unique_ptrsecond; } } @@ -821,10 +861,11 @@ tuple, vector> KeyProcess::HashSplit(const unique_ptrchannel, batch->batchId, rankInfo.rankId, batch->Size(), uniqueKeyNum); + LOG_INFO(STAT_INFO + "channel_id {} batch_id {} rank_id {} batch_key_num {} unique_key_num {}", + batch->channel, batch->batchId, rankInfo.rankId, batch->Size(), uniqueKeyNum); } - return { splitKeys, restore }; + return {splitKeys, restore}; } void KeyProcess::PaddingAlltoallVC(vector& splitKeys) const @@ -846,10 +887,10 @@ tuple, vector, vector>> KeyProcess::Hash emb_key_t* batchData = batch->sample.data(); size_t miniBs = batch->Size(); vector splitKeys(rankInfo.rankSize); - vector> keyCount(rankInfo.rankSize); // splitKeys在原始batch中对应的频次 + vector> keyCount(rankInfo.rankSize); // splitKeys在原始batch中对应的频次 vector restore(batch->Size()); - vector hashSplitLens(rankInfo.rankSize); // 初始化全0,记录每个桶的长度 - absl::flat_hash_map> uKey; // 用于去重查询 + vector hashSplitLens(rankInfo.rankSize); // 初始化全0,记录每个桶的长度 + absl::flat_hash_map> uKey; // 用于去重查询 EASY_BLOCK("split push back") for (size_t i = 0; i < miniBs; i++) { const emb_key_t& key = batchData[i]; @@ -857,10 +898,11 @@ tuple, vector, vector>> KeyProcess::Hash auto result = uKey.find(key); if (result == uKey.end()) { splitKeys[devId].push_back(key); - restore[i] = hashSplitLens[devId]++; // restore记录去重后key在桶内偏移量(用于计算恢复向量) + restore[i] = + hashSplitLens[devId]++; // restore记录去重后key在桶内偏移量(用于计算恢复向量) uKey[key].first = restore[i]; uKey[key].second = 1; - } else { // 去重 + } else { // 去重 restore[i] = result->second.first; uKey[key].second++; } @@ -886,20 +928,22 @@ tuple, vector, vector>> KeyProcess::Hash for (int devId = 0; devId < rankInfo.rankSize; ++devId) { uniqueKeyNum += splitKeys[devId].size(); } - LOG_INFO(STAT_INFO "channel_id {} batch_id {} rank_id {} batch_key_num {} faae_unique_key_num {}", - batch->channel, batch->batchId, rankInfo.rankId, batch->Size(), uniqueKeyNum); + LOG_INFO(STAT_INFO + "channel_id {} batch_id {} rank_id {} batch_key_num {} faae_unique_key_num {}", + batch->channel, batch->batchId, rankInfo.rankId, batch->Size(), uniqueKeyNum); } - return { splitKeys, restore, keyCount }; + return {splitKeys, restore, keyCount}; } -tuple, vector, vector> KeyProcess::HotHashSplit(const unique_ptr& batch) +tuple, vector, vector> KeyProcess::HotHashSplit( + const unique_ptr& batch) { EASY_FUNCTION(profiler::colors::Gold) emb_key_t* batchData = batch->sample.data(); 
size_t miniBs = batch->Size(); vector splitKeys(rankInfo.rankSize); vector restore(batch->Size()); - absl::flat_hash_map uKey; // 用于去重查询 + absl::flat_hash_map uKey; // 用于去重查询 absl::flat_hash_map keyCountMapByEmbName; std::shared_lock lock(g_smut); auto hotMap = hotKey[batch->name]; @@ -908,31 +952,31 @@ tuple, vector, vector> KeyProcess::HotHashSplit(cons vector hotPosDev(hotEmbTotCount[batch->name]); int hotCount = 0; int hotOffset = hotEmbTotCount[batch->name]; - for (size_t i = 0; i < miniBs; i++) { // for mini batch + for (size_t i = 0; i < miniBs; i++) { // for mini batch const emb_key_t& key = batchData[i]; if (batch->batchId % hotEmbUpdateStep == 0) { keyCountMapByEmbName[key]++; } emb_key_t devId = abs(key % static_cast(rankInfo.rankSize)); auto result = uKey.find(key); - if (result != uKey.end()) { // // already in splitKeys + if (result != uKey.end()) { // // already in splitKeys restore[i] = result->second; continue; } // new key in current batch - splitKeys[devId].push_back(key); // push to bucket + splitKeys[devId].push_back(key); // push to bucket auto hot = hotMap.find(key); - if (hot != hotMap.end()) { // is hot key - if (hot->second == -1) { // is new hot key in this batch + if (hot != hotMap.end()) { // is hot key + if (hot->second == -1) { // is new hot key in this batch // pos in lookup vec (need add ss) for hot-gather hotPos[hotCount] = static_cast(splitKeys[devId].size()) - 1; - hotPosDev[hotCount] = devId; // which dev, for get ss + hotPosDev[hotCount] = devId; // which dev, for get ss hot->second = hotCount; - restore[i] = hotCount++; // get pos of hot emb + restore[i] = hotCount++; // get pos of hot emb } else { restore[i] = hot->second; } - } else { // is not hot key + } else { // is not hot key // restore记录去重后key在桶内偏移量(用于计算恢复向量) restore[i] = static_cast(splitKeys[devId].size() + (hotOffset - 1)); } @@ -944,22 +988,25 @@ tuple, vector, vector> KeyProcess::HotHashSplit(cons for (int devId = 0; devId < rankInfo.rankSize; ++devId) { uniqueKeyNum += splitKeys[devId].size(); } - LOG_INFO(STAT_INFO "channel_id {} batch_id {} rank_id {} batch_key_num {} hot_unique_key_num {}", - batch->channel, batch->batchId, rankInfo.rankId, batch->Size(), uniqueKeyNum); + LOG_INFO(STAT_INFO + "channel_id {} batch_id {} rank_id {} batch_key_num {} hot_unique_key_num {}", + batch->channel, batch->batchId, rankInfo.rankId, batch->Size(), uniqueKeyNum); } - UpdateHotMap(keyCountMapByEmbName, hotEmbTotCount[batch->name], batch->batchId % hotEmbUpdateStep == 0, - batch->name); + UpdateHotMap(keyCountMapByEmbName, hotEmbTotCount[batch->name], + batch->batchId % hotEmbUpdateStep == 0, batch->name); AddCountStartToHotPos(splitKeys, hotPos, hotPosDev, batch); - return { splitKeys, restore, hotPos }; + return {splitKeys, restore, hotPos}; } -void KeyProcess::AddCountStartToHotPos(vector& splitKeys, vector& hotPos, const vector& hotPosDev, +void KeyProcess::AddCountStartToHotPos(vector& splitKeys, vector& hotPos, + const vector& hotPosDev, const unique_ptr& batch) { vector splitKeysSize; - for (auto& splitKey: splitKeys) { - int tmp = rankInfo.useStatic ? embInfos[batch->name].sendCount : static_cast(splitKey.size()); + for (auto& splitKey : splitKeys) { + int tmp = rankInfo.useStatic ? 
embInfos[batch->name].sendCount + : static_cast(splitKey.size()); splitKeysSize.push_back(tmp); } @@ -969,13 +1016,13 @@ void KeyProcess::AddCountStartToHotPos(vector& splitKeys, vector& ho } } -void KeyProcess::UpdateHotMapForUnique(const KeysT &keySend, const vector &keyCount, +void KeyProcess::UpdateHotMapForUnique(const KeysT& keySend, const vector& keyCount, uint32_t count, bool refresh, const string& embName) { auto& hotMap = hotKey[embName]; if (refresh) { priority_queue> pq; - for (size_t i = 0;i < keySend.size(); ++i) { + for (size_t i = 0; i < keySend.size(); ++i) { if (keySend[i] == -1) { continue; } @@ -994,15 +1041,15 @@ void KeyProcess::UpdateHotMapForUnique(const KeysT &keySend, const vector& keyCountMapByEmbName, uint32_t count, bool refresh, - const string& embName) +void KeyProcess::UpdateHotMap(absl::flat_hash_map& keyCountMapByEmbName, + uint32_t count, bool refresh, const string& embName) { if (!refresh) { return; } auto& hotMap = hotKey[embName]; - priority_queue> pq; // top k key - for (auto& p: keyCountMapByEmbName) { + priority_queue> pq; // top k key + for (auto& p : keyCountMapByEmbName) { pq.push(pair(-p.second, p.first)); if (pq.size() > count) { pq.pop(); @@ -1018,53 +1065,55 @@ void KeyProcess::UpdateHotMap(absl::flat_hash_map& keyCountMapBy } /* - * 将本地(rank)batch要发送的key数据量进行Allgather通信,获取所有(不同rank相同thread id的)线程间的通信量矩阵 - * scAll返回:所有线程间的通信量矩阵(按行平铺的一维向量) + * 将本地(rank)batch要发送的key数据量进行Allgather通信,获取所有(不同rank相同thread + * id的)线程间的通信量矩阵 scAll返回:所有线程间的通信量矩阵(按行平铺的一维向量) */ -vector KeyProcess::GetScAll(const vector& keyScLocal, int commId, const unique_ptr& batch) +vector KeyProcess::GetScAll(const vector& keyScLocal, int commId, + const unique_ptr& batch) { EASY_FUNCTION() vector scAll; scAll.resize(rankInfo.rankSize * rankInfo.rankSize); - LOG_DEBUG("channelId:{} threadId:{} batchId:{}, GetScAll start.", batch->channel, commId, batch->batchId); + LOG_DEBUG("channelId:{} threadId:{} batchId:{}, GetScAll start.", batch->channel, commId, + batch->batchId); // allgather keyScLocal(key all2all keyScLocal = device all2all rc) - auto retCode = MPI_Allgather(keyScLocal.data(), rankInfo.rankSize, MPI_INT, - scAll.data(), rankInfo.rankSize, MPI_INT, - comm[batch->channel][commId]); + auto retCode = MPI_Allgather(keyScLocal.data(), rankInfo.rankSize, MPI_INT, scAll.data(), + rankInfo.rankSize, MPI_INT, comm[batch->channel][commId]); if (retCode != MPI_SUCCESS) { LOG_ERROR("rank {} commId {}, MPI_Allgather failed:{}", rankInfo.rankId, commId, retCode); } - LOG_DEBUG("channelId:{} threadId:{} batchId:{}, GetScAll MPI_Allgather end, key scAll matrix:\n{}", - batch->channel, commId, batch->batchId, VectorToString(scAll)); + LOG_DEBUG( + "channelId:{} threadId:{} batchId:{}, GetScAll MPI_Allgather end, key scAll matrix:\n{}", + batch->channel, commId, batch->batchId, VectorToString(scAll)); return scAll; } -void KeyProcess::GetScAllForUnique(const vector& keyScLocal, int commId, const unique_ptr &batch, - vector &scAllOut) +void KeyProcess::GetScAllForUnique(const vector& keyScLocal, int commId, + const unique_ptr& batch, vector& scAllOut) { EASY_FUNCTION() int channel = batch->channel; scAllOut.resize(rankInfo.rankSize * rankInfo.rankSize); // allgather keyScLocal(key all2all keyScLocal = device all2all rc) - auto retCode = MPI_Allgather(keyScLocal.data(), rankInfo.rankSize, MPI_INT, - scAllOut.data(), rankInfo.rankSize, MPI_INT, - comm[channel][commId]); + auto retCode = MPI_Allgather(keyScLocal.data(), rankInfo.rankSize, MPI_INT, scAllOut.data(), + rankInfo.rankSize, MPI_INT, 
comm[channel][commId]); if (retCode != MPI_SUCCESS) { LOG_ERROR("rank {}, MPI_Allgather failed:{}", rankInfo.rankId, retCode); } - LOG_DEBUG("channelId:{} threadId:{} batchId:{}, GetScAllForUnique end, key scAllOut matrix:\n{}", - channel, commId, batch->batchId, VectorToString(scAllOut)); + LOG_DEBUG( + "channelId:{} threadId:{} batchId:{}, GetScAllForUnique end, key scAllOut matrix:\n{}", + channel, commId, batch->batchId, VectorToString(scAllOut)); } void KeyProcess::Key2Offset(const EmbNameT& embName, KeysT& splitKey, int channel) { TimeCost key2OffsetTC; EASY_FUNCTION(profiler::colors::Blue600) - std::lock_guard lk(mut); // lock for PROCESS_THREAD + std::lock_guard lk(mut); // lock for PROCESS_THREAD auto& key2Offset = keyOffsetMap[embName]; - auto& maxOffsetTmp = maxOffset[embName]; + auto& maxOffsetTmp = maxOffset[embName]; auto& evictPos = evictPosMap[embName]; for (long& key : splitKey) { if (key == -1) { @@ -1077,8 +1126,9 @@ void KeyProcess::Key2Offset(const EmbNameT& embName, KeysT& splitKey, int channe size_t offset; // 新值, emb有pos可复用 offset = evictPos.back(); - LOG_TRACE("HBM mode, evictPos is not null, name[{}] key [{}] reuse offset [{}], evictSize [{}]!!!", - embName, key, offset, evictPos.size()); + LOG_TRACE("HBM mode, evictPos is not null, name[{}] key [{}] reuse offset [{}], " + "evictSize [{}]!!!", + embName, key, offset, evictPos.size()); key2Offset[key] = offset; key = offset; evictPos.pop_back(); @@ -1096,18 +1146,18 @@ void KeyProcess::Key2Offset(const EmbNameT& embName, KeysT& splitKey, int channe LOG_ERROR("dev cache overflow {} > {}", maxOffsetTmp, embInfos[embName].devVocabSize); throw std::runtime_error("dev cache overflow!"); } - LOG_DEBUG("current hbm emb:{}, usage:{}/{} key2OffsetTC({} ms)", - embName, maxOffsetTmp, embInfos[embName].devVocabSize, key2OffsetTC.ElapsedMS()); + LOG_DEBUG("current hbm emb:{}, usage:{}/{} key2OffsetTC({} ms)", embName, maxOffsetTmp, + embInfos[embName].devVocabSize, key2OffsetTC.ElapsedMS()); } void KeyProcess::Key2OffsetDynamicExpansion(const EmbNameT& embName, KeysT& splitKey, int channel) { TimeCost key2OffsetTC; EASY_FUNCTION(profiler::colors::Blue600) - std::lock_guard lk(mut); // lock for PROCESS_THREAD + std::lock_guard lk(mut); // lock for PROCESS_THREAD auto& key2Offset = keyOffsetMap[embName]; - auto& maxOffsetTmp = maxOffset[embName]; - auto& curEmbTable = embeddingTableMap[embName]; // empty when not use dynamic expansion + auto& maxOffsetTmp = maxOffset[embName]; + auto& curEmbTable = embeddingTableMap[embName]; // empty when not use dynamic expansion for (long& key : splitKey) { if (key == -1) { key = 0; @@ -1130,8 +1180,8 @@ void KeyProcess::Key2OffsetDynamicExpansion(const EmbNameT& embName, KeysT& spli key = 0; } } - LOG_DEBUG("current expansion emb:{}, usage:{}/{}, key2OffsetTC({} ms)", - embName, maxOffsetTmp, embInfos[embName].devVocabSize, key2OffsetTC.ElapsedMS()); + LOG_DEBUG("current expansion emb:{}, usage:{}/{}, key2OffsetTC({} ms)", embName, maxOffsetTmp, + embInfos[embName].devVocabSize, key2OffsetTC.ElapsedMS()); } /* @@ -1156,11 +1206,11 @@ void KeyProcess::BuildRestoreVec(const unique_ptr& batch, const vecto hotNum += 1; } } - LOG_DEBUG("hot num in all:{}/{} buildRestoreVecTC(ms):{}", - hotNum, batch->Size(), buildRestoreVecTC.ElapsedMS()); + LOG_DEBUG("hot num in all:{}/{} buildRestoreVecTC(ms):{}", hotNum, batch->Size(), + buildRestoreVecTC.ElapsedMS()); } -template +template T KeyProcess::GetInfo(info_list_t& list, int batch, const string& embName, int channel) { std::lock_guard lockGuard(mut); 
@@ -1170,7 +1220,8 @@ T KeyProcess::GetInfo(info_list_t& list, int batch, const string& embName, in } auto topBatch = get(list[embName][channel].top()); if (topBatch < batch) { - LOG_ERROR("wrong batch id, top:{} getting:{}, channel:{}, may not clear channel", topBatch, batch, channel); + LOG_ERROR("wrong batch id, top:{} getting:{}, channel:{}, may not clear channel", topBatch, + batch, channel); this_thread::sleep_for(1s); } if (topBatch != batch) { @@ -1198,8 +1249,10 @@ KeysT KeyProcess::GetLookupKeys(int batch, const string& embName, int channel) // 判断此时的batch id是否已经过期,即通道已经刷新 HybridMgmtBlock* hybridMgmtBlock = Singleton::GetInstance(); if (batch != hybridMgmtBlock->hybridBatchId[channel]) { - LOG_DEBUG(KEY_PROCESS "Detected that the batch has expired at this time, exiting the loop! {}[{}]:{}", - embName, channel, batch); + LOG_DEBUG( + KEY_PROCESS + "Detected that the batch has expired at this time, exiting the loop! {}[{}]:{}", + embName, channel, batch); return {}; } if (batch != 0 && channel != 0 && tc.ElapsedSec() > KEY_PROCESS_TIMEOUT) { @@ -1220,8 +1273,9 @@ KeysT KeyProcess::GetLookupKeys(int batch, const string& embName, int channel) SendEos(batch, channel); return {}; } - LOG_TRACE("getting info failed {}[{}], list is empty, and mgmt batchId: {}, readEmbKey batchId: {}.", - embName, channel, batch, readEmbKeyBatchId); + LOG_TRACE("getting info failed {}[{}], list is empty, and mgmt batchId: {}, readEmbKey " + "batchId: {}.", + embName, channel, batch, readEmbKeyBatchId); this_thread::sleep_for(1ms); } catch (WrongListTop&) { LOG_TRACE("getting info failed {}[{}]:{} wrong top", embName, channel, batch); @@ -1245,22 +1299,27 @@ void KeyProcess::SendEos(int batchId, int channel) vector tensors; bool isNeedResend = true; - for (const auto& emb: as_const(embInfos)) { // 一个表触发以后,其余表都发送eos,最后外层接收null退出此次循环 - LOG_INFO("channelId:{} batchId:{}, the embName:{} related channel SendEos start.", channel, batchId, emb.first); + for (const auto& emb : + as_const(embInfos)) { // 一个表触发以后,其余表都发送eos,最后外层接收null退出此次循环 + LOG_INFO("channelId:{} batchId:{}, the embName:{} related channel SendEos start.", channel, + batchId, emb.first); if (!isRunning) { throw EndRunExit("SendEos end run, isRunning is false after lock destroyMutex."); } for (const string& transName : usedChannelNames) { - string sendName = StringFormat("%s_%s_%d", emb.first.c_str(), transName.c_str(), channel); + string sendName = + StringFormat("%s_%s_%d", emb.first.c_str(), transName.c_str(), channel); size_t channelSize = 0; - + acltdtQueryChannelSize(transChannels[sendName], &channelSize); LOG_INFO("[EOS] Before send eos, {} contains {}.", sendName, channelSize); - SendTensorsByAcl(transChannels[sendName], ACL_TENSOR_DATA_END_OF_SEQUENCE, tensors, isNeedResend); + SendTensorsByAcl(transChannels[sendName], ACL_TENSOR_DATA_END_OF_SEQUENCE, tensors, + isNeedResend); acltdtQueryChannelSize(transChannels[sendName], &channelSize); LOG_INFO("[EOS] After send eos, {} contains {}.", sendName, channelSize); } - LOG_INFO("channelId:{} batchId:{}, the embName:{} related channel SendEos end.", channel, batchId, emb.first); + LOG_INFO("channelId:{} batchId:{}, the embName:{} related channel SendEos end.", channel, + batchId, emb.first); } LOG_INFO("channelId:{} batchId:{}, SendEos end.", channel, batchId); @@ -1274,7 +1333,8 @@ void KeyProcess::SendEos(int batchId, int channel) /// \param channel 通道索引(训练/推理) /// \param type 数据类型 /// \return -unique_ptr> KeyProcess::GetInfoVec(int batch, const string& embName, int channel, ProcessedInfo type) 
+unique_ptr> KeyProcess::GetInfoVec(int batch, const string& embName, int channel, + ProcessedInfo type) { TimeCost tc = TimeCost(); info_list_t* list; @@ -1299,7 +1359,9 @@ unique_ptr> KeyProcess::GetInfoVec(int batch, const string& embNa // 判断此时的batch id是否已经过期,即通道已经刷新 HybridMgmtBlock* hybridMgmtBlock = Singleton::GetInstance(); if (batch != hybridMgmtBlock->hybridBatchId[channel]) { - LOG_DEBUG(KEY_PROCESS "Detected that the batch has expired at this time, exiting the loop! {}[{}]:{}", + LOG_DEBUG( + KEY_PROCESS + "Detected that the batch has expired at this time, exiting the loop! {}[{}]:{}", embName, channel, batch); return nullptr; } @@ -1317,15 +1379,18 @@ unique_ptr> KeyProcess::GetInfoVec(int batch, const string& embNa return uTensor; } catch (EmptyList&) { unique_lock lockEosGuard(eosMutex); - // 避免eos在keyProcess还未处理完数据时插队到通道前面, readEmbKey真实的次数是readEmbedBatchId减1 - if (isNeedSendEos[channel] && (hybridMgmtBlock->readEmbedBatchId[channel] - 1) < batch) { + // 避免eos在keyProcess还未处理完数据时插队到通道前面, + // readEmbKey真实的次数是readEmbedBatchId减1 + if (isNeedSendEos[channel] && + (hybridMgmtBlock->readEmbedBatchId[channel] - 1) < batch) { LOG_INFO("channelId:{} batchId:{}, GetInfoVec eos.", channel, batch); unique_lock lockDestroyGuard(destroyMutex); SendEos(batch, channel); return nullptr; } - LOG_TRACE("getting info failed {}[{}], list is empty, and mgmt batchId: {}, readEmbKey batchId: {}.", - embName, channel, batch, (hybridMgmtBlock->readEmbedBatchId[channel] - 1)); + LOG_TRACE("getting info failed {}[{}], list is empty, and mgmt batchId: {}, readEmbKey " + "batchId: {}.", + embName, channel, batch, (hybridMgmtBlock->readEmbedBatchId[channel] - 1)); this_thread::sleep_for(1ms); } catch (WrongListTop&) { LOG_TRACE("getting info failed {}[{}]:{} wrong top", embName, channel, batch); @@ -1338,7 +1403,7 @@ void KeyProcess::SendA2A(const vector& a2aInfo, const string& embName, int { // 数据放到队列里,在mgmt里面发送(检查发送数据量) auto tensors = make_unique>(); - Tensor tmpTensor(tensorflow::DT_INT64, { rankInfo.rankSize, rankInfo.rankSize }); + Tensor tmpTensor(tensorflow::DT_INT64, {rankInfo.rankSize, rankInfo.rankSize}); auto tmpData = tmpTensor.matrix(); for (int i = 0; i < rankInfo.rankSize; ++i) { for (int j = 0; j < rankInfo.rankSize; ++j) { @@ -1358,13 +1423,13 @@ int KeyProcess::GetMaxStep(int channelId) const return rankInfo.ctrlSteps.at(channelId); } -void KeyProcess::EvictKeys(const string& embName, const vector& keys) // hbm +void KeyProcess::EvictKeys(const string& embName, const vector& keys) // hbm { LOG_INFO(KEY_PROCESS "hbm funEvictCall: [{}]! 
keySize:{}", embName, keys.size()); EmbeddingMgmt::Instance()->EvictKeys(embName, keys); } -void KeyProcess::EvictKeysCombine(const vector& keys) // hbm +void KeyProcess::EvictKeysCombine(const vector& keys) // hbm { LOG_INFO(KEY_PROCESS "hbm combine funEvictCall, keySize:{}", keys.size()); EmbeddingMgmt::Instance()->EvictKeysCombine(keys); @@ -1373,7 +1438,7 @@ void KeyProcess::EvictKeysCombine(const vector& keys) // hbm void KeyProcess::EvictDeleteDeviceEmb(const string& embName, const vector& keys) { EASY_FUNCTION(profiler::colors::Blue600) - std::lock_guard lk(mut); // lock for PROCESS_THREAD + std::lock_guard lk(mut); // lock for PROCESS_THREAD size_t keySize = keys.size(); auto& devHashMap = keyOffsetMap.at(embName); @@ -1387,7 +1452,7 @@ void KeyProcess::EvictDeleteDeviceEmb(const string& embName, const vectorsecond; @@ -1395,24 +1460,26 @@ void KeyProcess::EvictDeleteDeviceEmb(const string& embName, const vector offset) { if (offset.size() > embInfos[embName].devVocabSize) { LOG_ERROR("{} overflow! init evict dev, evictOffset size {} bigger than dev vocabSize {}", - embName, offset.size(), embInfos[embName].devVocabSize); + embName, offset.size(), embInfos[embName].devVocabSize); throw runtime_error( - Logger::Format("{} overflow! init evict dev, evictOffset size {} bigger than dev vocabSize {}", - embName, offset.size(), embInfos[embName].devVocabSize - ).c_str()); + Logger::Format( + "{} overflow! init evict dev, evictOffset size {} bigger than dev vocabSize {}", + embName, offset.size(), embInfos[embName].devVocabSize) + .c_str()); } vector tmpDataOut; Tensor tmpData = Vec2TensorI32(offset); tmpDataOut.emplace_back(tmpData); - tmpDataOut.emplace_back(Tensor(tensorflow::DT_INT32, { 1 })); + tmpDataOut.emplace_back(Tensor(tensorflow::DT_INT32, {1})); auto evictLen = tmpDataOut.back().flat(); int evictSize = static_cast(offset.size()); @@ -1422,15 +1489,16 @@ void KeyProcess::EvictInitDeviceEmb(const string& embName, vector offset auto trans = Singleton::GetInstance(); trans->Send(TransferChannel::EVICT, tmpDataOut, TRAIN_CHANNEL_ID, embName); - LOG_INFO(KEY_PROCESS "hbm EvictInitDeviceEmb: [{}]! send offsetSize:{}", embName, offset.size()); + LOG_INFO(KEY_PROCESS "hbm EvictInitDeviceEmb: [{}]! 
send offsetSize:{}", embName, + offset.size()); } -string KeyProcess::DumpSplitKeys(vector> &splitKeys) const +string KeyProcess::DumpSplitKeys(vector>& splitKeys) const { stringstream ssTrace; for (int devId = 0; devId < rankInfo.rankSize; ++devId) { ssTrace << '|' << devId << ":"; - for (auto key: splitKeys[devId]) { + for (auto key : splitKeys[devId]) { ssTrace << key << ','; } ssTrace << '|'; diff --git a/src/core/key_process/key_process.h b/src/core/key_process/key_process.h index 8bd7b8d0..d6a0b80b 100644 --- a/src/core/key_process/key_process.h +++ b/src/core/key_process/key_process.h @@ -16,283 +16,291 @@ See the License for the specific language governing permissions and #ifndef MX_REC_KEY_PROCESS_H #define MX_REC_KEY_PROCESS_H -#include +#include +#include + #include #include +#include #include #include -#include - -#include -#include -#include "ock_ctr_common/include/factory.h" +#include -#include "utils/common.h" #include "emb_table/emb_table.h" #include "feature_admit_and_evict.h" #include "hybrid_mgmt/hybrid_mgmt_block.h" +#include "ock_ctr_common/include/factory.h" +#include "utils/common.h" #include "utils/singleton.h" namespace MxRec { - using namespace std; +using namespace std; - template - struct Cmp { - bool operator()(const T& a, const T& b) const - { - return get(a) > get(b); // batch id order - } - }; +template +struct Cmp { + bool operator()(const T& a, const T& b) const + { + return get(a) > get(b); // batch id order + } +}; - template - using heap_t = priority_queue, Cmp>; +template +using heap_t = priority_queue, Cmp>; - template - using info_list_t = map, MAX_QUEUE_NUM>>; +template +using info_list_t = map, MAX_QUEUE_NUM>>; - enum class ProcessedInfo { - RESTORE, - ALL2ALL, - INVALID - }; +enum class ProcessedInfo { + RESTORE, + ALL2ALL, + INVALID +}; - class EndRunExit : public std::exception { - public: - explicit EndRunExit(const char* message) : errorMessage(message) {} +class EndRunExit : public std::exception { +public: + explicit EndRunExit(const char* message) : errorMessage(message) {} - const char* what() const noexcept override - { - return errorMessage; - } + const char* what() const noexcept override + { + return errorMessage; + } - private: - const char* errorMessage; - }; +private: + const char* errorMessage; +}; - constexpr int MPI_ABNORMAL_SEND_VALUE = 0; // MPI异常通信时发送0 - constexpr int MPI_NORMAL_SEND_VALUE = 1; // MPI正常通信时发送1 +constexpr int MPI_ABNORMAL_SEND_VALUE = 0; // MPI异常通信时发送0 +constexpr int MPI_NORMAL_SEND_VALUE = 1; // MPI正常通信时发送1 - class EmptyList : public std::exception { - }; +class EmptyList : public std::exception {}; - class WrongListTop : public std::exception { - }; +class WrongListTop : public std::exception {}; - class KeyProcess { - public: - bool Initialize(const RankInfo& rInfo, const vector& eInfos, - const vector& thresholdValues = {}, int seed = 0); +class KeyProcess { +public: + bool Initialize(const RankInfo& rInfo, const vector& eInfos, + const vector& thresholdValues = {}, int seed = 0); - unique_ptr> GetInfoVec(int batch, const string& embName, int channel, ProcessedInfo type); + unique_ptr> GetInfoVec(int batch, const string& embName, int channel, + ProcessedInfo type); - KeysT GetLookupKeys(int batch, const string& embName, int channel); + KeysT GetLookupKeys(int batch, const string& embName, int channel); - int GetMaxStep(int channelId) const; + int GetMaxStep(int channelId) const; - OffsetMemT GetMaxOffset(); + OffsetMemT GetMaxOffset(); - KeyOffsetMemT GetKeyOffsetMap(); + KeyOffsetMemT GetKeyOffsetMap(); - 
KeyCountMemT GetKeyCountMap(); + KeyCountMemT GetKeyCountMap(); - FeatureAdmitAndEvict& GetFeatAdmitAndEvict(); + FeatureAdmitAndEvict& GetFeatAdmitAndEvict(); - void LoadMaxOffset(OffsetMemT& loadData); + void LoadMaxOffset(OffsetMemT& loadData); - void LoadKeyOffsetMap(KeyOffsetMemT& loadData); + void LoadKeyOffsetMap(KeyOffsetMemT& loadData); - void LoadKeyCountMap(KeyCountMemT& loadData); + void LoadKeyCountMap(KeyCountMemT& loadData); - void Destroy(); + void Destroy(); - void LoadSaveLock(); + void LoadSaveLock(); - void LoadSaveUnlock(); + void LoadSaveUnlock(); - void EvictKeys(const string& embName, const vector& keys); + void EvictKeys(const string& embName, const vector& keys); - void EvictKeysCombine(const vector& keys); + void EvictKeysCombine(const vector& keys); - void SetupHotEmbUpdateStep(); + void SetupHotEmbUpdateStep(); - int64_t GetExpansionTableSize(const string& embName); + int64_t GetExpansionTableSize(const string& embName); - int64_t GetExpansionTableCapacity(const string& embName); + int64_t GetExpansionTableCapacity(const string& embName); - void RecordKeyCountMap(const unique_ptr& batch); + void RecordKeyCountMap(const unique_ptr& batch); - template - void GlobalUnique(T& lookupKeys, T& uniqueKeys, vector& restoreVecSec) - { - absl::flat_hash_map umap; - restoreVecSec.resize(lookupKeys.size(), -1); - int32_t length = 0; + template + void GlobalUnique(T& lookupKeys, T& uniqueKeys, vector& restoreVecSec) + { + absl::flat_hash_map umap; + restoreVecSec.resize(lookupKeys.size(), -1); + int32_t length = 0; - for (size_t i = 0; i < lookupKeys.size(); ++i) { - int64_t key = lookupKeys[i]; - if (rankInfo.useStatic && ( - (!rankInfo.useDynamicExpansion && key == -1) || (rankInfo.useDynamicExpansion && key == 0))) { - continue; - } + for (size_t i = 0; i < lookupKeys.size(); ++i) { + int64_t key = lookupKeys[i]; + if (rankInfo.useStatic && ((!rankInfo.useDynamicExpansion && key == -1) || + (rankInfo.useDynamicExpansion && key == 0))) { + continue; + } - auto result = umap.find(key); - if (result == umap.end()) { - uniqueKeys.push_back(lookupKeys[i]); - umap[key] = length; - restoreVecSec[i] = length; - length++; - } else { - restoreVecSec[i] = result->second; - } + auto result = umap.find(key); + if (result == umap.end()) { + uniqueKeys.push_back(lookupKeys[i]); + umap[key] = length; + restoreVecSec[i] = length; + length++; + } else { + restoreVecSec[i] = result->second; } + } - if (rankInfo.useStatic) { - if (rankInfo.useDynamicExpansion) { - uniqueKeys.resize(lookupKeys.size(), 0); - } else { - uniqueKeys.resize(lookupKeys.size(), -1); - } + if (rankInfo.useStatic) { + if (rankInfo.useDynamicExpansion) { + uniqueKeys.resize(lookupKeys.size(), 0); + } else { + uniqueKeys.resize(lookupKeys.size(), -1); } } + } + + void SetEos(int status, int channelId); - void SetEos(int status, int channelId); + void SendEos(int batchId, int channel); - void SendEos(int batchId, int channel); + bool isRunning{false}; - bool isRunning { false }; + std::mutex destroyMutex; + std::mutex eosMutex; + inline bool HasEmbName(const string& embName) + { + return embInfos.find(embName) != embInfos.end(); + }; + GTEST_PRIVATE : - std::mutex destroyMutex; - std::mutex eosMutex; - inline bool HasEmbName(const string& embName) - { - return embInfos.find(embName) != embInfos.end(); - }; - GTEST_PRIVATE: + int + Start(); - int Start(); + template + T GetInfo(info_list_t& list, int batch, const string& embName, int channel); - template - T GetInfo(info_list_t& list, int batch, const string& embName, int 
channel); + RankInfo rankInfo; + map embInfos; + MPI_Comm comm[MAX_CHANNEL_NUM][MAX_KEY_PROCESS_THREAD]; + std::mutex mut{}; + vector> procThreads{}; + std::mutex loadSaveMut[MAX_CHANNEL_NUM][MAX_KEY_PROCESS_THREAD]{}; + info_list_t lookupKeysList; + list>> storage; + info_list_t infoList; + info_list_t all2AllList; + map maxOffset{}; + map> keyOffsetMap{}; + map> keyCountMap{}; + FeatureAdmitAndEvict m_featureAdmitAndEvict{}; + map> evictPosMap{}; + map> hotKey{}; + map hotEmbTotCount; + map embeddingTableMap{}; + ock::ctr::FactoryPtr factory{}; + int hotEmbUpdateStep = HOT_EMB_UPDATE_STEP_DEFAULT; + bool isWithFAAE; + bool isNeedSendEos[2] = {0, 0}; // 分别代表通道0、1的eos状态 - RankInfo rankInfo; - map embInfos; - MPI_Comm comm[MAX_CHANNEL_NUM][MAX_KEY_PROCESS_THREAD]; - std::mutex mut {}; - vector> procThreads {}; - std::mutex loadSaveMut[MAX_CHANNEL_NUM][MAX_KEY_PROCESS_THREAD] {}; - info_list_t lookupKeysList; - list>> storage; - info_list_t infoList; - info_list_t all2AllList; - map maxOffset {}; - map> keyOffsetMap {}; - map> keyCountMap {}; - FeatureAdmitAndEvict m_featureAdmitAndEvict {}; - map> evictPosMap {}; - map> hotKey {}; - map hotEmbTotCount; - map embeddingTableMap {}; - ock::ctr::FactoryPtr factory {}; - int hotEmbUpdateStep = HOT_EMB_UPDATE_STEP_DEFAULT; - bool isWithFAAE; - bool isNeedSendEos[2] = { 0, 0 }; // 分别代表通道0、1的eos状态 + void InitHotEmbTotCount(const EmbInfo& info, const RankInfo& rInfo); - void InitHotEmbTotCount(const EmbInfo& info, const RankInfo& rInfo); + void KeyProcessTask(int channel, int threadId); - void KeyProcessTask(int channel, int threadId); + void KeyProcessTaskWithFastUnique(int channel, int threadId); - void KeyProcessTaskWithFastUnique(int channel, int threadId); + bool KeyProcessTaskHelper(unique_ptr& batch, int channel, int threadId); - bool KeyProcessTaskHelper(unique_ptr& batch, int channel, int threadId); + bool KeyProcessTaskHelperWithFastUnique(unique_ptr& batch, + ock::ctr::UniquePtr& unique, int channel, int threadId); - bool KeyProcessTaskHelperWithFastUnique(unique_ptr &batch, ock::ctr::UniquePtr& unique, - int channel, int threadId); + tuple, vector> ProcessSplitKeys(const unique_ptr& batch, + int id, vector& splitKeys); - tuple, vector> ProcessSplitKeys(const unique_ptr& batch, - int id, vector& splitKeys); + void GetUniqueConfig(ock::ctr::UniqueConf& uniqueConf); - void GetUniqueConfig(ock::ctr::UniqueConf& uniqueConf); + void InitializeUnique(ock::ctr::UniqueConf& uniqueConf, size_t& preBatchSize, + bool& uniqueInitialize, const unique_ptr& batch, + ock::ctr::UniquePtr& unique); - void InitializeUnique(ock::ctr::UniqueConf& uniqueConf, size_t& preBatchSize, bool& uniqueInitialize, - const unique_ptr & batch, ock::ctr::UniquePtr& unique); + void ProcessBatchWithFastUnique(const unique_ptr& batch, ock::ctr::UniquePtr& unique, + int id, UniqueInfo& uniqueInfoOut); - void ProcessBatchWithFastUnique(const unique_ptr &batch, ock::ctr::UniquePtr& unique, - int id, UniqueInfo& uniqueInfoOut); + size_t GetKeySize(const unique_ptr& batch); - size_t GetKeySize(const unique_ptr &batch); + void All2All(vector& sc, int id, const unique_ptr& batch, + KeySendInfo& keySendInfo, All2AllInfo& all2AllInfoOut); - void All2All(vector& sc, int id, const unique_ptr &batch, KeySendInfo& keySendInfo, - All2AllInfo& all2AllInfoOut); + auto HashSplit(const unique_ptr& batch) const + -> tuple, vector>; - auto HashSplit(const unique_ptr& batch) const -> tuple, vector>; + auto HotHashSplit(const unique_ptr& batch) + -> tuple, vector, vector>; - auto 
HotHashSplit(const unique_ptr& batch) -> tuple, vector, vector>; + void PaddingAlltoallVC(vector& splitKeys) const; - void PaddingAlltoallVC(vector& splitKeys) const; + tuple, vector, vector>> HashSplitWithFAAE( + const unique_ptr& batch) const; - tuple, vector, vector>> - HashSplitWithFAAE(const unique_ptr& batch) const; + vector GetScAll(const vector& keyScLocal, int commId, + const unique_ptr& batch); - vector GetScAll(const vector& keyScLocal, int commId, const unique_ptr& batch); + void GetScAllForUnique(const vector& keyScLocal, int commId, + const unique_ptr& batch, vector& scAllOut); - void GetScAllForUnique(const vector& keyScLocal, int commId, const unique_ptr &batch, - vector &scAllOut); + void Key2Offset(const EmbNameT& embName, KeysT& splitKey, int channel); - void Key2Offset(const EmbNameT& embName, KeysT& splitKey, int channel); + void Key2OffsetDynamicExpansion(const EmbNameT& embName, KeysT& splitKey, int channel); - void Key2OffsetDynamicExpansion(const EmbNameT& embName, KeysT& splitKey, int channel); + unique_ptr GetBatchData(int channel, int commId) const; - unique_ptr GetBatchData(int channel, int commId) const; + void BuildRestoreVec(const unique_ptr& batch, const vector& blockOffset, + vector& restoreVec, int hotPosSize = 0) const; - void BuildRestoreVec(const unique_ptr& batch, const vector& blockOffset, - vector& restoreVec, int hotPosSize = 0) const; - - void SendA2A(const vector& a2aInfo, const string& embName, int channel, int batch); + void SendA2A(const vector& a2aInfo, const string& embName, int channel, int batch); - void EvictDeleteDeviceEmb(const string& embName, const vector& keys); + void EvictDeleteDeviceEmb(const string& embName, const vector& keys); - void EvictInitDeviceEmb(const string& embName, vector offset); + void EvictInitDeviceEmb(const string& embName, vector offset); - void UpdateHotMap(absl::flat_hash_map& keyCountMapByEmbName, uint32_t count, bool refresh, - const string& embName); + void UpdateHotMap(absl::flat_hash_map& keyCountMapByEmbName, uint32_t count, + bool refresh, const string& embName); - void UpdateHotMapForUnique(const KeysT &keySend, const vector &keyCount, - uint32_t count, bool refresh, const string& embName); + void UpdateHotMapForUnique(const KeysT& keySend, const vector& keyCount, + uint32_t count, bool refresh, const string& embName); - void HandleHotAndSendCount(const unique_ptr &batch, UniqueInfo& uniqueInfoOut, - KeySendInfo& keySendInfo, vector& sc, vector& splitSize); + void HandleHotAndSendCount(const unique_ptr& batch, UniqueInfo& uniqueInfoOut, + KeySendInfo& keySendInfo, vector& sc, vector& splitSize); - void PushResult(unique_ptr& batch, unique_ptr> tensors, KeysT& lookupKeys); + void PushResult(unique_ptr& batch, unique_ptr> tensors, + KeysT& lookupKeys); - void PushGlobalUniqueTensors(const unique_ptr>& tensors, KeysT& lookupKeys, int channel); + void PushGlobalUniqueTensors(const unique_ptr>& tensors, KeysT& lookupKeys, + int channel); - void AddCountStartToHotPos(vector& splitKeys, vector& hotPos, const vector& hotPosDev, - const unique_ptr& batch); + void AddCountStartToHotPos(vector& splitKeys, vector& hotPos, + const vector& hotPosDev, const unique_ptr& batch); - void ComputeHotPos(const unique_ptr &batch, absl::flat_hash_map &hotMap, - vector &hotPos, vector &restore, const int hotOffset) const; + void ComputeHotPos(const unique_ptr& batch, + absl::flat_hash_map& hotMap, vector& hotPos, + vector& restore, const int hotOffset) const; - vector GetCountRecv(const unique_ptr& batch, int id, - vector>& 
                                keyCount, vector scAll, vector ss);
+    vector GetCountRecv(const unique_ptr& batch, int id,
+                        vector>& keyCount, vector scAll,
+                        vector ss);

-    void HashSplitHelper(const unique_ptr & batch, vector & splitKeys,
-                         vector & restore, vector & hotPos,
-                         vector >& keyCount);
+    void HashSplitHelper(const unique_ptr& batch, vector& splitKeys,
+                         vector& restore, vector& hotPos,
+                         vector>& keyCount);

-    template
-    inline vector Count2Start(const vector& count) const
-    {
-        vector start = { 0 };
-        for (size_t i = 0; i < count.size() - 1; ++i) {
-            start.push_back(count[i] + start.back());
-        }
-        return start;
+    template
+    inline vector Count2Start(const vector& count) const
+    {
+        vector start = {0};
+        for (size_t i = 0; i < count.size() - 1; ++i) {
+            start.push_back(count[i] + start.back());
        }
+        return start;
+    }

-    string DumpSplitKeys(vector>& splitKeys) const;
-    };
+    string DumpSplitKeys(vector>& splitKeys) const;
+};

 #define KEY_PROCESS_INSTANCE Singleton::GetInstance()
-} // end namespace MxRec
+} // end namespace MxRec

-#endif // MX_REC_KEY_PROCESS_H
+#endif // MX_REC_KEY_PROCESS_H
--
Gitee

From d6db1b2256f2a7d70d67652b20735dd52b35f822 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E7=BD=97=E5=B9=B8=E8=BF=90?=
Date: Mon, 22 Apr 2024 16:22:54 +0800
Subject: [PATCH 053/302] =?UTF-8?q?=E3=80=90=E4=BF=AE=E6=94=B9=E8=AF=B4?=
 =?UTF-8?q?=E6=98=8E=20Modification=E3=80=91=E6=A0=B9=E6=8D=AE=E4=BC=98?=
 =?UTF-8?q?=E5=8C=96=E5=99=A8=E7=B1=BB=E5=9E=8B=E8=87=AA=E5=8A=A8=E5=88=A4?=
 =?UTF-8?q?=E6=96=AD=E6=98=AF=E5=90=A6=E5=BC=80=E5=90=AF=E5=85=A8=E5=B1=80?=
 =?UTF-8?q?=E5=8E=BB=E9=87=8D=E7=89=B9=E6=80=A7?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 mx_rec/optimizers/adagrad.py   | 20 +++++++++++++++++---
 mx_rec/optimizers/ftrl.py      | 22 ++++++++++++++--------
 mx_rec/optimizers/lazy_adam.py | 15 ++++++++++-----
 3 files changed, 41 insertions(+), 16 deletions(-)

diff --git a/mx_rec/optimizers/adagrad.py b/mx_rec/optimizers/adagrad.py
index 4ba444a6..fe8a0a2d 100644
--- a/mx_rec/optimizers/adagrad.py
+++ b/mx_rec/optimizers/adagrad.py
@@ -21,6 +21,7 @@ from __future__ import print_function

 from collections import defaultdict

+from tensorflow.python.framework import ops
 from tensorflow.python.ops import init_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.training import adagrad, training_ops
@@ -129,13 +130,26 @@ class CustomizedAdagrad(adagrad.AdagradOptimizer, CustomizedOptimizer):
             self._get_or_make_slot_with_initializer(var, init, var.get_shape(), dtype, "acc",
                                                     acc_state_name)

+    def _apply_sparse_duplicate_indices(self, grad, var):
+        # The _apply_sparse_duplicate_indices method includes tf.unique and unsorted_segment_sum operations, which
+        # may introduce dynamic shape problems; if you encounter that, uncomment the method below.
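[Editor's note, illustration only, not part of the patch: sum_same_id_gradients is this project's replacement for TensorFlow's stock duplicate-index handling, which leans on tf.unique and unsorted_segment_sum and therefore produces dynamic shapes. The aggregation itself just sums the gradient rows that share a lookup id. A minimal NumPy sketch of that behaviour; the toy ids and values are invented, and this is not the library's actual implementation:

    import numpy as np

    indices = np.array([3, 7, 3, 1])                         # id 3 appears twice
    grads = np.array([[1., 1.], [2., 2.], [3., 3.], [4., 4.]])

    # Deduplicate the ids and accumulate the gradient rows of each duplicate.
    unique_keys, inverse = np.unique(indices, return_inverse=True)
    summed = np.zeros((unique_keys.size, grads.shape[1]))
    np.add.at(summed, inverse, grads)

    # unique_keys -> [1 3 7]; the row for id 3 is [4., 4.] (rows 0 and 2 summed).

The override then wraps the deduplicated (values, indices) pair in an ops.IndexedSlices and hands it to _apply_sparse, as the added lines below show.]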
+        unique_local_grad, unique_keys = self.sum_same_id_gradients(grad=grad.values, var=var, is_expansion=False)
+        gradient_no_duplicate_indices = ops.IndexedSlices(
+            indices=unique_keys,
+            values=unique_local_grad,
+            dense_shape=grad.dense_shape)
+        return self._apply_sparse(gradient_no_duplicate_indices, var)
+
+    def _resource_apply_sparse_duplicate_indices(self, grad, handle, indices):
+        unique_local_grad, unique_keys = self.sum_same_id_gradients(grad=grad, var=handle, is_expansion=False)
+        return self._resource_apply_sparse(unique_local_grad, handle, unique_keys)
+
     def _apply_sparse(self, grad, var):
         acc = self.get_slot(var, "acc")
-        unique_local_grad, unique_keys = self.sum_same_id_gradients(grad=grad.values, var=var, is_expansion=False)
         return training_ops.sparse_apply_adagrad(
             var, acc, math_ops.cast(self._learning_rate_tensor, var.dtype.base_dtype),
-            unique_local_grad,
-            unique_keys,
+            grad.values,
+            grad.indices,
             use_locking=self._use_locking)

     def _resource_apply_sparse(self, grad, var, indices):
diff --git a/mx_rec/optimizers/ftrl.py b/mx_rec/optimizers/ftrl.py
index 3659ffcd..855fa9c4 100644
--- a/mx_rec/optimizers/ftrl.py
+++ b/mx_rec/optimizers/ftrl.py
@@ -120,10 +120,18 @@ class CustomizedFtrl(ftrl.FtrlOptimizer, CustomizedOptimizer):
         return [self._initial_accumulator_value, initial_linear_value]

     def _apply_sparse_duplicate_indices(self, grad, var):
-        return self._apply_sparse(grad, var)
+        # The _apply_sparse_duplicate_indices method includes tf.unique and unsorted_segment_sum operations, which
+        # may introduce dynamic shape problems; if you encounter that, uncomment the method below.
+        unique_local_grad, unique_keys = self.sum_same_id_gradients(grad=grad.values, var=var, is_expansion=False)
+        gradient_no_duplicate_indices = ops.IndexedSlices(
+            indices=unique_keys,
+            values=unique_local_grad,
+            dense_shape=grad.dense_shape)
+        return self._apply_sparse(gradient_no_duplicate_indices, var)

     def _resource_apply_sparse_duplicate_indices(self, grad, handle, indices):
-        return self._resource_apply_sparse(grad, handle, indices)
+        unique_local_grad, unique_keys = self.sum_same_id_gradients(grad=grad, var=handle, is_expansion=False)
+        return self._resource_apply_sparse(unique_local_grad, handle, unique_keys)

     def _resource_apply_sparse(self, grad, handle, indices):
         if self._l2_shrinkage_regularization_strength <= 0.0:
@@ -140,19 +148,17 @@ class CustomizedFtrl(ftrl.FtrlOptimizer, CustomizedOptimizer):
                                              self._resource_scatter_nd_update)

     def _apply_sparse(self, grad, var):
-        unique_local_grad, unique_keys = self.sum_same_id_gradients(grad=grad.values, var=var, is_expansion=False)
-
         if self._l2_shrinkage_regularization_strength <= 0.0:
             return self._apply_sparse_shared(
-                unique_local_grad,
+                grad.values,
                 var,
-                unique_keys,
+                grad.indices,
                 lambda x, i, v: tf.compat.v1.scatter_nd_update(x, i, v))
         else:
             return self._apply_sparse_shared_v2(
-                unique_local_grad,
+                grad.values,
                 var,
-                unique_keys,
+                grad.indices,
                 lambda x, i, v: tf.compat.v1.scatter_nd_update(x, i, v))

     def _apply_sparse_shared(self, grad, var, indices, scatter_nd_update):
diff --git a/mx_rec/optimizers/lazy_adam.py b/mx_rec/optimizers/lazy_adam.py
index bab8245f..6ac7e844 100644
--- a/mx_rec/optimizers/lazy_adam.py
+++ b/mx_rec/optimizers/lazy_adam.py
@@ -119,10 +119,16 @@ class CustomizedLazyAdam(adam.AdamOptimizer, CustomizedOptimizer):

     def _apply_sparse_duplicate_indices(self, grad, var):
         # _apply_sparse_duplicate_indices method include tf.unique and unsorted_segment_sum operations which may
         # introduce dynamic shape problem,
if encounter that, please de-annotation the method below. - return self._apply_sparse(grad, var) + unique_local_grad, unique_keys = self.sum_same_id_gradients(grad=grad.values, var=var, is_expansion=False) + gradient_no_duplicate_indices = ops.IndexedSlices( + indices=unique_keys, + values=unique_local_grad, + dense_shape=grad.dense_shape) + return self._apply_sparse(gradient_no_duplicate_indices, var) def _resource_apply_sparse_duplicate_indices(self, grad, handle, indices): - return self._resource_apply_sparse(grad, handle, indices) + unique_local_grad, unique_keys = self.sum_same_id_gradients(grad=grad, var=handle, is_expansion=False) + return self._resource_apply_sparse(unique_local_grad, handle, unique_keys) def _apply_dense(self, grad, var): raise NotImplementedError("You are using a wrong type of variable.") @@ -149,11 +155,10 @@ class CustomizedLazyAdam(adam.AdamOptimizer, CustomizedOptimizer): self._resource_scatter_nd_add) def _apply_sparse(self, grad, var): - unique_local_grad, unique_keys = self.sum_same_id_gradients(grad=grad.values, var=var, is_expansion=False) return self._apply_sparse_shared( - unique_local_grad, + grad.values, var, - unique_keys, + grad.indices, lambda x, i, v: tf.compat.v1.scatter_nd_add(x, i, v)) def _apply_sparse_shared(self, grad, var, indices, scatter_nd_add): -- Gitee From ba920189251739b7654296b881cdb9501f49eef3 Mon Sep 17 00:00:00 2001 From: steepcurve Date: Mon, 22 Apr 2024 16:48:57 +0800 Subject: [PATCH 054/302] =?UTF-8?q?=E6=9B=B4=E6=94=B9=E8=A1=8C=E5=AE=BD?= =?UTF-8?q?=E5=92=8C=E4=BA=8C=E5=85=83=E8=BF=90=E7=AE=97=E7=AC=A6=E9=85=8D?= =?UTF-8?q?=E7=BD=AE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .clang-format | 4 +- src/core/key_process/key_process.cpp | 351 +++++++++++---------------- src/core/key_process/key_process.h | 82 +++---- 3 files changed, 181 insertions(+), 256 deletions(-) diff --git a/.clang-format b/.clang-format index f1f5b0d0..ee9f3a3c 100644 --- a/.clang-format +++ b/.clang-format @@ -1,7 +1,7 @@ Language: Cpp BasedOnStyle: Google AccessModifierOffset: -4 -ColumnLimit: 100 +ColumnLimit: 120 IndentWidth: 4 UseTab: Never AlignOperands: Align @@ -34,7 +34,7 @@ BraceWrapping: BeforeCatch: false BeforeElse: false IndentBraces: false -BreakBeforeBinaryOperators: None +BreakBeforeBinaryOperators: NonAssignment BreakBeforeTernaryOperators: true BreakConstructorInitializers: BeforeColon BreakStringLiterals: true diff --git a/src/core/key_process/key_process.cpp b/src/core/key_process/key_process.cpp index 85b17bbb..9751e268 100644 --- a/src/core/key_process/key_process.cpp +++ b/src/core/key_process/key_process.cpp @@ -83,13 +83,12 @@ bool KeyProcess::Initialize(const RankInfo& rInfo, const vector& eInfos if (GlobalEnv::fastUnique) { int result = ock::ctr::Factory::Create(factory); if (result != 0) { - throw runtime_error( - Logger::Format("create fast factory failed, error code:{}", result)); + throw runtime_error(Logger::Format("create fast factory failed, error code:{}", result)); } } - LOG_INFO(KEY_PROCESS "scInfo:{}, localRankSize:{}, rankSize:{}, useStatic:{}", - MapToString(scInfo), rInfo.localRankSize, rInfo.rankSize, rInfo.useStatic); + LOG_INFO(KEY_PROCESS "scInfo:{}, localRankSize:{}, rankSize:{}, useStatic:{}", MapToString(scInfo), + rInfo.localRankSize, rInfo.rankSize, rInfo.useStatic); #ifndef GTEST Start(); #endif @@ -135,9 +134,8 @@ void KeyProcess::InitHotEmbTotCount(const EmbInfo& info, const RankInfo& rInfo) if (rankInfo.useDynamicExpansion) { embeddingSize = 
info.embeddingSize; } - hotEmbTotCount[info.name] = - static_cast(static_cast(GetUBSize(rInfo.deviceId) / sizeof(float)) * - HOT_EMB_CACHE_PCT / static_cast(embeddingSize)); + hotEmbTotCount[info.name] = static_cast(static_cast(GetUBSize(rInfo.deviceId) / sizeof(float)) + * HOT_EMB_CACHE_PCT / static_cast(embeddingSize)); } OffsetMemT KeyProcess::GetMaxOffset() @@ -229,9 +227,8 @@ void KeyProcess::GetUniqueConfig(ock::ctr::UniqueConf& uniqueConf) uniqueConf.maxThreadNum = GlobalEnv::maxUniqueThreadNum; } -void KeyProcess::InitializeUnique(ock::ctr::UniqueConf& uniqueConf, size_t& preBatchSize, - bool& uniqueInitialize, const unique_ptr& batch, - ock::ctr::UniquePtr& unique) +void KeyProcess::InitializeUnique(ock::ctr::UniqueConf& uniqueConf, size_t& preBatchSize, bool& uniqueInitialize, + const unique_ptr& batch, ock::ctr::UniquePtr& unique) { uniqueConf.desiredSize = static_cast(batch->Size()); if (preBatchSize != batch->Size()) { @@ -273,8 +270,7 @@ void KeyProcess::KeyProcessTaskWithFastUnique(int channel, int threadId) while (true) { TimeCost getAndProcessTC; TimeCost getBatchDataTC; - batch = - GetBatchData(channel, threadId); // get batch data from SingletonQueue + batch = GetBatchData(channel, threadId); // get batch data from SingletonQueue LOG_DEBUG("getBatchDataTC(ms):{}", getBatchDataTC.ElapsedMS()); if (batch == nullptr) { break; @@ -286,11 +282,10 @@ void KeyProcess::KeyProcessTaskWithFastUnique(int channel, int threadId) if (!KeyProcessTaskHelperWithFastUnique(batch, unique, channel, threadId)) { break; } - LOG_INFO(KEY_PROCESS - "getAndProcessTC(ms):{}, key process with fast unique cost:{}," - " get data time(ms):{}, batch name:{}, channelId:{}, threadId:{}, batchId:{}", - getAndProcessTC.ElapsedMS(), processDataTime.ElapsedMS(), getBatchTime, - batch->name, batch->channel, threadId, batch->batchId); + LOG_INFO(KEY_PROCESS "getAndProcessTC(ms):{}, key process with fast unique cost:{}," + " get data time(ms):{}, batch name:{}, channelId:{}, threadId:{}, batchId:{}", + getAndProcessTC.ElapsedMS(), processDataTime.ElapsedMS(), getBatchTime, batch->name, + batch->channel, threadId, batch->batchId); int queueIndex = threadId + (MAX_KEY_PROCESS_THREAD * batch->channel); auto batchQueue = SingletonQueue::GetInstances(queueIndex); batchQueue->PutDirty(move(batch)); @@ -299,8 +294,8 @@ void KeyProcess::KeyProcessTaskWithFastUnique(int channel, int threadId) } catch (const EndRunExit& e) { LOG_INFO(KEY_PROCESS "channel: {}, thread: {}, abort run: {}", channel, threadId, e.what()); } - LOG_INFO(KEY_PROCESS "KeyProcessTaskWithFastUnique exit. rank:{} channelId:{}, threadId:{}", - rankInfo.rankId, channel, threadId); + LOG_INFO(KEY_PROCESS "KeyProcessTaskWithFastUnique exit. 
rank:{} channelId:{}, threadId:{}", rankInfo.rankId, + channel, threadId); } void KeyProcess::KeyProcessTask(int channel, int threadId) @@ -310,8 +305,7 @@ void KeyProcess::KeyProcessTask(int channel, int threadId) while (true) { TimeCost getAndProcessTC; TimeCost getBatchDataTC; - batch = - GetBatchData(channel, threadId); // get batch data from SingletonQueue + batch = GetBatchData(channel, threadId); // get batch data from SingletonQueue LOG_DEBUG("getBatchDataTC(ms):{}", getBatchDataTC.ElapsedMS()); if (batch == nullptr) { break; @@ -322,11 +316,10 @@ void KeyProcess::KeyProcessTask(int channel, int threadId) if (!KeyProcessTaskHelper(batch, channel, threadId)) { break; } - LOG_INFO(KEY_PROCESS - "getAndProcessTC(ms):{}, key process cost:{}," - " get data time(ms):{}, batch name:{}, channelId:{}, threadId:{}, batchId:{}", - getAndProcessTC.ElapsedMS(), processDataTime.ElapsedMS(), getBatchTime, - batch->name, batch->channel, threadId, batch->batchId); + LOG_INFO(KEY_PROCESS "getAndProcessTC(ms):{}, key process cost:{}," + " get data time(ms):{}, batch name:{}, channelId:{}, threadId:{}, batchId:{}", + getAndProcessTC.ElapsedMS(), processDataTime.ElapsedMS(), getBatchTime, batch->name, + batch->channel, threadId, batch->batchId); int queueIndex = threadId + (MAX_KEY_PROCESS_THREAD * batch->channel); auto batchQueue = SingletonQueue::GetInstances(queueIndex); batchQueue->PutDirty(move(batch)); @@ -334,17 +327,15 @@ void KeyProcess::KeyProcessTask(int channel, int threadId) } catch (const EndRunExit& e) { LOG_INFO(KEY_PROCESS "channel: {}, thread: {}, abort run: {}", channel, threadId, e.what()); } - LOG_INFO(KEY_PROCESS "KeyProcessTask exit. rank:{} channelId:{}, threadId:{}", rankInfo.rankId, - channel, threadId); + LOG_INFO(KEY_PROCESS "KeyProcessTask exit. 
rank:{} channelId:{}, threadId:{}", rankInfo.rankId, channel, threadId); } -void KeyProcess::HashSplitHelper(const unique_ptr& batch, vector& splitKeys, - vector& restore, vector& hotPos, - vector>& keyCount) +void KeyProcess::HashSplitHelper(const unique_ptr& batch, vector& splitKeys, vector& restore, + vector& hotPos, vector>& keyCount) { TimeCost uniqueTc; - if (m_featureAdmitAndEvict.GetFunctionSwitch() && - FeatureAdmitAndEvict::m_embStatus[batch->name] != SingleEmbTableStatus::SETS_NONE) { + if (m_featureAdmitAndEvict.GetFunctionSwitch() + && FeatureAdmitAndEvict::m_embStatus[batch->name] != SingleEmbTableStatus::SETS_NONE) { tie(splitKeys, restore, keyCount) = HashSplitWithFAAE(batch); // 按存储dev id切分并去重 } else { tie(splitKeys, restore, hotPos) = HotHashSplit(batch); // 按存储dev id切分并去重 @@ -352,13 +343,12 @@ void KeyProcess::HashSplitHelper(const unique_ptr& batch, vector& batch, - ock::ctr::UniquePtr& unique, int channel, - int threadId) +bool KeyProcess::KeyProcessTaskHelperWithFastUnique(unique_ptr& batch, ock::ctr::UniquePtr& unique, + int channel, int threadId) { // tuple for keyRec restore hotPos scAll countRecv - isWithFAAE = m_featureAdmitAndEvict.GetFunctionSwitch() && - FeatureAdmitAndEvict::m_embStatus[batch->name] != SingleEmbTableStatus::SETS_NONE; + isWithFAAE = m_featureAdmitAndEvict.GetFunctionSwitch() + && FeatureAdmitAndEvict::m_embStatus[batch->name] != SingleEmbTableStatus::SETS_NONE; TimeCost totalTimeCost = TimeCost(); TimeCost fastUniqueTC; UniqueInfo uniqueInfo; @@ -366,12 +356,12 @@ bool KeyProcess::KeyProcessTaskHelperWithFastUnique(unique_ptr& batch LOG_DEBUG("ProcessBatchWithFastUnique(ms):{}", fastUniqueTC.ElapsedMS()); // 特征准入&淘汰 - if (isWithFAAE && - (m_featureAdmitAndEvict.FeatureAdmit(channel, batch, uniqueInfo.all2AllInfo.keyRecv, - uniqueInfo.all2AllInfo.countRecv) == - FeatureAdmitReturnType::FEATURE_ADMIT_RETURN_ERROR)) { - LOG_ERROR(KEY_PROCESS "rank:{} thread:{}, channel:{}, Feature-admit-and-evict error ...", - rankInfo.rankId, threadId, channel); + if (isWithFAAE + && (m_featureAdmitAndEvict.FeatureAdmit(channel, batch, uniqueInfo.all2AllInfo.keyRecv, + uniqueInfo.all2AllInfo.countRecv) + == FeatureAdmitReturnType::FEATURE_ADMIT_RETURN_ERROR)) { + LOG_ERROR(KEY_PROCESS "rank:{} thread:{}, channel:{}, Feature-admit-and-evict error ...", rankInfo.rankId, + threadId, channel); return false; } std::lock_guard lock(loadSaveMut[channel][threadId]); @@ -397,17 +387,15 @@ bool KeyProcess::KeyProcessTaskHelperWithFastUnique(unique_ptr& batch if (!rankInfo.isDDR) { PushGlobalUniqueTensors(move(tensors), uniqueInfo.all2AllInfo.keyRecv, channel); - tensors->push_back(rankInfo.useDynamicExpansion - ? Vec2TensorI64(uniqueInfo.all2AllInfo.keyRecv) - : Vec2TensorI32(uniqueInfo.all2AllInfo.keyRecv)); + tensors->push_back(rankInfo.useDynamicExpansion ? 
Vec2TensorI64(uniqueInfo.all2AllInfo.keyRecv) + : Vec2TensorI32(uniqueInfo.all2AllInfo.keyRecv)); } TimeCost pushResultTC; PushResult(batch, move(tensors), uniqueInfo.all2AllInfo.keyRecv); if (GlogConfig::gStatOn) { - LOG_INFO(STAT_INFO - "channel_id {} batch_id {} rank_id {} key_process_time_cost_with_fast_unique {}", - channel, batch->batchId, rankInfo.rankId, totalTimeCost.ElapsedMS()); + LOG_INFO(STAT_INFO "channel_id {} batch_id {} rank_id {} key_process_time_cost_with_fast_unique {}", channel, + batch->batchId, rankInfo.rankId, totalTimeCost.ElapsedMS()); } LOG_DEBUG("pushResultTC(ms):{}", pushResultTC.ElapsedMS()); return true; @@ -424,8 +412,8 @@ bool KeyProcess::KeyProcessTaskHelper(unique_ptr& batch, int channel, auto [lookupKeys, scAll, ss] = ProcessSplitKeys(batch, threadId, splitKeys); vector countRecv; - if (m_featureAdmitAndEvict.GetFunctionSwitch() && - FeatureAdmitAndEvict::m_embStatus[batch->name] != SingleEmbTableStatus::SETS_NONE) { + if (m_featureAdmitAndEvict.GetFunctionSwitch() + && FeatureAdmitAndEvict::m_embStatus[batch->name] != SingleEmbTableStatus::SETS_NONE) { countRecv = GetCountRecv(batch, threadId, keyCount, scAll, ss); } std::lock_guard lock(loadSaveMut[channel][threadId]); @@ -433,12 +421,12 @@ bool KeyProcess::KeyProcessTaskHelper(unique_ptr& batch, int channel, BuildRestoreVec(batch, ss, restore, static_cast(hotPos.size())); // 特征准入&淘汰 - if (m_featureAdmitAndEvict.GetFunctionSwitch() && - FeatureAdmitAndEvict::m_embStatus[batch->name] != SingleEmbTableStatus::SETS_NONE && - (m_featureAdmitAndEvict.FeatureAdmit(channel, batch, lookupKeys, countRecv) == - FeatureAdmitReturnType::FEATURE_ADMIT_RETURN_ERROR)) { - LOG_ERROR(KEY_PROCESS "rank:{} thread:{}, channel:{}, Feature-admit-and-evict error ...", - rankInfo.rankId, threadId, channel); + if (m_featureAdmitAndEvict.GetFunctionSwitch() + && FeatureAdmitAndEvict::m_embStatus[batch->name] != SingleEmbTableStatus::SETS_NONE + && (m_featureAdmitAndEvict.FeatureAdmit(channel, batch, lookupKeys, countRecv) + == FeatureAdmitReturnType::FEATURE_ADMIT_RETURN_ERROR)) { + LOG_ERROR(KEY_PROCESS "rank:{} thread:{}, channel:{}, Feature-admit-and-evict error ...", rankInfo.rankId, + threadId, channel); return false; } @@ -462,25 +450,22 @@ bool KeyProcess::KeyProcessTaskHelper(unique_ptr& batch, int channel, if (!rankInfo.isDDR) { PushGlobalUniqueTensors(tensors, lookupKeys, channel); - tensors->push_back(rankInfo.useDynamicExpansion ? Vec2TensorI64(lookupKeys) - : Vec2TensorI32(lookupKeys)); + tensors->push_back(rankInfo.useDynamicExpansion ? 
Vec2TensorI64(lookupKeys) : Vec2TensorI32(lookupKeys)); } PushResult(batch, move(tensors), lookupKeys); LOG_DEBUG("pushResultTC(ms):{}", pushResultTC.ElapsedMS()); if (GlogConfig::gStatOn) { - LOG_INFO(STAT_INFO "channel_id {} batch_id {} rank_id {} key_process_time_cost {}", channel, - batch->batchId, rankInfo.rankId, totalTimeCost.ElapsedMS()); + LOG_INFO(STAT_INFO "channel_id {} batch_id {} rank_id {} key_process_time_cost {}", channel, batch->batchId, + rankInfo.rankId, totalTimeCost.ElapsedMS()); } return true; } -void KeyProcess::PushGlobalUniqueTensors(const unique_ptr>& tensors, - KeysT& lookupKeys, int channel) +void KeyProcess::PushGlobalUniqueTensors(const unique_ptr>& tensors, KeysT& lookupKeys, int channel) { - if (GlobalEnv::applyGradientsStrategy == - ApplyGradientsStrategyOptions::SUM_SAME_ID_GRADIENTS_AND_APPLY && - channel == TRAIN_CHANNEL_ID) { + if (GlobalEnv::applyGradientsStrategy == ApplyGradientsStrategyOptions::SUM_SAME_ID_GRADIENTS_AND_APPLY + && channel == TRAIN_CHANNEL_ID) { KeysT uniqueKeys; vector restoreVecSec; @@ -488,14 +473,12 @@ void KeyProcess::PushGlobalUniqueTensors(const unique_ptr>& tenso GlobalUnique(lookupKeys, uniqueKeys, restoreVecSec); LOG_DEBUG("globalUniqueSyncTC(ms):{}", globalUniqueSyncTC.ElapsedMS()); tensors->push_back(Vec2TensorI32(restoreVecSec)); - tensors->push_back(rankInfo.useDynamicExpansion ? Vec2TensorI64(uniqueKeys) - : Vec2TensorI32(uniqueKeys)); + tensors->push_back(rankInfo.useDynamicExpansion ? Vec2TensorI64(uniqueKeys) : Vec2TensorI32(uniqueKeys)); } } vector KeyProcess::GetCountRecv(const unique_ptr& batch, int id, - vector>& keyCount, vector scAll, - vector ss) + vector>& keyCount, vector scAll, vector ss) { TimeCost getCountRecvTC; if (rankInfo.useStatic) { @@ -518,9 +501,8 @@ vector KeyProcess::GetCountRecv(const unique_ptr& batch, in vector rs = Count2Start(rc); // receive displays/offset 接受数据的起始偏移量 vector countRecv; countRecv.resize(rs.back() + rc.back()); - int retCode = - MPI_Alltoallv(countSend.data(), sc.data(), ss.data(), MPI_UINT32_T, countRecv.data(), - rc.data(), rs.data(), MPI_UINT32_T, comm[batch->channel][id]); + int retCode = MPI_Alltoallv(countSend.data(), sc.data(), ss.data(), MPI_UINT32_T, countRecv.data(), rc.data(), + rs.data(), MPI_UINT32_T, comm[batch->channel][id]); if (retCode != MPI_SUCCESS) { LOG_ERROR("rank {}, MPI_Alltoallv failed:{}", rankInfo.rankId, retCode); } @@ -528,16 +510,13 @@ vector KeyProcess::GetCountRecv(const unique_ptr& batch, in return countRecv; } -void KeyProcess::PushResult(unique_ptr& batch, unique_ptr> tensors, - KeysT& lookupKeys) +void KeyProcess::PushResult(unique_ptr& batch, unique_ptr> tensors, KeysT& lookupKeys) { std::unique_lock lockGuard(mut); storage.push_front(move(tensors)); - infoList[batch->name][batch->channel].push( - make_tuple(batch->batchId, batch->name, storage.begin())); + infoList[batch->name][batch->channel].push(make_tuple(batch->batchId, batch->name, storage.begin())); if (rankInfo.isDDR) { - lookupKeysList[batch->name][batch->channel].push( - make_tuple(batch->batchId, batch->name, move(lookupKeys))); + lookupKeysList[batch->name][batch->channel].push(make_tuple(batch->batchId, batch->name, move(lookupKeys))); } lockGuard.unlock(); } @@ -565,9 +544,8 @@ unique_ptr KeyProcess::GetBatchData(int channel, int commId) const this_thread::sleep_for(100us); if (tc.ElapsedSec() > GET_BATCH_TIMEOUT) { if (commId == 0) { - LOG_WARN(KEY_PROCESS - "getting batch timeout! 1. check last 'read batch cost' print. 
" - "channel[{}] commId[{}]", + LOG_WARN(KEY_PROCESS "getting batch timeout! 1. check last 'read batch cost' print. " + "channel[{}] commId[{}]", channel, commId); } this_thread::sleep_for(seconds(1)); @@ -575,22 +553,18 @@ unique_ptr KeyProcess::GetBatchData(int channel, int commId) const } if (!isRunning) { - LOG_WARN("channelId:{} threadId:{}, isRunning is false when GetBatchData", channel, - commId); + LOG_WARN("channelId:{} threadId:{}, isRunning is false when GetBatchData", channel, commId); throw EndRunExit("GetBatchData end run."); } } EASY_END_BLOCK - LOG_DEBUG( - KEY_PROCESS - "channelId:{} threadId:{} batchId:{}, get batch data done, batchName:{}. bs:{} sample:[{}]", - batch->channel, commId, batch->batchId, batch->name, batch->Size(), batch->UnParse()); + LOG_DEBUG(KEY_PROCESS "channelId:{} threadId:{} batchId:{}, get batch data done, batchName:{}. bs:{} sample:[{}]", + batch->channel, commId, batch->batchId, batch->name, batch->Size(), batch->UnParse()); #if defined(PROFILING) && defined(BUILD_WITH_EASY_PROFILER) if (batch->batchId == PROFILING_START_BATCH_ID) { EASY_PROFILER_ENABLE } else if (batch->batchId == PROFILING_END_BATCH_ID) { - ::profiler::dumpBlocksToFile( - StringFormat("/home/MX_REC-profile-%d.prof", rankInfo.rankId).c_str()); + ::profiler::dumpBlocksToFile(StringFormat("/home/MX_REC-profile-%d.prof", rankInfo.rankId).c_str()); } #endif return batch; @@ -605,8 +579,7 @@ size_t KeyProcess::GetKeySize(const unique_ptr& batch) return size; } -void KeyProcess::ProcessBatchWithFastUnique(const unique_ptr& batch, - ock::ctr::UniquePtr& unique, int id, +void KeyProcess::ProcessBatchWithFastUnique(const unique_ptr& batch, ock::ctr::UniquePtr& unique, int id, UniqueInfo& uniqueInfoOut) { EASY_FUNCTION(profiler::colors::Purple) @@ -655,20 +628,18 @@ void KeyProcess::ProcessBatchWithFastUnique(const unique_ptr& batch, LOG_DEBUG(KEY_PROCESS "ProcessBatchWithFastUnique get batchId:{}, batchSize:{}," " channel:{}, name:{}, restore:{}, keyCount:{}", - batch->batchId, batch->Size(), batch->channel, batch->name, - uniqueInfoOut.restore.size(), keySendInfo.keyCount.size()); + batch->batchId, batch->Size(), batch->channel, batch->name, uniqueInfoOut.restore.size(), + keySendInfo.keyCount.size()); if (GlogConfig::gStatOn) { LOG_INFO(STAT_INFO "channel_id {} batch_id {} rank_id {} " "batch_key_num_with_fast_unique {} unique_key_num_with_fast_unique {}", - batch->channel, batch->batchId, rankInfo.rankId, batch->Size(), - uniqueOut.uniqueIdCnt); + batch->channel, batch->batchId, rankInfo.rankId, batch->Size(), uniqueOut.uniqueIdCnt); } } -void KeyProcess::HandleHotAndSendCount(const unique_ptr& batch, - UniqueInfo& uniqueInfoOut, KeySendInfo& keySendInfo, - vector& sc, vector& splitSize) +void KeyProcess::HandleHotAndSendCount(const unique_ptr& batch, UniqueInfo& uniqueInfoOut, + KeySendInfo& keySendInfo, vector& sc, vector& splitSize) { std::shared_lock lock(g_smut); absl::flat_hash_map hotMap = hotKey[batch->name]; @@ -681,8 +652,8 @@ void KeyProcess::HandleHotAndSendCount(const unique_ptr& batch, TimeCost computeHotTc; ComputeHotPos(batch, hotMap, uniqueInfoOut.hotPos, uniqueInfoOut.restore, hotOffset); LOG_DEBUG("ComputeHot TimeCost(ms):{}", computeHotTc.ElapsedMS()); - UpdateHotMapForUnique(keySendInfo.keySend, keySendInfo.keyCount, hotOffset, - batch->batchId % hotEmbUpdateStep == 0, batch->name); + UpdateHotMapForUnique(keySendInfo.keySend, keySendInfo.keyCount, hotOffset, batch->batchId % hotEmbUpdateStep == 0, + batch->name); if (rankInfo.useStatic) { 
sc.resize(rankInfo.rankSize, embInfos[batch->name].sendCount); @@ -694,9 +665,8 @@ void KeyProcess::HandleHotAndSendCount(const unique_ptr& batch, } } -void KeyProcess::ComputeHotPos(const unique_ptr& batch, - absl::flat_hash_map& hotMap, vector& hotPos, - vector& restore, const int hotOffset) const +void KeyProcess::ComputeHotPos(const unique_ptr& batch, absl::flat_hash_map& hotMap, + vector& hotPos, vector& restore, const int hotOffset) const { emb_key_t* inputData = batch->sample.data(); size_t miniBs = batch->Size(); @@ -719,8 +689,8 @@ void KeyProcess::ComputeHotPos(const unique_ptr& batch, } } -void KeyProcess::All2All(vector& sc, int id, const unique_ptr& batch, - KeySendInfo& keySendInfo, All2AllInfo& all2AllInfoOut) +void KeyProcess::All2All(vector& sc, int id, const unique_ptr& batch, KeySendInfo& keySendInfo, + All2AllInfo& all2AllInfoOut) { TimeCost getScAllTC; int channel = batch->channel; @@ -739,48 +709,43 @@ void KeyProcess::All2All(vector& sc, int id, const unique_ptr& b all2AllInfoOut.keyRecv.resize(rs.back() + rc.back()); EASY_BLOCK("all2all") int retCode = MPI_Alltoallv(keySendInfo.keySend.data(), sc.data(), ss.data(), MPI_INT64_T, - all2AllInfoOut.keyRecv.data(), rc.data(), rs.data(), MPI_INT64_T, - comm[channel][id]); + all2AllInfoOut.keyRecv.data(), rc.data(), rs.data(), MPI_INT64_T, comm[channel][id]); if (retCode != MPI_SUCCESS) { LOG_ERROR("rank {}, MPI_Alltoallv failed:{}", rankInfo.rankId, retCode); } - LOG_DEBUG("channelId:{} threadId:{} batchId:{}, All2All MPI_Alltoallv end.", channel, id, - batch->batchId); + LOG_DEBUG("channelId:{} threadId:{} batchId:{}, All2All MPI_Alltoallv end.", channel, id, batch->batchId); all2AllInfoOut.countRecv.resize(rs.back() + rc.back()); if (isWithFAAE) { retCode = MPI_Alltoallv(keySendInfo.keyCount.data(), sc.data(), ss.data(), MPI_UINT32_T, - all2AllInfoOut.countRecv.data(), rc.data(), rs.data(), MPI_UINT32_T, - comm[channel][id]); + all2AllInfoOut.countRecv.data(), rc.data(), rs.data(), MPI_UINT32_T, comm[channel][id]); if (retCode != MPI_SUCCESS) { - LOG_ERROR("channelId:{} threadId:{} batchId:{}, MPI_Alltoallv failed:{}", channel, id, - batch->batchId, retCode); + LOG_ERROR("channelId:{} threadId:{} batchId:{}, MPI_Alltoallv failed:{}", channel, id, batch->batchId, + retCode); } } - LOG_DEBUG("channelId:{} threadId:{} batchId:{}, All2All end, all2allTC TimeCost(ms):{}", - channel, id, batch->batchId, all2allTC.ElapsedMS()); + LOG_DEBUG("channelId:{} threadId:{} batchId:{}, All2All end, all2allTC TimeCost(ms):{}", channel, id, + batch->batchId, all2allTC.ElapsedMS()); EASY_END_BLOCK } -auto KeyProcess::ProcessSplitKeys(const unique_ptr& batch, int id, - vector& splitKeys) +auto KeyProcess::ProcessSplitKeys(const unique_ptr& batch, int id, vector& splitKeys) -> tuple, vector> { TimeCost processSplitKeysTC; EASY_FUNCTION(profiler::colors::Purple) EASY_VALUE("batchId", batch->batchId) - LOG_INFO(KEY_PROCESS "channelId:{} threadId:{} batchId:{}, ProcessSplitKeys start.", - batch->channel, id, batch->batchId); + LOG_INFO(KEY_PROCESS "channelId:{} threadId:{} batchId:{}, ProcessSplitKeys start.", batch->channel, id, + batch->batchId); // 使用静态all2all通信:发送或接受量为预置固定值 scInfo[batch->name] = 65536 / rankSize 经验值 if (rankInfo.useStatic) { // maybe move after all2all for (KeysT& i : splitKeys) { if (static_cast(i.size()) > embInfos[batch->name].sendCount) { - LOG_ERROR("{}[{}]:{} overflow! set send count bigger than {}", batch->name, - batch->channel, batch->batchId, i.size()); - throw runtime_error( - StringFormat("%s[%d]:%d overflow! 
set send count bigger than %d", - batch->name.c_str(), batch->channel, batch->batchId, i.size()) - .c_str()); + LOG_ERROR("{}[{}]:{} overflow! set send count bigger than {}", batch->name, batch->channel, + batch->batchId, i.size()); + throw runtime_error(StringFormat("%s[%d]:%d overflow! set send count bigger than %d", + batch->name.c_str(), batch->channel, batch->batchId, i.size()) + .c_str()); } i.resize(embInfos[batch->name].sendCount, -1); } @@ -794,8 +759,7 @@ auto KeyProcess::ProcessSplitKeys(const unique_ptr& batch, int id, KeysT keyRecv; TimeCost getScAllTC; - vector scAll = GetScAll( - sc, id, batch); // Allgather通信获取所有(不同rank相同thread id的)线程间通信量矩阵 + vector scAll = GetScAll(sc, id, batch); // Allgather通信获取所有(不同rank相同thread id的)线程间通信量矩阵 LOG_DEBUG("getScAllTC(ms)(AllReduce-AllGather):{}", getScAllTC.ElapsedMS()); vector ss = Count2Start(sc); // send displays/offset 发送数据的起始偏移量 @@ -809,8 +773,8 @@ auto KeyProcess::ProcessSplitKeys(const unique_ptr& batch, int id, EASY_BLOCK("all2all") TimeCost uniqueAll2AllTC; - int retCode = MPI_Alltoallv(keySend.data(), sc.data(), ss.data(), MPI_INT64_T, keyRecv.data(), - rc.data(), rs.data(), MPI_INT64_T, comm[batch->channel][id]); + int retCode = MPI_Alltoallv(keySend.data(), sc.data(), ss.data(), MPI_INT64_T, keyRecv.data(), rc.data(), rs.data(), + MPI_INT64_T, comm[batch->channel][id]); if (retCode != MPI_SUCCESS) { LOG_ERROR("rank {}, MPI_Alltoallv failed:{}", rankInfo.rankId, retCode); } @@ -828,8 +792,7 @@ auto KeyProcess::ProcessSplitKeys(const unique_ptr& batch, int id, * splitKeys返回:将数据的key切分到其所在dev id对应的桶中,并去重。 * restore返回:去重后key在桶内偏移量(用于计算恢复向量) */ -tuple, vector> KeyProcess::HashSplit( - const unique_ptr& batch) const +tuple, vector> KeyProcess::HashSplit(const unique_ptr& batch) const { EASY_FUNCTION(profiler::colors::Gold) emb_key_t* batchData = batch->sample.data(); @@ -845,8 +808,7 @@ tuple, vector> KeyProcess::HashSplit( auto result = uKey.find(key); if (result == uKey.end()) { splitKeys[devId].push_back(key); - restore[i] = - hashSplitLens[devId]++; // restore记录去重后key在桶内偏移量(用于计算恢复向量) + restore[i] = hashSplitLens[devId]++; // restore记录去重后key在桶内偏移量(用于计算恢复向量) uKey[key] = restore[i]; } else { // 去重 restore[i] = result->second; @@ -861,9 +823,8 @@ tuple, vector> KeyProcess::HashSplit( for (int devId = 0; devId < rankInfo.rankSize; ++devId) { uniqueKeyNum += splitKeys[devId].size(); } - LOG_INFO(STAT_INFO - "channel_id {} batch_id {} rank_id {} batch_key_num {} unique_key_num {}", - batch->channel, batch->batchId, rankInfo.rankId, batch->Size(), uniqueKeyNum); + LOG_INFO(STAT_INFO "channel_id {} batch_id {} rank_id {} batch_key_num {} unique_key_num {}", batch->channel, + batch->batchId, rankInfo.rankId, batch->Size(), uniqueKeyNum); } return {splitKeys, restore}; } @@ -889,7 +850,7 @@ tuple, vector, vector>> KeyProcess::Hash vector splitKeys(rankInfo.rankSize); vector> keyCount(rankInfo.rankSize); // splitKeys在原始batch中对应的频次 vector restore(batch->Size()); - vector hashSplitLens(rankInfo.rankSize); // 初始化全0,记录每个桶的长度 + vector hashSplitLens(rankInfo.rankSize); // 初始化全0,记录每个桶的长度 absl::flat_hash_map> uKey; // 用于去重查询 EASY_BLOCK("split push back") for (size_t i = 0; i < miniBs; i++) { @@ -898,8 +859,7 @@ tuple, vector, vector>> KeyProcess::Hash auto result = uKey.find(key); if (result == uKey.end()) { splitKeys[devId].push_back(key); - restore[i] = - hashSplitLens[devId]++; // restore记录去重后key在桶内偏移量(用于计算恢复向量) + restore[i] = hashSplitLens[devId]++; // restore记录去重后key在桶内偏移量(用于计算恢复向量) uKey[key].first = restore[i]; uKey[key].second = 1; } else { // 
去重 @@ -928,15 +888,13 @@ tuple, vector, vector>> KeyProcess::Hash for (int devId = 0; devId < rankInfo.rankSize; ++devId) { uniqueKeyNum += splitKeys[devId].size(); } - LOG_INFO(STAT_INFO - "channel_id {} batch_id {} rank_id {} batch_key_num {} faae_unique_key_num {}", + LOG_INFO(STAT_INFO "channel_id {} batch_id {} rank_id {} batch_key_num {} faae_unique_key_num {}", batch->channel, batch->batchId, rankInfo.rankId, batch->Size(), uniqueKeyNum); } return {splitKeys, restore, keyCount}; } -tuple, vector, vector> KeyProcess::HotHashSplit( - const unique_ptr& batch) +tuple, vector, vector> KeyProcess::HotHashSplit(const unique_ptr& batch) { EASY_FUNCTION(profiler::colors::Gold) emb_key_t* batchData = batch->sample.data(); @@ -988,25 +946,22 @@ tuple, vector, vector> KeyProcess::HotHashSplit( for (int devId = 0; devId < rankInfo.rankSize; ++devId) { uniqueKeyNum += splitKeys[devId].size(); } - LOG_INFO(STAT_INFO - "channel_id {} batch_id {} rank_id {} batch_key_num {} hot_unique_key_num {}", + LOG_INFO(STAT_INFO "channel_id {} batch_id {} rank_id {} batch_key_num {} hot_unique_key_num {}", batch->channel, batch->batchId, rankInfo.rankId, batch->Size(), uniqueKeyNum); } - UpdateHotMap(keyCountMapByEmbName, hotEmbTotCount[batch->name], - batch->batchId % hotEmbUpdateStep == 0, batch->name); + UpdateHotMap(keyCountMapByEmbName, hotEmbTotCount[batch->name], batch->batchId % hotEmbUpdateStep == 0, + batch->name); AddCountStartToHotPos(splitKeys, hotPos, hotPosDev, batch); return {splitKeys, restore, hotPos}; } -void KeyProcess::AddCountStartToHotPos(vector& splitKeys, vector& hotPos, - const vector& hotPosDev, +void KeyProcess::AddCountStartToHotPos(vector& splitKeys, vector& hotPos, const vector& hotPosDev, const unique_ptr& batch) { vector splitKeysSize; for (auto& splitKey : splitKeys) { - int tmp = rankInfo.useStatic ? embInfos[batch->name].sendCount - : static_cast(splitKey.size()); + int tmp = rankInfo.useStatic ? 
embInfos[batch->name].sendCount : static_cast(splitKey.size()); splitKeysSize.push_back(tmp); } @@ -1016,8 +971,8 @@ void KeyProcess::AddCountStartToHotPos(vector& splitKeys, vector& ho } } -void KeyProcess::UpdateHotMapForUnique(const KeysT& keySend, const vector& keyCount, - uint32_t count, bool refresh, const string& embName) +void KeyProcess::UpdateHotMapForUnique(const KeysT& keySend, const vector& keyCount, uint32_t count, + bool refresh, const string& embName) { auto& hotMap = hotKey[embName]; if (refresh) { @@ -1041,8 +996,8 @@ void KeyProcess::UpdateHotMapForUnique(const KeysT& keySend, const vector& keyCountMapByEmbName, - uint32_t count, bool refresh, const string& embName) +void KeyProcess::UpdateHotMap(absl::flat_hash_map& keyCountMapByEmbName, uint32_t count, bool refresh, + const string& embName) { if (!refresh) { return; @@ -1068,43 +1023,39 @@ void KeyProcess::UpdateHotMap(absl::flat_hash_map& keyCountMapBy * 将本地(rank)batch要发送的key数据量进行Allgather通信,获取所有(不同rank相同thread * id的)线程间的通信量矩阵 scAll返回:所有线程间的通信量矩阵(按行平铺的一维向量) */ -vector KeyProcess::GetScAll(const vector& keyScLocal, int commId, - const unique_ptr& batch) +vector KeyProcess::GetScAll(const vector& keyScLocal, int commId, const unique_ptr& batch) { EASY_FUNCTION() vector scAll; scAll.resize(rankInfo.rankSize * rankInfo.rankSize); - LOG_DEBUG("channelId:{} threadId:{} batchId:{}, GetScAll start.", batch->channel, commId, - batch->batchId); + LOG_DEBUG("channelId:{} threadId:{} batchId:{}, GetScAll start.", batch->channel, commId, batch->batchId); // allgather keyScLocal(key all2all keyScLocal = device all2all rc) - auto retCode = MPI_Allgather(keyScLocal.data(), rankInfo.rankSize, MPI_INT, scAll.data(), - rankInfo.rankSize, MPI_INT, comm[batch->channel][commId]); + auto retCode = MPI_Allgather(keyScLocal.data(), rankInfo.rankSize, MPI_INT, scAll.data(), rankInfo.rankSize, + MPI_INT, comm[batch->channel][commId]); if (retCode != MPI_SUCCESS) { LOG_ERROR("rank {} commId {}, MPI_Allgather failed:{}", rankInfo.rankId, commId, retCode); } - LOG_DEBUG( - "channelId:{} threadId:{} batchId:{}, GetScAll MPI_Allgather end, key scAll matrix:\n{}", - batch->channel, commId, batch->batchId, VectorToString(scAll)); + LOG_DEBUG("channelId:{} threadId:{} batchId:{}, GetScAll MPI_Allgather end, key scAll matrix:\n{}", batch->channel, + commId, batch->batchId, VectorToString(scAll)); return scAll; } -void KeyProcess::GetScAllForUnique(const vector& keyScLocal, int commId, - const unique_ptr& batch, vector& scAllOut) +void KeyProcess::GetScAllForUnique(const vector& keyScLocal, int commId, const unique_ptr& batch, + vector& scAllOut) { EASY_FUNCTION() int channel = batch->channel; scAllOut.resize(rankInfo.rankSize * rankInfo.rankSize); // allgather keyScLocal(key all2all keyScLocal = device all2all rc) - auto retCode = MPI_Allgather(keyScLocal.data(), rankInfo.rankSize, MPI_INT, scAllOut.data(), - rankInfo.rankSize, MPI_INT, comm[channel][commId]); + auto retCode = MPI_Allgather(keyScLocal.data(), rankInfo.rankSize, MPI_INT, scAllOut.data(), rankInfo.rankSize, + MPI_INT, comm[channel][commId]); if (retCode != MPI_SUCCESS) { LOG_ERROR("rank {}, MPI_Allgather failed:{}", rankInfo.rankId, retCode); } - LOG_DEBUG( - "channelId:{} threadId:{} batchId:{}, GetScAllForUnique end, key scAllOut matrix:\n{}", - channel, commId, batch->batchId, VectorToString(scAllOut)); + LOG_DEBUG("channelId:{} threadId:{} batchId:{}, GetScAllForUnique end, key scAllOut matrix:\n{}", channel, commId, + batch->batchId, VectorToString(scAllOut)); } void 
KeyProcess::Key2Offset(const EmbNameT& embName, KeysT& splitKey, int channel) @@ -1206,8 +1157,7 @@ void KeyProcess::BuildRestoreVec(const unique_ptr& batch, const vecto hotNum += 1; } } - LOG_DEBUG("hot num in all:{}/{} buildRestoreVecTC(ms):{}", hotNum, batch->Size(), - buildRestoreVecTC.ElapsedMS()); + LOG_DEBUG("hot num in all:{}/{} buildRestoreVecTC(ms):{}", hotNum, batch->Size(), buildRestoreVecTC.ElapsedMS()); } template @@ -1220,8 +1170,7 @@ T KeyProcess::GetInfo(info_list_t& list, int batch, const string& embName, in } auto topBatch = get(list[embName][channel].top()); if (topBatch < batch) { - LOG_ERROR("wrong batch id, top:{} getting:{}, channel:{}, may not clear channel", topBatch, - batch, channel); + LOG_ERROR("wrong batch id, top:{} getting:{}, channel:{}, may not clear channel", topBatch, batch, channel); this_thread::sleep_for(1s); } if (topBatch != batch) { @@ -1249,10 +1198,8 @@ KeysT KeyProcess::GetLookupKeys(int batch, const string& embName, int channel) // 判断此时的batch id是否已经过期,即通道已经刷新 HybridMgmtBlock* hybridMgmtBlock = Singleton::GetInstance(); if (batch != hybridMgmtBlock->hybridBatchId[channel]) { - LOG_DEBUG( - KEY_PROCESS - "Detected that the batch has expired at this time, exiting the loop! {}[{}]:{}", - embName, channel, batch); + LOG_DEBUG(KEY_PROCESS "Detected that the batch has expired at this time, exiting the loop! {}[{}]:{}", + embName, channel, batch); return {}; } if (batch != 0 && channel != 0 && tc.ElapsedSec() > KEY_PROCESS_TIMEOUT) { @@ -1299,27 +1246,22 @@ void KeyProcess::SendEos(int batchId, int channel) vector tensors; bool isNeedResend = true; - for (const auto& emb : - as_const(embInfos)) { // 一个表触发以后,其余表都发送eos,最后外层接收null退出此次循环 - LOG_INFO("channelId:{} batchId:{}, the embName:{} related channel SendEos start.", channel, - batchId, emb.first); + for (const auto& emb : as_const(embInfos)) { // 一个表触发以后,其余表都发送eos,最后外层接收null退出此次循环 + LOG_INFO("channelId:{} batchId:{}, the embName:{} related channel SendEos start.", channel, batchId, emb.first); if (!isRunning) { throw EndRunExit("SendEos end run, isRunning is false after lock destroyMutex."); } for (const string& transName : usedChannelNames) { - string sendName = - StringFormat("%s_%s_%d", emb.first.c_str(), transName.c_str(), channel); + string sendName = StringFormat("%s_%s_%d", emb.first.c_str(), transName.c_str(), channel); size_t channelSize = 0; acltdtQueryChannelSize(transChannels[sendName], &channelSize); LOG_INFO("[EOS] Before send eos, {} contains {}.", sendName, channelSize); - SendTensorsByAcl(transChannels[sendName], ACL_TENSOR_DATA_END_OF_SEQUENCE, tensors, - isNeedResend); + SendTensorsByAcl(transChannels[sendName], ACL_TENSOR_DATA_END_OF_SEQUENCE, tensors, isNeedResend); acltdtQueryChannelSize(transChannels[sendName], &channelSize); LOG_INFO("[EOS] After send eos, {} contains {}.", sendName, channelSize); } - LOG_INFO("channelId:{} batchId:{}, the embName:{} related channel SendEos end.", channel, - batchId, emb.first); + LOG_INFO("channelId:{} batchId:{}, the embName:{} related channel SendEos end.", channel, batchId, emb.first); } LOG_INFO("channelId:{} batchId:{}, SendEos end.", channel, batchId); @@ -1333,8 +1275,7 @@ void KeyProcess::SendEos(int batchId, int channel) /// \param channel 通道索引(训练/推理) /// \param type 数据类型 /// \return -unique_ptr> KeyProcess::GetInfoVec(int batch, const string& embName, int channel, - ProcessedInfo type) +unique_ptr> KeyProcess::GetInfoVec(int batch, const string& embName, int channel, ProcessedInfo type) { TimeCost tc = TimeCost(); info_list_t* list; 
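[Editor's note, illustration only, not part of the patch: several hunks above reformat one recurring communication pattern: each rank Allgathers its per-peer send counts (GetScAll), turns counts into displacements with an exclusive prefix sum (the Count2Start helper), and then exchanges keys with MPI_Alltoallv. Below is a rough mpi4py rendering of that pattern with invented toy counts and keys; run it under mpirun -np <N>. It sketches the idea only and is not the project's code.

    from mpi4py import MPI
    import numpy as np

    comm = MPI.COMM_WORLD
    size, rank = comm.Get_size(), comm.Get_rank()

    # Per-peer send counts for this rank (a stand-in for keyScLocal).
    sc = np.array([rank + peer + 1 for peer in range(size)], dtype=np.int32)

    # Allgather the full size x size count matrix, as GetScAll does.
    sc_all = np.empty(size * size, dtype=np.int32)
    comm.Allgather(sc, sc_all)

    # Receive counts are this rank's column of that matrix; displacements are
    # exclusive prefix sums, which is exactly what Count2Start computes.
    rc = np.ascontiguousarray(sc_all.reshape(size, size)[:, rank])
    ss = np.concatenate(([0], np.cumsum(sc)[:-1])).astype(np.int32)
    rs = np.concatenate(([0], np.cumsum(rc)[:-1])).astype(np.int32)

    send = np.full(sc.sum(), rank, dtype=np.int64)   # fake keys tagged by sender
    recv = np.empty(rc.sum(), dtype=np.int64)
    comm.Alltoallv([send, sc, ss, MPI.INT64_T], [recv, rc, rs, MPI.INT64_T])

After the exchange, recv holds rc[p] keys from peer p starting at offset rs[p], the same layout KeyProcess assumes when it assembles keyRecv.]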
@@ -1359,10 +1300,8 @@ unique_ptr> KeyProcess::GetInfoVec(int batch, const string& embNa // 判断此时的batch id是否已经过期,即通道已经刷新 HybridMgmtBlock* hybridMgmtBlock = Singleton::GetInstance(); if (batch != hybridMgmtBlock->hybridBatchId[channel]) { - LOG_DEBUG( - KEY_PROCESS - "Detected that the batch has expired at this time, exiting the loop! {}[{}]:{}", - embName, channel, batch); + LOG_DEBUG(KEY_PROCESS "Detected that the batch has expired at this time, exiting the loop! {}[{}]:{}", + embName, channel, batch); return nullptr; } if (batch != 0 && channel != 0 && tc.ElapsedSec() > KEY_PROCESS_TIMEOUT) { @@ -1381,8 +1320,7 @@ unique_ptr> KeyProcess::GetInfoVec(int batch, const string& embNa unique_lock lockEosGuard(eosMutex); // 避免eos在keyProcess还未处理完数据时插队到通道前面, // readEmbKey真实的次数是readEmbedBatchId减1 - if (isNeedSendEos[channel] && - (hybridMgmtBlock->readEmbedBatchId[channel] - 1) < batch) { + if (isNeedSendEos[channel] && (hybridMgmtBlock->readEmbedBatchId[channel] - 1) < batch) { LOG_INFO("channelId:{} batchId:{}, GetInfoVec eos.", channel, batch); unique_lock lockDestroyGuard(destroyMutex); SendEos(batch, channel); @@ -1460,19 +1398,17 @@ void KeyProcess::EvictDeleteDeviceEmb(const string& embName, const vector offset) { if (offset.size() > embInfos[embName].devVocabSize) { - LOG_ERROR("{} overflow! init evict dev, evictOffset size {} bigger than dev vocabSize {}", - embName, offset.size(), embInfos[embName].devVocabSize); + LOG_ERROR("{} overflow! init evict dev, evictOffset size {} bigger than dev vocabSize {}", embName, + offset.size(), embInfos[embName].devVocabSize); throw runtime_error( - Logger::Format( - "{} overflow! init evict dev, evictOffset size {} bigger than dev vocabSize {}", - embName, offset.size(), embInfos[embName].devVocabSize) + Logger::Format("{} overflow! init evict dev, evictOffset size {} bigger than dev vocabSize {}", embName, + offset.size(), embInfos[embName].devVocabSize) .c_str()); } @@ -1489,8 +1425,7 @@ void KeyProcess::EvictInitDeviceEmb(const string& embName, vector offset auto trans = Singleton::GetInstance(); trans->Send(TransferChannel::EVICT, tmpDataOut, TRAIN_CHANNEL_ID, embName); - LOG_INFO(KEY_PROCESS "hbm EvictInitDeviceEmb: [{}]! send offsetSize:{}", embName, - offset.size()); + LOG_INFO(KEY_PROCESS "hbm EvictInitDeviceEmb: [{}]! 
send offsetSize:{}", embName, offset.size()); } string KeyProcess::DumpSplitKeys(vector>& splitKeys) const diff --git a/src/core/key_process/key_process.h b/src/core/key_process/key_process.h index d6a0b80b..4dafc07f 100644 --- a/src/core/key_process/key_process.h +++ b/src/core/key_process/key_process.h @@ -81,8 +81,7 @@ public: bool Initialize(const RankInfo& rInfo, const vector& eInfos, const vector& thresholdValues = {}, int seed = 0); - unique_ptr> GetInfoVec(int batch, const string& embName, int channel, - ProcessedInfo type); + unique_ptr> GetInfoVec(int batch, const string& embName, int channel, ProcessedInfo type); KeysT GetLookupKeys(int batch, const string& embName, int channel); @@ -129,8 +128,8 @@ public: for (size_t i = 0; i < lookupKeys.size(); ++i) { int64_t key = lookupKeys[i]; - if (rankInfo.useStatic && ((!rankInfo.useDynamicExpansion && key == -1) || - (rankInfo.useDynamicExpansion && key == 0))) { + if (rankInfo.useStatic + && ((!rankInfo.useDynamicExpansion && key == -1) || (rankInfo.useDynamicExpansion && key == 0))) { continue; } @@ -205,42 +204,38 @@ public: bool KeyProcessTaskHelper(unique_ptr& batch, int channel, int threadId); - bool KeyProcessTaskHelperWithFastUnique(unique_ptr& batch, - ock::ctr::UniquePtr& unique, int channel, int threadId); + bool KeyProcessTaskHelperWithFastUnique(unique_ptr& batch, ock::ctr::UniquePtr& unique, int channel, + int threadId); - tuple, vector> ProcessSplitKeys(const unique_ptr& batch, - int id, vector& splitKeys); + tuple, vector> ProcessSplitKeys(const unique_ptr& batch, int id, + vector& splitKeys); void GetUniqueConfig(ock::ctr::UniqueConf& uniqueConf); - void InitializeUnique(ock::ctr::UniqueConf& uniqueConf, size_t& preBatchSize, - bool& uniqueInitialize, const unique_ptr& batch, - ock::ctr::UniquePtr& unique); + void InitializeUnique(ock::ctr::UniqueConf& uniqueConf, size_t& preBatchSize, bool& uniqueInitialize, + const unique_ptr& batch, ock::ctr::UniquePtr& unique); - void ProcessBatchWithFastUnique(const unique_ptr& batch, ock::ctr::UniquePtr& unique, - int id, UniqueInfo& uniqueInfoOut); + void ProcessBatchWithFastUnique(const unique_ptr& batch, ock::ctr::UniquePtr& unique, int id, + UniqueInfo& uniqueInfoOut); size_t GetKeySize(const unique_ptr& batch); - void All2All(vector& sc, int id, const unique_ptr& batch, - KeySendInfo& keySendInfo, All2AllInfo& all2AllInfoOut); + void All2All(vector& sc, int id, const unique_ptr& batch, KeySendInfo& keySendInfo, + All2AllInfo& all2AllInfoOut); - auto HashSplit(const unique_ptr& batch) const - -> tuple, vector>; + auto HashSplit(const unique_ptr& batch) const -> tuple, vector>; - auto HotHashSplit(const unique_ptr& batch) - -> tuple, vector, vector>; + auto HotHashSplit(const unique_ptr& batch) -> tuple, vector, vector>; void PaddingAlltoallVC(vector& splitKeys) const; tuple, vector, vector>> HashSplitWithFAAE( const unique_ptr& batch) const; - vector GetScAll(const vector& keyScLocal, int commId, - const unique_ptr& batch); + vector GetScAll(const vector& keyScLocal, int commId, const unique_ptr& batch); - void GetScAllForUnique(const vector& keyScLocal, int commId, - const unique_ptr& batch, vector& scAllOut); + void GetScAllForUnique(const vector& keyScLocal, int commId, const unique_ptr& batch, + vector& scAllOut); void Key2Offset(const EmbNameT& embName, KeysT& splitKey, int channel); @@ -248,8 +243,8 @@ public: unique_ptr GetBatchData(int channel, int commId) const; - void BuildRestoreVec(const unique_ptr& batch, const vector& blockOffset, - vector& restoreVec, int 
hotPosSize = 0) const; + void BuildRestoreVec(const unique_ptr& batch, const vector& blockOffset, vector& restoreVec, + int hotPosSize = 0) const; void SendA2A(const vector& a2aInfo, const string& embName, int channel, int batch); @@ -257,35 +252,30 @@ public: void EvictInitDeviceEmb(const string& embName, vector offset); - void UpdateHotMap(absl::flat_hash_map& keyCountMapByEmbName, uint32_t count, - bool refresh, const string& embName); + void UpdateHotMap(absl::flat_hash_map& keyCountMapByEmbName, uint32_t count, bool refresh, + const string& embName); - void UpdateHotMapForUnique(const KeysT& keySend, const vector& keyCount, - uint32_t count, bool refresh, const string& embName); + void UpdateHotMapForUnique(const KeysT& keySend, const vector& keyCount, uint32_t count, bool refresh, + const string& embName); - void HandleHotAndSendCount(const unique_ptr& batch, UniqueInfo& uniqueInfoOut, - KeySendInfo& keySendInfo, vector& sc, vector& splitSize); + void HandleHotAndSendCount(const unique_ptr& batch, UniqueInfo& uniqueInfoOut, KeySendInfo& keySendInfo, + vector& sc, vector& splitSize); - void PushResult(unique_ptr& batch, unique_ptr> tensors, - KeysT& lookupKeys); + void PushResult(unique_ptr& batch, unique_ptr> tensors, KeysT& lookupKeys); - void PushGlobalUniqueTensors(const unique_ptr>& tensors, KeysT& lookupKeys, - int channel); + void PushGlobalUniqueTensors(const unique_ptr>& tensors, KeysT& lookupKeys, int channel); - void AddCountStartToHotPos(vector& splitKeys, vector& hotPos, - const vector& hotPosDev, const unique_ptr& batch); + void AddCountStartToHotPos(vector& splitKeys, vector& hotPos, const vector& hotPosDev, + const unique_ptr& batch); - void ComputeHotPos(const unique_ptr& batch, - absl::flat_hash_map& hotMap, vector& hotPos, - vector& restore, const int hotOffset) const; + void ComputeHotPos(const unique_ptr& batch, absl::flat_hash_map& hotMap, + vector& hotPos, vector& restore, const int hotOffset) const; - vector GetCountRecv(const unique_ptr& batch, int id, - vector>& keyCount, vector scAll, - vector ss); + vector GetCountRecv(const unique_ptr& batch, int id, vector>& keyCount, + vector scAll, vector ss); - void HashSplitHelper(const unique_ptr& batch, vector& splitKeys, - vector& restore, vector& hotPos, - vector>& keyCount); + void HashSplitHelper(const unique_ptr& batch, vector& splitKeys, vector& restore, + vector& hotPos, vector>& keyCount); template inline vector Count2Start(const vector& count) const -- Gitee From b2a422158e202b4185d556e11fb6368f5f7d5932 Mon Sep 17 00:00:00 2001 From: steepcurve Date: Mon, 22 Apr 2024 17:04:31 +0800 Subject: [PATCH 055/302] clean code --- .clang-format | 3 ++- src/core/key_process/key_process.cpp | 35 ++++++++++++++-------------- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/.clang-format b/.clang-format index ee9f3a3c..c1bb9720 100644 --- a/.clang-format +++ b/.clang-format @@ -10,6 +10,7 @@ AlignTrailingComments: true DerivePointerAlignment: false PointerAlignment: Left AllowAllParametersOfDeclarationOnNextLine: false +AllowAllArgumentsOnNextLine: false AllowShortBlocksOnASingleLine: Empty AllowShortCaseLabelsOnASingleLine: false AllowShortEnumsOnASingleLine: false @@ -34,7 +35,7 @@ BraceWrapping: BeforeCatch: false BeforeElse: false IndentBraces: false -BreakBeforeBinaryOperators: NonAssignment +BreakBeforeBinaryOperators: None BreakBeforeTernaryOperators: true BreakConstructorInitializers: BeforeColon BreakStringLiterals: true diff --git a/src/core/key_process/key_process.cpp 
b/src/core/key_process/key_process.cpp index 9751e268..c5ec9204 100644 --- a/src/core/key_process/key_process.cpp +++ b/src/core/key_process/key_process.cpp @@ -134,8 +134,8 @@ void KeyProcess::InitHotEmbTotCount(const EmbInfo& info, const RankInfo& rInfo) if (rankInfo.useDynamicExpansion) { embeddingSize = info.embeddingSize; } - hotEmbTotCount[info.name] = static_cast(static_cast(GetUBSize(rInfo.deviceId) / sizeof(float)) - * HOT_EMB_CACHE_PCT / static_cast(embeddingSize)); + hotEmbTotCount[info.name] = static_cast(static_cast(GetUBSize(rInfo.deviceId) / sizeof(float)) * + HOT_EMB_CACHE_PCT / static_cast(embeddingSize)); } OffsetMemT KeyProcess::GetMaxOffset() @@ -334,8 +334,8 @@ void KeyProcess::HashSplitHelper(const unique_ptr& batch, vector& hotPos, vector>& keyCount) { TimeCost uniqueTc; - if (m_featureAdmitAndEvict.GetFunctionSwitch() - && FeatureAdmitAndEvict::m_embStatus[batch->name] != SingleEmbTableStatus::SETS_NONE) { + if (m_featureAdmitAndEvict.GetFunctionSwitch() && + FeatureAdmitAndEvict::m_embStatus[batch->name] != SingleEmbTableStatus::SETS_NONE) { tie(splitKeys, restore, keyCount) = HashSplitWithFAAE(batch); // 按存储dev id切分并去重 } else { tie(splitKeys, restore, hotPos) = HotHashSplit(batch); // 按存储dev id切分并去重 @@ -347,8 +347,8 @@ bool KeyProcess::KeyProcessTaskHelperWithFastUnique(unique_ptr& batch int channel, int threadId) { // tuple for keyRec restore hotPos scAll countRecv - isWithFAAE = m_featureAdmitAndEvict.GetFunctionSwitch() - && FeatureAdmitAndEvict::m_embStatus[batch->name] != SingleEmbTableStatus::SETS_NONE; + isWithFAAE = m_featureAdmitAndEvict.GetFunctionSwitch() && + FeatureAdmitAndEvict::m_embStatus[batch->name] != SingleEmbTableStatus::SETS_NONE; TimeCost totalTimeCost = TimeCost(); TimeCost fastUniqueTC; UniqueInfo uniqueInfo; @@ -356,10 +356,9 @@ bool KeyProcess::KeyProcessTaskHelperWithFastUnique(unique_ptr& batch LOG_DEBUG("ProcessBatchWithFastUnique(ms):{}", fastUniqueTC.ElapsedMS()); // 特征准入&淘汰 - if (isWithFAAE - && (m_featureAdmitAndEvict.FeatureAdmit(channel, batch, uniqueInfo.all2AllInfo.keyRecv, - uniqueInfo.all2AllInfo.countRecv) - == FeatureAdmitReturnType::FEATURE_ADMIT_RETURN_ERROR)) { + const auto errStatus = FeatureAdmitReturnType::FEATURE_ADMIT_RETURN_ERROR; + if (isWithFAAE && (m_featureAdmitAndEvict.FeatureAdmit(channel, batch, uniqueInfo.all2AllInfo.keyRecv, + uniqueInfo.all2AllInfo.countRecv) == errStatus)) { LOG_ERROR(KEY_PROCESS "rank:{} thread:{}, channel:{}, Feature-admit-and-evict error ...", rankInfo.rankId, threadId, channel); return false; @@ -412,8 +411,8 @@ bool KeyProcess::KeyProcessTaskHelper(unique_ptr& batch, int channel, auto [lookupKeys, scAll, ss] = ProcessSplitKeys(batch, threadId, splitKeys); vector countRecv; - if (m_featureAdmitAndEvict.GetFunctionSwitch() - && FeatureAdmitAndEvict::m_embStatus[batch->name] != SingleEmbTableStatus::SETS_NONE) { + if (m_featureAdmitAndEvict.GetFunctionSwitch() && + FeatureAdmitAndEvict::m_embStatus[batch->name] != SingleEmbTableStatus::SETS_NONE) { countRecv = GetCountRecv(batch, threadId, keyCount, scAll, ss); } std::lock_guard lock(loadSaveMut[channel][threadId]); @@ -421,10 +420,10 @@ bool KeyProcess::KeyProcessTaskHelper(unique_ptr& batch, int channel, BuildRestoreVec(batch, ss, restore, static_cast(hotPos.size())); // 特征准入&淘汰 - if (m_featureAdmitAndEvict.GetFunctionSwitch() - && FeatureAdmitAndEvict::m_embStatus[batch->name] != SingleEmbTableStatus::SETS_NONE - && (m_featureAdmitAndEvict.FeatureAdmit(channel, batch, lookupKeys, countRecv) - == 
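Editor's note: the key_process.cpp hunks in this commit are mechanical reflows driven by the `.clang-format` change above: `BreakBeforeBinaryOperators: None` places the break *after* a binary operator, whereas the previous `NonAssignment` setting broke *before* non-assignment operators. A compilable illustration with invented condition names:

// Old (.clang-format: BreakBeforeBinaryOperators: NonAssignment):
//     return switchEnabled
//            && statusKnown;
// New (.clang-format: BreakBeforeBinaryOperators: None):
bool BothHold(bool switchEnabled, bool statusKnown)
{
    return switchEnabled &&
           statusKnown;
}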
FeatureAdmitReturnType::FEATURE_ADMIT_RETURN_ERROR)) { + if (m_featureAdmitAndEvict.GetFunctionSwitch() && + FeatureAdmitAndEvict::m_embStatus[batch->name] != SingleEmbTableStatus::SETS_NONE && + (m_featureAdmitAndEvict.FeatureAdmit(channel, batch, lookupKeys, countRecv) == + FeatureAdmitReturnType::FEATURE_ADMIT_RETURN_ERROR)) { LOG_ERROR(KEY_PROCESS "rank:{} thread:{}, channel:{}, Feature-admit-and-evict error ...", rankInfo.rankId, threadId, channel); return false; @@ -464,8 +463,8 @@ bool KeyProcess::KeyProcessTaskHelper(unique_ptr& batch, int channel, void KeyProcess::PushGlobalUniqueTensors(const unique_ptr>& tensors, KeysT& lookupKeys, int channel) { - if (GlobalEnv::applyGradientsStrategy == ApplyGradientsStrategyOptions::SUM_SAME_ID_GRADIENTS_AND_APPLY - && channel == TRAIN_CHANNEL_ID) { + if (GlobalEnv::applyGradientsStrategy == ApplyGradientsStrategyOptions::SUM_SAME_ID_GRADIENTS_AND_APPLY && + channel == TRAIN_CHANNEL_ID) { KeysT uniqueKeys; vector restoreVecSec; -- Gitee From 622cde53968d0c31535f50a0257442ef6f996a7c Mon Sep 17 00:00:00 2001 From: longfeifei <962977793@qq.com> Date: Tue, 23 Apr 2024 15:42:53 +0800 Subject: [PATCH 056/302] =?UTF-8?q?warm=20start=E5=8A=9F=E8=83=BD=E5=AE=9E?= =?UTF-8?q?=E7=8E=B0=EF=BC=8C=E5=AE=9E=E7=8E=B0=E4=BB=8E=E5=A4=9A=E4=B8=AA?= =?UTF-8?q?=E6=A8=A1=E5=9E=8B=E8=B7=AF=E5=BE=84=E5=8A=A0=E8=BD=BD=E6=A8=A1?= =?UTF-8?q?=E5=9E=8B=E5=8F=82=E6=95=B0=E3=80=81=E7=A8=80=E7=96=8F=E8=A1=A8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mx_rec/saver/warm_start.py | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/mx_rec/saver/warm_start.py b/mx_rec/saver/warm_start.py index 53324b06..31a5e358 100644 --- a/mx_rec/saver/warm_start.py +++ b/mx_rec/saver/warm_start.py @@ -80,21 +80,19 @@ def patch_for_func_warm_start(func): def wrapper(*args, **kwargs): ckpt_to_initialize_from = args[0] if isinstance(ckpt_to_initialize_from, (list, tuple)): - vars_to_warm_start_list = kwargs.get('vars_to_warm_start') - var_name_to_prev_var_name_list = kwargs.get('var_name_to_prev_var_name') - results = [] + vars_to_warm_start_list = args[1] + var_name_to_prev_var_name_list = args[3] for i in range(len(ckpt_to_initialize_from)): - results.append( - func(ckpt_to_initialize_from[i], vars_to_warm_start_list[i], var_name_to_prev_var_name_list[i], - args[3:], **kwargs)) - return results + f = func(ckpt_to_initialize_from[i], vars_to_warm_start_list[i], var_name_to_prev_var_name_list[i], + args[3:], **kwargs) + return f else: return func(*args, **kwargs) return wrapper def patch_for_estimator_train(func): def warpper(*args, **kwargs): - hooks = kwargs.get('hook', []) + hooks = kwargs.get('hooks', []) if WarmStartController().get_elements(): hooks.append(SparseRestoreHook()) return func(*args, *kwargs) @@ -193,11 +191,10 @@ def _warm_settings_filter(warm_start_setting): # 如果匹配到了,那么这个warm_start_settings对于dense部分就是无效的 # add WarmStartController(path:table_name) if matching_tables: - warm_start_setting = None #add controller to set sparse WarmStartController().add_element(vars_to_warm_start.ckpt_to_initialize_from, matching_tables) - if vars_to_warm_start != ".*": - return None + if vars_to_warm_start != ".*": + return None # path: embedding_table_name return warm_start_setting elif all(isinstance(v, str) for v in vars_to_warm_start): -- Gitee From 32685509402c9cf3a2fffc3a5c762c146788fe1d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=BD=97=E5=B9=B8=E8=BF=90?= Date: Tue, 23 Apr 2024 16:08:27 +0800 Subject: 
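Editor's note: in the warm-start wrapper committed above, `f = func(...)` is followed by `return f` at what appears to be loop depth, so only the first checkpoint in the list would ever be warm-started; the deleted `results` list was what allowed every checkpoint to be processed. A sketch of a multi-checkpoint fan-out that keeps all results, assuming the positional layout used in the diff (args[0] checkpoint paths, args[1] variable specs, args[3] name maps); forwarding the trailing positionals as *args[4:] is also an assumption, since the diff passes args[3:] as a single argument:

def patch_for_func_warm_start(func):
    def wrapper(*args, **kwargs):
        ckpt_paths = args[0]
        if not isinstance(ckpt_paths, (list, tuple)):
            return func(*args, **kwargs)
        results = []
        for ckpt, var_spec, name_map in zip(ckpt_paths, args[1], args[3]):
            # one warm start per checkpoint; collect instead of returning early
            results.append(func(ckpt, var_spec, name_map, *args[4:], **kwargs))
        return results
    return wrapper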
[PATCH 057/302] =?UTF-8?q?=E3=80=90=E4=BF=AE=E6=94=B9=E8=AF=B4?=
 =?UTF-8?q?=E6=98=8E=20Modification=E3=80=91=E6=A0=B9=E6=8D=AE=E4=BC=98?=
 =?UTF-8?q?=E5=8C=96=E5=99=A8=E7=B1=BB=E5=9E=8B=E8=87=AA=E5=8A=A8=E5=88=A4?=
 =?UTF-8?q?=E6=96=AD=E6=98=AF=E5=90=A6=E5=BC=80=E5=90=AF=E5=85=A8=E5=B1=80?=
 =?UTF-8?q?=E5=8E=BB=E9=87=8D=E7=89=B9=E6=80=A7?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
 mx_rec/core/asc/manager.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mx_rec/core/asc/manager.py b/mx_rec/core/asc/manager.py
index ef4597b2..64611295 100644
--- a/mx_rec/core/asc/manager.py
+++ b/mx_rec/core/asc/manager.py
@@ -204,7 +204,7 @@ def initialize_emb_cache(table_info_list, threshold_list):
         option = option | USE_DYNAMIC_EXPANSION
     optimizer = ConfigInitializer.get_instance().optimizer_config.optimizer_instance
-    if optimizer.derivative == 2:
+    if optimizer and optimizer.derivative == 2:
         option = option | USE_SUM_SAME_ID_GRADIENTS
     # [train_steps, eval_steps, save_steps] pass step information to HybridMgmt for data process loop
-- Gitee

From c13c7a6e9267e7b87cc77ce1798ffa1f179f0f69 Mon Sep 17 00:00:00 2001
From: longfeifei <962977793@qq.com>
Date: Wed, 24 Apr 2024 16:53:18 +0800
Subject: [PATCH 058/302] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E9=80=9A=E8=AE=AF?=
 =?UTF-8?q?=E7=9F=A9=E9=98=B5=EF=BC=8C=E4=BC=98=E5=8C=96=E6=8F=8F=E8=BF=B0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
 ...\344\277\241\347\237\251\351\230\265.xlsx" | Bin 31412 -> 31424 bytes
 1 file changed, 0 insertions(+), 0 deletions(-)

diff --git "a/docs/MindX 6.0.RC1 \351\200\232\344\277\241\347\237\251\351\230\265.xlsx" "b/docs/MindX 6.0.RC1 \351\200\232\344\277\241\347\237\251\351\230\265.xlsx"
index 5224de2b2f5ed98b0b0d0b0c15e65ebf98ad993e..9e14cd61ae280c9144d8def5fec2024625260f48 100644
GIT binary patch
delta 8975
[base85-encoded binary delta omitted: opaque xlsx payload, not reproducible as text]
delta 8989
[base85-encoded binary delta omitted: opaque xlsx payload, not reproducible as text]
-- Gitee

From 54b16a1e0f01ef660f27c66423a6b5e34294d33e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E7=BD=97=E5=B9=B8=E8=BF=90?=
Date: Wed, 24 Apr 2024 17:07:32 +0800
Subject: [PATCH 059/302] =?UTF-8?q?=E3=80=90=E4=BF=AE=E6=94=B9=E8=AF=B4?=
 =?UTF-8?q?=E6=98=8E=20Modification=E3=80=91slot=E5=92=8Cderivative?=
 =?UTF-8?q?=E7=A7=BB=E8=87=B3=E4=B8=8A=E5=B1=82base?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
 examples/dlrm/model/gradient_descent_w.py     |  8 --------
 mx_rec/optimizers/adagrad.py                  |  8 --------
 mx_rec/optimizers/base.py                     | 10 ++++++++++
 mx_rec/optimizers/ftrl.py                     |  8 --------
 mx_rec/optimizers/gradient_descent.py         |  8 --------
 mx_rec/optimizers/gradient_descent_by_addr.py |  8 --------
 mx_rec/optimizers/lazy_adam.py                |  8 --------
 mx_rec/optimizers/lazy_adam_by_addr.py        |  8 --------
 8
files changed, 10 insertions(+), 56 deletions(-) diff --git a/examples/dlrm/model/gradient_descent_w.py b/examples/dlrm/model/gradient_descent_w.py index 6c34b726..a2a5635a 100644 --- a/examples/dlrm/model/gradient_descent_w.py +++ b/examples/dlrm/model/gradient_descent_w.py @@ -50,14 +50,6 @@ class CustomizedGradientDescentWithWeighDecay(gradient_descent.GradientDescentOp self._slot_num = 0 self._derivative = 1 - @property - def slot_num(self): - return self._slot_num - - @property - def derivative(self): - return self._derivative - def initialize_slots(self, var, table_instance): logger.info("no slot for gradient descent") return [] diff --git a/mx_rec/optimizers/adagrad.py b/mx_rec/optimizers/adagrad.py index fe8a0a2d..125346b9 100644 --- a/mx_rec/optimizers/adagrad.py +++ b/mx_rec/optimizers/adagrad.py @@ -80,14 +80,6 @@ class CustomizedAdagrad(adagrad.AdagradOptimizer, CustomizedOptimizer): self._slot_num = 1 self._derivative = 2 - @property - def slot_num(self): - return self._slot_num - - @property - def derivative(self): - return self._derivative - def initialize_slots(self, var, table_instance): # Create slots for the first and second moments. def creat_one_single_slot(var, op_name): diff --git a/mx_rec/optimizers/base.py b/mx_rec/optimizers/base.py index 49594d40..ed765539 100644 --- a/mx_rec/optimizers/base.py +++ b/mx_rec/optimizers/base.py @@ -79,6 +79,16 @@ class CustomizedOptimizer: def __init__(self): self.unique_name = "" self.base_name = "" + self._slot_num = 0 # 优化器对应slot的个数 + self._derivative = 1 # 优化器阶数,如果不做全局去重可以数学等价,则为1阶,其余2阶 + + @property + def slot_num(self): + return self._slot_num + + @property + def derivative(self): + return self._derivative @staticmethod def sum_same_id_gradients(grad, var, is_expansion): diff --git a/mx_rec/optimizers/ftrl.py b/mx_rec/optimizers/ftrl.py index 855fa9c4..ef617c2d 100644 --- a/mx_rec/optimizers/ftrl.py +++ b/mx_rec/optimizers/ftrl.py @@ -82,14 +82,6 @@ class CustomizedFtrl(ftrl.FtrlOptimizer, CustomizedOptimizer): self._slot_num = 2 self._derivative = 2 - @property - def slot_num(self): - return self._slot_num - - @property - def derivative(self): - return self._derivative - def initialize_slots(self, var, table_instance): val = constant_op.constant( self._initial_accumulator_value, dtype=var.dtype, shape=var.get_shape()) diff --git a/mx_rec/optimizers/gradient_descent.py b/mx_rec/optimizers/gradient_descent.py index 2ba72789..d021f69f 100644 --- a/mx_rec/optimizers/gradient_descent.py +++ b/mx_rec/optimizers/gradient_descent.py @@ -57,14 +57,6 @@ class CustomizedGradientDescent(gradient_descent.GradientDescentOptimizer, Custo self._slot_num = 0 self._derivative = 1 - @property - def slot_num(self): - return self._slot_num - - @property - def derivative(self): - return self._derivative - def initialize_slots(self, var, table_instance): return [] diff --git a/mx_rec/optimizers/gradient_descent_by_addr.py b/mx_rec/optimizers/gradient_descent_by_addr.py index 11a9fda6..9db7c2ae 100644 --- a/mx_rec/optimizers/gradient_descent_by_addr.py +++ b/mx_rec/optimizers/gradient_descent_by_addr.py @@ -62,14 +62,6 @@ class CustomizedGradientDescentByAddr(gradient_descent.GradientDescentOptimizer, self._slot_num = 0 self._derivative = 1 - @property - def slot_num(self): - return self._slot_num - - @property - def derivative(self): - return self._derivative - def initialize_slots(self, var, table_instance): return [] diff --git a/mx_rec/optimizers/lazy_adam.py b/mx_rec/optimizers/lazy_adam.py index 6ac7e844..1f491d14 100644 --- 
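Editor's note: this commit de-duplicates the `slot_num`/`derivative` properties into the `CustomizedOptimizer` base class, so each optimizer now only sets the two backing fields. The resulting shape, reduced to the essentials (the real subclasses also inherit from the corresponding TensorFlow optimizer; comments translate the Chinese field comments in base.py):

class CustomizedOptimizer:
    def __init__(self):
        self.unique_name = ""
        self.base_name = ""
        self._slot_num = 0    # number of optimizer slots per variable
        self._derivative = 1  # 1 if global dedup is mathematically equivalent, else 2

    @property
    def slot_num(self):
        return self._slot_num

    @property
    def derivative(self):
        return self._derivative


class CustomizedLazyAdam(CustomizedOptimizer):
    def __init__(self):
        super().__init__()
        self._slot_num = 2    # first- and second-moment slots
        self._derivative = 2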
a/mx_rec/optimizers/lazy_adam.py +++ b/mx_rec/optimizers/lazy_adam.py @@ -74,14 +74,6 @@ class CustomizedLazyAdam(adam.AdamOptimizer, CustomizedOptimizer): self._slot_num = 2 self._derivative = 2 - @property - def slot_num(self): - return self._slot_num - - @property - def derivative(self): - return self._derivative - def initialize_slots(self, var, table_instance): # Create slots for the first and second moments. def creat_one_single_slot(var, op_name): diff --git a/mx_rec/optimizers/lazy_adam_by_addr.py b/mx_rec/optimizers/lazy_adam_by_addr.py index cd4ee878..f1f8a2df 100644 --- a/mx_rec/optimizers/lazy_adam_by_addr.py +++ b/mx_rec/optimizers/lazy_adam_by_addr.py @@ -75,14 +75,6 @@ class CustomizedLazyAdamByAddress(adam.AdamOptimizer, CustomizedOptimizer): self._slot_num = 2 self._derivative = 2 - @property - def slot_num(self): - return self._slot_num - - @property - def derivative(self): - return self._derivative - def get_slot_init_values(self): # return state value list of adam that needs to initialize in ASC DDR. initial_momentum_value = 0.0 -- Gitee From b40e8d32057f45840e66bb361691e5420d6a2785 Mon Sep 17 00:00:00 2001 From: rome_zhouyang <9538256+rome_sky@user.noreply.gitee.com> Date: Wed, 24 Apr 2024 20:35:53 +0800 Subject: [PATCH 060/302] add FasterKV --- src/core/utils/MapperFast.cpp | 262 ++++++++++++++++++++++++++++++++++ src/core/utils/MapperFast.h | 97 +++++++++++++ 2 files changed, 359 insertions(+) create mode 100644 src/core/utils/MapperFast.cpp create mode 100644 src/core/utils/MapperFast.h diff --git a/src/core/utils/MapperFast.cpp b/src/core/utils/MapperFast.cpp new file mode 100644 index 00000000..3ed25102 --- /dev/null +++ b/src/core/utils/MapperFast.cpp @@ -0,0 +1,262 @@ +// +// Created by z00576261 on 2024/4/15. +// + +#include "MapperFast.h" +#include +#include +#include +#include + +RecMapper::BuckStatus RecMapper::InnerBuck::Insert(uint64_t key, uint64_t& value, std::function ValueSet) +{ + for (int i = 0; i < BUCKCAPACITY; ++i){ + uint64_t old_key = 0; + if (keys_[i].load(std::memory_order_relaxed) == 0 && keys_[i].compare_exchange_strong(old_key, key)){ + bool ret = ValueSet(); + if (!ret){ + keys_[i].store(0); + return BuckStatus::BUCK_ERROR; + } + values_[i] = value; + return BuckStatus::BUCK_NOEXIST; + } + } + return BuckStatus::BUCK_ERROR; +} + +RecMapper::BuckStatus RecMapper::InnerBuck::Find(uint64_t key, uint64_t& value) +{ + for (int i = 0; i < BUCKCAPACITY; ++i){ + if (keys_[i].load(std::memory_order_relaxed) == key){ + value = values_[i]; + return BuckStatus::BUCK_EXIST; + } + } + return BuckStatus::BUCK_NOEXIST; +} + +RecMapper::BuckStatus RecMapper::InnerBuck::Remove(uint64_t key) +{ + for (int i = 0; i < BUCKCAPACITY; ++i) { + uint64_t oldkey = key; + if (keys_[i].load(std::memory_order_relaxed) == key){ + if (keys_[i].compare_exchange_strong(oldkey, 0)){ + values_[i] = 0; + return BuckStatus::BUCK_EXIST; + } + } + } + return BUCK_ERROR; +} + +bool RecMapper::MapperFast::InitializeBuck() +{ + uint16_t i = 0; + + while(i <= prime_max){ + if (pow(2, i) < reserve_){ + i++; + continue; + } + break; + } + buck_count_ = i < 7 ? 
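Editor's note: `InnerBuck::Insert` above claims a slot lock-free: key 0 marks an empty slot, and `compare_exchange_strong` installs the new key only if the slot is still empty, so two threads racing for the same slot cannot both win. The claim step in isolation (a sketch; slot count and names are illustrative):

#include <atomic>
#include <cstdint>

// Try to claim one of N slots for `key`; returns the slot index or -1.
// Assumes key != 0, because 0 doubles as the "empty" marker.
template <int N>
int ClaimSlot(std::atomic<std::uint64_t> (&slots)[N], std::uint64_t key)
{
    for (int i = 0; i < N; ++i) {
        std::uint64_t expected = 0;
        if (slots[i].load(std::memory_order_relaxed) == 0 &&
            slots[i].compare_exchange_strong(expected, key)) {
            return i; // this thread owns the slot now
        }
    }
    return -1; // bucket full; the caller chains a new bucket
}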
128 : pow(2, i); + + for(auto &buck_map : buck_maps_){ + InnerBuck* buck_map_temp = new (std::nothrow) InnerBuck[buck_count_]; + if (buck_map_temp == nullptr) { + FreeBuckMaps(); + return false; + } + memset(buck_map_temp, 0, sizeof(InnerBuck) * buck_count_); + buck_map = buck_map_temp; + } + return true; +} + +void RecMapper::MapperFast::UnInitializeBuck() +{ + FreeBuckExpend(); + FreeBuckMaps(); +} + +void RecMapper::MapperFast::FreeBuckMaps() +{ + for (auto &buck_map : buck_maps_){ + if (buck_map != nullptr){ + delete[] buck_map; + buck_map = nullptr; + } + } +} + +void RecMapper::MapperFast::FreeBuckExpend() +{ + for (auto &buck_map : buck_maps_ ){ + if (buck_map == nullptr){ + continue; + } + for (uint32_t i = 0; i < buck_count_; ++i){ + InnerBuck* buck_attch = buck_map[i].next_; + while (buck_attch != nullptr){ + InnerBuck* buck_attch_temp = buck_attch->next_; + delete buck_attch; + buck_attch = buck_attch_temp; + } + } + } +} + +RecMapper::MapperStatus RecMapper::MapperFast::Put(uint64_t key, uint64_t& value) +{ + if (size_.load() > capacity_){ + return MapperStatus::MAPPER_ERROR; + } + + if(key == 0){ + if (spec_buck != nullptr) { + spec_buck->spin.lock(); + spec_buck->Find(key, value); + spec_buck->spin.unlock(); + return MapperStatus::MAPPER_OK; + } + spec_buck = new (std::nothrow) InnerBuck; + memset(spec_buck, 0, sizeof(InnerBuck)); + spec_buck->spin.lock(); + spec_buck->keys_[0].store(key); + spec_buck->values_[0] = offset_.fetch_add(1) + 1; + size_.fetch_add(1); + spec_buck->spin.unlock(); + return MapperStatus::MAPPER_OK; + } + InnerBuck* buck = &(buck_maps_[key % sub_map_count][key % buck_count_]); + //first,find key if exist in buck + while(buck != nullptr){ + buck->spin.lock(); + if(buck->Find(key, value) == BuckStatus::BUCK_EXIST){ + buck->spin.unlock(); + return MapperStatus::MAPPER_OK; + } + buck->spin.unlock(); + if(buck->next_ != nullptr){ + buck = buck->next_; + } else{ + break; + } + } + + //if not find, + for (int i = 0; i < 8192; ++i){ + // insert exist buck + while(buck != nullptr){ + buck->spin.lock(); + auto value_func = [&]() ->bool { + value = offset_.fetch_add(1); + return true;}; + BuckStatus ret = buck->Insert(key, value, value_func); + + buck->spin.unlock(); + if (ret == BuckStatus::BUCK_ERROR) { + return MapperStatus::MAPPER_ERROR; + } else if (ret == BuckStatus::BUCK_NOEXIST) { + size_.fetch_add(1); + return MapperStatus::MAPPER_OK; + } + if (buck->next_ != nullptr) { + buck = buck->next_; + } else { + break; + } + } + + //insert not exist buck + auto& old_spin = buck->spin; + old_spin.lock(); + if (buck->next_ != nullptr) { + buck = buck->next_; + old_spin.unlock(); + continue; + } + + InnerBuck* new_buck = new (std::nothrow) InnerBuck; + memset(new_buck, 0, sizeof(InnerBuck)); + buck->next_ = new_buck; + buck = new_buck; + old_spin.unlock(); + } + return MapperStatus::MAPPER_ERROR; +} + +RecMapper::MapperStatus RecMapper::MapperFast::Find(uint64_t key, uint64_t& value) { + if(key == 0) { + if (spec_buck != nullptr) { + spec_buck->spin.lock(); + value = spec_buck->values_[0]; + spec_buck->spin.unlock(); + return MapperStatus::MAPPER_OK; + } + return MapperStatus::MAPPER_INVALID; + } + InnerBuck* buck = &(buck_maps_[key % sub_map_count][key % buck_count_]); + if (buck == nullptr) { + return MapperStatus::MAPPER_ERROR; + } + if (buck->Find(key,value) == BuckStatus::BUCK_NOEXIST) { + return MapperStatus::MAPPER_INVALID; + } + return MapperStatus::MAPPER_OK; +} + +RecMapper::MapperStatus RecMapper::MapperFast::Remove(uint64_t key) +{ + if(key == 0) { + if 
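Editor's note: `InitializeBuck` above sizes the bucket array to the smallest power of two >= `reserve_`, with a floor of 128 (the `i < 7` case). The same rounding can be done with shifts, avoiding the floating-point round-trip through `pow`; a sketch assuming the reserve fits in 32 bits, as the original's `prime_max = 32` implies:

#include <cstdint>

// Smallest power of two >= n, clamped below at 128.
std::uint32_t BucketCount(std::uint32_t n)
{
    std::uint32_t count = 128;
    while (count < n) {
        count <<= 1;
    }
    return count;
}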
(spec_buck != nullptr) { + delete spec_buck; + spec_buck = nullptr; + size_.fetch_sub(1); + return MapperStatus::MAPPER_OK; + } + return MapperStatus::MAPPER_INVALID; + } + InnerBuck* buck = &(buck_maps_[key % sub_map_count][key % buck_count_]); + while(buck != nullptr) { + uint64_t value; + if (buck->Find(key, value) == BuckStatus::BUCK_NOEXIST) { + return MapperStatus::MAPPER_INVALID; + } + + buck->spin.lock(); + if (buck->Remove(key) == BuckStatus::BUCK_EXIST){ + size_.fetch_sub(1); + return MapperStatus::MAPPER_OK; + } + buck = buck->next_; + } + return MapperStatus::MAPPER_INVALID; +} + +RecMapper::MapperStatus RecMapper::MapperFast::ToVector(std::vector>& vec) +{ + if (spec_buck != nullptr) { + vec.push_back(std::make_pair(spec_buck->keys_[0], spec_buck->values_[0])); + } + for (auto& sub_map : buck_maps_){ + if (sub_map == nullptr){ + continue; + } + for(int i = 0; i < buck_count_; ++i){ + InnerBuck* buck = &sub_map[i]; + while(buck) { + for (int j = 0; j < BUCKCAPACITY; ++j){ + if (buck->keys_[j] == 0) { + continue; + } + vec.push_back(std::make_pair(buck->keys_[j], buck->values_[j])); + } + buck = buck->next_; + } + } + } + return MapperStatus::MAPPER_OK; +} \ No newline at end of file diff --git a/src/core/utils/MapperFast.h b/src/core/utils/MapperFast.h new file mode 100644 index 00000000..0ad73d5b --- /dev/null +++ b/src/core/utils/MapperFast.h @@ -0,0 +1,97 @@ +// +// Created by z00576261 on 2024/4/15. +// + +#ifndef FAST_MAPPERFAST_H +#define FAST_MAPPERFAST_H + +#include +#include +#include +#include +#include + +namespace RecMapper { + constexpr int BUCKCAPACITY = 3; + enum BuckStatus{ + BUCK_EXIST, + BUCK_NOEXIST, + BUCK_ERROR + }; + + enum MapperStatus{ + MAPPER_ERROR, + MAPPER_INVALID, + MAPPER_OK + }; + + class SpinLock { + public: + SpinLock() = default; + SpinLock(const SpinLock&) = delete; + SpinLock& operator=(const SpinLock) = delete; + + void lock() { + while(f.test_and_set(std::memory_order_acquire)); + } + + void unlock() { + f.clear(std::memory_order_release); + } + + private: + std::atomic_flag f; + }; + + struct InnerBuck{ + std::atomic keys_[BUCKCAPACITY]{}; + int64_t values_[BUCKCAPACITY]{}; + InnerBuck* next_ = nullptr; + SpinLock spin; + + BuckStatus Insert(uint64_t, uint64_t&, std::function); + BuckStatus Find(uint64_t, uint64_t&); + BuckStatus Remove(uint64_t); + + }; + + class MapperFast { + public: + MapperFast(uint64_t cap, uint64_t res) : capacity_(cap), reserve_(res) {}; + + ~MapperFast() = default; + + bool InitializeBuck(); + void UnInitializeBuck(); + + MapperStatus Put(uint64_t key, uint64_t& value); + + MapperStatus Find(uint64_t key, uint64_t& value); + + MapperStatus Remove(uint64_t key); + + MapperStatus ToVector(std::vector>& vec); + + uint64_t Size() { + return size_.load(); + } + + private: + void FreeBuckMaps(); + void FreeBuckExpend(); + + std::atomic size_{ 0 }; + std::atomic offset_{ 0 }; + uint64_t capacity_; + uint64_t reserve_; + uint32_t buck_count_; + + static constexpr uint32_t sub_map_count = 5; + static constexpr uint32_t prime_max = 32; + + InnerBuck* buck_maps_[sub_map_count] {}; + InnerBuck* spec_buck = nullptr; + }; +} + +#endif //FAST_MAPPERFAST_H -- Gitee From b2125d0db79021a5f1142c161fdcea90395a7cfd Mon Sep 17 00:00:00 2001 From: rome_zhouyang <9538256+rome_sky@user.noreply.gitee.com> Date: Wed, 24 Apr 2024 20:43:14 +0800 Subject: [PATCH 061/302] add FasterKV fix --- src/core/utils/mapper_fast.cpp | 262 +++++++++++++++++++++++++++++++++ src/core/utils/mapper_fast.h | 97 ++++++++++++ 2 files changed, 359 insertions(+) 
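Editor's note: the `SpinLock` in MapperFast.h is the textbook `std::atomic_flag` spinlock: `test_and_set(acquire)` loops until the flag was previously clear, and `clear(release)` publishes the critical section's writes to the next acquirer. One detail worth flagging: the committed class leaves the flag default-initialized, which is only guaranteed to start clear from C++20 on; pre-C++20 code normally writes `ATOMIC_FLAG_INIT`, as in this sketch:

#include <atomic>

class SpinLock {
public:
    void lock()
    {
        // spins while another thread holds the flag
        while (flag_.test_and_set(std::memory_order_acquire)) {
        }
    }
    void unlock()
    {
        flag_.clear(std::memory_order_release);
    }

private:
    std::atomic_flag flag_ = ATOMIC_FLAG_INIT;
};

// usage: lk.lock(); ...critical section...; lk.unlock();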
create mode 100644 src/core/utils/mapper_fast.cpp create mode 100644 src/core/utils/mapper_fast.h diff --git a/src/core/utils/mapper_fast.cpp b/src/core/utils/mapper_fast.cpp new file mode 100644 index 00000000..3ed25102 --- /dev/null +++ b/src/core/utils/mapper_fast.cpp @@ -0,0 +1,262 @@ +// +// Created by z00576261 on 2024/4/15. +// + +#include "MapperFast.h" +#include +#include +#include +#include + +RecMapper::BuckStatus RecMapper::InnerBuck::Insert(uint64_t key, uint64_t& value, std::function ValueSet) +{ + for (int i = 0; i < BUCKCAPACITY; ++i){ + uint64_t old_key = 0; + if (keys_[i].load(std::memory_order_relaxed) == 0 && keys_[i].compare_exchange_strong(old_key, key)){ + bool ret = ValueSet(); + if (!ret){ + keys_[i].store(0); + return BuckStatus::BUCK_ERROR; + } + values_[i] = value; + return BuckStatus::BUCK_NOEXIST; + } + } + return BuckStatus::BUCK_ERROR; +} + +RecMapper::BuckStatus RecMapper::InnerBuck::Find(uint64_t key, uint64_t& value) +{ + for (int i = 0; i < BUCKCAPACITY; ++i){ + if (keys_[i].load(std::memory_order_relaxed) == key){ + value = values_[i]; + return BuckStatus::BUCK_EXIST; + } + } + return BuckStatus::BUCK_NOEXIST; +} + +RecMapper::BuckStatus RecMapper::InnerBuck::Remove(uint64_t key) +{ + for (int i = 0; i < BUCKCAPACITY; ++i) { + uint64_t oldkey = key; + if (keys_[i].load(std::memory_order_relaxed) == key){ + if (keys_[i].compare_exchange_strong(oldkey, 0)){ + values_[i] = 0; + return BuckStatus::BUCK_EXIST; + } + } + } + return BUCK_ERROR; +} + +bool RecMapper::MapperFast::InitializeBuck() +{ + uint16_t i = 0; + + while(i <= prime_max){ + if (pow(2, i) < reserve_){ + i++; + continue; + } + break; + } + buck_count_ = i < 7 ? 128 : pow(2, i); + + for(auto &buck_map : buck_maps_){ + InnerBuck* buck_map_temp = new (std::nothrow) InnerBuck[buck_count_]; + if (buck_map_temp == nullptr) { + FreeBuckMaps(); + return false; + } + memset(buck_map_temp, 0, sizeof(InnerBuck) * buck_count_); + buck_map = buck_map_temp; + } + return true; +} + +void RecMapper::MapperFast::UnInitializeBuck() +{ + FreeBuckExpend(); + FreeBuckMaps(); +} + +void RecMapper::MapperFast::FreeBuckMaps() +{ + for (auto &buck_map : buck_maps_){ + if (buck_map != nullptr){ + delete[] buck_map; + buck_map = nullptr; + } + } +} + +void RecMapper::MapperFast::FreeBuckExpend() +{ + for (auto &buck_map : buck_maps_ ){ + if (buck_map == nullptr){ + continue; + } + for (uint32_t i = 0; i < buck_count_; ++i){ + InnerBuck* buck_attch = buck_map[i].next_; + while (buck_attch != nullptr){ + InnerBuck* buck_attch_temp = buck_attch->next_; + delete buck_attch; + buck_attch = buck_attch_temp; + } + } + } +} + +RecMapper::MapperStatus RecMapper::MapperFast::Put(uint64_t key, uint64_t& value) +{ + if (size_.load() > capacity_){ + return MapperStatus::MAPPER_ERROR; + } + + if(key == 0){ + if (spec_buck != nullptr) { + spec_buck->spin.lock(); + spec_buck->Find(key, value); + spec_buck->spin.unlock(); + return MapperStatus::MAPPER_OK; + } + spec_buck = new (std::nothrow) InnerBuck; + memset(spec_buck, 0, sizeof(InnerBuck)); + spec_buck->spin.lock(); + spec_buck->keys_[0].store(key); + spec_buck->values_[0] = offset_.fetch_add(1) + 1; + size_.fetch_add(1); + spec_buck->spin.unlock(); + return MapperStatus::MAPPER_OK; + } + InnerBuck* buck = &(buck_maps_[key % sub_map_count][key % buck_count_]); + //first,find key if exist in buck + while(buck != nullptr){ + buck->spin.lock(); + if(buck->Find(key, value) == BuckStatus::BUCK_EXIST){ + buck->spin.unlock(); + return MapperStatus::MAPPER_OK; + } + buck->spin.unlock(); + 
if(buck->next_ != nullptr){ + buck = buck->next_; + } else{ + break; + } + } + + //if not find, + for (int i = 0; i < 8192; ++i){ + // insert exist buck + while(buck != nullptr){ + buck->spin.lock(); + auto value_func = [&]() ->bool { + value = offset_.fetch_add(1); + return true;}; + BuckStatus ret = buck->Insert(key, value, value_func); + + buck->spin.unlock(); + if (ret == BuckStatus::BUCK_ERROR) { + return MapperStatus::MAPPER_ERROR; + } else if (ret == BuckStatus::BUCK_NOEXIST) { + size_.fetch_add(1); + return MapperStatus::MAPPER_OK; + } + if (buck->next_ != nullptr) { + buck = buck->next_; + } else { + break; + } + } + + //insert not exist buck + auto& old_spin = buck->spin; + old_spin.lock(); + if (buck->next_ != nullptr) { + buck = buck->next_; + old_spin.unlock(); + continue; + } + + InnerBuck* new_buck = new (std::nothrow) InnerBuck; + memset(new_buck, 0, sizeof(InnerBuck)); + buck->next_ = new_buck; + buck = new_buck; + old_spin.unlock(); + } + return MapperStatus::MAPPER_ERROR; +} + +RecMapper::MapperStatus RecMapper::MapperFast::Find(uint64_t key, uint64_t& value) { + if(key == 0) { + if (spec_buck != nullptr) { + spec_buck->spin.lock(); + value = spec_buck->values_[0]; + spec_buck->spin.unlock(); + return MapperStatus::MAPPER_OK; + } + return MapperStatus::MAPPER_INVALID; + } + InnerBuck* buck = &(buck_maps_[key % sub_map_count][key % buck_count_]); + if (buck == nullptr) { + return MapperStatus::MAPPER_ERROR; + } + if (buck->Find(key,value) == BuckStatus::BUCK_NOEXIST) { + return MapperStatus::MAPPER_INVALID; + } + return MapperStatus::MAPPER_OK; +} + +RecMapper::MapperStatus RecMapper::MapperFast::Remove(uint64_t key) +{ + if(key == 0) { + if (spec_buck != nullptr) { + delete spec_buck; + spec_buck = nullptr; + size_.fetch_sub(1); + return MapperStatus::MAPPER_OK; + } + return MapperStatus::MAPPER_INVALID; + } + InnerBuck* buck = &(buck_maps_[key % sub_map_count][key % buck_count_]); + while(buck != nullptr) { + uint64_t value; + if (buck->Find(key, value) == BuckStatus::BUCK_NOEXIST) { + return MapperStatus::MAPPER_INVALID; + } + + buck->spin.lock(); + if (buck->Remove(key) == BuckStatus::BUCK_EXIST){ + size_.fetch_sub(1); + return MapperStatus::MAPPER_OK; + } + buck = buck->next_; + } + return MapperStatus::MAPPER_INVALID; +} + +RecMapper::MapperStatus RecMapper::MapperFast::ToVector(std::vector>& vec) +{ + if (spec_buck != nullptr) { + vec.push_back(std::make_pair(spec_buck->keys_[0], spec_buck->values_[0])); + } + for (auto& sub_map : buck_maps_){ + if (sub_map == nullptr){ + continue; + } + for(int i = 0; i < buck_count_; ++i){ + InnerBuck* buck = &sub_map[i]; + while(buck) { + for (int j = 0; j < BUCKCAPACITY; ++j){ + if (buck->keys_[j] == 0) { + continue; + } + vec.push_back(std::make_pair(buck->keys_[j], buck->values_[j])); + } + buck = buck->next_; + } + } + } + return MapperStatus::MAPPER_OK; +} \ No newline at end of file diff --git a/src/core/utils/mapper_fast.h b/src/core/utils/mapper_fast.h new file mode 100644 index 00000000..0ad73d5b --- /dev/null +++ b/src/core/utils/mapper_fast.h @@ -0,0 +1,97 @@ +// +// Created by z00576261 on 2024/4/15. 
+// + +#ifndef FAST_MAPPERFAST_H +#define FAST_MAPPERFAST_H + +#include +#include +#include +#include +#include + +namespace RecMapper { + constexpr int BUCKCAPACITY = 3; + enum BuckStatus{ + BUCK_EXIST, + BUCK_NOEXIST, + BUCK_ERROR + }; + + enum MapperStatus{ + MAPPER_ERROR, + MAPPER_INVALID, + MAPPER_OK + }; + + class SpinLock { + public: + SpinLock() = default; + SpinLock(const SpinLock&) = delete; + SpinLock& operator=(const SpinLock) = delete; + + void lock() { + while(f.test_and_set(std::memory_order_acquire)); + } + + void unlock() { + f.clear(std::memory_order_release); + } + + private: + std::atomic_flag f; + }; + + struct InnerBuck{ + std::atomic keys_[BUCKCAPACITY]{}; + int64_t values_[BUCKCAPACITY]{}; + InnerBuck* next_ = nullptr; + SpinLock spin; + + BuckStatus Insert(uint64_t, uint64_t&, std::function); + BuckStatus Find(uint64_t, uint64_t&); + BuckStatus Remove(uint64_t); + + }; + + class MapperFast { + public: + MapperFast(uint64_t cap, uint64_t res) : capacity_(cap), reserve_(res) {}; + + ~MapperFast() = default; + + bool InitializeBuck(); + void UnInitializeBuck(); + + MapperStatus Put(uint64_t key, uint64_t& value); + + MapperStatus Find(uint64_t key, uint64_t& value); + + MapperStatus Remove(uint64_t key); + + MapperStatus ToVector(std::vector>& vec); + + uint64_t Size() { + return size_.load(); + } + + private: + void FreeBuckMaps(); + void FreeBuckExpend(); + + std::atomic size_{ 0 }; + std::atomic offset_{ 0 }; + uint64_t capacity_; + uint64_t reserve_; + uint32_t buck_count_; + + static constexpr uint32_t sub_map_count = 5; + static constexpr uint32_t prime_max = 32; + + InnerBuck* buck_maps_[sub_map_count] {}; + InnerBuck* spec_buck = nullptr; + }; +} + +#endif //FAST_MAPPERFAST_H -- Gitee From 54212c205f72f1347a9d9fc53ead982cec6217b4 Mon Sep 17 00:00:00 2001 From: rome_zhouyang <9538256+rome_sky@user.noreply.gitee.com> Date: Wed, 24 Apr 2024 20:46:17 +0800 Subject: [PATCH 062/302] add FasterKV fix1 --- src/core/utils/MapperFast.cpp | 262 --------------------------------- src/core/utils/MapperFast.h | 97 ------------ src/core/utils/mapper_fast.cpp | 2 +- 3 files changed, 1 insertion(+), 360 deletions(-) delete mode 100644 src/core/utils/MapperFast.cpp delete mode 100644 src/core/utils/MapperFast.h diff --git a/src/core/utils/MapperFast.cpp b/src/core/utils/MapperFast.cpp deleted file mode 100644 index 3ed25102..00000000 --- a/src/core/utils/MapperFast.cpp +++ /dev/null @@ -1,262 +0,0 @@ -// -// Created by z00576261 on 2024/4/15. 
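Editor's note: taken together, patches 060-063 add this map under two file names and then remove it again, so the class never survives in the tree; for reading the diffs, the intended call pattern per the header above is roughly the following. This compiles only against that header, and the capacity/reserve values are invented:

#include <cstdint>
// #include "mapper_fast.h"  // the header introduced above and deleted again by patch 063

void MapperFastDemo()
{
    RecMapper::MapperFast map(/*cap=*/1u << 20, /*res=*/1u << 10);
    if (!map.InitializeBuck()) {
        return; // bucket allocation failed
    }
    std::uint64_t offset = 0;
    map.Put(42, offset);  // assigns key 42 a monotonically increasing offset
    if (map.Find(42, offset) == RecMapper::MAPPER_OK) {
        // offset now holds the value recorded by Put
    }
    map.Remove(42);
    map.UnInitializeBuck();
}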
-// - -#include "MapperFast.h" -#include -#include -#include -#include - -RecMapper::BuckStatus RecMapper::InnerBuck::Insert(uint64_t key, uint64_t& value, std::function ValueSet) -{ - for (int i = 0; i < BUCKCAPACITY; ++i){ - uint64_t old_key = 0; - if (keys_[i].load(std::memory_order_relaxed) == 0 && keys_[i].compare_exchange_strong(old_key, key)){ - bool ret = ValueSet(); - if (!ret){ - keys_[i].store(0); - return BuckStatus::BUCK_ERROR; - } - values_[i] = value; - return BuckStatus::BUCK_NOEXIST; - } - } - return BuckStatus::BUCK_ERROR; -} - -RecMapper::BuckStatus RecMapper::InnerBuck::Find(uint64_t key, uint64_t& value) -{ - for (int i = 0; i < BUCKCAPACITY; ++i){ - if (keys_[i].load(std::memory_order_relaxed) == key){ - value = values_[i]; - return BuckStatus::BUCK_EXIST; - } - } - return BuckStatus::BUCK_NOEXIST; -} - -RecMapper::BuckStatus RecMapper::InnerBuck::Remove(uint64_t key) -{ - for (int i = 0; i < BUCKCAPACITY; ++i) { - uint64_t oldkey = key; - if (keys_[i].load(std::memory_order_relaxed) == key){ - if (keys_[i].compare_exchange_strong(oldkey, 0)){ - values_[i] = 0; - return BuckStatus::BUCK_EXIST; - } - } - } - return BUCK_ERROR; -} - -bool RecMapper::MapperFast::InitializeBuck() -{ - uint16_t i = 0; - - while(i <= prime_max){ - if (pow(2, i) < reserve_){ - i++; - continue; - } - break; - } - buck_count_ = i < 7 ? 128 : pow(2, i); - - for(auto &buck_map : buck_maps_){ - InnerBuck* buck_map_temp = new (std::nothrow) InnerBuck[buck_count_]; - if (buck_map_temp == nullptr) { - FreeBuckMaps(); - return false; - } - memset(buck_map_temp, 0, sizeof(InnerBuck) * buck_count_); - buck_map = buck_map_temp; - } - return true; -} - -void RecMapper::MapperFast::UnInitializeBuck() -{ - FreeBuckExpend(); - FreeBuckMaps(); -} - -void RecMapper::MapperFast::FreeBuckMaps() -{ - for (auto &buck_map : buck_maps_){ - if (buck_map != nullptr){ - delete[] buck_map; - buck_map = nullptr; - } - } -} - -void RecMapper::MapperFast::FreeBuckExpend() -{ - for (auto &buck_map : buck_maps_ ){ - if (buck_map == nullptr){ - continue; - } - for (uint32_t i = 0; i < buck_count_; ++i){ - InnerBuck* buck_attch = buck_map[i].next_; - while (buck_attch != nullptr){ - InnerBuck* buck_attch_temp = buck_attch->next_; - delete buck_attch; - buck_attch = buck_attch_temp; - } - } - } -} - -RecMapper::MapperStatus RecMapper::MapperFast::Put(uint64_t key, uint64_t& value) -{ - if (size_.load() > capacity_){ - return MapperStatus::MAPPER_ERROR; - } - - if(key == 0){ - if (spec_buck != nullptr) { - spec_buck->spin.lock(); - spec_buck->Find(key, value); - spec_buck->spin.unlock(); - return MapperStatus::MAPPER_OK; - } - spec_buck = new (std::nothrow) InnerBuck; - memset(spec_buck, 0, sizeof(InnerBuck)); - spec_buck->spin.lock(); - spec_buck->keys_[0].store(key); - spec_buck->values_[0] = offset_.fetch_add(1) + 1; - size_.fetch_add(1); - spec_buck->spin.unlock(); - return MapperStatus::MAPPER_OK; - } - InnerBuck* buck = &(buck_maps_[key % sub_map_count][key % buck_count_]); - //first,find key if exist in buck - while(buck != nullptr){ - buck->spin.lock(); - if(buck->Find(key, value) == BuckStatus::BUCK_EXIST){ - buck->spin.unlock(); - return MapperStatus::MAPPER_OK; - } - buck->spin.unlock(); - if(buck->next_ != nullptr){ - buck = buck->next_; - } else{ - break; - } - } - - //if not find, - for (int i = 0; i < 8192; ++i){ - // insert exist buck - while(buck != nullptr){ - buck->spin.lock(); - auto value_func = [&]() ->bool { - value = offset_.fetch_add(1); - return true;}; - BuckStatus ret = buck->Insert(key, value, 
value_func); - - buck->spin.unlock(); - if (ret == BuckStatus::BUCK_ERROR) { - return MapperStatus::MAPPER_ERROR; - } else if (ret == BuckStatus::BUCK_NOEXIST) { - size_.fetch_add(1); - return MapperStatus::MAPPER_OK; - } - if (buck->next_ != nullptr) { - buck = buck->next_; - } else { - break; - } - } - - //insert not exist buck - auto& old_spin = buck->spin; - old_spin.lock(); - if (buck->next_ != nullptr) { - buck = buck->next_; - old_spin.unlock(); - continue; - } - - InnerBuck* new_buck = new (std::nothrow) InnerBuck; - memset(new_buck, 0, sizeof(InnerBuck)); - buck->next_ = new_buck; - buck = new_buck; - old_spin.unlock(); - } - return MapperStatus::MAPPER_ERROR; -} - -RecMapper::MapperStatus RecMapper::MapperFast::Find(uint64_t key, uint64_t& value) { - if(key == 0) { - if (spec_buck != nullptr) { - spec_buck->spin.lock(); - value = spec_buck->values_[0]; - spec_buck->spin.unlock(); - return MapperStatus::MAPPER_OK; - } - return MapperStatus::MAPPER_INVALID; - } - InnerBuck* buck = &(buck_maps_[key % sub_map_count][key % buck_count_]); - if (buck == nullptr) { - return MapperStatus::MAPPER_ERROR; - } - if (buck->Find(key,value) == BuckStatus::BUCK_NOEXIST) { - return MapperStatus::MAPPER_INVALID; - } - return MapperStatus::MAPPER_OK; -} - -RecMapper::MapperStatus RecMapper::MapperFast::Remove(uint64_t key) -{ - if(key == 0) { - if (spec_buck != nullptr) { - delete spec_buck; - spec_buck = nullptr; - size_.fetch_sub(1); - return MapperStatus::MAPPER_OK; - } - return MapperStatus::MAPPER_INVALID; - } - InnerBuck* buck = &(buck_maps_[key % sub_map_count][key % buck_count_]); - while(buck != nullptr) { - uint64_t value; - if (buck->Find(key, value) == BuckStatus::BUCK_NOEXIST) { - return MapperStatus::MAPPER_INVALID; - } - - buck->spin.lock(); - if (buck->Remove(key) == BuckStatus::BUCK_EXIST){ - size_.fetch_sub(1); - return MapperStatus::MAPPER_OK; - } - buck = buck->next_; - } - return MapperStatus::MAPPER_INVALID; -} - -RecMapper::MapperStatus RecMapper::MapperFast::ToVector(std::vector>& vec) -{ - if (spec_buck != nullptr) { - vec.push_back(std::make_pair(spec_buck->keys_[0], spec_buck->values_[0])); - } - for (auto& sub_map : buck_maps_){ - if (sub_map == nullptr){ - continue; - } - for(int i = 0; i < buck_count_; ++i){ - InnerBuck* buck = &sub_map[i]; - while(buck) { - for (int j = 0; j < BUCKCAPACITY; ++j){ - if (buck->keys_[j] == 0) { - continue; - } - vec.push_back(std::make_pair(buck->keys_[j], buck->values_[j])); - } - buck = buck->next_; - } - } - } - return MapperStatus::MAPPER_OK; -} \ No newline at end of file diff --git a/src/core/utils/MapperFast.h b/src/core/utils/MapperFast.h deleted file mode 100644 index 0ad73d5b..00000000 --- a/src/core/utils/MapperFast.h +++ /dev/null @@ -1,97 +0,0 @@ -// -// Created by z00576261 on 2024/4/15. 
-// - -#ifndef FAST_MAPPERFAST_H -#define FAST_MAPPERFAST_H - -#include -#include -#include -#include -#include - -namespace RecMapper { - constexpr int BUCKCAPACITY = 3; - enum BuckStatus{ - BUCK_EXIST, - BUCK_NOEXIST, - BUCK_ERROR - }; - - enum MapperStatus{ - MAPPER_ERROR, - MAPPER_INVALID, - MAPPER_OK - }; - - class SpinLock { - public: - SpinLock() = default; - SpinLock(const SpinLock&) = delete; - SpinLock& operator=(const SpinLock) = delete; - - void lock() { - while(f.test_and_set(std::memory_order_acquire)); - } - - void unlock() { - f.clear(std::memory_order_release); - } - - private: - std::atomic_flag f; - }; - - struct InnerBuck{ - std::atomic keys_[BUCKCAPACITY]{}; - int64_t values_[BUCKCAPACITY]{}; - InnerBuck* next_ = nullptr; - SpinLock spin; - - BuckStatus Insert(uint64_t, uint64_t&, std::function); - BuckStatus Find(uint64_t, uint64_t&); - BuckStatus Remove(uint64_t); - - }; - - class MapperFast { - public: - MapperFast(uint64_t cap, uint64_t res) : capacity_(cap), reserve_(res) {}; - - ~MapperFast() = default; - - bool InitializeBuck(); - void UnInitializeBuck(); - - MapperStatus Put(uint64_t key, uint64_t& value); - - MapperStatus Find(uint64_t key, uint64_t& value); - - MapperStatus Remove(uint64_t key); - - MapperStatus ToVector(std::vector>& vec); - - uint64_t Size() { - return size_.load(); - } - - private: - void FreeBuckMaps(); - void FreeBuckExpend(); - - std::atomic size_{ 0 }; - std::atomic offset_{ 0 }; - uint64_t capacity_; - uint64_t reserve_; - uint32_t buck_count_; - - static constexpr uint32_t sub_map_count = 5; - static constexpr uint32_t prime_max = 32; - - InnerBuck* buck_maps_[sub_map_count] {}; - InnerBuck* spec_buck = nullptr; - }; -} - -#endif //FAST_MAPPERFAST_H diff --git a/src/core/utils/mapper_fast.cpp b/src/core/utils/mapper_fast.cpp index 3ed25102..021daaca 100644 --- a/src/core/utils/mapper_fast.cpp +++ b/src/core/utils/mapper_fast.cpp @@ -2,7 +2,7 @@ // Created by z00576261 on 2024/4/15. // -#include "MapperFast.h" +#include "mapper_fast.h" #include #include #include -- Gitee From 85cabc7f4f8feae3e10325bb4a56094bba5d7708 Mon Sep 17 00:00:00 2001 From: rome_zhouyang <9538256+rome_sky@user.noreply.gitee.com> Date: Thu, 25 Apr 2024 09:04:11 +0800 Subject: [PATCH 063/302] delete fasterKV --- src/core/utils/mapper_fast.cpp | 262 --------------------------------- src/core/utils/mapper_fast.h | 97 ------------ 2 files changed, 359 deletions(-) delete mode 100644 src/core/utils/mapper_fast.cpp delete mode 100644 src/core/utils/mapper_fast.h diff --git a/src/core/utils/mapper_fast.cpp b/src/core/utils/mapper_fast.cpp deleted file mode 100644 index 021daaca..00000000 --- a/src/core/utils/mapper_fast.cpp +++ /dev/null @@ -1,262 +0,0 @@ -// -// Created by z00576261 on 2024/4/15. 
-// - -#include "mapper_fast.h" -#include -#include -#include -#include - -RecMapper::BuckStatus RecMapper::InnerBuck::Insert(uint64_t key, uint64_t& value, std::function ValueSet) -{ - for (int i = 0; i < BUCKCAPACITY; ++i){ - uint64_t old_key = 0; - if (keys_[i].load(std::memory_order_relaxed) == 0 && keys_[i].compare_exchange_strong(old_key, key)){ - bool ret = ValueSet(); - if (!ret){ - keys_[i].store(0); - return BuckStatus::BUCK_ERROR; - } - values_[i] = value; - return BuckStatus::BUCK_NOEXIST; - } - } - return BuckStatus::BUCK_ERROR; -} - -RecMapper::BuckStatus RecMapper::InnerBuck::Find(uint64_t key, uint64_t& value) -{ - for (int i = 0; i < BUCKCAPACITY; ++i){ - if (keys_[i].load(std::memory_order_relaxed) == key){ - value = values_[i]; - return BuckStatus::BUCK_EXIST; - } - } - return BuckStatus::BUCK_NOEXIST; -} - -RecMapper::BuckStatus RecMapper::InnerBuck::Remove(uint64_t key) -{ - for (int i = 0; i < BUCKCAPACITY; ++i) { - uint64_t oldkey = key; - if (keys_[i].load(std::memory_order_relaxed) == key){ - if (keys_[i].compare_exchange_strong(oldkey, 0)){ - values_[i] = 0; - return BuckStatus::BUCK_EXIST; - } - } - } - return BUCK_ERROR; -} - -bool RecMapper::MapperFast::InitializeBuck() -{ - uint16_t i = 0; - - while(i <= prime_max){ - if (pow(2, i) < reserve_){ - i++; - continue; - } - break; - } - buck_count_ = i < 7 ? 128 : pow(2, i); - - for(auto &buck_map : buck_maps_){ - InnerBuck* buck_map_temp = new (std::nothrow) InnerBuck[buck_count_]; - if (buck_map_temp == nullptr) { - FreeBuckMaps(); - return false; - } - memset(buck_map_temp, 0, sizeof(InnerBuck) * buck_count_); - buck_map = buck_map_temp; - } - return true; -} - -void RecMapper::MapperFast::UnInitializeBuck() -{ - FreeBuckExpend(); - FreeBuckMaps(); -} - -void RecMapper::MapperFast::FreeBuckMaps() -{ - for (auto &buck_map : buck_maps_){ - if (buck_map != nullptr){ - delete[] buck_map; - buck_map = nullptr; - } - } -} - -void RecMapper::MapperFast::FreeBuckExpend() -{ - for (auto &buck_map : buck_maps_ ){ - if (buck_map == nullptr){ - continue; - } - for (uint32_t i = 0; i < buck_count_; ++i){ - InnerBuck* buck_attch = buck_map[i].next_; - while (buck_attch != nullptr){ - InnerBuck* buck_attch_temp = buck_attch->next_; - delete buck_attch; - buck_attch = buck_attch_temp; - } - } - } -} - -RecMapper::MapperStatus RecMapper::MapperFast::Put(uint64_t key, uint64_t& value) -{ - if (size_.load() > capacity_){ - return MapperStatus::MAPPER_ERROR; - } - - if(key == 0){ - if (spec_buck != nullptr) { - spec_buck->spin.lock(); - spec_buck->Find(key, value); - spec_buck->spin.unlock(); - return MapperStatus::MAPPER_OK; - } - spec_buck = new (std::nothrow) InnerBuck; - memset(spec_buck, 0, sizeof(InnerBuck)); - spec_buck->spin.lock(); - spec_buck->keys_[0].store(key); - spec_buck->values_[0] = offset_.fetch_add(1) + 1; - size_.fetch_add(1); - spec_buck->spin.unlock(); - return MapperStatus::MAPPER_OK; - } - InnerBuck* buck = &(buck_maps_[key % sub_map_count][key % buck_count_]); - //first,find key if exist in buck - while(buck != nullptr){ - buck->spin.lock(); - if(buck->Find(key, value) == BuckStatus::BUCK_EXIST){ - buck->spin.unlock(); - return MapperStatus::MAPPER_OK; - } - buck->spin.unlock(); - if(buck->next_ != nullptr){ - buck = buck->next_; - } else{ - break; - } - } - - //if not find, - for (int i = 0; i < 8192; ++i){ - // insert exist buck - while(buck != nullptr){ - buck->spin.lock(); - auto value_func = [&]() ->bool { - value = offset_.fetch_add(1); - return true;}; - BuckStatus ret = buck->Insert(key, value, 
value_func); - - buck->spin.unlock(); - if (ret == BuckStatus::BUCK_ERROR) { - return MapperStatus::MAPPER_ERROR; - } else if (ret == BuckStatus::BUCK_NOEXIST) { - size_.fetch_add(1); - return MapperStatus::MAPPER_OK; - } - if (buck->next_ != nullptr) { - buck = buck->next_; - } else { - break; - } - } - - //insert not exist buck - auto& old_spin = buck->spin; - old_spin.lock(); - if (buck->next_ != nullptr) { - buck = buck->next_; - old_spin.unlock(); - continue; - } - - InnerBuck* new_buck = new (std::nothrow) InnerBuck; - memset(new_buck, 0, sizeof(InnerBuck)); - buck->next_ = new_buck; - buck = new_buck; - old_spin.unlock(); - } - return MapperStatus::MAPPER_ERROR; -} - -RecMapper::MapperStatus RecMapper::MapperFast::Find(uint64_t key, uint64_t& value) { - if(key == 0) { - if (spec_buck != nullptr) { - spec_buck->spin.lock(); - value = spec_buck->values_[0]; - spec_buck->spin.unlock(); - return MapperStatus::MAPPER_OK; - } - return MapperStatus::MAPPER_INVALID; - } - InnerBuck* buck = &(buck_maps_[key % sub_map_count][key % buck_count_]); - if (buck == nullptr) { - return MapperStatus::MAPPER_ERROR; - } - if (buck->Find(key,value) == BuckStatus::BUCK_NOEXIST) { - return MapperStatus::MAPPER_INVALID; - } - return MapperStatus::MAPPER_OK; -} - -RecMapper::MapperStatus RecMapper::MapperFast::Remove(uint64_t key) -{ - if(key == 0) { - if (spec_buck != nullptr) { - delete spec_buck; - spec_buck = nullptr; - size_.fetch_sub(1); - return MapperStatus::MAPPER_OK; - } - return MapperStatus::MAPPER_INVALID; - } - InnerBuck* buck = &(buck_maps_[key % sub_map_count][key % buck_count_]); - while(buck != nullptr) { - uint64_t value; - if (buck->Find(key, value) == BuckStatus::BUCK_NOEXIST) { - return MapperStatus::MAPPER_INVALID; - } - - buck->spin.lock(); - if (buck->Remove(key) == BuckStatus::BUCK_EXIST){ - size_.fetch_sub(1); - return MapperStatus::MAPPER_OK; - } - buck = buck->next_; - } - return MapperStatus::MAPPER_INVALID; -} - -RecMapper::MapperStatus RecMapper::MapperFast::ToVector(std::vector>& vec) -{ - if (spec_buck != nullptr) { - vec.push_back(std::make_pair(spec_buck->keys_[0], spec_buck->values_[0])); - } - for (auto& sub_map : buck_maps_){ - if (sub_map == nullptr){ - continue; - } - for(int i = 0; i < buck_count_; ++i){ - InnerBuck* buck = &sub_map[i]; - while(buck) { - for (int j = 0; j < BUCKCAPACITY; ++j){ - if (buck->keys_[j] == 0) { - continue; - } - vec.push_back(std::make_pair(buck->keys_[j], buck->values_[j])); - } - buck = buck->next_; - } - } - } - return MapperStatus::MAPPER_OK; -} \ No newline at end of file diff --git a/src/core/utils/mapper_fast.h b/src/core/utils/mapper_fast.h deleted file mode 100644 index 0ad73d5b..00000000 --- a/src/core/utils/mapper_fast.h +++ /dev/null @@ -1,97 +0,0 @@ -// -// Created by z00576261 on 2024/4/15. 
-// - -#ifndef FAST_MAPPERFAST_H -#define FAST_MAPPERFAST_H - -#include -#include -#include -#include -#include - -namespace RecMapper { - constexpr int BUCKCAPACITY = 3; - enum BuckStatus{ - BUCK_EXIST, - BUCK_NOEXIST, - BUCK_ERROR - }; - - enum MapperStatus{ - MAPPER_ERROR, - MAPPER_INVALID, - MAPPER_OK - }; - - class SpinLock { - public: - SpinLock() = default; - SpinLock(const SpinLock&) = delete; - SpinLock& operator=(const SpinLock) = delete; - - void lock() { - while(f.test_and_set(std::memory_order_acquire)); - } - - void unlock() { - f.clear(std::memory_order_release); - } - - private: - std::atomic_flag f; - }; - - struct InnerBuck{ - std::atomic keys_[BUCKCAPACITY]{}; - int64_t values_[BUCKCAPACITY]{}; - InnerBuck* next_ = nullptr; - SpinLock spin; - - BuckStatus Insert(uint64_t, uint64_t&, std::function); - BuckStatus Find(uint64_t, uint64_t&); - BuckStatus Remove(uint64_t); - - }; - - class MapperFast { - public: - MapperFast(uint64_t cap, uint64_t res) : capacity_(cap), reserve_(res) {}; - - ~MapperFast() = default; - - bool InitializeBuck(); - void UnInitializeBuck(); - - MapperStatus Put(uint64_t key, uint64_t& value); - - MapperStatus Find(uint64_t key, uint64_t& value); - - MapperStatus Remove(uint64_t key); - - MapperStatus ToVector(std::vector>& vec); - - uint64_t Size() { - return size_.load(); - } - - private: - void FreeBuckMaps(); - void FreeBuckExpend(); - - std::atomic size_{ 0 }; - std::atomic offset_{ 0 }; - uint64_t capacity_; - uint64_t reserve_; - uint32_t buck_count_; - - static constexpr uint32_t sub_map_count = 5; - static constexpr uint32_t prime_max = 32; - - InnerBuck* buck_maps_[sub_map_count] {}; - InnerBuck* spec_buck = nullptr; - }; -} - -#endif //FAST_MAPPERFAST_H -- Gitee From a7dd3ad107ca6b2e5a199c73f7fb01f52b0ae0cc Mon Sep 17 00:00:00 2001 From: yangzhen_BIG Date: Thu, 25 Apr 2024 01:33:14 +0000 Subject: [PATCH 064/302] =?UTF-8?q?!90=20=E6=B8=85=E7=90=86cleancode?= =?UTF-8?q?=E5=91=8A=E8=AD=A6=EF=BC=88=E6=9C=80=E5=B0=8F=E9=9B=86=E2=80=94?= =?UTF-8?q?=E2=80=94=E4=B8=A5=E9=87=8D=EF=BC=89=20*=20cleancode=E5=91=8A?= =?UTF-8?q?=E8=AD=A6=E6=B8=85=E7=90=86=20*=20cleancode=E5=91=8A=E8=AD=A6?= =?UTF-8?q?=E6=B8=85=E7=90=86=20*=20cleancode=E5=91=8A=E8=AD=A6=E6=B8=85?= =?UTF-8?q?=E7=90=86=20*=20cleancode=E5=91=8A=E8=AD=A6=E6=B8=85=E7=90=86?= =?UTF-8?q?=20*=20cleancode=E5=91=8A=E8=AD=A6=E6=B8=85=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/DCNv2/main_mxrec.py | 24 +++--- examples/demo/little_demo_estimator/main.py | 20 ++--- examples/dlrm/criteo_tb/gen_ttf.py | 83 +++++++++++---------- examples/dlrm/model/config.py | 8 +- examples/dlrm/model/main_mxrec.py | 31 ++++---- mx_rec/util/communication/hccl_mgmt.py | 8 +- src/AccCTR/tests/ut/src/unique_test.cpp | 21 +++--- 7 files changed, 97 insertions(+), 98 deletions(-) diff --git a/examples/DCNv2/main_mxrec.py b/examples/DCNv2/main_mxrec.py index d5a51312..205b0f67 100644 --- a/examples/DCNv2/main_mxrec.py +++ b/examples/DCNv2/main_mxrec.py @@ -53,8 +53,8 @@ def add_timestamp_func(batch): return batch -def make_batch_and_iterator(cfg, feature_spec_list, is_training, dump_graph, use_faae=False): - if cfg.USE_PIPELINE_TEST: +def make_batch_and_iterator(config, feature_spec_list, is_training, dump_graph, is_use_faae=False): + if config.USE_PIPELINE_TEST: num_parallel = 1 else: num_parallel = 8 @@ -62,9 +62,9 @@ def make_batch_and_iterator(cfg, feature_spec_list, is_training, dump_graph, use def extract_fn(data_record): features = { # Extract 
features using the keys set during creation - 'label': tf.compat.v1.FixedLenFeature(shape=(cfg.line_per_sample,), dtype=tf.int64), - 'sparse_feature': tf.compat.v1.FixedLenFeature(shape=(26 * cfg.line_per_sample,), dtype=tf.int64), - 'dense_feature': tf.compat.v1.FixedLenFeature(shape=(13 * cfg.line_per_sample,), dtype=tf.float32), + 'label': tf.compat.v1.FixedLenFeature(shape=(config.line_per_sample,), dtype=tf.int64), + 'sparse_feature': tf.compat.v1.FixedLenFeature(shape=(26 * config.line_per_sample,), dtype=tf.int64), + 'dense_feature': tf.compat.v1.FixedLenFeature(shape=(13 * config.line_per_sample,), dtype=tf.float32), } sample = tf.compat.v1.parse_single_example(data_record, features) return sample @@ -77,24 +77,24 @@ def make_batch_and_iterator(cfg, feature_spec_list, is_training, dump_graph, use return batch if is_training: - files_list = glob(os.path.join(cfg.data_path, cfg.train_file_pattern) + '/*.tfrecord') + files_list = glob(os.path.join(config.data_path, config.train_file_pattern) + '/*.tfrecord') else: - files_list = glob(os.path.join(cfg.data_path, cfg.test_file_pattern) + '/*.tfrecord') + files_list = glob(os.path.join(config.data_path, config.test_file_pattern) + '/*.tfrecord') dataset = tf.data.TFRecordDataset(files_list, num_parallel_reads=num_parallel) - batch_size = cfg.batch_size // cfg.line_per_sample + batch_size = config.batch_size // config.line_per_sample - dataset = dataset.shard(cfg.rank_size, cfg.rank_id) + dataset = dataset.shard(config.rank_size, config.rank_id) if is_training: dataset = dataset.shuffle(batch_size * 1000, seed=SHUFFLE_SEED) if is_training: - dataset = dataset.repeat(cfg.train_epoch) + dataset = dataset.repeat(config.train_epoch) else: - dataset = dataset.repeat(cfg.test_epoch) + dataset = dataset.repeat(config.test_epoch) dataset = dataset.map(extract_fn, num_parallel_calls=num_parallel).batch(batch_size, drop_remainder=True) dataset = dataset.map(reshape_fn, num_parallel_calls=num_parallel) - if use_faae: + if is_use_faae: dataset = dataset.map(add_timestamp_func) if not MODIFY_GRAPH_FLAG: diff --git a/examples/demo/little_demo_estimator/main.py b/examples/demo/little_demo_estimator/main.py index 8df1420c..de0b6c86 100644 --- a/examples/demo/little_demo_estimator/main.py +++ b/examples/demo/little_demo_estimator/main.py @@ -37,7 +37,7 @@ from utils import FeatureSpecIns tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.INFO) -def main(params, cfg): +def main(params, config): mg_session_config = tf.compat.v1.ConfigProto(allow_soft_placement=True, log_device_placement=False) run_config = NPURunConfig( model_dir=params.model_dir, @@ -64,27 +64,29 @@ def main(params, cfg): hooks_list = [ACGPushOpsToDatasetHook(dump_graph=True), GraphModifierHook(modify_graph=params.modify_graph)] if params.use_timestamp: - config_for_user_table = dict(access_threshold=cfg.access_threshold, eviction_threshold=cfg.eviction_threshold) - config_for_item_table = dict(access_threshold=cfg.access_threshold, eviction_threshold=cfg.eviction_threshold) + config_for_user_table = dict(access_threshold=config.access_threshold, + eviction_threshold=config.eviction_threshold) + config_for_item_table = dict(access_threshold=config.access_threshold, + eviction_threshold=config.eviction_threshold) access_and_evict = dict(user_table=config_for_user_table, item_table=config_for_item_table) evict_hook = EvictHook(evict_enable=True, evict_time_interval=10) hooks_list.append(evict_hook) - create_fs_params = dict(cfg=cfg, use_timestamp=params.use_timestamp, + create_fs_params 
= dict(cfg=config, use_timestamp=params.use_timestamp, use_multi_lookup=use_multi_lookup, multi_lookup_times=MULTI_LOOKUP_TIMES) est = NPUEstimator( - model_fn=get_model_fn(create_fs_params, cfg, access_and_evict), + model_fn=get_model_fn(create_fs_params, config, access_and_evict), params=params, model_dir=params.model_dir, config=run_config ) if params.run_mode == 'train': - est.train(input_fn=lambda: input_fn(params, create_fs_params, cfg), max_steps=params.max_steps, + est.train(input_fn=lambda: input_fn(params, create_fs_params, config), max_steps=params.max_steps, hooks=npu_hooks_append(hooks_list)) elif params.run_mode == 'train_and_evaluate': - train_spec = tf.estimator.TrainSpec(input_fn=lambda: input_fn(params, create_fs_params, cfg, + train_spec = tf.estimator.TrainSpec(input_fn=lambda: input_fn(params, create_fs_params, config, use_one_shot=args.use_one_shot), max_steps=params.max_steps, hooks=npu_hooks_append(hooks_list)) # 在开启evict时,eval时不支持淘汰,所以无需加入evict hook @@ -95,14 +97,14 @@ def main(params, cfg): eval_hook_list = [ACGPushOpsToDatasetHook(dump_graph=True), GraphModifierHook(modify_graph=params.modify_graph)] - eval_spec = tf.estimator.EvalSpec(input_fn=lambda: input_fn(params, create_fs_params, cfg, is_eval=True, + eval_spec = tf.estimator.EvalSpec(input_fn=lambda: input_fn(params, create_fs_params, config, is_eval=True, use_one_shot=args.use_one_shot), steps=params.eval_steps, hooks=npu_hooks_append(eval_hook_list), throttle_secs=0) tf.estimator.train_and_evaluate(est, train_spec=train_spec, eval_spec=eval_spec) elif params.run_mode == 'predict': - results = est.predict(input_fn=lambda: input_fn(params, create_fs_params, cfg), + results = est.predict(input_fn=lambda: input_fn(params, create_fs_params, config), hooks=npu_hooks_append(hooks_list=hooks_list), yield_single_examples=False) output_pred1 = [] output_pred2 = [] diff --git a/examples/dlrm/criteo_tb/gen_ttf.py b/examples/dlrm/criteo_tb/gen_ttf.py index 92fabb3d..04b7b767 100644 --- a/examples/dlrm/criteo_tb/gen_ttf.py +++ b/examples/dlrm/criteo_tb/gen_ttf.py @@ -50,11 +50,11 @@ class Logger(object): self.logger.addHandler(sh) # 把对象加到logger里 self.logger.addHandler(th) - def info(self, *args): - if len(args) == 1: - self.logger.info(*args) + def info(self, *prams): + if len(prams) == 1: + self.logger.info(*prams) else: - self.logger.info([*args]) + self.logger.info([*prams]) class CriteoStatsDict(): @@ -89,12 +89,11 @@ class CriteoStatsDict(): for i, cat in enumerate(cat_list): map_cat_count(i, cat) - # - def save_dict(self, output_path, hist_map, prefix=""): - with open(os.path.join(output_path, "{}hist_map.pkl".format(prefix)), "wb") as file_wrt: + @staticmethod + def save_dict(output_file_path, hist_map, prefix=""): + with open(os.path.join(output_file_path, "{}hist_map.pkl".format(prefix)), "wb") as file_wrt: pickle.dump(hist_map, file_wrt) - # def load_dict(self, dict_path, prefix=""): with open(os.path.join(dict_path, "{}hist_map.pkl".format(prefix)), "rb") as file_wrt: self.hist_map = pickle.load(file_wrt) @@ -128,13 +127,14 @@ class CriteoStatsDict(): return dense_list, cat_list -def statsdata_multiprocess(process_num, process_id, data_file_path, output_path, criteo_stats): + +def statsdata_multiprocess(proc_num, proc_id, data_file_path, output_file_path, criteo_stats_data): start_time = time.time() with open(data_file_path, encoding="utf-8") as file_in: errorline_list = [] count = 0 for i, line in enumerate(file_in): - if i % process_num != process_id: + if i % proc_num != proc_id: continue count += 1 line 
= line.strip("\n") @@ -146,26 +146,26 @@ def statsdata_multiprocess(process_num, process_id, data_file_path, output_path, if count % 1000000 == 0: print("Have handle {}w lines.".format(count // 10000)) cats = items[14:] - criteo_stats.stats_cats(cats) - criteo_stats.save_dict(output_path) + criteo_stats_data.stats_cats(cats) + criteo_stats_data.save_dict(output_file_path) print('statsdata time cost: {:.2f}s'.format(time.time() - start_time)) -def get_unique_id_multiprocess(process_num, process_id, data_file_path, output_path, criteo_stats): - if os.path.exists(os.path.join(output_path, "unique_id.pkl")): +def get_unique_id_multiprocess(proc_num, proc_id, data_file_path, output_file_path, criteo_stats_data): + if os.path.exists(os.path.join(output_file_path, "unique_id.pkl")): return start_time = time.time() - cat_sets = [OrderedDict() for col in criteo_stats.cat_cols] - cat_global_id_nums = [0 for col in criteo_stats.cat_cols] - hash_bucket = criteo_stats.hash_bucket + cat_sets = [OrderedDict() for col in criteo_stats_data.cat_cols] + cat_global_id_nums = [0 for col in criteo_stats_data.cat_cols] + hash_bucket = criteo_stats_data.hash_bucket line_num = 0 with open(data_file_path, encoding="utf-8") as file_in: errorline_list = [] for i, line in enumerate(file_in): line_num += 1 - start_line = process_id * ((line_num + process_num) // process_num) - end_line = (process_id + 1) * ((line_num + process_num) // process_num) + start_line = proc_id * ((line_num + proc_num) // proc_num) + end_line = (proc_id + 1) * ((line_num + proc_num) // proc_num) with open(data_file_path, encoding="utf-8") as file_in: errorline_list = [] count = 0 @@ -183,21 +183,17 @@ def get_unique_id_multiprocess(process_num, process_id, data_file_path, output_p print("Have handle {}w lines.".format(count // 10000)) sys.stdout.flush() cats = items[14:] - # criteo_stats.stats_cats(cats) - # def map_cat_count(i, cat): for k, cat in enumerate(cats): - # map_cat_count(i, cat) capped_value = int(cat, 16) % hash_bucket if cat else hash_bucket - # if capped_value not in self.hist_map[key_col]: if capped_value not in cat_sets: cat_sets[k][capped_value] = cat_global_id_nums[k] cat_global_id_nums[k] += 1 - with open(os.path.join(output_path, "unique_id.pkl"), "wb") as file_wrt: + with open(os.path.join(output_file_path, "unique_id.pkl"), "wb") as file_wrt: pickle.dump(cat_sets, file_wrt) print('statsdata time cost: {:.2f}s'.format(time.time() - start_time)) -def merge_stats_count(stats_dir, criteo_stats): +def merge_stats_count(stats_dir, criteo_stats_data): if os.path.exists(f'{stats_dir}/hist_map.pkl'): return stats_sub_dirs = sorted(glob(f'{stats_dir}/*[0-9]')) @@ -207,15 +203,15 @@ def merge_stats_count(stats_dir, criteo_stats): for i in tqdm(range(1, len(stats_sub_dirs))): with open(f'{stats_sub_dirs[i]}/unique_id.pkl', 'rb') as f: others_count = pickle.load(f) - for k, _ in enumerate(criteo_stats.cat_cols): + for k, _ in enumerate(criteo_stats_data.cat_cols): all_count_1, others_count_1 = all_hist_map[k], others_count[k] all_count_1.update(others_count_1) all_hist_map[k] = all_count_1 hist_map = {} - for i, col in enumerate(criteo_stats.cat_cols): + for i, col in enumerate(criteo_stats_data.cat_cols): hist_map[col] = dict(zip(list(all_hist_map[i].keys()), range(len(all_hist_map[i])))) - criteo_stats.save_dict(stats_dir, hist_map) + criteo_stats_data.save_dict(stats_dir, hist_map) def mkdir_path(file_path): @@ -235,13 +231,14 @@ def make_example(label_list, dense_feat_list, sparse_feat_list): return example -def 
convert_input2tfrd_multiprocess(process_num, process_id, in_file_path, output_path, criteo_stats, line_per_sample=1024,
-                                    part_rows=2000000, mode="train_"):
+
+def convert_input2tfrd_multiprocess(proc_num, proc_id, in_file_path, output_file_path, criteo_stats_dict,
+                                    line_per_sample=1024, part_rows=2000000):
     start_time = time.time()
     print("----------" * 10 + "\n" * 2)
 
     part_number = 0
-    file_name = output_path + "part_{:0>8d}.tfrecord"
+    file_name = output_file_path + "part_{:0>8d}.tfrecord"
     file_writer = tf.python_io.TFRecordWriter(file_name.format(part_number))
 
     sample_count = 0
@@ -253,8 +250,8 @@ def convert_input2tfrd_multiprocess(process_num, process_id, in_file_path, outpu
         for i, line in tqdm(enumerate(file_in)):
             line_num += 1
     print(f'line_num: {line_num}')
-    start_line = process_id * ((line_num + process_num) // process_num)
-    end_line = (process_id + 1) * ((line_num + process_num) // process_num)
+    start_line = proc_id * ((line_num + proc_num) // proc_num)
+    end_line = (proc_id + 1) * ((line_num + proc_num) // proc_num)
     dense_res_list = []
     cat_res_list = []
     label_res_list = []
@@ -276,9 +273,11 @@ def convert_input2tfrd_multiprocess(process_num, process_id, in_file_path, outpu
             label = int(items[0])
            values = items[1:14]
            cats = items[14:]
-            assert len(values) == 13, "values.size: {}".format(len(values))
-            assert len(cats) == 26, "cats.size: {}".format(len(cats))
-            val_list, cat_list = criteo_stats.map_cat2id(values, cats)
+            if len(values) != 13:
+                raise ValueError("values.size: {}".format(len(values)))
+            if len(cats) != 26:
+                raise ValueError("cats.size: {}".format(len(cats)))
+            val_list, cat_list = criteo_stats_dict.map_cat2id(values, cats)
             dense_res_list.append(val_list)
             cat_res_list.append(cat_list)
             label_res_list.append(label)
@@ -362,8 +361,10 @@ if __name__ == "__main__":
     mkdir_path(save_tfrecord_path)
     processs = []
     process_num = args.train_process_num
-    assert process_num % len(train_data_files) == 0, print(
-        f'process_num {process_num} must exact div length of data_files {len(data_files)}')
+    if len(train_data_files) == 0:
+        raise ValueError(f'file not exist in train_data_dir:{train_data_dir}')
+    if process_num % len(train_data_files) != 0:
+        raise ValueError(f'process_num {process_num} must be exactly divisible by the number of train_data_files {len(train_data_files)}')
 
     for process_id in range(process_num):
         sub_process_num = process_num // len(train_data_files)
@@ -384,8 +385,10 @@ if __name__ == "__main__":
     mkdir_path(save_tfrecord_path)
     processs = []
     process_num = args.test_process_num
-    assert process_num % len(test_data_files) == 0, print(
-        f'process_num {process_num} must exact div length of data_files {len(data_files)}')
+    if len(test_data_files) == 0:
+        raise ValueError(f'file not exist in test_data_dir:{test_data_dir}')
+    if process_num % len(test_data_files) != 0:
+        raise ValueError(f'process_num {process_num} must be exactly divisible by the number of test_data_files {len(test_data_files)}')
 
     for process_id in range(process_num):
         sub_process_num = process_num // len(test_data_files)
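Note: the converters above shard one input file across workers in contiguous blocks of
lines. A standalone sketch of that arithmetic follows; the line and worker counts are
illustrative, and end_line is clamped here only for printing (the patch bounds it with
the loop condition instead):

    # Worker proc_id of proc_num handles lines [start_line, end_line) of the file.
    line_num, proc_num = 10, 4                  # assumed: 10 lines, 4 workers
    chunk = (line_num + proc_num) // proc_num   # rounded-up block size, here 3
    for proc_id in range(proc_num):
        start_line = proc_id * chunk            # same formula as the patch
        end_line = min((proc_id + 1) * chunk, line_num)
        print(proc_id, start_line, end_line)    # 0 0 3 / 1 3 6 / 2 6 9 / 3 9 10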
diff --git a/examples/dlrm/model/config.py b/examples/dlrm/model/config.py
index 452b2a7f..23b042c2 100644
--- a/examples/dlrm/model/config.py
+++ b/examples/dlrm/model/config.py
@@ -40,7 +40,6 @@ class LearningRateScheduler:
         # used for the warmup stage
         warmup_step = tf.cast(1 / self.warmup_steps, tf.float32)
         lr_factor_warmup = 1 - tf.cast(self.warmup_steps - global_step, tf.float32) * warmup_step
-        # lr_factor_warmup = tf.cast(global_step, tf.float32) / tf.cast(self.warmup_steps, tf.float32) #hx
         lr_factor_warmup = tf.cast(lr_factor_warmup, tf.float32)
         # used for the constant stage
         lr_factor_constant = tf.cast(1.0, tf.float32)
@@ -55,7 +54,6 @@ class LearningRateScheduler:
             global_step < self.decay_end_step,
             lambda: lr_factor_decay,
             lambda: sparse_after_decay,
-            # lambda: 0.000 #hx
         )
 
         lr_factor_decay_dense = tf.cond(
@@ -119,7 +117,6 @@ class Config:
         self.emb_dim = 128
         self.hashtable_threshold = 1
 
-        # self.learning_rate = 0.01
 
         self.USE_PIPELINE_TEST = False
 
@@ -182,8 +179,8 @@ def sess_config(dump_data=False, dump_path="./dump_output", dump_steps="0|1|2"):
     custom_op.parameter_map["mix_compile_mode"].b = False
     custom_op.parameter_map["use_off_line"].b = True
     custom_op.parameter_map["min_group_size"].b = 1
+    # optional setting: level0:pairwise;level1:pairwise
     custom_op.parameter_map["HCCL_algorithm"].s = tf.compat.as_bytes("level0:fullmesh;level1:fullmesh")
-    # custom_op.parameter_map["HCCL_algorithm"].s = tf.compat.as_bytes("level0:pairwise;level1:pairwise")
     custom_op.parameter_map["enable_data_pre_proc"].b = True
     custom_op.parameter_map["iterations_per_loop"].i = 10
     custom_op.parameter_map["precision_mode"].s = tf.compat.as_bytes("allow_mix_precision")
@@ -228,7 +225,6 @@ def get_npu_run_config():
         iterations_per_loop=1,
         jit_compile=False,
         op_compiler_cache_mode="enable",
-        HCCL_algorithm="level0:fullmesh;level1:fullmesh"
-        # HCCL_algorithm="level0:pairwise;level1:pairwise"
+        HCCL_algorithm="level0:fullmesh;level1:fullmesh"  # optional setting: level0:pairwise;level1:pairwise
     )
     return run_config
diff --git a/examples/dlrm/model/main_mxrec.py b/examples/dlrm/model/main_mxrec.py
index 4bbd16de..8c98238b 100644
--- a/examples/dlrm/model/main_mxrec.py
+++ b/examples/dlrm/model/main_mxrec.py
@@ -57,8 +57,8 @@ def add_timestamp_func(batch):
     return batch
 
 
-def make_batch_and_iterator(cfg, feature_spec_list, is_training, dump_graph, use_faae=False):
-    if cfg.USE_PIPELINE_TEST:
+def make_batch_and_iterator(config, feature_spec_list, is_training, dump_graph, is_use_faae=False):
+    if config.USE_PIPELINE_TEST:
         num_parallel = 1
     else:
         num_parallel = 8
@@ -66,9 +66,9 @@ def make_batch_and_iterator(cfg, feature_spec_list, is_training, dump_graph, use
     def extract_fn(data_record):
         features = {
             # Extract features using the keys set during creation
-            'label': tf.compat.v1.FixedLenFeature(shape=(cfg.line_per_sample,), dtype=tf.int64),
-            'sparse_feature': tf.compat.v1.FixedLenFeature(shape=(26 * cfg.line_per_sample,), dtype=tf.int64),
-            'dense_feature': tf.compat.v1.FixedLenFeature(shape=(13 * cfg.line_per_sample,), dtype=tf.float32),
+            'label': tf.compat.v1.FixedLenFeature(shape=(config.line_per_sample,), dtype=tf.int64),
+            'sparse_feature': tf.compat.v1.FixedLenFeature(shape=(26 * config.line_per_sample,), dtype=tf.int64),
+            'dense_feature': tf.compat.v1.FixedLenFeature(shape=(13 * config.line_per_sample,), dtype=tf.float32),
         }
         sample = tf.compat.v1.parse_single_example(data_record, features)
         return sample
@@ -81,24 +81,23 @@ def make_batch_and_iterator(cfg, feature_spec_list, is_training, dump_graph, use
         return batch
 
     if is_training:
-        files_list = glob(os.path.join(cfg.data_path, cfg.train_file_pattern) + '/*.tfrecord')
+        files_list = glob(os.path.join(config.data_path, config.train_file_pattern) + '/*.tfrecord')
     else:
-        files_list = glob(os.path.join(cfg.data_path, cfg.test_file_pattern) + '/*.tfrecord')
+        files_list = glob(os.path.join(config.data_path, config.test_file_pattern) + '/*.tfrecord')
     dataset = tf.data.TFRecordDataset(files_list, num_parallel_reads=num_parallel)
-    batch_size = cfg.batch_size // cfg.line_per_sample
+    batch_size = 
config.batch_size // config.line_per_sample - dataset = dataset.shard(cfg.rank_size, cfg.rank_id) + dataset = dataset.shard(config.rank_size, config.rank_id) if is_training: dataset = dataset.shuffle(batch_size * 1000, seed=shuffle_seed) if is_training: - dataset = dataset.repeat(cfg.train_epoch) + dataset = dataset.repeat(config.train_epoch) else: - dataset = dataset.repeat(cfg.test_epoch) - # dataset = dataset.repeat(cfg.num_epochs) + dataset = dataset.repeat(config.test_epoch) dataset = dataset.map(extract_fn, num_parallel_calls=num_parallel).batch(batch_size, drop_remainder=True) dataset = dataset.map(reshape_fn, num_parallel_calls=num_parallel) - if use_faae: + if is_use_faae: dataset = dataset.map(add_timestamp_func) if not MODIFY_GRAPH_FLAG: @@ -161,11 +160,11 @@ def evaluate(): eval_start = time.time() eval_loss, pred, label = sess.run([eval_model["loss"], eval_model["pred"], eval_label]) eval_cost = time.time() - eval_start - qps = (1 / eval_cost) * rank_size * cfg.batch_size + qps_eval = (1 / eval_cost) * rank_size * cfg.batch_size log_loss_list += list(eval_loss.reshape(-1)) pred_list += list(pred.reshape(-1)) label_list += list(label.reshape(-1)) - print(f"eval current_steps: {eval_current_steps}, qps: {qps}") + print(f"eval current_steps: {eval_current_steps}, qps: {qps_eval}") if eval_current_steps == eval_steps: finished = True except tf.errors.OutOfRangeError: @@ -217,7 +216,6 @@ def evaluate_fix(step): os.mknod(f"flag_{rank_id}.txt") while True: file_exists_list = [os.path.exists(f"flag_{i}.txt") for i in range(rank_size)] - # print(file_exists_list) if sum(file_exists_list) == rank_size: print("All saved!!!!!!!!!!") break @@ -424,7 +422,6 @@ if __name__ == "__main__": cost_time = end_time - start_time qps = (1 / cost_time) * rank_size * cfg.batch_size * iteration_per_loop cost_sum += cost_time - # qps_sum += qps logger.info(f"step: {i * iteration_per_loop}; training loss: {loss}") logger.info(f"step: {i * iteration_per_loop}; grad: {grad}") logger.info(f"step: {i * iteration_per_loop}; lr: {lr}") diff --git a/mx_rec/util/communication/hccl_mgmt.py b/mx_rec/util/communication/hccl_mgmt.py index 2f50e832..43042d6b 100644 --- a/mx_rec/util/communication/hccl_mgmt.py +++ b/mx_rec/util/communication/hccl_mgmt.py @@ -82,11 +82,11 @@ def set_hccl_info_without_json() -> Dict[int, int]: Used for no rank table file configured training situation. :return: rank_id to logic_id mapping dictionary. """ - rank_size = global_env.cm_worker_size - chief_device = global_env.cm_chief_device + env_rank_size = global_env.cm_worker_size + env_chief_device = global_env.cm_chief_device device_list = get_device_list() - chief_device = int(chief_device) - rank_size = int(rank_size) + chief_device = int(env_chief_device) + rank_size = int(env_rank_size) if chief_device not in device_list: raise ValueError(f"The environment variable CM_CHIEF_DEVICE {chief_device} is not in the local device list. 
") diff --git a/src/AccCTR/tests/ut/src/unique_test.cpp b/src/AccCTR/tests/ut/src/unique_test.cpp index ef6846f8..f971bb91 100644 --- a/src/AccCTR/tests/ut/src/unique_test.cpp +++ b/src/AccCTR/tests/ut/src/unique_test.cpp @@ -1162,14 +1162,14 @@ TEST_F(UniqueTest, DoUniqueShardMultipleTimes) unordered_set uniqueIdSet; map expectedIdCntMap; - for (size_t i = 0; i < uniqueIn.inputIdCnt; i++) { - restoreIds[i] = uniqueId[index[i]]; - expectedIdCntMap[inputId[i]]++; - if (uniqueIdSet.find(inputId[i]) != uniqueIdSet.end()) { + for (size_t j = 0; j < uniqueIn.inputIdCnt; j++) { + restoreIds[j] = uniqueId[index[j]]; + expectedIdCntMap[inputId[j]]++; + if (uniqueIdSet.find(inputId[j]) != uniqueIdSet.end()) { continue; } else { - uniqueIdSet.insert(inputId[i]); - expectedUniqueIdCnt[inputId[i] % conf.shardingNum]++; + uniqueIdSet.insert(inputId[j]); + expectedUniqueIdCnt[inputId[j] % conf.shardingNum]++; } } @@ -1177,13 +1177,14 @@ TEST_F(UniqueTest, DoUniqueShardMultipleTimes) int uniqueSum = 0; - for (int i = 0; i < conf.shardingNum; i++) { - uniqueSum += uniqueIdCntInBucket[i]; + for (int j = 0; j < conf.shardingNum; j++) { + uniqueSum += uniqueIdCntInBucket[j]; } vector expectedIdCnt(uniqueSum); - for (int i = 0; i < uniqueSum; i++) { - expectedIdCnt[i] = expectedIdCntMap[uniqueId[i]]; + + for (int j = 0; j < uniqueSum; j++) { + expectedIdCnt[j] = expectedIdCntMap[uniqueId[j]]; } expectedIdCnt.resize(uniqueIn.inputIdCnt); -- Gitee From 8a3e5af57410974ca8d7850655f05d6d034cf562 Mon Sep 17 00:00:00 2001 From: longfeifei <962977793@qq.com> Date: Tue, 23 Apr 2024 15:42:53 +0800 Subject: [PATCH 065/302] =?UTF-8?q?warm=20start=E5=8A=9F=E8=83=BD=E5=AE=9E?= =?UTF-8?q?=E7=8E=B0=EF=BC=8C=E5=AE=9E=E7=8E=B0=E4=BB=8E=E5=A4=9A=E4=B8=AA?= =?UTF-8?q?=E6=A8=A1=E5=9E=8B=E8=B7=AF=E5=BE=84=E5=8A=A0=E8=BD=BD=E6=A8=A1?= =?UTF-8?q?=E5=9E=8B=E5=8F=82=E6=95=B0=E3=80=81=E7=A8=80=E7=96=8F=E8=A1=A8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mx_rec/saver/warm_start.py | 57 ++++++++++++-------------------------- 1 file changed, 17 insertions(+), 40 deletions(-) diff --git a/mx_rec/saver/warm_start.py b/mx_rec/saver/warm_start.py index 31a5e358..520c3df3 100644 --- a/mx_rec/saver/warm_start.py +++ b/mx_rec/saver/warm_start.py @@ -33,8 +33,9 @@ if tf.__version__.startswith("1"): else: from npu_device.compat.v1.npu_init import NPUEstimator + class WarmStartController: - _instance = None # 类属性,用于存储唯一的实例 + _instance = None def __new__(cls): if cls._instance is None: @@ -47,7 +48,6 @@ class WarmStartController: logging.info("start to build WarmStartController.") def add_element(self, path: str, table_list: List[str]): - """添加 path, table list""" if path not in self._warm_start_dict: self._warm_start_dict[path] = table_list else: @@ -57,7 +57,6 @@ class WarmStartController: self.table_name_to_prev_table_name[table] = prev_table def get_elements(self): - """返回dict中的所有元素""" return self._warm_start_dict @@ -83,26 +82,25 @@ def patch_for_func_warm_start(func): vars_to_warm_start_list = args[1] var_name_to_prev_var_name_list = args[3] for i in range(len(ckpt_to_initialize_from)): - f = func(ckpt_to_initialize_from[i], vars_to_warm_start_list[i], var_name_to_prev_var_name_list[i], - args[3:], **kwargs) + f = func(ckpt_to_initialize_from[i], vars_to_warm_start_list[i], args[2], + var_name_to_prev_var_name_list[i], **kwargs) return f else: return func(*args, **kwargs) return wrapper + def patch_for_estimator_train(func): - def warpper(*args, **kwargs): + def wrapper(*args, **kwargs): 
hooks = kwargs.get('hooks', []) if WarmStartController().get_elements(): hooks.append(SparseRestoreHook()) return func(*args, *kwargs) - return warpper + return wrapper def warm_settings_filter(warm_start_from): - # condition 1: 原始入参为settings if isinstance(warm_start_from, estimator_lib.WarmStartSettings): - # mx_rec 定制 warm start的写法, 定制写法的策略应该和原始warm start的过滤策略不一样 if isinstance(warm_start_from.ckpt_to_initialize_from, (list, tuple)): out_setting_list = [] logger.info("According to warm_start_settings, warm start will load from more than one checkpoint path.") @@ -111,21 +109,16 @@ def warm_settings_filter(warm_start_from): filter_setting = _warm_settings_filter(setting) if filter_setting: out_setting_list.append(filter_setting) - # 这里out setting list 必须要revcover成warm_start_settings再返回 if out_setting_list: warm_start_from = recover_warm_settings(out_setting_list) return warm_start_from - # 原始写法 elif isinstance(warm_start_from.ckpt_to_initialize_from, (six.string_types, six.binary_type)): logger.info("According to warm_start_settings, warm start will load from only one checkpoint path.") filter_setting = _warm_settings_filter(warm_start_from) if filter_setting: return filter_setting return None - # condition 2: 原始入参为str elif isinstance(warm_start_from, (six.string_types, six.binary_type)): - # 这里还有一种类型是:str 这种类型相对比较简单,传递就好。但是在这里要调用以下controller来指定一下sparse的地址和表名, - # 这里可以单独写函数 table_name_list = get_table_name_set_by_ckpt_path(warm_start_from) WarmStartController().add_element(warm_start_from, table_name_list) return warm_start_from @@ -148,9 +141,7 @@ def recover_warm_settings(setting_list): var_name_to_prev_var_name=var_name_to_prev_var_name_list) -# 处理定制的warm settings, 将warm_start_from进行校验 def _build_warm_settings_list(warm_start_from): - # 这里可以修改一下传参,用参数解包来做,更加简洁高效 ckpt_to_initialize_from = warm_start_from.ckpt_to_initialize_from vars_to_warm_start = warm_start_from.vars_to_warm_start var_name_to_prev_var_name = warm_start_from.var_name_to_prev_var_name @@ -176,26 +167,16 @@ def _build_warm_settings_list(warm_start_from): def _warm_settings_filter(warm_start_setting): - # 将settings里面的稀疏摘出来 - # 要考虑名字有对应的场景 vars_to_warm_start = warm_start_setting.vars_to_warm_start var_name_to_prev_var_name = warm_start_setting.var_name_to_prev_var_name vars_to_warm_start_res = [] - # table_name_set从路径里面去获取 table_name_list = get_table_name_set_by_ckpt_path(warm_start_setting.ckpt_to_initialize_from) - # 稀疏支持以下格式: 1.str(支持表名) ; 2. 
list[str];
     if isinstance(vars_to_warm_start, str):
-        # condition 1: vars_to_warm_start is a str (a regex or a table name)
-        # table name
         matching_tables = [table for table in table_name_list if re.match(vars_to_warm_start, table)]
-        # if anything matches, this warm_start_settings no longer applies to the dense part
-        # add WarmStartController(path:table_name)
         if matching_tables:
-            # add controller to set sparse
-            WarmStartController().add_element(vars_to_warm_start.ckpt_to_initialize_from, matching_tables)
+            WarmStartController().add_element(warm_start_setting.ckpt_to_initialize_from, matching_tables)
             if vars_to_warm_start != ".*":
                 return None
-        # path: embedding_table_name
         return warm_start_setting
     elif all(isinstance(v, str) for v in vars_to_warm_start):
         sparse_vars = []
@@ -203,7 +184,7 @@ def _warm_settings_filter(warm_start_setting):
             matching_tables = [table for table in table_name_list if re.match(v, table)]
             if matching_tables:
                 sparse_vars.append(v)
-                WarmStartController().add_element(vars_to_warm_start.ckpt_to_initialize_from, matching_tables)
+                WarmStartController().add_element(warm_start_setting.ckpt_to_initialize_from, matching_tables)
         vars_to_warm_start_res = [v for v in vars_to_warm_start if v not in sparse_vars]
         if not vars_to_warm_start_res:
             warm_start_setting = None
@@ -219,14 +200,13 @@ def get_table_name_set_by_ckpt_path(warm_start_path: str) -> List[str]:
     Get the list of sparse table names saved under the path 'warm_start_path'.
     '''
     table_name_list = []
-    if tf.io.gfile.idsir(warm_start_path):
+    if tf.io.gfile.isdir(warm_start_path):
         restore_path = get_latest_ckpt(warm_start_path)
     else:
         restore_path = warm_start_path
     directory, base_name = os.path.split(restore_path)
     ckpt_name = f"sparse-{base_name}"
     sparse_path = os.path.join(directory, ckpt_name)
-    # if sparse_path does not exist this may be a GPU checkpoint path; do not raise, just return an empty table name list
     if not tf.io.gfile.isdir(sparse_path):
         logger.info(f"under the warm start path {warm_start_path}, sparse directory {sparse_path} not exists.")
     else:
@@ -248,22 +228,19 @@ def get_latest_ckpt(warm_start_path) -> str:
     return path
 
 
-
-
-
 class SparseRestoreHook(tf.estimator.SessionRunHook):
     def __init__(self):
         logging.info("In warm start mode, SparseRestoreHook has been initialized.")
-        pass
+        self._is_warm_start = False
 
     def begin(self):
         self._saver = Saver()
         logging.info("In warm start mode, begin SparseRestoreHook.")
 
     def after_create_session(self, session, coord):
-        # mxrec needs to adapt the new restore interface here; the strategy is to call restore once per path
-        self._warm_start_dict = WarmStartController().get_elements()
-        for path, restore_tables in self._warm_start_dict.items():
-            restore_path = get_latest_ckpt(path)
-            self._saver.restore(session, restore_path, restore_tables)
-
+        if not self._is_warm_start:
+            self._warm_start_dict = WarmStartController().get_elements()
+            for path, restore_tables in self._warm_start_dict.items():
+                restore_path = get_latest_ckpt(path)
+                self._saver.restore(session, restore_path, restore_tables)
+            self._is_warm_start = True  # mark done so the restore runs only once
-- 
Gitee
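Note: a minimal usage sketch for the multi-checkpoint warm start implemented by the
patch above. The checkpoint paths, the "user_table" pattern and model_fn/input_fn are
illustrative assumptions, and the list-valued WarmStartSettings fields only work with
mx_rec's patched warm-start path, not with stock TensorFlow:

    import tensorflow as tf

    ws = tf.estimator.WarmStartSettings(
        ckpt_to_initialize_from=["/ckpt/model_a", "/ckpt/model_b"],  # one source model per entry
        vars_to_warm_start=["dense.*", "user_table"],                # per-path variable patterns
        var_name_to_prev_var_name=[{}, {}])                          # per-path rename maps
    # Patterns matching sparse table names are filtered out by warm_settings_filter()
    # and restored by SparseRestoreHook after session creation; the remaining dense
    # patterns go through TensorFlow's normal warm start.
    est = tf.estimator.Estimator(model_fn=model_fn, warm_start_from=ws)
    est.train(input_fn=input_fn, max_steps=100)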
From 744c293ea213020e4a4dce8c8f5615a4fdf5a1c3 Mon Sep 17 00:00:00 2001
From: yangzhen
Date: Fri, 26 Apr 2024 09:03:14 +0800
Subject: [PATCH 066/302] =?UTF-8?q?=E4=BF=AE=E5=A4=8DdcnV2=E5=8F=82?=
 =?UTF-8?q?=E6=95=B0=E5=90=8D?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 examples/DCNv2/main_mxrec.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/examples/DCNv2/main_mxrec.py b/examples/DCNv2/main_mxrec.py
index 1721370e..6fd235ba 100644
--- a/examples/DCNv2/main_mxrec.py
+++ b/examples/DCNv2/main_mxrec.py
@@ -13,7 +13,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-
 import time
 import warnings
 import random
@@ -286,9 +285,9 @@ if __name__ == "__main__":
     feature_spec_list_eval = create_feature_spec_list(use_timestamp=False)
 
     train_batch, train_iterator = make_batch_and_iterator(cfg, feature_spec_list_train, is_training=True,
-                                                          dump_graph=True, use_faae=use_faae)
+                                                          dump_graph=True, is_use_faae=use_faae)
     eval_batch, eval_iterator = make_batch_and_iterator(cfg, feature_spec_list_eval, is_training=False,
-                                                        dump_graph=False, use_faae=use_faae)
+                                                        dump_graph=False, is_use_faae=use_faae)
     logger.info(f"train_batch: {train_batch}")
 
     if use_faae:
-- 
Gitee

From ae3aff4c793206f684ac44baf05d972db0fd859b Mon Sep 17 00:00:00 2001
From: yangzhen
Date: Fri, 26 Apr 2024 10:15:19 +0800
Subject: [PATCH 067/302] =?UTF-8?q?=E4=BF=AE=E5=A4=8Ddlrm=E5=8F=82?=
 =?UTF-8?q?=E6=95=B0=E5=90=8D?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 examples/dlrm/model/main_mxrec.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/dlrm/model/main_mxrec.py b/examples/dlrm/model/main_mxrec.py
index 05369038..b6036804 100644
--- a/examples/dlrm/model/main_mxrec.py
+++ b/examples/dlrm/model/main_mxrec.py
@@ -289,9 +289,9 @@ if __name__ == "__main__":
     feature_spec_list_eval = create_feature_spec_list(use_timestamp=False)
 
     train_batch, train_iterator = make_batch_and_iterator(cfg, feature_spec_list_train, is_training=True,
-                                                          dump_graph=True, use_faae=use_faae)
+                                                          dump_graph=True, is_use_faae=use_faae)
     eval_batch, eval_iterator = make_batch_and_iterator(cfg, feature_spec_list_eval, is_training=False,
-                                                        dump_graph=False, use_faae=use_faae)
+                                                        dump_graph=False, is_use_faae=use_faae)
     logger.info(f"train_batch: {train_batch}")
 
     if use_faae:
-- 
Gitee

From 407cb4adf00a42ee91962f432b5967cbf2991dd0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E7=BD=97=E5=B9=B8=E8=BF=90?=
Date: Fri, 26 Apr 2024 09:36:25 +0000
Subject: [PATCH 068/302] =?UTF-8?q?!106=20=E3=80=90=E4=BF=AE=E6=94=B9?=
 =?UTF-8?q?=E8=AF=B4=E6=98=8E=20Modification=E3=80=91=E8=A7=A3=E5=86=B3?=
 =?UTF-8?q?=E5=85=A8=E5=B1=80unique=E5=AF=BC=E8=87=B4=E9=9D=99=E6=80=81sha?=
 =?UTF-8?q?pe=E6=80=A7=E8=83=BD=E4=B8=8B=E9=99=8D=E9=97=AE=E9=A2=98=20*=20?=
 =?UTF-8?q?=E3=80=90=E4=BF=AE=E6=94=B9=E8=AF=B4=E6=98=8E=20Modification?=
 =?UTF-8?q?=E3=80=91=E8=A7=A3=E5=86=B3=E5=85=A8=E5=B1=80unique=E5=AF=BC?=
 =?UTF-8?q?=E8=87=B4=E9=9D=99=E6=80=81shape=E6=80=A7=E8=83=BD=E4=B8=8B?=
 =?UTF-8?q?=E9=99=8D=E9=97=AE=E9=A2=98=20*=20=E3=80=90=E4=BF=AE=E6=94=B9?=
 =?UTF-8?q?=E8=AF=B4=E6=98=8E=20Modification=E3=80=91=E8=A7=A3=E5=86=B3?=
 =?UTF-8?q?=E5=85=A8=E5=B1=80unique=E5=AF=BC=E8=87=B4=E9=9D=99=E6=80=81sha?=
 =?UTF-8?q?pe=E6=80=A7=E8=83=BD=E4=B8=8B=E9=99=8D=E9=97=AE=E9=A2=98?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 mx_rec/optimizers/base.py | 31 ++++++++++++++++++++++---------
 1 file changed, 22 insertions(+), 9 deletions(-)

diff --git a/mx_rec/optimizers/base.py b/mx_rec/optimizers/base.py
index ed765539..696406f8 100644
--- a/mx_rec/optimizers/base.py
+++ b/mx_rec/optimizers/base.py
@@ -26,15 +26,18 @@ from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.training.optimizer import _TensorProcessor
 
+from mx_rec.constants.constants import ASCAnchorAttr
 from mx_rec.util.tf_version_adapter import npu_ops
 from mx_rec.util.initialize import ConfigInitializer
 from mx_rec.util.log import logger
+from mx_rec.util.communication.hccl_ops import get_rank_size
 
 
-def get_restore_vector_second(table_name: str) -> tf.Tensor:
+def get_restore_vector_second(table_name: str, max_lookup_vec_size: int) -> tf.Tensor:
     """
     Get restore vector which is calculated after the second all2all
     :param table_name: embedding table_name
+    :param max_lookup_vec_size: static shape
     :return: the restore vector calculated after the second all2all
     """
     channel_id = 0
@@ -43,15 +46,16 @@ def get_restore_vector_second(table_name: str) -> tf.Tensor:
     with tf.compat.v1.variable_scope(table_name, reuse=tf.compat.v1.AUTO_REUSE):
         restore_vector_second = npu_ops.gen_npu_ops.get_next(
             output_types=[tf.int32],
-            output_shapes=[[None]],
+            output_shapes=[[max_lookup_vec_size]],
             channel_name=f'{table_name}_restore_second_{channel_id}')[0]
     return restore_vector_second
 
 
-def get_unique_keys(table_name: str, is_expansion: bool) -> tf.Tensor:
+def get_unique_keys(table_name: str, max_lookup_vec_size: int, is_expansion: bool) -> tf.Tensor:
     """
     Get the global unique keys which is calculated after the second all2all
     :param table_name: embedding table_name
+    :param max_lookup_vec_size: static shape
     :param is_expansion: use dynamic expansion
     :return: the global unique keys calculated after the second all2all
     """
@@ -61,13 +65,13 @@ def get_unique_keys(table_name: str, is_expansion: bool) -> tf.Tensor:
     if is_expansion:
         unique_keys = npu_ops.gen_npu_ops.get_next(
             output_types=[tf.int64],
-            output_shapes=[[None]],
+            output_shapes=[[max_lookup_vec_size]],
             channel_name=f'{table_name}_uniquekeys_{channel_id}')[0]
         return unique_keys
 
     unique_keys = npu_ops.gen_npu_ops.get_next(
         output_types=[tf.int32],
-        output_shapes=[[None]],
+        output_shapes=[[max_lookup_vec_size]],
         channel_name=f'{table_name}_uniquekeys_{channel_id}')[0]
     return unique_keys
 
@@ -95,14 +99,23 @@ class CustomizedOptimizer:
         if isinstance(var, ops.Tensor):
             # in expansion mode the table name comes from the scope; the offset is -2
             table_name = var.op.name.split('/')[-2]
+            table_instance = ConfigInitializer.get_instance().sparse_embed_config.get_table_instance_by_name(table_name)
         else:
             table_instance = ConfigInitializer.get_instance().sparse_embed_config.get_table_instance(var)
             table_name = table_instance.table_name
 
-        with tf.compat.v1.variable_scope("restore_vector_second"):
-            restore_vector_second = get_restore_vector_second(table_name)
-
-        with tf.compat.v1.variable_scope("unique_keys"):
-            unique_keys = get_unique_keys(table_name, is_expansion)
+        max_lookup_vec_size = None
+        use_static = ConfigInitializer.get_instance().use_static
+        if use_static:
+            send_count = table_instance.send_count
+            rank_size = get_rank_size()
+            max_lookup_vec_size = send_count * rank_size if send_count > 0 else None
+
+        with tf.compat.v1.variable_scope(str(ASCAnchorAttr.RESTORE_VECTOR_SECOND)):
+            restore_vector_second = get_restore_vector_second(table_name, max_lookup_vec_size)
+
+        with tf.compat.v1.variable_scope(str(ASCAnchorAttr.UNIQUE_KEYS)):
+            unique_keys = get_unique_keys(table_name, max_lookup_vec_size, is_expansion)
 
         unique_local_grad = tf.compat.v1.unsorted_segment_sum(grad,
                                                               restore_vector_second,
-- 
Gitee
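Note: the patch above replaces the dynamic [[None]] get_next shapes with a fixed upper
bound, so the compiled graph keeps a single static shape instead of recompiling per
step. The sizing rule it uses, restated with assumed illustrative numbers:

    # After the second all2all every rank holds at most send_count unique keys from
    # each of the rank_size ranks, so the padded lookup vector has a fixed length.
    send_count = 4096    # assumed per-rank upper bound configured on the table
    rank_size = 8        # assumed number of NPU ranks in the job
    max_lookup_vec_size = send_count * rank_size if send_count > 0 else None
    print(max_lookup_vec_size)   # 32768; None falls back to dynamic shapes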
From 8a3e5af57410974ca8d7850655f05d6d034cf562 Mon Sep 17 00:00:00 2001
From: chenhangcal <1764252734@qq.com>
Date: Mon, 29 Apr 2024 01:24:07 +0000
Subject: [PATCH 069/302] =?UTF-8?q?!98=20little-demo=E7=A1=AE=E5=AE=9A?=
 =?UTF-8?q?=E6=80=A7=E8=AE=A1=E7=AE=97loss=E7=94=A8=E4=BE=8B=20*=20add=20e?=
=?UTF-8?q?d=20examples/demo/little=5Fdemo/deterministic=5Floss/loss2.=20*?= =?UTF-8?q?=20update=20examples/demo/little=5Fdemo/run=5Fmode.py.=20*=20up?= =?UTF-8?q?date=20examples/demo/little=5Fdemo/config.py.=20*=20update=20ex?= =?UTF-8?q?amples/demo/little=5Fdemo/main.py.=20*=20update=20examples/demo?= =?UTF-8?q?/little=5Fdemo/run=5Fmode.py.=20*=20update=20examples/demo/litt?= =?UTF-8?q?le=5Fdemo/config.py.=20*=20update=20examples/demo/little=5Fdemo?= =?UTF-8?q?/main.py.=20*=20update=20examples/demo/little=5Fdemo/run=5Fdete?= =?UTF-8?q?rministic.sh.=20*=20rename=20*=20rename=20*=20update=20examples?= =?UTF-8?q?/demo/little=5Fdemo/run=5Fdeterministic.sh.=20*=20update=20exam?= =?UTF-8?q?ples/demo/little=5Fdemo/run=5Fdeterministic.sh.=20*=20update=20?= =?UTF-8?q?examples/demo/little=5Fdemo/run=5Fdeterministic.sh.=20*=20add?= =?UTF-8?q?=20examples/demo/little=5Fdemo/deterministic=5Floss/Ascend910B3?= =?UTF-8?q?.=20*=20add=20examples/demo/little=5Fdemo/deterministic=5Floss/?= =?UTF-8?q?Ascend910B.=20*=20add=20examples/demo/little=5Fdemo/run=5Fdeter?= =?UTF-8?q?ministic.sh.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/demo/little_demo/config.py | 8 +- .../demo/little_demo/deterministic_loss/loss | 200 ++++++++++++++++++ .../demo/little_demo/deterministic_loss/loss1 | 200 ++++++++++++++++++ .../demo/little_demo/deterministic_loss/loss2 | 200 ++++++++++++++++++ .../demo/little_demo/deterministic_loss/loss3 | 200 ++++++++++++++++++ examples/demo/little_demo/main.py | 18 +- .../demo/little_demo/run_deterministic.sh | 45 ++++ examples/demo/little_demo/run_mode.py | 8 +- 8 files changed, 870 insertions(+), 9 deletions(-) create mode 100644 examples/demo/little_demo/deterministic_loss/loss create mode 100644 examples/demo/little_demo/deterministic_loss/loss1 create mode 100644 examples/demo/little_demo/deterministic_loss/loss2 create mode 100644 examples/demo/little_demo/deterministic_loss/loss3 create mode 100644 examples/demo/little_demo/run_deterministic.sh diff --git a/examples/demo/little_demo/config.py b/examples/demo/little_demo/config.py index 2cc48216..a0912ac5 100644 --- a/examples/demo/little_demo/config.py +++ b/examples/demo/little_demo/config.py @@ -95,7 +95,7 @@ class Config: self.learning_rate = 0.01 -def sess_config(dump_data=False, dump_path="./dump_output", dump_steps="0|1|2"): +def sess_config(dump_data=False, dump_path="./dump_output", dump_steps="0|1|2", use_deterministic=0): session_config = tf.compat.v1.ConfigProto(allow_soft_placement=False, log_device_placement=False) @@ -108,7 +108,11 @@ def sess_config(dump_data=False, dump_path="./dump_output", dump_steps="0|1|2"): custom_op.parameter_map["HCCL_algorithm"].s = tf.compat.as_bytes("level0:pairwise;level1:pairwise") custom_op.parameter_map["enable_data_pre_proc"].b = True custom_op.parameter_map["iterations_per_loop"].i = 1 - custom_op.parameter_map["precision_mode"].s = tf.compat.as_bytes("allow_mix_precision") + if use_deterministic: + custom_op.parameter_map["precision_mode"].s = tf.compat.as_bytes("must_keep_origin_dtype") + custom_op.parameter_map["deterministic"].i = 1 + else: + custom_op.parameter_map["precision_mode"].s = tf.compat.as_bytes("allow_mix_precision") custom_op.parameter_map["hcom_parallel"].b = False custom_op.parameter_map["op_precision_mode"].s = tf.compat.as_bytes("op_impl_mode.ini") custom_op.parameter_map["op_execute_timeout"].i = 2000 diff --git a/examples/demo/little_demo/deterministic_loss/loss 
b/examples/demo/little_demo/deterministic_loss/loss new file mode 100644 index 00000000..3bd00f80 --- /dev/null +++ b/examples/demo/little_demo/deterministic_loss/loss @@ -0,0 +1,200 @@ +0.6931473016738892 +0.6930400133132935 +0.6931400895118713 +0.69315505027771 +0.6931849122047424 +0.6931070685386658 +0.6931337714195251 +0.6931014657020569 +0.6931450963020325 +0.6931362152099609 +0.6930745244026184 +0.6931930184364319 +0.693183958530426 +0.6931136846542358 +0.6932246088981628 +0.69315105676651 +0.6931785941123962 +0.6931335926055908 +0.6931543946266174 +0.6931360960006714 +0.6931753158569336 +0.6931651830673218 +0.6931512951850891 +0.6931533217430115 +0.6931378841400146 +0.6931486129760742 +0.6931435465812683 +0.6931432485580444 +0.6930928230285645 +0.6931749582290649 +0.693172037601471 +0.6931487917900085 +0.6931713819503784 +0.6931683421134949 +0.6931532621383667 +0.6931494474411011 +0.6932084560394287 +0.6930452585220337 +0.6931130886077881 +0.6932073831558228 +0.6931206583976746 +0.6931828856468201 +0.6931034922599792 +0.6931605935096741 +0.6931373476982117 +0.6931723952293396 +0.6931106448173523 +0.6931154131889343 +0.6931938529014587 +0.6932826638221741 +0.6932423114776611 +0.6931906342506409 +0.6931505799293518 +0.6931438446044922 +0.6931610107421875 +0.6931508779525757 +0.6931482553482056 +0.693139910697937 +0.693148136138916 +0.6931435465812683 +0.6930944323539734 +0.693130373954773 +0.6931836009025574 +0.6930789947509766 +0.6932032108306885 +0.693130373954773 +0.6933913230895996 +0.6931992173194885 +0.6931376457214355 +0.6931767463684082 +0.6931583881378174 +0.6931485533714294 +0.693138837814331 +0.6931250095367432 +0.693103015422821 +0.6931023597717285 +0.6932260990142822 +0.6931752562522888 +0.6930729150772095 +0.6929311156272888 +0.693302571773529 +0.6932254433631897 +0.69317626953125 +0.693097710609436 +0.6930376291275024 +0.6931532621383667 +0.6931279301643372 +0.6931777596473694 +0.6931577324867249 +0.6931435465812683 +0.6931730508804321 +0.693141520023346 +0.6931696534156799 +0.6931543350219727 +0.6931476593017578 +0.6931471824645996 +0.6931589245796204 +0.693145751953125 +0.6931431293487549 +0.6931287050247192 +0.6931427717208862 +0.6931363344192505 +0.6931345462799072 +0.6931136250495911 +0.6930984258651733 +0.6931260228157043 +0.6932109594345093 +0.6931638121604919 +0.6931529641151428 +0.6931443214416504 +0.6931478381156921 +0.6931700110435486 +0.69312983751297 +0.6932106614112854 +0.6930972933769226 +0.6931629776954651 +0.6931963562965393 +0.6932249665260315 +0.6932281851768494 +0.6932195425033569 +0.6931582093238831 +0.6931502819061279 +0.693153440952301 +0.6930547952651978 +0.6932091116905212 +0.6930832862854004 +0.69318687915802 +0.693234384059906 +0.6931787133216858 +0.6931472420692444 +0.6931833624839783 +0.6931379437446594 +0.6931558847427368 +0.693196713924408 +0.6931143999099731 +0.693136990070343 +0.6931957602500916 +0.6931578516960144 +0.6931463479995728 +0.6931509375572205 +0.6931226253509521 +0.6931785941123962 +0.6931405663490295 +0.6931736469268799 +0.6931595206260681 +0.6931319236755371 +0.6931323409080505 +0.6931301355361938 +0.6931783556938171 +0.6931540966033936 +0.6930714249610901 +0.693152904510498 +0.6931881904602051 +0.6931595206260681 +0.6931363940238953 +0.6931393146514893 +0.6931549310684204 +0.6931518316268921 +0.6931600570678711 +0.6931359767913818 +0.693086564540863 +0.6930826306343079 +0.693168044090271 +0.6931942105293274 +0.6932410001754761 +0.693097710609436 +0.693099856376648 +0.69315505027771 +0.693153977394104 +0.6931472420692444 
+0.6931328177452087 +0.6931746602058411 +0.6931381821632385 +0.6931582689285278 +0.6933059692382812 +0.6930915117263794 +0.6931243538856506 +0.6934514045715332 +0.6933988928794861 +0.6932798624038696 +0.6931632161140442 +0.6931505799293518 +0.6931473016738892 +0.6931563019752502 +0.6931017637252808 +0.6932226419448853 +0.6932034492492676 +0.6931058764457703 +0.6932246088981628 +0.6930988430976868 +0.6931736469268799 +0.6931524276733398 +0.6931332945823669 +0.6931236386299133 +0.6931801438331604 +0.6931136250495911 +0.6931392550468445 +0.6931288838386536 +0.6931090950965881 +0.6931648254394531 \ No newline at end of file diff --git a/examples/demo/little_demo/deterministic_loss/loss1 b/examples/demo/little_demo/deterministic_loss/loss1 new file mode 100644 index 00000000..cfe29fc9 --- /dev/null +++ b/examples/demo/little_demo/deterministic_loss/loss1 @@ -0,0 +1,200 @@ +0.6931475400924683 +0.6930400133132935 +0.693139910697937 +0.6931551098823547 +0.6931850910186768 +0.6931071877479553 +0.6931338310241699 +0.6931014060974121 +0.6931450963020325 +0.69313645362854 +0.6930742263793945 +0.6931931376457214 +0.6931841373443604 +0.6931138038635254 +0.6932246685028076 +0.6931509971618652 +0.6931785941123962 +0.693133533000946 +0.6931544542312622 +0.6931360363960266 +0.6931753158569336 +0.6931651830673218 +0.6931511163711548 +0.6931532621383667 +0.6931378245353699 +0.6931488513946533 +0.6931437253952026 +0.6931431889533997 +0.693092942237854 +0.6931750178337097 +0.693172037601471 +0.6931487917900085 +0.6931712627410889 +0.6931683421134949 +0.6931533813476562 +0.6931492686271667 +0.6932083964347839 +0.6930453181266785 +0.6931129693984985 +0.6932074427604675 +0.6931206583976746 +0.6931827068328857 +0.6931033730506897 +0.6931606531143188 +0.6931372880935669 +0.69317227602005 +0.6931107044219971 +0.6931154727935791 +0.6931938529014587 +0.6932826638221741 +0.6932423710823059 +0.6931905746459961 +0.6931506395339966 +0.6931438446044922 +0.6931609511375427 +0.69315105676651 +0.6931482553482056 +0.6931400895118713 +0.6931483149528503 +0.6931435465812683 +0.6930944919586182 +0.6931304931640625 +0.6931834816932678 +0.6930789947509766 +0.6932030916213989 +0.693130373954773 +0.6933913826942444 +0.6931991577148438 +0.6931377649307251 +0.6931768655776978 +0.6931586861610413 +0.6931484341621399 +0.6931391358375549 +0.6931250691413879 +0.6931028366088867 +0.6931021213531494 +0.6932262182235718 +0.6931752562522888 +0.6930727362632751 +0.6929311156272888 +0.6933025121688843 +0.6932255625724792 +0.6931764483451843 +0.6930979490280151 +0.6930376887321472 +0.6931535005569458 +0.6931277513504028 +0.6931778788566589 +0.6931575536727905 +0.6931436657905579 +0.6931729316711426 +0.6931415796279907 +0.6931697726249695 +0.6931543946266174 +0.6931476593017578 +0.6931473016738892 +0.6931586861610413 +0.6931456923484802 +0.6931430697441101 +0.6931284070014954 +0.693142831325531 +0.6931363940238953 +0.6931345462799072 +0.6931135058403015 +0.6930984258651733 +0.6931260228157043 +0.6932108998298645 +0.6931638717651367 +0.6931529641151428 +0.6931443810462952 +0.6931477785110474 +0.6931700110435486 +0.6931299567222595 +0.6932107210159302 +0.6930974125862122 +0.6931627988815308 +0.6931964159011841 +0.6932250261306763 +0.6932283043861389 +0.6932194828987122 +0.6931582093238831 +0.6931501626968384 +0.693153440952301 +0.6930548548698425 +0.6932091116905212 +0.6930834650993347 +0.6931867599487305 +0.6932343244552612 +0.6931787133216858 +0.6931471824645996 +0.6931833028793335 +0.6931377649307251 +0.6931559443473816 +0.693196713924408 
+0.6931144595146179 +0.6931368708610535 +0.6931958198547363 +0.6931577920913696 +0.6931461691856384 +0.6931511163711548 +0.6931224465370178 +0.693178653717041 +0.6931405663490295 +0.6931737661361694 +0.6931594014167786 +0.6931319236755371 +0.6931324005126953 +0.6931299567222595 +0.6931784152984619 +0.6931542754173279 +0.6930714845657349 +0.693152666091919 +0.6931881308555603 +0.6931596994400024 +0.6931365132331848 +0.6931394338607788 +0.6931548714637756 +0.6931518316268921 +0.6931599974632263 +0.6931360363960266 +0.6930868029594421 +0.6930827498435974 +0.6931679844856262 +0.6931941509246826 +0.6932410001754761 +0.693097710609436 +0.693099856376648 +0.6931549906730652 +0.6931538581848145 +0.6931471824645996 +0.693132758140564 +0.6931745409965515 +0.6931381225585938 +0.6931583881378174 +0.6933057904243469 +0.693091630935669 +0.6931243538856506 +0.6934512853622437 +0.6933985948562622 +0.6932798624038696 +0.6931629180908203 +0.6931505799293518 +0.6931473612785339 +0.6931563019752502 +0.6931016445159912 +0.6932225227355957 +0.6932035088539124 +0.693105936050415 +0.6932247877120972 +0.6930989027023315 +0.6931736469268799 +0.6931525468826294 +0.6931331753730774 +0.6931236982345581 +0.69318026304245 +0.6931138038635254 +0.6931390762329102 +0.6931287050247192 +0.6931091547012329 +0.6931648850440979 \ No newline at end of file diff --git a/examples/demo/little_demo/deterministic_loss/loss2 b/examples/demo/little_demo/deterministic_loss/loss2 new file mode 100644 index 00000000..cfe29fc9 --- /dev/null +++ b/examples/demo/little_demo/deterministic_loss/loss2 @@ -0,0 +1,200 @@ +0.6931475400924683 +0.6930400133132935 +0.693139910697937 +0.6931551098823547 +0.6931850910186768 +0.6931071877479553 +0.6931338310241699 +0.6931014060974121 +0.6931450963020325 +0.69313645362854 +0.6930742263793945 +0.6931931376457214 +0.6931841373443604 +0.6931138038635254 +0.6932246685028076 +0.6931509971618652 +0.6931785941123962 +0.693133533000946 +0.6931544542312622 +0.6931360363960266 +0.6931753158569336 +0.6931651830673218 +0.6931511163711548 +0.6931532621383667 +0.6931378245353699 +0.6931488513946533 +0.6931437253952026 +0.6931431889533997 +0.693092942237854 +0.6931750178337097 +0.693172037601471 +0.6931487917900085 +0.6931712627410889 +0.6931683421134949 +0.6931533813476562 +0.6931492686271667 +0.6932083964347839 +0.6930453181266785 +0.6931129693984985 +0.6932074427604675 +0.6931206583976746 +0.6931827068328857 +0.6931033730506897 +0.6931606531143188 +0.6931372880935669 +0.69317227602005 +0.6931107044219971 +0.6931154727935791 +0.6931938529014587 +0.6932826638221741 +0.6932423710823059 +0.6931905746459961 +0.6931506395339966 +0.6931438446044922 +0.6931609511375427 +0.69315105676651 +0.6931482553482056 +0.6931400895118713 +0.6931483149528503 +0.6931435465812683 +0.6930944919586182 +0.6931304931640625 +0.6931834816932678 +0.6930789947509766 +0.6932030916213989 +0.693130373954773 +0.6933913826942444 +0.6931991577148438 +0.6931377649307251 +0.6931768655776978 +0.6931586861610413 +0.6931484341621399 +0.6931391358375549 +0.6931250691413879 +0.6931028366088867 +0.6931021213531494 +0.6932262182235718 +0.6931752562522888 +0.6930727362632751 +0.6929311156272888 +0.6933025121688843 +0.6932255625724792 +0.6931764483451843 +0.6930979490280151 +0.6930376887321472 +0.6931535005569458 +0.6931277513504028 +0.6931778788566589 +0.6931575536727905 +0.6931436657905579 +0.6931729316711426 +0.6931415796279907 +0.6931697726249695 +0.6931543946266174 +0.6931476593017578 +0.6931473016738892 +0.6931586861610413 +0.6931456923484802 
+0.6931430697441101 +0.6931284070014954 +0.693142831325531 +0.6931363940238953 +0.6931345462799072 +0.6931135058403015 +0.6930984258651733 +0.6931260228157043 +0.6932108998298645 +0.6931638717651367 +0.6931529641151428 +0.6931443810462952 +0.6931477785110474 +0.6931700110435486 +0.6931299567222595 +0.6932107210159302 +0.6930974125862122 +0.6931627988815308 +0.6931964159011841 +0.6932250261306763 +0.6932283043861389 +0.6932194828987122 +0.6931582093238831 +0.6931501626968384 +0.693153440952301 +0.6930548548698425 +0.6932091116905212 +0.6930834650993347 +0.6931867599487305 +0.6932343244552612 +0.6931787133216858 +0.6931471824645996 +0.6931833028793335 +0.6931377649307251 +0.6931559443473816 +0.693196713924408 +0.6931144595146179 +0.6931368708610535 +0.6931958198547363 +0.6931577920913696 +0.6931461691856384 +0.6931511163711548 +0.6931224465370178 +0.693178653717041 +0.6931405663490295 +0.6931737661361694 +0.6931594014167786 +0.6931319236755371 +0.6931324005126953 +0.6931299567222595 +0.6931784152984619 +0.6931542754173279 +0.6930714845657349 +0.693152666091919 +0.6931881308555603 +0.6931596994400024 +0.6931365132331848 +0.6931394338607788 +0.6931548714637756 +0.6931518316268921 +0.6931599974632263 +0.6931360363960266 +0.6930868029594421 +0.6930827498435974 +0.6931679844856262 +0.6931941509246826 +0.6932410001754761 +0.693097710609436 +0.693099856376648 +0.6931549906730652 +0.6931538581848145 +0.6931471824645996 +0.693132758140564 +0.6931745409965515 +0.6931381225585938 +0.6931583881378174 +0.6933057904243469 +0.693091630935669 +0.6931243538856506 +0.6934512853622437 +0.6933985948562622 +0.6932798624038696 +0.6931629180908203 +0.6931505799293518 +0.6931473612785339 +0.6931563019752502 +0.6931016445159912 +0.6932225227355957 +0.6932035088539124 +0.693105936050415 +0.6932247877120972 +0.6930989027023315 +0.6931736469268799 +0.6931525468826294 +0.6931331753730774 +0.6931236982345581 +0.69318026304245 +0.6931138038635254 +0.6931390762329102 +0.6931287050247192 +0.6931091547012329 +0.6931648850440979 \ No newline at end of file diff --git a/examples/demo/little_demo/deterministic_loss/loss3 b/examples/demo/little_demo/deterministic_loss/loss3 new file mode 100644 index 00000000..a38ce81c --- /dev/null +++ b/examples/demo/little_demo/deterministic_loss/loss3 @@ -0,0 +1,200 @@ +0.6931473016738892 +0.6930400729179382 +0.6931402087211609 +0.69315505027771 +0.6931849122047424 +0.6931070685386658 +0.6931337118148804 +0.6931014060974121 +0.693144679069519 +0.6931362748146057 +0.6930745840072632 +0.6931930780410767 +0.6931840777397156 +0.6931135654449463 +0.6932245492935181 +0.6931509375572205 +0.6931784152984619 +0.6931337714195251 +0.693154513835907 +0.6931360363960266 +0.6931752562522888 +0.6931653022766113 +0.6931512355804443 +0.6931530833244324 +0.6931378841400146 +0.6931486129760742 +0.6931437253952026 +0.6931434273719788 +0.6930927634239197 +0.6931749582290649 +0.6931719779968262 +0.6931490302085876 +0.6931714415550232 +0.6931683421134949 +0.6931533217430115 +0.6931492686271667 +0.6932084560394287 +0.6930454969406128 +0.6931130290031433 +0.6932073831558228 +0.6931207776069641 +0.6931827664375305 +0.693103551864624 +0.6931607127189636 +0.6931374669075012 +0.69317227602005 +0.6931108832359314 +0.6931152939796448 +0.6931939125061035 +0.6932826638221741 +0.6932422518730164 +0.6931905746459961 +0.693150520324707 +0.6931438446044922 +0.693160891532898 +0.6931508779525757 +0.693148136138916 +0.6931400299072266 +0.6931481957435608 +0.6931434869766235 +0.6930946111679077 +0.6931304335594177 
+0.693183422088623 +0.6930789947509766 +0.6932030320167542 +0.6931302547454834 +0.6933913826942444 +0.6931991577148438 +0.6931378841400146 +0.6931770443916321 +0.6931586265563965 +0.6931484937667847 +0.6931388974189758 +0.6931250095367432 +0.693103015422821 +0.6931023001670837 +0.6932262182235718 +0.6931753754615784 +0.6930726170539856 +0.6929312944412231 +0.693302571773529 +0.6932252645492554 +0.6931763291358948 +0.693097710609436 +0.6930376291275024 +0.6931533217430115 +0.6931278705596924 +0.6931778788566589 +0.6931576728820801 +0.6931436657905579 +0.6931729912757874 +0.6931415796279907 +0.6931697130203247 +0.6931542754173279 +0.6931475400924683 +0.6931473016738892 +0.6931589245796204 +0.6931456923484802 +0.6931431293487549 +0.6931285858154297 +0.693142831325531 +0.6931363344192505 +0.6931344270706177 +0.6931136250495911 +0.6930983066558838 +0.6931259632110596 +0.693211019039154 +0.6931636929512024 +0.6931530237197876 +0.6931443214416504 +0.6931476593017578 +0.6931700706481934 +0.69312983751297 +0.6932106614112854 +0.6930974125862122 +0.6931630373001099 +0.6931962370872498 +0.6932251453399658 +0.6932281851768494 +0.6932194828987122 +0.6931582093238831 +0.6931502819061279 +0.693153440952301 +0.6930545568466187 +0.693209171295166 +0.6930832862854004 +0.6931869387626648 +0.6932346224784851 +0.693178653717041 +0.6931472420692444 +0.6931833624839783 +0.6931377649307251 +0.6931559443473816 +0.6931968331336975 +0.6931143999099731 +0.6931371092796326 +0.6931959390640259 +0.6931577324867249 +0.6931463479995728 +0.6931511759757996 +0.6931225061416626 +0.6931787133216858 +0.6931406259536743 +0.6931735873222351 +0.6931595206260681 +0.6931317448616028 +0.6931322813034058 +0.6931302547454834 +0.6931782960891724 +0.6931543946266174 +0.6930716037750244 +0.6931528449058533 +0.6931881904602051 +0.6931595802307129 +0.6931363940238953 +0.6931394934654236 +0.6931549906730652 +0.6931518912315369 +0.6931601762771606 +0.6931359767913818 +0.6930863261222839 +0.6930826902389526 +0.693168044090271 +0.6931941509246826 +0.6932411193847656 +0.6930977702140808 +0.6930997967720032 +0.6931549906730652 +0.6931539177894592 +0.6931472420692444 +0.6931326985359192 +0.6931745409965515 +0.6931379437446594 +0.6931582689285278 +0.6933060884475708 +0.693091630935669 +0.6931243538856506 +0.6934512853622437 +0.693398654460907 +0.6932798624038696 +0.6931633353233337 +0.6931505799293518 +0.6931473612785339 +0.6931564211845398 +0.693101704120636 +0.6932228207588196 +0.6932032704353333 +0.6931060552597046 +0.6932246685028076 +0.6930988430976868 +0.6931737065315247 +0.6931525468826294 +0.6931332945823669 +0.6931236386299133 +0.6931802034378052 +0.6931136846542358 +0.6931393146514893 +0.6931288242340088 +0.6931090950965881 +0.6931647658348083 \ No newline at end of file diff --git a/examples/demo/little_demo/main.py b/examples/demo/little_demo/main.py index 5d5e151e..14b2e065 100644 --- a/examples/demo/little_demo/main.py +++ b/examples/demo/little_demo/main.py @@ -21,6 +21,7 @@ import shutil import warnings from glob import glob +import numpy as np import tensorflow as tf from mx_rec.constants.constants import ASCEND_TIMESTAMP @@ -192,9 +193,10 @@ if __name__ == "__main__": MODIFY_GRAPH_FLAG = bool(int(os.getenv("USE_MODIFY_GRAPH", 0))) USE_TIMESTAMP = bool(int(os.getenv("USE_TIMESTAMP", 0))) USE_ONE_SHOT = bool(int(os.getenv("USE_ONE_SHOT", 0))) + USE_DETERMINISTIC = bool(int(os.getenv("USE_DETERMINISTIC", 0))) except ValueError as err: raise ValueError("please correctly config USE_MPI or USE_DYNAMIC or USE_DYNAMIC_EXPANSION or " - 
"USE_MULTI_LOOKUP or USE_MODIFY_GRAPH or USE_TIMESTAMP or USE_ONE_SHOT " + "USE_MULTI_LOOKUP or USE_MODIFY_GRAPH or USE_TIMESTAMP or USE_ONE_SHOT or USE_DETERMINISTIC" "only 0 or 1 is supported.") from err try: @@ -202,6 +204,10 @@ if __name__ == "__main__": except ValueError as err: raise ValueError("please correctly config MULTI_LOOKUP_TIMES only int is supported.") from err + if USE_DETERMINISTIC: + np.random.seed(128) + tf.random.set_random_seed(128) + if_load = False save_path = "./saved-model" model_file = [] @@ -261,11 +267,12 @@ if __name__ == "__main__": raise ValueError(f"cache mode must in {list(cache_mode_dict.keys())}, get:{cache_mode}") if cache_mode in ["DDR", "SSD"] and not use_dynamic: logger.warning("when cache_mode in [DDR, SSD], suggest use_dynamic=true to avoid tuning size parameter") - + emb_initializer = tf.compat.v1.constant_initializer(0) if USE_DETERMINISTIC \ + else tf.compat.v1.truncated_normal_initializer() user_hashtable = create_table(key_dtype=tf.int64, dim=tf.TensorShape([cfg.user_hashtable_dim]), name='user_table', - emb_initializer=tf.compat.v1.truncated_normal_initializer(), + emb_initializer=emb_initializer, optimizer_list=sparse_optimizer_list, all2all_gradients_op="sum_gradients_and_div_by_ranksize", **cache_mode_dict[cache_mode]) @@ -273,7 +280,7 @@ if __name__ == "__main__": item_hashtable = create_table(key_dtype=tf.int64, dim=tf.TensorShape([cfg.item_hashtable_dim]), name='item_table', - emb_initializer=tf.compat.v1.truncated_normal_initializer(), + emb_initializer=emb_initializer, optimizer_list=sparse_optimizer_list, **cache_mode_dict[cache_mode]) @@ -293,7 +300,8 @@ if __name__ == "__main__": batch_number=MAX_DATASET_GENERATE * get_rank_size()) dense_variables, sparse_variables = get_dense_and_sparse_variable() - params = {"train_batch": train_batch, "eval_batch": eval_batch, "use_one_shot": USE_ONE_SHOT} + params = {"train_batch": train_batch, "eval_batch": eval_batch, "use_one_shot": USE_ONE_SHOT, + "use_deterministic": USE_DETERMINISTIC} run_mode = RunMode( MODIFY_GRAPH_FLAG, USE_TIMESTAMP, table_list, optimizer_list, train_model, eval_model, train_iterator, eval_iterator, MAX_TRAIN_STEPS, EVAL_STEPS, params diff --git a/examples/demo/little_demo/run_deterministic.sh b/examples/demo/little_demo/run_deterministic.sh new file mode 100644 index 00000000..fbb4342d --- /dev/null +++ b/examples/demo/little_demo/run_deterministic.sh @@ -0,0 +1,45 @@ +#!/bin/bash +# Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +export USE_DETERMINISTIC=1 + +sh run.sh main.py | tee log + +grep -rn "loss" log | grep "1,0" | awk '{print $NF}'> loss + +rm -f log + +soc_name=`python3 -c 'import acl;print(acl.get_soc_name())'` +echo "soc_name: $soc_name" + +loss_file=deterministic_loss/loss${soc_name:10:1} + +if [ ! 
-e $loss_file ];then + echo "$loss_file file does not exist" + rm -f loss + exit 1 +fi + + +diff $loss_file loss + +if [ $? -eq 0 ]; then + echo "deterministic loss check passed" +else + echo "deterministic loss check failed" +fi + +rm -f loss diff --git a/examples/demo/little_demo/run_mode.py b/examples/demo/little_demo/run_mode.py index 6a3301c4..f164322a 100644 --- a/examples/demo/little_demo/run_mode.py +++ b/examples/demo/little_demo/run_mode.py @@ -44,7 +44,9 @@ class RunMode: eval_model, train_iterator, eval_iterator, max_train_steps: int, infer_steps: int, params: dict): self.is_modify_graph = is_modify_graph self.is_faae = is_faae - self.session = tf.compat.v1.Session(config=sess_config(dump_data=False)) + self.use_deterministic = params.get("use_deterministic") + self.session = tf.compat.v1.Session( + config=sess_config(dump_data=False, use_deterministic=self.use_deterministic)) self.train_model = train_model self.train_iterator = train_iterator self.eval_model = eval_model @@ -138,7 +140,9 @@ class RunMode: for i in range(start_step, start_step + self.max_train_steps): logger.info("################ training at step %d ################", i) try: - self.session.run([self.train_ops, self.train_model.loss_list]) + _, loss = self.session.run([self.train_ops, self.train_model.loss_list]) + if self.use_deterministic: + logger.info("train_loss: %s", loss[0]) except tf.errors.OutOfRangeError: logger.info("Encounter the end of Sequence for training.") break -- Gitee From d566330b9910877827beabbc87ad10b436986a1a Mon Sep 17 00:00:00 2001 From: yxy1684 <2270320041@qq.com> Date: Mon, 29 Apr 2024 06:08:04 +0000 Subject: [PATCH 070/302] !97 cleancode * Merge branch 'develop' of gitee.com:ascend/mxrec into develop_cleancode * Merge branch 'develop' of gitee.com:ascend/mxrec into develop_cleancode * cleancode * cleancode * cleancode * cleancode * cleancode * cleancode * cleancode * cleancode * cleancode * cleancode * cleancode * cleancode * Merge branch 'develop' of gitee.com:ascend/mxrec into develop_cleancode * cleancode * cleancode * cleancode * cleancode * cleancode * cleancode * cleancode * cleancode * cleancode * cleancode * cleancode --- .../aclnn_op_test/inc/op_runner.h | 8 +- .../op_host/embedding_lookup_by_address.cpp | 26 ++- .../op_host/embedding_update_by_address.cpp | 23 ++- .../op_kernel/embedding_lookup_by_address.cpp | 150 ++++++++------- .../op_kernel/embedding_update_by_address.cpp | 172 ++++++++---------- examples/DCNv2/main_mxrec.py | 8 +- .../little_demo_estimator/nn_model_build.py | 11 +- .../little_demo_estimator/nn_model_input.py | 2 +- .../demo/little_demo_estimator/nn_optim.py | 4 +- examples/dlrm/criteo_tb/gen_ttf.py | 23 ++- examples/dlrm/model/main_mxrec.py | 14 +- examples/dlrm/model/mean_auc.py | 4 +- mx_rec/__init__.py | 2 +- mx_rec/core/asc/merge_table.py | 8 +- mx_rec/util/cpu.py | 1 - src/AccCTR/src/unique/unique_func.cpp | 7 +- src/AccCTR/src/unique/unique_func.h | 34 ++-- .../local_file_system/local_file_system.h | 2 - src/core/hd_transfer/hd_transfer.cpp | 4 +- src/core/hd_transfer/hd_transfer.h | 4 +- .../random_normal_initializer.cpp | 7 +- .../random_normal_initializer.h | 1 + .../truncated_normal_initializer.cpp | 4 +- src/core/utils/common.h | 22 +-- src/dataset_tf/eos_dataset_op.cc | 79 +++++--- 25 files changed, 318 insertions(+), 302 deletions(-) diff --git a/cust_op/cust_op_by_addr/aclnn_op_test/inc/op_runner.h b/cust_op/cust_op_by_addr/aclnn_op_test/inc/op_runner.h index bf923d7e..e41e3596 100644 ---
a/cust_op/cust_op_by_addr/aclnn_op_test/inc/op_runner.h +++ b/cust_op/cust_op_by_addr/aclnn_op_test/inc/op_runner.h @@ -140,16 +140,16 @@ public: /** * @brief Print readable input by index * @param [in] index: input index - * @param [in] elementsPerRow: number of elements per row + * @param [in] numElementsPerRow: number of elements per row */ - void PrintInput(size_t index, size_t elementsPerRow = 16); + void PrintInput(size_t index, size_t numElementsPerRow = 16); /** * @brief Print readable output by index * @param [in] index: output index - * @param [in] elementsPerRow: number of elements per row + * @param [in] numElementsPerRow: number of elements per row */ - void PrintOutput(size_t index, size_t elementsPerRow = 16); + void PrintOutput(size_t index, size_t numElementsPerRow = 16); /** * @brief Compile static op diff --git a/cust_op/cust_op_by_addr/op_host/embedding_lookup_by_address.cpp b/cust_op/cust_op_by_addr/op_host/embedding_lookup_by_address.cpp index 45681773..41a5b33a 100644 --- a/cust_op/cust_op_by_addr/op_host/embedding_lookup_by_address.cpp +++ b/cust_op/cust_op_by_addr/op_host/embedding_lookup_by_address.cpp @@ -16,6 +16,12 @@ See the License for the specific language governing permissions and #include "embedding_lookup_by_address_tiling.h" #include "register/op_def_registry.h" +namespace { + constexpr int32_t EMBEDDING_TYPE_FLOAT16 = 2; + constexpr int32_t EMBEDDING_TYPE_INT32 = 0; + constexpr int32_t EMBEDDING_TYPE_FLOAT32 = 1; +} + namespace optiling { constexpr int32_t BLOCK_DIM = 48; // 910b一张卡48个vector核 @@ -81,7 +87,7 @@ namespace optiling int32_t inputShape = inputTensor->GetShapeSize(); int32_t typeSize = SIZE_OF_FLOAT_OR_INT; - if (embeddingType == 2) { + if (embeddingType == EMBEDDING_TYPE_FLOAT16) { typeSize = SIZE_OF_HALF; } // shape需要对齐到的最小单位, MIN_BLOCK_SIZE=32 @@ -92,7 +98,8 @@ namespace optiling int32_t occupyAddressBytesNum = sizeof(int64_t) + typeSize * embeddingDimAligned * PING_PONG_NUM * 2; // 一轮计算中最多计算多少个addr,由于地址也要搬到ub,所以需要对齐32, - int32_t addrPerLoop = (UB_LIMIT / occupyAddressBytesNum) & (~3); // & (~3),保证地址数是4的倍数 + int32_t addrPerLoop = static_cast((UB_LIMIT / + static_cast(occupyAddressBytesNum)) & (~3u)); // & (~3u),保证地址数是4的倍数 if (addrPerLoop <= 0) { return ge::GRAPH_FAILED; } @@ -116,6 +123,7 @@ namespace optiling namespace ge { + constexpr int OUTPUT_DIMENSION = 2; static ge::graphStatus InferShape1(gert::InferShapeContext *context) { @@ -140,8 +148,12 @@ namespace ge int64_t updateDim = *attr0Value; - int64_t inputShape = context->GetInputTensor(0)->GetShapeSize(); - yShape->SetDimNum(2); + auto *inputTensor2 = context->GetInputTensor(0); + if (optiling::CheckNullPointer(inputTensor2, "inputTensor2") != ge::GRAPH_SUCCESS) { + return ge::GRAPH_FAILED; + } + int64_t inputShape = inputTensor2->GetShapeSize(); + yShape->SetDimNum(OUTPUT_DIMENSION); yShape->SetDim(0, inputShape); yShape->SetDim(1, updateDim); return GRAPH_SUCCESS; @@ -165,15 +177,15 @@ namespace ge } embbedingType = *attr1Value; - if (embbedingType == 0) + if (embbedingType == EMBEDDING_TYPE_INT32) { context->SetOutputDataType(0, ge::DataType(DT_INT32)); } - else if (embbedingType == 1) + else if (embbedingType == EMBEDDING_TYPE_FLOAT32) { context->SetOutputDataType(0, ge::DataType(DT_FLOAT)); } - else if (embbedingType == 2) + else if (embbedingType == EMBEDDING_TYPE_FLOAT16) { context->SetOutputDataType(0, ge::DataType(DT_FLOAT16)); diff --git a/cust_op/cust_op_by_addr/op_host/embedding_update_by_address.cpp b/cust_op/cust_op_by_addr/op_host/embedding_update_by_address.cpp 
index 5c45e2ab..d0e4b778 100644 --- a/cust_op/cust_op_by_addr/op_host/embedding_update_by_address.cpp +++ b/cust_op/cust_op_by_addr/op_host/embedding_update_by_address.cpp @@ -16,13 +16,19 @@ See the License for the specific language governing permissions and #include "embedding_update_by_address_tiling.h" #include "register/op_def_registry.h" +namespace { + constexpr int32_t EMBEDDING_TYPE_FLOAT16 = 2; + constexpr int32_t EMBEDDING_TYPE_INT32 = 0; + constexpr int32_t EMBEDDING_TYPE_FLOAT32 = 1; +} + namespace optiling { constexpr int32_t BLOCK_DIM = 48; // 910b一张卡48个vector核 constexpr int32_t SIZE_OF_HALF = 2; constexpr int32_t SIZE_OF_FLOAT_OR_INT = 4; constexpr int32_t MIN_BLOCK_SIZE = 32; // ub空间的数据都要按照32对齐 - constexpr int32_t UB_LIMIT = 175 * 1024; + constexpr uint32_t UB_LIMIT = 175 * 1024; constexpr int32_t USR_SIZE = 256; constexpr int32_t SYS_WORKSPACE_SIZE = 16 * 1024 * 1024; constexpr int32_t PING_PONG_NUM = 1; @@ -67,7 +73,7 @@ namespace optiling return ge::GRAPH_FAILED; } - int32_t inputShape = inputTensor->GetShapeSize(); + int64_t inputShape = static_cast(inputTensor->GetShapeSize()); if (CheckPositiveInt(inputShape, "inputShape") != ge::GRAPH_SUCCESS) { return ge::GRAPH_FAILED; } @@ -78,7 +84,7 @@ namespace optiling } const int32_t inputShapeTmp = (inputShape > 0) ? inputShape : 1; - int32_t inputDim = inputTensor1->GetShapeSize() / inputShapeTmp; + int64_t inputDim = static_cast(inputTensor1->GetShapeSize() / inputShapeTmp); if (CheckPositiveInt(inputDim, "inputDim") != ge::GRAPH_SUCCESS) { return ge::GRAPH_FAILED; } @@ -97,15 +103,15 @@ namespace optiling ge::DataType inputDatatype = inputTensor1->GetDataType(); int32_t embeddingType; if (inputDatatype == ge::DT_FLOAT16) { - embeddingType = 2; + embeddingType = EMBEDDING_TYPE_FLOAT16; } else if (inputDatatype == ge::DT_INT32) { - embeddingType = 0; + embeddingType = EMBEDDING_TYPE_INT32; } else { - embeddingType = 1; + embeddingType = EMBEDDING_TYPE_FLOAT32; } int32_t typeSize = SIZE_OF_FLOAT_OR_INT; - if (embeddingType == 2) { + if (embeddingType == EMBEDDING_TYPE_FLOAT16) { typeSize = SIZE_OF_HALF; } int32_t alignNum = MIN_BLOCK_SIZE / typeSize; @@ -116,7 +122,8 @@ namespace optiling int32_t occupyAddressBytesNum = sizeof(int64_t) + typeSize * inputDimAligned * PING_PONG_NUM * 2; // 一轮计算中最多计算多少个addr,由于地址也要搬到ub,所以需要对齐32 - int32_t addrPerLoop = (UB_LIMIT / occupyAddressBytesNum) & (~3); // & (~3),保证地址数是4的倍数 + int32_t addrPerLoop = static_cast((UB_LIMIT / + occupyAddressBytesNum) & (~3U)); // & (~3U),保证地址数是4的倍数 if (CheckPositiveInt(addrPerLoop, "addrPerLoop") != ge::GRAPH_SUCCESS) { return ge::GRAPH_FAILED; } diff --git a/cust_op/cust_op_by_addr/op_kernel/embedding_lookup_by_address.cpp b/cust_op/cust_op_by_addr/op_kernel/embedding_lookup_by_address.cpp index 1a58768c..3fded632 100644 --- a/cust_op/cust_op_by_addr/op_kernel/embedding_lookup_by_address.cpp +++ b/cust_op/cust_op_by_addr/op_kernel/embedding_lookup_by_address.cpp @@ -32,7 +32,7 @@ public: needComputeAddrLen = singleCoreAddrLen; if (block_idx == block_num - 1) // 最后一个core,需要多计算的addr长度 { - needComputeAddrLen = addrNums * sizeof(int64_t) - singleCoreAddrLen * (block_num - 1); + needComputeAddrLen = addrNums * sizeof(int64_t) - singleCoreAddrLen * (block_num - 1); } loopCount = needComputeAddrLen / (addrNumPerLoop * sizeof(int64_t)); // 可能为0 @@ -73,105 +73,99 @@ public: if (loopCount > 0) { - for (int32_t i = 0; i < loopCount; i++) - { - DataCopy(srcAddrLocal, srcAddrGlobal[i * addrNumPerLoop], addrNumPerLoop); - MoveProcess(srcAddrLocal, i, addrNumPerLoop); - } + 
for (int32_t i = 0; i < loopCount; i++) { + DataCopy(srcAddrLocal, srcAddrGlobal[i * addrNumPerLoop], addrNumPerLoop); + MoveProcess(srcAddrLocal, i, addrNumPerLoop); + } } // 处理最后一张卡剩下的addr int unProcess = (needComputeAddrLen / sizeof(int64_t)) % addrNumPerLoop; if (unProcess) { - int unProcessAligned = (unProcess + 3) & (~3); // 处理 addressList 不对齐32b的情况 - // 地址列表访问越界,对齐考虑无问题,会自动多申请一部分,兼容 - DataCopy(srcAddrLocal, srcAddrGlobal[loopCount * addrNumPerLoop], unProcessAligned); - MoveProcess(srcAddrLocal, loopCount, unProcess); + int unProcessAligned = static_cast + ((static_cast(unProcess) + 3) & (~3U)); // 处理 addressList 不对齐32b的情况 + // 地址列表访问越界,对齐考虑无问题,会自动多申请一部分,兼容 + DataCopy(srcAddrLocal, srcAddrGlobal[loopCount * addrNumPerLoop], unProcessAligned); + MoveProcess(srcAddrLocal, loopCount, unProcess); } } private: - __aicore__ inline void MoveProcess(const LocalTensor srcAddrLocal, const int turns, int addrNum) - { - set_flag(PIPE_MTE2, PIPE_S, 0); - wait_flag(PIPE_MTE2, PIPE_S, 0); - LocalTensor dataLocal = inQueue.AllocTensor(); // Queue的大小可以容下一个循环的所有emb - bool isFull = false; - int nums = 0; - int outIndex = 0; - int times = embDimAligned >> 3; // >>3位运算:除以8。 embDimAligned一定是8的倍数,若地址无效时,每次填充8个0 - int tmpCache = cache - 1; // 设计初是一次cache执行多次copyin、一次compute和一次copyout,现状是一次loop就只对应一次cache - - for (int i = 0; i < addrNum; i++) + __aicore__ inline void MoveProcess(const LocalTensor srcAddrLocal, const int turns, int addrNum) { - // 多次copyIn, 对应一次compute和copyOut,由cache决定 - dataLocal = isFull ? inQueue.AllocTensor() : dataLocal; - int64_t address = srcAddrLocal.GetValue(i); - - if (address != 0) - { - srcDataBufferGm.SetGlobalBuffer((__gm__ T *)(address), embDimAligned); - DataCopy(dataLocal[embDimAligned * nums], srcDataBufferGm, embDimAligned); - } - else - { - for (int j = 0; j < times; j++) + set_flag(PIPE_MTE2, PIPE_S, 0); + wait_flag(PIPE_MTE2, PIPE_S, 0); + LocalTensor dataLocal = inQueue.AllocTensor(); // Queue的大小可以容下一个循环的所有emb + bool isFull = false; + int nums = 0; + int outIndex = 0; + int times = embDimAligned >> 3; // >>3位运算:除以8。 embDimAligned一定是8的倍数,若地址无效时,每次填充8个0 + int tmpCache = cache - 1; // 设计初是一次cache执行多次copyin、一次compute和一次copyout,现状是一次loop就只对应一次cache + + for (int i = 0; i < addrNum; i++) { - Duplicate(dataLocal[embDimAligned * nums + j * PADDING_ZERO_NUM_PER_TIME], (T)0, PADDING_ZERO_NUM_PER_TIME); + // 多次copyIn, 对应一次compute和copyOut,由cache决定 + dataLocal = isFull ? 
inQueue.AllocTensor() : dataLocal; + int64_t address = srcAddrLocal.GetValue(i); + + if (address != 0) { + srcDataBufferGm.SetGlobalBuffer((__gm__ T *)(address), embDimAligned); + DataCopy(dataLocal[embDimAligned * nums], srcDataBufferGm, embDimAligned); + } else { + for (int j = 0; j < times; j++) { + Duplicate(dataLocal[embDimAligned * nums + j * PADDING_ZERO_NUM_PER_TIME], + (T)0, PADDING_ZERO_NUM_PER_TIME); + } + } + + nums++; + isFull = (i == tmpCache || i == addrNum - 1); // cache满了,或者最后一个地址 + if (isFull) { + inQueue.EnQue(dataLocal); + Compute(nums); + CopyOut(outIndex, turns, nums); + nums = 0; + outIndex = i + 1; + tmpCache += cache; + } } - } - - nums++; - isFull = (i == tmpCache || i == addrNum - 1); // cache满了,或者最后一个地址 - if (isFull) - { - inQueue.EnQue(dataLocal); - Compute(nums); - CopyOut(outIndex, turns, nums); - nums = 0; - outIndex = i + 1; - tmpCache += cache; - } } - } - __aicore__ inline void Compute(const int nums) - { - // deque input tensors from VECIN queue - LocalTensor srcLocal = inQueue.DeQue(); - LocalTensor dstLocal = outQueue.AllocTensor(); + __aicore__ inline void Compute(const int nums) + { + // deque input tensors from VECIN queue + LocalTensor srcLocal = inQueue.DeQue(); + LocalTensor dstLocal = outQueue.AllocTensor(); - DataCopyParams copyParams; - copyParams.blockCount = 1; - copyParams.blockLen = (embDimAligned * sizeof(T) * nums) >> 5; // >> 5, 除以32,ub空间对齐 - DataCopy(dstLocal, srcLocal, copyParams); + DataCopyParams copyParams; + copyParams.blockCount = 1; + copyParams.blockLen = (embDimAligned * sizeof(T) * nums) >> 5; // >> 5, 除以32,ub空间对齐 + DataCopy(dstLocal, srcLocal, copyParams); - outQueue.EnQue(dstLocal); - inQueue.FreeTensor(srcLocal); - } + outQueue.EnQue(dstLocal); + inQueue.FreeTensor(srcLocal); + } - __aicore__ inline void CopyOut(const int index, const int turns, const int nums) - { - LocalTensor dstLocal = outQueue.DeQue(); + __aicore__ inline void CopyOut(const int index, const int turns, const int nums) + { + LocalTensor dstLocal = outQueue.DeQue(); - int offset = block_idx * dim * singleCoreAddrLen / sizeof(int64_t) + (turns * addrNumPerLoop * dim) + dim * index; + int offset = block_idx * dim * singleCoreAddrLen / + sizeof(int64_t) + (turns * addrNumPerLoop * dim) + dim * index; #if defined(__DAV_C220_VEC__) - if (typeSize == SIZE_OF_FLOAT_OR_INT) - { - copy_ubuf_to_gm_align_b32((__gm__ T *)dstDataGm[offset].GetPhyAddr(), (__ubuf__ T *)dstLocal.GetPhyAddr(), 0, - nums, dim * sizeof(T), 0, 0, 0, 0); - } - else if (typeSize == SIZE_OF_HALF) - { - copy_ubuf_to_gm_align_b16((__gm__ T *)dstDataGm[offset].GetPhyAddr(), (__ubuf__ T *)dstLocal.GetPhyAddr(), 0, - nums, dim * sizeof(T), 0, 0, 0, 0); - } + if (typeSize == SIZE_OF_FLOAT_OR_INT) { + copy_ubuf_to_gm_align_b32((__gm__ T *)dstDataGm[offset].GetPhyAddr(), + (__ubuf__ T *)dstLocal.GetPhyAddr(), 0, nums, dim * sizeof(T), 0, 0, 0, 0); + } else if (typeSize == SIZE_OF_HALF) { + copy_ubuf_to_gm_align_b16((__gm__ T *)dstDataGm[offset].GetPhyAddr(), + (__ubuf__ T *)dstLocal.GetPhyAddr(), 0, nums, dim * sizeof(T), 0, 0, 0, 0); + } #else - DataCopy(dstDataGm[offset], dstLocal, embDimAligned * nums); + DataCopy(dstDataGm[offset], dstLocal, embDimAligned * nums); #endif - outQueue.FreeTensor(dstLocal); - } + outQueue.FreeTensor(dstLocal); + } public: int32_t addrNumPerLoop, loopCount, singleCoreAddrLen, needComputeAddrLen, veclen, dim, pingpongNum, cache; diff --git a/cust_op/cust_op_by_addr/op_kernel/embedding_update_by_address.cpp 
b/cust_op/cust_op_by_addr/op_kernel/embedding_update_by_address.cpp index 98847260..4a13c3eb 100644 --- a/cust_op/cust_op_by_addr/op_kernel/embedding_update_by_address.cpp +++ b/cust_op/cust_op_by_addr/op_kernel/embedding_update_by_address.cpp @@ -31,7 +31,7 @@ public: needComputeAddrLen = singleCoreAddrLen; if (block_idx == block_num - 1) { - needComputeAddrLen = addrNums * sizeof(int64_t) - singleCoreAddrLen * (block_num - 1); + needComputeAddrLen = addrNums * sizeof(int64_t) - singleCoreAddrLen * (block_num - 1); } loopCount = needComputeAddrLen / (addrNumPerLoop * sizeof(int64_t)); @@ -41,7 +41,8 @@ public: // get start index for current core, core parallel block_indx block_dim srcAddrGlobal.SetGlobalBuffer((__gm__ int64_t *)(address + block_idx * singleCoreAddrLen)); - srcDataBufferGm.SetGlobalBuffer((__gm__ T *)(embedding + block_idx * singleCoreAddrLen / sizeof(int64_t) * sizeof(T) * dim)); + srcDataBufferGm.SetGlobalBuffer((__gm__ T *)(embedding + block_idx * singleCoreAddrLen + / sizeof(int64_t) * sizeof(T) * dim)); outDataGm.SetGlobalBuffer((__gm__ T *)(y)); } @@ -72,120 +73,105 @@ public: if (loopCount > 0) { - for (int32_t i = 0; i < loopCount; i++) - { - DataCopy(srcAddrLocal, srcAddrGlobal[i * addrNumPerLoop], addrNumPerLoop); - MoveProcess(srcAddrLocal, i, addrNumPerLoop); - } + for (int32_t i = 0; i < loopCount; i++) { + DataCopy(srcAddrLocal, srcAddrGlobal[i * addrNumPerLoop], addrNumPerLoop); + MoveProcess(srcAddrLocal, i, addrNumPerLoop); + } } int unProcess = (needComputeAddrLen / sizeof(int64_t)) % addrNumPerLoop; if (unProcess) { - int unProcessAligned = (unProcess + 3) & (~3); // 处理 addressList 不对齐32b的情况 - DataCopy(srcAddrLocal, srcAddrGlobal[loopCount * addrNumPerLoop], unProcessAligned); - MoveProcess(srcAddrLocal, loopCount, unProcess); + int unProcessAligned = (static_cast(unProcess) + 3) & (~3U); // 处理 addressList 不对齐32b的情况 + DataCopy(srcAddrLocal, srcAddrGlobal[loopCount * addrNumPerLoop], unProcessAligned); + MoveProcess(srcAddrLocal, loopCount, unProcess); } } private: - __aicore__ inline void MoveProcess(const LocalTensor srcAddrLocal, const int turns, int addrNum) - { - set_flag(PIPE_MTE2, PIPE_S, 0); - wait_flag(PIPE_MTE2, PIPE_S, 0); - LocalTensor dataLocal; - - int64_t address = 0; - if (dim == inputDimAligned) // copyIn 和 compute一次,copyOut多次 + __aicore__ inline void MoveProcess(const LocalTensor srcAddrLocal, const int turns, int addrNum) { - dataLocal = inQueue.AllocTensor(); - DataCopy(dataLocal, srcDataBufferGm[turns * addrNumPerLoop * dim], addrNum * inputDimAligned); - inQueue.EnQue(dataLocal); - - Compute(addrNum); // 只有copyOut的管道支持拷贝到gm上 - - LocalTensor dstLocal = outQueue.DeQue(); - if (updateType == 0) - { - SetAtomicAdd(); - } - for (int i = 0; i < addrNum; i++) - { - address = srcAddrLocal.GetValue(i); - if (address != 0) + set_flag(PIPE_MTE2, PIPE_S, 0); + wait_flag(PIPE_MTE2, PIPE_S, 0); + LocalTensor dataLocal; + + int64_t address = 0; + if (dim == inputDimAligned) // copyIn 和 compute一次,copyOut多次 { - dstDataGm.SetGlobalBuffer((__gm__ T*)(address)); - DataCopy(dstDataGm, dstLocal[i * inputDimAligned], inputDimAligned); + dataLocal = inQueue.AllocTensor(); + DataCopy(dataLocal, srcDataBufferGm[turns * addrNumPerLoop * dim], addrNum * inputDimAligned); + inQueue.EnQue(dataLocal); + + Compute(addrNum); // 只有copyOut的管道支持拷贝到gm上 + + LocalTensor dstLocal = outQueue.DeQue(); + if (updateType == 0) { + SetAtomicAdd(); + } + for (int i = 0; i < addrNum; i++) { + address = srcAddrLocal.GetValue(i); + if (address != 0) { + 
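+                        // Clarifying note (inferred from this kernel, not part of the original
+                        // change): a zero address marks an invalid slot, so the write-back is
+                        // skipped for it; for valid rows the DataCopy below moves this row's UB
+                        // tile to the table row at `address` in GM, with atomic accumulation
+                        // active when updateType == 0 (SetAtomicAdd above).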
dstDataGm.SetGlobalBuffer((__gm__ T*)(address)); + DataCopy(dstDataGm, dstLocal[i * inputDimAligned], inputDimAligned); + } + } + if (updateType == 0) { + SetAtomicNone(); + } + outQueue.FreeTensor(dstLocal); + } else { + for (int i = 0; i < addrNum; i++) { + dataLocal = inQueue.AllocTensor(); + DataCopy(dataLocal, srcDataBufferGm[i * dim + turns * addrNumPerLoop * dim], inputDimAligned); + inQueue.EnQue(dataLocal); + Compute(1); + address = srcAddrLocal.GetValue(i); + CopyOut(address, turns, i); + } } - } - if (updateType == 0) - { - SetAtomicNone(); - } - outQueue.FreeTensor(dstLocal); } - else + + __aicore__ inline void Compute(const int nums) { - for (int i = 0; i < addrNum; i++) - { - dataLocal = inQueue.AllocTensor(); - DataCopy(dataLocal, srcDataBufferGm[i * dim + turns * addrNumPerLoop * dim], inputDimAligned); - inQueue.EnQue(dataLocal); - Compute(1); - address = srcAddrLocal.GetValue(i); - CopyOut(address, turns, i); - } + // deque input tensors from VECIN queue + LocalTensor srcLocal = inQueue.DeQue(); + LocalTensor dstLocal = outQueue.AllocTensor(); + DataCopyParams copyparams; + copyparams.blockCount = 1; + copyparams.blockLen = (inputDimAligned * sizeof(T) * nums) >> 5; // >> 5, 除以32,ub空间对齐 + DataCopy(dstLocal, srcLocal, copyparams); + outQueue.EnQue(dstLocal); + inQueue.FreeTensor(srcLocal); } - } - - __aicore__ inline void Compute(const int nums) - { - // deque input tensors from VECIN queue - LocalTensor srcLocal = inQueue.DeQue(); - LocalTensor dstLocal = outQueue.AllocTensor(); - DataCopyParams copyparams; - copyparams.blockCount = 1; - copyparams.blockLen = (inputDimAligned * sizeof(T) * nums) >> 5; // >> 5, 除以32,ub空间对齐 - DataCopy(dstLocal, srcLocal, copyparams); - outQueue.EnQue(dstLocal); - inQueue.FreeTensor(srcLocal); - } - __aicore__ inline void CopyOut(const int64_t address, const int64_t turns, const int64_t index) - { - LocalTensor dstLocal = outQueue.DeQue(); - - if (address != 0) + __aicore__ inline void CopyOut(const int64_t address, const int64_t turns, const int64_t index) { - dstDataGm.SetGlobalBuffer((__gm__ T *)(address)); + LocalTensor dstLocal = outQueue.DeQue(); - if (updateType == 0) - { - SetAtomicAdd(); - } + if (address != 0) { + dstDataGm.SetGlobalBuffer((__gm__ T *)(address)); + + if (updateType == 0) { + SetAtomicAdd(); + } #if defined(__DAV_C220_VEC__) - if (typeSize == SIZE_OF_FLOAT_OR_INT) - { - - copy_ubuf_to_gm_align_b32((__gm__ T *)dstDataGm.GetPhyAddr(), (__ubuf__ T *)dstLocal.GetPhyAddr(), 0, - 1, dim * sizeof(T), 0, 0, 0, 0); - } - else if (typeSize == SIZE_OF_HALF) - { - copy_ubuf_to_gm_align_b16((__gm__ T *)dstDataGm.GetPhyAddr(), (__ubuf__ T *)dstLocal.GetPhyAddr(), 0, - 1, dim * sizeof(T), 0, 0, 0, 0); - } + if (typeSize == SIZE_OF_FLOAT_OR_INT) { + copy_ubuf_to_gm_align_b32((__gm__ T *)dstDataGm.GetPhyAddr(), (__ubuf__ T *)dstLocal.GetPhyAddr(), 0, + 1, dim * sizeof(T), 0, 0, 0, 0); + } else if (typeSize == SIZE_OF_HALF) { + copy_ubuf_to_gm_align_b16((__gm__ T *)dstDataGm.GetPhyAddr(), (__ubuf__ T *)dstLocal.GetPhyAddr(), 0, + 1, dim * sizeof(T), 0, 0, 0, 0); + } #else - DataCopy(dstDataGm, dstLocal, inputDimAligned); + DataCopy(dstDataGm, dstLocal, inputDimAligned); #endif + } + if (updateType == 0) { + SetAtomicNone(); + } + outQueue.FreeTensor(dstLocal); } - if (updateType == 0) - { - SetAtomicNone(); - } - outQueue.FreeTensor(dstLocal); - } public: int32_t addrNumPerLoop, loopCount, singleCoreAddrLen, needComputeAddrLen, addrNums, cache, veclen, dim, pingpongNum; diff --git a/examples/DCNv2/main_mxrec.py 
b/examples/DCNv2/main_mxrec.py index 6fd235ba..5e4efe02 100644 --- a/examples/DCNv2/main_mxrec.py +++ b/examples/DCNv2/main_mxrec.py @@ -154,7 +154,7 @@ def evaluate(): try: eval_current_steps += 1 eval_start = time.time() - eval_loss, pred, label = sess.run([eval_model["loss"], eval_model["pred"], eval_label]) + eval_loss, pred, label = sess.run([eval_model.get("loss"), eval_model.get("pred"), eval_label]) eval_cost = time.time() - eval_start eval_qps = (1 / eval_cost) * rank_size * cfg.batch_size log_loss_list += list(eval_loss.reshape(-1)) @@ -185,7 +185,7 @@ def evaluate_fix(step): while not finished: try: eval_current_steps += 1 - eval_loss, pred, label = sess.run([eval_model["loss"], eval_model["pred"], eval_model["label"]]) + eval_loss, pred, label = sess.run([eval_model.get("loss"), eval_model.get("pred"), eval_model.get("label")]) log_loss_list += list(eval_loss.reshape(-1)) pred_list += list(pred.reshape(-1)) label_list += list(label.reshape(-1)) @@ -322,7 +322,7 @@ if __name__ == "__main__": rank_size = mxrec_util.communication.hccl_ops.get_rank_size() train_ops = [] # multi task training - for loss, (dense_optimizer, sparse_optimizer) in zip([train_model["loss"]], optimizer_list): + for loss, (dense_optimizer, sparse_optimizer) in zip([train_model.get("loss")], optimizer_list): # do dense optimization grads = dense_optimizer.compute_gradients(loss, var_list=dense_variables) avg_grads = [] @@ -404,7 +404,7 @@ if __name__ == "__main__": start_time = time.time() try: - grad, loss = sess.run([train_ops, train_model["loss"]]) + grad, loss = sess.run([train_ops, train_model.get("loss")]) lr = sess.run(cfg.learning_rate) global_step = sess.run(cfg.global_step) except tf.errors.OutOfRangeError: diff --git a/examples/demo/little_demo_estimator/nn_model_build.py b/examples/demo/little_demo_estimator/nn_model_build.py index e715f930..67820d04 100644 --- a/examples/demo/little_demo_estimator/nn_model_build.py +++ b/examples/demo/little_demo_estimator/nn_model_build.py @@ -207,15 +207,16 @@ class LittleModel: return embedding_list -def _make_ids_with_const_ops(input: Tensor) -> Tensor: - const_ids = tf.constant(1, shape=input.shape, dtype=input.dtype) +def _make_ids_with_const_ops(input_tensor: Tensor) -> Tensor: + const_ids = tf.constant(1, shape=input_tensor.shape, dtype=input_tensor.dtype) const_ids = tf.compat.v1.add(const_ids, 1) const_ids = tf.compat.v1.subtract(const_ids, 1) return const_ids -def _make_ids_with_str_ops(input: Tensor) -> Tensor: - str_ids = tf.compat.v1.strings.as_string(input) + +def _make_ids_with_str_ops(input_tensor: Tensor) -> Tensor: + str_ids = tf.compat.v1.strings.as_string(input_tensor) str_ids = tf.compat.v1.strings.to_number(str_ids) - + return str_ids diff --git a/examples/demo/little_demo_estimator/nn_model_input.py b/examples/demo/little_demo_estimator/nn_model_input.py index d763c058..d6ebb529 100644 --- a/examples/demo/little_demo_estimator/nn_model_input.py +++ b/examples/demo/little_demo_estimator/nn_model_input.py @@ -17,10 +17,10 @@ import tensorflow as tf from mx_rec.constants.constants import ASCEND_TIMESTAMP +from mx_rec.util.log import logger from nn_model_build import LittleModel from nn_optim import get_train_op -from mx_rec.util.log import logger def get_model_fn(create_fs_params, cfg, access_and_evict_config_dict=None): diff --git a/examples/demo/little_demo_estimator/nn_optim.py b/examples/demo/little_demo_estimator/nn_optim.py index 3be3c7ed..415c5ff2 100644 --- a/examples/demo/little_demo_estimator/nn_optim.py +++ 
b/examples/demo/little_demo_estimator/nn_optim.py @@ -55,9 +55,7 @@ def get_train_op_list(losses, learning_rate): dense_variables, sparse_variables = get_dense_and_sparse_variable() trainable_variables = [dense_variables] - for i in range(len(losses)): - name = losses[i][0] - loss = losses[i][1] + for i, (name, loss) in enumerate(losses): with tf.control_dependencies(update_ops): # do dense grad grads = dense_optimizer.compute_gradients(loss, var_list=trainable_variables) diff --git a/examples/dlrm/criteo_tb/gen_ttf.py b/examples/dlrm/criteo_tb/gen_ttf.py index 04b7b767..8715f048 100644 --- a/examples/dlrm/criteo_tb/gen_ttf.py +++ b/examples/dlrm/criteo_tb/gen_ttf.py @@ -19,12 +19,12 @@ import collections import logging import argparse from multiprocessing import Process -import numpy as np +import sys import time +import numpy as np from tqdm import tqdm from glob import glob from collections import Counter, OrderedDict -import sys import tensorflow as tf @@ -91,7 +91,7 @@ class CriteoStatsDict(): @staticmethod def save_dict(output_file_path, hist_map, prefix=""): - with open(os.path.join(output_file_path, "{}hist_map.pkl".format(prefix)), "wb") as file_wrt: + with os.fdopen(os.open(os.path.join(output_file_path, "{}hist_map.pkl".format(prefix)), os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o640), "wb") as file_wrt:  # os.fdopen needs a descriptor from os.open, not a path + pickle.dump(hist_map, file_wrt) def load_dict(self, dict_path, prefix=""): @@ -188,7 +188,7 @@ def get_unique_id_multiprocess(proc_num, proc_id, data_file_path, output_file_pa if capped_value not in cat_sets: cat_sets[k][capped_value] = cat_global_id_nums[k] cat_global_id_nums[k] += 1 - with open(os.path.join(output_file_path, "unique_id.pkl"), "wb") as file_wrt: + with os.fdopen(os.open(os.path.join(output_file_path, "unique_id.pkl"), os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o640), "wb") as file_wrt: pickle.dump(cat_sets, file_wrt) print('statsdata time cost: {:.2f}s'.format(time.time() - start_time)) @@ -247,7 +247,7 @@ def convert_input2tfrd_multiprocess(proc_num, proc_id, in_file_path, output_file with open(in_file_path, encoding="utf-8") as file_in: errorline_list = [] - for i, line in tqdm(enumerate(file_in)): + for _ in tqdm(file_in): line_num += 1 print(f'line_num: {line_num}') start_line = proc_id * ((line_num + proc_num) // proc_num) @@ -370,9 +370,9 @@ if __name__ == "__main__": sub_process_num = process_num // len(train_data_files) data_file = train_data_files[process_id // sub_process_num] output_path = f'{save_tfrecord_path}/{process_id:04}_' - p = Process(target=convert_input2tfrd_multiprocess, args=(sub_process_num, process_id%sub_process_num, data_file, output_path, - criteo_stats, spe_num, - 5000000)) + p = Process(target=convert_input2tfrd_multiprocess, args=(sub_process_num, process_id % sub_process_num, + data_file, output_path, criteo_stats, spe_num, + 5000000)) processs.append(p) for p in processs: p.start() @@ -394,10 +394,9 @@ if __name__ == "__main__": sub_process_num = process_num // len(test_data_files) data_file = test_data_files[process_id // sub_process_num] output_path = f'{save_tfrecord_path}/{process_id:04}_' - p = Process(target=convert_input2tfrd_multiprocess, args=(sub_process_num, process_id%sub_process_num, data_file, output_path, - criteo_stats, spe_num, - 5000000)) - + p = Process(target=convert_input2tfrd_multiprocess, args=(sub_process_num, process_id % sub_process_num, + data_file, output_path, criteo_stats, spe_num, + 5000000)) processs.append(p) for p in processs: p.start() diff --git a/examples/dlrm/model/main_mxrec.py index b6036804..8c4cdd7e 100644 --- a/examples/dlrm/model/main_mxrec.py +++
b/examples/dlrm/model/main_mxrec.py @@ -24,6 +24,9 @@ import tensorflow as tf from sklearn.metrics import roc_auc_score import numpy as np +from optimizer import get_dense_and_sparse_optimizer +from config import sess_config, Config +from model import MyModel from mx_rec.constants.constants import ASCEND_SPARSE_LOOKUP_LOCAL_EMB, ASCEND_SPARSE_LOOKUP_ID_OFFSET from mx_rec.core.asc.helper import FeatureSpec, get_asc_insert_func from mx_rec.core.asc.manager import start_asc_pipeline @@ -38,9 +41,6 @@ from mx_rec.util.variable import get_dense_and_sparse_variable from mx_rec.util.log import logger from npu_bridge.npu_init import * -from model import MyModel -from config import sess_config, Config -from optimizer import get_dense_and_sparse_optimizer npu_plugin.set_device_sat_mode(0) @@ -158,7 +158,7 @@ def evaluate(): try: eval_current_steps += 1 eval_start = time.time() - eval_loss, pred, label = sess.run([eval_model["loss"], eval_model["pred"], eval_label]) + eval_loss, pred, label = sess.run([eval_model.get("loss"), eval_model.get("pred"), eval_label]) eval_cost = time.time() - eval_start qps_eval = (1 / eval_cost) * rank_size * cfg.batch_size log_loss_list += list(eval_loss.reshape(-1)) @@ -189,7 +189,7 @@ def evaluate_fix(step): while not finished: try: eval_current_steps += 1 - eval_loss, pred, label = sess.run([eval_model["loss"], eval_model["pred"], eval_model["label"]]) + eval_loss, pred, label = sess.run([eval_model.get("loss"), eval_model.get("pred"), eval_model.get("label")]) log_loss_list += list(eval_loss.reshape(-1)) pred_list += list(pred.reshape(-1)) label_list += list(label.reshape(-1)) @@ -331,7 +331,7 @@ if __name__ == "__main__": rank_size = mxrec_util.communication.hccl_ops.get_rank_size() train_ops = [] # multi task training - for loss, (dense_optimizer, sparse_optimizer) in zip([train_model["loss"]], optimizer_list): + for loss, (dense_optimizer, sparse_optimizer) in zip([train_model.get("loss")], optimizer_list): # do dense optimization grads = dense_optimizer.compute_gradients(loss, var_list=trainable_varibles) avg_grads = [] @@ -411,7 +411,7 @@ if __name__ == "__main__": start_time = time.time() try: - grad, loss = sess.run([train_ops, train_model["loss"]]) + grad, loss = sess.run([train_ops, train_model.get("loss")]) lr = sess.run(cfg.learning_rate) global_step = sess.run(cfg.global_step) except tf.errors.OutOfRangeError: diff --git a/examples/dlrm/model/mean_auc.py b/examples/dlrm/model/mean_auc.py index 1116ebd5..ff57df00 100644 --- a/examples/dlrm/model/mean_auc.py +++ b/examples/dlrm/model/mean_auc.py @@ -15,8 +15,8 @@ # ============================================================================== import os -import numpy as np from glob import glob +import numpy as np def split_auc(log_input): @@ -26,7 +26,7 @@ def split_auc(log_input): if 'Test' in line: all_auc.append(float(line.split(';')[0].split(':')[-1].strip())) all_auc_len = len(all_auc) - all_auc_arr = np.array(all_auc)[:all_auc_len - all_auc_len%8] + all_auc_arr = np.array(all_auc)[:all_auc_len - all_auc_len % 8] test_auc = np.mean(all_auc_arr.reshape(-1, 8), axis=-1) return test_auc diff --git a/mx_rec/__init__.py b/mx_rec/__init__.py index bdb85131..64cdcc16 100644 --- a/mx_rec/__init__.py +++ b/mx_rec/__init__.py @@ -15,6 +15,7 @@ # limitations under the License. 
# ============================================================================== +__version__ = "5.0.RC2" __all__ = ["version", "__version__"] from mx_rec.constants.constants import ASCEND_GLOBAL_HASHTABLE_COLLECTION @@ -34,7 +35,6 @@ patch_for_assert_eval_spec() patch_for_bool_gauge() patch_for_optimizer() patch_for_session() -__version__ = "5.0.RC2" def version(): diff --git a/mx_rec/core/asc/merge_table.py b/mx_rec/core/asc/merge_table.py index 776a72c4..fb993032 100644 --- a/mx_rec/core/asc/merge_table.py +++ b/mx_rec/core/asc/merge_table.py @@ -196,7 +196,9 @@ def check_dangling_table(): config_instance = ConfigInitializer.get_instance() dangling_table = config_instance.sparse_embed_config.dangling_table if not dangling_table: - dangling_table = find_dangling_table([table_instance.table_name - for _, table_instance in - config_instance.sparse_embed_config.table_instance_dict.items()]) + table_names = [] + for _, table_instance in config_instance.sparse_embed_config.table_instance_dict.items(): + table_names.append(table_instance.table_name) + dangling_table = find_dangling_table(table_names) + return dangling_table diff --git a/mx_rec/util/cpu.py b/mx_rec/util/cpu.py index 69700262..a7848d7f 100644 --- a/mx_rec/util/cpu.py +++ b/mx_rec/util/cpu.py @@ -3,7 +3,6 @@ # Copyright (c) Huawei Technologies Co., Ltd. 2022-2023. All rights reserved. import ctypes -from ctypes import * import psutil from mx_rec.util.log import logger diff --git a/src/AccCTR/src/unique/unique_func.cpp b/src/AccCTR/src/unique/unique_func.cpp index 64ad6d52..717d8890 100644 --- a/src/AccCTR/src/unique/unique_func.cpp +++ b/src/AccCTR/src/unique/unique_func.cpp @@ -119,10 +119,11 @@ void Dedup::NewParameter() // Time to check the proper size of sharded tables for performance // sake. 
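// Worked example for the guard below (illustrative numbers only, not taken from the
// code): with n = 8, groupCount_ = 48 and newBucketCountPowerOf2 = 1 << 20, the quotient
// std::numeric_limits<uint64_t>::max() / n / groupCount_ is roughly 4.8e16, far above
// 1 << 20, so the normal product (1 << 20) * 8 * 48 = 402653184 is used. Dividing the
// limit first is what keeps the comparison itself free of uint64_t wrap-around, which a
// direct multiply-then-compare could silently hit.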
uint64_t shardedTableSize = 0; - if (std::numeric_limits::max() / n / groupCount_ < newBucketCountPowerOf2) { - shardedTableSize = std::numeric_limits::max(); + if (std::numeric_limits::max() / static_cast(n) / static_cast(groupCount_) + < newBucketCountPowerOf2) { + shardedTableSize = static_cast(std::numeric_limits::max()); } else { - shardedTableSize = newBucketCountPowerOf2 * n * groupCount_; + shardedTableSize = newBucketCountPowerOf2 * n * static_cast(groupCount_); } int largeCount = 0; diff --git a/src/AccCTR/src/unique/unique_func.h b/src/AccCTR/src/unique/unique_func.h index 39e5a6b3..46718bde 100644 --- a/src/AccCTR/src/unique/unique_func.h +++ b/src/AccCTR/src/unique/unique_func.h @@ -171,7 +171,7 @@ public: if (idCountEnable_) { idCount[total] = bucket->idCount[j]; } - out[total++] = bucket->data[j]; + out[total++] = static_cast::type>(bucket->data[j]); } replaceOffset += bucket->count; } @@ -179,7 +179,7 @@ public: int32_t totalOverflow = 0; while (it != overflow_.end()) { if (idCountEnable_) { - idCount[total] = idCountOverflow_[it->first]; + idCount[total] = static_cast(idCountOverflow_[it->first]); } out[total++] = it->first; it->second = replaceOffset++; @@ -189,7 +189,7 @@ public: // set total overflow count stats_.totalUniques = static_cast(total - priorTotal); - stats_.totalOverflowUniques = totalOverflow; + stats_.totalOverflowUniques = static_cast(totalOverflow); return total - priorTotal; } @@ -244,18 +244,20 @@ public: { const int numOfGroupsInShard = groupMethod_.GroupCount(); uint32_t totalSize = conf.desiredSize + (conf.desiredSize >> 1); - while (bucketCountPower2_ * K_BUCKET_WIDTH * numOfGroupsInShard * estimatedDuplicateRatio < totalSize) { + while (bucketCountPower2_ * static_cast(K_BUCKET_WIDTH) * + static_cast(numOfGroupsInShard) * static_cast(estimatedDuplicateRatio) < totalSize) { bucketCountPower2_ <<= 1; } idCountEnable_ = (conf.outputType == OutputType::ENHANCED) && conf.useIdCount; - for (int32_t i = 0; i < numOfGroupsInShard; ++i) { - auto obj = new DedupT(bucketCountPower2_, numOfGroupsInShard, idCountEnable_); - if (obj == nullptr) { - ExternalLogger::PrintLog(LogLevel::ERROR, "creat object error"); - throw NullptrError(); + try { + for (int32_t i = 0; i < numOfGroupsInShard; ++i) { + auto obj = new DedupT(bucketCountPower2_, numOfGroupsInShard, idCountEnable_); + dedupShards_.emplace_back(obj); } - dedupShards_.emplace_back(obj); + } catch (const std::bad_alloc& e) { + ExternalLogger::PrintLog(LogLevel::ERROR, "Memory allocation failed during loop: " + std::string(e.what())); + throw; } } @@ -302,7 +304,7 @@ public: if (conf.outputType == OutputType::ENHANCED) { int totalNumber = 0; for (int i = 0; i < conf.shardingNum; i++) { - totalUniqueSize[i] = totalNumber; + totalUniqueSize[i] = static_cast(totalNumber); if (conf.useSharding) { totalNumber += uniqueOut.uniqueIdCntInBucket[i]; } @@ -365,14 +367,14 @@ private: if (conf.useSharding && conf.useIdCount) { inGroupTotal = dedupShards_[j]->UniqueRaw(uniqueOut.uniqueIdInBucket, total, uniqueOut.idCnt); // 特征计数使能和shard同时使能 - uniqueOut.uniqueIdCntInBucket[j] = inGroupTotal; + uniqueOut.uniqueIdCntInBucket[j] = static_cast(inGroupTotal); } else if (!conf.useSharding && conf.useIdCount) { inGroupTotal = dedupShards_[j]->UniqueRaw(uniqueOut.uniqueId, total, uniqueOut.idCnt); // 特征计数使能和shard不使能 } else if (conf.useSharding && !conf.useIdCount) { inGroupTotal = dedupShards_[j]->UniqueRaw(uniqueOut.uniqueIdInBucket, total, nullptr); // 特征计数使能和shard不使能 - uniqueOut.uniqueIdCntInBucket[j] = inGroupTotal; + 
uniqueOut.uniqueIdCntInBucket[j] = static_cast(inGroupTotal); } else { inGroupTotal = dedupShards_[j]->UniqueRaw(uniqueOut.uniqueId, total, nullptr); // 特征计数不使能和shard不使能,跟普通unique对等 @@ -380,7 +382,7 @@ private: } else { inGroupTotal = dedupShards_[j]->UniqueRaw(uniqueOut.uniqueId, total, nullptr); } - total += inGroupTotal; + total += static_cast(inGroupTotal); } uniqueOut.uniqueIdCnt = total; } @@ -523,8 +525,8 @@ private: uint32_t *beginPtr = uniqueOut.index; uint32_t *finishPtr = beginPtr + uniqueIn.inputIdCnt; uint32_t *partBeginPtr = beginPtr; - auto *partEndPtr = - reinterpret_cast(CacheLineAlign(reinterpret_cast(partBeginPtr + partSize))); + auto alignedAddress = CacheLineAlign(reinterpret_cast(partBeginPtr + partSize)); + auto *partEndPtr = reinterpret_cast(alignedAddress); std::vector> tasks; auto val = TypeTrans(uniqueIn.inputId); while (partBeginPtr < finishPtr) { diff --git a/src/core/file_system/local_file_system/local_file_system.h b/src/core/file_system/local_file_system/local_file_system.h index d137f158..f8eefd5b 100644 --- a/src/core/file_system/local_file_system/local_file_system.h +++ b/src/core/file_system/local_file_system/local_file_system.h @@ -46,8 +46,6 @@ namespace MxRec { void WriterFn(BufferQueue& queue, int fd, ssize_t& writerBytesNum); void FillToBuffer(BufferQueue& queue, const char* data, size_t dataSize); void CalculateMapSize(off_t fileSize, size_t& mapByteSize, size_t& mapRowNum, size_t onceReadByteSize) const; - void HandleMappedData(char* mappedData, size_t mapRowNum, size_t onceReadByteSize, - vector>& dst, size_t cnt) const; private: const mode_t dirMode; diff --git a/src/core/hd_transfer/hd_transfer.cpp b/src/core/hd_transfer/hd_transfer.cpp index 7bd083ab..a32ddf28 100644 --- a/src/core/hd_transfer/hd_transfer.cpp +++ b/src/core/hd_transfer/hd_transfer.cpp @@ -101,9 +101,9 @@ void HDTransfer::CreateChannel(const uint32_t localRankId, const string& embName TransferChannel2Str(channel) == "uniquekeys" || TransferChannel2Str(channel) == "evict" /* for noDDR */ ) { - transferChannels[sendName] = tdtCreateChannel(localRankId, sendName.c_str(), channelSize); + transferChannels[sendName] = TDT_CREATE_CHANNEL(localRankId, sendName.c_str(), channelSize); } else { - transferChannels[sendName] = tdtCreateChannel(localRankId, sendName.c_str(), PING_PONG_SIZE); + transferChannels[sendName] = TDT_CREATE_CHANNEL(localRankId, sendName.c_str(), PING_PONG_SIZE); } LOG_INFO("create channel:{} {}", sendName, static_cast(transferChannels[sendName])); } diff --git a/src/core/hd_transfer/hd_transfer.h b/src/core/hd_transfer/hd_transfer.h index 0ff29e1b..f9528578 100644 --- a/src/core/hd_transfer/hd_transfer.h +++ b/src/core/hd_transfer/hd_transfer.h @@ -24,8 +24,8 @@ See the License for the specific language governing permissions and #include "utils/common.h" #include "utils/config.h" -#ifndef tdtCreateChannel -#define tdtCreateChannel acltdtCreateChannelWithCapacity +#ifndef TDT_CREATE_CHANNEL +#define TDT_CREATE_CHANNEL acltdtCreateChannelWithCapacity #endif namespace MxRec { diff --git a/src/core/initializer/random_normal_initializer/random_normal_initializer.cpp b/src/core/initializer/random_normal_initializer/random_normal_initializer.cpp index 1ea0084f..addc4647 100644 --- a/src/core/initializer/random_normal_initializer/random_normal_initializer.cpp +++ b/src/core/initializer/random_normal_initializer/random_normal_initializer.cpp @@ -20,11 +20,10 @@ See the License for the specific language governing permissions and using namespace MxRec; 
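// Note on the member-initializer-list change below (general C++ rule, stated here for
// clarity): members are constructed in the order they are declared in the header, not
// in the order they appear in the initializer list, so generator(seed) and
// distribution(mean, stddev) only read members (seed, mean, stddev) that are declared,
// and therefore constructed, before them.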
RandomNormalInitializer::RandomNormalInitializer(int start, int len, NormalInitializerInfo& initInfo) - : start(start), len(len), mean(initInfo.mean), stddev(initInfo.stddev), seed(initInfo.seed) + : start(start), len(len), mean(initInfo.mean), stddev(initInfo.stddev), seed(initInfo.seed), + initParam(initInfo.initK), generator(std::default_random_engine(seed)), + distribution(std::normal_distribution(mean, stddev)) { - initParam = initInfo.initK; - generator = std::default_random_engine(seed); - distribution = std::normal_distribution(mean, stddev); } void RandomNormalInitializer::GenerateData(float* const emb, const int embSize) diff --git a/src/core/initializer/random_normal_initializer/random_normal_initializer.h b/src/core/initializer/random_normal_initializer/random_normal_initializer.h index 9d5f9942..e342f75f 100644 --- a/src/core/initializer/random_normal_initializer/random_normal_initializer.h +++ b/src/core/initializer/random_normal_initializer/random_normal_initializer.h @@ -37,6 +37,7 @@ namespace MxRec { float mean; float stddev; int seed; + float initParam; std::default_random_engine generator; std::normal_distribution distribution; diff --git a/src/core/initializer/truncated_normal_initializer/truncated_normal_initializer.cpp b/src/core/initializer/truncated_normal_initializer/truncated_normal_initializer.cpp index d50a7a97..e011cfc7 100644 --- a/src/core/initializer/truncated_normal_initializer/truncated_normal_initializer.cpp +++ b/src/core/initializer/truncated_normal_initializer/truncated_normal_initializer.cpp @@ -20,7 +20,8 @@ See the License for the specific language governing permissions and using namespace MxRec; TruncatedNormalInitializer::TruncatedNormalInitializer(int start, int len, NormalInitializerInfo& initInfo) - : start(start), len(len), seed(initInfo.seed) + : start(start), len(len), seed(initInfo.seed), generator(std::default_random_engine(initInfo.seed)), + distribution(std::normal_distribution(initInfo.mean, initInfo.stddev)) { initParam = initInfo.initK; // 校验stddev mean值范围 @@ -43,7 +44,6 @@ TruncatedNormalInitializer::TruncatedNormalInitializer(int start, int len, Norma stddev = initInfo.stddev; } - generator = std::default_random_engine(seed); distribution = std::normal_distribution(mean, stddev); minBound = initParam * (mean - static_cast(boundNum) * stddev); maxBound = initParam * (mean + static_cast(boundNum) * stddev); diff --git a/src/core/utils/common.h b/src/core/utils/common.h index f833b759..b761a1ef 100644 --- a/src/core/utils/common.h +++ b/src/core/utils/common.h @@ -269,15 +269,11 @@ namespace MxRec { }; struct EmbeddingSizeInfo { + size_t embeddingSize = 0; + size_t extendEmbSize = 0; EmbeddingSizeInfo() = default; EmbeddingSizeInfo(size_t embSize, size_t extendSize) - { - embeddingSize = embSize; - extendEmbSize = extendSize; - } - - size_t embeddingSize; - size_t extendEmbSize; + : embeddingSize(embSize), extendEmbSize(extendSize) {} }; struct OptimizerInfo { @@ -417,6 +413,12 @@ namespace MxRec { } struct EmbInfoParams { + std::string name; + int sendCount; + int embeddingSize; + int extEmbeddingSize; + bool isSave; + bool isGrad; EmbInfoParams() = default; EmbInfoParams(const std::string& name, @@ -433,12 +435,6 @@ namespace MxRec { isGrad(isGrad) { } - std::string name; - int sendCount; - int embeddingSize; - int extEmbeddingSize; - bool isSave; - bool isGrad; }; struct EmbInfo { diff --git a/src/dataset_tf/eos_dataset_op.cc b/src/dataset_tf/eos_dataset_op.cc index 85b8e1d0..afc3fe3a 100644 --- a/src/dataset_tf/eos_dataset_op.cc 
+++ b/src/dataset_tf/eos_dataset_op.cc @@ -74,15 +74,15 @@ int CheckCommFinished(MPI_Request& req, int channelId) // Defines the immutable dataset; this class's MakeIterator() method tells TensorFlow how to create an iterator object over the dataset. class EosDatasetOp::Dataset : public DatasetBase { public: - explicit Dataset(OpKernelContext *ctx, const DatasetBase *input, int32_t channelId, int32_t maxTrainSteps, - int32_t maxEvalSteps) + explicit Dataset(OpKernelContext *ctx, const DatasetBase *input, int32_t channelId, + int32_t maxTrainSteps, + int32_t maxEvalSteps) : DatasetBase(DatasetContext(ctx)), input_(input), channelId_(channelId), maxTrainSteps_(maxTrainSteps), maxEvalSteps_(maxEvalSteps), - id_(g_datasetId[channelId]) - { + id_(g_datasetId[channelId]) { input_->Ref(); auto os_input = input->output_shapes(); output_shapes_ = os_input; @@ -93,12 +93,13 @@ public: MPI_Comm_size(g_comm[channelId], &g_rankSize); LOG_DEBUG("EosDataset: {} was born for channel: {}, maxTrainSteps: {}, maxEvalSteps: {}.", - g_datasetId[channelId], channelId, maxTrainSteps, maxEvalSteps); + g_datasetId[channelId], channelId, maxTrainSteps, maxEvalSteps); g_datasetId[channelId] += 1; } - Dataset(const Dataset&) = delete; - Dataset& operator=(const Dataset&) = delete; + Dataset(const Dataset &) = delete; + + Dataset &operator=(const Dataset &) = delete; ~Dataset() override { @@ -147,8 +148,10 @@ public: } protected: - Status AsGraphDefInternal(SerializationContext *ctx, DatasetGraphDefBuilder *b, Node **output) const override - { + Status + AsGraphDefInternal(SerializationContext *ctx, DatasetGraphDefBuilder *b, + Node **output) const override + { Node *input_graph = nullptr; TF_RETURN_IF_ERROR(b->AddInputDataset(ctx, input_, &input_graph)); Node *channel_id_x = nullptr; @@ -158,7 +161,8 @@ protected: Node *max_eval_steps_x = nullptr; TF_RETURN_IF_ERROR(b->AddScalar(maxEvalSteps_, &max_eval_steps_x)); TF_RETURN_IF_ERROR( - b->AddDataset(this, { input_graph, channel_id_x, max_train_steps_x, max_eval_steps_x }, output)); + b->AddDataset(this, {input_graph, channel_id_x, max_train_steps_x, max_eval_steps_x}, + output)); return Status::OK(); } @@ -166,20 +170,27 @@ private: // Represents the mutable iterator state over a specific dataset; this class's GetNextInternal() method tells TensorFlow how to fetch the iterator's next element. class Iterator : public DatasetIterator { public: - explicit Iterator(const Params &params) : DatasetIterator(params), i_(0), iter_times_(0) {} + explicit Iterator(const Params &params) : DatasetIterator(params), i_(0), + iter_times_(0) {} + #if defined(TF_VERSION_TF2) Status Initialize(IteratorContext* ctx) override { return dataset()->input_->MakeIterator(ctx, this, prefix(), &input_impl_); } #else + Status Initialize(IteratorContext *ctx) override { return dataset()->input_->MakeIterator(ctx, prefix(), &input_impl_); } + #endif - Status GetNextInternal(IteratorContext *ctx, std::vector *out_tensors, bool *end_of_sequence) override - { + + Status + GetNextInternal(IteratorContext *ctx, std::vector *out_tensors, + bool *end_of_sequence) override + { mutex_lock l(mu_); if (!input_impl_) { *end_of_sequence = true; @@ -202,12 +213,14 @@ private: getNextStatus = GET_NEXT_TERMINATE; MPI_Request req; - MPI_Iallreduce(MPI_IN_PLACE, &getNextStatus, 1, MPI_INT, MPI_SUM, g_comm[channelId], &req); + MPI_Iallreduce(MPI_IN_PLACE, &getNextStatus, 1, MPI_INT, MPI_SUM, g_comm[channelId], + &req); CheckCommFinished(req, channelId); keyProcess->SetEos(1, dataset()->channelId_); - LOG_DEBUG("[ACTIVE] GetNext eos was triggered actively, channel: {}, iter: {}", dataset()->channelId_, - iter_times_); + LOG_DEBUG("[ACTIVE] GetNext eos was triggered actively, 
channel: {}, iter: {}", + dataset()->channelId_, + iter_times_); input_impl_.reset(); return Status::OK(); @@ -220,7 +233,8 @@ private: if (getNextStatus < g_rankSize) { *end_of_sequence = true; keyProcess->SetEos(1, dataset()->channelId_); - LOG_DEBUG("[PASSIVE] GetNext eos was triggered passively, channel: {}, iter: {}, sum: {}", + LOG_DEBUG( + "[PASSIVE] GetNext eos was triggered passively, channel: {}, iter: {}, sum: {}", dataset()->channelId_, iter_times_, getNextStatus); input_impl_.reset(); @@ -232,11 +246,12 @@ private: } protected: - std::shared_ptr CreateNode( - IteratorContext* ctx, model::Node::Args args) const override - { - return model::MakeKnownRatioNode(std::move(args), /* ratio= */ 1); + std::shared_ptr CreateNode( + IteratorContext *ctx, model::Node::Args args) const override + { + return model::MakeKnownRatioNode(std::move(args), 1); // ratio = 1 } + #if defined(TF_VERSION_TF2) Status SaveInternal(SerializationContext* ctx, IteratorStateWriter* writer) override { @@ -244,15 +259,18 @@ private: return Status::OK(); } #else - Status SaveInternal(IteratorStateWriter* writer) override + + Status SaveInternal(IteratorStateWriter *writer) override { TF_RETURN_IF_ERROR(SaveInput(writer, input_impl_)); return Status::OK(); } + #endif - Status RestoreInternal(IteratorContext* ctx, - IteratorStateReader* reader) override - { + + Status RestoreInternal(IteratorContext *ctx, + IteratorStateReader *reader) override + { mutex_lock l(mu_); TF_RETURN_IF_ERROR(RestoreInput(ctx, reader, input_impl_)); return Status::OK(); @@ -261,11 +279,14 @@ private: private: static constexpr int GET_NEXT_CONTINUE = 1; static constexpr int GET_NEXT_TERMINATE = 0; - + tensorflow::mutex mu_; - int64 i_ GUARDED_BY(mu_); - int64 iter_times_ GUARDED_BY(mu_); - std::unique_ptr input_impl_ GUARDED_BY(mu_); + int64 i_ + GUARDED_BY(mu_); + int64 iter_times_ + GUARDED_BY(mu_); + std::unique_ptr input_impl_ + GUARDED_BY(mu_); }; const DatasetBase *input_; -- Gitee From 97cd35bf918697852cc5991f03650b470c22e9cb Mon Sep 17 00:00:00 2001 From: sihaixianyu Date: Mon, 29 Apr 2024 06:12:30 +0000 Subject: [PATCH 071/302] =?UTF-8?q?!92=20=E3=80=90=E5=86=92=E7=83=9F?= =?UTF-8?q?=E5=B7=B2=E8=BF=87=E3=80=91=E5=88=87=E5=9B=BE=E5=8A=9F=E8=83=BD?= =?UTF-8?q?=E5=A2=9E=E5=BC=BA=20*=20Slicer=E5=8A=9F=E8=83=BD=E5=A2=9E?= =?UTF-8?q?=E5=BC=BA=EF=BC=8C=E5=85=BC=E5=AE=B9TF1=E3=80=81TF2=EF=BC=8C?= =?UTF-8?q?=E6=94=AF=E6=8C=81Summary=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mx_rec/constants/constants.py | 13 - mx_rec/core/embedding.py | 4 +- mx_rec/graph/__init__.py | 9 +- mx_rec/graph/acg_push_ops.py | 641 ----------------- mx_rec/graph/constants.py | 37 + mx_rec/graph/graph_typing.py | 35 - mx_rec/graph/modifier.py | 39 +- mx_rec/graph/slicers.py | 879 ++++++++++++++++++++++++ mx_rec/graph/utils.py | 36 +- tests/mx_rec/graph/test_acg_push_ops.py | 514 -------------- tests/mx_rec/graph/test_modifier.py | 2 +- 11 files changed, 975 insertions(+), 1234 deletions(-) delete mode 100644 mx_rec/graph/acg_push_ops.py create mode 100644 mx_rec/graph/constants.py delete mode 100644 mx_rec/graph/graph_typing.py create mode 100644 mx_rec/graph/slicers.py delete mode 100644 tests/mx_rec/graph/test_acg_push_ops.py diff --git a/mx_rec/constants/constants.py b/mx_rec/constants/constants.py index 2c2cd2fe..a5f055ab 100644 --- a/mx_rec/constants/constants.py +++ b/mx_rec/constants/constants.py @@ -212,16 +212,3 @@ class TFDevice(Enum): class Flag(Enum): TRUE = "1" FALSE = "0" - - -class 
AnchorDatasetOp(Enum): - MODEL_DATASET = "ModelDataset" - OPTIMIZE_DATASET = "OptimizeDataset" - PREFETCH_DATASET = "PrefetchDataset" - - -class AnchorIteratorOp(Enum): - ITERATOR_GET_NEXT = "IteratorGetNext" - MAKE_ITERATOR = "MakeIterator" - ONE_SHOT_ITERATOR = "OneShotIterator" - diff --git a/mx_rec/core/embedding.py b/mx_rec/core/embedding.py index b38c486b..f90efcf6 100644 --- a/mx_rec/core/embedding.py +++ b/mx_rec/core/embedding.py @@ -26,8 +26,8 @@ from mx_rec.core.asc.feature_spec import FeatureSpec from mx_rec.core.emb.base_sparse_embedding import BaseSparseEmbedding from mx_rec.core.emb.emb_factory import HBMDynamicSparseEmbeddingFactory, HBMSparseEmbeddingFactory, \ ExternalStorageSparseEmbeddingFactory -from mx_rec.graph.utils import tag_orphan_ids from mx_rec.constants.constants import MAX_INT32, All2allGradientsOp, MAX_VOCABULARY_SIZE, MAX_DEVICE_VOCABULARY_SIZE +from mx_rec.graph.utils import mark_orphan_lookup_key from mx_rec.util.initialize import ConfigInitializer from mx_rec.validator.validator import ClassValidator, StringValidator, SSDFeatureValidator, \ para_checker_decorator, IntValidator, NumValidator, OptionValidator, OptionalIntValidator, \ @@ -172,7 +172,7 @@ def sparse_lookup(hashtable: BaseSparseEmbedding, # Orphan ids whose upward search finds no IteratorGetNext need to be tagged, so the subsequent ACGPushOpsToDataset step can work if isinstance(ids, tf.Tensor): - ids = tag_orphan_ids(ids) + ids = mark_orphan_lookup_key(ids) with tf.compat.v1.variable_scope("{0}//{1}".format(hashtable.table_name, kwargs.get("name"))): if isinstance(ids, FeatureSpec): diff --git a/mx_rec/graph/__init__.py b/mx_rec/graph/__init__.py index f4d2642c..b91d2a49 100644 --- a/mx_rec/graph/__init__.py +++ b/mx_rec/graph/__init__.py @@ -15,8 +15,13 @@ # limitations under the License. # ============================================================================== -__all__ = ["modify_graph_and_start_emb_cache", "GraphModifierHook", "run", "ACGPushOpsToDatasetHook"] +__all__ = [ + "modify_graph_and_start_emb_cache", + "GraphModifierHook", + "run", + "LookupSubgraphSlicerHook", + "OrphanLookupKeySlicerHook", +] from mx_rec.graph.modifier import GraphModifierHook, modify_graph_and_start_emb_cache from mx_rec.graph.patch import run -from mx_rec.graph.acg_push_ops import ACGPushOpsToDatasetHook diff --git a/mx_rec/graph/acg_push_ops.py b/mx_rec/graph/acg_push_ops.py deleted file mode 100644 index ed3e18e6..00000000 --- a/mx_rec/graph/acg_push_ops.py +++ /dev/null @@ -1,641 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -# Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License.
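The module removed below implemented ACGPushOpsToDatasetHook, a tf.estimator.SessionRunHook that rewrote the graph during begin() and later ran the iterator initializers it had stashed in a graph collection once the session existed; the new slicer hooks keep the same pattern. A minimal sketch of that hook pattern, with an illustrative collection key (not the project's real key):

import tensorflow as tf

_NEW_INITIALIZER_COLLECTION = "EXAMPLE_NEW_INITIALIZER"  # hypothetical key


class CollectedInitializerHook(tf.estimator.SessionRunHook):
    """Runs initializer ops that a graph rewrite registered in a collection."""

    def begin(self):
        # A real hook rewrites the graph here; any iterator it rebuilds
        # registers its initializer, e.g.:
        #   tf.compat.v1.add_to_collection(_NEW_INITIALIZER_COLLECTION,
        #                                  iterator.initializer)
        pass

    def after_create_session(self, session, coord):
        # The session exists now, so the collected initializers can be run.
        for init_op in tf.compat.v1.get_collection(_NEW_INITIALIZER_COLLECTION):
            session.run(init_op)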
-# ============================================================================== - -from typing import Dict, Tuple, List, Set - -import tensorflow as tf -from tensorflow.python.data.ops.dataset_ops import DatasetV1Adapter -from tensorflow.python.framework.ops import Operation -from tensorflow.python.util import nest as tf_nest -from tensorflow.core.framework import node_def_pb2 -from tensorflow.core.framework import attr_value_pb2 -from tensorflow.python.framework import tensor_util - -from mx_rec.graph import modifier -from mx_rec.util.log import logger -from mx_rec.graph.utils import export_pb_graph -from mx_rec.graph.graph_typing import SubgraphInfo -from mx_rec.constants.constants import ASCEND_TIMESTAMP, ANCHOR_DATASET_NAME, MAX_WHILE_SIZE, AnchorIteratorOp -from mx_rec.validator.validator import para_checker_decorator, ClassValidator - -tf.compat.v1.disable_eager_execution() - -_ACG_NEW_NODE_PREFIX = "ACG_" -_ACG_NEW_ITERATOR = "ACG_NEW_ITERATOR" -_ACG_NEW_INITIALIZER = "ACG_NEW_INITIALIZER" - -_OP_TYPE_TO_PUSH = frozenset(["StringSplit", "StringToNumber"]) -_OP_TYPE_TO_IGNORE = frozenset([AnchorIteratorOp.ITERATOR_GET_NEXT]) -_OP_TYPE_CONTAIN_STRING_TO_IGNORE = frozenset(["Dataset", "Summary"]) -_OP_NAME_CONTAIN_STRING_TO_IGNORE = frozenset(["save", "report_", "loss"]) -_OP_NAME_CONTAIN_STRING_TO_PUSH = frozenset(["ACG_PUSH_NODE"]) - -_TENSOR_TYPE_TO_IGNORE = frozenset([tf.variant, tf.resource]) - -_VARIABLE_TYPES = frozenset(["Variable", "VariableV2", "VarHandleOp"]) -_IGNORE_REPLACE_NODE = frozenset(["Assign", "SaveV2"]) - - -class ACGPushOpsToDatasetHook(tf.estimator.SessionRunHook): - @para_checker_decorator( - check_option_list=[ - ("dump_graph", ClassValidator, {"classes": (bool,)}), - ] - ) - def __init__(self, dump_graph: bool = False) -> None: - super().__init__() - self._dump_graph = dump_graph - - modifier.get_src_dataset = _patched_get_src_dataset - logger.info("[ACGPushOpsToDatasetHook] The function `get_src_dataset` of modifier has been replaced!") - - def begin(self): - logger.info("[ACGPushOpsToDataset] Trigger at beginning!") - graph = tf.compat.v1.get_default_graph() - _find_ops_to_be_pushed(graph=graph, dump_graph=self._dump_graph) - - def after_create_session(self, session, coord): - logger.info("[ACGPushOpsToDatasetHook] Trigger after create session!") - initializers = tf.compat.v1.get_collection(_ACG_NEW_INITIALIZER) - logger.info("[ACGPushOpsToDatasetHook] Got new initialzers: %s.", initializers) - session.run(initializers) - - def end(self, session): - logger.info("[ACGPushOpsToDatasetHook] Trigger in the end!") - - -def _find_ops_to_be_pushed(graph: tf.Graph, dump_graph: bool = False): - export_pb_graph("before_push_graph.pbtxt", dump_graph, graph_def=graph.as_graph_def()) - op_nodes = graph.get_operations() - nodes_to_push = set() - - for op_node in op_nodes: - if op_node.type in _OP_TYPE_TO_IGNORE: - continue - - pushable = False - if op_node.type in _OP_TYPE_TO_PUSH: - pushable = True - - for ignore_type in _OP_TYPE_CONTAIN_STRING_TO_IGNORE: - if ignore_type in op_node.type: - pushable = False - if not pushable: - continue - for ignore_name in _OP_NAME_CONTAIN_STRING_TO_IGNORE: - if ignore_name in op_node.name: - pushable = False - if not pushable: - continue - for each_tensor in list(op_node.outputs) + list(op_node.inputs): - if each_tensor.dtype in _TENSOR_TYPE_TO_IGNORE: - pushable = False - if not pushable: - continue - - for push_name in _OP_NAME_CONTAIN_STRING_TO_PUSH: - if push_name in op_node.name: - pushable = True - break - - if pushable: - 
nodes_to_push.add(op_node) - - if not nodes_to_push: - logger.info("No target op has to be pushed to dataset map func!") - return - - logger.info("Found operations should be pushed: %s.", nodes_to_push) - subgraph_nodes = _find_subgraph_nodes( - graph, nodes_to_push, tgt_op_type=AnchorIteratorOp.ITERATOR_GET_NEXT.value, exclude_tgt_op=True - ) - _push_subgraph_to_dataset(graph, subgraph_nodes, dump_graph) - export_pb_graph("after_push_graph.pbtxt", dump_graph, graph_def=graph.as_graph_def()) - - -def _find_subgraph_nodes( - graph: tf.Graph, - base_nodes: Set[tf.Operation], - tgt_op_type: str, - exclude_tgt_op: bool = True, -) -> Set[tf.Operation]: - subgraph_nodes = set() - visited_nodes = base_nodes - found_nodes = base_nodes - all_nodes = graph.get_operations() - logger.info("Got base_nodes: %s.", base_nodes) - - loop_cnt = 0 - while len(found_nodes) > 0: - loop_cnt += 1 - if loop_cnt > MAX_WHILE_SIZE: - raise RuntimeError(f"In bfs_lookup function, the maximum cycle depth is greater than {MAX_WHILE_SIZE}.") - - base_nodes = set() - for parent_node in found_nodes: - if (not exclude_tgt_op) and parent_node.type == tgt_op_type: - continue - base_nodes.add(parent_node) - found_nodes = set() - for base_node in base_nodes: - tmp_nodes = [x.op for x in base_node.inputs] + base_node.control_inputs - _warn_for_var_scope_nodes(all_nodes, base_node) - - tmp_nodes = set(tmp_nodes) - visited_nodes - if exclude_tgt_op: - tmp_nodes = set(filter(lambda node: node.type != tgt_op_type, tmp_nodes)) - found_nodes.update(tmp_nodes) - visited_nodes.update(tmp_nodes) - - subgraph_nodes.update(visited_nodes) - logger.info("Found subgraph from nodes_to_push: %s.", subgraph_nodes) - return subgraph_nodes - - -def _warn_for_var_scope_nodes(all_nodes: List[tf.Operation], base_node: tf.Operation): - if base_node.type in _VARIABLE_TYPES: - for x in base_node.outputs: - varable_scope_node = [x for x in all_nodes if x.name.startswith(f"{base_node.name}/")] - logger.warning("Got base_node: %s and varable_scope_node: %s.", base_node, varable_scope_node) - - -def _find_op_from_base_op(base_ops: tf.Operation, target_op_type: str) -> tf.Operation: - base_ops = modifier.check_input_list(base_ops, tf.Operation) - parent_ops = base_ops - while True: - for parent_op in parent_ops: - if parent_op.type == target_op_type: - return parent_op - base_ops = parent_ops - parent_ops = [] - for base_op in base_ops: - parent_ops.extend(modifier.find_parent_op(base_op)) - if not parent_ops: - raise ValueError(f"op {target_op_type} was not found.") - - -def _get_dataset_op(graph: tf.Graph, get_next_op: Operation) -> Operation: - if get_next_op.type != AnchorIteratorOp.ITERATOR_GET_NEXT.value: - raise TypeError(f"op '{get_next_op}' must be one instance of IteratorGetNext.") - # looking for the MakeIterator operator which corresponds to given batch_tensor - base_op = modifier.find_make_iterator_op(get_next_op.outputs[0]) - # looking for the op which is the one before OptimizeDataset operator - if tf.__version__.startswith("1"): - optimize_dataset_op = _find_op_from_base_op(base_op, "ModelDataset") - target_op = modifier.find_parent_op(optimize_dataset_op) - if not target_op: - raise RuntimeError("the parent op for 'ModelDataset' op was not found.") - if target_op[0].type != "OptimizeDataset": - raise TypeError("op OptimizeDataset was not found.") - target_op = target_op[0] - else: - # 'OptimizeDataset' is not available in TensorFlow2.X - raise RuntimeError("Not supoprt tf2") - return target_op - - -def 
_ordered_output_from_subgraph(subgraph_out: Dict[tf.Operation, Set[tf.Operation]]) -> List[tf.Tensor]: - addition_funcgraph_output_tensor = [] - for k, v in sorted(subgraph_out.items(), key=lambda x: x[0].name): - k_inputs = set(k.inputs) - for node in v: - _add_sorted_additional_tensors(addition_funcgraph_output_tensor, k_inputs, node) - return addition_funcgraph_output_tensor - - -def _add_sorted_additional_tensors(addition_funcgraph_output_tensor, k_inputs, node): - for each_tensor in sorted(node.outputs, key=lambda x: x.name): - if each_tensor in k_inputs: - addition_funcgraph_output_tensor.append(each_tensor) - - -def _get_tensor_consumers_unsafe(tensor: tf.Tensor) -> List[tf.Operation]: - if isinstance(tensor, tf.Operation): - raise RuntimeError(f"not support type: {node}") - - from tensorflow.python import pywrap_tensorflow as c_api - - consumer_names = c_api.TF_OperationOutputConsumers_wrapper(tensor._as_tf_output()) - graph = tensor.graph - result = [] - for name in consumer_names: - with graph._lock: - if name in graph._nodes_by_name: # ignore deleted node - result.append(graph._nodes_by_name[name]) - - return result - - -def _push_subgraph_to_dataset(graph: tf.Graph, subgraph_to_push: Set[tf.Operation], dump_graph: bool = False): - subgraph_in, subgraph_out = _find_subgraph_in_out(subgraph_to_push) - logger.info("Got input tensor of extracted subgraph: %s", subgraph_in) - logger.info("Got output tensor of extracted subgraph: %s", subgraph_out) - - get_next_node = graph.get_operation_by_name(AnchorIteratorOp.ITERATOR_GET_NEXT.value) - src_dataset = _get_src_dataset(graph, get_next_node) - - def acg_func(*x): # pragma: no cover - old_x = x - logger.debug("Got old batch layout: %s", x) - - x = tf_nest.flatten(x) - for each_tensor in x: - if not isinstance(each_tensor, tf.Tensor): - raise RuntimeError(f"Expected tensor as input of mapfunc. 
but got: {x}!") - - funcgraph = tf.compat.v1.get_default_graph() - subgraph_info = SubgraphInfo(subgraph_in, subgraph_out, subgraph_to_push) - new_batch = _clone_subgraph_into_funcgraph( - funcgraph, - graph, - subgraph_info, - x, - old_x, - ) - - logger.debug("Got new batch layout: %s.", new_batch) - export_pb_graph("map_func_graph.pbtxt", dump_graph, graph_def=funcgraph.as_graph_def()) - return new_batch - - tgt_dataset = src_dataset.map(acg_func) - tgt_dataset = tgt_dataset.prefetch(0) - _update_iterator_getnext( - graph=graph, - get_next_op=get_next_node, - tgt_dataset=tgt_dataset, - subgraph_out=subgraph_out, - subgraph_to_push=subgraph_to_push, - ) - - -def _find_subgraph_in_out( - sub_graph_nodes: Set[tf.Operation], -) -> Tuple[Dict[tf.Operation, Set[tf.Operation]], Dict[tf.Operation, Set[tf.Operation]]]: - relay_input_nodes = set() - relay_output_nodes = set() - input_to_subnodes = dict() - output_to_subnodes = dict() - - for base_node in sub_graph_nodes: - _update_subgraph_in(base_node, input_to_subnodes, relay_input_nodes, sub_graph_nodes) - _update_subgraph_out(base_node, output_to_subnodes, relay_output_nodes, sub_graph_nodes) - - return input_to_subnodes, output_to_subnodes - - -def _update_subgraph_in( - base_node: tf.Operation, - input_to_subnodes: Dict[tf.Operation, Set[tf.Operation]], - relay_input_nodes: Set[tf.Operation], - sub_graph_nodes: Set[tf.Operation], -): - for input_tensor in base_node.inputs: - input_node = input_tensor.op - if input_node not in sub_graph_nodes: - relay_input_nodes.add(input_node) - res = input_to_subnodes.get(input_node, set()) - res.add(base_node) - input_to_subnodes[input_node] = res - - -def _update_subgraph_out( - base_node: tf.Operation, - output_to_subnodes: Dict[tf.Operation, Set[tf.Operation]], - relay_output_nodes: Set[tf.Operation], - sub_graph_nodes: Set[tf.Operation], -): - for output_tensor in base_node.outputs: - for output_consumer in output_tensor.consumers(): - if output_consumer not in sub_graph_nodes: - relay_output_nodes.add(output_consumer) - res = output_to_subnodes.get(output_consumer, set()) - res.add(base_node) - output_to_subnodes[output_consumer] = res - - -def _get_src_dataset(graph: tf.Graph, get_next_op: Operation) -> DatasetV1Adapter: - try: - target_op = _get_dataset_op(graph, get_next_op) - except (ValueError, TypeError, RuntimeError) as err: - logger.warning("The dataset op was not found, the error is %s. Start to traverse the operations.", err) - dataset_op_list = [op for op in graph.get_operations() if ANCHOR_DATASET_NAME in op.name] - if len(dataset_op_list) != 1: - raise RuntimeError( - f"The `{ANCHOR_DATASET_NAME}` was not found from the operations, dataset_op_list: " - f"{dataset_op_list}." 
- ) from err - target_op = dataset_op_list[0] - except Exception as err: - raise RuntimeError(f"The dataset was not found, the error is `{err}`.") from err - if not target_op.outputs: - raise ValueError(f"The length of the outputs of target op `{target_op}` is 0.") - logger.info("Find target op `%s`, and output is `%s`.", target_op.name, target_op.outputs) - src_dataset = modifier.find_target_instance_dataset(target_op.outputs[0]) - return src_dataset - - -def _clone_subgraph_into_funcgraph( - funcgraph: tf.Graph, - defaultgraph: tf.Graph, - subgraph_info: SubgraphInfo, - x: List[tf.Tensor], - old_x: Tuple[Dict[str, tf.Tensor]], -) -> Dict[str, tf.Tensor]: - topo_subgraph_list = _topo_subgraph(subgraph_info.subgraph_to_push) # node - tensor_mapping = {} # subgraph-tensor -> funcgraph-tensor - node_mapping = {} # subgraph-node -> funcgraph-node - for k, v in subgraph_info.subgraph_in.items(): - _get_mapping_for_subgraph_in(k, v, x, tensor_mapping) - for old_node in topo_subgraph_list: - _get_mapping_for_subgraph(funcgraph, defaultgraph, node_mapping, old_node, tensor_mapping) - - logger.info("Got node_mapping: %s", node_mapping) - logger.info("Got tensor_mapping: %s", tensor_mapping) - - ordered_output_subgraph_tensors = _ordered_output_from_subgraph(subgraph_info.subgraph_out) - addition_funcgraph_output_tensor = _get_mapping_tensor(tensor_mapping, ordered_output_subgraph_tensors) - new_funcgraph_output_tensor = list(x) + addition_funcgraph_output_tensor - logger.info("Got new_funcgraph_output_tensor: %s", new_funcgraph_output_tensor) - - new_x = old_x[0] - for tensor in addition_funcgraph_output_tensor: - last_key = f"{sorted(new_x)[-1]}_last_key" - new_x[last_key] = tensor - - return new_x - - -def _get_mapping_for_subgraph_in( - from_node: tf.Operation, to_nodes: Set[tf.Operation], x: List[tf.Tensor], tensor_mapping -): - if from_node.type != AnchorIteratorOp.ITERATOR_GET_NEXT.value: - raise RuntimeError(f"Expect IteratorGetNext for input tensor of subgraph, but got {from_node}") - for node in to_nodes: - for each_tensor in node.inputs: - if each_tensor.op.type != AnchorIteratorOp.ITERATOR_GET_NEXT.value: - continue - old_tensor_name = each_tensor.name - x_index = int(old_tensor_name.split(":")[-1]) - tensor_mapping[each_tensor] = x[x_index] - - -def _get_mapping_for_subgraph( - funcgraph: tf.Graph, - defaultgraph: tf.Graph, - node_mapping: Dict[tf.Operation, tf.Operation], - old_node: tf.Operation, - tensor_mapping: Dict[tf.Tensor, tf.Tensor], -): - logger.debug("old_node: %s \n old_node_inputs: %s", old_node, [x for x in old_node.inputs]) - node_def = old_node.node_def - for each_tensor in old_node.inputs: - if each_tensor not in tensor_mapping: - raise RuntimeError( - f"each_tensor(input) {each_tensor} need by {old_node.name} not in tensor_mapping.{tensor_mapping}" - ) - new_inputs = _get_mapping_tensor(tensor_mapping, old_node.inputs) - if old_node.type in _VARIABLE_TYPES: - node_def = _frozen_variable_node_to_func_const_node_def( - variable_node=old_node, funcgraph=funcgraph, defaultgraph=defaultgraph - ) - node_def.name = _ACG_NEW_NODE_PREFIX + node_def.name - new_node = tf.Operation(node_def=node_def, g=funcgraph, inputs=new_inputs) - node_mapping[old_node] = new_node - for old_out_tensor, new_out_tensor in zip(old_node.outputs, new_node.outputs): - tensor_mapping[old_out_tensor] = new_out_tensor - - -def _frozen_variable_node_to_func_const_node_def( - variable_node: tf.Operation, funcgraph: tf.Graph, defaultgraph: tf.Graph -) -> node_def_pb2.NodeDef: - def 
create_const_node_def(node_name, dtype, data, data_shape=None): - """Creates a Const op.""" - output_node = node_def_pb2.NodeDef() - output_node.op = "Const" - output_node.name = node_name - output_node.attr["dtype"].CopyFrom(dtype) - output_node.attr["value"].CopyFrom( - attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto(data, dtype=dtype.type, shape=data_shape)) - ) - return output_node - - # NOTE: Variable node type is readonly in funcgraph, all nodes of this type have to be fronzen. - variable_name = variable_node.name - if variable_node.type == "VarHandleOp": - variable_name = f"{variable_name}/Read/ReadVariableOp:0" - else: - variable_name = f"{variable_name}:0" - initializer = defaultgraph.get_operation_by_name(f"{variable_node.name}/Assign") - logger.info(f"VariableV2: {variable_node.name}, initializer: {initializer.name} ") - defaultsession = tf.compat.v1.Session(graph=defaultgraph) - _ = defaultsession.run([initializer]) - logger.info(f"Start run variables data: {variable_name}") - returned_variable_data = defaultsession.run(variable_name) - logger.info(f"Start froze variables: {variable_name} {returned_variable_data}") - new_const_node = create_const_node_def( - variable_node.name, variable_node.node_def.attr["dtype"], returned_variable_data, returned_variable_data.shape - ) - return new_const_node - - -def _get_mapping_tensor(tsr2tsr: Dict[tf.Tensor, tf.Tensor], keys: List[tf.Tensor]) -> List[tf.Tensor]: - tensors = [] - for k in keys: - if k not in tsr2tsr: - raise KeyError(f"Failed to find key tensor: {k} from tensor map: {tsr2tsr}.") - tensors.append(tsr2tsr[k]) - return tensors - - -def _topo_subgraph(subgraph: Set[tf.Operation]) -> List[tf.Operation]: - topo_subgraph_list = [] - topo_subgraph_set = set() - start_nodes = set() - [start_nodes.add(x) for x in subgraph] - logger.info("Got topo_subgraph start nodes: %s", start_nodes) - - def topo_subgraph_dfs(curr_node, output_list, output_set): - if not isinstance(curr_node, tf.Operation): - raise RuntimeError(f"topo_subgraph_dfs input should be node(aka. tf.Operator). {curr_node}") - curr_inputs = curr_node.inputs - logger.debug("Got topo_dfs: %s <- %s", curr_node.name, [x.name for x in curr_inputs]) - current_control_inputs = curr_node.control_inputs - if len(current_control_inputs) > 0: - raise RuntimeError( - f"Control input are not supported: {curr_node.name}, control_inputs: {current_control_inputs}" - ) - if curr_node in output_set: - return - output_set.add(curr_node) - for tensor in curr_inputs: - node = tensor.op - if node.type != AnchorIteratorOp.ITERATOR_GET_NEXT.value and node not in output_set: - topo_subgraph_dfs(node, output_list, output_set) - output_list.append(curr_node) - - [topo_subgraph_dfs(x, topo_subgraph_list, topo_subgraph_set) for x in start_nodes] - if len(topo_subgraph_list) != len(topo_subgraph_set): - raise RuntimeError(f"Got duplicated topo node: {sorted(topo_subgraph_list, key=lambda x: x.name)}.") - logger.info("Got topo_subgraph: %s", topo_subgraph_list) - return topo_subgraph_list - - -def _update_iterator_getnext( - graph: tf.Graph, - get_next_op: Operation, - tgt_dataset: DatasetV1Adapter, - subgraph_out: Dict[tf.Operation, Set[tf.Operation]], - subgraph_to_push: Set[tf.Operation], -): - if not get_next_op.outputs: - raise RuntimeError("there is no tensor in the dataset. 
Please check the dataset and data processing.") - iterator_type = "" - if get_next_op.inputs: - iterator_type = get_next_op.inputs[0].op.type - if iterator_type == "IteratorV2": - iterator_type = modifier.find_make_iterator_op(get_next_op.outputs[0]).type - if iterator_type not in (AnchorIteratorOp.MAKE_ITERATOR.value, AnchorIteratorOp.ONE_SHOT_ITERATOR.value): - raise RuntimeError( - f"Only iterators `MakeIterator` and `OneShotIterator` are supported in `graph modify` mode, " - f"but the current iterator is `{iterator_type}`." - ) - logger.info("The iterator type of dataset is %s.", iterator_type) - if iterator_type == AnchorIteratorOp.MAKE_ITERATOR.value: - new_iterator = tgt_dataset.make_initializable_iterator() - logger.info("Got new_iterator: %s, new_iterator.initializer: %s.", new_iterator, new_iterator.initializer) - graph.add_to_collection(_ACG_NEW_INITIALIZER, new_iterator.initializer) - else: - new_iterator = tgt_dataset.make_one_shot_iterator() - new_batch = new_iterator.get_next(_ACG_NEW_ITERATOR) - if "timestamp" in new_batch.keys(): - tf.compat.v1.add_to_collection(ASCEND_TIMESTAMP, new_batch["timestamp"]) - try: - new_batch_tensor = new_batch - while not isinstance(new_batch_tensor, tf.Tensor): - if isinstance(new_batch_tensor, tuple): - new_batch_tensor = new_batch_tensor[0] - elif isinstance(new_batch_tensor, dict): - new_batch_tensor = list(new_batch_tensor.values()) - elif isinstance(new_batch_tensor, list): - new_batch_tensor = new_batch_tensor[0] - elif isinstance(new_batch_tensor, tf.Tensor): - break - else: - raise RuntimeError( - f"Need to support new_batch_tensor{new_batch_tensor}, type: {type(new_batch_tensor)}" - ) - except IndexError as err: - raise IndexError("Cannot find a tensor from given batch.") from err - new_get_next_op = _find_op_from_base_op(new_batch_tensor.op, AnchorIteratorOp.ITERATOR_GET_NEXT.value) - logger.info("Got new_get_next_op: %s.", new_get_next_op) - _replace_get_next_op(graph, get_next_op, new_get_next_op, subgraph_out, subgraph_to_push) - - -def _replace_get_next_op( - graph: tf.Graph, - old_get_next_op: tf.Operation, - new_get_next_op: tf.Operation, - subgraph_out: Dict[tf.Operation, Set[tf.Operation]], - subgraph_to_push: Set[tf.Operation], -): - for output_tensor in old_get_next_op.outputs: - _update_old_consumer(graph, new_get_next_op, output_tensor, subgraph_to_push) - - old_get_next_op_output_size = len(old_get_next_op.outputs) - ordered_output_tensor = _ordered_output_from_subgraph(subgraph_out) - - for i, output_tensor in enumerate(ordered_output_tensor): - offset = old_get_next_op_output_size + i - _update_subgraph_out_consumer(graph, new_get_next_op, offset, output_tensor) - - -def _update_old_consumer( - graph: tf.Graph, new_get_next_op: tf.Operation, output_tensor: tf.Tensor, subgraph_to_push: List[tf.Operation] -): - old_tensor_name = output_tensor.name - output_index = old_tensor_name.split(":")[-1] - new_tensor_name = f"{new_get_next_op.name}:{output_index}" - logger.info("Replace old_tensor_name: %s to new_tensor_name: %s", old_tensor_name, new_tensor_name) - new_tensor = graph.get_tensor_by_name(new_tensor_name) - for output_consumer in _get_tensor_consumers_unsafe(output_tensor): - if output_consumer in subgraph_to_push: - logger.info( - "Ignore consumer in old subgraph %s, not let it connect to new IteratorGetNext.", output_consumer - ) - continue - for i, consumer_input in enumerate(output_consumer.inputs): - if consumer_input != output_tensor: - logger.debug("Not replace output_consumer: %s consumer_input: %s.", 
output_consumer, consumer_input) - continue - logger.info( - "Success replace output_consumer: %s type: %s from consumer_input: %s to new_tensor: %s", - output_consumer.name, - output_consumer.type, - consumer_input, - new_tensor, - ) - output_consumer._update_input(i, new_tensor) - - -def _update_subgraph_out_consumer( - graph: tf.Graph, new_get_next_op: tf.Operation, offset: int, output_tensor: tf.Tensor -): - new_tensor_name = f"{new_get_next_op.name}:{offset}" - logger.info("Replace old_tensor_name: %s to new_tensor_name: %s.", output_tensor.name, new_tensor_name) - new_tensor = graph.get_tensor_by_name(new_tensor_name) - for output_consumer in _get_tensor_consumers_unsafe(output_tensor): - if output_consumer.type in _IGNORE_REPLACE_NODE: - logger.info("Ignore replace output_consumer: %s, it's of type: %s.", output_consumer, output_consumer.type) - continue - for j, consumer_input in enumerate(output_consumer.inputs): - if consumer_input != output_tensor: - logger.debug("Not replace output_consumer: %s consumer_input: %s.", output_consumer, consumer_input) - continue - logger.info( - "Success replace output_consumer: %s type: %s from consumer_input: %s to new_tensor: %s", - output_consumer.name, - output_consumer.type, - consumer_input, - new_tensor, - ) - output_consumer._update_input(j, new_tensor) - - -def _patched_get_src_dataset(get_next_op: Operation, is_training: bool) -> DatasetV1Adapter: - try: - target_op = modifier.get_dataset_op(get_next_op) - except (ValueError, TypeError, RuntimeError) as err: - logger.debug("In `OneShotIterator` mode, find `PrefetchDataset` from all ops in graph.") - graph = tf.compat.v1.get_default_graph() - dataset_op_list = [op for op in graph.get_operations() if ANCHOR_DATASET_NAME in op.name] - dataset_op_list = sorted(dataset_op_list, key=lambda op: op.name) - logger.debug("Got sorted dataset_op_list: %s.", dataset_op_list) - if len(dataset_op_list) != 2: - raise RuntimeError( - f"Expect two `PrefetchDataset` ops in dataset_op_list, but got: {dataset_op_list}." - ) from err - target_op = dataset_op_list[1] - except Exception as err: - raise RuntimeError(f"The source dataset can't be found, got error: {err}.") from err - - if not target_op.outputs: - raise ValueError(f"The length of the outputs of target op `{target_op}` is 0.") - - logger.debug("Find target dataset op: %s, and output is %s.", target_op, target_op.outputs) - src_dataset = modifier.find_target_instance_dataset(target_op.outputs[0]) - - return src_dataset diff --git a/mx_rec/graph/constants.py b/mx_rec/graph/constants.py new file mode 100644 index 00000000..077405d6 --- /dev/null +++ b/mx_rec/graph/constants.py @@ -0,0 +1,37 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + + +from enum import Enum + + +class DeprecatedOp(Enum): + DEPRECATED_ITERATOR_GET_NEXT = "DEPRECATED_ITERATOR_GET_NEXT" + DEPRECATED_PREFETCH_DATASET = "DEPRECATED_PREFETCH_DATASET" + + +class AnchorDatasetOp(Enum): + MODEL_DATASET = "ModelDataset" + OPTIMIZE_DATASET = "OptimizeDataset" + PREFETCH_DATASET = "PrefetchDataset" + + +class AnchorIteratorOp(Enum): + ITERATOR_GET_NEXT = "IteratorGetNext" + ITERATOR_V2 = "IteratorV2" + MAKE_ITERATOR = "MakeIterator" + ONE_SHOT_ITERATOR = "OneShotIterator" diff --git a/mx_rec/graph/graph_typing.py b/mx_rec/graph/graph_typing.py deleted file mode 100644 index c11bd4c0..00000000 --- a/mx_rec/graph/graph_typing.py +++ /dev/null @@ -1,35 +0,0 @@ -# !/usr/bin/env python3 -# -- coding: utf-8 -- -# Copyright (c) Huawei Technologies Co., Ltd. 2024-2024. All rights reserved. - -import dataclasses -from typing import Dict, DefaultDict, List, Tuple, Set - -from tensorflow import Operation, Tensor -from tensorflow.core.framework.graph_pb2 import GraphDef - - -# DefaultDict: -# Key: Tensor => Represent output tensor of `IteratorGetNext` operation. -# Val: List[Tuple[int, Operation]] => Contains target operation of output tensor and it's corresponding index. -ReplacementSpec = DefaultDict[Tensor, List[Tuple[int, Operation]]] - - -@dataclasses.dataclass -class AnchorRecord: - replacement_spec: ReplacementSpec - passing_tensors: List[Tensor] - batch_tensor_indexs: List[int] - sub_cutting_points: List[Tensor] - sub_graph_def: GraphDef - input_names: List[str] - output_names: List[str] - is_training: bool - input_indexs: List[int] = None - - -@dataclasses.dataclass -class SubgraphInfo: - subgraph_in: Dict[Operation, Set[Operation]] - subgraph_out: Dict[Operation, Set[Operation]] - subgraph_to_push: Set[Operation] diff --git a/mx_rec/graph/modifier.py b/mx_rec/graph/modifier.py index 8338e870..e0b4bdeb 100644 --- a/mx_rec/graph/modifier.py +++ b/mx_rec/graph/modifier.py @@ -15,9 +15,10 @@ # limitations under the License. 
# ============================================================================== +import dataclasses from collections import defaultdict from collections.abc import Callable -from typing import Any, List, Dict, Tuple +from typing import Any, List, Dict, Tuple, DefaultDict import tensorflow as tf from tensorflow import Operation, Tensor @@ -26,16 +27,15 @@ from tensorflow.python.data.ops.dataset_ops import DatasetV1Adapter from tensorflow.python.framework.errors_impl import InvalidArgumentError from mx_rec.constants.constants import ASCEND_CUTTING_POINT_INITIALIZER, ASCEND_SPARSE_LOOKUP_ENTRANCE, \ - ASCAnchorAttr, ASCEND_TIMESTAMP, MAX_WHILE_SIZE, LIBREC_EOS_OPS_SO, AnchorDatasetOp, \ - AnchorIteratorOp + ASCAnchorAttr, ASCEND_TIMESTAMP, MAX_WHILE_SIZE, LIBREC_EOS_OPS_SO from mx_rec.core.asc.feature_spec import FeatureSpec from mx_rec.core.asc.helper import get_asc_insert_func from mx_rec.core.asc.manager import start_asc_pipeline from mx_rec.core.emb.base_sparse_embedding import BaseSparseEmbedding from mx_rec.graph.merge_lookup import do_merge_lookup -from mx_rec.graph.utils import check_input_list, find_parent_op, check_cutting_points, record_ops_to_replace, \ - export_pb_graph, make_sorted_key_to_tensor_list -from mx_rec.graph.graph_typing import AnchorRecord, ReplacementSpec +from mx_rec.graph.utils import check_input_list, find_parent_op, check_cutting_points, \ +record_ops_to_replace, export_pb_graph, make_sorted_key_to_tensor_list +from mx_rec.graph.constants import DeprecatedOp, AnchorDatasetOp, AnchorIteratorOp from mx_rec.util.initialize import ConfigInitializer from mx_rec.util.log import logger from mx_rec.util.ops import import_host_pipeline_ops @@ -43,6 +43,19 @@ from mx_rec.util.perf import performance from mx_rec.validator.validator import para_checker_decorator, ClassValidator +@dataclasses.dataclass +class AnchorRecord: + replacement_spec: DefaultDict[Tensor, List[Tuple[int, Operation]]] + passing_tensors: List[Tensor] + batch_tensor_indexs: List[int] + sub_cutting_points: List[Tensor] + sub_graph_def: GraphDef + input_names: List[str] + output_names: List[str] + is_training: bool + input_indexs: List[int] = None + + def get_preprocessing_map_func( graph_def: GraphDef, input_names: List[str], @@ -142,7 +155,7 @@ def parse_batch(data_args: Any, data_batch: dict, key: str = None): def get_input_index_list( cutting_point_list: List[Tensor], - replacement_specs: ReplacementSpec, + replacement_specs: DefaultDict[Tensor, List[Tuple[int, Operation]]], mapping_name_list: List[str], base_count: int, timestamp_index: int = None @@ -319,7 +332,7 @@ def get_sub_graph( return sub_graph_def, input_name_list, output_name_list -def update_input_tensor_with_new_batch(replacement_specs: ReplacementSpec, +def update_input_tensor_with_new_batch(replacement_specs: DefaultDict[Tensor, List[Tuple[int, Operation]]], new_get_next_op_name: str, new_batch: Dict[str, Tensor]): """ @@ -428,6 +441,14 @@ def get_src_dataset(get_next_op: Operation, is_training: bool) -> DatasetV1Adapt logger.warning("The dataset op was not found, the error is `%s`. Start to traverse the operations.", err) graph = tf.compat.v1.get_default_graph() dataset_op_list = [op for op in graph.get_operations() if AnchorDatasetOp.PREFETCH_DATASET.value in op.name] + + # WARN: Couple with NoGradSubgraphSlicer::_find_old_dataset. 
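+        # Context: NoGradSubgraphSlicer._find_old_dataset registers each PrefetchDataset
+        # it has already consumed in this collection, so filtering against the
+        # collection keeps this re-scan from picking the same dataset op up twice.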
+ dataset_op_list = list( + filter(lambda op: op not in tf.compat.v1.get_collection(DeprecatedOp.DEPRECATED_PREFETCH_DATASET), + dataset_op_list) + ) + dataset_op_list = sorted(dataset_op_list, key=lambda op: op.name) + logger.debug("In get_src_dataset function, current mode(train: True, eval: False): %s, dataset_op_list: %s.", is_training, dataset_op_list) @@ -440,7 +461,7 @@ def get_src_dataset(get_next_op: Operation, is_training: bool) -> DatasetV1Adapt prefetch_dataset_op_list = sorted(dataset_op_list, key=lambda op: op.name) target_op = prefetch_dataset_op_list[1] else: - raise RuntimeError(f"`{AnchorDatasetOp.PREFETCH_DATASET.value}` not found, got dataset_op_list: " + raise RuntimeError(f"'{AnchorDatasetOp.PREFETCH_DATASET.value}' not found, got transformation datasets: " f"{dataset_op_list}.") from err except Exception as err: raise RuntimeError(f"The dataset was not found, the error is `{err}`.") from err diff --git a/mx_rec/graph/slicers.py b/mx_rec/graph/slicers.py new file mode 100644 index 00000000..d22af868 --- /dev/null +++ b/mx_rec/graph/slicers.py @@ -0,0 +1,879 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +import os +import abc +from typing import List, Dict, Set, Tuple, Union + +import pandas as pd +import tensorflow as tf +from tensorflow import Operation, Tensor, SparseTensor, Graph, variant, resource +from tensorflow.python.data.ops.dataset_ops import DatasetV1Adapter + +from mx_rec.graph import utils, modifier +from mx_rec.util.log import logger +from mx_rec.validator.validator import ClassValidator, para_checker_decorator +from mx_rec.constants.constants import ( + ASCEND_TIMESTAMP, + MAX_WHILE_SIZE, + ASCAnchorAttr, + ASCEND_SPARSE_LOOKUP_ENTRANCE, +) +from mx_rec.graph.constants import DeprecatedOp, AnchorDatasetOp, AnchorIteratorOp +from mx_rec.core.emb.base_sparse_embedding import BaseSparseEmbedding + + +class NoGradSubgraphSlicer(metaclass=abc.ABCMeta): + _SLICED_OP_NAME_PREFIX = "sliced" + + _SLICING_SUMMARY_NAME = "slicing_summary.csv" + _UNSLICED_FULL_GRAPH_NAME = "unsliced_full_graph.pbtxt" + _SLICED_SUB_GRAPH_NAME = "sliced_sub_graph.pbtxt" + _SLICED_FULL_GRAPH_NAME = "sliced_full_graph.pbtxt" + + _INVALID_STR_IN_OP_TYPE = ("Dataset", "Summary") + _INVALID_STR_IN_OP_NAME = ("save", "report_", "loss") + _INVALID_CONSUMER_OP_TYPE = ("Assign", "SaveV2") + + _VALID_TENSOR_CLASS = (Tensor, SparseTensor) + _INVALID_TENSOR_DTYPE = (variant, resource) + + def __init__(self, full_graph: Graph = None, info_dir: str = "slicing") -> None: + if not full_graph: + full_graph = tf.compat.v1.get_default_graph() + self._full_graph = full_graph + + if not os.path.exists(info_dir): + os.makedirs(info_dir) + self._info_dir = info_dir + + @abc.abstractmethod + def summarize(self) -> None: + pass + + @abc.abstractmethod + def slice(self) -> None: + pass + + def _slice_ops(self, 
sliceable_ops: Set[Operation], is_training: bool) -> None: + sliced_ops = self._find_min_dep_ops(sliceable_ops) + in_op_to_edge_ops, out_op_to_edge_ops = self._find_subgraph_in_and_out(sliced_ops) + + old_get_next = self._find_old_get_next(sliceable_ops) + old_dataset = self._find_old_dataset(old_get_next, is_training) + + new_dataset = self._make_new_dataset(old_dataset, sliced_ops, in_op_to_edge_ops, out_op_to_edge_ops) + new_dataset = new_dataset.prefetch(0) + + new_get_next = self._make_new_get_next(old_get_next, new_dataset) + self._replace_get_next(old_get_next, new_get_next, out_op_to_edge_ops, sliced_ops) + + def _make_new_dataset( + self, + old_dataset: DatasetV1Adapter, + sliced_ops: Set[Operation], + in_op_to_edge_ops: Dict[Operation, Set[Operation]], + out_op_to_edge_ops: Dict[Operation, Set[Operation]], + ) -> DatasetV1Adapter: + def slice_map_func(*batch): # pragma: no cover + logger.debug("The layout of old batch: %s.", batch) + + funcgraph = tf.compat.v1.get_default_graph() + flatten_batch = tf.nest.flatten(batch) + + for t in flatten_batch: + if isinstance(t, NoGradSubgraphSlicer._VALID_TENSOR_CLASS): + continue + raise RuntimeError(f"expected 'tf.Tensor' or 'tf.SparseTensor' in batch, but got {t}.") + + new_batch = self._clone_subgraph_into_funcgraph(sliced_ops, in_op_to_edge_ops, out_op_to_edge_ops, batch) + utils.export_pb_graph( + file_name=NoGradSubgraphSlicer._SLICED_SUB_GRAPH_NAME, + dump_graph=True, + graph_def=funcgraph.as_graph_def(), + export_path=self._info_dir, + ) + + return new_batch + + return old_dataset.map(slice_map_func) + + def _find_subgraph_in_and_out( + self, + sub_graph_ops: Set[Operation], + ) -> Tuple[Dict[Operation, Set[Operation]], Dict[Operation, Set[Operation]]]: + in_op_to_edge_ops = dict() + out_op_to_edge_ops = dict() + + for base_node in sub_graph_ops: + self._update_subgraph_in(base_node, in_op_to_edge_ops, sub_graph_ops) + self._update_subgraph_out(base_node, out_op_to_edge_ops, sub_graph_ops) + + logger.info("Got input relationship of extracted subgraph: %s", in_op_to_edge_ops) + logger.info("Got output relationship of extracted subgraph: %s", out_op_to_edge_ops) + return in_op_to_edge_ops, out_op_to_edge_ops + + def _find_old_get_next(self, sliceable_ops: Set[Operation]) -> Operation: + old_get_next = self._upward_bfs_op(sliceable_ops, AnchorIteratorOp.ITERATOR_GET_NEXT.value) + + tf.compat.v1.add_to_collection(DeprecatedOp.DEPRECATED_ITERATOR_GET_NEXT, old_get_next) + logger.info("Old 'IteratorGetNext' operation has been deprecated now.") + + return old_get_next + + def _find_old_dataset(self, get_next: Operation, is_training: bool) -> DatasetV1Adapter: + tgt_trans_dataset = None + try: + tgt_trans_dataset = self._find_trans_dataset(get_next) + except (ValueError, TypeError, RuntimeError) as err: + trans_datasets = [ + op for op in self._full_graph.get_operations() if AnchorDatasetOp.PREFETCH_DATASET.value in op.name + ] + trans_datasets = list( + filter( + lambda op: op not in tf.compat.v1.get_collection(DeprecatedOp.DEPRECATED_PREFETCH_DATASET), + trans_datasets, + ) + ) + sorted_datasets = sorted(trans_datasets, key=lambda op: op.name) + + if len(trans_datasets) == 1: + tgt_trans_dataset = sorted_datasets[0] + elif is_training and len(sorted_datasets) == 2: + tgt_trans_dataset = sorted_datasets[0] + elif not is_training and len(sorted_datasets) == 2: + tgt_trans_dataset = sorted_datasets[0] + else: + raise RuntimeError(f"target transformation dataset not found, got datasets: {trans_datasets}.") from err + except Exception as 
err: + raise RuntimeError(f"the dataset was not found, the error is `{err}`.") from err + + if not tgt_trans_dataset.outputs: + raise ValueError(f"the length of the outputs of target op `{tgt_trans_dataset}` is 0.") + logger.info("Find target op `%s`, and output is `%s`.", tgt_trans_dataset.name, tgt_trans_dataset.outputs) + + # WARN: Couple with modifier module, global collection used for filtering deprecated prefetch dataset. + self._full_graph.add_to_collection(DeprecatedOp.DEPRECATED_PREFETCH_DATASET, tgt_trans_dataset) + old_dataset = modifier.find_target_instance_dataset(tgt_trans_dataset.outputs[0]) + + return old_dataset + + def _find_trans_dataset(self, get_next: Operation) -> Operation: + if get_next.type != AnchorIteratorOp.ITERATOR_GET_NEXT.value: + raise TypeError(f"operation '{get_next}' must be an instance of 'IteratorGetNext'.") + + make_iter = modifier.find_make_iterator_op(get_next.outputs[0]) + + trans_dataset = None + if tf.__version__.startswith("1"): + optimize_dataset_op = self._upward_bfs_op(make_iter, AnchorDatasetOp.MODEL_DATASET.value) + trans_dataset = utils.find_parent_op(optimize_dataset_op) + if not trans_dataset: + raise RuntimeError("parent operation of 'ModelDataset' was not found.") + if trans_dataset[0].type != AnchorDatasetOp.OPTIMIZE_DATASET.value: + raise TypeError("operation 'OptimizeDataset' was not found.") + trans_dataset = trans_dataset[0] + else: + trans_dataset = self._upward_bfs_op(make_iter, AnchorDatasetOp.PREFETCH_DATASET.value) + + return trans_dataset + + def _clone_subgraph_into_funcgraph( + self, + sliced_ops: Set[Operation], + in_op_to_edge_ops: Dict[Operation, Set[Operation]], + out_op_to_edge_ops: Dict[Operation, Set[Operation]], + batch: Tuple[Dict[str, Union[Tensor, SparseTensor, Dict]]], + ) -> Dict[str, Union[Tensor, SparseTensor, Dict]]: + """Clone the sliced subgraph into a new funcgraph. + + Args: + sliced_ops: The operation set that has been sliced. + in_op_to_edge_ops: The input relationship of sliced subgraph. + out_op_to_edge_ops: The output relationship of sliced subgraph. + batch: The original batch layout of old dataset. + + Returns: + new_batch: The new batch layout of new dataset. + """ + + topo_subgraph_list = self._topo_sort_sliced_ops(sliced_ops) + + node_mapping = {} # subgraph-node -> funcgraph-node + tensor_mapping = {} # subgraph-tensor -> funcgraph-tensor + for in_op, edge_ops in in_op_to_edge_ops.items(): + self._get_mapping_for_subgraph_in(in_op, edge_ops, tensor_mapping) + for old_op in topo_subgraph_list: + self._get_mapping_for_subgraph(old_op, node_mapping, tensor_mapping) + + logger.info("Got node_mapping: %s", node_mapping) + logger.info("Got tensor_mapping: %s", tensor_mapping) + + ordered_output_tensors = self._sort_sliced_graph_outputs(out_op_to_edge_ops) + extra_output_tensor = self._get_mapped_tensor(tensor_mapping, ordered_output_tensors) + + if not isinstance(batch, tuple): + batch = (batch,) + + new_batch = batch[0] + for tensor in extra_output_tensor: + next_last_key = f"{sorted(new_batch)[-1]}_" + new_batch[next_last_key] = tensor + + logger.debug("Got new batch layout: %s.", new_batch) + return new_batch + + def _make_new_get_next( + self, + old_get_next: Operation, + new_dataset: DatasetV1Adapter, + ) -> Operation: + """Make new 'IteratorGetNext' operation. + + 1. This func will automatically detect the iterator type of the old dataset, and then make 'IteratorGetNext' + from the corresponding iterator. + 2. Only 'MakeIterator' and 'OneShotIterator' are available now. 
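+
+        A sketch of the resulting choice (mirroring the body below):
+            'MakeIterator'    -> tf.compat.v1.data.make_initializable_iterator(new_dataset)
+            'OneShotIterator' -> tf.compat.v1.data.make_one_shot_iterator(new_dataset)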
+ + Args: + old_get_next: The old 'IteratorGetNext' operation. + new_dataset: The new dataset which contains sliced subgraph and corresponding additional outputs. + + Returns: + new_get_next: The new 'IteratorGetNext' operation. + """ + + if not old_get_next.outputs: + raise RuntimeError("no available tensor in the dataset. Please check the dataset and data processing.") + + iter_type = None + if old_get_next.inputs: + iter_type = old_get_next.inputs[0].op.type + if iter_type == AnchorIteratorOp.ITERATOR_V2.value: + iter_type = modifier.find_make_iterator_op(old_get_next.outputs[0]).type + if iter_type not in (AnchorIteratorOp.MAKE_ITERATOR.value, AnchorIteratorOp.ONE_SHOT_ITERATOR.value): + raise RuntimeError( + f"only iterators `MakeIterator` and `OneShotIterator` are supported in `graph modify` mode, " + f"but the current iterator is `{iter_type}`." + ) + logger.info("The iterator type of old dataset is %s.", iter_type) + + if iter_type == AnchorIteratorOp.MAKE_ITERATOR.value: + new_iterator = tf.compat.v1.data.make_initializable_iterator(new_dataset) + else: + new_iterator = tf.compat.v1.data.make_one_shot_iterator(new_dataset) + logger.info("Got new iterator: %s from dataset %s.", new_iterator, new_dataset) + + new_batch_name = "{}/{}".format( + NoGradSubgraphSlicer._SLICED_OP_NAME_PREFIX, AnchorIteratorOp.ITERATOR_GET_NEXT.value + ) + new_batch = new_iterator.get_next(name=new_batch_name) + + # WARN: Couple with user model, this collection has been added manually. + if "timestamp" in new_batch.keys(): + tf.compat.v1.add_to_collection(ASCEND_TIMESTAMP, new_batch["timestamp"]) + + try: + new_batch_tensor = new_batch + while not isinstance(new_batch_tensor, NoGradSubgraphSlicer._VALID_TENSOR_CLASS): + if isinstance(new_batch_tensor, tuple): + new_batch_tensor = new_batch_tensor[0] + elif isinstance(new_batch_tensor, dict): + new_batch_tensor = list(new_batch_tensor.values()) + elif isinstance(new_batch_tensor, list): + new_batch_tensor = new_batch_tensor[0] + elif isinstance(new_batch_tensor, NoGradSubgraphSlicer._VALID_TENSOR_CLASS): + break + else: + raise RuntimeError(f"batch value {new_batch_tensor} of {type(new_batch_tensor)} is not supported.") + except IndexError as err: + raise IndexError("cannot find a tensor from given batch.") from err + + new_get_next = self._upward_bfs_op(new_batch_tensor.op, AnchorIteratorOp.ITERATOR_GET_NEXT.value) + + logger.info("Got new_get_next: %s.", new_get_next) + return new_get_next + + def _replace_get_next( + self, + old_get_next: Operation, + new_get_next: Operation, + out_op_to_edge_ops: Dict[Operation, Set[Operation]], + sliced_ops: Set[Operation], + ) -> None: + """Replace the old 'IteratorGetNext' operation with the new one. + + 1. This func will update the consumer of the old 'IteratorGetNext' operation to the new one. + 2. This func will update the consumer of the output tensors of the sliced subgraph to the new one. + + Args: + old_get_next: The old 'IteratorGetNext' operation. + new_get_next: The new 'IteratorGetNext' operation. + out_op_to_edge_ops: The output relationship of sliced subgraph. + sliced_ops: The operation set that has been sliced. 
+ """ + + for t in old_get_next.outputs: + self._update_old_get_next_consumer(t, new_get_next, sliced_ops) + + next_offset = len(old_get_next.outputs) - 1 + sorted_outputs = self._sort_sliced_graph_outputs(out_op_to_edge_ops) + + for t in sorted_outputs: + next_offset += 1 + self._update_sliced_graph_consumer(t, new_get_next, next_offset) + + def _update_old_get_next_consumer( + self, old_get_next_output: Tensor, new_get_next: Operation, sliced_ops: Set[Operation] + ) -> None: + """Update the consumer of the old 'IteratorGetNext' operation to the new one. + + Args: + old_get_next_output: The output tensor of the old 'IteratorGetNext' operation. + new_get_next: The new 'IteratorGetNext' operation. + sliced_ops: The operation set that has been sliced. + """ + + old_tensor_name = old_get_next_output.name + output_index = old_tensor_name.split(":")[-1] + new_tensor_name = f"{new_get_next.name}:{output_index}" + new_tensor = self._full_graph.get_tensor_by_name(new_tensor_name) + + old_tensor_consumers = self._get_tensor_consumers(old_get_next_output) + for consumer in old_tensor_consumers: + if consumer in sliced_ops: + logger.debug("Ignore consumer: %s in sliced operations.", consumer.name) + continue + for i, t in enumerate(consumer.inputs): + if t != old_get_next_output: + logger.debug( + "Ignore input %s of consumer %s, cause it not output of 'IteratorGetNext'.", + t.name, + consumer.name, + ) + continue + consumer._update_input(i, new_tensor) + logger.debug( + "Succeed replace old input %s of consumer %s to new input %s.", + old_tensor_name, + consumer.name, + new_tensor, + ) + + def _update_sliced_graph_consumer( + self, sliced_graph_output: Tensor, new_get_next: Operation, next_offset: int + ) -> None: + """Update the consumer of the output tensors of the sliced subgraph to the new one. + + The outputs of the sliced subgraph are not the original outputs of 'IteratorGetNext'. Thus, next offset should + trace the last index of outputs of new 'IteratorGetNext'. + + Args: + sliced_graph_output: The output tensor of the sliced subgraph. + new_get_next: The new 'IteratorGetNext' operation. + next_offset: The last offset of the new 'IteratorGetNext' operation. 
+ """ + + new_tensor_name = f"{new_get_next.name}:{next_offset}" + new_tensor = self._full_graph.get_tensor_by_name(new_tensor_name) + + old_tensor_consumers = self._get_tensor_consumers(sliced_graph_output) + for consumer in old_tensor_consumers: + if consumer.type in NoGradSubgraphSlicer._INVALID_CONSUMER_OP_TYPE: + logger.debug("Ignore invalid consumer: %s.", consumer.name) + continue + for i, t in enumerate(consumer.inputs): + if t != sliced_graph_output: + logger.debug( + "Ignore input %s of consumer %s, cause it not output of sliced graph.", + t.name, + consumer.name, + ) + continue + consumer._update_input(i, new_tensor) + logger.debug( + "Succeed replace old input %s of consumer %s to new input %s.", + sliced_graph_output, + consumer.name, + new_tensor, + ) + + @staticmethod + def _find_min_dep_ops( + tgt_ops: Set[Operation], + ) -> Set[Operation]: + logger.debug("Search from base nodes: %s.", tgt_ops) + base_ops = tgt_ops.copy() + visited_ops = base_ops + + loop_cnt = 0 + while base_ops: + loop_cnt += 1 + if loop_cnt > MAX_WHILE_SIZE: + raise RuntimeError(f"maximum loop times exceed limit: {MAX_WHILE_SIZE}.") + + parent_ops = set() + for base_node in base_ops: + if len(base_node.control_inputs) != 0: + raise ValueError("control dependencies are not supported.") + + parent_ops.update( + tensor_in.op + for tensor_in in base_node.inputs + if tensor_in.op.type != AnchorIteratorOp.ITERATOR_GET_NEXT.value + ) + + new_ops = parent_ops - visited_ops + base_ops = parent_ops + visited_ops.update(new_ops) + + logger.debug("Found minimum dependency graph nodes: %s.", visited_ops) + return visited_ops + + @staticmethod + def _validate_op(op: Operation) -> bool: + op_type = op.type + op_name = op.name + op_inputs = op.inputs + op_outputs = op.outputs + + for s in NoGradSubgraphSlicer._INVALID_STR_IN_OP_TYPE: + if s in op_type: + logger.warning("Invalid operation type: %s which contains str: %s.", op_type, s) + return False + for s in NoGradSubgraphSlicer._INVALID_STR_IN_OP_NAME: + if s in op_name: + logger.warning("Invalid operation name: %s which contains str: %s.", op_name, s) + return False + for t in op_inputs: + if t.dtype in NoGradSubgraphSlicer._INVALID_TENSOR_DTYPE: + logger.warning("Invalid operation input tensor of operation: %s whose type is %s.", t, t.dtype) + return False + for t in op_outputs: + if t.dtype in NoGradSubgraphSlicer._INVALID_TENSOR_DTYPE: + logger.warning("Invalid operation output tensor of operation: %s whose type is %s.", t, t.dtype) + return False + + return True + + @staticmethod + def _update_subgraph_in( + base_ops: Operation, + input_to_edge_ops: Dict[Operation, Set[Operation]], + sub_graph_ops: Set[Operation], + ) -> None: + for input_tensor in base_ops.inputs: + input_node = input_tensor.op + if input_node not in sub_graph_ops: + res = input_to_edge_ops.get(input_node, set()) + res.add(base_ops) + input_to_edge_ops[input_node] = res + + @staticmethod + def _update_subgraph_out( + base_ops: Operation, + out_op_to_edge_ops: Dict[Operation, Set[Operation]], + sub_graph_ops: Set[Operation], + ) -> None: + for output_tensor in base_ops.outputs: + for output_consumer in output_tensor.consumers(): + if output_consumer not in sub_graph_ops: + res = out_op_to_edge_ops.get(output_consumer, set()) + res.add(base_ops) + out_op_to_edge_ops[output_consumer] = res + + @staticmethod + def _upward_bfs_op(base_ops: Union[Operation, Set[Operation], List[Operation]], tgt_op_type: str) -> Operation: + if not isinstance(base_ops, (set, list)): + base_ops = [base_ops] + + 
parent_ops = base_ops
+        while True:
+            for parent_op in parent_ops:
+                if parent_op.type == tgt_op_type:
+                    return parent_op
+            base_ops = parent_ops
+            parent_ops = []
+            for base_op in base_ops:
+                parent_ops.extend(utils.find_parent_op(base_op))
+            if not parent_ops:
+                raise ValueError(f"target operation '{tgt_op_type}' was not found.")
+
+    @staticmethod
+    def _topo_sort_sliced_ops(sliced_ops: Set[Operation]) -> List[Operation]:
+        topo_subgraph_list = []
+        topo_subgraph_set = set()
+        start_nodes = set(sliced_ops)
+        logger.info("Got topo_subgraph start nodes: %s", start_nodes)
+
+        def topo_sort_helper(curr_op, output_list, output_set):
+            if not isinstance(curr_op, Operation):
+                raise RuntimeError(f"topological sort expects a node (tf.Operation), but got: {curr_op}.")
+            curr_inputs = curr_op.inputs
+            logger.debug("Got topo_dfs: %s <- %s", curr_op.name, [x.name for x in curr_inputs])
+            current_control_inputs = curr_op.control_inputs
+            if len(current_control_inputs) > 0:
+                raise RuntimeError(
+                    f"control inputs are not supported: {curr_op.name}, control_inputs: {current_control_inputs}"
+                )
+            if curr_op in output_set:
+                return
+            output_set.add(curr_op)
+            for tensor in curr_inputs:
+                node = tensor.op
+                if node.type != AnchorIteratorOp.ITERATOR_GET_NEXT.value and node not in output_set:
+                    topo_sort_helper(node, output_list, output_set)
+            output_list.append(curr_op)
+
+        for start_node in start_nodes:
+            topo_sort_helper(start_node, topo_subgraph_list, topo_subgraph_set)
+        if len(topo_subgraph_list) != len(topo_subgraph_set):
+            raise RuntimeError(f"got duplicated topological nodes: {sorted(topo_subgraph_list, key=lambda x: x.name)}.")
+        logger.info("Got topo_subgraph: %s", topo_subgraph_list)
+        return topo_subgraph_list
+
+    @staticmethod
+    def _get_mapping_for_subgraph_in(
+        from_op: Operation,
+        to_ops: Set[Operation],
+        tensor_mapping: Union[Dict[Tensor, Tensor], Dict[SparseTensor, SparseTensor]],
+    ) -> None:
+        if from_op.type != AnchorIteratorOp.ITERATOR_GET_NEXT.value:
+            raise RuntimeError(f"expected 'IteratorGetNext' as the subgraph input, but got {from_op}.")
+        for node in to_ops:
+            for each_tensor in node.inputs:
+                if each_tensor.op.type != AnchorIteratorOp.ITERATOR_GET_NEXT.value:
+                    continue
+                old_tensor_name = each_tensor.name
+                x_index = int(old_tensor_name.split(":")[-1])
+                g = tf.compat.v1.get_default_graph()
+                arg_tensor = g.get_tensor_by_name("args_%d:0" % x_index)
+                tensor_mapping[each_tensor] = arg_tensor
+
+    @staticmethod
+    def _get_mapping_for_subgraph(
+        old_op: Operation,
+        node_mapping: Dict[Operation, Operation],
+        tensor_mapping: Dict[Tensor, Tensor],
+    ) -> None:
+        logger.debug("old operation name: %s\nold operation inputs: %s\n", old_op.name, list(old_op.inputs))
+
+        for each_tensor in old_op.inputs:
+            if each_tensor not in tensor_mapping:
+                raise RuntimeError(
+                    f"input tensor {each_tensor} needed by {old_op.name} is not in tensor_mapping: {tensor_mapping}."
+                )
+        new_inputs = NoGradSubgraphSlicer._get_mapped_tensor(tensor_mapping, old_op.inputs)
+
+        node_def = old_op.node_def
+        node_def.name = "{}/{}".format(NoGradSubgraphSlicer._SLICED_OP_NAME_PREFIX, node_def.name)
+        new_node = tf.Operation(node_def=node_def, g=tf.compat.v1.get_default_graph(), inputs=new_inputs)
+
+        node_mapping[old_op] = new_node
+        for old_out_tensor, new_out_tensor in zip(old_op.outputs, new_node.outputs):
+            tensor_mapping[old_out_tensor] = new_out_tensor
+
+    @staticmethod
+    def _get_mapped_tensor(tensor2tensor: Dict[Tensor, Tensor], keys: List[Tensor]) -> List[Tensor]:
+        tensors = []
+        for 
k in keys: + if k not in tensor2tensor: + raise KeyError(f"failed to find key tensor: {k} from tensor map: {tensor2tensor}.") + tensors.append(tensor2tensor[k]) + return tensors + + @staticmethod + def _sort_sliced_graph_outputs(subgraph_out: Dict[Operation, Set[Operation]]) -> List[Tensor]: + extra_outputs = [] + sorted_outputs = sorted(subgraph_out.items(), key=lambda x: x[0].name) + for outside_op, edge_ops in sorted_outputs: + outside_op_inputs = set(outside_op.inputs) + for edge_op in edge_ops: + NoGradSubgraphSlicer._add_sorted_additional_tensors(extra_outputs, outside_op_inputs, edge_op) + return extra_outputs + + @staticmethod + def _add_sorted_additional_tensors(extra_outputs, outside_op_inputs, edge_op) -> None: + for each_tensor in sorted(edge_op.outputs, key=lambda x: x.name): + if each_tensor not in outside_op_inputs: + continue + if each_tensor in extra_outputs: + continue + extra_outputs.append(each_tensor) + + @staticmethod + def _get_tensor_consumers(tensor: Tensor) -> List[Operation]: + if not isinstance(tensor, NoGradSubgraphSlicer._VALID_TENSOR_CLASS): + raise RuntimeError(f"expected 'tf.Tensor' or 'tf.SparseTensor', but got: {tensor}") + + graph = tensor.graph + consumers = [] + consumer_names = [op.name for op in tensor.consumers()] + + with graph._lock: + for name in consumer_names: + if name not in graph._nodes_by_name: # ignore deleted node + continue + consumers.append(graph._nodes_by_name[name]) + + return consumers + + +@para_checker_decorator( + check_option_list=[ + ("op_types", ClassValidator, {"classes": (list,)}), + ("full_graph", ClassValidator, {"classes": (Graph, type(None))}), + ("info_dir", ClassValidator, {"classes": (str,)}), + ] +) +class LookupSubgraphSlicer(NoGradSubgraphSlicer): + def __init__(self, op_types: List[str], full_graph: Graph = None, info_dir: str = "lookup_slicing") -> None: + """Initialize LookupSubgraphSlicer. + Args: + op_types: The list of operation types to be sliced in lookup subgraph. + full_graph: The full graph to be sliced. If None, the default graph will be used. + info_dir: The directory to save the slicing information. Defaults to "lookup_slicing". 
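+
+        Raises:
+            ValueError: If 'op_types' is empty.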
+ """ + super().__init__(full_graph, info_dir) + if not op_types: + raise ValueError("no slicing operation types specified!") + self._op_types = set(op_types) + + def summarize(self) -> None: # pragma: no cover + all_tgt_ops = self._find_all_tgt_ops() + (train_sliceable_tgt_ops, eval_sliceable_tgt_ops) = self._find_sliceable_tgt_ops() + all_sliceable_tgt_ops = train_sliceable_tgt_ops | eval_sliceable_tgt_ops + + result = {"Operation Type": [], "Total Num": [], "Sliceable Num": [], "Sliceable Ratio": []} + + for op_type in self._op_types: + tgt_ops = set(filter(lambda op: op.type == op_type, all_tgt_ops)) + sliceable_tgt_ops = set(filter(lambda op: op.type == op_type, all_sliceable_tgt_ops)) + + total_num = len(tgt_ops) + sliceable_num = len(sliceable_tgt_ops) + + try: + sliceable_ratio = sliceable_num / total_num + except ZeroDivisionError: + logger.warning("No target operaiton types '%s' found in given graph.", self._op_types) + + result["Operation Type"].append(op_type) + result["Total Num"].append(total_num) + result["Sliceable Num"].append(sliceable_num) + result["Sliceable Ratio"].append(sliceable_ratio) + + result_df = pd.DataFrame(data=result) + file = "{}/{}".format(self._info_dir, NoGradSubgraphSlicer._SLICING_SUMMARY_NAME) + result_df.to_csv(file, sep=",") + + logger.info("Summary of slicing:\n%s", result_df) + + def slice(self) -> None: + utils.export_pb_graph( + file_name=NoGradSubgraphSlicer._UNSLICED_FULL_GRAPH_NAME, + dump_graph=True, + graph_def=self._full_graph.as_graph_def(), + export_path=self._info_dir, + ) + + (train_sliceable_ops, eval_sliceable_ops) = self._find_sliceable_tgt_ops() + + if train_sliceable_ops: + logger.info("Start to slice training lookup subgraph.") + self._slice_ops(train_sliceable_ops, is_training=True) + + if eval_sliceable_ops: + logger.info("Start to slice evaluation lookup subgraph.") + self._slice_ops(eval_sliceable_ops, is_training=False) + + utils.export_pb_graph( + file_name=NoGradSubgraphSlicer._SLICED_FULL_GRAPH_NAME, + dump_graph=True, + graph_def=self._full_graph.as_graph_def(), + export_path=self._info_dir, + ) + + def _find_all_tgt_ops(self) -> Set[Operation]: + """Found all operations of specific types in full graph.""" + all_tgt_ops = set() + all_ops = self._full_graph.get_operations() + + for op in all_ops: + if op.type not in self._op_types: + continue + all_tgt_ops.add(op) + + return all_tgt_ops + + def _find_sliceable_tgt_ops(self) -> Tuple[Set[Operation], Set[Operation]]: + """Found sliceable operations of given types in lookup subgraph.""" + + # WARN: Couple with mx_rec::core::embedding module. 
+ lookup_keys = tf.compat.v1.get_collection(ASCEND_SPARSE_LOOKUP_ENTRANCE) + + train_base_ops = set() + eval_base_ops = set() + for t in lookup_keys: + if BaseSparseEmbedding.get_anchor_attribute(t, ASCAnchorAttr.IS_TRAINING): + train_base_ops.add(t.op) + else: + eval_base_ops.add(t.op) + + def find_sliceable_ops(base_ops): + min_dep_ops = self._find_min_dep_ops(base_ops) + + sliceable_ops = set() + for op in min_dep_ops: + if not self._validate_op(op): + continue + if op.type not in self._op_types: + continue + sliceable_ops.add(op) + + return sliceable_ops + + train_sliceable_ops = find_sliceable_ops(train_base_ops) + eval_sliceable_ops = find_sliceable_ops(eval_base_ops) + + logger.debug("Found sliceable operations in training lookup subgraph: %s.", train_sliceable_ops) + logger.debug("Found sliceable operations in evaluation lookup subgraph: %s.", eval_sliceable_ops) + return (train_sliceable_ops, eval_sliceable_ops) + + +@para_checker_decorator( + check_option_list=[ + ("full_graph", ClassValidator, {"classes": (Graph, type(None))}), + ("info_dir", ClassValidator, {"classes": (str,)}), + ] +) +class OrphanLookupKeySlicer(NoGradSubgraphSlicer): + SLICEABLE_ORPHAN_LOOKUP_KEY_PREFIX = "orphan" + + def __init__(self, full_graph: Graph = None, info_dir: str = "orphan_slicing") -> None: + """Initialize OrphanLookupKeySlicer. + Args: + full_graph: The full graph to be sliced. If None, the default graph will be used. + info_dir: The directory to save the slicing information. Defaults to "orphan_slicing". + """ + super().__init__(full_graph, info_dir) + + def summarize(self) -> None: # pragma: no cover + (train_sliceable_ops, _) = self._find_sliceable_tgt_ops() + + if len(train_sliceable_ops) == 0: + return + + result = {"Operation Type": [], "Operation Name": []} + for op in train_sliceable_ops: + result["Operation Type"].append(op.type) + result["Operation Name"].append(op.name) + + result_df = pd.DataFrame(data=result) + file = "{}/{}".format(self._info_dir, NoGradSubgraphSlicer._SLICING_SUMMARY_NAME) + result_df.to_csv(file, sep=",") + + logger.info("Summary of slicing:\n%s", result_df) + + def slice(self) -> None: + utils.export_pb_graph( + file_name=NoGradSubgraphSlicer._UNSLICED_FULL_GRAPH_NAME, + dump_graph=True, + graph_def=self._full_graph.as_graph_def(), + export_path=self._info_dir, + ) + + (train_sliceable_ops, eval_sliceable_ops) = self._find_sliceable_tgt_ops() + + if train_sliceable_ops: + logger.info("Start to slice training lookup subgraph.") + self._slice_ops(train_sliceable_ops, is_training=True) + + if eval_sliceable_ops: + logger.info("Start to slice evaluation lookup subgraph.") + self._slice_ops(eval_sliceable_ops, is_training=False) + + utils.export_pb_graph( + file_name=NoGradSubgraphSlicer._SLICED_FULL_GRAPH_NAME, + dump_graph=True, + graph_def=self._full_graph.as_graph_def(), + export_path=self._info_dir, + ) + + def _slice_ops(self, sliceable_ops: Set[Operation], is_training: bool) -> None: + """Override the '_slice_ops' protected method of super class.""" + + sliced_ops = self._find_min_dep_ops(sliceable_ops) + in_op_to_edge_ops, out_op_to_edge_ops = self._find_subgraph_in_and_out(sliced_ops) + + all_get_nexts = [ + op for op in self._full_graph.get_operations() if op.type == AnchorIteratorOp.ITERATOR_GET_NEXT.value + ] + alive_get_nexts = list( + filter( + lambda op: op not in tf.compat.v1.get_collection(DeprecatedOp.DEPRECATED_ITERATOR_GET_NEXT), + all_get_nexts, + ) + ) + alive_get_nexts = sorted(alive_get_nexts, key=lambda op: op.name) + + old_get_next = None 
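+        # A single live 'IteratorGetNext' serves both phases; with two, the name-sorted first
+        # one belongs to the training graph and the second to the evaluation graph.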
+        if len(alive_get_nexts) == 1:
+            old_get_next = alive_get_nexts[0]
+        else:
+            old_get_next = alive_get_nexts[0] if is_training else alive_get_nexts[1]
+
+        old_dataset = self._find_old_dataset(old_get_next, is_training)
+
+        new_dataset = self._make_new_dataset(old_dataset, sliced_ops, in_op_to_edge_ops, out_op_to_edge_ops)
+        new_dataset = new_dataset.prefetch(0)
+
+        new_get_next = self._make_new_get_next(old_get_next, new_dataset)
+        self._replace_get_next(old_get_next, new_get_next, out_op_to_edge_ops, sliced_ops)
+
+    def _find_sliceable_tgt_ops(self) -> Tuple[Set[Operation], Set[Operation]]:
+        """Find orphan keys' additional identity operations in the lookup subgraph."""
+
+        # WARN: Coupled with the mx_rec::core::embedding module.
+        lookup_keys = tf.compat.v1.get_collection(ASCEND_SPARSE_LOOKUP_ENTRANCE)
+
+        train_base_ops = set()
+        eval_base_ops = set()
+        for t in lookup_keys:
+            if BaseSparseEmbedding.get_anchor_attribute(t, ASCAnchorAttr.IS_TRAINING):
+                train_base_ops.add(t.op)
+            else:
+                eval_base_ops.add(t.op)
+
+        def find_sliceable_ops(base_ops):
+            min_dep_ops = self._find_min_dep_ops(base_ops)
+
+            sliceable_ops = set()
+            for op in min_dep_ops:
+                if not self._validate_op(op):
+                    continue
+                if OrphanLookupKeySlicer.SLICEABLE_ORPHAN_LOOKUP_KEY_PREFIX not in op.name:
+                    continue
+                sliceable_ops.add(op)
+
+            return sliceable_ops
+
+        train_sliceable_ops = find_sliceable_ops(train_base_ops)
+        eval_sliceable_ops = find_sliceable_ops(eval_base_ops)
+
+        logger.debug("Found sliceable operations in training lookup subgraph: %s.", train_sliceable_ops)
+        logger.debug("Found sliceable operations in evaluation lookup subgraph: %s.", eval_sliceable_ops)
+        return (train_sliceable_ops, eval_sliceable_ops)
diff --git a/mx_rec/graph/utils.py b/mx_rec/graph/utils.py
index c010d80d..8ffc8bc6 100644
--- a/mx_rec/graph/utils.py
+++ b/mx_rec/graph/utils.py
@@ -17,16 +17,17 @@
 import os
 from collections import defaultdict
-from typing import List, Dict, Union
+from typing import List, Dict, Union, DefaultDict, Tuple
 
 import tensorflow as tf
 from tensorflow import Operation, Tensor
 from tensorflow.core.framework.graph_pb2 import GraphDef
 from tensorflow.python.framework.errors_impl import InvalidArgumentError
 
+from mx_rec.graph.slicers import OrphanLookupKeySlicer
+from mx_rec.graph.constants import AnchorIteratorOp
 from mx_rec.constants.constants import ASCAnchorAttr, DUMP_MIDIFY_GRAPH_FILE_MODE
 from mx_rec.core.embedding import BaseSparseEmbedding
-from mx_rec.graph.graph_typing import ReplacementSpec
 from mx_rec.util.log import logger
 
 
@@ -46,21 +47,21 @@ def find_parent_op(operator: Operation) -> List[Operation]:
     parent_ops = []
     for input_tensor in operator.inputs:
         parent_op = input_tensor.op
-        if isinstance(parent_op, tf.Operation):
+        if isinstance(parent_op, Operation):
             parent_ops.append(parent_op)
 
     return parent_ops
 
 
 def check_cutting_points(cutting_point_list: List[Tensor]):
     for tensor in cutting_point_list:
-        if not isinstance(tensor, tf.Tensor):
+        if not isinstance(tensor, Tensor):
             raise TypeError(f"Collection ASCEND_CUTTING_POINT can only contain Tensors, but '{tensor}' was found.")
         if tensor.op.type != "Identity":
             raise ValueError(f"Cutting point can only be the output of an Operator 'Identity'.")
 
 
-def record_ops_to_replace(src_op: Operation) -> ReplacementSpec:
+def record_ops_to_replace(src_op: Operation) -> DefaultDict[Tensor, List[Tuple[int, Operation]]]:
     replacement_specs = defaultdict(list)
     output_list = src_op.outputs
     op_list = tf.compat.v1.get_default_graph().get_operations()
@@ -73,7 +74,7 @@ def 
record_ops_to_replace(src_op: Operation) -> ReplacementSpec: return replacement_specs -def replace_anchor(replacement_specs: ReplacementSpec, new_tensor_list: List[Tensor]): +def replace_anchor(replacement_specs: DefaultDict[Tensor, List[Tuple[int, Operation]]], new_tensor_list: List[Tensor]): if len(replacement_specs) != len(new_tensor_list): raise ValueError(f"Given replacement_specs and new_tensor_list must have the same length. " f"replacement_specs: {replacement_specs}, new_tensor_list: {new_tensor_list}") @@ -93,7 +94,7 @@ def export_pb_graph(file_name: str, dump_graph: bool = False, graph_def: GraphDef = None, export_path: str = "./export_graph", - as_text: bool = False): + as_text: bool = True): """ Save tensorflow graph before and after modifier graph :param file_name: FileName of the graph @@ -164,15 +165,16 @@ def replace_anchor_vec(cutting_point: Tensor, attribute: ASCAnchorAttr, anchor: replace_anchor(replacement_specs_for_anchor_vec, [anchor]) -def tag_orphan_ids(ids: tf.Tensor) -> tf.Tensor: - """ - 将孤儿ids使用identity操作创建ACG_PUSH_NODE前缀命名的标记节点,以便在PushOps时能找到。 - """ +def mark_orphan_lookup_key(lookup_key: Tensor) -> Tensor: graph_def = tf.compat.v1.get_default_graph().as_graph_def() - subgraph = tf.compat.v1.graph_util.extract_sub_graph(graph_def, [ids.op.name]) + subgraph = tf.compat.v1.graph_util.extract_sub_graph(graph_def, [lookup_key.op.name]) + for node in subgraph.node: - if node.op == 'IteratorGetNext': - return ids - new_ids = tf.identity(ids, name=f"ACG_PUSH_NODE_{ids.op.name}") - logger.info('Tag orphan op node: %s with %s.', ids, new_ids) - return new_ids + if node.op == AnchorIteratorOp.ITERATOR_GET_NEXT.value: + return lookup_key + + name_prefix = OrphanLookupKeySlicer.SLICEABLE_ORPHAN_LOOKUP_KEY_PREFIX + marked_lookup_key = tf.identity(lookup_key, name="{}/{}".format(name_prefix, lookup_key.op.name)) + + logger.info('Mark orphan lookup key %s as %s.', lookup_key, marked_lookup_key) + return marked_lookup_key diff --git a/tests/mx_rec/graph/test_acg_push_ops.py b/tests/mx_rec/graph/test_acg_push_ops.py deleted file mode 100644 index 129b773f..00000000 --- a/tests/mx_rec/graph/test_acg_push_ops.py +++ /dev/null @@ -1,514 +0,0 @@ -#!/usr/bin/env python3 -# coding: UTF-8 -# Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -from unittest import TestCase -from unittest.mock import patch, Mock - -import tensorflow as tf -from tensorflow.core.framework import node_def_pb2 -from tensorflow.python.data.ops.dataset_ops import DatasetV1 -from mx_rec.graph.acg_push_ops import ( - ACGPushOpsToDatasetHook, - SubgraphInfo, - _OP_NAME_CONTAIN_STRING_TO_PUSH, - _ACG_NEW_INITIALIZER, - _find_ops_to_be_pushed, - _find_op_from_base_op, - _find_subgraph_nodes, - _get_mapping_tensor, - _topo_subgraph, - _get_dataset_op, - _clone_subgraph_into_funcgraph, - _update_subgraph_out_consumer, - _get_src_dataset, - _update_iterator_getnext, - _find_subgraph_in_out, - _push_subgraph_to_dataset, - _warn_for_var_scope_nodes, - _frozen_variable_node_to_func_const_node_def, - _update_old_consumer, - _get_mapping_for_subgraph, - _get_mapping_for_subgraph_in, - _ordered_output_from_subgraph, - _replace_get_next_op, - _patched_get_src_dataset, -) -from tests.mx_rec.core.mock_class import MockConfigInitializer -from tests.mx_rec.graph.mock_dataset import gen_mock_dataset - - -@patch.multiple( - "mx_rec.graph.patch", - ConfigInitializer=Mock(return_value=MockConfigInitializer(modify_graph=True, is_graph_modify_hook_running=True)), -) -@patch.multiple( - "tensorflow.compat.v1.train.Saver", - __init__=Mock(return_value=None), - build=Mock(), -) -@patch.multiple("mx_rec.graph.acg_push_ops", _find_ops_to_be_pushed=Mock()) -class ACGPushOpsToDatasetHookTest(TestCase): - def tearDown(self) -> None: - tf.compat.v1.reset_default_graph() - - def test_ok(self): - mock_dataset = gen_mock_dataset() - mock_iterator = mock_dataset.make_initializable_iterator() - mock_batch = mock_iterator.get_next() - mock_ids = mock_batch.get("mock_ids") - mock_cutting_point = tf.identity(mock_ids) - - mock_new_iterator = mock_dataset.make_initializable_iterator() - tf.compat.v1.add_to_collection(_ACG_NEW_INITIALIZER, mock_new_iterator.initializer) - - with tf.compat.v1.train.MonitoredSession(hooks=[ACGPushOpsToDatasetHook()]) as sess: - sess.run(mock_iterator.initializer) - sess.run(mock_cutting_point) - - -@patch.multiple( - "mx_rec.graph.acg_push_ops", - _find_subgraph_nodes=Mock(return_value=set()), - _push_subgraph_to_dataset=Mock(), -) -class FindOpsToBePushedTest(TestCase): - def tearDown(self) -> None: - tf.compat.v1.reset_default_graph() - - def test_ok_op_contain_str_to_push(self): - tensor = tf.constant(value=[1, 2, 3], name="MOCK" + list(_OP_NAME_CONTAIN_STRING_TO_PUSH)[0]) - mock_graph = tf.compat.v1.get_default_graph() - _find_ops_to_be_pushed(mock_graph) - - def test_ok_op_type_to_push(self): - const_tensor = tf.constant(value=[1, 2, 3], dtype=tf.int32) - str_tensor = tf.compat.v1.as_string(const_tensor) - num_tensor = tf.compat.v1.string_to_number(str_tensor) - mock_graph = tf.compat.v1.get_default_graph() - _find_ops_to_be_pushed(mock_graph) - - def test_ok_no_node_to_push(self): - mock_graph = tf.compat.v1.get_default_graph() - _find_ops_to_be_pushed(mock_graph) - - -class FindSubgraphNodesTest(TestCase): - def tearDown(self) -> None: - tf.compat.v1.reset_default_graph() - - def test_ok(self): - mock_dataset = gen_mock_dataset() - mock_iterator = mock_dataset.make_initializable_iterator() - mock_batch = mock_iterator.get_next() - mock_ids = mock_batch.get("mock_ids") - - tensor_in_subgraph = tf.identity(mock_ids) - tensor_out_subgraph = tf.identity(tensor_in_subgraph) - mock_base_nodes = {tensor_out_subgraph.op} - - subgraph_nodes = _find_subgraph_nodes( - 
tf.compat.v1.get_default_graph(), mock_base_nodes, tgt_op_type="IteratorGetNext" - ) - self.assertEqual(subgraph_nodes, {tensor_in_subgraph.op, tensor_out_subgraph.op}) - - -class WarnForVarScopeNodesTest(TestCase): - def tearDown(self) -> None: - tf.compat.v1.reset_default_graph() - - def test_ok(self): - with tf.compat.v1.variable_scope("mock_var_scope"): - var1 = tf.compat.v1.get_variable("var", shape=(3, 3), initializer=tf.random_normal_initializer()) - - mock_all_nodes = tf.compat.v1.get_default_graph().get_operations() - mock_base_node = var1.op - _warn_for_var_scope_nodes(mock_all_nodes, mock_base_node) - - -class FindOpFromBaseOpTest(TestCase): - def tearDown(self) -> None: - tf.compat.v1.reset_default_graph() - - def test_err_no_tgt_op_type(self): - parent_tensor = tf.ones(shape=(3, 3)) - child_tensor = tf.identity(parent_tensor) - with self.assertRaises(ValueError): - _find_op_from_base_op(child_tensor.op, "IteratorGetNext") - - -class GetDatasetOpTest(TestCase): - def tearDown(self) -> None: - tf.compat.v1.reset_default_graph() - - def test_ok(self): - mock_dataset = gen_mock_dataset() - mock_prefetch_dataset = mock_dataset.prefetch(buffer_size=10) - mock_iterator = mock_prefetch_dataset.make_initializable_iterator() - mock_batch = mock_iterator.get_next() - mock_ids = mock_batch.get("mock_ids") - mock_get_next_op = mock_ids.op - - mock_graph = tf.compat.v1.get_default_graph() - expected = mock_graph.get_operation_by_name("OptimizeDataset") - - tgt_dataset_op = _get_dataset_op(mock_graph, mock_get_next_op) - self.assertEqual(tgt_dataset_op, expected) - - def test_err_invalid_get_next_op_type(self): - mock_get_next_op = tf.zeros(shape=(3,)).op - mock_graph = tf.compat.v1.get_default_graph() - - with self.assertRaises(TypeError): - _get_dataset_op(mock_graph, mock_get_next_op) - - @patch.multiple("mx_rec.graph.acg_push_ops", _find_op_from_base_op=Mock(return_value=None)) - @patch.multiple("mx_rec.graph.acg_push_ops.modifier", find_parent_op=Mock(return_value=None)) - def test_err_no_tgt_op_found(self): - mock_dataset = gen_mock_dataset() - mock_iterator = mock_dataset.make_initializable_iterator() - mock_batch = mock_iterator.get_next() - mock_ids = mock_batch.get("mock_ids") - mock_get_next_op = mock_ids.op - - mock_graph = tf.compat.v1.get_default_graph() - - with self.assertRaises(RuntimeError): - _get_dataset_op(mock_graph, mock_get_next_op) - - -class OrderedOutputFromSubgraphTest(TestCase): - def tearDown(self) -> None: - tf.compat.v1.reset_default_graph() - - def test_ok(self): - mock_dataset = gen_mock_dataset() - mock_iterator = mock_dataset.make_initializable_iterator() - mock_batch = mock_iterator.get_next(name="IteratorGetNext") - mock_ids = mock_batch.get("mock_ids") - - mock_subgraph_out = {tf.identity(mock_ids).op: {mock_ids.op}} - - addition_funcgraph_output_tensor = _ordered_output_from_subgraph(mock_subgraph_out) - self.assertEqual(addition_funcgraph_output_tensor, [mock_ids]) - - -class PushSubgraphToDatasetTest(TestCase): - def tearDown(self) -> None: - tf.compat.v1.reset_default_graph() - - def test_ok(self): - mock_dataset = gen_mock_dataset() - mock_iterator = mock_dataset.make_initializable_iterator() - mock_batch = mock_iterator.get_next() - mock_ids = mock_batch.get("mock_ids") - - tensor_in_subgraph = tf.identity(mock_ids) - tensor_out_subgraph = tf.identity(tensor_in_subgraph) - mock_subgraph_to_push = {tensor_in_subgraph.op} - _push_subgraph_to_dataset(tf.compat.v1.get_default_graph(), mock_subgraph_to_push) - - -class FindSubgraphInOutTest(TestCase): 
- def tearDown(self) -> None: - tf.compat.v1.reset_default_graph() - - def test_ok(self): - mock_dataset = gen_mock_dataset() - mock_iterator = mock_dataset.make_initializable_iterator() - mock_batch = mock_iterator.get_next() - mock_ids = mock_batch.get("mock_ids") - - tensor_in_subgraph = tf.identity(mock_ids) - tensor_out_subgraph = tf.identity(tensor_in_subgraph) - mock_subgraph_nodes = {tensor_in_subgraph.op} - - ( - subgraph_in, - subgraph_out, - ) = _find_subgraph_in_out(mock_subgraph_nodes) - self.assertEqual(subgraph_in, {mock_ids.op: {tensor_in_subgraph.op}}) - self.assertEqual(subgraph_out, {tensor_out_subgraph.op: {tensor_in_subgraph.op}}) - - -class GetSrcDatasetTest(TestCase): - def tearDown(self) -> None: - tf.compat.v1.reset_default_graph() - - def test_ok_make_iterator(self): - mock_dataset = gen_mock_dataset() - mock_iterator = mock_dataset.make_initializable_iterator() - mock_batch = mock_iterator.get_next() - mock_ids = mock_batch.get("mock_ids") - mock_get_next_op = mock_ids.op - - src_dataset = _get_src_dataset(tf.compat.v1.get_default_graph(), mock_get_next_op) - self.assertEqual(src_dataset, mock_dataset) - - def test_ok_one_shot_iterator(self): - mock_dataset = gen_mock_dataset() - mock_prefetch_dataset = mock_dataset.prefetch(10) - mock_iterator = mock_prefetch_dataset.make_one_shot_iterator() - mock_batch = mock_iterator.get_next() - mock_ids = mock_batch.get("mock_ids") - mock_get_next_op = mock_ids.op - - src_dataset = _get_src_dataset(tf.compat.v1.get_default_graph(), mock_get_next_op) - self.assertEqual(src_dataset, mock_dataset) - - def test_err_no_anchor_dataset(self): - mock_dataset = gen_mock_dataset() - mock_iterator = mock_dataset.make_one_shot_iterator() - mock_batch = mock_iterator.get_next() - mock_ids = mock_batch.get("mock_ids") - mock_get_next_op = mock_ids.op - - with self.assertRaises(RuntimeError): - _get_src_dataset(tf.compat.v1.get_default_graph(), mock_get_next_op) - - -class CloneSubgraphIntoFuncgraphTest(TestCase): - def tearDown(self) -> None: - tf.compat.v1.reset_default_graph() - - def test_ok(self): - mock_dataset = gen_mock_dataset() - mock_iterator = mock_dataset.make_initializable_iterator() - mock_batch = mock_iterator.get_next() - mock_ids = mock_batch.get("mock_ids") - - mock_subgraph_in = {mock_ids.op: {tf.identity(mock_ids).op}} - mock_subgraph_out = {tf.identity(mock_ids).op: {mock_ids.op}} - mock_subgraph_to_push = set() - mock_subgraph_info = SubgraphInfo(mock_subgraph_in, mock_subgraph_out, mock_subgraph_to_push) - - mock_new_ids = tf.ones_like(mock_ids) - mock_x = [mock_new_ids] - mock_old_x = ({"mock_new_ids": mock_new_ids},) - - mock_defaultgraph = tf.compat.v1.get_default_graph() - with tf.Graph().as_default(): - mock_funcgraph = tf.compat.v1.get_default_graph() - _clone_subgraph_into_funcgraph(mock_funcgraph, mock_defaultgraph, mock_subgraph_info, mock_x, mock_old_x) - - -class GetMappingForSubgraphInTest(TestCase): - def tearDown(self) -> None: - tf.compat.v1.reset_default_graph() - - def test_ok(self): - mock_dataset = gen_mock_dataset() - mock_prefetch_dataset = mock_dataset.prefetch(10) - mock_iterator = mock_prefetch_dataset.make_one_shot_iterator() - mock_batch = mock_iterator.get_next() - mock_ids = mock_batch.get("mock_ids") - - mock_from_node = mock_ids.op - mock_to_nodes = {tf.identity(mock_ids).op} - mock_new_ids = tf.zeros_like(mock_ids) - mock_x = [mock_new_ids] - tensor_mapping = dict() - - _get_mapping_for_subgraph_in(mock_from_node, mock_to_nodes, mock_x, tensor_mapping) - 
self.assertEqual(tensor_mapping, {mock_ids: mock_new_ids}) - - -class GetMappingForSubgraphTest(TestCase): - def tearDown(self) -> None: - tf.compat.v1.reset_default_graph() - - def test_ok(self): - mock_defaultgraph = tf.compat.v1.get_default_graph() - - # NOTE: Simulate independent graph environment while executing `dataset.map()` method. - with tf.Graph().as_default(): - key_tensor = tf.zeros(shape=(1)) - val_tensor = tf.zeros(shape=(1)) - mock_tensor_mapping = {key_tensor: val_tensor} - - mock_node_mapping = dict() - mock_old_node = tf.identity(key_tensor).op - mock_funcgraph = tf.compat.v1.get_default_graph() - - _get_mapping_for_subgraph( - mock_funcgraph, mock_defaultgraph, mock_node_mapping, mock_old_node, mock_tensor_mapping - ) - - self.assertEqual(len(mock_node_mapping), 1) - self.assertEqual(len(mock_tensor_mapping), 2) - - -@patch.multiple( - "mx_rec.graph.patch", - ConfigInitializer=Mock(return_value=MockConfigInitializer(modify_graph=True, is_graph_modify_hook_running=True)), -) -class FrozenVariableNodeToFuncConstNodeDefTest(TestCase): - def tearDown(self) -> None: - tf.compat.v1.reset_default_graph() - - def test_ok(self): - var_tensor = tf.Variable(initial_value=[1], shape=(1,)) - tf.compat.v1.assign(ref=var_tensor, value=[1]) - - mock_funcgraph = tf.Graph() - mock_defaultgraph = tf.compat.v1.get_default_graph() - new_const_node: node_def_pb2.NodeDef = _frozen_variable_node_to_func_const_node_def( - var_tensor.op, mock_funcgraph, mock_defaultgraph - ) - self.assertEqual(new_const_node.op, "Const") - - -class GetMappingTensorTest(TestCase): - def tearDown(self) -> None: - tf.compat.v1.reset_default_graph() - - def test_ok(self): - key_tensor = tf.zeros(shape=(3, 3)) - val_tensor = tf.ones(shape=(3, 3)) - tsr2tsr = {key_tensor: val_tensor} - keys = [key_tensor] - - mapped_tensors = _get_mapping_tensor(tsr2tsr, keys) - self.assertEqual(mapped_tensors, [val_tensor]) - - def test_err_key_tensor_not_exist(self): - tsr2tsr = {tf.zeros(shape=(3, 3)): tf.ones(shape=(3, 3))} - keys = [tf.ones(shape=(3, 3))] - - with self.assertRaises(KeyError): - _get_mapping_tensor(tsr2tsr, keys) - - -class TopoSubgraphTest(TestCase): - def tearDown(self) -> None: - tf.compat.v1.reset_default_graph() - - def test_ok(self): - mock_dataset = gen_mock_dataset() - mock_prefetch_dataset = mock_dataset.prefetch(10) - mock_iterator = mock_prefetch_dataset.make_one_shot_iterator() - mock_batch = mock_iterator.get_next() - mock_ids = mock_batch.get("mock_ids") - mock_get_next_op = mock_ids.op - - tensor1 = tf.identity(mock_ids) - tensor2 = tf.add(tensor1, 1) - mock_subgraph = {tensor1.op, tensor2.op} - - const_op_for_add = None - for tensor in tensor2.op.inputs: - if tensor.op.name != "Add/y": - continue - const_op_for_add = tensor.op - - if not const_op_for_add: - self.fail( - f"Failed to find input of add operation, input tensor of add op: {[x.op for x in tensor2.op.inputs]}" - ) - - topo_subgraph_list = _topo_subgraph(mock_subgraph) - self.assertEqual(topo_subgraph_list, [tensor1.op, const_op_for_add, tensor2.op]) - - -class UpdateIteratorGetNextTest(TestCase): - def tearDown(self) -> None: - tf.compat.v1.reset_default_graph() - - def test_ok(self): - mock_old_dataset = gen_mock_dataset() - mock_old_iterator = mock_old_dataset.make_initializable_iterator() - mock_old_batch = mock_old_iterator.get_next(name="OldIteratorGetNext") - mock_old_ids = mock_old_batch.get("mock_ids") - mock_old_get_next_op = mock_old_ids.op - - mock_new_dataset: DatasetV1 = mock_old_dataset.map(lambda x: x) - mock_subgraph_out = 
{tf.identity(mock_old_ids).op: {mock_old_ids.op}} - - _update_iterator_getnext( - graph=tf.compat.v1.get_default_graph(), - get_next_op=mock_old_get_next_op, - tgt_dataset=mock_new_dataset, - subgraph_out=mock_subgraph_out, - subgraph_to_push=set(), - ) - - -class UpdateOldConsumerTest(TestCase): - def tearDown(self) -> None: - tf.compat.v1.reset_default_graph() - - def test_ok(self): - mock_dataset = gen_mock_dataset() - mock_iterator = mock_dataset.make_initializable_iterator() - mock_batch = mock_iterator.get_next(name="NewIteratorGetNext") - mock_ids = mock_batch.get("mock_ids") - mock_new_get_next_op = mock_ids.op - mock_output_tensor = tf.identity(mock_ids) - - _update_old_consumer( - graph=tf.compat.v1.get_default_graph(), - new_get_next_op=mock_new_get_next_op, - output_tensor=mock_ids, - subgraph_to_push=set(), - ) - - -class UpdateSubgraphOutConsumerTest(TestCase): - def tearDown(self) -> None: - tf.compat.v1.reset_default_graph() - - def test_ok(self): - mock_dataset = gen_mock_dataset() - mock_iterator = mock_dataset.make_initializable_iterator() - mock_batch = mock_iterator.get_next(name="NewIteratorGetNext") - mock_ids = mock_batch.get("mock_ids") - mock_new_get_next_op = mock_ids.op - mock_output_tensor = tf.identity(mock_ids) - - _update_subgraph_out_consumer( - graph=tf.compat.v1.get_default_graph(), - new_get_next_op=mock_new_get_next_op, - offset=0, - output_tensor=mock_ids, - ) - - -class PatchedGetSrcDatasetTest(TestCase): - def tearDown(self) -> None: - tf.compat.v1.reset_default_graph() - - def test_ok(self): - mock_dataset = gen_mock_dataset() - mock_prefetch_dataset = mock_dataset.prefetch(10) - mock_double_prefetch_dataset = mock_prefetch_dataset.prefetch(10) - mock_iterator = mock_prefetch_dataset.make_one_shot_iterator() - mock_batch = mock_iterator.get_next() - mock_ids = mock_batch.get("mock_ids") - mock_get_next_op = mock_ids.op - - src_dataset = _patched_get_src_dataset(mock_get_next_op, is_training=True) - self.assertEqual(src_dataset, mock_prefetch_dataset) - - def test_err_single_prefetch_dataset(self): - mock_dataset = gen_mock_dataset() - mock_prefetch_dataset = mock_dataset.prefetch(10) - mock_iterator = mock_prefetch_dataset.make_one_shot_iterator() - mock_batch = mock_iterator.get_next() - mock_ids = mock_batch.get("mock_ids") - mock_get_next_op = mock_ids.op - - with self.assertRaises(RuntimeError): - _patched_get_src_dataset(mock_get_next_op, is_training=True) diff --git a/tests/mx_rec/graph/test_modifier.py b/tests/mx_rec/graph/test_modifier.py index 14b87617..2a9af10d 100644 --- a/tests/mx_rec/graph/test_modifier.py +++ b/tests/mx_rec/graph/test_modifier.py @@ -31,9 +31,9 @@ from mx_rec.constants.constants import ( ASCAnchorAttr, ) from mx_rec.core.asc import FeatureSpec -from mx_rec.graph.graph_typing import AnchorRecord from mx_rec.graph.modifier import ( GraphModifierHook, + AnchorRecord, find_make_iterator_op, find_target_dataset_op, find_target_instance_dataset, -- Gitee From d0367f93d3458535409a7f1d3b96d75ec6b9678b Mon Sep 17 00:00:00 2001 From: longfeifei <962977793@qq.com> Date: Mon, 29 Apr 2024 06:13:07 +0000 Subject: [PATCH 072/302] =?UTF-8?q?!107=20=E5=8E=BB=E9=99=A4ascend=5Fvisib?= =?UTF-8?q?le=5Fdevices=E7=8E=AF=E5=A2=83=E5=8F=98=E9=87=8F=EF=BC=8C?= =?UTF-8?q?=E5=A2=9E=E6=B7=BBCM=5FWORKER=5FSIZE=E7=9A=84=E8=8C=83=E5=9B=B4?= =?UTF-8?q?=E6=A0=A1=E9=AA=8C=20*=20=E5=8E=BB=E9=99=A4ascend=5Fvisible=5Fd?= =?UTF-8?q?evices=E7=8E=AF=E5=A2=83=E5=8F=98=E9=87=8F=EF=BC=8C=E5=A2=9E?= 
=?UTF-8?q?=E6=B7=BBCM=5FWORKER=5FSIZE=E7=9A=84=E8=8C=83=E5=9B=B4=E6=A0=A1?= =?UTF-8?q?=E9=AA=8C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/DCNv2/run.sh | 1 - examples/demo/little_demo/run.sh | 1 - examples/demo/little_demo_estimator/run.sh | 1 - mx_rec/constants/constants.py | 6 +++++- mx_rec/util/global_env_conf.py | 16 +++++----------- tests/mx_rec/util/test_variable.py | 3 --- 6 files changed, 10 insertions(+), 18 deletions(-) diff --git a/examples/DCNv2/run.sh b/examples/DCNv2/run.sh index 1709959c..860ff53f 100644 --- a/examples/DCNv2/run.sh +++ b/examples/DCNv2/run.sh @@ -92,7 +92,6 @@ if [ -n "$ip" ]; then echo "CM_CHIEF_DEVICE=$CM_CHIEF_DEVICE" echo "CM_WORKER_IP=$CM_WORKER_IP" echo "CM_WORKER_SIZE=$CM_WORKER_SIZE" - echo "ASCEND_VISIBLE_DEVICES=$ASCEND_VISIBLE_DEVICES" else # ranktable echo "Current is ranktable solution, hccl json file:${hccl_cfg_json}" diff --git a/examples/demo/little_demo/run.sh b/examples/demo/little_demo/run.sh index de7fd806..9462a0cb 100644 --- a/examples/demo/little_demo/run.sh +++ b/examples/demo/little_demo/run.sh @@ -160,7 +160,6 @@ else echo "CM_CHIEF_DEVICE=$CM_CHIEF_DEVICE" echo "CM_WORKER_IP=$CM_WORKER_IP" echo "CM_WORKER_SIZE=$CM_WORKER_SIZE" - echo "ASCEND_VISIBLE_DEVICES=$ASCEND_VISIBLE_DEVICES" ######################################################### else echo "ip: $ip not available!" # 使用ranktable方案 diff --git a/examples/demo/little_demo_estimator/run.sh b/examples/demo/little_demo_estimator/run.sh index 2c78166f..6534fb21 100644 --- a/examples/demo/little_demo_estimator/run.sh +++ b/examples/demo/little_demo_estimator/run.sh @@ -150,7 +150,6 @@ else echo "CM_CHIEF_DEVICE=$CM_CHIEF_DEVICE" echo "CM_WORKER_IP=$CM_WORKER_IP" echo "CM_WORKER_SIZE=$CM_WORKER_SIZE" - echo "ASCEND_VISIBLE_DEVICES=$ASCEND_VISIBLE_DEVICES" ######################################################### else echo "ip: $ip not available!" 
# 使用ranktable方案 diff --git a/mx_rec/constants/constants.py b/mx_rec/constants/constants.py index a5f055ab..f69f32c8 100644 --- a/mx_rec/constants/constants.py +++ b/mx_rec/constants/constants.py @@ -43,6 +43,11 @@ DEFAULT_HD_CHANNEL_SIZE = 40 MAX_HD_CHANNEL_SIZE = 8192 MIN_HD_CHANNEL_SIZE = 2 +# CM_WORKER_SIZE集群节点数 +DEFAULT_CM_WORKER_SIZE = 0 +MAX_CM_WORKER_SIZE = 512 +MIN_CM_WORKER_SIZE = 0 + # key process线程数 DEFAULT_KP_THREAD_NUM = 6 MIN_KP_THREAD_NUM = 1 @@ -116,7 +121,6 @@ class BaseEnum(Enum): class EnvOption(Enum): MXREC_LOG_LEVEL = "MXREC_LOG_LEVEL" RANK_TABLE_FILE = "RANK_TABLE_FILE" - ASCEND_VISIBLE_DEVICES = "ASCEND_VISIBLE_DEVICES" CM_CHIEF_DEVICE = "CM_CHIEF_DEVICE" CM_WORKER_SIZE = "CM_WORKER_SIZE" TF_DEVICE = "TF_DEVICE" diff --git a/mx_rec/util/global_env_conf.py b/mx_rec/util/global_env_conf.py index 52b5af46..313f1693 100644 --- a/mx_rec/util/global_env_conf.py +++ b/mx_rec/util/global_env_conf.py @@ -22,7 +22,7 @@ from mx_rec.constants.constants import EnvOption, RecPyLogLevel, Flag, EMPTY_STR DEFAULT_HD_CHANNEL_SIZE, DEFAULT_KP_THREAD_NUM, DEFAULT_FAST_UNIQUE_THREAD_NUM, RecCPPLogLevel, MAX_INT32, \ MIN_HD_CHANNEL_SIZE, MAX_HD_CHANNEL_SIZE, MIN_KP_THREAD_NUM, MAX_KP_THREAD_NUM, \ MIN_FAST_UNIQUE_THREAD_NUM, MAX_FAST_UNIQUE_THREAD_NUM, DEFAULT_HOT_EMB_UPDATE_STEP, MIN_HOT_EMB_UPDATE_STEP, \ - MAX_HOT_EMB_UPDATE_STEP, TFDevice + MAX_HOT_EMB_UPDATE_STEP, TFDevice, MAX_CM_WORKER_SIZE, MIN_CM_WORKER_SIZE, DEFAULT_CM_WORKER_SIZE from mx_rec.validator.validator import para_checker_decorator, OptionValidator, DirectoryValidator, Convert2intValidator @@ -30,7 +30,6 @@ from mx_rec.validator.validator import para_checker_decorator, OptionValidator, class RecEnv: mxrec_log_level: str rank_table_file: str - ascend_visible_devices: str cm_chief_device: str cm_worker_size: str tf_device: str @@ -45,9 +44,6 @@ class RecEnv: use_combine_faae: str stat_on: str record_key_count: str - rank_id_env: str - rank_size_env: str - local_rank_size_env: str def get_global_env_conf() -> RecEnv: @@ -58,9 +54,8 @@ def get_global_env_conf() -> RecEnv: rec_env = RecEnv( mxrec_log_level=os.getenv(EnvOption.MXREC_LOG_LEVEL.value, RecPyLogLevel.INFO.value), rank_table_file=os.getenv(EnvOption.RANK_TABLE_FILE.value, EMPTY_STR), - ascend_visible_devices=os.getenv(EnvOption.ASCEND_VISIBLE_DEVICES.value), cm_chief_device=os.getenv(EnvOption.CM_CHIEF_DEVICE.value), - cm_worker_size=os.getenv(EnvOption.CM_WORKER_SIZE.value), + cm_worker_size=os.getenv(EnvOption.CM_WORKER_SIZE.value, DEFAULT_CM_WORKER_SIZE), tf_device=os.getenv(EnvOption.TF_DEVICE.value, TFDevice.NONE.value), acl_timeout=os.getenv(EnvOption.ACL_TIMEOUT.value, "-1"), hd_channel_size=os.getenv(EnvOption.HD_CHANNEL_SIZE.value, DEFAULT_HD_CHANNEL_SIZE), @@ -72,10 +67,7 @@ def get_global_env_conf() -> RecEnv: glog_stderrthreahold=os.getenv(EnvOption.GLOG_STDERRTHREAHOLD.value, RecCPPLogLevel.INFO.value), use_combine_faae=os.getenv(EnvOption.USE_COMBINE_FAAE.value, Flag.FALSE.value), stat_on=os.getenv(EnvOption.STAT_ON.value, Flag.FALSE.value), - record_key_count=os.getenv(EnvOption.RECORD_KEY_COUNT.value, Flag.FALSE.value), - rank_id_env=os.getenv(EnvOption.OMPI_COMM_WORLD_RANK.value), - rank_size_env=os.getenv(EnvOption.OMPI_COMM_WORLD_LOCAL_SIZE.value), - local_rank_size_env=os.getenv(EnvOption.OMPI_COMM_WORLD_LOCAL_SIZE.value), + record_key_count=os.getenv(EnvOption.RECORD_KEY_COUNT.value, Flag.FALSE.value) ) return rec_env @@ -84,6 +76,8 @@ def get_global_env_conf() -> RecEnv: @para_checker_decorator(check_option_list=[ ("mxrec_log_level", 
OptionValidator, {"options": [i.value for i in list(RecPyLogLevel)]}), ("rank_table_file", DirectoryValidator, {}, ["check_exists_if_not_empty"]), + ("cm_worker_size", Convert2intValidator, {"min_value": MIN_CM_WORKER_SIZE, "max_value": MAX_CM_WORKER_SIZE}, + ["check_value"]), ("tf_device", OptionValidator, {"options": [i.value for i in list(TFDevice)]}), ("acl_timeout", Convert2intValidator, {"min_value": -1, "max_value": MAX_INT32}, ["check_value"]), ("hd_channel_size", Convert2intValidator, diff --git a/tests/mx_rec/util/test_variable.py b/tests/mx_rec/util/test_variable.py index c72ed9dc..f8cd2725 100644 --- a/tests/mx_rec/util/test_variable.py +++ b/tests/mx_rec/util/test_variable.py @@ -44,10 +44,8 @@ class VariableTest(unittest.TestCase): """ self.cm_worker_size = global_env.cm_worker_size self.cm_chief_device = global_env.cm_chief_device - self.ascend_visible_devices = global_env.ascend_visible_devices global_env.cm_worker_size = "8" global_env.cm_chief_device = "0" - global_env.ascend_visible_devices = "0-7" def tearDown(self): """ @@ -56,7 +54,6 @@ class VariableTest(unittest.TestCase): """ global_env.cm_worker_size = self.cm_worker_size global_env.cm_chief_device = self.cm_chief_device - global_env.ascend_visible_devices = self.ascend_visible_devices @mock.patch("mx_rec.util.variable.ConfigInitializer") def test_get_dense_and_sparse_variable(self, variable_config_initializer): -- Gitee From e27d5206ab32673d50720b8d62be289167a253ac Mon Sep 17 00:00:00 2001 From: steepcurve Date: Mon, 29 Apr 2024 14:50:10 +0800 Subject: [PATCH 073/302] add clang-format comment --- .clang-format | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/.clang-format b/.clang-format index c1bb9720..1595fa33 100644 --- a/.clang-format +++ b/.clang-format @@ -1,13 +1,39 @@ +# Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +--- +# 详细配置说明 https://clang.llvm.org/docs/ClangFormatStyleOptions.html +--- Language: Cpp BasedOnStyle: Google +# public等标识符不缩进 AccessModifierOffset: -4 +# 限制行宽120字符 ColumnLimit: 120 +# 4空格缩进 IndentWidth: 4 +# 不使用tab UseTab: Never +# 二元运算符换行时对齐 AlignOperands: Align +# 参数换行时对齐 AlignAfterOpenBracket: Align +# 行末注释对齐 AlignTrailingComments: true DerivePointerAlignment: false +# 引用和指针左对齐 PointerAlignment: Left AllowAllParametersOfDeclarationOnNextLine: false AllowAllArgumentsOnNextLine: false @@ -18,8 +44,10 @@ AllowShortFunctionsOnASingleLine: Empty AllowShortIfStatementsOnASingleLine: false AllowShortLoopsOnASingleLine: false AllowShortLambdasOnASingleLine: Inline +# Break after return type automatically AlwaysBreakAfterDefinitionReturnType: None AlwaysBreakBeforeMultilineStrings: false +# 允许参数部分换行 BinPackArguments: true BinPackParameters: true BreakBeforeBraces: Custom @@ -27,6 +55,7 @@ BraceWrapping: AfterClass: false AfterControlStatement: false AfterEnum: false + # 只有函数括号另起一行 AfterFunction: true AfterNamespace: false AfterStruct: false @@ -35,16 +64,22 @@ BraceWrapping: BeforeCatch: false BeforeElse: false IndentBraces: false +# 二元运算符换行时 运算符在第一行末尾 BreakBeforeBinaryOperators: None +# 三元运算符换行时 运算符在下一行 BreakBeforeTernaryOperators: true +# 构造函数初始化列表冒号在换行后 逗号在换行前 BreakConstructorInitializers: BeforeColon BreakStringLiterals: true CompactNamespaces: false +# 初始化要么一行 要么每个一行 PackConstructorInitializers: CurrentLine ConstructorInitializerIndentWidth: 4 ContinuationIndentWidth: 4 +# 使用cpp11统一初始化风格 Cpp11BracedListStyle: true DisableFormat: false FixNamespaceComments: true +# 返回值类型声明后换行时不缩进 IndentWrappedFunctionNames: false Standard: Latest -- Gitee From 309366a9969f231f1528d87c1610e2e5a968cef2 Mon Sep 17 00:00:00 2001 From: steepcurve Date: Mon, 29 Apr 2024 15:21:39 +0800 Subject: [PATCH 074/302] format comment --- src/core/key_process/key_process.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core/key_process/key_process.cpp b/src/core/key_process/key_process.cpp index c5ec9204..47f9b719 100644 --- a/src/core/key_process/key_process.cpp +++ b/src/core/key_process/key_process.cpp @@ -1019,8 +1019,8 @@ void KeyProcess::UpdateHotMap(absl::flat_hash_map& keyCountMapBy } /* - * 将本地(rank)batch要发送的key数据量进行Allgather通信,获取所有(不同rank相同thread - * id的)线程间的通信量矩阵 scAll返回:所有线程间的通信量矩阵(按行平铺的一维向量) + * 将本地(rank)batch要发送的key数据量进行Allgather通信,获取所有(不同rank相同thread id的)线程间的通信量矩阵 + * scAll返回:所有线程间的通信量矩阵(按行平铺的一维向量) */ vector KeyProcess::GetScAll(const vector& keyScLocal, int commId, const unique_ptr& batch) { -- Gitee From d8e72c5a5532da62f136ade434d416031f5f8028 Mon Sep 17 00:00:00 2001 From: sihaixianyu Date: Mon, 29 Apr 2024 10:50:23 +0000 Subject: [PATCH 075/302] =?UTF-8?q?!108=20Slicer=E8=A1=A5=E5=85=85?= =?UTF-8?q?=E6=B3=A8=E9=87=8A=E5=92=8C=E5=8D=95=E6=B5=8B=E3=80=82=20*=20Sl?= =?UTF-8?q?icer=E8=A1=A5=E5=85=85=E6=B3=A8=E9=87=8A=E5=92=8C=E5=8D=95?= =?UTF-8?q?=E6=B5=8B=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mx_rec/graph/__init__.py | 1 + mx_rec/graph/hooks.py | 63 ++++++ mx_rec/graph/slicers.py | 63 ++++++ tests/mx_rec/graph/test_slicers.py | 304 +++++++++++++++++++++++++++++ 4 files changed, 431 insertions(+) create mode 100644 mx_rec/graph/hooks.py create mode 100644 tests/mx_rec/graph/test_slicers.py diff --git a/mx_rec/graph/__init__.py b/mx_rec/graph/__init__.py index b91d2a49..687e78ff 100644 --- a/mx_rec/graph/__init__.py +++ 
b/mx_rec/graph/__init__.py
@@ -25,3 +25,4 @@ __all__ = [
 
 from mx_rec.graph.modifier import GraphModifierHook, modify_graph_and_start_emb_cache
 from mx_rec.graph.patch import run
+from mx_rec.graph.hooks import LookupSubgraphSlicerHook, OrphanLookupKeySlicerHook
diff --git a/mx_rec/graph/hooks.py b/mx_rec/graph/hooks.py
new file mode 100644
index 00000000..5cf64b15
--- /dev/null
+++ b/mx_rec/graph/hooks.py
@@ -0,0 +1,63 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+from typing import List
+
+import tensorflow as tf
+from tensorflow import Graph
+
+from mx_rec.util.log import logger
+from mx_rec.graph.slicers import LookupSubgraphSlicer, OrphanLookupKeySlicer
+from mx_rec.validator.validator import ClassValidator, para_checker_decorator
+
+
+@para_checker_decorator(
+    check_option_list=[
+        ("op_types", ClassValidator, {"classes": (list,)}),
+        ("full_graph", ClassValidator, {"classes": (Graph, type(None))}),
+    ]
+)
+class LookupSubgraphSlicerHook(tf.estimator.SessionRunHook):
+    def __init__(self, op_types: List[str], full_graph: Graph = None) -> None:
+        super().__init__()
+        self._op_types = op_types
+        self._full_graph = full_graph
+
+    def begin(self) -> None:
+        slicer = LookupSubgraphSlicer(self._op_types, self._full_graph)
+
+        logger.info("Start to summarize sliceable operations in the lookup subgraph.")
+        slicer.summarize()
+
+        logger.info("Start to slice operations and their corresponding minimum dependency graphs.")
+        slicer.slice()
+
+
+@para_checker_decorator(check_option_list=[("full_graph", ClassValidator, {"classes": (Graph, type(None))})])
+class OrphanLookupKeySlicerHook(tf.estimator.SessionRunHook):
+    def __init__(self, full_graph: Graph = None) -> None:
+        super().__init__()
+        self._full_graph = full_graph
+
+    def begin(self) -> None:
+        slicer = OrphanLookupKeySlicer(self._full_graph)
+
+        logger.info("Start to summarize sliceable orphan lookup keys.")
+        slicer.summarize()
+
+        logger.info("Start to slice orphan lookup keys and their corresponding minimum dependency graphs.")
+        slicer.slice()
diff --git a/mx_rec/graph/slicers.py b/mx_rec/graph/slicers.py
index d22af868..3204af4e 100644
--- a/mx_rec/graph/slicers.py
+++ b/mx_rec/graph/slicers.py
@@ -70,6 +70,13 @@ class NoGradSubgraphSlicer(metaclass=abc.ABCMeta):
         pass
 
     def _slice_ops(self, sliceable_ops: Set[Operation], is_training: bool) -> None:
+        """Slice the minimum dependency graph of the given operation set.
+
+        Args:
+            sliceable_ops (Set[Operation]): The operation set that can be sliced.
+            is_training (bool): Whether the slicing targets the training graph.
+ """ + sliced_ops = self._find_min_dep_ops(sliceable_ops) in_op_to_edge_ops, out_op_to_edge_ops = self._find_subgraph_in_and_out(sliced_ops) @@ -89,6 +96,18 @@ class NoGradSubgraphSlicer(metaclass=abc.ABCMeta): in_op_to_edge_ops: Dict[Operation, Set[Operation]], out_op_to_edge_ops: Dict[Operation, Set[Operation]], ) -> DatasetV1Adapter: + """Make a new dataset which clones the sliced subgraph by mapfunc. + + Args: + old_dataset: The old dataset that needs to be mapped. + sliced_ops: The operation set that has been sliced. + in_op_to_edge_ops: The input relationship of sliced subgraph. + out_op_to_edge_ops: The output relationship of sliced subgraph. + + Returns: + DatasetV1Adapter: The new dataset that has cloned the sliced subgraph. + """ + def slice_map_func(*batch): # pragma: no cover logger.debug("The layout of old batch: %s.", batch) @@ -116,6 +135,16 @@ class NoGradSubgraphSlicer(metaclass=abc.ABCMeta): self, sub_graph_ops: Set[Operation], ) -> Tuple[Dict[Operation, Set[Operation]], Dict[Operation, Set[Operation]]]: + """Find the input and output relationship of sliced subgraph. + + Args: + sub_graph_ops: The operation set that has been sliced. + + Returns: + in_op_to_edge_ops: The input relationship of sliced subgraph. + out_op_to_edge_ops: The output relationship of sliced subgraph. + """ + in_op_to_edge_ops = dict() out_op_to_edge_ops = dict() @@ -128,6 +157,15 @@ class NoGradSubgraphSlicer(metaclass=abc.ABCMeta): return in_op_to_edge_ops, out_op_to_edge_ops def _find_old_get_next(self, sliceable_ops: Set[Operation]) -> Operation: + """Find the old 'IteratorGetNext' operation. + + Args: + sliceable_ops: The operation set that can be sliced. + + Returns: + old_get_next: The old 'IteratorGetNext' operation. + """ + old_get_next = self._upward_bfs_op(sliceable_ops, AnchorIteratorOp.ITERATOR_GET_NEXT.value) tf.compat.v1.add_to_collection(DeprecatedOp.DEPRECATED_ITERATOR_GET_NEXT, old_get_next) @@ -136,6 +174,22 @@ class NoGradSubgraphSlicer(metaclass=abc.ABCMeta): return old_get_next def _find_old_dataset(self, get_next: Operation, is_training: bool) -> DatasetV1Adapter: + """Find the old dataset that needs to be mapped. + + Due to the different iterator types, the search method is different. + 1. If the iterator type is 'MakeIterator', this func will exec upward bfs search through get_next. + 2. If the iterator type is 'OneShotIterator', this func will fetch all operation in 'self._full_graph', then + filter out the 'PrefetchDataset' operation. This diff is caused by the isolation of 'OneShotIterator' and the + 'PrefetchDataset'. + + Args: + get_next: The old 'IteratorGetNext' operation. + is_training: Whether the slicing is for training graph or not. + + Returns: + old_dataset: The old dataset that needs to be mapped. + """ + tgt_trans_dataset = None try: tgt_trans_dataset = self._find_trans_dataset(get_next) @@ -173,6 +227,15 @@ class NoGradSubgraphSlicer(metaclass=abc.ABCMeta): return old_dataset def _find_trans_dataset(self, get_next: Operation) -> Operation: + """Find the transformation dataset through 'get_next'. + + Args: + get_next: The old 'IteratorGetNext' operation. + + Returns: + trans_dataset: The target transformation dataset. 
+ """ + if get_next.type != AnchorIteratorOp.ITERATOR_GET_NEXT.value: raise TypeError(f"operation '{get_next}' must be one instance of 'IteratorGetNext'.") diff --git a/tests/mx_rec/graph/test_slicers.py b/tests/mx_rec/graph/test_slicers.py new file mode 100644 index 00000000..b6d9cad9 --- /dev/null +++ b/tests/mx_rec/graph/test_slicers.py @@ -0,0 +1,304 @@ +#!/usr/bin/env python3 +# coding: UTF-8 +# Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +import unittest +from unittest.mock import patch, Mock + +import tensorflow as tf +from tensorflow import Graph + +from mx_rec.constants.constants import ASCEND_SPARSE_LOOKUP_ENTRANCE +from mx_rec.graph.constants import AnchorDatasetOp +from mx_rec.graph.slicers import NoGradSubgraphSlicer, LookupSubgraphSlicer, OrphanLookupKeySlicer +from tests.mx_rec.graph.mock_dataset import gen_mock_dataset + + +class MockNoGradSubgraphSlicer(NoGradSubgraphSlicer): + def __init__(self, full_graph: Graph = None, info_dir: str = "slicing") -> None: + super().__init__(full_graph, info_dir) + + def summarize(self) -> None: + pass + + def slice(self) -> None: + pass + + +class NoGradSubgraphSlicerTestCase(unittest.TestCase): + def test_ok_slice_ops(self): + with tf.compat.v1.Graph().as_default(): + dataset = gen_mock_dataset() + prefetch_dataset = dataset.prefetch(0) + + iterator = tf.compat.v1.data.make_initializable_iterator(prefetch_dataset) + batch = iterator.get_next() + + mock_ids = batch["mock_ids"] + mock_labels = batch["mock_labels"] + + inner_tensor = tf.identity(mock_ids) + inner_op = inner_tensor.op + + tf.identity(inner_tensor) + tf.identity(mock_labels) + + sliced_ops = {inner_op} + MockNoGradSubgraphSlicer()._slice_ops(sliced_ops, is_training=True) + + g = tf.compat.v1.get_default_graph() + prefetch_datasets = [op for op in g.get_operations() if AnchorDatasetOp.PREFETCH_DATASET.value in op.name] + self.assertEqual(len(prefetch_datasets), 2) + + def test_ok_find_min_dep_ops(self): + with tf.compat.v1.Graph().as_default(): + dataset = gen_mock_dataset() + iterator = dataset.make_initializable_iterator() + batch = iterator.get_next() + ids = batch["mock_ids"] + + subgraph_in = tf.identity(ids) + subgraph_out = tf.identity(subgraph_in) + base_ops = {subgraph_out.op} + + min_dep_ops = NoGradSubgraphSlicer._find_min_dep_ops(base_ops) + self.assertEqual(min_dep_ops, {subgraph_in.op, subgraph_out.op}) + + def test_ok_validate_op(self): + with tf.compat.v1.Graph().as_default(): + t = tf.constant(0) + t = tf.add(t, 1) + t = tf.subtract(t, 1) + op = t.op + + is_valid = NoGradSubgraphSlicer._validate_op(op) + self.assertTrue(is_valid, True) + + def test_ok_find_subgraph_in_and_out(self): + with tf.compat.v1.Graph().as_default(): + dataset = gen_mock_dataset() + iterator = dataset.make_initializable_iterator() + batch = iterator.get_next() + ids = batch.get("mock_ids") + + input_tensor = tf.identity(ids) + 
inner_tensor = tf.identity(input_tensor) + output_tensor = tf.identity(inner_tensor) + subgraph_ops = {inner_tensor.op} + + (subgraph_in, subgraph_out) = MockNoGradSubgraphSlicer()._find_subgraph_in_and_out(subgraph_ops) + self.assertEqual(subgraph_in, {input_tensor.op: {inner_tensor.op}}) + self.assertEqual(subgraph_out, {output_tensor.op: {inner_tensor.op}}) + + def test_ok_find_old_dataset(self): + with tf.compat.v1.Graph().as_default(): + dataset = gen_mock_dataset() + iterator = tf.compat.v1.data.make_initializable_iterator(dataset) + batch = iterator.get_next() + ids = batch["mock_ids"] + get_next = ids.op + + old_dataset = MockNoGradSubgraphSlicer()._find_old_dataset(get_next, is_training=True) + self.assertEqual(old_dataset, dataset) + + with tf.compat.v1.Graph().as_default(): + dataset = gen_mock_dataset() + prefetch_dataset = dataset.prefetch(0) + iterator = tf.compat.v1.data.make_one_shot_iterator(prefetch_dataset) + batch = iterator.get_next() + ids = batch["mock_ids"] + get_next = ids.op + + old_dataset = MockNoGradSubgraphSlicer()._find_old_dataset(get_next, is_training=True) + self.assertEqual(old_dataset, dataset) + + with tf.compat.v1.Graph().as_default(): + dataset = gen_mock_dataset() + prefetch_dataset = dataset.prefetch(0) + gen_mock_dataset().prefetch(0) + + iterator = tf.compat.v1.data.make_one_shot_iterator(dataset) + batch = iterator.get_next() + ids = batch["mock_ids"] + get_next = ids.op + + old_dataset = MockNoGradSubgraphSlicer()._find_old_dataset(get_next, is_training=True) + self.assertEqual(old_dataset, dataset) + + with tf.compat.v1.Graph().as_default(): + dataset = gen_mock_dataset() + prefetch_dataset = dataset.prefetch(0) + gen_mock_dataset().prefetch(0) + + iterator = tf.compat.v1.data.make_one_shot_iterator(dataset) + batch = iterator.get_next() + ids = batch["mock_ids"] + get_next = ids.op + + old_dataset = MockNoGradSubgraphSlicer()._find_old_dataset(get_next, is_training=False) + self.assertEqual(old_dataset, dataset) + + def test_ok_make_new_dataset(self): + with tf.compat.v1.Graph().as_default(): + dataset = gen_mock_dataset() + prefetch_dataset = dataset.prefetch(0) + iterator = tf.compat.v1.data.make_initializable_iterator(prefetch_dataset) + batch = iterator.get_next() + ids = batch["mock_ids"] + + in_op = ids.op + inner_tensor = tf.identity(ids) + inner_op = inner_tensor.op + out_op = tf.identity(inner_tensor).op + + sliced_ops = {inner_op} + in_op_to_edge_ops = {in_op: {inner_op}} + out_op_to_edge_ops = {out_op: {inner_op}} + + new_dataset = MockNoGradSubgraphSlicer()._make_new_dataset( + dataset, sliced_ops, in_op_to_edge_ops, out_op_to_edge_ops + ) + new_prefetch_dataset = new_dataset + new_iter = tf.compat.v1.data.make_initializable_iterator(new_prefetch_dataset) + new_batch = new_iter.get_next() + self.assertEqual(len(new_batch), 4) + + def test_ok_topo_sort_sliced_ops(self): + with tf.compat.v1.Graph().as_default(): + t1 = tf.constant(0) + t2 = tf.identity(t1) + t3 = tf.identity(t2) + ops = {t3.op, t2.op, t1.op} + + topo_sorted_ops = NoGradSubgraphSlicer._topo_sort_sliced_ops(ops) + self.assertEqual(topo_sorted_ops, [t1.op, t2.op, t3.op]) + + def test_ok_clone_subgraph_into_funcgraph(self): + with tf.compat.v1.Graph().as_default(): + prefetch_dataset = gen_mock_dataset().prefetch(0) + iterator = tf.compat.v1.data.make_initializable_iterator(prefetch_dataset) + batch = iterator.get_next() + ids = batch["mock_ids"] + + in_op = ids.op + inner_tensor = tf.identity(ids) + inner_op = inner_tensor.op + out_op = tf.identity(inner_tensor).op + + 
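+            # Boundary maps for the single sliced op: 'in_op' feeds 'inner_op',
+            # whose result is consumed by 'out_op'; cloning into the funcgraph
+            # must preserve both edges.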
sliced_ops = {inner_op} + in_op_to_edge_ops = {in_op: {inner_op}} + out_op_to_edge_ops = {out_op: {inner_op}} + + with patch.object(tf.compat.v1.Graph, "get_tensor_by_name", return_value=tf.identity(inner_tensor)): + new_batch = MockNoGradSubgraphSlicer()._clone_subgraph_into_funcgraph( + sliced_ops, in_op_to_edge_ops, out_op_to_edge_ops, batch + ) + self.assertEqual(len(new_batch), 4) + + def test_ok_make_new_get_next(self): + with tf.compat.v1.Graph().as_default(): + prefetch_dataset = gen_mock_dataset().prefetch(0) + iterator = tf.compat.v1.data.make_initializable_iterator(prefetch_dataset) + batch = iterator.get_next() + ids = batch["mock_ids"] + + old_get_next = ids.op + new_dataset = gen_mock_dataset().prefetch(0) + + new_get_next = MockNoGradSubgraphSlicer()._make_new_get_next(old_get_next, new_dataset) + self.assertIsNotNone(new_get_next) + + with tf.compat.v1.Graph().as_default(): + prefetch_dataset = gen_mock_dataset().prefetch(0) + iterator = tf.compat.v1.data.make_one_shot_iterator(prefetch_dataset) + batch = iterator.get_next() + ids = batch["mock_ids"] + + old_get_next = ids.op + new_dataset = gen_mock_dataset().prefetch(0) + + new_get_next = MockNoGradSubgraphSlicer()._make_new_get_next(old_get_next, new_dataset) + self.assertIsNotNone(new_get_next) + + +class LookupSubGraphSlicerTestCase(unittest.TestCase): + def test_ok_find_all_tgt_ops(self): + with tf.compat.v1.Graph().as_default(): + prefetch_dataset = gen_mock_dataset().prefetch(0) + iterator = tf.compat.v1.data.make_initializable_iterator(prefetch_dataset) + batch = iterator.get_next() + ids = batch["mock_ids"] + + inner_tensor = tf.identity(ids) + tf.identity(inner_tensor) + + all_tgt_ops = LookupSubgraphSlicer(op_types=["Identity"])._find_all_tgt_ops() + self.assertEqual(len(all_tgt_ops), 2) + + @patch.multiple( + "mx_rec.core.emb.base_sparse_embedding.BaseSparseEmbedding", get_anchor_attribute=Mock(return_value=True) + ) + def test_ok_find_sliceable_tgt_ops(self): + with tf.compat.v1.Graph().as_default(): + prefetch_dataset = gen_mock_dataset().prefetch(0) + iterator = tf.compat.v1.data.make_initializable_iterator(prefetch_dataset) + batch = iterator.get_next() + ids = batch["mock_ids"] + + inner_tensor = tf.identity(ids) + lookup_key = tf.identity(inner_tensor) + tf.compat.v1.add_to_collection(ASCEND_SPARSE_LOOKUP_ENTRANCE, lookup_key) + + all_tgt_ops = LookupSubgraphSlicer(op_types=["Identity"])._find_sliceable_tgt_ops() + self.assertEqual(len(all_tgt_ops), 2) + + +class OrphanLookupKeySlicerTestCase(unittest.TestCase): + @patch.multiple("mx_rec.graph.slicers.utils", export_pb_graph=Mock(return_value=None)) + def test_ok_slice_ops(self): + with tf.compat.v1.Graph().as_default(): + prefetch_dataset = gen_mock_dataset().prefetch(0) + iterator = tf.compat.v1.data.make_initializable_iterator(prefetch_dataset) + batch = iterator.get_next() + ids = batch["mock_ids"] + + inner_tensor = tf.constant(0, dtype=ids.dtype, shape=ids.shape) + lookup_key = tf.identity(inner_tensor) + tf.compat.v1.add_to_collection(ASCEND_SPARSE_LOOKUP_ENTRANCE, lookup_key) + + sliceable_ops = {inner_tensor.op} + OrphanLookupKeySlicer()._slice_ops(sliceable_ops, is_training=False) + + g = tf.compat.v1.get_default_graph() + prefetch_datasets = [op for op in g.get_operations() if AnchorDatasetOp.PREFETCH_DATASET.value in op.name] + self.assertEqual(len(prefetch_datasets), 2) + + @patch.multiple( + "mx_rec.core.emb.base_sparse_embedding.BaseSparseEmbedding", get_anchor_attribute=Mock(return_value=True) + ) + def 
test_ok_find_sliceable_tgt_ops(self): + with tf.compat.v1.Graph().as_default(): + prefetch_dataset = gen_mock_dataset().prefetch(0) + iterator = tf.compat.v1.data.make_initializable_iterator(prefetch_dataset) + batch = iterator.get_next() + ids = batch["mock_ids"] + + inner_tensor = tf.constant(0, dtype=ids.dtype, shape=ids.shape) + lookup_key = tf.identity(inner_tensor) + tf.compat.v1.add_to_collection(ASCEND_SPARSE_LOOKUP_ENTRANCE, lookup_key) + + all_tgt_ops = OrphanLookupKeySlicer()._find_sliceable_tgt_ops() + self.assertEqual(len(all_tgt_ops), 2) -- Gitee From 07a159c2dbf87f9348f7871f7130aba53f069476 Mon Sep 17 00:00:00 2001 From: yxy1684 <2270320041@qq.com> Date: Mon, 29 Apr 2024 19:30:02 +0800 Subject: [PATCH 076/302] cleancode --- .../op_host/embedding_update_by_address.cpp | 13 +++++++------ .../op_kernel/embedding_update_by_address.cpp | 10 +++++++--- examples/demo/little_demo_estimator/nn_optim.py | 2 +- src/AccCTR/src/unique/unique_func.cpp | 5 +++-- src/AccCTR/src/unique/unique_func.h | 2 +- src/core/emb_table/embedding_ddr.cpp | 11 ----------- 6 files changed, 19 insertions(+), 24 deletions(-) diff --git a/cust_op/cust_op_by_addr/op_host/embedding_update_by_address.cpp b/cust_op/cust_op_by_addr/op_host/embedding_update_by_address.cpp index d0e4b778..5f823889 100644 --- a/cust_op/cust_op_by_addr/op_host/embedding_update_by_address.cpp +++ b/cust_op/cust_op_by_addr/op_host/embedding_update_by_address.cpp @@ -73,7 +73,7 @@ namespace optiling return ge::GRAPH_FAILED; } - int64_t inputShape = static_cast(inputTensor->GetShapeSize()); + int32_t inputShape = static_cast(inputTensor->GetShapeSize()); if (CheckPositiveInt(inputShape, "inputShape") != ge::GRAPH_SUCCESS) { return ge::GRAPH_FAILED; } @@ -84,7 +84,7 @@ namespace optiling } const int32_t inputShapeTmp = (inputShape > 0) ? 
inputShape : 1; - int64_t inputDim = static_cast(inputTensor1->GetShapeSize() / inputShapeTmp); + int32_t inputDim = static_cast(inputTensor1->GetShapeSize()) / inputShapeTmp; if (CheckPositiveInt(inputDim, "inputDim") != ge::GRAPH_SUCCESS) { return ge::GRAPH_FAILED; } @@ -122,8 +122,9 @@ namespace optiling int32_t occupyAddressBytesNum = sizeof(int64_t) + typeSize * inputDimAligned * PING_PONG_NUM * 2; // 一轮计算中最多计算多少个addr,由于地址也要搬到ub,所以需要对齐32 - int32_t addrPerLoop = static_cast((UB_LIMIT / - occupyAddressBytesNum) & (~3U)); // & (~3U),保证地址数是4的倍数 + int32_t addrPerLoop = static_cast( + UB_LIMIT / static_cast(occupyAddressBytesNum) & (~3U)); // & (~3U),保证地址数是4的倍数 + if (CheckPositiveInt(addrPerLoop, "addrPerLoop") != ge::GRAPH_SUCCESS) { return ge::GRAPH_FAILED; } @@ -132,8 +133,8 @@ namespace optiling tiling.set_update_type(updateType); tiling.set_embedding_type(embeddingType); - tiling.set_update_dim(inputDim); - tiling.set_addr_nums(inputShape); + tiling.set_update_dim(static_cast(inputDim)); + tiling.set_addr_nums(static_cast(inputShape)); tiling.set_addr_per_loop(addrPerLoop); tiling.set_type_size(typeSize); tiling.set_input_dim_aligned(inputDimAligned); diff --git a/cust_op/cust_op_by_addr/op_kernel/embedding_update_by_address.cpp b/cust_op/cust_op_by_addr/op_kernel/embedding_update_by_address.cpp index 4a13c3eb..d129947b 100644 --- a/cust_op/cust_op_by_addr/op_kernel/embedding_update_by_address.cpp +++ b/cust_op/cust_op_by_addr/op_kernel/embedding_update_by_address.cpp @@ -20,6 +20,9 @@ constexpr int32_t SIZE_OF_HALF = 2; constexpr int32_t SIZE_OF_FLOAT_OR_INT = 4; template + +namespace AscendKernel { + class KernelEimtable_update { public: @@ -185,6 +188,7 @@ private: GlobalTensor srcDataBufferGm, dstDataGm, outDataGm; GlobalTensor srcAddrGlobal; }; +} extern "C" __global__ __aicore__ void embedding_update_by_address(GM_ADDR address, GM_ADDR embedding, GM_ADDR y, GM_ADDR usrWorkspace, GM_ADDR tiling) @@ -197,7 +201,7 @@ extern "C" __global__ __aicore__ void embedding_update_by_address(GM_ADDR addres { case 0: { - KernelEimtable_update op; + AscendKernel::KernelEimtable_update op; op.Init_param(tiling); op.Init(address, embedding, y); op.Process(); @@ -205,7 +209,7 @@ extern "C" __global__ __aicore__ void embedding_update_by_address(GM_ADDR addres break; case 2: { - KernelEimtable_update op; + AscendKernel::KernelEimtable_update op; op.Init_param(tiling); op.Init(address, embedding, y); op.Process(); @@ -213,7 +217,7 @@ extern "C" __global__ __aicore__ void embedding_update_by_address(GM_ADDR addres break; default: { - KernelEimtable_update op; + AscendKernel::KernelEimtable_update op; op.Init_param(tiling); op.Init(address, embedding, y); op.Process(); diff --git a/examples/demo/little_demo_estimator/nn_optim.py b/examples/demo/little_demo_estimator/nn_optim.py index 415c5ff2..4d519366 100644 --- a/examples/demo/little_demo_estimator/nn_optim.py +++ b/examples/demo/little_demo_estimator/nn_optim.py @@ -55,7 +55,7 @@ def get_train_op_list(losses, learning_rate): dense_variables, sparse_variables = get_dense_and_sparse_variable() trainable_variables = [dense_variables] - for i, (name, loss) in enumerate(losses): + for _, (name, loss) in enumerate(losses): with tf.control_dependencies(update_ops): # do dense grad grads = dense_optimizer.compute_gradients(loss, var_list=trainable_variables) diff --git a/src/AccCTR/src/unique/unique_func.cpp b/src/AccCTR/src/unique/unique_func.cpp index 717d8890..d208eac9 100644 --- a/src/AccCTR/src/unique/unique_func.cpp +++ 
b/src/AccCTR/src/unique/unique_func.cpp @@ -119,8 +119,9 @@ void Dedup::NewParameter() // Time to check the proper size of sharded tables for performance // sake. uint64_t shardedTableSize = 0; - if (std::numeric_limits::max() / static_cast(n) / static_cast(groupCount_) - < newBucketCountPowerOf2) { + if (std::numeric_limits::max() / static_cast(n) / + static_cast(groupCount_) < + newBucketCountPowerOf2) { shardedTableSize = static_cast(std::numeric_limits::max()); } else { shardedTableSize = newBucketCountPowerOf2 * n * static_cast(groupCount_); diff --git a/src/AccCTR/src/unique/unique_func.h b/src/AccCTR/src/unique/unique_func.h index 46718bde..07c8ebb7 100644 --- a/src/AccCTR/src/unique/unique_func.h +++ b/src/AccCTR/src/unique/unique_func.h @@ -526,7 +526,7 @@ private: uint32_t *finishPtr = beginPtr + uniqueIn.inputIdCnt; uint32_t *partBeginPtr = beginPtr; auto alignedAddress = CacheLineAlign(reinterpret_cast(partBeginPtr + partSize)); - auto *partEndPtr = reinterpret_cast(alignedAddress); + auto *partEndPtr = reinterpret_cast(static_cast(alignedAddress)); std::vector> tasks; auto val = TypeTrans(uniqueIn.inputId); while (partBeginPtr < finishPtr) { diff --git a/src/core/emb_table/embedding_ddr.cpp b/src/core/emb_table/embedding_ddr.cpp index 02d7c116..8f529646 100644 --- a/src/core/emb_table/embedding_ddr.cpp +++ b/src/core/emb_table/embedding_ddr.cpp @@ -24,17 +24,6 @@ See the License for the specific language governing permissions and using namespace MxRec; -constexpr int ELEMENT_NUM = 4; -constexpr int CURRENT_UPDATE_IDX = 0; -constexpr int HOST_VOCAB_SIZE_IDX = 1; -constexpr int DEV_VOCAB_SIZE_IDX = 2; -constexpr int MAX_OFFSET_IDX = 3; - -constexpr int EMB_INFO_ELEMENT_NUM = 3; -constexpr int EMB_INFO_EXT_SIZE_IDX = 0; -constexpr int EMB_INFO_DEV_VOCAB_SIZE_IDX = 1; -constexpr int EMB_INFO_HOST_VOCAB_SIZE_IDX = 2; - EmbeddingDDR::EmbeddingDDR() { } -- Gitee From 19966c0c38372304f373c097e20bf76413084b8e Mon Sep 17 00:00:00 2001 From: yxy1684 <2270320041@qq.com> Date: Mon, 29 Apr 2024 20:07:24 +0800 Subject: [PATCH 077/302] cleancode --- .../op_kernel/embedding_update_by_address.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/cust_op/cust_op_by_addr/op_kernel/embedding_update_by_address.cpp b/cust_op/cust_op_by_addr/op_kernel/embedding_update_by_address.cpp index d129947b..828d7fbe 100644 --- a/cust_op/cust_op_by_addr/op_kernel/embedding_update_by_address.cpp +++ b/cust_op/cust_op_by_addr/op_kernel/embedding_update_by_address.cpp @@ -16,13 +16,11 @@ See the License for the specific language governing permissions and #include "kernel_operator.h" using namespace AscendC; +namespace KernelOps { constexpr int32_t SIZE_OF_HALF = 2; constexpr int32_t SIZE_OF_FLOAT_OR_INT = 4; template - -namespace AscendKernel { - class KernelEimtable_update { public: @@ -201,7 +199,7 @@ extern "C" __global__ __aicore__ void embedding_update_by_address(GM_ADDR addres { case 0: { - AscendKernel::KernelEimtable_update op; + KernelOps::KernelEimtable_update op; op.Init_param(tiling); op.Init(address, embedding, y); op.Process(); @@ -209,7 +207,7 @@ extern "C" __global__ __aicore__ void embedding_update_by_address(GM_ADDR addres break; case 2: { - AscendKernel::KernelEimtable_update op; + KernelOps::KernelEimtable_update op; op.Init_param(tiling); op.Init(address, embedding, y); op.Process(); @@ -217,7 +215,7 @@ extern "C" __global__ __aicore__ void embedding_update_by_address(GM_ADDR addres break; default: { - AscendKernel::KernelEimtable_update op; + 
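+                // Fallback: any other type code follows the same
+                // Init_param -> Init -> Process sequence as the typed cases above.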
KernelOps::KernelEimtable_update op; op.Init_param(tiling); op.Init(address, embedding, y); op.Process(); -- Gitee From b6ff564e63a54c47571eeda75b3a950a4259e5f2 Mon Sep 17 00:00:00 2001 From: penghuiyang <1060916628@qq.com> Date: Mon, 29 Apr 2024 20:25:02 +0800 Subject: [PATCH 078/302] =?UTF-8?q?LazyAdam=E8=9E=8D=E5=90=88=E7=AE=97?= =?UTF-8?q?=E5=AD=90-aclnn=E9=83=A8=E5=88=86=E6=8F=90=E4=BA=A4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../aclnn_lazy_adam_test/inc/common.h | 51 ++++ .../aclnn_lazy_adam_test/inc/op_runner.h | 188 +++++++++++++++ .../aclnn_lazy_adam_test/inc/operator_desc.h | 66 +++++ .../aclnn_lazy_adam_test/input/.keep | 0 .../aclnn_lazy_adam_test/output/.keep | 0 .../aclnn_lazy_adam_test/run.sh | 106 ++++++++ .../aclnn_lazy_adam_test/scripts/gen_data.py | 145 +++++++++++ .../scripts/verify_result.py | 50 ++++ .../aclnn_lazy_adam_test/src/CMakeLists.txt | 67 ++++++ .../aclnn_lazy_adam_test/src/common.cpp | 85 +++++++ .../aclnn_lazy_adam_test/src/main.cpp | 226 ++++++++++++++++++ 11 files changed, 984 insertions(+) create mode 100644 cust_op/fused_lazy_adam/aclnn_lazy_adam_test/inc/common.h create mode 100644 cust_op/fused_lazy_adam/aclnn_lazy_adam_test/inc/op_runner.h create mode 100644 cust_op/fused_lazy_adam/aclnn_lazy_adam_test/inc/operator_desc.h create mode 100644 cust_op/fused_lazy_adam/aclnn_lazy_adam_test/input/.keep create mode 100644 cust_op/fused_lazy_adam/aclnn_lazy_adam_test/output/.keep create mode 100644 cust_op/fused_lazy_adam/aclnn_lazy_adam_test/run.sh create mode 100644 cust_op/fused_lazy_adam/aclnn_lazy_adam_test/scripts/gen_data.py create mode 100644 cust_op/fused_lazy_adam/aclnn_lazy_adam_test/scripts/verify_result.py create mode 100644 cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/CMakeLists.txt create mode 100644 cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/common.cpp create mode 100644 cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/main.cpp diff --git a/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/inc/common.h b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/inc/common.h new file mode 100644 index 00000000..ba754761 --- /dev/null +++ b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/inc/common.h @@ -0,0 +1,51 @@ +/* Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and + limitations under the License. +==============================================================================*/ + +#ifndef COMMON_H +#define COMMON_H + +#include +#include +#include +#include +#include + +#include "acl/acl.h" + +#define SUCCESS 0 +#define FAILED 1 + +#define INFO_LOG(fmt, args...) fprintf(stdout, "[INFO] " fmt "\n", ##args) +#define WARN_LOG(fmt, args...) fprintf(stdout, "[WARN] " fmt "\n", ##args) +#define ERROR_LOG(fmt, args...) 
fprintf(stderr, "[ERROR] " fmt "\n", ##args) + +/** + * @brief Read data from file + * @param [in] filePath: file path + * @param [out] fileSize: file size + * @return read result + */ +bool ReadFile(const std::string &filePath, size_t fileSize, void *buffer, size_t bufferSize); + +/** + * @brief Write data to file + * @param [in] filePath: file path + * @param [in] buffer: data to write to file + * @param [in] size: size to write + * @return write result + */ +bool WriteFile(const std::string &filePath, const void *buffer, size_t size); + +#endif // COMMON_H diff --git a/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/inc/op_runner.h b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/inc/op_runner.h new file mode 100644 index 00000000..ed432a1e --- /dev/null +++ b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/inc/op_runner.h @@ -0,0 +1,188 @@ +/* Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and + limitations under the License. +==============================================================================*/ + +#ifndef OP_RUNNER_H +#define OP_RUNNER_H + +#include "aclnn/acl_meta.h" +#include "acl/acl.h" +#include "common.h" +#include "operator_desc.h" + +/** + * Op Runner + */ +class OpRunner { +public: + /** + * @brief Constructor + * @param [in] opDesc: op description + */ + explicit OpRunner(OperatorDesc *opDesc); + + /** + * @brief Destructor + */ + virtual ~OpRunner(); + + /** + * @brief Init op runner + */ + bool Init(); + + /** + * @brief Get number of inputs + * @return number of inputs + */ + const size_t NumInputs(); + + /** + * @brief Get number of outputs + * @return number of outputs + */ + const size_t NumOutputs(); + + /** + * @brief Get input size by index + * @param [in] index: input index + * @return size of the input + */ + const size_t GetInputSize(size_t index) const; + const size_t GetInputNumDims(size_t index) const; + aclDataType GetInputDataType(size_t index) const; + aclFormat GetInputFormat(size_t index) const; + + /** + * @brief Get output size by index + * @param [in] index: output index + * @return size of the output + */ + size_t GetOutputSize(size_t index) const; + const size_t GetOutputNumDims(size_t index) const; + aclDataType GetOutputDataType(size_t index) const; + aclFormat GetOutputFormat(size_t index) const; + + /** + * @brief Get input element count by index + * @param i[in] ndex: input index + * @return element count of the input + */ + size_t GetInputElementCount(size_t index) const; + + /** + * @brief Get output element count by index + * @param [in] index: output index + * @return element count of the output + */ + size_t GetOutputElementCount(size_t index) const; + + /** + * @brief Get input shape by index + * @param [in] index: input index + * @return shape of the output + */ + std::vector GetInputShape(size_t index) const; + + /** + * @brief Get output shape by index + * @param [in] index: output index + * @return shape of the output + */ + std::vector GetOutputShape(size_t index) const; + + /** + * @brief Get input buffer(host memory) by 
index + * @tparam T: data type + * @param [in] index: input index + * @return host address of the input + */ + template + T *GetInputBuffer(size_t index) + { + if (index >= numInputs_) { + ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); + return nullptr; + } + return reinterpret_cast(hostInputs_[index]); + } + + /** + * @brief Get output buffer(host memory) by index + * @tparam T: data type + * @param [in] index: output index + * @return host address of the output + */ + template + const T *GetOutputBuffer(size_t index) + { + if (index >= numOutputs_) { + ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); + return nullptr; + } + + return reinterpret_cast(hostOutputs_[index]); + } + + /** + * @brief Print readable input by index + * @param [in] index: input index + * @param [in] numElementsPerRow: number of elements per row + */ + void PrintInput(size_t index, size_t numElementsPerRow = 16); + + /** + * @brief Print readable output by index + * @param [in] index: output index + * @param [in] numElementsPerRow: number of elements per row + */ + void PrintOutput(size_t index, size_t numElementsPerRow = 16); + + /** + * @brief Compile static op + * @return compile result + */ + bool CompileStaticOp(); + + /** + * @brief Compile dynamic op + * @return compile result + */ + bool CompileDynamicOp(); + + /** + * @brief Run op + * @return run result + */ + bool RunOp(); + +private: + size_t numInputs_; + size_t numOutputs_; + + std::vector inputBuffers_; + std::vector outputBuffers_; + + std::vector devInputs_; + std::vector devOutputs_; + + std::vector hostInputs_; + std::vector hostOutputs_; + + std::vector inputTensor_; + std::vector outputTensor_; + OperatorDesc *opDesc_; +}; + +#endif // OP_RUNNER_H diff --git a/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/inc/operator_desc.h b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/inc/operator_desc.h new file mode 100644 index 00000000..0c76260b --- /dev/null +++ b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/inc/operator_desc.h @@ -0,0 +1,66 @@ +/* Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and + limitations under the License. 
+==============================================================================*/ + +#ifndef OPERATOR_DESC_H +#define OPERATOR_DESC_H + +#include +#include + +#include "acl/acl.h" + +/** + * Op description + */ +struct OperatorDesc { + /** + * Constructor + */ + explicit OperatorDesc(); + + /** + * Destructor + */ + virtual ~OperatorDesc(); + + /** + * Add an input tensor description + * @param [in] dataType: data type + * @param [in] numDims: number of dims + * @param [in] dims: dims + * @param [in] format: format + * @return OperatorDesc + */ + OperatorDesc &AddInputTensorDesc(aclDataType dataType, int numDims, const int64_t *dims, aclFormat format); + + /** + * Add an output tensor description + * @param [in] dataType: data type + * @param [in] numDims: number of dims + * @param [in] dims: dims + * @param [in] format: format + * @return OperatorDesc + */ + OperatorDesc &AddOutputTensorDesc(aclDataType dataType, int numDims, const int64_t *dims, aclFormat format); + + std::string opType; + std::vector inputDesc; + std::vector outputDesc; + double beta1; + double beta2; + double epsilon; +}; + +#endif // OPERATOR_DESC_H diff --git a/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/input/.keep b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/input/.keep new file mode 100644 index 00000000..e69de29b diff --git a/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/output/.keep b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/output/.keep new file mode 100644 index 00000000..e69de29b diff --git a/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/run.sh b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/run.sh new file mode 100644 index 00000000..3d4af97c --- /dev/null +++ b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/run.sh @@ -0,0 +1,106 @@ +#!/bin/bash +# Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +export ASCEND_SLOG_PRINT_TO_STDOUT=0 +export ASCEND_GLOBAL_LOG_LEVEL=0 + +CURRENT_DIR=$( + cd $(dirname ${BASH_SOURCE:-$0}) + pwd +) +cd $CURRENT_DIR + +# 导出环境变量 +SHORT=v:, +LONG=dtype:, +OPTS=$(getopt -a --options $SHORT --longoptions $LONG -- "$@") +eval set -- "$OPTS" +while : +do + case "$1" in + # float16, float, int32 + (-v | --dtype) + DTYPE="$2" + shift 2;; + (--) + shift; + break;; + (*) + echo "[ERROR] Unexpected option: $1"; + break;; + esac +done + +if [ ! $ASCEND_HOME_DIR ]; then + if [ -d "$HOME/Ascend/ascend-toolkit/latest" ]; then + export ASCEND_HOME_DIR=$HOME/Ascend/ascend-toolkit/latest + else + export ASCEND_HOME_DIR=/usr/local/Ascend/ascend-toolkit/latest + fi +fi +source $ASCEND_HOME_DIR/bin/setenv.bash + +export DDK_PATH=$ASCEND_HOME_DIR +arch=$(uname -m) +export NPU_HOST_LIB=$ASCEND_HOME_DIR/${arch}-linux/lib64 + +function main { + # 1. 清除遗留生成文件和日志文件 + rm -rf $HOME/ascend/log/* + rm ./input/*.bin + rm ./output/*.bin + + # 2. 生成输入数据和真值数据 + cd $CURRENT_DIR + python3 scripts/gen_data.py + if [ $? 
-ne 0 ]; then + echo "ERROR: generate input data failed!" + return 1 + fi + echo "INFO: generate input data success!" + + # 3. 编译acl可执行文件 + cd $CURRENT_DIR; rm -rf build; mkdir -p build; cd build + cmake ../src + if [ $? -ne 0 ]; then + echo "ERROR: cmake failed!" + return 1 + fi + echo "INFO: cmake success!" + make + if [ $? -ne 0 ]; then + echo "ERROR: make failed!" + return 1 + fi + echo "INFO: make success!" + + # 4. 运行可执行文件 + cd $CURRENT_DIR/output + echo "INFO: execute op!" + ./execute_op + + if [ $? -ne 0 ]; then + echo "ERROR: acl executable run failed! please check your project!" + return 1 + fi + echo "INFO: acl executable run success!" + + # 5. 比较真值文件 + cd $CURRENT_DIR + python3 scripts/verify_result.py +} + +main diff --git a/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/scripts/gen_data.py b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/scripts/gen_data.py new file mode 100644 index 00000000..6e07f836 --- /dev/null +++ b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/scripts/gen_data.py @@ -0,0 +1,145 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +import copy +import os +import numpy as np + +# 获取项目路径 +_CURRENT_PATH = os.path.dirname(os.path.abspath(__file__)) +_PROJECT_PATH = os.path.dirname(_CURRENT_PATH) +_INPUT_PATH = os.path.join(_PROJECT_PATH, "./input") +_OUTPUT_PATH = os.path.join(_PROJECT_PATH, "./output") + +_DIM_0 = 2000000 +_DIM_1 = 564096 +_DIM_2 = 32 + + +def _gather(input_data, indices): + out = np.zeros((len(indices), input_data.shape[1])) + for i, index_ in enumerate(indices): + # 跳过index小于0的数据 + if index_[0] < 0: + continue + out[i] = input_data[index_[0]] + return out + + +def _scatter_nd_update(momentum, indices, update_value): + out = copy.deepcopy(momentum) + for i, index_ in enumerate(indices): + if index_[0] < 0: + continue + else: + out[index_[0]] = update_value[i] + return out + + +def _scatter_nd_add(momentum, indices, update_value): + out = copy.deepcopy(momentum) + for i, index_ in enumerate(indices): + if index_[0] < 0: + continue + else: + out[indices[i][0]] = out[index_[0]] + update_value[i] + return out + + +def _gen_input_data(): + range_start = 1 + range_end = 2 + + dtype_chose = np.float32 + shape0 = (_DIM_0, _DIM_2) + indices_shape = (_DIM_1, 1) + grad_shape = (_DIM_1, _DIM_2) + + input_var = np.random.uniform(range_start, range_end, size=shape0).astype(dtype_chose) # shape [2000000,32] + input_m = np.random.uniform(range_start, range_end, size=shape0).astype(dtype_chose) # shape [2000000,32] + input_v = np.random.uniform(range_start, range_end, size=shape0).astype(dtype_chose) # shape [2000000,32] + + # indices shape [564096,1] + indices = np.random.permutation(np.arange(_DIM_0)).astype(np.int32)[:indices_shape[0]].reshape(-1, 1) + # gradient shape [564096,32] + gradient = np.random.uniform(range_start, range_end, 
size=grad_shape).astype(dtype_chose) + + if not os.path.exists(_INPUT_PATH): + os.makedirs(_INPUT_PATH) + indices.tofile(os.path.join(_INPUT_PATH, "indices.bin")) + gradient.tofile(os.path.join(_INPUT_PATH, "gradient.bin")) + input_m.tofile(os.path.join(_INPUT_PATH, "inputM.bin")) + input_v.tofile(os.path.join(_INPUT_PATH, "inputV.bin")) + input_var.tofile(os.path.join(_INPUT_PATH, "inputVar.bin")) + + +def _gen_golden_data(): + beta1 = 0.9 + beta2 = 0.999 + lr = 0.001 + epsilon = 1e-7 + + lr = np.array(lr).astype(np.float32) + beta1 = np.array(beta1).astype(np.float32) + beta2 = np.array(beta2).astype(np.float32) + epsilon = np.array(epsilon).astype(np.float32) + + lr.tofile(os.path.join(_INPUT_PATH, "learningRate.bin")) + + indices = np.fromfile(os.path.join(_INPUT_PATH, "indices.bin"), dtype=np.int32).reshape( + (_DIM_1, 1)) # shape (564096,1) + gradient = np.fromfile(os.path.join(_INPUT_PATH, "gradient.bin"), dtype=np.float32).reshape( + (_DIM_1, _DIM_2)) # shape (564096,32) + input_m = np.fromfile(os.path.join(_INPUT_PATH, "inputM.bin"), dtype=np.float32).reshape( + (_DIM_0, _DIM_2)) # shape (2000000,32) + input_v = np.fromfile(os.path.join(_INPUT_PATH, "inputV.bin"), dtype=np.float32).reshape( + (_DIM_0, _DIM_2)) # shape (2000000,32) + input_var = np.fromfile(os.path.join(_INPUT_PATH, "inputVar.bin"), dtype=np.float32).reshape( + (_DIM_0, _DIM_2)) # shape (2000000,32) + + old_m_slice = _gather(input_m, indices) # shape(564096,32) + old_m_slice = np.array(old_m_slice).astype(np.float32) # + update_m = beta1 * old_m_slice + (1 - beta1) * gradient + out_m = _scatter_nd_update(input_m, indices, update_m) + + old_v_slice = _gather(input_v, indices) + old_v_slice = np.array(old_v_slice).astype(np.float32) + update_v = beta2 * old_v_slice + (1 - beta2) * np.square(gradient) + out_v = _scatter_nd_update(input_v, indices, update_v) + + denominator_slice = np.sqrt(update_v) + epsilon + update_var = np.divide(-lr * update_m, denominator_slice) + out_var = _scatter_nd_add(input_var, indices, update_var) + + return out_m, out_v, out_var + + +def _gen_input_and_golden_data(): + # 产生输入数据 + _gen_input_data() + + # 产生真值数据 + out_m, out_v, out_var = _gen_golden_data() + if not os.path.exists(_OUTPUT_PATH): + os.makedirs(_OUTPUT_PATH) + out_m.tofile(os.path.join(_OUTPUT_PATH, "goldenOutputM.bin")) + out_v.tofile(os.path.join(_OUTPUT_PATH, "goldenOutputV.bin")) + out_var.tofile(os.path.join(_OUTPUT_PATH, "goldenOutputVar.bin")) + + +if __name__ == "__main__": + _gen_input_and_golden_data() diff --git a/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/scripts/verify_result.py b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/scripts/verify_result.py new file mode 100644 index 00000000..1cc516db --- /dev/null +++ b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/scripts/verify_result.py @@ -0,0 +1,50 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +import logging +import numpy as np + +_LOSS_THRESHOLD = 1e-6 # 容忍偏差,一般fp16要求绝对误差和相对误差均不超过万分之一 +_MINIMUM = 10e-10 + +logging.getLogger().setLevel(logging.INFO) + + +def verify_result(real_result, golden): + real_result = np.fromfile(real_result, dtype=np.float32) # 从bin文件读取实际运算结果 + golden = np.fromfile(golden, dtype=np.float32) # 从bin文件读取预期运算结果 + result = np.abs(real_result - golden) # 计算运算结果和预期结果偏差 + deno = np.maximum(np.abs(real_result), np.abs(golden)) # 获取最大值并组成新数组 + result_atol = np.less_equal(result, _LOSS_THRESHOLD) # 计算绝对误差 + result_rtol = np.less_equal(result / np.add(deno, _MINIMUM), _LOSS_THRESHOLD) # 计算相对误差 + if not result_rtol.all() and not result_atol.all(): + # 误差超出预期时返回打印错误,返回对比失败 + if np.sum(result_rtol == False) > real_result.size * _LOSS_THRESHOLD \ + and np.sum(result_atol == False) > real_result.size * _LOSS_THRESHOLD: + logging.error("[ERROR] output verify result error.") + return False + logging.info("output verify pass.") + return True + + +if __name__ == '__main__': + logging.info("start verify outputM.") + verify_result("output/outputM.bin", "output/goldenOutputM.bin") + logging.info("start verify outputV.") + verify_result("output/outputV.bin", "output/goldenOutputV.bin") + logging.info("start verify outputVar.") + verify_result("output/outputVar.bin", "output/goldenOutputVar.bin") diff --git a/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/CMakeLists.txt b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/CMakeLists.txt new file mode 100644 index 00000000..1642e3ca --- /dev/null +++ b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/CMakeLists.txt @@ -0,0 +1,67 @@ +# Copyright (c) Huawei Technologies Co., Ltd. 2024. All rights reserved. + +# CMake lowest version requirement +cmake_minimum_required(VERSION 3.5.1) + +# project information +project(acl_execute_lazy_adam) + +# Compile options +add_compile_options(-std=c++11) + +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "../output") +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "../output") + +set(INC_PATH $ENV{DDK_PATH}) + +if (NOT DEFINED ENV{DDK_PATH}) + set(INC_PATH "/usr/local/Ascend/ascend-toolkit/latest") + message(STATUS "set default INC_PATH: ${INC_PATH}") +else () + message(STATUS "env INC_PATH: ${INC_PATH}") +endif() + +set(CUST_PKG_PATH "${INC_PATH}/opp/vendors/customize_lazy_adam/op_api") + +set(LIB_PATH $ENV{NPU_HOST_LIB}) + +# Dynamic libraries in the stub directory can only be used for compilation +if (NOT DEFINED ENV{NPU_HOST_LIB}) + set(LIB_PATH "/usr/local/Ascend/ascend-toolkit/latest/acllib/lib64/stub/") + set(LIB_PATH1 "/usr/local/Ascend/ascend-toolkit/latest/atc/lib64/stub/") + message(STATUS "set default LIB_PATH: ${LIB_PATH}") +else () + message(STATUS "env LIB_PATH: ${LIB_PATH}") +endif() + +set(AUTO_GEN_PATH "../../lazy_adam/build_out/autogen") +# Header path +include_directories( + ${INC_PATH}/runtime/include + ${INC_PATH}/atc/include + ../inc + ${CUST_PKG_PATH}/include + ${AUTO_GEN_PATH} +) + +# add host lib path +link_directories( + ${LIB_PATH} + ${LIB_PATH1} + ${CUST_PKG_PATH}/lib +) + +add_executable(execute_op + main.cpp + common.cpp +) + +target_link_libraries(execute_op + ascendcl + cust_opapi + acl_op_compiler + nnopbase + stdc++ +) + +install(TARGETS execute_op DESTINATION ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}) diff --git a/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/common.cpp b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/common.cpp new file mode 100644 index 00000000..1c295bfc --- /dev/null +++ 
b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/common.cpp @@ -0,0 +1,85 @@ +/* Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and + limitations under the License. +==============================================================================*/ + +#include +#include +#include +#include + +#include "common.h" + +extern bool g_isDevice; + +bool ReadFile(const std::string& filePath, size_t fileSize, void* buffer, size_t bufferSize) +{ + struct stat sBuf; + int fileStatus = stat(filePath.data(), &sBuf); + if (fileStatus == -1) { + ERROR_LOG("failed to get file %s", filePath.c_str()); + return false; + } + if (S_ISREG(sBuf.st_mode) == 0) { + ERROR_LOG("%s is not a file, please enter a file", filePath.c_str()); + return false; + } + + std::ifstream file; + file.open(filePath, std::ios::binary); + if (!file.is_open()) { + ERROR_LOG("Open file failed. path = %s", filePath.c_str()); + return false; + } + + std::filebuf* buf = file.rdbuf(); + size_t size = buf->pubseekoff(0, std::ios::end, std::ios::in); + if (size == 0) { + ERROR_LOG("file size is 0"); + file.close(); + return false; + } + if (size > bufferSize) { + ERROR_LOG("file size is larger than buffer size"); + file.close(); + return false; + } + buf->pubseekpos(0, std::ios::in); + buf->sgetn(static_cast(buffer), size); + fileSize = size; + file.close(); + return true; +} + +bool WriteFile(const std::string& filePath, const void* buffer, size_t size) +{ + if (buffer == nullptr) { + ERROR_LOG("Write file failed. buffer is nullptr"); + return false; + } + + int fd = open(filePath.c_str(), O_RDWR | O_CREAT | O_TRUNC, S_IRUSR | S_IWRITE); + if (fd < 0) { + ERROR_LOG("Open file failed. path = %s", filePath.c_str()); + return false; + } + + auto writeSize = write(fd, buffer, size); + (void) close(fd); + if (writeSize != size) { + ERROR_LOG("Write file Failed."); + return false; + } + + return true; +} diff --git a/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/main.cpp b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/main.cpp new file mode 100644 index 00000000..f32efcaa --- /dev/null +++ b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/main.cpp @@ -0,0 +1,226 @@ +/* Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and + limitations under the License. 
+==============================================================================*/ + +#include +#include +#include +#include +#include +#include + +#include "acl/acl.h" +#include "aclnn_lazy_adam.h" +#include "common.h" +#include "op_runner.h" + +bool g_isDevice = false; +int g_deviceId = 0; +constexpr int DIM0 = 2000000; // inputM inputV inputVar 的行数 +constexpr int DIM1 = 564096; // indices长度 +constexpr int DIM2 = 32; // inputM inputV inputVar gradient等每行的数据个数 +constexpr int INPUT_M_INDEX = 2; +constexpr int INPUT_V_INDEX = 3; +constexpr int INPUT_VAR_INDEX = 4; +constexpr int LEARNING_RATE_INDEX = 5; +constexpr int OUTPUT_M_INDEX = 0; +constexpr int OUTPUT_V_INDEX = 1; +constexpr int OUTPUT_VAR_INDEX = 2; +constexpr float LEARNING_RATE = 0.001; +constexpr float BETA1 = 0.9; +constexpr float BETA2 = 0.999; +constexpr float EPSILON = 1e-7; +const char* READ_ERROR_INFO = "read input file error, please check whether file exist and access rights is correct"; +const char* WRITE_ERROR_INFO = "write output file error, please check access rights is correct"; + +OperatorDesc CreateOpDesc() +{ + std::vector indicesShape{DIM1, 1}; + std::vector gradientShape{DIM1, DIM2}; + std::vector inputMShape{DIM0, DIM2}; // inputM inputV inputVar 的shape相同 + std::vector learningRateShape{1}; + aclDataType dataType = ACL_FLOAT; + aclDataType indexDataType = ACL_INT32; + aclFormat format = ACL_FORMAT_ND; + OperatorDesc opDesc; + opDesc.AddInputTensorDesc(dataType, gradientShape.size(), gradientShape.data(), format); + opDesc.AddInputTensorDesc(indexDataType, indicesShape.size(), indicesShape.data(), format); + opDesc.AddInputTensorDesc(dataType, inputMShape.size(), inputMShape.data(), format); // inputM + opDesc.AddInputTensorDesc(dataType, inputMShape.size(), inputMShape.data(), format); // inputV + opDesc.AddInputTensorDesc(dataType, inputMShape.size(), inputMShape.data(), format); // inputVar + opDesc.AddInputTensorDesc(dataType, learningRateShape.size(), learningRateShape.data(), format); // learningRate + opDesc.beta1 = BETA1; + opDesc.beta2 = BETA2; + opDesc.epsilon = EPSILON; + return opDesc; +} + +bool SetInputData(OpRunner& runner) +{ + size_t fileSize = 0; + if (!ReadFile("../input/gradient.bin", fileSize, runner.GetInputBuffer(0), runner.GetInputSize(0))) { + throw std::runtime_error(READ_ERROR_INFO); + } + if (!ReadFile("../input/indices.bin", fileSize, runner.GetInputBuffer(1), runner.GetInputSize(1))) { + throw std::runtime_error(READ_ERROR_INFO); + } + if (!ReadFile("../input/inputM.bin", fileSize, runner.GetInputBuffer(INPUT_M_INDEX), + runner.GetInputSize(INPUT_M_INDEX))) { + throw std::runtime_error(READ_ERROR_INFO); + } + if (!ReadFile("../input/inputV.bin", fileSize, runner.GetInputBuffer(INPUT_V_INDEX), + runner.GetInputSize(INPUT_V_INDEX))) { + throw std::runtime_error(READ_ERROR_INFO); + } + if (!ReadFile("../input/inputVar.bin", fileSize, runner.GetInputBuffer(INPUT_VAR_INDEX), + runner.GetInputSize(INPUT_VAR_INDEX))) { + throw std::runtime_error(READ_ERROR_INFO); + } + if (!ReadFile("../input/learningRate.bin", fileSize, runner.GetInputBuffer(LEARNING_RATE_INDEX), + runner.GetInputSize(LEARNING_RATE_INDEX))) { + throw std::runtime_error(READ_ERROR_INFO); + } + INFO_LOG("Set input success"); + return true; +} + +bool ProcessOutputData(OpRunner& runner) +{ + // 保存输出数据 由于输出仅有hostOutputs_数据,未设置outputDesc,因此数据size从inputTensor获取 + if (!WriteFile("../output/outputM.bin", runner.GetOutputBuffer(OUTPUT_M_INDEX), + runner.GetInputSize(INPUT_M_INDEX))) { + throw std::runtime_error(WRITE_ERROR_INFO); + 
} + if (!WriteFile("../output/outputV.bin", runner.GetOutputBuffer(OUTPUT_V_INDEX), + runner.GetInputSize(INPUT_V_INDEX))) { + throw std::runtime_error(WRITE_ERROR_INFO); + } + if (!WriteFile("../output/outputVar.bin", runner.GetOutputBuffer(OUTPUT_VAR_INDEX), + runner.GetInputSize(INPUT_VAR_INDEX))) { + throw std::runtime_error(WRITE_ERROR_INFO); + } + INFO_LOG("Write output success"); + return true; +} + +void DestroyResource() +{ + bool flag = false; + if (aclrtResetDevice(g_deviceId) != ACL_SUCCESS) { + ERROR_LOG("Reset device %d failed", g_deviceId); + flag = true; + } + INFO_LOG("Reset Device success"); + if (aclFinalize() != ACL_SUCCESS) { + ERROR_LOG("Finalize acl failed"); + flag = true; + } + if (flag) { + ERROR_LOG("Destroy resource failed"); + } else { + INFO_LOG("Destroy resource success"); + } +} + +bool InitResource() +{ + std::string output = "../output"; + if (access(output.c_str(), 0) == -1) { + int ret = mkdir(output.c_str(), 0700); + if (ret == 0) { + INFO_LOG("Make output directory successfully"); + } else { + ERROR_LOG("Make output directory fail"); + return false; + } + } + + // acl.json is dump or profiling config file + if (aclInit(NULL) != ACL_SUCCESS) { + ERROR_LOG("acl init failed"); + return false; + } + + if (aclrtSetDevice(g_deviceId) != ACL_SUCCESS) { + ERROR_LOG("Set device failed. g_deviceId is %d", g_deviceId); + (void) aclFinalize(); + return false; + } + INFO_LOG("Set device[%d] success", g_deviceId); + + // runMode is ACL_HOST which represents app is running in host + // runMode is ACL_DEVICE which represents app is running in device + aclrtRunMode runMode; + if (aclrtGetRunMode(&runMode) != ACL_SUCCESS) { + ERROR_LOG("Get run mode failed"); + DestroyResource(); + return false; + } + g_isDevice = (runMode == ACL_DEVICE); + INFO_LOG("Get RunMode[%d] success", runMode); + + return true; +} + +bool RunOp() +{ + // create op desc + OperatorDesc opDesc = CreateOpDesc(); + + // create Runner + OpRunner opRunner(&opDesc); + if (!opRunner.Init()) { + ERROR_LOG("Init OpRunner failed"); + return false; + } + + // Load inputs + if (!SetInputData(opRunner)) { + ERROR_LOG("Set input data failed"); + return false; + } + + // Run op + if (!opRunner.RunOp()) { + ERROR_LOG("Run op failed"); + return false; + } + + // process output data + if (!ProcessOutputData(opRunner)) { + ERROR_LOG("Process output data failed"); + return false; + } + + INFO_LOG("Run op success"); + return true; +} + +int main(int argc, char** argv) +{ + if (!InitResource()) { + ERROR_LOG("Init resource failed"); + return FAILED; + } + INFO_LOG("Init resource success"); + + if (!RunOp()) { + DestroyResource(); + return FAILED; + } + + DestroyResource(); + + return SUCCESS; +} -- Gitee From f27dd548deda4eb170eee12d862e54b516ff54df Mon Sep 17 00:00:00 2001 From: sihaixianyu Date: Tue, 30 Apr 2024 02:59:28 +0000 Subject: [PATCH 079/302] =?UTF-8?q?!112=20=E9=80=82=E9=85=8D=E5=88=87?= =?UTF-8?q?=E5=9B=BE=E5=8A=9F=E8=83=BD=E7=9A=84LittleDemo?= =?UTF-8?q?=E6=A8=A1=E5=9E=8B=E7=94=A8=E4=BE=8B=20*=20=E6=B7=BB=E5=8A=A0?= =?UTF-8?q?=E5=88=87=E5=9B=BE=E5=8A=9F=E8=83=BD=E7=9A=84LittleDemo=E6=A8=A1=E5=9E=8B=E7=94=A8=E4=BE=8B=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/demo/little_demo_estimator/main.py | 22 ++++++++++--------- .../little_demo_estimator/nn_model_build.py | 2 +- .../little_demo_estimator/nn_model_input.py | 2 +- examples/demo/little_demo_estimator/run.sh | 3 +-- 4 files changed, 15 insertions(+), 14 deletions(-) diff --git 
a/examples/demo/little_demo_estimator/main.py b/examples/demo/little_demo_estimator/main.py index de0b6c86..cca5a7a5 100644 --- a/examples/demo/little_demo_estimator/main.py +++ b/examples/demo/little_demo_estimator/main.py @@ -19,16 +19,14 @@ import argparse import os import tensorflow as tf - from mx_rec.util.initialize import init, terminate_config_initializer -from mx_rec.util.communication.hccl_ops import get_rank_id from mx_rec.core.asc.helper import FeatureSpec from mx_rec.graph.modifier import GraphModifierHook -from mx_rec.graph.acg_push_ops import ACGPushOpsToDatasetHook +from mx_rec.graph.hooks import OrphanLookupKeySlicerHook, LookupSubgraphSlicerHook from mx_rec.core.feature_process import EvictHook from mx_rec.util.log import logger -from tf_adapter import NPURunConfig, NPUEstimator, npu_hooks_append, DumpConfig +from tf_adapter import NPURunConfig, NPUEstimator, npu_hooks_append from nn_reader import input_fn from nn_model_input import get_model_fn from config import Config @@ -58,10 +56,12 @@ def main(params, config): # access_threshold unit counts; eviction_threshold unit seconds access_and_evict = None - if not params.enable_push_ops_test: + if not params.enable_slicer_test: hooks_list = [GraphModifierHook(modify_graph=params.modify_graph)] else: - hooks_list = [ACGPushOpsToDatasetHook(dump_graph=True), GraphModifierHook(modify_graph=params.modify_graph)] + orphan_slicer_hook = OrphanLookupKeySlicerHook() + lookup_slicer_hook = LookupSubgraphSlicerHook(op_types=["StringToNumber"]) + hooks_list = [orphan_slicer_hook, lookup_slicer_hook, GraphModifierHook(modify_graph=params.modify_graph)] if params.use_timestamp: config_for_user_table = dict(access_threshold=config.access_threshold, @@ -89,12 +89,14 @@ def main(params, config): train_spec = tf.estimator.TrainSpec(input_fn=lambda: input_fn(params, create_fs_params, config, use_one_shot=args.use_one_shot), max_steps=params.max_steps, hooks=npu_hooks_append(hooks_list)) - # 在开启evict时,eval时不支持淘汰,所以无需加入evict hook - if not params.enable_push_ops_test: + if not params.enable_slicer_test: + # 在开启evict时,eval时不支持淘汰,所以无需加入evict hook eval_hook_list = [GraphModifierHook(modify_graph=params.modify_graph)] else: - eval_hook_list = [ACGPushOpsToDatasetHook(dump_graph=True), + orphan_slicer_hook = OrphanLookupKeySlicerHook() + lookup_slicer_hook = LookupSubgraphSlicerHook(op_types=["StringToNumber"]) + eval_hook_list = [orphan_slicer_hook, lookup_slicer_hook, GraphModifierHook(modify_graph=params.modify_graph)] eval_spec = tf.estimator.EvalSpec(input_fn=lambda: input_fn(params, create_fs_params, config, is_eval=True, @@ -165,7 +167,7 @@ if __name__ == '__main__': MODIFY_GRAPH_FLAG = bool(int(os.getenv("USE_MODIFY_GRAPH", 0))) USE_TIMESTAMP = bool(int(os.getenv("USE_TIMESTAMP", 0))) args.use_one_shot = bool(int(os.getenv("USE_ONE_SHOT", 0))) - args.enable_push_ops_test = bool(int(os.getenv("ENABLE_PUSH_OPS_TEST", 0))) + args.enable_slicer_test = bool(int(os.getenv("ENABLE_SLICER_TEST", 0))) except ValueError as err: raise ValueError("please correctly config USE_MPI or USE_DYNAMIC or USE_DYNAMIC_EXPANSION or " "USE_MULTI_LOOKUP or USE_MODIFY_GRAPH or USE_TIMESTAMP or USE_ONE_SHOT " diff --git a/examples/demo/little_demo_estimator/nn_model_build.py b/examples/demo/little_demo_estimator/nn_model_build.py index 67820d04..11faadf1 100644 --- a/examples/demo/little_demo_estimator/nn_model_build.py +++ b/examples/demo/little_demo_estimator/nn_model_build.py @@ -155,7 +155,7 @@ class LittleModel: optimizer_list=sparse_optimizer_list) if 
diff --git a/examples/demo/little_demo_estimator/nn_model_input.py b/examples/demo/little_demo_estimator/nn_model_input.py
index d6ebb529..973a457c 100644
--- a/examples/demo/little_demo_estimator/nn_model_input.py
+++ b/examples/demo/little_demo_estimator/nn_model_input.py
@@ -29,7 +29,7 @@ def get_model_fn(create_fs_params, cfg, access_and_evict_config_dict=None):
         if params.use_timestamp:
             model = LittleModel(params, cfg, mode, features, create_fs_params,
                                 access_and_evict_config_dict=access_and_evict_config_dict)
-            tf.add_to_collection(ASCEND_TIMESTAMP, features["timestamp"])
+            tf.compat.v1.add_to_collection(ASCEND_TIMESTAMP, features["timestamp"])
         else:
             model = LittleModel(params, cfg, mode, features, create_fs_params)
     else:
diff --git a/examples/demo/little_demo_estimator/run.sh b/examples/demo/little_demo_estimator/run.sh
index 6534fb21..f3d34c82 100644
--- a/examples/demo/little_demo_estimator/run.sh
+++ b/examples/demo/little_demo_estimator/run.sh
@@ -104,8 +104,7 @@ export KEY_PROCESS_THREAD_NUM=6 #default 6, max 10
 export FAST_UNIQUE=0 #if use fast unique
 export MGMT_HBM_TASK_MODE=0 #if async h2d (get and send tensors)
 ################## Test settings #####################
-# NOTE: enable only when testing constant/string related ops as sparse table inputs; the current version supports TF1 only.
-export ENABLE_PUSH_OPS_TEST=0
+export ENABLE_SLICER_TEST=0
 
 # Help message, no modification needed
 if [[ $1 == --help || $1 == -h ]];then
--
Gitee

From 564f2c2ad7334a7938614bc8a884701a0c0e7fbe Mon Sep 17 00:00:00 2001
From: yxy1684 <2270320041@qq.com>
Date: Tue, 30 Apr 2024 11:47:08 +0800
Subject: [PATCH 080/302] cleancode

---
 .../op_host/embedding_update_by_address.cpp | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/cust_op/cust_op_by_addr/op_host/embedding_update_by_address.cpp b/cust_op/cust_op_by_addr/op_host/embedding_update_by_address.cpp
index 5f823889..43d7a886 100644
--- a/cust_op/cust_op_by_addr/op_host/embedding_update_by_address.cpp
+++ b/cust_op/cust_op_by_addr/op_host/embedding_update_by_address.cpp
@@ -28,7 +28,7 @@ namespace optiling
     constexpr int32_t SIZE_OF_HALF = 2;
     constexpr int32_t SIZE_OF_FLOAT_OR_INT = 4;
     constexpr int32_t MIN_BLOCK_SIZE = 32; // data in UB memory must be 32-byte aligned
-    constexpr uint32_t UB_LIMIT = 175 * 1024;
+    constexpr uint64_t UB_LIMIT = 175 * 1024;
    constexpr int32_t USR_SIZE = 256;
    constexpr int32_t SYS_WORKSPACE_SIZE = 16 * 1024 * 1024;
    constexpr int32_t PING_PONG_NUM = 1;
@@ -44,7 +44,7 @@ namespace optiling
         return ge::GRAPH_SUCCESS;
     }
 
-    static ge::graphStatus CheckPositiveInt(int32_t value, const char *errorMessage)
+    static ge::graphStatus CheckPositiveInt(int64_t value, const char *errorMessage)
     {
         if (value < 0) {
             printf("%s can not be smaller than 0\n", errorMessage);
@@ -73,7 +73,7 @@ namespace optiling
             return ge::GRAPH_FAILED;
         }
 
-        int32_t inputShape = static_cast<int32_t>(inputTensor->GetShapeSize());
+        int64_t inputShape = inputTensor->GetShapeSize();
         if (CheckPositiveInt(inputShape, "inputShape") != ge::GRAPH_SUCCESS) {
             return ge::GRAPH_FAILED;
         }
@@ -83,8 +83,8 @@ namespace optiling
             return ge::GRAPH_FAILED;
         }
 
-        const int32_t inputShapeTmp = (inputShape > 0) ? inputShape : 1;
-        int32_t inputDim = static_cast<int32_t>(inputTensor1->GetShapeSize()) / inputShapeTmp;
+        const int64_t inputShapeTmp = (inputShape > 0) ? inputShape : 1;
+        int64_t inputDim = inputTensor1->GetShapeSize() / inputShapeTmp;
         if (CheckPositiveInt(inputDim, "inputDim") != ge::GRAPH_SUCCESS) {
             return ge::GRAPH_FAILED;
         }
@@ -122,8 +122,8 @@ namespace optiling
         int32_t occupyAddressBytesNum = sizeof(int64_t) + typeSize * inputDimAligned * PING_PONG_NUM * 2;
 
        // max number of addresses handled per loop; the addresses themselves are also moved into UB, so align to 32
-        int32_t addrPerLoop = static_cast<int32_t>(
-            UB_LIMIT / static_cast<uint32_t>(occupyAddressBytesNum) & (~3U)); // & (~3U) keeps the address count a multiple of 4
+        int64_t addrPerLoop = static_cast<int64_t>(
+            UB_LIMIT / static_cast<uint64_t>(occupyAddressBytesNum) & (~3U)); // & (~3U) keeps the address count a multiple of 4
 
         if (CheckPositiveInt(addrPerLoop, "addrPerLoop") != ge::GRAPH_SUCCESS) {
             return ge::GRAPH_FAILED;
--
Gitee
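The addrPerLoop bound above divides the UB byte budget by the bytes one
address occupies (an 8-byte address plus the ping-pong buffered row data),
then clears the low two bits so the count is a multiple of 4. A worked check
of that arithmetic, assuming sample values typeSize=4 and inputDimAligned=32
(these two are illustrative and not fixed by the patch):

    UB_LIMIT = 175 * 1024                 # UB byte budget from the hunk above
    PING_PONG_NUM = 1
    type_size, input_dim_aligned = 4, 32  # assumed: float rows of 32 elements
    occupy = 8 + type_size * input_dim_aligned * PING_PONG_NUM * 2  # sizeof(int64_t) == 8
    addr_per_loop = (UB_LIMIT // occupy) & ~3  # & ~3 rounds down to a multiple of 4
    assert addr_per_loop == 676 and addr_per_loop % 4 == 0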
From 19f0fa3308abe6eddc2c7b4aaed07317874289e0 Mon Sep 17 00:00:00 2001
From: penghuiyang <1060916628@qq.com>
Date: Tue, 30 Apr 2024 15:41:39 +0800
Subject: [PATCH 081/302] Fix aclnn test CI gate issues
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../aclnn_lazy_adam_test/inc/common.h         |  35 +-
 .../aclnn_lazy_adam_test/inc/op_runner.h      | 321 +++++++++---------
 .../aclnn_lazy_adam_test/inc/operator_desc.h  |  73 ++--
 .../aclnn_lazy_adam_test/src/common.cpp       | 107 +++---
 .../aclnn_lazy_adam_test/src/main.cpp         |  39 +--
 5 files changed, 290 insertions(+), 285 deletions(-)

diff --git a/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/inc/common.h b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/inc/common.h
index ba754761..601a2617 100644
--- a/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/inc/common.h
+++ b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/inc/common.h
@@ -24,6 +24,7 @@ See the License for the specific language governing permissions and
 
 #include "acl/acl.h"
 
+namespace AclnnLazyAdam {
 #define SUCCESS 0
 #define FAILED 1
 
@@ -31,21 +32,21 @@
 #define INFO_LOG(fmt, args...) fprintf(stdout, "[INFO] " fmt "\n", ##args)
 #define WARN_LOG(fmt, args...) fprintf(stdout, "[WARN] " fmt "\n", ##args)
 #define ERROR_LOG(fmt, args...) 
fprintf(stderr, "[ERROR] " fmt "\n", ##args) -/** - * @brief Read data from file - * @param [in] filePath: file path - * @param [out] fileSize: file size - * @return read result - */ -bool ReadFile(const std::string &filePath, size_t fileSize, void *buffer, size_t bufferSize); - -/** - * @brief Write data to file - * @param [in] filePath: file path - * @param [in] buffer: data to write to file - * @param [in] size: size to write - * @return write result - */ -bool WriteFile(const std::string &filePath, const void *buffer, size_t size); - + /** + * @brief Read data from file + * @param [in] filePath: file path + * @param [out] fileSize: file size + * @return read result + */ + bool ReadFile(const std::string &filePath, size_t fileSize, void *buffer, size_t bufferSize); + + /** + * @brief Write data to file + * @param [in] filePath: file path + * @param [in] buffer: data to write to file + * @param [in] size: size to write + * @return write result + */ + bool WriteFile(const std::string &filePath, const void *buffer, size_t size); +} #endif // COMMON_H diff --git a/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/inc/op_runner.h b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/inc/op_runner.h index ed432a1e..cfb6a1b7 100644 --- a/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/inc/op_runner.h +++ b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/inc/op_runner.h @@ -21,168 +21,173 @@ See the License for the specific language governing permissions and #include "common.h" #include "operator_desc.h" -/** - * Op Runner - */ -class OpRunner { -public: +namespace AclnnLazyAdam { /** - * @brief Constructor - * @param [in] opDesc: op description + * Op Runner */ - explicit OpRunner(OperatorDesc *opDesc); - - /** - * @brief Destructor - */ - virtual ~OpRunner(); - - /** - * @brief Init op runner - */ - bool Init(); - - /** - * @brief Get number of inputs - * @return number of inputs - */ - const size_t NumInputs(); - - /** - * @brief Get number of outputs - * @return number of outputs - */ - const size_t NumOutputs(); - - /** - * @brief Get input size by index - * @param [in] index: input index - * @return size of the input - */ - const size_t GetInputSize(size_t index) const; - const size_t GetInputNumDims(size_t index) const; - aclDataType GetInputDataType(size_t index) const; - aclFormat GetInputFormat(size_t index) const; - - /** - * @brief Get output size by index - * @param [in] index: output index - * @return size of the output - */ - size_t GetOutputSize(size_t index) const; - const size_t GetOutputNumDims(size_t index) const; - aclDataType GetOutputDataType(size_t index) const; - aclFormat GetOutputFormat(size_t index) const; - - /** - * @brief Get input element count by index - * @param i[in] ndex: input index - * @return element count of the input - */ - size_t GetInputElementCount(size_t index) const; - - /** - * @brief Get output element count by index - * @param [in] index: output index - * @return element count of the output - */ - size_t GetOutputElementCount(size_t index) const; - - /** - * @brief Get input shape by index - * @param [in] index: input index - * @return shape of the output - */ - std::vector GetInputShape(size_t index) const; - - /** - * @brief Get output shape by index - * @param [in] index: output index - * @return shape of the output - */ - std::vector GetOutputShape(size_t index) const; - - /** - * @brief Get input buffer(host memory) by index - * @tparam T: data type - * @param [in] index: input index - * @return host address of the input - */ - template - T 
*GetInputBuffer(size_t index) - { - if (index >= numInputs_) { - ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); - return nullptr; + class OpRunner { + public: + /** + * @brief Constructor + * @param [in] opDesc: op description + */ + explicit OpRunner(OperatorDesc *opDesc); + + /** + * @brief Destructor + */ + virtual ~OpRunner(); + + /** + * @brief Init op runner + */ + bool Init(); + + /** + * @brief Get number of inputs + * @return number of inputs + */ + const size_t NumInputs(); + + /** + * @brief Get number of outputs + * @return number of outputs + */ + const size_t NumOutputs(); + + /** + * @brief Get input size by index + * @param [in] index: input index + * @return size of the input + */ + const size_t GetInputSize(size_t index) const; + + const size_t GetInputNumDims(size_t index) const; + + aclDataType GetInputDataType(size_t index) const; + + aclFormat GetInputFormat(size_t index) const; + + /** + * @brief Get output size by index + * @param [in] index: output index + * @return size of the output + */ + size_t GetOutputSize(size_t index) const; + + const size_t GetOutputNumDims(size_t index) const; + + aclDataType GetOutputDataType(size_t index) const; + + aclFormat GetOutputFormat(size_t index) const; + + /** + * @brief Get input element count by index + * @param i[in] ndex: input index + * @return element count of the input + */ + size_t GetInputElementCount(size_t index) const; + + /** + * @brief Get output element count by index + * @param [in] index: output index + * @return element count of the output + */ + size_t GetOutputElementCount(size_t index) const; + + /** + * @brief Get input shape by index + * @param [in] index: input index + * @return shape of the output + */ + std::vector GetInputShape(size_t index) const; + + /** + * @brief Get output shape by index + * @param [in] index: output index + * @return shape of the output + */ + std::vector GetOutputShape(size_t index) const; + + /** + * @brief Get input buffer(host memory) by index + * @tparam T: data type + * @param [in] index: input index + * @return host address of the input + */ + template + T *GetInputBuffer(size_t index) { + if (index >= numInputs_) { + ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); + return nullptr; + } + return reinterpret_cast(hostInputs_[index]); } - return reinterpret_cast(hostInputs_[index]); - } - /** - * @brief Get output buffer(host memory) by index - * @tparam T: data type - * @param [in] index: output index - * @return host address of the output - */ - template - const T *GetOutputBuffer(size_t index) - { - if (index >= numOutputs_) { - ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); - return nullptr; + /** + * @brief Get output buffer(host memory) by index + * @tparam T: data type + * @param [in] index: output index + * @return host address of the output + */ + template + const T *GetOutputBuffer(size_t index) { + if (index >= numOutputs_) { + ERROR_LOG("index out of range. 
index = %zu, numOutputs = %zu", index, numOutputs_); + return nullptr; + } + + return reinterpret_cast(hostOutputs_[index]); } - return reinterpret_cast(hostOutputs_[index]); - } - - /** - * @brief Print readable input by index - * @param [in] index: input index - * @param [in] numElementsPerRow: number of elements per row - */ - void PrintInput(size_t index, size_t numElementsPerRow = 16); - - /** - * @brief Print readable output by index - * @param [in] index: output index - * @param [in] numElementsPerRow: number of elements per row - */ - void PrintOutput(size_t index, size_t numElementsPerRow = 16); - - /** - * @brief Compile static op - * @return compile result - */ - bool CompileStaticOp(); - - /** - * @brief Compile dynamic op - * @return compile result - */ - bool CompileDynamicOp(); - - /** - * @brief Run op - * @return run result - */ - bool RunOp(); - -private: - size_t numInputs_; - size_t numOutputs_; - - std::vector inputBuffers_; - std::vector outputBuffers_; - - std::vector devInputs_; - std::vector devOutputs_; - - std::vector hostInputs_; - std::vector hostOutputs_; - - std::vector inputTensor_; - std::vector outputTensor_; - OperatorDesc *opDesc_; -}; - + /** + * @brief Print readable input by index + * @param [in] index: input index + * @param [in] numElementsPerRow: number of elements per row + */ + void PrintInput(size_t index, size_t numElementsPerRow = 16); + + /** + * @brief Print readable output by index + * @param [in] index: output index + * @param [in] numElementsPerRow: number of elements per row + */ + void PrintOutput(size_t index, size_t numElementsPerRow = 16); + + /** + * @brief Compile static op + * @return compile result + */ + bool CompileStaticOp(); + + /** + * @brief Compile dynamic op + * @return compile result + */ + bool CompileDynamicOp(); + + /** + * @brief Run op + * @return run result + */ + bool RunOp(); + + private: + size_t numInputs_; + size_t numOutputs_; + + std::vector inputBuffers_; + std::vector outputBuffers_; + + std::vector devInputs_; + std::vector devOutputs_; + + std::vector hostInputs_; + std::vector hostOutputs_; + + std::vector inputTensor_; + std::vector outputTensor_; + OperatorDesc *opDesc_; + }; +} #endif // OP_RUNNER_H diff --git a/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/inc/operator_desc.h b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/inc/operator_desc.h index 0c76260b..ddd3b3a9 100644 --- a/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/inc/operator_desc.h +++ b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/inc/operator_desc.h @@ -21,46 +21,47 @@ See the License for the specific language governing permissions and #include "acl/acl.h" -/** - * Op description - */ -struct OperatorDesc { +namespace AclnnLazyAdam { /** - * Constructor + * Op description */ - explicit OperatorDesc(); + struct OperatorDesc { + /** + * Constructor + */ + explicit OperatorDesc(); - /** - * Destructor - */ - virtual ~OperatorDesc(); + /** + * Destructor + */ + virtual ~OperatorDesc(); - /** - * Add an input tensor description - * @param [in] dataType: data type - * @param [in] numDims: number of dims - * @param [in] dims: dims - * @param [in] format: format - * @return OperatorDesc - */ - OperatorDesc &AddInputTensorDesc(aclDataType dataType, int numDims, const int64_t *dims, aclFormat format); - - /** - * Add an output tensor description - * @param [in] dataType: data type - * @param [in] numDims: number of dims - * @param [in] dims: dims - * @param [in] format: format - * @return OperatorDesc - */ - OperatorDesc 
&AddOutputTensorDesc(aclDataType dataType, int numDims, const int64_t *dims, aclFormat format); + /** + * Add an input tensor description + * @param [in] dataType: data type + * @param [in] numDims: number of dims + * @param [in] dims: dims + * @param [in] format: format + * @return OperatorDesc + */ + OperatorDesc &AddInputTensorDesc(aclDataType dataType, int numDims, const int64_t *dims, aclFormat format); - std::string opType; - std::vector inputDesc; - std::vector outputDesc; - double beta1; - double beta2; - double epsilon; -}; + /** + * Add an output tensor description + * @param [in] dataType: data type + * @param [in] numDims: number of dims + * @param [in] dims: dims + * @param [in] format: format + * @return OperatorDesc + */ + OperatorDesc &AddOutputTensorDesc(aclDataType dataType, int numDims, const int64_t *dims, aclFormat format); + std::string opType; + std::vector inputDesc; + std::vector outputDesc; + double beta1; + double beta2; + double epsilon; + }; +} #endif // OPERATOR_DESC_H diff --git a/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/common.cpp b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/common.cpp index 1c295bfc..1f353b68 100644 --- a/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/common.cpp +++ b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/common.cpp @@ -20,66 +20,63 @@ See the License for the specific language governing permissions and #include "common.h" -extern bool g_isDevice; +namespace AclnnLazyAdam { + bool ReadFile(const std::string &filePath, size_t fileSize, void *buffer, size_t bufferSize) { + struct stat sBuf; + int fileStatus = stat(filePath.data(), &sBuf); + if (fileStatus == -1) { + ERROR_LOG("failed to get file %s", filePath.c_str()); + return false; + } + if (S_ISREG(sBuf.st_mode) == 0) { + ERROR_LOG("%s is not a file, please enter a file", filePath.c_str()); + return false; + } -bool ReadFile(const std::string& filePath, size_t fileSize, void* buffer, size_t bufferSize) -{ - struct stat sBuf; - int fileStatus = stat(filePath.data(), &sBuf); - if (fileStatus == -1) { - ERROR_LOG("failed to get file %s", filePath.c_str()); - return false; - } - if (S_ISREG(sBuf.st_mode) == 0) { - ERROR_LOG("%s is not a file, please enter a file", filePath.c_str()); - return false; - } + std::ifstream file; + file.open(filePath, std::ios::binary); + if (!file.is_open()) { + ERROR_LOG("Open file failed. path = %s", filePath.c_str()); + return false; + } - std::ifstream file; - file.open(filePath, std::ios::binary); - if (!file.is_open()) { - ERROR_LOG("Open file failed. 
path = %s", filePath.c_str()); - return false; - } - - std::filebuf* buf = file.rdbuf(); - size_t size = buf->pubseekoff(0, std::ios::end, std::ios::in); - if (size == 0) { - ERROR_LOG("file size is 0"); + std::filebuf *buf = file.rdbuf(); + size_t size = buf->pubseekoff(0, std::ios::end, std::ios::in); + if (size == 0) { + ERROR_LOG("file size is 0"); + file.close(); + return false; + } + if (size > bufferSize) { + ERROR_LOG("file size is larger than buffer size"); + file.close(); + return false; + } + buf->pubseekpos(0, std::ios::in); + buf->sgetn(static_cast(buffer), size); + fileSize = size; file.close(); - return false; + return true; } - if (size > bufferSize) { - ERROR_LOG("file size is larger than buffer size"); - file.close(); - return false; - } - buf->pubseekpos(0, std::ios::in); - buf->sgetn(static_cast(buffer), size); - fileSize = size; - file.close(); - return true; -} -bool WriteFile(const std::string& filePath, const void* buffer, size_t size) -{ - if (buffer == nullptr) { - ERROR_LOG("Write file failed. buffer is nullptr"); - return false; - } + bool WriteFile(const std::string &filePath, const void *buffer, size_t size) { + if (buffer == nullptr) { + ERROR_LOG("Write file failed. buffer is nullptr"); + return false; + } + int fd = open(filePath.c_str(), O_RDWR | O_CREAT | O_TRUNC, S_IRUSR | S_IWRITE); + if (fd < 0) { + ERROR_LOG("Open file failed. path = %s", filePath.c_str()); + return false; + } - int fd = open(filePath.c_str(), O_RDWR | O_CREAT | O_TRUNC, S_IRUSR | S_IWRITE); - if (fd < 0) { - ERROR_LOG("Open file failed. path = %s", filePath.c_str()); - return false; - } + auto writeSize = write(fd, buffer, size); + (void) close(fd); + if (writeSize != size) { + ERROR_LOG("Write file Failed."); + return false; + } - auto writeSize = write(fd, buffer, size); - (void) close(fd); - if (writeSize != size) { - ERROR_LOG("Write file Failed."); - return false; + return true; } - - return true; -} +} \ No newline at end of file diff --git a/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/main.cpp b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/main.cpp index f32efcaa..526da630 100644 --- a/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/main.cpp +++ b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/main.cpp @@ -25,24 +25,28 @@ See the License for the specific language governing permissions and #include "common.h" #include "op_runner.h" +using namespace AclnnLazyAdam; + bool g_isDevice = false; int g_deviceId = 0; -constexpr int DIM0 = 2000000; // inputM inputV inputVar 的行数 -constexpr int DIM1 = 564096; // indices长度 -constexpr int DIM2 = 32; // inputM inputV inputVar gradient等每行的数据个数 -constexpr int INPUT_M_INDEX = 2; -constexpr int INPUT_V_INDEX = 3; -constexpr int INPUT_VAR_INDEX = 4; -constexpr int LEARNING_RATE_INDEX = 5; -constexpr int OUTPUT_M_INDEX = 0; -constexpr int OUTPUT_V_INDEX = 1; -constexpr int OUTPUT_VAR_INDEX = 2; -constexpr float LEARNING_RATE = 0.001; -constexpr float BETA1 = 0.9; -constexpr float BETA2 = 0.999; -constexpr float EPSILON = 1e-7; -const char* READ_ERROR_INFO = "read input file error, please check whether file exist and access rights is correct"; -const char* WRITE_ERROR_INFO = "write output file error, please check access rights is correct"; +namespace { + constexpr int DIM0 = 2000000; // inputM inputV inputVar 的行数 + constexpr int DIM1 = 564096; // indices长度 + constexpr int DIM2 = 32; // inputM inputV inputVar gradient等每行的数据个数 + constexpr int INPUT_M_INDEX = 2; + constexpr int INPUT_V_INDEX = 3; + constexpr int INPUT_VAR_INDEX = 4; + 
constexpr int LEARNING_RATE_INDEX = 5; + constexpr int OUTPUT_M_INDEX = 0; + constexpr int OUTPUT_V_INDEX = 1; + constexpr int OUTPUT_VAR_INDEX = 2; + constexpr float LEARNING_RATE = 0.001; + constexpr float BETA1 = 0.9; + constexpr float BETA2 = 0.999; + constexpr float EPSILON = 1e-7; + const char* READ_ERROR_INFO = "read input file error, please check whether file exist and access rights is correct"; + const char* WRITE_ERROR_INFO = "write output file error, please check access rights is correct"; +} OperatorDesc CreateOpDesc() { @@ -202,7 +206,6 @@ bool RunOp() ERROR_LOG("Process output data failed"); return false; } - INFO_LOG("Run op success"); return true; } @@ -219,8 +222,6 @@ int main(int argc, char** argv) DestroyResource(); return FAILED; } - DestroyResource(); - return SUCCESS; } -- Gitee From a1f85f8ff7cade87aab728915de571fbd76ebf17 Mon Sep 17 00:00:00 2001 From: penghuiyang <1060916628@qq.com> Date: Tue, 30 Apr 2024 16:21:10 +0800 Subject: [PATCH 082/302] =?UTF-8?q?=E9=97=A8=E7=A6=81=E4=BF=AE=E6=94=B92?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../fused_lazy_adam/aclnn_lazy_adam_test/inc/op_runner.h | 6 ++++-- cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/common.cpp | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/inc/op_runner.h b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/inc/op_runner.h index cfb6a1b7..6f91f905 100644 --- a/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/inc/op_runner.h +++ b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/inc/op_runner.h @@ -116,7 +116,8 @@ namespace AclnnLazyAdam { * @return host address of the input */ template - T *GetInputBuffer(size_t index) { + T *GetInputBuffer(size_t index) + { if (index >= numInputs_) { ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); return nullptr; @@ -131,7 +132,8 @@ namespace AclnnLazyAdam { * @return host address of the output */ template - const T *GetOutputBuffer(size_t index) { + const T *GetOutputBuffer(size_t index) + { if (index >= numOutputs_) { ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); return nullptr; diff --git a/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/common.cpp b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/common.cpp index 1f353b68..e2cd6865 100644 --- a/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/common.cpp +++ b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/common.cpp @@ -21,7 +21,8 @@ See the License for the specific language governing permissions and #include "common.h" namespace AclnnLazyAdam { - bool ReadFile(const std::string &filePath, size_t fileSize, void *buffer, size_t bufferSize) { + bool ReadFile(const std::string &filePath, size_t fileSize, void *buffer, size_t bufferSize) + { struct stat sBuf; int fileStatus = stat(filePath.data(), &sBuf); if (fileStatus == -1) { @@ -59,7 +60,8 @@ namespace AclnnLazyAdam { return true; } - bool WriteFile(const std::string &filePath, const void *buffer, size_t size) { + bool WriteFile(const std::string &filePath, const void *buffer, size_t size) + { if (buffer == nullptr) { ERROR_LOG("Write file failed. 
buffer is nullptr"); return false; -- Gitee From fb0eacdee9bd361babe62083f91b1175abddb7b1 Mon Sep 17 00:00:00 2001 From: penghuiyang <1060916628@qq.com> Date: Tue, 30 Apr 2024 16:30:45 +0800 Subject: [PATCH 083/302] =?UTF-8?q?=E9=97=A8=E7=A6=81=E4=BF=AE=E6=94=B93?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../aclnn_lazy_adam_test/src/main.cpp | 289 +++++++++--------- 1 file changed, 145 insertions(+), 144 deletions(-) diff --git a/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/main.cpp b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/main.cpp index 526da630..c4253996 100644 --- a/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/main.cpp +++ b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/main.cpp @@ -46,168 +46,169 @@ namespace { constexpr float EPSILON = 1e-7; const char* READ_ERROR_INFO = "read input file error, please check whether file exist and access rights is correct"; const char* WRITE_ERROR_INFO = "write output file error, please check access rights is correct"; -} - -OperatorDesc CreateOpDesc() -{ - std::vector indicesShape{DIM1, 1}; - std::vector gradientShape{DIM1, DIM2}; - std::vector inputMShape{DIM0, DIM2}; // inputM inputV inputVar 的shape相同 - std::vector learningRateShape{1}; - aclDataType dataType = ACL_FLOAT; - aclDataType indexDataType = ACL_INT32; - aclFormat format = ACL_FORMAT_ND; - OperatorDesc opDesc; - opDesc.AddInputTensorDesc(dataType, gradientShape.size(), gradientShape.data(), format); - opDesc.AddInputTensorDesc(indexDataType, indicesShape.size(), indicesShape.data(), format); - opDesc.AddInputTensorDesc(dataType, inputMShape.size(), inputMShape.data(), format); // inputM - opDesc.AddInputTensorDesc(dataType, inputMShape.size(), inputMShape.data(), format); // inputV - opDesc.AddInputTensorDesc(dataType, inputMShape.size(), inputMShape.data(), format); // inputVar - opDesc.AddInputTensorDesc(dataType, learningRateShape.size(), learningRateShape.data(), format); // learningRate - opDesc.beta1 = BETA1; - opDesc.beta2 = BETA2; - opDesc.epsilon = EPSILON; - return opDesc; -} -bool SetInputData(OpRunner& runner) -{ - size_t fileSize = 0; - if (!ReadFile("../input/gradient.bin", fileSize, runner.GetInputBuffer(0), runner.GetInputSize(0))) { - throw std::runtime_error(READ_ERROR_INFO); - } - if (!ReadFile("../input/indices.bin", fileSize, runner.GetInputBuffer(1), runner.GetInputSize(1))) { - throw std::runtime_error(READ_ERROR_INFO); - } - if (!ReadFile("../input/inputM.bin", fileSize, runner.GetInputBuffer(INPUT_M_INDEX), - runner.GetInputSize(INPUT_M_INDEX))) { - throw std::runtime_error(READ_ERROR_INFO); - } - if (!ReadFile("../input/inputV.bin", fileSize, runner.GetInputBuffer(INPUT_V_INDEX), - runner.GetInputSize(INPUT_V_INDEX))) { - throw std::runtime_error(READ_ERROR_INFO); - } - if (!ReadFile("../input/inputVar.bin", fileSize, runner.GetInputBuffer(INPUT_VAR_INDEX), - runner.GetInputSize(INPUT_VAR_INDEX))) { - throw std::runtime_error(READ_ERROR_INFO); - } - if (!ReadFile("../input/learningRate.bin", fileSize, runner.GetInputBuffer(LEARNING_RATE_INDEX), - runner.GetInputSize(LEARNING_RATE_INDEX))) { - throw std::runtime_error(READ_ERROR_INFO); - } - INFO_LOG("Set input success"); - return true; -} - -bool ProcessOutputData(OpRunner& runner) -{ - // 保存输出数据 由于输出仅有hostOutputs_数据,未设置outputDesc,因此数据size从inputTensor获取 - if (!WriteFile("../output/outputM.bin", runner.GetOutputBuffer(OUTPUT_M_INDEX), - runner.GetInputSize(INPUT_M_INDEX))) { - throw std::runtime_error(WRITE_ERROR_INFO); - } - if 
(!WriteFile("../output/outputV.bin", runner.GetOutputBuffer(OUTPUT_V_INDEX), - runner.GetInputSize(INPUT_V_INDEX))) { - throw std::runtime_error(WRITE_ERROR_INFO); - } - if (!WriteFile("../output/outputVar.bin", runner.GetOutputBuffer(OUTPUT_VAR_INDEX), - runner.GetInputSize(INPUT_VAR_INDEX))) { - throw std::runtime_error(WRITE_ERROR_INFO); + OperatorDesc CreateOpDesc() + { + std::vector indicesShape{DIM1, 1}; + std::vector gradientShape{DIM1, DIM2}; + std::vector inputMShape{DIM0, DIM2}; // inputM inputV inputVar 的shape相同 + std::vector learningRateShape{1}; + aclDataType dataType = ACL_FLOAT; + aclDataType indexDataType = ACL_INT32; + aclFormat format = ACL_FORMAT_ND; + OperatorDesc opDesc; + opDesc.AddInputTensorDesc(dataType, gradientShape.size(), gradientShape.data(), format); + opDesc.AddInputTensorDesc(indexDataType, indicesShape.size(), indicesShape.data(), format); + opDesc.AddInputTensorDesc(dataType, inputMShape.size(), inputMShape.data(), format); // inputM + opDesc.AddInputTensorDesc(dataType, inputMShape.size(), inputMShape.data(), format); // inputV + opDesc.AddInputTensorDesc(dataType, inputMShape.size(), inputMShape.data(), format); // inputVar + opDesc.AddInputTensorDesc(dataType, learningRateShape.size(), learningRateShape.data(), + format); // learningRate + opDesc.beta1 = BETA1; + opDesc.beta2 = BETA2; + opDesc.epsilon = EPSILON; + return opDesc; + } + + bool SetInputData(OpRunner& runner) + { + size_t fileSize = 0; + if (!ReadFile("../input/gradient.bin", fileSize, runner.GetInputBuffer(0), runner.GetInputSize(0))) { + throw std::runtime_error(READ_ERROR_INFO); + } + if (!ReadFile("../input/indices.bin", fileSize, runner.GetInputBuffer(1), runner.GetInputSize(1))) { + throw std::runtime_error(READ_ERROR_INFO); + } + if (!ReadFile("../input/inputM.bin", fileSize, runner.GetInputBuffer(INPUT_M_INDEX), + runner.GetInputSize(INPUT_M_INDEX))) { + throw std::runtime_error(READ_ERROR_INFO); + } + if (!ReadFile("../input/inputV.bin", fileSize, runner.GetInputBuffer(INPUT_V_INDEX), + runner.GetInputSize(INPUT_V_INDEX))) { + throw std::runtime_error(READ_ERROR_INFO); + } + if (!ReadFile("../input/inputVar.bin", fileSize, runner.GetInputBuffer(INPUT_VAR_INDEX), + runner.GetInputSize(INPUT_VAR_INDEX))) { + throw std::runtime_error(READ_ERROR_INFO); + } + if (!ReadFile("../input/learningRate.bin", fileSize, runner.GetInputBuffer(LEARNING_RATE_INDEX), + runner.GetInputSize(LEARNING_RATE_INDEX))) { + throw std::runtime_error(READ_ERROR_INFO); + } + INFO_LOG("Set input success"); + return true; } - INFO_LOG("Write output success"); - return true; -} -void DestroyResource() -{ - bool flag = false; - if (aclrtResetDevice(g_deviceId) != ACL_SUCCESS) { - ERROR_LOG("Reset device %d failed", g_deviceId); - flag = true; - } - INFO_LOG("Reset Device success"); - if (aclFinalize() != ACL_SUCCESS) { - ERROR_LOG("Finalize acl failed"); - flag = true; - } - if (flag) { - ERROR_LOG("Destroy resource failed"); - } else { - INFO_LOG("Destroy resource success"); + bool ProcessOutputData(OpRunner& runner) + { + // 保存输出数据 由于输出仅有hostOutputs_数据,未设置outputDesc,因此数据size从inputTensor获取 + if (!WriteFile("../output/outputM.bin", runner.GetOutputBuffer(OUTPUT_M_INDEX), + runner.GetInputSize(INPUT_M_INDEX))) { + throw std::runtime_error(WRITE_ERROR_INFO); + } + if (!WriteFile("../output/outputV.bin", runner.GetOutputBuffer(OUTPUT_V_INDEX), + runner.GetInputSize(INPUT_V_INDEX))) { + throw std::runtime_error(WRITE_ERROR_INFO); + } + if (!WriteFile("../output/outputVar.bin", runner.GetOutputBuffer(OUTPUT_VAR_INDEX), 
+ runner.GetInputSize(INPUT_VAR_INDEX))) { + throw std::runtime_error(WRITE_ERROR_INFO); + } + INFO_LOG("Write output success"); + return true; } -} -bool InitResource() -{ - std::string output = "../output"; - if (access(output.c_str(), 0) == -1) { - int ret = mkdir(output.c_str(), 0700); - if (ret == 0) { - INFO_LOG("Make output directory successfully"); + void DestroyResource() + { + bool flag = false; + if (aclrtResetDevice(g_deviceId) != ACL_SUCCESS) { + ERROR_LOG("Reset device %d failed", g_deviceId); + flag = true; + } + INFO_LOG("Reset Device success"); + if (aclFinalize() != ACL_SUCCESS) { + ERROR_LOG("Finalize acl failed"); + flag = true; + } + if (flag) { + ERROR_LOG("Destroy resource failed"); } else { - ERROR_LOG("Make output directory fail"); - return false; + INFO_LOG("Destroy resource success"); } } - // acl.json is dump or profiling config file - if (aclInit(NULL) != ACL_SUCCESS) { - ERROR_LOG("acl init failed"); - return false; - } + bool InitResource() + { + std::string output = "../output"; + if (access(output.c_str(), 0) == -1) { + int ret = mkdir(output.c_str(), 0700); + if (ret == 0) { + INFO_LOG("Make output directory successfully"); + } else { + ERROR_LOG("Make output directory fail"); + return false; + } + } - if (aclrtSetDevice(g_deviceId) != ACL_SUCCESS) { - ERROR_LOG("Set device failed. g_deviceId is %d", g_deviceId); - (void) aclFinalize(); - return false; - } - INFO_LOG("Set device[%d] success", g_deviceId); + // acl.json is dump or profiling config file + if (aclInit(NULL) != ACL_SUCCESS) { + ERROR_LOG("acl init failed"); + return false; + } - // runMode is ACL_HOST which represents app is running in host - // runMode is ACL_DEVICE which represents app is running in device - aclrtRunMode runMode; - if (aclrtGetRunMode(&runMode) != ACL_SUCCESS) { - ERROR_LOG("Get run mode failed"); - DestroyResource(); - return false; + if (aclrtSetDevice(g_deviceId) != ACL_SUCCESS) { + ERROR_LOG("Set device failed. 
g_deviceId is %d", g_deviceId); + (void) aclFinalize(); + return false; + } + INFO_LOG("Set device[%d] success", g_deviceId); + + // runMode is ACL_HOST which represents app is running in host + // runMode is ACL_DEVICE which represents app is running in device + aclrtRunMode runMode; + if (aclrtGetRunMode(&runMode) != ACL_SUCCESS) { + ERROR_LOG("Get run mode failed"); + DestroyResource(); + return false; + } + g_isDevice = (runMode == ACL_DEVICE); + INFO_LOG("Get RunMode[%d] success", runMode); + + return true; } - g_isDevice = (runMode == ACL_DEVICE); - INFO_LOG("Get RunMode[%d] success", runMode); - return true; -} + bool RunOp() + { + // create op desc + OperatorDesc opDesc = CreateOpDesc(); -bool RunOp() -{ - // create op desc - OperatorDesc opDesc = CreateOpDesc(); - - // create Runner - OpRunner opRunner(&opDesc); - if (!opRunner.Init()) { - ERROR_LOG("Init OpRunner failed"); - return false; - } + // create Runner + OpRunner opRunner(&opDesc); + if (!opRunner.Init()) { + ERROR_LOG("Init OpRunner failed"); + return false; + } - // Load inputs - if (!SetInputData(opRunner)) { - ERROR_LOG("Set input data failed"); - return false; - } + // Load inputs + if (!SetInputData(opRunner)) { + ERROR_LOG("Set input data failed"); + return false; + } - // Run op - if (!opRunner.RunOp()) { - ERROR_LOG("Run op failed"); - return false; - } + // Run op + if (!opRunner.RunOp()) { + ERROR_LOG("Run op failed"); + return false; + } - // process output data - if (!ProcessOutputData(opRunner)) { - ERROR_LOG("Process output data failed"); - return false; + // process output data + if (!ProcessOutputData(opRunner)) { + ERROR_LOG("Process output data failed"); + return false; + } + INFO_LOG("Run op success"); + return true; } - INFO_LOG("Run op success"); - return true; } int main(int argc, char** argv) -- Gitee From f9500b2688d13b7ef428fc1b2ffb5bce53a3e39a Mon Sep 17 00:00:00 2001 From: sihaixianyu Date: Mon, 6 May 2024 01:37:03 +0000 Subject: [PATCH 084/302] =?UTF-8?q?!114=20CleanCode=E6=B8=85=E7=90=86=20*?= =?UTF-8?q?=20CleanCode=E6=B8=85=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mx_rec/graph/slicers.py | 440 ++++++++++++++++++++-------------------- 1 file changed, 220 insertions(+), 220 deletions(-) diff --git a/mx_rec/graph/slicers.py b/mx_rec/graph/slicers.py index 3204af4e..3999cdd4 100644 --- a/mx_rec/graph/slicers.py +++ b/mx_rec/graph/slicers.py @@ -69,6 +69,226 @@ class NoGradSubgraphSlicer(metaclass=abc.ABCMeta): def slice(self) -> None: pass + @staticmethod + def _find_min_dep_ops( + tgt_ops: Set[Operation], + ) -> Set[Operation]: + logger.debug("Search from base nodes: %s.", tgt_ops) + base_ops = tgt_ops.copy() + visited_ops = base_ops + + loop_cnt = 0 + while base_ops: + loop_cnt += 1 + if loop_cnt > MAX_WHILE_SIZE: + raise RuntimeError(f"maximum loop times exceed limit: {MAX_WHILE_SIZE}.") + + parent_ops = set() + for base_node in base_ops: + if len(base_node.control_inputs) != 0: + raise ValueError("control dependencies are not supported.") + + parent_ops.update( + tensor_in.op + for tensor_in in base_node.inputs + if tensor_in.op.type != AnchorIteratorOp.ITERATOR_GET_NEXT.value + ) + + new_ops = parent_ops - visited_ops + base_ops = parent_ops + visited_ops.update(new_ops) + + logger.debug("Found minimum dependency graph nodes: %s.", visited_ops) + return visited_ops + + @staticmethod + def _validate_op(op: Operation) -> bool: + op_type = op.type + op_name = op.name + op_inputs = op.inputs + op_outputs = op.outputs + + for s in 
NoGradSubgraphSlicer._INVALID_STR_IN_OP_TYPE: + if s in op_type: + logger.warning("Invalid operation type: %s which contains str: %s.", op_type, s) + return False + for s in NoGradSubgraphSlicer._INVALID_STR_IN_OP_NAME: + if s in op_name: + logger.warning("Invalid operation name: %s which contains str: %s.", op_name, s) + return False + for t in op_inputs: + if t.dtype in NoGradSubgraphSlicer._INVALID_TENSOR_DTYPE: + logger.warning("Invalid operation input tensor of operation: %s whose type is %s.", t, t.dtype) + return False + for t in op_outputs: + if t.dtype in NoGradSubgraphSlicer._INVALID_TENSOR_DTYPE: + logger.warning("Invalid operation output tensor of operation: %s whose type is %s.", t, t.dtype) + return False + + return True + + @staticmethod + def _update_subgraph_in( + base_ops: Operation, + input_to_edge_ops: Dict[Operation, Set[Operation]], + sub_graph_ops: Set[Operation], + ) -> None: + for input_tensor in base_ops.inputs: + input_node = input_tensor.op + if input_node not in sub_graph_ops: + res = input_to_edge_ops.get(input_node, set()) + res.add(base_ops) + input_to_edge_ops[input_node] = res + + @staticmethod + def _update_subgraph_out( + base_ops: Operation, + out_op_to_edge_ops: Dict[Operation, Set[Operation]], + sub_graph_ops: Set[Operation], + ) -> None: + for output_tensor in base_ops.outputs: + for output_consumer in output_tensor.consumers(): + if output_consumer not in sub_graph_ops: + res = out_op_to_edge_ops.get(output_consumer, set()) + res.add(base_ops) + out_op_to_edge_ops[output_consumer] = res + + @staticmethod + def _upward_bfs_op(base_ops: Union[Operation, Set[Operation], List[Operation]], tgt_op_type: str) -> Operation: + if not isinstance(base_ops, (set, list)): + base_ops = [base_ops] + + parent_ops = base_ops + while True: + for parent_op in parent_ops: + if parent_op.type == tgt_op_type: + return parent_op + base_ops = parent_ops + parent_ops = [] + for base_op in base_ops: + parent_ops.extend(utils.find_parent_op(base_op)) + if not parent_ops: + raise ValueError(f"target operation '{tgt_op_type}'' was not found.") + + @staticmethod + def _topo_sort_sliced_ops(sliced_ops: Set[Operation]) -> List[Operation]: + topo_subgraph_list = [] + topo_subgraph_set = set() + start_nodes = set() + [start_nodes.add(x) for x in sliced_ops] + logger.info("Got topo_subgraph start nodes: %s", start_nodes) + + def topo_sort_helper(curr_op, output_list, output_set): + if not isinstance(curr_op, Operation): + raise RuntimeError(f"topo_subgraph_dfs input should be node(aka. tf.Operator). 
{curr_op}") + curr_inputs = curr_op.inputs + logger.debug("Got topo_dfs: %s <- %s", curr_op.name, [x.name for x in curr_inputs]) + current_control_inputs = curr_op.control_inputs + if len(current_control_inputs) > 0: + raise RuntimeError( + f"control input are not supported: {curr_op.name}, control_inputs: {current_control_inputs}" + ) + if curr_op in output_set: + return + output_set.add(curr_op) + for tensor in curr_inputs: + node = tensor.op + if node.type != AnchorIteratorOp.ITERATOR_GET_NEXT.value and node not in output_set: + topo_sort_helper(node, output_list, output_set) + output_list.append(curr_op) + + [topo_sort_helper(x, topo_subgraph_list, topo_subgraph_set) for x in start_nodes] + if len(topo_subgraph_list) != len(topo_subgraph_set): + raise RuntimeError(f"got duplicated topo node: {sorted(topo_subgraph_list, key=lambda x: x.name)}.") + logger.info("Got topo_subgraph: %s", topo_subgraph_list) + return topo_subgraph_list + + @staticmethod + def _get_mapping_for_subgraph_in( + from_op: Operation, + to_ops: Set[Operation], + tensor_mapping: Union[Dict[Tensor, Tensor], Dict[SparseTensor, SparseTensor]], + ) -> None: + if from_op.type != AnchorIteratorOp.ITERATOR_GET_NEXT.value: + raise RuntimeError(f"expect IteratorGetNext for input tensor of subgraph, but got {from_op}") + for node in to_ops: + for each_tensor in node.inputs: + if each_tensor.op.type != AnchorIteratorOp.ITERATOR_GET_NEXT.value: + continue + old_tensor_name = each_tensor.name + x_index = int(old_tensor_name.split(":")[-1]) + g = tf.compat.v1.get_default_graph() + arg_tensor = g.get_tensor_by_name("args_%d:0" % x_index) + tensor_mapping[each_tensor] = arg_tensor + + @staticmethod + def _get_mapping_for_subgraph( + old_op: Operation, + node_mapping: Dict[Operation, Operation], + tensor_mapping: Dict[Tensor, Tensor], + ) -> None: + logger.debug("old operation name: %s\nold operation inputs: %s\n", old_op.name, [x for x in old_op.inputs]) + + for each_tensor in old_op.inputs: + if each_tensor not in tensor_mapping: + raise RuntimeError( + f"each_tensor(input) {each_tensor} need by {old_op.name} not in tensor_mapping.{tensor_mapping}" + ) + new_inputs = NoGradSubgraphSlicer._get_mapped_tensor(tensor_mapping, old_op.inputs) + + node_def = old_op.node_def + node_def.name = "{}/{}".format(NoGradSubgraphSlicer._SLICED_OP_NAME_PREFIX, node_def.name) + new_node = tf.Operation(node_def=node_def, g=tf.compat.v1.get_default_graph(), inputs=new_inputs) + + node_mapping[old_op] = new_node + for old_out_tensor, new_out_tensor in zip(old_op.outputs, new_node.outputs): + tensor_mapping[old_out_tensor] = new_out_tensor + + @staticmethod + def _get_mapped_tensor(tensor2tensor: Dict[Tensor, Tensor], keys: List[Tensor]) -> List[Tensor]: + tensors = [] + for k in keys: + if k not in tensor2tensor: + raise KeyError(f"failed to find key tensor: {k} from tensor map: {tensor2tensor}.") + tensors.append(tensor2tensor[k]) + return tensors + + @staticmethod + def _sort_sliced_graph_outputs(subgraph_out: Dict[Operation, Set[Operation]]) -> List[Tensor]: + extra_outputs = [] + sorted_outputs = sorted(subgraph_out.items(), key=lambda x: x[0].name) + for outside_op, edge_ops in sorted_outputs: + outside_op_inputs = set(outside_op.inputs) + for edge_op in edge_ops: + NoGradSubgraphSlicer._add_sorted_additional_tensors(extra_outputs, outside_op_inputs, edge_op) + return extra_outputs + + @staticmethod + def _add_sorted_additional_tensors(extra_outputs, outside_op_inputs, edge_op) -> None: + for each_tensor in sorted(edge_op.outputs, key=lambda x: 
x.name): + if each_tensor not in outside_op_inputs: + continue + if each_tensor in extra_outputs: + continue + extra_outputs.append(each_tensor) + + @staticmethod + def _get_tensor_consumers(tensor: Tensor) -> List[Operation]: + if not isinstance(tensor, NoGradSubgraphSlicer._VALID_TENSOR_CLASS): + raise RuntimeError(f"expected 'tf.Tensor' or 'tf.SparseTensor', but got: {tensor}") + + graph = tensor.graph + consumers = [] + consumer_names = [op.name for op in tensor.consumers()] + + with graph._lock: + for name in consumer_names: + if name not in graph._nodes_by_name: # ignore deleted node + continue + consumers.append(graph._nodes_by_name[name]) + + return consumers + def _slice_ops(self, sliceable_ops: Set[Operation], is_training: bool) -> None: """Slice the minimum dependency graph of given operation set. @@ -474,226 +694,6 @@ class NoGradSubgraphSlicer(metaclass=abc.ABCMeta): new_tensor, ) - @staticmethod - def _find_min_dep_ops( - tgt_ops: Set[Operation], - ) -> Set[Operation]: - logger.debug("Search from base nodes: %s.", tgt_ops) - base_ops = tgt_ops.copy() - visited_ops = base_ops - - loop_cnt = 0 - while base_ops: - loop_cnt += 1 - if loop_cnt > MAX_WHILE_SIZE: - raise RuntimeError(f"maximum loop times exceed limit: {MAX_WHILE_SIZE}.") - - parent_ops = set() - for base_node in base_ops: - if len(base_node.control_inputs) != 0: - raise ValueError("control dependencies are not supported.") - - parent_ops.update( - tensor_in.op - for tensor_in in base_node.inputs - if tensor_in.op.type != AnchorIteratorOp.ITERATOR_GET_NEXT.value - ) - - new_ops = parent_ops - visited_ops - base_ops = parent_ops - visited_ops.update(new_ops) - - logger.debug("Found minimum dependency graph nodes: %s.", visited_ops) - return visited_ops - - @staticmethod - def _validate_op(op: Operation) -> bool: - op_type = op.type - op_name = op.name - op_inputs = op.inputs - op_outputs = op.outputs - - for s in NoGradSubgraphSlicer._INVALID_STR_IN_OP_TYPE: - if s in op_type: - logger.warning("Invalid operation type: %s which contains str: %s.", op_type, s) - return False - for s in NoGradSubgraphSlicer._INVALID_STR_IN_OP_NAME: - if s in op_name: - logger.warning("Invalid operation name: %s which contains str: %s.", op_name, s) - return False - for t in op_inputs: - if t.dtype in NoGradSubgraphSlicer._INVALID_TENSOR_DTYPE: - logger.warning("Invalid operation input tensor of operation: %s whose type is %s.", t, t.dtype) - return False - for t in op_outputs: - if t.dtype in NoGradSubgraphSlicer._INVALID_TENSOR_DTYPE: - logger.warning("Invalid operation output tensor of operation: %s whose type is %s.", t, t.dtype) - return False - - return True - - @staticmethod - def _update_subgraph_in( - base_ops: Operation, - input_to_edge_ops: Dict[Operation, Set[Operation]], - sub_graph_ops: Set[Operation], - ) -> None: - for input_tensor in base_ops.inputs: - input_node = input_tensor.op - if input_node not in sub_graph_ops: - res = input_to_edge_ops.get(input_node, set()) - res.add(base_ops) - input_to_edge_ops[input_node] = res - - @staticmethod - def _update_subgraph_out( - base_ops: Operation, - out_op_to_edge_ops: Dict[Operation, Set[Operation]], - sub_graph_ops: Set[Operation], - ) -> None: - for output_tensor in base_ops.outputs: - for output_consumer in output_tensor.consumers(): - if output_consumer not in sub_graph_ops: - res = out_op_to_edge_ops.get(output_consumer, set()) - res.add(base_ops) - out_op_to_edge_ops[output_consumer] = res - - @staticmethod - def _upward_bfs_op(base_ops: Union[Operation, Set[Operation], 
List[Operation]], tgt_op_type: str) -> Operation: - if not isinstance(base_ops, (set, list)): - base_ops = [base_ops] - - parent_ops = base_ops - while True: - for parent_op in parent_ops: - if parent_op.type == tgt_op_type: - return parent_op - base_ops = parent_ops - parent_ops = [] - for base_op in base_ops: - parent_ops.extend(utils.find_parent_op(base_op)) - if not parent_ops: - raise ValueError(f"target operation '{tgt_op_type}'' was not found.") - - @staticmethod - def _topo_sort_sliced_ops(sliced_ops: Set[Operation]) -> List[Operation]: - topo_subgraph_list = [] - topo_subgraph_set = set() - start_nodes = set() - [start_nodes.add(x) for x in sliced_ops] - logger.info("Got topo_subgraph start nodes: %s", start_nodes) - - def topo_sort_helper(curr_op, output_list, output_set): - if not isinstance(curr_op, Operation): - raise RuntimeError(f"topo_subgraph_dfs input should be node(aka. tf.Operator). {curr_op}") - curr_inputs = curr_op.inputs - logger.debug("Got topo_dfs: %s <- %s", curr_op.name, [x.name for x in curr_inputs]) - current_control_inputs = curr_op.control_inputs - if len(current_control_inputs) > 0: - raise RuntimeError( - f"control input are not supported: {curr_op.name}, control_inputs: {current_control_inputs}" - ) - if curr_op in output_set: - return - output_set.add(curr_op) - for tensor in curr_inputs: - node = tensor.op - if node.type != AnchorIteratorOp.ITERATOR_GET_NEXT.value and node not in output_set: - topo_sort_helper(node, output_list, output_set) - output_list.append(curr_op) - - [topo_sort_helper(x, topo_subgraph_list, topo_subgraph_set) for x in start_nodes] - if len(topo_subgraph_list) != len(topo_subgraph_set): - raise RuntimeError(f"got duplicated topo node: {sorted(topo_subgraph_list, key=lambda x: x.name)}.") - logger.info("Got topo_subgraph: %s", topo_subgraph_list) - return topo_subgraph_list - - @staticmethod - def _get_mapping_for_subgraph_in( - from_op: Operation, - to_ops: Set[Operation], - tensor_mapping: Union[Dict[Tensor, Tensor], Dict[SparseTensor, SparseTensor]], - ) -> None: - if from_op.type != AnchorIteratorOp.ITERATOR_GET_NEXT.value: - raise RuntimeError(f"expect IteratorGetNext for input tensor of subgraph, but got {from_op}") - for node in to_ops: - for each_tensor in node.inputs: - if each_tensor.op.type != AnchorIteratorOp.ITERATOR_GET_NEXT.value: - continue - old_tensor_name = each_tensor.name - x_index = int(old_tensor_name.split(":")[-1]) - g = tf.compat.v1.get_default_graph() - arg_tensor = g.get_tensor_by_name("args_%d:0" % x_index) - tensor_mapping[each_tensor] = arg_tensor - - @staticmethod - def _get_mapping_for_subgraph( - old_op: Operation, - node_mapping: Dict[Operation, Operation], - tensor_mapping: Dict[Tensor, Tensor], - ) -> None: - logger.debug("old operation name: %s\nold operation inputs: %s\n", old_op.name, [x for x in old_op.inputs]) - - for each_tensor in old_op.inputs: - if each_tensor not in tensor_mapping: - raise RuntimeError( - f"each_tensor(input) {each_tensor} need by {old_op.name} not in tensor_mapping.{tensor_mapping}" - ) - new_inputs = NoGradSubgraphSlicer._get_mapped_tensor(tensor_mapping, old_op.inputs) - - node_def = old_op.node_def - node_def.name = "{}/{}".format(NoGradSubgraphSlicer._SLICED_OP_NAME_PREFIX, node_def.name) - new_node = tf.Operation(node_def=node_def, g=tf.compat.v1.get_default_graph(), inputs=new_inputs) - - node_mapping[old_op] = new_node - for old_out_tensor, new_out_tensor in zip(old_op.outputs, new_node.outputs): - tensor_mapping[old_out_tensor] = new_out_tensor - - 
@staticmethod - def _get_mapped_tensor(tensor2tensor: Dict[Tensor, Tensor], keys: List[Tensor]) -> List[Tensor]: - tensors = [] - for k in keys: - if k not in tensor2tensor: - raise KeyError(f"failed to find key tensor: {k} from tensor map: {tensor2tensor}.") - tensors.append(tensor2tensor[k]) - return tensors - - @staticmethod - def _sort_sliced_graph_outputs(subgraph_out: Dict[Operation, Set[Operation]]) -> List[Tensor]: - extra_outputs = [] - sorted_outputs = sorted(subgraph_out.items(), key=lambda x: x[0].name) - for outside_op, edge_ops in sorted_outputs: - outside_op_inputs = set(outside_op.inputs) - for edge_op in edge_ops: - NoGradSubgraphSlicer._add_sorted_additional_tensors(extra_outputs, outside_op_inputs, edge_op) - return extra_outputs - - @staticmethod - def _add_sorted_additional_tensors(extra_outputs, outside_op_inputs, edge_op) -> None: - for each_tensor in sorted(edge_op.outputs, key=lambda x: x.name): - if each_tensor not in outside_op_inputs: - continue - if each_tensor in extra_outputs: - continue - extra_outputs.append(each_tensor) - - @staticmethod - def _get_tensor_consumers(tensor: Tensor) -> List[Operation]: - if not isinstance(tensor, NoGradSubgraphSlicer._VALID_TENSOR_CLASS): - raise RuntimeError(f"expected 'tf.Tensor' or 'tf.SparseTensor', but got: {tensor}") - - graph = tensor.graph - consumers = [] - consumer_names = [op.name for op in tensor.consumers()] - - with graph._lock: - for name in consumer_names: - if name not in graph._nodes_by_name: # ignore deleted node - continue - consumers.append(graph._nodes_by_name[name]) - - return consumers - @para_checker_decorator( check_option_list=[ -- Gitee From 9266d4b823e83a591ce76b31c5bef9bec6aed329 Mon Sep 17 00:00:00 2001 From: steepcurve Date: Mon, 6 May 2024 11:23:24 +0800 Subject: [PATCH 085/302] reset key_process --- src/core/key_process/key_process.cpp | 396 +++++++++++++-------------- src/core/key_process/key_process.h | 372 ++++++++++++------------- 2 files changed, 384 insertions(+), 384 deletions(-) diff --git a/src/core/key_process/key_process.cpp b/src/core/key_process/key_process.cpp index 7ba9106d..b2dfab04 100644 --- a/src/core/key_process/key_process.cpp +++ b/src/core/key_process/key_process.cpp @@ -15,21 +15,19 @@ See the License for the specific language governing permissions and #include "key_process.h" -#include - #include #include - -#include "emb_table/embedding_mgmt.h" -#include "hd_transfer/hd_transfer.h" -#include "host_emb/host_emb.h" -#include "ock_ctr_common/include/error_code.h" +#include #include "utils/common.h" -#include "utils/config.h" #include "utils/logger.h" #include "utils/safe_queue.h" #include "utils/singleton.h" #include "utils/time_cost.h" +#include "utils/config.h" +#include "host_emb/host_emb.h" +#include "emb_table/embedding_mgmt.h" +#include "hd_transfer/hd_transfer.h" +#include "ock_ctr_common/include/error_code.h" using namespace std; using namespace chrono; @@ -43,14 +41,15 @@ void KeyProcess::SetupHotEmbUpdateStep() } bool KeyProcess::Initialize(const RankInfo& rInfo, const vector& eInfos, - const vector& thresholdValues, int seed) + const vector& thresholdValues, + int seed) { this->rankInfo = rInfo; - + SetupHotEmbUpdateStep(); - + map scInfo; - for (const auto& info : eInfos) { + for (const auto& info: eInfos) { embInfos[info.name] = info; scInfo[info.name] = info.sendCount; InitHotEmbTotCount(info, rInfo); @@ -64,8 +63,8 @@ bool KeyProcess::Initialize(const RankInfo& rInfo, const vector& eInfos LOG_INFO(KEY_PROCESS "hot emb count info:{}", 
MapToString(hotEmbTotCount)); MPI_Group worldGroup; MPI_Comm_group(MPI_COMM_WORLD, &worldGroup); - for (auto& i : comm) { - for (auto& j : i) { + for (auto& i: comm) { + for (auto& j: i) { MPI_Comm_create(MPI_COMM_WORLD, worldGroup, &j); } } @@ -87,8 +86,8 @@ bool KeyProcess::Initialize(const RankInfo& rInfo, const vector& eInfos } } - LOG_INFO(KEY_PROCESS "scInfo:{}, localRankSize:{}, rankSize:{}, useStatic:{}", MapToString(scInfo), - rInfo.localRankSize, rInfo.rankSize, rInfo.useStatic); + LOG_INFO(KEY_PROCESS "scInfo:{}, localRankSize:{}, rankSize:{}, useStatic:{}", + MapToString(scInfo), rInfo.localRankSize, rInfo.rankSize, rInfo.useStatic); #ifndef GTEST Start(); #endif @@ -102,7 +101,7 @@ int KeyProcess::Start() // 0 1 2 3 4 5 0 1 2 3 4 5 // | rank0 | | rank1 | // each rank creates KEY_PROCESS_THREAD threads, each thread process one batchdata - LOG_INFO("CPU Core Num: {}", sysconf(_SC_NPROCESSORS_CONF)); // 查看CPU核数 + LOG_INFO("CPU Core Num: {}", sysconf(_SC_NPROCESSORS_CONF)); // 查看CPU核数 auto fn = [this](int channel, int threadId) { #ifndef GTEST auto ret = aclrtSetDevice(static_cast(rankInfo.deviceId)); @@ -116,7 +115,7 @@ int KeyProcess::Start() } else { KeyProcessTask(channel, threadId); } - }; // for clean code + }; // for clean code int threadNum = GetThreadNumEnv(); for (int channel = 0; channel < MAX_CHANNEL_NUM; ++channel) { LOG_INFO(KEY_PROCESS "key process thread num: {}", threadNum); @@ -180,7 +179,7 @@ void KeyProcess::Destroy() { isRunning = false; LOG_INFO(KEY_PROCESS "rankId:{} KeyProcess begin destroy.", rankInfo.rankId); - for (auto& i : procThreads) { + for (auto& i: procThreads) { i->join(); } procThreads.clear(); @@ -190,8 +189,8 @@ void KeyProcess::Destroy() /// 每个数据通道的所有数据处理线程上锁 void KeyProcess::LoadSaveLock() { - for (int channelId{0}; channelId < MAX_CHANNEL_NUM; ++channelId) { - for (int threadId{0}; threadId < MAX_KEY_PROCESS_THREAD; ++threadId) { + for (int channelId { 0 }; channelId < MAX_CHANNEL_NUM; ++channelId) { + for (int threadId { 0 }; threadId < MAX_KEY_PROCESS_THREAD; ++threadId) { loadSaveMut[channelId][threadId].lock(); } } @@ -200,8 +199,8 @@ void KeyProcess::LoadSaveLock() /// 每个数据通道的所有数据处理线程释放锁 void KeyProcess::LoadSaveUnlock() { - for (int channelId{0}; channelId < MAX_CHANNEL_NUM; ++channelId) { - for (int threadId{0}; threadId < MAX_KEY_PROCESS_THREAD; ++threadId) { + for (int channelId { 0 }; channelId < MAX_CHANNEL_NUM; ++channelId) { + for (int threadId { 0 }; threadId < MAX_KEY_PROCESS_THREAD; ++threadId) { loadSaveMut[channelId][threadId].unlock(); } } @@ -228,7 +227,7 @@ void KeyProcess::GetUniqueConfig(ock::ctr::UniqueConf& uniqueConf) } void KeyProcess::InitializeUnique(ock::ctr::UniqueConf& uniqueConf, size_t& preBatchSize, bool& uniqueInitialize, - const unique_ptr& batch, ock::ctr::UniquePtr& unique) + const unique_ptr & batch, ock::ctr::UniquePtr& unique) { uniqueConf.desiredSize = static_cast(batch->Size()); if (preBatchSize != batch->Size()) { @@ -270,7 +269,7 @@ void KeyProcess::KeyProcessTaskWithFastUnique(int channel, int threadId) while (true) { TimeCost getAndProcessTC; TimeCost getBatchDataTC; - batch = GetBatchData(channel, threadId); // get batch data from SingletonQueue + batch = GetBatchData(channel, threadId); // get batch data from SingletonQueue LOG_DEBUG("getBatchDataTC(ms):{}", getBatchDataTC.ElapsedMS()); if (batch == nullptr) { break; @@ -284,20 +283,21 @@ void KeyProcess::KeyProcessTaskWithFastUnique(int channel, int threadId) } LOG_INFO(KEY_PROCESS "getAndProcessTC(ms):{}, key process with fast unique 
cost:{}," " get data time(ms):{}, batch name:{}, channelId:{}, threadId:{}, batchId:{}", - getAndProcessTC.ElapsedMS(), processDataTime.ElapsedMS(), getBatchTime, batch->name, - batch->channel, threadId, batch->batchId); + getAndProcessTC.ElapsedMS(), processDataTime.ElapsedMS(), getBatchTime, + batch->name, batch->channel, threadId, batch->batchId); int queueIndex = threadId + (MAX_KEY_PROCESS_THREAD * batch->channel); auto batchQueue = SingletonQueue::GetInstances(queueIndex); batchQueue->PutDirty(move(batch)); } unique->UnInitialize(); - } catch (const EndRunExit& e) { + } catch (const EndRunExit &e) { LOG_INFO(KEY_PROCESS "channel: {}, thread: {}, abort run: {}", channel, threadId, e.what()); } - LOG_INFO(KEY_PROCESS "KeyProcessTaskWithFastUnique exit. rank:{} channelId:{}, threadId:{}", rankInfo.rankId, - channel, threadId); + LOG_INFO(KEY_PROCESS "KeyProcessTaskWithFastUnique exit. rank:{} channelId:{}, threadId:{}", + rankInfo.rankId, channel, threadId); } + void KeyProcess::KeyProcessTask(int channel, int threadId) { unique_ptr batch; @@ -305,7 +305,7 @@ void KeyProcess::KeyProcessTask(int channel, int threadId) while (true) { TimeCost getAndProcessTC; TimeCost getBatchDataTC; - batch = GetBatchData(channel, threadId); // get batch data from SingletonQueue + batch = GetBatchData(channel, threadId); // get batch data from SingletonQueue LOG_DEBUG("getBatchDataTC(ms):{}", getBatchDataTC.ElapsedMS()); if (batch == nullptr) { break; @@ -318,27 +318,28 @@ void KeyProcess::KeyProcessTask(int channel, int threadId) } LOG_INFO(KEY_PROCESS "getAndProcessTC(ms):{}, key process cost:{}," " get data time(ms):{}, batch name:{}, channelId:{}, threadId:{}, batchId:{}", - getAndProcessTC.ElapsedMS(), processDataTime.ElapsedMS(), getBatchTime, batch->name, - batch->channel, threadId, batch->batchId); + getAndProcessTC.ElapsedMS(), processDataTime.ElapsedMS(), getBatchTime, + batch->name, batch->channel, threadId, batch->batchId); int queueIndex = threadId + (MAX_KEY_PROCESS_THREAD * batch->channel); auto batchQueue = SingletonQueue::GetInstances(queueIndex); batchQueue->PutDirty(move(batch)); } - } catch (const EndRunExit& e) { + } catch (const EndRunExit &e) { LOG_INFO(KEY_PROCESS "channel: {}, thread: {}, abort run: {}", channel, threadId, e.what()); } LOG_INFO(KEY_PROCESS "KeyProcessTask exit. 
rank:{} channelId:{}, threadId:{}", rankInfo.rankId, channel, threadId); } -void KeyProcess::HashSplitHelper(const unique_ptr& batch, vector& splitKeys, vector& restore, - vector& hotPos, vector>& keyCount) +void KeyProcess::HashSplitHelper(const unique_ptr & batch, vector & splitKeys, + vector & restore, vector & hotPos, + vector >& keyCount) { TimeCost uniqueTc; if (m_featureAdmitAndEvict.GetFunctionSwitch() && FeatureAdmitAndEvict::m_embStatus[batch->name] != SingleEmbTableStatus::SETS_NONE) { - tie(splitKeys, restore, keyCount) = HashSplitWithFAAE(batch); // 按存储dev id切分并去重 + tie(splitKeys, restore, keyCount) = HashSplitWithFAAE(batch); // 按存储dev id切分并去重 } else { - tie(splitKeys, restore, hotPos) = HotHashSplit(batch); // 按存储dev id切分并去重 + tie(splitKeys, restore, hotPos) = HotHashSplit(batch); // 按存储dev id切分并去重 } LOG_DEBUG("uniqueTc(ms):{}", uniqueTc.ElapsedMS()); } @@ -348,7 +349,7 @@ bool KeyProcess::KeyProcessTaskHelperWithFastUnique(unique_ptr& batch { // tuple for keyRec restore hotPos scAll countRecv isWithFAAE = m_featureAdmitAndEvict.GetFunctionSwitch() && - FeatureAdmitAndEvict::m_embStatus[batch->name] != SingleEmbTableStatus::SETS_NONE; + FeatureAdmitAndEvict::m_embStatus[batch->name] != SingleEmbTableStatus::SETS_NONE; TimeCost totalTimeCost = TimeCost(); TimeCost fastUniqueTC; UniqueInfo uniqueInfo; @@ -356,11 +357,12 @@ bool KeyProcess::KeyProcessTaskHelperWithFastUnique(unique_ptr& batch LOG_DEBUG("ProcessBatchWithFastUnique(ms):{}", fastUniqueTC.ElapsedMS()); // 特征准入&淘汰 - const auto errStatus = FeatureAdmitReturnType::FEATURE_ADMIT_RETURN_ERROR; - if (isWithFAAE && (m_featureAdmitAndEvict.FeatureAdmit(channel, batch, uniqueInfo.all2AllInfo.keyRecv, - uniqueInfo.all2AllInfo.countRecv) == errStatus)) { - LOG_ERROR(KEY_PROCESS "rank:{} thread:{}, channel:{}, Feature-admit-and-evict error ...", rankInfo.rankId, - threadId, channel); + if (isWithFAAE && + (m_featureAdmitAndEvict.FeatureAdmit( + channel, batch, uniqueInfo.all2AllInfo.keyRecv, uniqueInfo.all2AllInfo.countRecv) == + FeatureAdmitReturnType::FEATURE_ADMIT_RETURN_ERROR)) { + LOG_ERROR(KEY_PROCESS "rank:{} thread:{}, channel:{}, Feature-admit-and-evict error ...", + rankInfo.rankId, threadId, channel); return false; } std::lock_guard lock(loadSaveMut[channel][threadId]); @@ -374,27 +376,25 @@ bool KeyProcess::KeyProcessTaskHelperWithFastUnique(unique_ptr& batch LOG_DEBUG("key2OffsetTC(ms):{}", key2OffsetTC.ElapsedMS()); } // Static all2all,need send count - if (!rankInfo.useStatic) { - SendA2A(uniqueInfo.all2AllInfo.scAll, batch->name, batch->channel, batch->batchId); - } + if (!rankInfo.useStatic) { SendA2A(uniqueInfo.all2AllInfo.scAll, batch->name, batch->channel, batch->batchId); } auto tensors = make_unique>(); tensors->push_back(Vec2TensorI32(uniqueInfo.restore)); uniqueInfo.hotPos.resize(hotEmbTotCount[batch->name], -1); tensors->push_back(Vec2TensorI32(uniqueInfo.hotPos)); - + if (!rankInfo.isDDR) { PushGlobalUniqueTensors(move(tensors), uniqueInfo.all2AllInfo.keyRecv, channel); - tensors->push_back(rankInfo.useDynamicExpansion ? Vec2TensorI64(uniqueInfo.all2AllInfo.keyRecv) - : Vec2TensorI32(uniqueInfo.all2AllInfo.keyRecv)); + tensors->push_back(rankInfo.useDynamicExpansion ? 
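// FeatureAdmit above gates keys before they may allocate embedding rows. The exact policy
// lives in FeatureAdmitAndEvict; the sketch below shows only the count-threshold idea it
// is built around (admit a key once its cumulative frequency reaches the table's
// threshold), using std::unordered_map in place of the real state:
#include <cstdint>
#include <unordered_map>
#include <vector>

std::vector<bool> AdmitByCount(const std::vector<int64_t>& keys,
                               const std::vector<uint32_t>& counts,
                               std::unordered_map<int64_t, uint64_t>& cumulative,
                               uint64_t threshold)
{
    std::vector<bool> admitted(keys.size(), false);
    for (size_t i = 0; i < keys.size(); ++i) {
        cumulative[keys[i]] += counts[i];          // counts come from the all2all exchange
        admitted[i] = cumulative[keys[i]] >= threshold;
    }
    return admitted;
}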
Vec2TensorI64(uniqueInfo.all2AllInfo.keyRecv) : + Vec2TensorI32(uniqueInfo.all2AllInfo.keyRecv)); } TimeCost pushResultTC; PushResult(batch, move(tensors), uniqueInfo.all2AllInfo.keyRecv); if (GlogConfig::gStatOn) { - LOG_INFO(STAT_INFO "channel_id {} batch_id {} rank_id {} key_process_time_cost_with_fast_unique {}", channel, - batch->batchId, rankInfo.rankId, totalTimeCost.ElapsedMS()); + LOG_INFO(STAT_INFO "channel_id {} batch_id {} rank_id {} key_process_time_cost_with_fast_unique {}", + channel, batch->batchId, rankInfo.rankId, totalTimeCost.ElapsedMS()); } LOG_DEBUG("pushResultTC(ms):{}", pushResultTC.ElapsedMS()); return true; @@ -422,10 +422,10 @@ bool KeyProcess::KeyProcessTaskHelper(unique_ptr& batch, int channel, // 特征准入&淘汰 if (m_featureAdmitAndEvict.GetFunctionSwitch() && FeatureAdmitAndEvict::m_embStatus[batch->name] != SingleEmbTableStatus::SETS_NONE && - (m_featureAdmitAndEvict.FeatureAdmit(channel, batch, lookupKeys, countRecv) == - FeatureAdmitReturnType::FEATURE_ADMIT_RETURN_ERROR)) { - LOG_ERROR(KEY_PROCESS "rank:{} thread:{}, channel:{}, Feature-admit-and-evict error ...", rankInfo.rankId, - threadId, channel); + (m_featureAdmitAndEvict.FeatureAdmit(channel, batch, lookupKeys, + countRecv) == FeatureAdmitReturnType::FEATURE_ADMIT_RETURN_ERROR)) { + LOG_ERROR(KEY_PROCESS "rank:{} thread:{}, channel:{}, Feature-admit-and-evict error ...", + rankInfo.rankId, threadId, channel); return false; } @@ -436,9 +436,7 @@ bool KeyProcess::KeyProcessTaskHelper(unique_ptr& batch, int channel, } // Static all2all,need send count - if (!rankInfo.useStatic) { - SendA2A(scAll, batch->name, batch->channel, batch->batchId); - } + if (!rankInfo.useStatic) { SendA2A(scAll, batch->name, batch->channel, batch->batchId); } TimeCost pushResultTC; auto tensors = make_unique>(); @@ -446,7 +444,7 @@ bool KeyProcess::KeyProcessTaskHelper(unique_ptr& batch, int channel, hotPos.resize(hotEmbTotCount[batch->name], 0); tensors->push_back(Vec2TensorI32(hotPos)); - + if (!rankInfo.isDDR) { PushGlobalUniqueTensors(tensors, lookupKeys, channel); tensors->push_back(rankInfo.useDynamicExpansion ? 
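// Vec2TensorI32 / Vec2TensorI64 (defined elsewhere in the repo) wrap host vectors into
// tensorflow::Tensor objects for the restore and hot-position outputs. One plausible shape
// of that helper, assuming the TensorFlow C++ API; the real implementation may differ:
#include <cstdint>
#include <vector>
#include "tensorflow/core/framework/tensor.h"

tensorflow::Tensor Vec2TensorI32Sketch(const std::vector<int32_t>& values)
{
    tensorflow::Tensor tensor(tensorflow::DT_INT32,
                              tensorflow::TensorShape({static_cast<int64_t>(values.size())}));
    auto flat = tensor.flat<int32_t>();
    for (size_t i = 0; i < values.size(); ++i) {
        flat(i) = values[i];                     // copy element-wise into the tensor buffer
    }
    return tensor;
}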
Vec2TensorI64(lookupKeys) : Vec2TensorI32(lookupKeys)); @@ -455,8 +453,8 @@ bool KeyProcess::KeyProcessTaskHelper(unique_ptr& batch, int channel, PushResult(batch, move(tensors), lookupKeys); LOG_DEBUG("pushResultTC(ms):{}", pushResultTC.ElapsedMS()); if (GlogConfig::gStatOn) { - LOG_INFO(STAT_INFO "channel_id {} batch_id {} rank_id {} key_process_time_cost {}", channel, batch->batchId, - rankInfo.rankId, totalTimeCost.ElapsedMS()); + LOG_INFO(STAT_INFO "channel_id {} batch_id {} rank_id {} key_process_time_cost {}", + channel, batch->batchId, rankInfo.rankId, totalTimeCost.ElapsedMS()); } return true; } @@ -482,27 +480,27 @@ vector KeyProcess::GetCountRecv(const unique_ptr& batch, in { TimeCost getCountRecvTC; if (rankInfo.useStatic) { - for (auto& cnt : keyCount) { + for (auto& cnt: keyCount) { cnt.resize(embInfos[batch->name].sendCount, 0); } } vector countSend; - for (auto& cnt : keyCount) { + for (auto& cnt: keyCount) { countSend.insert(countSend.cend(), cnt.cbegin(), cnt.cend()); } vector sc; for (int i = 0; i < rankInfo.rankSize; ++i) { sc.push_back(scAll.at(rankInfo.rankSize * rankInfo.rankId + i)); } - vector rc; // receive count + vector rc; // receive count for (int i = 0; i < rankInfo.rankSize; ++i) { rc.push_back(scAll.at(i * rankInfo.rankSize + rankInfo.rankId)); } - vector rs = Count2Start(rc); // receive displays/offset 接受数据的起始偏移量 + vector rs = Count2Start(rc); // receive displays/offset 接受数据的起始偏移量 vector countRecv; countRecv.resize(rs.back() + rc.back()); - int retCode = MPI_Alltoallv(countSend.data(), sc.data(), ss.data(), MPI_UINT32_T, countRecv.data(), rc.data(), - rs.data(), MPI_UINT32_T, comm[batch->channel][id]); + int retCode = MPI_Alltoallv(countSend.data(), sc.data(), ss.data(), MPI_UINT32_T, countRecv.data(), + rc.data(), rs.data(), MPI_UINT32_T, comm[batch->channel][id]); if (retCode != MPI_SUCCESS) { LOG_ERROR("rank {}, MPI_Alltoallv failed:{}", rankInfo.rankId, retCode); } @@ -510,7 +508,8 @@ vector KeyProcess::GetCountRecv(const unique_ptr& batch, in return countRecv; } -void KeyProcess::PushResult(unique_ptr& batch, unique_ptr> tensors, KeysT& lookupKeys) +void KeyProcess::PushResult(unique_ptr& batch, unique_ptr> tensors, + KeysT& lookupKeys) { std::unique_lock lockGuard(mut); storage.push_front(move(tensors)); @@ -545,8 +544,7 @@ unique_ptr KeyProcess::GetBatchData(int channel, int commId) const if (tc.ElapsedSec() > GET_BATCH_TIMEOUT) { if (commId == 0) { LOG_WARN(KEY_PROCESS "getting batch timeout! 1. check last 'read batch cost' print. 
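// GetCountRecv above reads its receive counts out of scAll, the gathered rankSize x
// rankSize communication matrix: entry [i * rankSize + j] is how many items rank i sends
// to rank j, so a rank's send counts are its row and its receive counts are its column.
// That extraction in isolation:
#include <vector>

std::vector<int> ReceiveCounts(const std::vector<int>& scAll, int rankSize, int myRank)
{
    std::vector<int> rc(rankSize);
    for (int sender = 0; sender < rankSize; ++sender) {
        rc[sender] = scAll[sender * rankSize + myRank];  // column myRank
    }
    return rc;
}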
" - "channel[{}] commId[{}]", - channel, commId); + "channel[{}] commId[{}]", channel, commId); } this_thread::sleep_for(seconds(1)); tc = TimeCost(); @@ -570,7 +568,7 @@ unique_ptr KeyProcess::GetBatchData(int channel, int commId) const return batch; } -size_t KeyProcess::GetKeySize(const unique_ptr& batch) +size_t KeyProcess::GetKeySize(const unique_ptr &batch) { size_t size = rankInfo.rankSize * embInfos[batch->name].sendCount; if (!rankInfo.useStatic) { @@ -579,8 +577,8 @@ size_t KeyProcess::GetKeySize(const unique_ptr& batch) return size; } -void KeyProcess::ProcessBatchWithFastUnique(const unique_ptr& batch, ock::ctr::UniquePtr& unique, int id, - UniqueInfo& uniqueInfoOut) +void KeyProcess::ProcessBatchWithFastUnique(const unique_ptr &batch, ock::ctr::UniquePtr& unique, + int id, UniqueInfo& uniqueInfoOut) { EASY_FUNCTION(profiler::colors::Purple) EASY_VALUE("batchId", batch->batchId) @@ -599,10 +597,10 @@ void KeyProcess::ProcessBatchWithFastUnique(const unique_ptr& batch, ock::ctr::UniqueIn uniqueIn; uniqueIn.inputIdCnt = static_cast(batch->Size()); - uniqueIn.inputId = reinterpret_cast(batch->sample.data()); + uniqueIn.inputId = reinterpret_cast(batch->sample.data()); ock::ctr::EnhancedUniqueOut uniqueOut; - uniqueOut.uniqueId = reinterpret_cast(keySendInfo.keySend.data()); + uniqueOut.uniqueId = reinterpret_cast(keySendInfo.keySend.data()); uniqueOut.index = reinterpret_cast(uniqueInfoOut.restore.data()); if (rankInfo.useStatic) { uniqueOut.idCnt = idCount.data(); @@ -611,7 +609,7 @@ void KeyProcess::ProcessBatchWithFastUnique(const unique_ptr& batch, uniqueOut.idCnt = keySendInfo.keyCount.data(); } uniqueOut.uniqueIdCntInBucket = splitSize.data(); - uniqueOut.uniqueIdInBucket = reinterpret_cast(uniqueVector.data()); + uniqueOut.uniqueIdInBucket = reinterpret_cast(uniqueVector.data()); uniqueOut.uniqueIdCnt = 0; int ret = unique->DoEnhancedUnique(uniqueIn, uniqueOut); @@ -627,18 +625,18 @@ void KeyProcess::ProcessBatchWithFastUnique(const unique_ptr& batch, All2All(sc, id, batch, keySendInfo, uniqueInfoOut.all2AllInfo); LOG_DEBUG(KEY_PROCESS "ProcessBatchWithFastUnique get batchId:{}, batchSize:{}," - " channel:{}, name:{}, restore:{}, keyCount:{}", - batch->batchId, batch->Size(), batch->channel, batch->name, uniqueInfoOut.restore.size(), - keySendInfo.keyCount.size()); + " channel:{}, name:{}, restore:{}, keyCount:{}", + batch->batchId, batch->Size(), batch->channel, batch->name, + uniqueInfoOut.restore.size(), keySendInfo.keyCount.size()); if (GlogConfig::gStatOn) { LOG_INFO(STAT_INFO "channel_id {} batch_id {} rank_id {} " - "batch_key_num_with_fast_unique {} unique_key_num_with_fast_unique {}", - batch->channel, batch->batchId, rankInfo.rankId, batch->Size(), uniqueOut.uniqueIdCnt); + "batch_key_num_with_fast_unique {} unique_key_num_with_fast_unique {}", + batch->channel, batch->batchId, rankInfo.rankId, batch->Size(), uniqueOut.uniqueIdCnt); } } -void KeyProcess::HandleHotAndSendCount(const unique_ptr& batch, UniqueInfo& uniqueInfoOut, +void KeyProcess::HandleHotAndSendCount(const unique_ptr &batch, UniqueInfo& uniqueInfoOut, KeySendInfo& keySendInfo, vector& sc, vector& splitSize) { std::shared_lock lock(g_smut); @@ -652,8 +650,8 @@ void KeyProcess::HandleHotAndSendCount(const unique_ptr& batch, Uniqu TimeCost computeHotTc; ComputeHotPos(batch, hotMap, uniqueInfoOut.hotPos, uniqueInfoOut.restore, hotOffset); LOG_DEBUG("ComputeHot TimeCost(ms):{}", computeHotTc.ElapsedMS()); - UpdateHotMapForUnique(keySendInfo.keySend, keySendInfo.keyCount, hotOffset, batch->batchId % 
hotEmbUpdateStep == 0, - batch->name); + UpdateHotMapForUnique(keySendInfo.keySend, keySendInfo.keyCount, + hotOffset, batch->batchId % hotEmbUpdateStep == 0, batch->name); if (rankInfo.useStatic) { sc.resize(rankInfo.rankSize, embInfos[batch->name].sendCount); @@ -665,8 +663,8 @@ void KeyProcess::HandleHotAndSendCount(const unique_ptr& batch, Uniqu } } -void KeyProcess::ComputeHotPos(const unique_ptr& batch, absl::flat_hash_map& hotMap, - vector& hotPos, vector& restore, const int hotOffset) const +void KeyProcess::ComputeHotPos(const unique_ptr &batch, absl::flat_hash_map &hotMap, + vector &hotPos, vector &restore, const int hotOffset) const { emb_key_t* inputData = batch->sample.data(); size_t miniBs = batch->Size(); @@ -689,27 +687,27 @@ void KeyProcess::ComputeHotPos(const unique_ptr& batch, absl::flat_ha } } -void KeyProcess::All2All(vector& sc, int id, const unique_ptr& batch, KeySendInfo& keySendInfo, +void KeyProcess::All2All(vector& sc, int id, const unique_ptr &batch, KeySendInfo& keySendInfo, All2AllInfo& all2AllInfoOut) { TimeCost getScAllTC; int channel = batch->channel; - GetScAllForUnique(sc, id, batch, - all2AllInfoOut.scAll); // Allgather通信获取所有(不同rank相同thread id的) + GetScAllForUnique(sc, id, batch, all2AllInfoOut.scAll); // Allgather通信获取所有(不同rank相同thread id的) LOG_DEBUG("GetScAll TimeCost(ms):{}", getScAllTC.ElapsedMS()); TimeCost all2allTC; - vector ss = Count2Start(sc); // send displays/offset 发送数据的起始偏移量 - vector rc(rankInfo.rankSize); // receive count + vector ss = Count2Start(sc); // send displays/offset 发送数据的起始偏移量 + vector rc(rankInfo.rankSize); // receive count for (int i = 0; i < rankInfo.rankSize; ++i) { // 通信量矩阵某一列的和即为本地要从其他设备接受的key数据量 rc[i] = all2AllInfoOut.scAll.at(i * rankInfo.rankSize + rankInfo.rankId); } - vector rs = Count2Start(rc); // receive displays/offset 接受数据的起始偏移量 + vector rs = Count2Start(rc); // receive displays/offset 接受数据的起始偏移量 all2AllInfoOut.keyRecv.resize(rs.back() + rc.back()); EASY_BLOCK("all2all") int retCode = MPI_Alltoallv(keySendInfo.keySend.data(), sc.data(), ss.data(), MPI_INT64_T, - all2AllInfoOut.keyRecv.data(), rc.data(), rs.data(), MPI_INT64_T, comm[channel][id]); + all2AllInfoOut.keyRecv.data(), rc.data(), rs.data(), + MPI_INT64_T, comm[channel][id]); if (retCode != MPI_SUCCESS) { LOG_ERROR("rank {}, MPI_Alltoallv failed:{}", rankInfo.rankId, retCode); } @@ -717,64 +715,65 @@ void KeyProcess::All2All(vector& sc, int id, const unique_ptr& b all2AllInfoOut.countRecv.resize(rs.back() + rc.back()); if (isWithFAAE) { retCode = MPI_Alltoallv(keySendInfo.keyCount.data(), sc.data(), ss.data(), MPI_UINT32_T, - all2AllInfoOut.countRecv.data(), rc.data(), rs.data(), MPI_UINT32_T, comm[channel][id]); + all2AllInfoOut.countRecv.data(), rc.data(), + rs.data(), MPI_UINT32_T, comm[channel][id]); if (retCode != MPI_SUCCESS) { - LOG_ERROR("channelId:{} threadId:{} batchId:{}, MPI_Alltoallv failed:{}", channel, id, batch->batchId, - retCode); + LOG_ERROR("channelId:{} threadId:{} batchId:{}, MPI_Alltoallv failed:{}", + channel, id, batch->batchId, retCode); } } - LOG_DEBUG("channelId:{} threadId:{} batchId:{}, All2All end, all2allTC TimeCost(ms):{}", channel, id, - batch->batchId, all2allTC.ElapsedMS()); + LOG_DEBUG("channelId:{} threadId:{} batchId:{}, All2All end, all2allTC TimeCost(ms):{}", + channel, id, batch->batchId, all2allTC.ElapsedMS()); EASY_END_BLOCK } -auto KeyProcess::ProcessSplitKeys(const unique_ptr& batch, int id, vector& splitKeys) - -> tuple, vector> +auto KeyProcess::ProcessSplitKeys(const unique_ptr& batch, int id, + vector& 
splitKeys) -> tuple, vector> { TimeCost processSplitKeysTC; EASY_FUNCTION(profiler::colors::Purple) EASY_VALUE("batchId", batch->batchId) - LOG_INFO(KEY_PROCESS "channelId:{} threadId:{} batchId:{}, ProcessSplitKeys start.", batch->channel, id, - batch->batchId); + LOG_INFO(KEY_PROCESS "channelId:{} threadId:{} batchId:{}, ProcessSplitKeys start.", + batch->channel, id, batch->batchId); // 使用静态all2all通信:发送或接受量为预置固定值 scInfo[batch->name] = 65536 / rankSize 经验值 - if (rankInfo.useStatic) { // maybe move after all2all - for (KeysT& i : splitKeys) { + if (rankInfo.useStatic) { // maybe move after all2all + for (KeysT& i: splitKeys) { if (static_cast(i.size()) > embInfos[batch->name].sendCount) { - LOG_ERROR("{}[{}]:{} overflow! set send count bigger than {}", batch->name, batch->channel, - batch->batchId, i.size()); - throw runtime_error(StringFormat("%s[%d]:%d overflow! set send count bigger than %d", - batch->name.c_str(), batch->channel, batch->batchId, i.size()) - .c_str()); + LOG_ERROR("{}[{}]:{} overflow! set send count bigger than {}", + batch->name, batch->channel, batch->batchId, i.size()); + throw runtime_error( + StringFormat("%s[%d]:%d overflow! set send count bigger than %d", + batch->name.c_str(), batch->channel, batch->batchId, i.size()).c_str()); } i.resize(embInfos[batch->name].sendCount, -1); } } KeysT keySend; - vector sc; // send count - for (const auto& i : splitKeys) { + vector sc; // send count + for (const auto& i: splitKeys) { sc.push_back(static_cast(i.size())); keySend.insert(keySend.cend(), i.cbegin(), i.cend()); } KeysT keyRecv; TimeCost getScAllTC; - vector scAll = GetScAll(sc, id, batch); // Allgather通信获取所有(不同rank相同thread id的)线程间通信量矩阵 + vector scAll = GetScAll(sc, id, batch); // Allgather通信获取所有(不同rank相同thread id的)线程间通信量矩阵 LOG_DEBUG("getScAllTC(ms)(AllReduce-AllGather):{}", getScAllTC.ElapsedMS()); vector ss = Count2Start(sc); // send displays/offset 发送数据的起始偏移量 - vector rc; // receive count + vector rc; // receive count for (int i = 0; i < rankInfo.rankSize; ++i) { // 通信量矩阵某一列的和即为本地要从其他设备接受的key数据量 rc.push_back(scAll.at(i * rankInfo.rankSize + rankInfo.rankId)); } - vector rs = Count2Start(rc); // receive displays/offset 接受数据的起始偏移量 + vector rs = Count2Start(rc); // receive displays/offset 接受数据的起始偏移量 keyRecv.resize(rs.back() + rc.back()); EASY_BLOCK("all2all") TimeCost uniqueAll2AllTC; - int retCode = MPI_Alltoallv(keySend.data(), sc.data(), ss.data(), MPI_INT64_T, keyRecv.data(), rc.data(), rs.data(), - MPI_INT64_T, comm[batch->channel][id]); + int retCode = MPI_Alltoallv(keySend.data(), sc.data(), ss.data(), MPI_INT64_T, + keyRecv.data(), rc.data(), rs.data(), MPI_INT64_T, comm[batch->channel][id]); if (retCode != MPI_SUCCESS) { LOG_ERROR("rank {}, MPI_Alltoallv failed:{}", rankInfo.rankId, retCode); } @@ -783,8 +782,8 @@ auto KeyProcess::ProcessSplitKeys(const unique_ptr& batch, int id, ve EASY_END_BLOCK LOG_DEBUG(KEY_PROCESS "channelId:{} threadId:{} batchId:{}, batchName:{}, MPI_Alltoallv finish." 
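// ProcessSplitKeys and All2All above both use the classic counts-then-payload exchange:
// publish per-peer send counts with MPI_Allgather, turn counts into displacements via an
// exclusive prefix sum (what Count2Start does), then move the keys with MPI_Alltoallv.
// A self-contained sketch of one such round:
#include <mpi.h>
#include <cstdint>
#include <vector>

std::vector<int64_t> ExchangeKeys(const std::vector<int64_t>& keySend,
                                  const std::vector<int>& sc, int rankSize, int myRank,
                                  MPI_Comm comm)
{
    std::vector<int> scAll(rankSize * rankSize);
    MPI_Allgather(sc.data(), rankSize, MPI_INT, scAll.data(), rankSize, MPI_INT, comm);

    std::vector<int> ss(rankSize, 0);             // send displacements
    std::vector<int> rc(rankSize), rs(rankSize, 0);
    for (int i = 0; i < rankSize; ++i) {
        rc[i] = scAll[i * rankSize + myRank];     // what every peer sends to me
    }
    for (int i = 1; i < rankSize; ++i) {
        ss[i] = ss[i - 1] + sc[i - 1];            // exclusive prefix sums
        rs[i] = rs[i - 1] + rc[i - 1];
    }
    std::vector<int64_t> keyRecv(rs.back() + rc.back());
    MPI_Alltoallv(keySend.data(), sc.data(), ss.data(), MPI_INT64_T,
                  keyRecv.data(), rc.data(), rs.data(), MPI_INT64_T, comm);
    return keyRecv;
}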
" processSplitKeysTC(ms):{}", - batch->channel, id, batch->batchId, batch->name, processSplitKeysTC.ElapsedMS()); - return {keyRecv, scAll, ss}; + batch->channel, id, batch->batchId, batch->name, processSplitKeysTC.ElapsedMS()); + return { keyRecv, scAll, ss }; } /* @@ -799,8 +798,8 @@ tuple, vector> KeyProcess::HashSplit(const unique_ptrSize(); vector splitKeys(rankInfo.rankSize); vector restore(batch->Size()); - vector hashSplitLens(rankInfo.rankSize); // 初始化全0,记录每个桶的长度 - absl::flat_hash_map uKey; // 用于去重查询 + vector hashSplitLens(rankInfo.rankSize); // 初始化全0,记录每个桶的长度 + absl::flat_hash_map uKey; // 用于去重查询 EASY_BLOCK("split push back") for (size_t i = 0; i < miniBs; i++) { const emb_key_t& key = batchData[i]; @@ -808,9 +807,9 @@ tuple, vector> KeyProcess::HashSplit(const unique_ptrsecond; } } @@ -823,10 +822,10 @@ tuple, vector> KeyProcess::HashSplit(const unique_ptrchannel, - batch->batchId, rankInfo.rankId, batch->Size(), uniqueKeyNum); + LOG_INFO(STAT_INFO "channel_id {} batch_id {} rank_id {} batch_key_num {} unique_key_num {}", + batch->channel, batch->batchId, rankInfo.rankId, batch->Size(), uniqueKeyNum); } - return {splitKeys, restore}; + return { splitKeys, restore }; } void KeyProcess::PaddingAlltoallVC(vector& splitKeys) const @@ -848,10 +847,10 @@ tuple, vector, vector>> KeyProcess::Hash emb_key_t* batchData = batch->sample.data(); size_t miniBs = batch->Size(); vector splitKeys(rankInfo.rankSize); - vector> keyCount(rankInfo.rankSize); // splitKeys在原始batch中对应的频次 + vector> keyCount(rankInfo.rankSize); // splitKeys在原始batch中对应的频次 vector restore(batch->Size()); - vector hashSplitLens(rankInfo.rankSize); // 初始化全0,记录每个桶的长度 - absl::flat_hash_map> uKey; // 用于去重查询 + vector hashSplitLens(rankInfo.rankSize); // 初始化全0,记录每个桶的长度 + absl::flat_hash_map> uKey; // 用于去重查询 EASY_BLOCK("split push back") for (size_t i = 0; i < miniBs; i++) { const emb_key_t& key = batchData[i]; @@ -859,10 +858,10 @@ tuple, vector, vector>> KeyProcess::Hash auto result = uKey.find(key); if (result == uKey.end()) { splitKeys[devId].push_back(key); - restore[i] = hashSplitLens[devId]++; // restore记录去重后key在桶内偏移量(用于计算恢复向量) + restore[i] = hashSplitLens[devId]++; // restore记录去重后key在桶内偏移量(用于计算恢复向量) uKey[key].first = restore[i]; uKey[key].second = 1; - } else { // 去重 + } else { // 去重 restore[i] = result->second.first; uKey[key].second++; } @@ -889,9 +888,9 @@ tuple, vector, vector>> KeyProcess::Hash uniqueKeyNum += splitKeys[devId].size(); } LOG_INFO(STAT_INFO "channel_id {} batch_id {} rank_id {} batch_key_num {} faae_unique_key_num {}", - batch->channel, batch->batchId, rankInfo.rankId, batch->Size(), uniqueKeyNum); + batch->channel, batch->batchId, rankInfo.rankId, batch->Size(), uniqueKeyNum); } - return {splitKeys, restore, keyCount}; + return { splitKeys, restore, keyCount }; } tuple, vector, vector> KeyProcess::HotHashSplit(const unique_ptr& batch) @@ -901,7 +900,7 @@ tuple, vector, vector> KeyProcess::HotHashSplit(cons size_t miniBs = batch->Size(); vector splitKeys(rankInfo.rankSize); vector restore(batch->Size()); - absl::flat_hash_map uKey; // 用于去重查询 + absl::flat_hash_map uKey; // 用于去重查询 absl::flat_hash_map keyCountMapByEmbName; std::shared_lock lock(g_smut); auto hotMap = hotKey[batch->name]; @@ -910,31 +909,31 @@ tuple, vector, vector> KeyProcess::HotHashSplit(cons vector hotPosDev(hotEmbTotCount[batch->name]); int hotCount = 0; int hotOffset = hotEmbTotCount[batch->name]; - for (size_t i = 0; i < miniBs; i++) { // for mini batch + for (size_t i = 0; i < miniBs; i++) { // for mini batch const emb_key_t& key = 
batchData[i]; if (batch->batchId % hotEmbUpdateStep == 0) { keyCountMapByEmbName[key]++; } emb_key_t devId = abs(key % static_cast(rankInfo.rankSize)); auto result = uKey.find(key); - if (result != uKey.end()) { // // already in splitKeys + if (result != uKey.end()) { // // already in splitKeys restore[i] = result->second; continue; } // new key in current batch - splitKeys[devId].push_back(key); // push to bucket + splitKeys[devId].push_back(key); // push to bucket auto hot = hotMap.find(key); - if (hot != hotMap.end()) { // is hot key - if (hot->second == -1) { // is new hot key in this batch + if (hot != hotMap.end()) { // is hot key + if (hot->second == -1) { // is new hot key in this batch // pos in lookup vec (need add ss) for hot-gather hotPos[hotCount] = static_cast(splitKeys[devId].size()) - 1; - hotPosDev[hotCount] = devId; // which dev, for get ss + hotPosDev[hotCount] = devId; // which dev, for get ss hot->second = hotCount; - restore[i] = hotCount++; // get pos of hot emb + restore[i] = hotCount++; // get pos of hot emb } else { restore[i] = hot->second; } - } else { // is not hot key + } else { // is not hot key // restore记录去重后key在桶内偏移量(用于计算恢复向量) restore[i] = static_cast(splitKeys[devId].size() + (hotOffset - 1)); } @@ -947,20 +946,20 @@ tuple, vector, vector> KeyProcess::HotHashSplit(cons uniqueKeyNum += splitKeys[devId].size(); } LOG_INFO(STAT_INFO "channel_id {} batch_id {} rank_id {} batch_key_num {} hot_unique_key_num {}", - batch->channel, batch->batchId, rankInfo.rankId, batch->Size(), uniqueKeyNum); + batch->channel, batch->batchId, rankInfo.rankId, batch->Size(), uniqueKeyNum); } UpdateHotMap(keyCountMapByEmbName, hotEmbTotCount[batch->name], batch->batchId % hotEmbUpdateStep == 0, batch->name); AddCountStartToHotPos(splitKeys, hotPos, hotPosDev, batch); - return {splitKeys, restore, hotPos}; + return { splitKeys, restore, hotPos }; } void KeyProcess::AddCountStartToHotPos(vector& splitKeys, vector& hotPos, const vector& hotPosDev, const unique_ptr& batch) { vector splitKeysSize; - for (auto& splitKey : splitKeys) { + for (auto& splitKey: splitKeys) { int tmp = rankInfo.useStatic ? 
embInfos[batch->name].sendCount : static_cast(splitKey.size()); splitKeysSize.push_back(tmp); } @@ -971,13 +970,13 @@ void KeyProcess::AddCountStartToHotPos(vector& splitKeys, vector& ho } } -void KeyProcess::UpdateHotMapForUnique(const KeysT& keySend, const vector& keyCount, uint32_t count, - bool refresh, const string& embName) +void KeyProcess::UpdateHotMapForUnique(const KeysT &keySend, const vector &keyCount, + uint32_t count, bool refresh, const string& embName) { auto& hotMap = hotKey[embName]; if (refresh) { priority_queue> pq; - for (size_t i = 0; i < keySend.size(); ++i) { + for (size_t i = 0;i < keySend.size(); ++i) { if (keySend[i] == -1) { continue; } @@ -1003,8 +1002,8 @@ void KeyProcess::UpdateHotMap(absl::flat_hash_map& keyCountMapBy return; } auto& hotMap = hotKey[embName]; - priority_queue> pq; // top k key - for (auto& p : keyCountMapByEmbName) { + priority_queue> pq; // top k key + for (auto& p: keyCountMapByEmbName) { pq.push(pair(-p.second, p.first)); if (pq.size() > count) { pq.pop(); @@ -1031,40 +1030,42 @@ vector KeyProcess::GetScAll(const vector& keyScLocal, int commId, cons LOG_DEBUG("channelId:{} threadId:{} batchId:{}, GetScAll start.", batch->channel, commId, batch->batchId); // allgather keyScLocal(key all2all keyScLocal = device all2all rc) - auto retCode = MPI_Allgather(keyScLocal.data(), rankInfo.rankSize, MPI_INT, scAll.data(), rankInfo.rankSize, - MPI_INT, comm[batch->channel][commId]); + auto retCode = MPI_Allgather(keyScLocal.data(), rankInfo.rankSize, MPI_INT, + scAll.data(), rankInfo.rankSize, MPI_INT, + comm[batch->channel][commId]); if (retCode != MPI_SUCCESS) { LOG_ERROR("rank {} commId {}, MPI_Allgather failed:{}", rankInfo.rankId, commId, retCode); } - LOG_DEBUG("channelId:{} threadId:{} batchId:{}, GetScAll MPI_Allgather end, key scAll matrix:\n{}", batch->channel, - commId, batch->batchId, VectorToString(scAll)); + LOG_DEBUG("channelId:{} threadId:{} batchId:{}, GetScAll MPI_Allgather end, key scAll matrix:\n{}", + batch->channel, commId, batch->batchId, VectorToString(scAll)); return scAll; } -void KeyProcess::GetScAllForUnique(const vector& keyScLocal, int commId, const unique_ptr& batch, - vector& scAllOut) +void KeyProcess::GetScAllForUnique(const vector& keyScLocal, int commId, const unique_ptr &batch, + vector &scAllOut) { EASY_FUNCTION() int channel = batch->channel; scAllOut.resize(rankInfo.rankSize * rankInfo.rankSize); // allgather keyScLocal(key all2all keyScLocal = device all2all rc) - auto retCode = MPI_Allgather(keyScLocal.data(), rankInfo.rankSize, MPI_INT, scAllOut.data(), rankInfo.rankSize, - MPI_INT, comm[channel][commId]); + auto retCode = MPI_Allgather(keyScLocal.data(), rankInfo.rankSize, MPI_INT, + scAllOut.data(), rankInfo.rankSize, MPI_INT, + comm[channel][commId]); if (retCode != MPI_SUCCESS) { LOG_ERROR("rank {}, MPI_Allgather failed:{}", rankInfo.rankId, retCode); } - LOG_DEBUG("channelId:{} threadId:{} batchId:{}, GetScAllForUnique end, key scAllOut matrix:\n{}", channel, commId, - batch->batchId, VectorToString(scAllOut)); + LOG_DEBUG("channelId:{} threadId:{} batchId:{}, GetScAllForUnique end, key scAllOut matrix:\n{}", + channel, commId, batch->batchId, VectorToString(scAllOut)); } void KeyProcess::Key2Offset(const EmbNameT& embName, KeysT& splitKey, int channel) { TimeCost key2OffsetTC; EASY_FUNCTION(profiler::colors::Blue600) - std::lock_guard lk(mut); // lock for PROCESS_THREAD + std::lock_guard lk(mut); // lock for PROCESS_THREAD auto& key2Offset = keyOffsetMap[embName]; - auto& maxOffsetTmp = 
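// UpdateHotMap above keeps the `count` hottest keys with a bounded heap: counts are pushed
// negated, so the top of std::priority_queue (a max-heap) is always the coldest candidate
// and can be popped once the heap exceeds its budget. The same trick in isolation:
#include <cstdint>
#include <queue>
#include <unordered_map>
#include <utility>
#include <vector>

std::vector<int64_t> TopHotKeys(const std::unordered_map<int64_t, int64_t>& keyCount,
                                size_t budget)
{
    std::priority_queue<std::pair<int64_t, int64_t>> pq;  // (negated count, key)
    for (const auto& p : keyCount) {
        pq.push({-p.second, p.first});
        if (pq.size() > budget) {
            pq.pop();                                     // drop the coldest candidate
        }
    }
    std::vector<int64_t> hot;
    while (!pq.empty()) {
        hot.push_back(pq.top().second);
        pq.pop();
    }
    return hot;                                           // order is irrelevant for a set
}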
maxOffset[embName]; + auto& maxOffsetTmp = maxOffset[embName]; auto& evictPos = evictPosMap[embName]; for (long& key : splitKey) { if (key == -1) { @@ -1077,9 +1078,8 @@ void KeyProcess::Key2Offset(const EmbNameT& embName, KeysT& splitKey, int channe size_t offset; // 新值, emb有pos可复用 offset = evictPos.back(); - LOG_TRACE("HBM mode, evictPos is not null, name[{}] key [{}] reuse offset [{}], " - "evictSize [{}]!!!", - embName, key, offset, evictPos.size()); + LOG_TRACE("HBM mode, evictPos is not null, name[{}] key [{}] reuse offset [{}], evictSize [{}]!!!", + embName, key, offset, evictPos.size()); key2Offset[key] = offset; key = offset; evictPos.pop_back(); @@ -1097,18 +1097,18 @@ void KeyProcess::Key2Offset(const EmbNameT& embName, KeysT& splitKey, int channe LOG_ERROR("dev cache overflow {} > {}", maxOffsetTmp, embInfos[embName].devVocabSize); throw std::runtime_error("dev cache overflow!"); } - LOG_DEBUG("current hbm emb:{}, usage:{}/{} key2OffsetTC({} ms)", embName, maxOffsetTmp, - embInfos[embName].devVocabSize, key2OffsetTC.ElapsedMS()); + LOG_DEBUG("current hbm emb:{}, usage:{}/{} key2OffsetTC({} ms)", + embName, maxOffsetTmp, embInfos[embName].devVocabSize, key2OffsetTC.ElapsedMS()); } void KeyProcess::Key2OffsetDynamicExpansion(const EmbNameT& embName, KeysT& splitKey, int channel) { TimeCost key2OffsetTC; EASY_FUNCTION(profiler::colors::Blue600) - std::lock_guard lk(mut); // lock for PROCESS_THREAD + std::lock_guard lk(mut); // lock for PROCESS_THREAD auto& key2Offset = keyOffsetMap[embName]; - auto& maxOffsetTmp = maxOffset[embName]; - auto& curEmbTable = embeddingTableMap[embName]; // empty when not use dynamic expansion + auto& maxOffsetTmp = maxOffset[embName]; + auto& curEmbTable = embeddingTableMap[embName]; // empty when not use dynamic expansion for (long& key : splitKey) { if (key == -1) { key = 0; @@ -1131,8 +1131,8 @@ void KeyProcess::Key2OffsetDynamicExpansion(const EmbNameT& embName, KeysT& spli key = 0; } } - LOG_DEBUG("current expansion emb:{}, usage:{}/{}, key2OffsetTC({} ms)", embName, maxOffsetTmp, - embInfos[embName].devVocabSize, key2OffsetTC.ElapsedMS()); + LOG_DEBUG("current expansion emb:{}, usage:{}/{}, key2OffsetTC({} ms)", + embName, maxOffsetTmp, embInfos[embName].devVocabSize, key2OffsetTC.ElapsedMS()); } /* @@ -1157,10 +1157,11 @@ void KeyProcess::BuildRestoreVec(const unique_ptr& batch, const vecto hotNum += 1; } } - LOG_DEBUG("hot num in all:{}/{} buildRestoreVecTC(ms):{}", hotNum, batch->Size(), buildRestoreVecTC.ElapsedMS()); + LOG_DEBUG("hot num in all:{}/{} buildRestoreVecTC(ms):{}", + hotNum, batch->Size(), buildRestoreVecTC.ElapsedMS()); } -template +template T KeyProcess::GetInfo(info_list_t& list, int batch, const string& embName, int channel) { std::lock_guard lockGuard(mut); @@ -1199,7 +1200,7 @@ KeysT KeyProcess::GetLookupKeys(int batch, const string& embName, int channel) HybridMgmtBlock* hybridMgmtBlock = Singleton::GetInstance(); if (batch != hybridMgmtBlock->hybridBatchId[channel]) { LOG_DEBUG(KEY_PROCESS "Detected that the batch has expired at this time, exiting the loop! 
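// Key2Offset above maps each key to a row of the device-side table: known keys reuse their
// row, new keys first recycle slots freed by eviction (evictPos) and only then grow
// maxOffset, and allocation fails hard once the vocabulary is exhausted. That policy in
// miniature:
#include <cstdint>
#include <stdexcept>
#include <unordered_map>
#include <vector>

size_t AssignOffset(int64_t key, std::unordered_map<int64_t, size_t>& key2Offset,
                    std::vector<size_t>& freeSlots, size_t& maxOffset, size_t vocabSize)
{
    auto it = key2Offset.find(key);
    if (it != key2Offset.end()) {
        return it->second;             // known key: reuse its row
    }
    size_t offset;
    if (!freeSlots.empty()) {
        offset = freeSlots.back();     // prefer recycling an evicted row
        freeSlots.pop_back();
    } else if (maxOffset < vocabSize) {
        offset = maxOffset++;          // otherwise take a fresh row
    } else {
        throw std::runtime_error("dev cache overflow!");
    }
    key2Offset.emplace(key, offset);
    return offset;
}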
{}[{}]:{}", - embName, channel, batch); + embName, channel, batch); return {}; } if (batch != 0 && channel != 0 && tc.ElapsedSec() > KEY_PROCESS_TIMEOUT) { @@ -1220,9 +1221,8 @@ KeysT KeyProcess::GetLookupKeys(int batch, const string& embName, int channel) SendEos(batch, channel); return {}; } - LOG_TRACE("getting info failed {}[{}], list is empty, and mgmt batchId: {}, readEmbKey " - "batchId: {}.", - embName, channel, batch, readEmbKeyBatchId); + LOG_TRACE("getting info failed {}[{}], list is empty, and mgmt batchId: {}, readEmbKey batchId: {}.", + embName, channel, batch, readEmbKeyBatchId); this_thread::sleep_for(1ms); } catch (WrongListTop&) { LOG_TRACE("getting info failed {}[{}]:{} wrong top", embName, channel, batch); @@ -1246,7 +1246,7 @@ void KeyProcess::SendEos(int batchId, int channel) vector tensors; bool isNeedResend = true; - for (const auto& emb : as_const(embInfos)) { // 一个表触发以后,其余表都发送eos,最后外层接收null退出此次循环 + for (const auto& emb: as_const(embInfos)) { // 一个表触发以后,其余表都发送eos,最后外层接收null退出此次循环 LOG_INFO("channelId:{} batchId:{}, the embName:{} related channel SendEos start.", channel, batchId, emb.first); if (!isRunning) { throw EndRunExit("SendEos end run, isRunning is false after lock destroyMutex."); @@ -1254,7 +1254,7 @@ void KeyProcess::SendEos(int batchId, int channel) for (const string& transName : usedChannelNames) { string sendName = StringFormat("%s_%s_%d", emb.first.c_str(), transName.c_str(), channel); size_t channelSize = 0; - + acltdtQueryChannelSize(transChannels[sendName], &channelSize); LOG_INFO("[EOS] Before send eos, {} contains {}.", sendName, channelSize); SendTensorsByAcl(transChannels[sendName], ACL_TENSOR_DATA_END_OF_SEQUENCE, tensors, isNeedResend); @@ -1301,7 +1301,7 @@ unique_ptr> KeyProcess::GetInfoVec(int batch, const string& embNa HybridMgmtBlock* hybridMgmtBlock = Singleton::GetInstance(); if (batch != hybridMgmtBlock->hybridBatchId[channel]) { LOG_DEBUG(KEY_PROCESS "Detected that the batch has expired at this time, exiting the loop! 
{}[{}]:{}", - embName, channel, batch); + embName, channel, batch); return nullptr; } if (batch != 0 && channel != 0 && tc.ElapsedSec() > KEY_PROCESS_TIMEOUT) { @@ -1318,17 +1318,15 @@ unique_ptr> KeyProcess::GetInfoVec(int batch, const string& embNa return uTensor; } catch (EmptyList&) { unique_lock lockEosGuard(eosMutex); - // 避免eos在keyProcess还未处理完数据时插队到通道前面, - // readEmbKey真实的次数是readEmbedBatchId减1 + // 避免eos在keyProcess还未处理完数据时插队到通道前面, readEmbKey真实的次数是readEmbedBatchId减1 if (isNeedSendEos[channel] && (hybridMgmtBlock->readEmbedBatchId[channel] - 1) < batch) { LOG_INFO("channelId:{} batchId:{}, GetInfoVec eos.", channel, batch); unique_lock lockDestroyGuard(destroyMutex); SendEos(batch, channel); return nullptr; } - LOG_TRACE("getting info failed {}[{}], list is empty, and mgmt batchId: {}, readEmbKey " - "batchId: {}.", - embName, channel, batch, (hybridMgmtBlock->readEmbedBatchId[channel] - 1)); + LOG_TRACE("getting info failed {}[{}], list is empty, and mgmt batchId: {}, readEmbKey batchId: {}.", + embName, channel, batch, (hybridMgmtBlock->readEmbedBatchId[channel] - 1)); this_thread::sleep_for(1ms); } catch (WrongListTop&) { LOG_TRACE("getting info failed {}[{}]:{} wrong top", embName, channel, batch); @@ -1341,7 +1339,7 @@ void KeyProcess::SendA2A(const vector& a2aInfo, const string& embName, int { // 数据放到队列里,在mgmt里面发送(检查发送数据量) auto tensors = make_unique>(); - Tensor tmpTensor(tensorflow::DT_INT64, {rankInfo.rankSize, rankInfo.rankSize}); + Tensor tmpTensor(tensorflow::DT_INT64, { rankInfo.rankSize, rankInfo.rankSize }); auto tmpData = tmpTensor.matrix(); for (int i = 0; i < rankInfo.rankSize; ++i) { for (int j = 0; j < rankInfo.rankSize; ++j) { @@ -1361,13 +1359,13 @@ int KeyProcess::GetMaxStep(int channelId) const return rankInfo.ctrlSteps.at(channelId); } -void KeyProcess::EvictKeys(const string& embName, const vector& keys) // hbm +void KeyProcess::EvictKeys(const string& embName, const vector& keys) // hbm { LOG_INFO(KEY_PROCESS "hbm funEvictCall: [{}]! keySize:{}", embName, keys.size()); EmbeddingMgmt::Instance()->EvictKeys(embName, keys); } -void KeyProcess::EvictKeysCombine(const vector& keys) // hbm +void KeyProcess::EvictKeysCombine(const vector& keys) // hbm { LOG_INFO(KEY_PROCESS "hbm combine funEvictCall, keySize:{}", keys.size()); EmbeddingMgmt::Instance()->EvictKeysCombine(keys); @@ -1376,7 +1374,7 @@ void KeyProcess::EvictKeysCombine(const vector& keys) // hbm void KeyProcess::EvictDeleteDeviceEmb(const string& embName, const vector& keys) { EASY_FUNCTION(profiler::colors::Blue600) - std::lock_guard lk(mut); // lock for PROCESS_THREAD + std::lock_guard lk(mut); // lock for PROCESS_THREAD size_t keySize = keys.size(); auto& devHashMap = keyOffsetMap.at(embName); @@ -1390,7 +1388,7 @@ void KeyProcess::EvictDeleteDeviceEmb(const string& embName, const vectorsecond; @@ -1404,18 +1402,18 @@ void KeyProcess::EvictDeleteDeviceEmb(const string& embName, const vector offset) { if (offset.size() > embInfos[embName].devVocabSize) { - LOG_ERROR("{} overflow! init evict dev, evictOffset size {} bigger than dev vocabSize {}", embName, - offset.size(), embInfos[embName].devVocabSize); + LOG_ERROR("{} overflow! init evict dev, evictOffset size {} bigger than dev vocabSize {}", + embName, offset.size(), embInfos[embName].devVocabSize); throw runtime_error( - Logger::Format("{} overflow! init evict dev, evictOffset size {} bigger than dev vocabSize {}", embName, - offset.size(), embInfos[embName].devVocabSize) - .c_str()); + Logger::Format("{} overflow! 
init evict dev, evictOffset size {} bigger than dev vocabSize {}", + embName, offset.size(), embInfos[embName].devVocabSize + ).c_str()); } vector tmpDataOut; Tensor tmpData = Vec2TensorI32(offset); tmpDataOut.emplace_back(tmpData); - tmpDataOut.emplace_back(Tensor(tensorflow::DT_INT32, {1})); + tmpDataOut.emplace_back(Tensor(tensorflow::DT_INT32, { 1 })); auto evictLen = tmpDataOut.back().flat(); int evictSize = static_cast(offset.size()); @@ -1428,12 +1426,12 @@ void KeyProcess::EvictInitDeviceEmb(const string& embName, vector offset LOG_INFO(KEY_PROCESS "hbm EvictInitDeviceEmb: [{}]! send offsetSize:{}", embName, offset.size()); } -string KeyProcess::DumpSplitKeys(vector>& splitKeys) const +string KeyProcess::DumpSplitKeys(vector> &splitKeys) const { stringstream ssTrace; for (int devId = 0; devId < rankInfo.rankSize; ++devId) { ssTrace << '|' << devId << ":"; - for (auto key : splitKeys[devId]) { + for (auto key: splitKeys[devId]) { ssTrace << key << ','; } ssTrace << '|'; diff --git a/src/core/key_process/key_process.h b/src/core/key_process/key_process.h index 4dafc07f..8bd7b8d0 100644 --- a/src/core/key_process/key_process.h +++ b/src/core/key_process/key_process.h @@ -16,281 +16,283 @@ See the License for the specific language governing permissions and #ifndef MX_REC_KEY_PROCESS_H #define MX_REC_KEY_PROCESS_H -#include -#include - +#include #include #include -#include #include #include -#include +#include +#include +#include +#include "ock_ctr_common/include/factory.h" + +#include "utils/common.h" #include "emb_table/emb_table.h" #include "feature_admit_and_evict.h" #include "hybrid_mgmt/hybrid_mgmt_block.h" -#include "ock_ctr_common/include/factory.h" -#include "utils/common.h" #include "utils/singleton.h" namespace MxRec { -using namespace std; + using namespace std; -template -struct Cmp { - bool operator()(const T& a, const T& b) const - { - return get(a) > get(b); // batch id order - } -}; + template + struct Cmp { + bool operator()(const T& a, const T& b) const + { + return get(a) > get(b); // batch id order + } + }; -template -using heap_t = priority_queue, Cmp>; + template + using heap_t = priority_queue, Cmp>; -template -using info_list_t = map, MAX_QUEUE_NUM>>; + template + using info_list_t = map, MAX_QUEUE_NUM>>; -enum class ProcessedInfo { - RESTORE, - ALL2ALL, - INVALID -}; + enum class ProcessedInfo { + RESTORE, + ALL2ALL, + INVALID + }; -class EndRunExit : public std::exception { -public: - explicit EndRunExit(const char* message) : errorMessage(message) {} + class EndRunExit : public std::exception { + public: + explicit EndRunExit(const char* message) : errorMessage(message) {} - const char* what() const noexcept override - { - return errorMessage; - } + const char* what() const noexcept override + { + return errorMessage; + } -private: - const char* errorMessage; -}; + private: + const char* errorMessage; + }; -constexpr int MPI_ABNORMAL_SEND_VALUE = 0; // MPI异常通信时发送0 -constexpr int MPI_NORMAL_SEND_VALUE = 1; // MPI正常通信时发送1 + constexpr int MPI_ABNORMAL_SEND_VALUE = 0; // MPI异常通信时发送0 + constexpr int MPI_NORMAL_SEND_VALUE = 1; // MPI正常通信时发送1 -class EmptyList : public std::exception {}; + class EmptyList : public std::exception { + }; -class WrongListTop : public std::exception {}; + class WrongListTop : public std::exception { + }; -class KeyProcess { -public: - bool Initialize(const RankInfo& rInfo, const vector& eInfos, - const vector& thresholdValues = {}, int seed = 0); + class KeyProcess { + public: + bool Initialize(const RankInfo& rInfo, const vector& 
eInfos, + const vector& thresholdValues = {}, int seed = 0); - unique_ptr> GetInfoVec(int batch, const string& embName, int channel, ProcessedInfo type); + unique_ptr> GetInfoVec(int batch, const string& embName, int channel, ProcessedInfo type); - KeysT GetLookupKeys(int batch, const string& embName, int channel); + KeysT GetLookupKeys(int batch, const string& embName, int channel); - int GetMaxStep(int channelId) const; + int GetMaxStep(int channelId) const; - OffsetMemT GetMaxOffset(); + OffsetMemT GetMaxOffset(); - KeyOffsetMemT GetKeyOffsetMap(); + KeyOffsetMemT GetKeyOffsetMap(); - KeyCountMemT GetKeyCountMap(); + KeyCountMemT GetKeyCountMap(); - FeatureAdmitAndEvict& GetFeatAdmitAndEvict(); + FeatureAdmitAndEvict& GetFeatAdmitAndEvict(); - void LoadMaxOffset(OffsetMemT& loadData); + void LoadMaxOffset(OffsetMemT& loadData); - void LoadKeyOffsetMap(KeyOffsetMemT& loadData); + void LoadKeyOffsetMap(KeyOffsetMemT& loadData); - void LoadKeyCountMap(KeyCountMemT& loadData); + void LoadKeyCountMap(KeyCountMemT& loadData); - void Destroy(); + void Destroy(); - void LoadSaveLock(); + void LoadSaveLock(); - void LoadSaveUnlock(); + void LoadSaveUnlock(); - void EvictKeys(const string& embName, const vector& keys); + void EvictKeys(const string& embName, const vector& keys); - void EvictKeysCombine(const vector& keys); + void EvictKeysCombine(const vector& keys); - void SetupHotEmbUpdateStep(); + void SetupHotEmbUpdateStep(); - int64_t GetExpansionTableSize(const string& embName); + int64_t GetExpansionTableSize(const string& embName); - int64_t GetExpansionTableCapacity(const string& embName); + int64_t GetExpansionTableCapacity(const string& embName); - void RecordKeyCountMap(const unique_ptr& batch); + void RecordKeyCountMap(const unique_ptr& batch); - template - void GlobalUnique(T& lookupKeys, T& uniqueKeys, vector& restoreVecSec) - { - absl::flat_hash_map umap; - restoreVecSec.resize(lookupKeys.size(), -1); - int32_t length = 0; + template + void GlobalUnique(T& lookupKeys, T& uniqueKeys, vector& restoreVecSec) + { + absl::flat_hash_map umap; + restoreVecSec.resize(lookupKeys.size(), -1); + int32_t length = 0; - for (size_t i = 0; i < lookupKeys.size(); ++i) { - int64_t key = lookupKeys[i]; - if (rankInfo.useStatic - && ((!rankInfo.useDynamicExpansion && key == -1) || (rankInfo.useDynamicExpansion && key == 0))) { - continue; - } + for (size_t i = 0; i < lookupKeys.size(); ++i) { + int64_t key = lookupKeys[i]; + if (rankInfo.useStatic && ( + (!rankInfo.useDynamicExpansion && key == -1) || (rankInfo.useDynamicExpansion && key == 0))) { + continue; + } - auto result = umap.find(key); - if (result == umap.end()) { - uniqueKeys.push_back(lookupKeys[i]); - umap[key] = length; - restoreVecSec[i] = length; - length++; - } else { - restoreVecSec[i] = result->second; + auto result = umap.find(key); + if (result == umap.end()) { + uniqueKeys.push_back(lookupKeys[i]); + umap[key] = length; + restoreVecSec[i] = length; + length++; + } else { + restoreVecSec[i] = result->second; + } } - } - if (rankInfo.useStatic) { - if (rankInfo.useDynamicExpansion) { - uniqueKeys.resize(lookupKeys.size(), 0); - } else { - uniqueKeys.resize(lookupKeys.size(), -1); + if (rankInfo.useStatic) { + if (rankInfo.useDynamicExpansion) { + uniqueKeys.resize(lookupKeys.size(), 0); + } else { + uniqueKeys.resize(lookupKeys.size(), -1); + } } } - } - - void SetEos(int status, int channelId); - void SendEos(int batchId, int channel); + void SetEos(int status, int channelId); - bool isRunning{false}; + void SendEos(int batchId, 
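// The contract of GlobalUnique above, checked on a concrete input: first occurrences keep
// their arrival order in uniqueKeys, and restoreVecSec maps every original position to the
// index of its unique key (padding values for the static-shape path are skipped there).
// A standalone illustration of that contract:
#include <cassert>
#include <cstdint>
#include <unordered_map>
#include <vector>

int main()
{
    const std::vector<int64_t> lookup = {5, 7, 5, 9, 7};
    std::vector<int64_t> unique;
    std::vector<int32_t> restore(lookup.size(), -1);
    std::unordered_map<int64_t, int32_t> seen;
    for (size_t i = 0; i < lookup.size(); ++i) {
        auto it = seen.find(lookup[i]);
        if (it == seen.end()) {
            seen.emplace(lookup[i], static_cast<int32_t>(unique.size()));
            restore[i] = static_cast<int32_t>(unique.size());
            unique.push_back(lookup[i]);
        } else {
            restore[i] = it->second;
        }
    }
    assert((unique == std::vector<int64_t>{5, 7, 9}));
    assert((restore == std::vector<int32_t>{0, 1, 0, 2, 1}));
    return 0;
}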
int channel); - std::mutex destroyMutex; - std::mutex eosMutex; - inline bool HasEmbName(const string& embName) - { - return embInfos.find(embName) != embInfos.end(); - }; - GTEST_PRIVATE : + bool isRunning { false }; - int - Start(); + std::mutex destroyMutex; + std::mutex eosMutex; + inline bool HasEmbName(const string& embName) + { + return embInfos.find(embName) != embInfos.end(); + }; + GTEST_PRIVATE: - template - T GetInfo(info_list_t& list, int batch, const string& embName, int channel); + int Start(); - RankInfo rankInfo; - map embInfos; - MPI_Comm comm[MAX_CHANNEL_NUM][MAX_KEY_PROCESS_THREAD]; - std::mutex mut{}; - vector> procThreads{}; - std::mutex loadSaveMut[MAX_CHANNEL_NUM][MAX_KEY_PROCESS_THREAD]{}; - info_list_t lookupKeysList; - list>> storage; - info_list_t infoList; - info_list_t all2AllList; - map maxOffset{}; - map> keyOffsetMap{}; - map> keyCountMap{}; - FeatureAdmitAndEvict m_featureAdmitAndEvict{}; - map> evictPosMap{}; - map> hotKey{}; - map hotEmbTotCount; - map embeddingTableMap{}; - ock::ctr::FactoryPtr factory{}; - int hotEmbUpdateStep = HOT_EMB_UPDATE_STEP_DEFAULT; - bool isWithFAAE; - bool isNeedSendEos[2] = {0, 0}; // 分别代表通道0、1的eos状态 + template + T GetInfo(info_list_t& list, int batch, const string& embName, int channel); - void InitHotEmbTotCount(const EmbInfo& info, const RankInfo& rInfo); + RankInfo rankInfo; + map embInfos; + MPI_Comm comm[MAX_CHANNEL_NUM][MAX_KEY_PROCESS_THREAD]; + std::mutex mut {}; + vector> procThreads {}; + std::mutex loadSaveMut[MAX_CHANNEL_NUM][MAX_KEY_PROCESS_THREAD] {}; + info_list_t lookupKeysList; + list>> storage; + info_list_t infoList; + info_list_t all2AllList; + map maxOffset {}; + map> keyOffsetMap {}; + map> keyCountMap {}; + FeatureAdmitAndEvict m_featureAdmitAndEvict {}; + map> evictPosMap {}; + map> hotKey {}; + map hotEmbTotCount; + map embeddingTableMap {}; + ock::ctr::FactoryPtr factory {}; + int hotEmbUpdateStep = HOT_EMB_UPDATE_STEP_DEFAULT; + bool isWithFAAE; + bool isNeedSendEos[2] = { 0, 0 }; // 分别代表通道0、1的eos状态 - void KeyProcessTask(int channel, int threadId); + void InitHotEmbTotCount(const EmbInfo& info, const RankInfo& rInfo); - void KeyProcessTaskWithFastUnique(int channel, int threadId); + void KeyProcessTask(int channel, int threadId); - bool KeyProcessTaskHelper(unique_ptr& batch, int channel, int threadId); + void KeyProcessTaskWithFastUnique(int channel, int threadId); - bool KeyProcessTaskHelperWithFastUnique(unique_ptr& batch, ock::ctr::UniquePtr& unique, int channel, - int threadId); + bool KeyProcessTaskHelper(unique_ptr& batch, int channel, int threadId); - tuple, vector> ProcessSplitKeys(const unique_ptr& batch, int id, - vector& splitKeys); + bool KeyProcessTaskHelperWithFastUnique(unique_ptr &batch, ock::ctr::UniquePtr& unique, + int channel, int threadId); - void GetUniqueConfig(ock::ctr::UniqueConf& uniqueConf); + tuple, vector> ProcessSplitKeys(const unique_ptr& batch, + int id, vector& splitKeys); - void InitializeUnique(ock::ctr::UniqueConf& uniqueConf, size_t& preBatchSize, bool& uniqueInitialize, - const unique_ptr& batch, ock::ctr::UniquePtr& unique); + void GetUniqueConfig(ock::ctr::UniqueConf& uniqueConf); - void ProcessBatchWithFastUnique(const unique_ptr& batch, ock::ctr::UniquePtr& unique, int id, - UniqueInfo& uniqueInfoOut); + void InitializeUnique(ock::ctr::UniqueConf& uniqueConf, size_t& preBatchSize, bool& uniqueInitialize, + const unique_ptr & batch, ock::ctr::UniquePtr& unique); - size_t GetKeySize(const unique_ptr& batch); + void ProcessBatchWithFastUnique(const 
unique_ptr &batch, ock::ctr::UniquePtr& unique, + int id, UniqueInfo& uniqueInfoOut); - void All2All(vector& sc, int id, const unique_ptr& batch, KeySendInfo& keySendInfo, - All2AllInfo& all2AllInfoOut); + size_t GetKeySize(const unique_ptr &batch); - auto HashSplit(const unique_ptr& batch) const -> tuple, vector>; + void All2All(vector& sc, int id, const unique_ptr &batch, KeySendInfo& keySendInfo, + All2AllInfo& all2AllInfoOut); - auto HotHashSplit(const unique_ptr& batch) -> tuple, vector, vector>; + auto HashSplit(const unique_ptr& batch) const -> tuple, vector>; - void PaddingAlltoallVC(vector& splitKeys) const; + auto HotHashSplit(const unique_ptr& batch) -> tuple, vector, vector>; - tuple, vector, vector>> HashSplitWithFAAE( - const unique_ptr& batch) const; + void PaddingAlltoallVC(vector& splitKeys) const; - vector GetScAll(const vector& keyScLocal, int commId, const unique_ptr& batch); + tuple, vector, vector>> + HashSplitWithFAAE(const unique_ptr& batch) const; - void GetScAllForUnique(const vector& keyScLocal, int commId, const unique_ptr& batch, - vector& scAllOut); + vector GetScAll(const vector& keyScLocal, int commId, const unique_ptr& batch); - void Key2Offset(const EmbNameT& embName, KeysT& splitKey, int channel); + void GetScAllForUnique(const vector& keyScLocal, int commId, const unique_ptr &batch, + vector &scAllOut); - void Key2OffsetDynamicExpansion(const EmbNameT& embName, KeysT& splitKey, int channel); + void Key2Offset(const EmbNameT& embName, KeysT& splitKey, int channel); - unique_ptr GetBatchData(int channel, int commId) const; + void Key2OffsetDynamicExpansion(const EmbNameT& embName, KeysT& splitKey, int channel); - void BuildRestoreVec(const unique_ptr& batch, const vector& blockOffset, vector& restoreVec, - int hotPosSize = 0) const; + unique_ptr GetBatchData(int channel, int commId) const; - void SendA2A(const vector& a2aInfo, const string& embName, int channel, int batch); + void BuildRestoreVec(const unique_ptr& batch, const vector& blockOffset, + vector& restoreVec, int hotPosSize = 0) const; + + void SendA2A(const vector& a2aInfo, const string& embName, int channel, int batch); - void EvictDeleteDeviceEmb(const string& embName, const vector& keys); + void EvictDeleteDeviceEmb(const string& embName, const vector& keys); - void EvictInitDeviceEmb(const string& embName, vector offset); + void EvictInitDeviceEmb(const string& embName, vector offset); - void UpdateHotMap(absl::flat_hash_map& keyCountMapByEmbName, uint32_t count, bool refresh, - const string& embName); + void UpdateHotMap(absl::flat_hash_map& keyCountMapByEmbName, uint32_t count, bool refresh, + const string& embName); - void UpdateHotMapForUnique(const KeysT& keySend, const vector& keyCount, uint32_t count, bool refresh, - const string& embName); + void UpdateHotMapForUnique(const KeysT &keySend, const vector &keyCount, + uint32_t count, bool refresh, const string& embName); - void HandleHotAndSendCount(const unique_ptr& batch, UniqueInfo& uniqueInfoOut, KeySendInfo& keySendInfo, - vector& sc, vector& splitSize); + void HandleHotAndSendCount(const unique_ptr &batch, UniqueInfo& uniqueInfoOut, + KeySendInfo& keySendInfo, vector& sc, vector& splitSize); - void PushResult(unique_ptr& batch, unique_ptr> tensors, KeysT& lookupKeys); + void PushResult(unique_ptr& batch, unique_ptr> tensors, KeysT& lookupKeys); - void PushGlobalUniqueTensors(const unique_ptr>& tensors, KeysT& lookupKeys, int channel); + void PushGlobalUniqueTensors(const unique_ptr>& tensors, KeysT& lookupKeys, int channel); - 
void AddCountStartToHotPos(vector& splitKeys, vector& hotPos, const vector& hotPosDev, - const unique_ptr& batch); + void AddCountStartToHotPos(vector& splitKeys, vector& hotPos, const vector& hotPosDev, + const unique_ptr& batch); - void ComputeHotPos(const unique_ptr& batch, absl::flat_hash_map& hotMap, - vector& hotPos, vector& restore, const int hotOffset) const; + void ComputeHotPos(const unique_ptr &batch, absl::flat_hash_map &hotMap, + vector &hotPos, vector &restore, const int hotOffset) const; - vector GetCountRecv(const unique_ptr& batch, int id, vector>& keyCount, - vector scAll, vector ss); + vector GetCountRecv(const unique_ptr& batch, int id, + vector>& keyCount, vector scAll, vector ss); - void HashSplitHelper(const unique_ptr& batch, vector& splitKeys, vector& restore, - vector& hotPos, vector>& keyCount); + void HashSplitHelper(const unique_ptr & batch, vector & splitKeys, + vector & restore, vector & hotPos, + vector >& keyCount); - template - inline vector Count2Start(const vector& count) const - { - vector start = {0}; - for (size_t i = 0; i < count.size() - 1; ++i) { - start.push_back(count[i] + start.back()); + template + inline vector Count2Start(const vector& count) const + { + vector start = { 0 }; + for (size_t i = 0; i < count.size() - 1; ++i) { + start.push_back(count[i] + start.back()); + } + return start; } - return start; - } - string DumpSplitKeys(vector>& splitKeys) const; -}; + string DumpSplitKeys(vector>& splitKeys) const; + }; #define KEY_PROCESS_INSTANCE Singleton::GetInstance() -} // end namespace MxRec +} // end namespace MxRec -#endif // MX_REC_KEY_PROCESS_H +#endif // MX_REC_KEY_PROCESS_H -- Gitee From d52aca711795d11361f102c41ca72fc23e17d6d6 Mon Sep 17 00:00:00 2001 From: penghuiyang <1060916628@qq.com> Date: Mon, 6 May 2024 06:12:35 +0000 Subject: [PATCH 086/302] =?UTF-8?q?!111=20LazyAdam=E8=9E=8D=E5=90=88?= =?UTF-8?q?=E7=AE=97=E5=AD=90-aclnn=E9=83=A8=E5=88=86=E6=8F=90=E4=BA=A4=20?= =?UTF-8?q?*=20=E9=97=A8=E7=A6=81=E4=BF=AE=E6=94=B93=20*=20=E9=97=A8?= =?UTF-8?q?=E7=A6=81=E4=BF=AE=E6=94=B92=20*=20aclnn=E6=B5=8B=E8=AF=95?= =?UTF-8?q?=E9=97=A8=E7=A6=81=E4=BF=AE=E6=94=B9=20*=20LazyAdam=E8=9E=8D?= =?UTF-8?q?=E5=90=88=E7=AE=97=E5=AD=90-aclnn=E9=83=A8=E5=88=86=E6=8F=90?= =?UTF-8?q?=E4=BA=A4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../aclnn_lazy_adam_test/inc/common.h | 52 ++++ .../aclnn_lazy_adam_test/inc/op_runner.h | 195 +++++++++++++++ .../aclnn_lazy_adam_test/inc/operator_desc.h | 67 +++++ .../aclnn_lazy_adam_test/input/.keep | 0 .../aclnn_lazy_adam_test/output/.keep | 0 .../aclnn_lazy_adam_test/run.sh | 106 ++++++++ .../aclnn_lazy_adam_test/scripts/gen_data.py | 145 +++++++++++ .../scripts/verify_result.py | 50 ++++ .../aclnn_lazy_adam_test/src/CMakeLists.txt | 67 +++++ .../aclnn_lazy_adam_test/src/common.cpp | 84 +++++++ .../aclnn_lazy_adam_test/src/main.cpp | 228 ++++++++++++++++++ 11 files changed, 994 insertions(+) create mode 100644 cust_op/fused_lazy_adam/aclnn_lazy_adam_test/inc/common.h create mode 100644 cust_op/fused_lazy_adam/aclnn_lazy_adam_test/inc/op_runner.h create mode 100644 cust_op/fused_lazy_adam/aclnn_lazy_adam_test/inc/operator_desc.h create mode 100644 cust_op/fused_lazy_adam/aclnn_lazy_adam_test/input/.keep create mode 100644 cust_op/fused_lazy_adam/aclnn_lazy_adam_test/output/.keep create mode 100644 cust_op/fused_lazy_adam/aclnn_lazy_adam_test/run.sh create mode 100644 cust_op/fused_lazy_adam/aclnn_lazy_adam_test/scripts/gen_data.py create mode 100644 
cust_op/fused_lazy_adam/aclnn_lazy_adam_test/scripts/verify_result.py create mode 100644 cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/CMakeLists.txt create mode 100644 cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/common.cpp create mode 100644 cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/main.cpp diff --git a/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/inc/common.h b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/inc/common.h new file mode 100644 index 00000000..601a2617 --- /dev/null +++ b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/inc/common.h @@ -0,0 +1,52 @@ +/* Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and + limitations under the License. +==============================================================================*/ + +#ifndef COMMON_H +#define COMMON_H + +#include +#include +#include +#include +#include + +#include "acl/acl.h" + +namespace AclnnLazyAdam { +#define SUCCESS 0 +#define FAILED 1 + +#define INFO_LOG(fmt, args...) fprintf(stdout, "[INFO] " fmt "\n", ##args) +#define WARN_LOG(fmt, args...) fprintf(stdout, "[WARN] " fmt "\n", ##args) +#define ERROR_LOG(fmt, args...) fprintf(stderr, "[ERROR] " fmt "\n", ##args) + + /** + * @brief Read data from file + * @param [in] filePath: file path + * @param [out] fileSize: file size + * @return read result + */ + bool ReadFile(const std::string &filePath, size_t fileSize, void *buffer, size_t bufferSize); + + /** + * @brief Write data to file + * @param [in] filePath: file path + * @param [in] buffer: data to write to file + * @param [in] size: size to write + * @return write result + */ + bool WriteFile(const std::string &filePath, const void *buffer, size_t size); +} +#endif // COMMON_H diff --git a/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/inc/op_runner.h b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/inc/op_runner.h new file mode 100644 index 00000000..6f91f905 --- /dev/null +++ b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/inc/op_runner.h @@ -0,0 +1,195 @@ +/* Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and + limitations under the License. 
+==============================================================================*/ + +#ifndef OP_RUNNER_H +#define OP_RUNNER_H + +#include "aclnn/acl_meta.h" +#include "acl/acl.h" +#include "common.h" +#include "operator_desc.h" + +namespace AclnnLazyAdam { + /** + * Op Runner + */ + class OpRunner { + public: + /** + * @brief Constructor + * @param [in] opDesc: op description + */ + explicit OpRunner(OperatorDesc *opDesc); + + /** + * @brief Destructor + */ + virtual ~OpRunner(); + + /** + * @brief Init op runner + */ + bool Init(); + + /** + * @brief Get number of inputs + * @return number of inputs + */ + const size_t NumInputs(); + + /** + * @brief Get number of outputs + * @return number of outputs + */ + const size_t NumOutputs(); + + /** + * @brief Get input size by index + * @param [in] index: input index + * @return size of the input + */ + const size_t GetInputSize(size_t index) const; + + const size_t GetInputNumDims(size_t index) const; + + aclDataType GetInputDataType(size_t index) const; + + aclFormat GetInputFormat(size_t index) const; + + /** + * @brief Get output size by index + * @param [in] index: output index + * @return size of the output + */ + size_t GetOutputSize(size_t index) const; + + const size_t GetOutputNumDims(size_t index) const; + + aclDataType GetOutputDataType(size_t index) const; + + aclFormat GetOutputFormat(size_t index) const; + + /** + * @brief Get input element count by index + * @param i[in] ndex: input index + * @return element count of the input + */ + size_t GetInputElementCount(size_t index) const; + + /** + * @brief Get output element count by index + * @param [in] index: output index + * @return element count of the output + */ + size_t GetOutputElementCount(size_t index) const; + + /** + * @brief Get input shape by index + * @param [in] index: input index + * @return shape of the output + */ + std::vector GetInputShape(size_t index) const; + + /** + * @brief Get output shape by index + * @param [in] index: output index + * @return shape of the output + */ + std::vector GetOutputShape(size_t index) const; + + /** + * @brief Get input buffer(host memory) by index + * @tparam T: data type + * @param [in] index: input index + * @return host address of the input + */ + template + T *GetInputBuffer(size_t index) + { + if (index >= numInputs_) { + ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); + return nullptr; + } + return reinterpret_cast(hostInputs_[index]); + } + + /** + * @brief Get output buffer(host memory) by index + * @tparam T: data type + * @param [in] index: output index + * @return host address of the output + */ + template + const T *GetOutputBuffer(size_t index) + { + if (index >= numOutputs_) { + ERROR_LOG("index out of range. 
index = %zu, numOutputs = %zu", index, numOutputs_); + return nullptr; + } + + return reinterpret_cast(hostOutputs_[index]); + } + + /** + * @brief Print readable input by index + * @param [in] index: input index + * @param [in] numElementsPerRow: number of elements per row + */ + void PrintInput(size_t index, size_t numElementsPerRow = 16); + + /** + * @brief Print readable output by index + * @param [in] index: output index + * @param [in] numElementsPerRow: number of elements per row + */ + void PrintOutput(size_t index, size_t numElementsPerRow = 16); + + /** + * @brief Compile static op + * @return compile result + */ + bool CompileStaticOp(); + + /** + * @brief Compile dynamic op + * @return compile result + */ + bool CompileDynamicOp(); + + /** + * @brief Run op + * @return run result + */ + bool RunOp(); + + private: + size_t numInputs_; + size_t numOutputs_; + + std::vector inputBuffers_; + std::vector outputBuffers_; + + std::vector devInputs_; + std::vector devOutputs_; + + std::vector hostInputs_; + std::vector hostOutputs_; + + std::vector inputTensor_; + std::vector outputTensor_; + OperatorDesc *opDesc_; + }; +} +#endif // OP_RUNNER_H diff --git a/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/inc/operator_desc.h b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/inc/operator_desc.h new file mode 100644 index 00000000..ddd3b3a9 --- /dev/null +++ b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/inc/operator_desc.h @@ -0,0 +1,67 @@ +/* Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and + limitations under the License. 
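OpRunner sizes every host and device allocation from these descriptor queries (GetInputSize, GetInputElementCount), so it is worth seeing how large the buffers in this test actually are. A back-of-the-envelope check, assuming the shapes the test scripts use later in this patch (2,000,000 x 32 fp32 state tensors, 564,096 x 1 int32 indices):

DIM0, DIM1, DIM2 = 2_000_000, 564_096, 32  # table rows, touched rows, row width
FP32 = INT32 = 4                           # bytes per element

sizes = {
    "gradient":     DIM1 * DIM2 * FP32,    # ~72 MB
    "indices":      DIM1 * 1 * INT32,      # ~2.3 MB
    "inputM":       DIM0 * DIM2 * FP32,    # ~256 MB; inputV/inputVar identical
    "learningRate": 1 * FP32,
}
for name, nbytes in sizes.items():
    print(f"{name}: {nbytes / 1e6:.1f} MB")
# Each of the three state tensors is mirrored host- and device-side by
# OpRunner::Init, so peak footprint is well over 1.5 GB at these shapes.
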
+==============================================================================*/ + +#ifndef OPERATOR_DESC_H +#define OPERATOR_DESC_H + +#include +#include + +#include "acl/acl.h" + +namespace AclnnLazyAdam { + /** + * Op description + */ + struct OperatorDesc { + /** + * Constructor + */ + explicit OperatorDesc(); + + /** + * Destructor + */ + virtual ~OperatorDesc(); + + /** + * Add an input tensor description + * @param [in] dataType: data type + * @param [in] numDims: number of dims + * @param [in] dims: dims + * @param [in] format: format + * @return OperatorDesc + */ + OperatorDesc &AddInputTensorDesc(aclDataType dataType, int numDims, const int64_t *dims, aclFormat format); + + /** + * Add an output tensor description + * @param [in] dataType: data type + * @param [in] numDims: number of dims + * @param [in] dims: dims + * @param [in] format: format + * @return OperatorDesc + */ + OperatorDesc &AddOutputTensorDesc(aclDataType dataType, int numDims, const int64_t *dims, aclFormat format); + + std::string opType; + std::vector inputDesc; + std::vector outputDesc; + double beta1; + double beta2; + double epsilon; + }; +} +#endif // OPERATOR_DESC_H diff --git a/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/input/.keep b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/input/.keep new file mode 100644 index 00000000..e69de29b diff --git a/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/output/.keep b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/output/.keep new file mode 100644 index 00000000..e69de29b diff --git a/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/run.sh b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/run.sh new file mode 100644 index 00000000..3d4af97c --- /dev/null +++ b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/run.sh @@ -0,0 +1,106 @@ +#!/bin/bash +# Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +export ASCEND_SLOG_PRINT_TO_STDOUT=0 +export ASCEND_GLOBAL_LOG_LEVEL=0 + +CURRENT_DIR=$( + cd $(dirname ${BASH_SOURCE:-$0}) + pwd +) +cd $CURRENT_DIR + +# 导出环境变量 +SHORT=v:, +LONG=dtype:, +OPTS=$(getopt -a --options $SHORT --longoptions $LONG -- "$@") +eval set -- "$OPTS" +while : +do + case "$1" in + # float16, float, int32 + (-v | --dtype) + DTYPE="$2" + shift 2;; + (--) + shift; + break;; + (*) + echo "[ERROR] Unexpected option: $1"; + break;; + esac +done + +if [ ! $ASCEND_HOME_DIR ]; then + if [ -d "$HOME/Ascend/ascend-toolkit/latest" ]; then + export ASCEND_HOME_DIR=$HOME/Ascend/ascend-toolkit/latest + else + export ASCEND_HOME_DIR=/usr/local/Ascend/ascend-toolkit/latest + fi +fi +source $ASCEND_HOME_DIR/bin/setenv.bash + +export DDK_PATH=$ASCEND_HOME_DIR +arch=$(uname -m) +export NPU_HOST_LIB=$ASCEND_HOME_DIR/${arch}-linux/lib64 + +function main { + # 1. 清除遗留生成文件和日志文件 + rm -rf $HOME/ascend/log/* + rm ./input/*.bin + rm ./output/*.bin + + # 2. 生成输入数据和真值数据 + cd $CURRENT_DIR + python3 scripts/gen_data.py + if [ $? 
-ne 0 ]; then + echo "ERROR: generate input data failed!" + return 1 + fi + echo "INFO: generate input data success!" + + # 3. 编译acl可执行文件 + cd $CURRENT_DIR; rm -rf build; mkdir -p build; cd build + cmake ../src + if [ $? -ne 0 ]; then + echo "ERROR: cmake failed!" + return 1 + fi + echo "INFO: cmake success!" + make + if [ $? -ne 0 ]; then + echo "ERROR: make failed!" + return 1 + fi + echo "INFO: make success!" + + # 4. 运行可执行文件 + cd $CURRENT_DIR/output + echo "INFO: execute op!" + ./execute_op + + if [ $? -ne 0 ]; then + echo "ERROR: acl executable run failed! please check your project!" + return 1 + fi + echo "INFO: acl executable run success!" + + # 5. 比较真值文件 + cd $CURRENT_DIR + python3 scripts/verify_result.py +} + +main diff --git a/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/scripts/gen_data.py b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/scripts/gen_data.py new file mode 100644 index 00000000..6e07f836 --- /dev/null +++ b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/scripts/gen_data.py @@ -0,0 +1,145 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +import copy +import os +import numpy as np + +# 获取项目路径 +_CURRENT_PATH = os.path.dirname(os.path.abspath(__file__)) +_PROJECT_PATH = os.path.dirname(_CURRENT_PATH) +_INPUT_PATH = os.path.join(_PROJECT_PATH, "./input") +_OUTPUT_PATH = os.path.join(_PROJECT_PATH, "./output") + +_DIM_0 = 2000000 +_DIM_1 = 564096 +_DIM_2 = 32 + + +def _gather(input_data, indices): + out = np.zeros((len(indices), input_data.shape[1])) + for i, index_ in enumerate(indices): + # 跳过index小于0的数据 + if index_[0] < 0: + continue + out[i] = input_data[index_[0]] + return out + + +def _scatter_nd_update(momentum, indices, update_value): + out = copy.deepcopy(momentum) + for i, index_ in enumerate(indices): + if index_[0] < 0: + continue + else: + out[index_[0]] = update_value[i] + return out + + +def _scatter_nd_add(momentum, indices, update_value): + out = copy.deepcopy(momentum) + for i, index_ in enumerate(indices): + if index_[0] < 0: + continue + else: + out[indices[i][0]] = out[index_[0]] + update_value[i] + return out + + +def _gen_input_data(): + range_start = 1 + range_end = 2 + + dtype_chose = np.float32 + shape0 = (_DIM_0, _DIM_2) + indices_shape = (_DIM_1, 1) + grad_shape = (_DIM_1, _DIM_2) + + input_var = np.random.uniform(range_start, range_end, size=shape0).astype(dtype_chose) # shape [2000000,32] + input_m = np.random.uniform(range_start, range_end, size=shape0).astype(dtype_chose) # shape [2000000,32] + input_v = np.random.uniform(range_start, range_end, size=shape0).astype(dtype_chose) # shape [2000000,32] + + # indices shape [564096,1] + indices = np.random.permutation(np.arange(_DIM_0)).astype(np.int32)[:indices_shape[0]].reshape(-1, 1) + # gradient shape [564096,32] + gradient = np.random.uniform(range_start, range_end, 
size=grad_shape).astype(dtype_chose) + + if not os.path.exists(_INPUT_PATH): + os.makedirs(_INPUT_PATH) + indices.tofile(os.path.join(_INPUT_PATH, "indices.bin")) + gradient.tofile(os.path.join(_INPUT_PATH, "gradient.bin")) + input_m.tofile(os.path.join(_INPUT_PATH, "inputM.bin")) + input_v.tofile(os.path.join(_INPUT_PATH, "inputV.bin")) + input_var.tofile(os.path.join(_INPUT_PATH, "inputVar.bin")) + + +def _gen_golden_data(): + beta1 = 0.9 + beta2 = 0.999 + lr = 0.001 + epsilon = 1e-7 + + lr = np.array(lr).astype(np.float32) + beta1 = np.array(beta1).astype(np.float32) + beta2 = np.array(beta2).astype(np.float32) + epsilon = np.array(epsilon).astype(np.float32) + + lr.tofile(os.path.join(_INPUT_PATH, "learningRate.bin")) + + indices = np.fromfile(os.path.join(_INPUT_PATH, "indices.bin"), dtype=np.int32).reshape( + (_DIM_1, 1)) # shape (564096,1) + gradient = np.fromfile(os.path.join(_INPUT_PATH, "gradient.bin"), dtype=np.float32).reshape( + (_DIM_1, _DIM_2)) # shape (564096,32) + input_m = np.fromfile(os.path.join(_INPUT_PATH, "inputM.bin"), dtype=np.float32).reshape( + (_DIM_0, _DIM_2)) # shape (2000000,32) + input_v = np.fromfile(os.path.join(_INPUT_PATH, "inputV.bin"), dtype=np.float32).reshape( + (_DIM_0, _DIM_2)) # shape (2000000,32) + input_var = np.fromfile(os.path.join(_INPUT_PATH, "inputVar.bin"), dtype=np.float32).reshape( + (_DIM_0, _DIM_2)) # shape (2000000,32) + + old_m_slice = _gather(input_m, indices) # shape(564096,32) + old_m_slice = np.array(old_m_slice).astype(np.float32) # + update_m = beta1 * old_m_slice + (1 - beta1) * gradient + out_m = _scatter_nd_update(input_m, indices, update_m) + + old_v_slice = _gather(input_v, indices) + old_v_slice = np.array(old_v_slice).astype(np.float32) + update_v = beta2 * old_v_slice + (1 - beta2) * np.square(gradient) + out_v = _scatter_nd_update(input_v, indices, update_v) + + denominator_slice = np.sqrt(update_v) + epsilon + update_var = np.divide(-lr * update_m, denominator_slice) + out_var = _scatter_nd_add(input_var, indices, update_var) + + return out_m, out_v, out_var + + +def _gen_input_and_golden_data(): + # 产生输入数据 + _gen_input_data() + + # 产生真值数据 + out_m, out_v, out_var = _gen_golden_data() + if not os.path.exists(_OUTPUT_PATH): + os.makedirs(_OUTPUT_PATH) + out_m.tofile(os.path.join(_OUTPUT_PATH, "goldenOutputM.bin")) + out_v.tofile(os.path.join(_OUTPUT_PATH, "goldenOutputV.bin")) + out_var.tofile(os.path.join(_OUTPUT_PATH, "goldenOutputVar.bin")) + + +if __name__ == "__main__": + _gen_input_and_golden_data() diff --git a/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/scripts/verify_result.py b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/scripts/verify_result.py new file mode 100644 index 00000000..1cc516db --- /dev/null +++ b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/scripts/verify_result.py @@ -0,0 +1,50 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
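_gen_golden_data above is a direct restatement of the lazy (sparse) Adam step: only the rows named by indices are touched, m and v are scatter-updated with the usual first/second-moment moving averages, and var receives a scatter-add of -lr * m / (sqrt(v) + epsilon). The same math replayed on a toy table, assuming unique non-negative indices (the real script additionally skips negative indices):

import numpy as np

beta1, beta2, lr, eps = 0.9, 0.999, 0.001, 1e-7
var = np.ones((4, 2), np.float32)    # parameter table
m = np.zeros((4, 2), np.float32)     # first moment
v = np.zeros((4, 2), np.float32)     # second moment

rows = np.array([0, 2])                        # rows to update
grad = np.full((2, 2), 0.5, np.float32)

m[rows] = beta1 * m[rows] + (1 - beta1) * grad        # _scatter_nd_update
v[rows] = beta2 * v[rows] + (1 - beta2) * grad ** 2   # _scatter_nd_update
var[rows] += -lr * m[rows] / (np.sqrt(v[rows]) + eps)  # _scatter_nd_add

print(var)  # rows 1 and 3 are still exactly 1.0 -- that is the "lazy" part
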
+# ============================================================================== + +import logging +import numpy as np + +_LOSS_THRESHOLD = 1e-6 # 容忍偏差,一般fp16要求绝对误差和相对误差均不超过万分之一 +_MINIMUM = 10e-10 + +logging.getLogger().setLevel(logging.INFO) + + +def verify_result(real_result, golden): + real_result = np.fromfile(real_result, dtype=np.float32) # 从bin文件读取实际运算结果 + golden = np.fromfile(golden, dtype=np.float32) # 从bin文件读取预期运算结果 + result = np.abs(real_result - golden) # 计算运算结果和预期结果偏差 + deno = np.maximum(np.abs(real_result), np.abs(golden)) # 获取最大值并组成新数组 + result_atol = np.less_equal(result, _LOSS_THRESHOLD) # 计算绝对误差 + result_rtol = np.less_equal(result / np.add(deno, _MINIMUM), _LOSS_THRESHOLD) # 计算相对误差 + if not result_rtol.all() and not result_atol.all(): + # 误差超出预期时返回打印错误,返回对比失败 + if np.sum(result_rtol == False) > real_result.size * _LOSS_THRESHOLD \ + and np.sum(result_atol == False) > real_result.size * _LOSS_THRESHOLD: + logging.error("[ERROR] output verify result error.") + return False + logging.info("output verify pass.") + return True + + +if __name__ == '__main__': + logging.info("start verify outputM.") + verify_result("output/outputM.bin", "output/goldenOutputM.bin") + logging.info("start verify outputV.") + verify_result("output/outputV.bin", "output/goldenOutputV.bin") + logging.info("start verify outputVar.") + verify_result("output/outputVar.bin", "output/goldenOutputVar.bin") diff --git a/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/CMakeLists.txt b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/CMakeLists.txt new file mode 100644 index 00000000..1642e3ca --- /dev/null +++ b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/CMakeLists.txt @@ -0,0 +1,67 @@ +# Copyright (c) Huawei Technologies Co., Ltd. 2024. All rights reserved. + +# CMake lowest version requirement +cmake_minimum_required(VERSION 3.5.1) + +# project information +project(acl_execute_lazy_adam) + +# Compile options +add_compile_options(-std=c++11) + +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "../output") +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "../output") + +set(INC_PATH $ENV{DDK_PATH}) + +if (NOT DEFINED ENV{DDK_PATH}) + set(INC_PATH "/usr/local/Ascend/ascend-toolkit/latest") + message(STATUS "set default INC_PATH: ${INC_PATH}") +else () + message(STATUS "env INC_PATH: ${INC_PATH}") +endif() + +set(CUST_PKG_PATH "${INC_PATH}/opp/vendors/customize_lazy_adam/op_api") + +set(LIB_PATH $ENV{NPU_HOST_LIB}) + +# Dynamic libraries in the stub directory can only be used for compilation +if (NOT DEFINED ENV{NPU_HOST_LIB}) + set(LIB_PATH "/usr/local/Ascend/ascend-toolkit/latest/acllib/lib64/stub/") + set(LIB_PATH1 "/usr/local/Ascend/ascend-toolkit/latest/atc/lib64/stub/") + message(STATUS "set default LIB_PATH: ${LIB_PATH}") +else () + message(STATUS "env LIB_PATH: ${LIB_PATH}") +endif() + +set(AUTO_GEN_PATH "../../lazy_adam/build_out/autogen") +# Header path +include_directories( + ${INC_PATH}/runtime/include + ${INC_PATH}/atc/include + ../inc + ${CUST_PKG_PATH}/include + ${AUTO_GEN_PATH} +) + +# add host lib path +link_directories( + ${LIB_PATH} + ${LIB_PATH1} + ${CUST_PKG_PATH}/lib +) + +add_executable(execute_op + main.cpp + common.cpp +) + +target_link_libraries(execute_op + ascendcl + cust_opapi + acl_op_compiler + nnopbase + stdc++ +) + +install(TARGETS execute_op DESTINATION ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}) diff --git a/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/common.cpp b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/common.cpp new file mode 100644 index 00000000..e2cd6865 --- /dev/null +++ 
b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/common.cpp @@ -0,0 +1,84 @@ +/* Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and + limitations under the License. +==============================================================================*/ + +#include +#include +#include +#include + +#include "common.h" + +namespace AclnnLazyAdam { + bool ReadFile(const std::string &filePath, size_t fileSize, void *buffer, size_t bufferSize) + { + struct stat sBuf; + int fileStatus = stat(filePath.data(), &sBuf); + if (fileStatus == -1) { + ERROR_LOG("failed to get file %s", filePath.c_str()); + return false; + } + if (S_ISREG(sBuf.st_mode) == 0) { + ERROR_LOG("%s is not a file, please enter a file", filePath.c_str()); + return false; + } + + std::ifstream file; + file.open(filePath, std::ios::binary); + if (!file.is_open()) { + ERROR_LOG("Open file failed. path = %s", filePath.c_str()); + return false; + } + + std::filebuf *buf = file.rdbuf(); + size_t size = buf->pubseekoff(0, std::ios::end, std::ios::in); + if (size == 0) { + ERROR_LOG("file size is 0"); + file.close(); + return false; + } + if (size > bufferSize) { + ERROR_LOG("file size is larger than buffer size"); + file.close(); + return false; + } + buf->pubseekpos(0, std::ios::in); + buf->sgetn(static_cast(buffer), size); + fileSize = size; + file.close(); + return true; + } + + bool WriteFile(const std::string &filePath, const void *buffer, size_t size) + { + if (buffer == nullptr) { + ERROR_LOG("Write file failed. buffer is nullptr"); + return false; + } + int fd = open(filePath.c_str(), O_RDWR | O_CREAT | O_TRUNC, S_IRUSR | S_IWRITE); + if (fd < 0) { + ERROR_LOG("Open file failed. path = %s", filePath.c_str()); + return false; + } + + auto writeSize = write(fd, buffer, size); + (void) close(fd); + if (writeSize != size) { + ERROR_LOG("Write file Failed."); + return false; + } + + return true; + } +} \ No newline at end of file diff --git a/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/main.cpp b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/main.cpp new file mode 100644 index 00000000..c4253996 --- /dev/null +++ b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/main.cpp @@ -0,0 +1,228 @@ +/* Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and + limitations under the License. 
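verify_result.py above accepts an output when elements satisfy an absolute-or-relative error bound of 1e-6, and only reports failure when the fraction of offending elements itself exceeds that threshold. A slightly tightened standalone restatement of the check (the function name is mine; the thresholds are the script's):

import numpy as np

def within_tolerance(actual, golden, tol=1e-6, minimum=1e-9):
    diff = np.abs(actual - golden)
    denom = np.maximum(np.abs(actual), np.abs(golden)) + minimum
    ok = (diff <= tol) | (diff / denom <= tol)   # absolute OR relative bound
    # tolerate a tiny fraction of stragglers, as the script does
    return np.count_nonzero(~ok) <= ok.size * tol

a = np.arange(3, dtype=np.float32) + 1
print(within_tolerance(a, a + 5e-7))   # True: within absolute tolerance
print(within_tolerance(a, a + 1e-2))   # False: both bounds violated
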
+==============================================================================*/ + +#include +#include +#include +#include +#include +#include + +#include "acl/acl.h" +#include "aclnn_lazy_adam.h" +#include "common.h" +#include "op_runner.h" + +using namespace AclnnLazyAdam; + +bool g_isDevice = false; +int g_deviceId = 0; +namespace { + constexpr int DIM0 = 2000000; // inputM inputV inputVar 的行数 + constexpr int DIM1 = 564096; // indices长度 + constexpr int DIM2 = 32; // inputM inputV inputVar gradient等每行的数据个数 + constexpr int INPUT_M_INDEX = 2; + constexpr int INPUT_V_INDEX = 3; + constexpr int INPUT_VAR_INDEX = 4; + constexpr int LEARNING_RATE_INDEX = 5; + constexpr int OUTPUT_M_INDEX = 0; + constexpr int OUTPUT_V_INDEX = 1; + constexpr int OUTPUT_VAR_INDEX = 2; + constexpr float LEARNING_RATE = 0.001; + constexpr float BETA1 = 0.9; + constexpr float BETA2 = 0.999; + constexpr float EPSILON = 1e-7; + const char* READ_ERROR_INFO = "read input file error, please check whether file exist and access rights is correct"; + const char* WRITE_ERROR_INFO = "write output file error, please check access rights is correct"; + + OperatorDesc CreateOpDesc() + { + std::vector indicesShape{DIM1, 1}; + std::vector gradientShape{DIM1, DIM2}; + std::vector inputMShape{DIM0, DIM2}; // inputM inputV inputVar 的shape相同 + std::vector learningRateShape{1}; + aclDataType dataType = ACL_FLOAT; + aclDataType indexDataType = ACL_INT32; + aclFormat format = ACL_FORMAT_ND; + OperatorDesc opDesc; + opDesc.AddInputTensorDesc(dataType, gradientShape.size(), gradientShape.data(), format); + opDesc.AddInputTensorDesc(indexDataType, indicesShape.size(), indicesShape.data(), format); + opDesc.AddInputTensorDesc(dataType, inputMShape.size(), inputMShape.data(), format); // inputM + opDesc.AddInputTensorDesc(dataType, inputMShape.size(), inputMShape.data(), format); // inputV + opDesc.AddInputTensorDesc(dataType, inputMShape.size(), inputMShape.data(), format); // inputVar + opDesc.AddInputTensorDesc(dataType, learningRateShape.size(), learningRateShape.data(), + format); // learningRate + opDesc.beta1 = BETA1; + opDesc.beta2 = BETA2; + opDesc.epsilon = EPSILON; + return opDesc; + } + + bool SetInputData(OpRunner& runner) + { + size_t fileSize = 0; + if (!ReadFile("../input/gradient.bin", fileSize, runner.GetInputBuffer(0), runner.GetInputSize(0))) { + throw std::runtime_error(READ_ERROR_INFO); + } + if (!ReadFile("../input/indices.bin", fileSize, runner.GetInputBuffer(1), runner.GetInputSize(1))) { + throw std::runtime_error(READ_ERROR_INFO); + } + if (!ReadFile("../input/inputM.bin", fileSize, runner.GetInputBuffer(INPUT_M_INDEX), + runner.GetInputSize(INPUT_M_INDEX))) { + throw std::runtime_error(READ_ERROR_INFO); + } + if (!ReadFile("../input/inputV.bin", fileSize, runner.GetInputBuffer(INPUT_V_INDEX), + runner.GetInputSize(INPUT_V_INDEX))) { + throw std::runtime_error(READ_ERROR_INFO); + } + if (!ReadFile("../input/inputVar.bin", fileSize, runner.GetInputBuffer(INPUT_VAR_INDEX), + runner.GetInputSize(INPUT_VAR_INDEX))) { + throw std::runtime_error(READ_ERROR_INFO); + } + if (!ReadFile("../input/learningRate.bin", fileSize, runner.GetInputBuffer(LEARNING_RATE_INDEX), + runner.GetInputSize(LEARNING_RATE_INDEX))) { + throw std::runtime_error(READ_ERROR_INFO); + } + INFO_LOG("Set input success"); + return true; + } + + bool ProcessOutputData(OpRunner& runner) + { + // 保存输出数据 由于输出仅有hostOutputs_数据,未设置outputDesc,因此数据size从inputTensor获取 + if (!WriteFile("../output/outputM.bin", runner.GetOutputBuffer(OUTPUT_M_INDEX), + 
runner.GetInputSize(INPUT_M_INDEX))) { + throw std::runtime_error(WRITE_ERROR_INFO); + } + if (!WriteFile("../output/outputV.bin", runner.GetOutputBuffer(OUTPUT_V_INDEX), + runner.GetInputSize(INPUT_V_INDEX))) { + throw std::runtime_error(WRITE_ERROR_INFO); + } + if (!WriteFile("../output/outputVar.bin", runner.GetOutputBuffer(OUTPUT_VAR_INDEX), + runner.GetInputSize(INPUT_VAR_INDEX))) { + throw std::runtime_error(WRITE_ERROR_INFO); + } + INFO_LOG("Write output success"); + return true; + } + + void DestroyResource() + { + bool flag = false; + if (aclrtResetDevice(g_deviceId) != ACL_SUCCESS) { + ERROR_LOG("Reset device %d failed", g_deviceId); + flag = true; + } + INFO_LOG("Reset Device success"); + if (aclFinalize() != ACL_SUCCESS) { + ERROR_LOG("Finalize acl failed"); + flag = true; + } + if (flag) { + ERROR_LOG("Destroy resource failed"); + } else { + INFO_LOG("Destroy resource success"); + } + } + + bool InitResource() + { + std::string output = "../output"; + if (access(output.c_str(), 0) == -1) { + int ret = mkdir(output.c_str(), 0700); + if (ret == 0) { + INFO_LOG("Make output directory successfully"); + } else { + ERROR_LOG("Make output directory fail"); + return false; + } + } + + // acl.json is dump or profiling config file + if (aclInit(NULL) != ACL_SUCCESS) { + ERROR_LOG("acl init failed"); + return false; + } + + if (aclrtSetDevice(g_deviceId) != ACL_SUCCESS) { + ERROR_LOG("Set device failed. g_deviceId is %d", g_deviceId); + (void) aclFinalize(); + return false; + } + INFO_LOG("Set device[%d] success", g_deviceId); + + // runMode is ACL_HOST which represents app is running in host + // runMode is ACL_DEVICE which represents app is running in device + aclrtRunMode runMode; + if (aclrtGetRunMode(&runMode) != ACL_SUCCESS) { + ERROR_LOG("Get run mode failed"); + DestroyResource(); + return false; + } + g_isDevice = (runMode == ACL_DEVICE); + INFO_LOG("Get RunMode[%d] success", runMode); + + return true; + } + + bool RunOp() + { + // create op desc + OperatorDesc opDesc = CreateOpDesc(); + + // create Runner + OpRunner opRunner(&opDesc); + if (!opRunner.Init()) { + ERROR_LOG("Init OpRunner failed"); + return false; + } + + // Load inputs + if (!SetInputData(opRunner)) { + ERROR_LOG("Set input data failed"); + return false; + } + + // Run op + if (!opRunner.RunOp()) { + ERROR_LOG("Run op failed"); + return false; + } + + // process output data + if (!ProcessOutputData(opRunner)) { + ERROR_LOG("Process output data failed"); + return false; + } + INFO_LOG("Run op success"); + return true; + } +} + +int main(int argc, char** argv) +{ + if (!InitResource()) { + ERROR_LOG("Init resource failed"); + return FAILED; + } + INFO_LOG("Init resource success"); + + if (!RunOp()) { + DestroyResource(); + return FAILED; + } + DestroyResource(); + return SUCCESS; +} -- Gitee From 646f6224bd493247f4eb157f8bfbafca55659b55 Mon Sep 17 00:00:00 2001 From: penghuiyang <1060916628@qq.com> Date: Mon, 6 May 2024 14:30:18 +0800 Subject: [PATCH 087/302] =?UTF-8?q?=E8=9E=8D=E5=90=88=E7=AE=97=E5=AD=90acl?= =?UTF-8?q?nn=E9=AA=8C=E8=AF=81-part2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- build/gen_mxrec_tar_pkg.sh | 3 + .../aclnn_lazy_adam_test/src/op_runner.cpp | 423 ++++++++++++++++++ .../src/operator_desc.cpp | 53 +++ cust_op/fused_lazy_adam/lazy_adam.json | 117 +++++ cust_op/fused_lazy_adam/op_host/lazy_adam.cpp | 223 +++++++++ .../op_host/lazy_adam_tiling.h | 41 ++ 6 files changed, 860 insertions(+) create mode 100644 
cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/op_runner.cpp create mode 100644 cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/operator_desc.cpp create mode 100644 cust_op/fused_lazy_adam/lazy_adam.json create mode 100644 cust_op/fused_lazy_adam/op_host/lazy_adam.cpp create mode 100644 cust_op/fused_lazy_adam/op_host/lazy_adam_tiling.h diff --git a/build/gen_mxrec_tar_pkg.sh b/build/gen_mxrec_tar_pkg.sh index 72ccfe49..1f9045b3 100644 --- a/build/gen_mxrec_tar_pkg.sh +++ b/build/gen_mxrec_tar_pkg.sh @@ -51,11 +51,14 @@ function gen_tar_file() chmod 550 ./build/"${pkg_dir}"/tf2_whl/mx_rec*.whl chmod 550 ./build/"${pkg_dir}"/cust_op/ chmod 550 ./build/"${pkg_dir}"/cust_op/cust_op_by_addr + chmod 550 ./build/"${pkg_dir}"/cust_op/fused_lazy_adam cd ./build/"${pkg_dir}"/cust_op/cust_op_by_addr chmod 550 *.sh chmod 640 *.json chmod 550 op_host op_kernel op_host/* op_kernel/* cd - + cd ./build/"${pkg_dir}"/cust_op/fused_lazy_adam + cd ./build tar -zvcf "${release_tar}" "${pkg_dir}" || { warn "compression failed, packages might be broken" diff --git a/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/op_runner.cpp b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/op_runner.cpp new file mode 100644 index 00000000..fb2ccd19 --- /dev/null +++ b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/op_runner.cpp @@ -0,0 +1,423 @@ +/* Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and + limitations under the License. 
+==============================================================================*/ + +#include +#include + +#include "acl/acl_op_compiler.h" +#include "aclnn_lazy_adam.h" +#include "common.h" +#include "op_runner.h" + +extern bool g_isDevice; + +namespace AclnnLazyAdam { + using namespace std; + constexpr int PRINT_OUT_WIDTH = 10; + constexpr int PRINT_OUT_PRECISION = 4; + constexpr int STREAM_TIMEOUT = 5000; // 等待Stream任务完成,超时时间单位:ms + constexpr int OUTPUT_SIZE = 3; + constexpr int INPUT_TENSOR_OFFSET = 2; + + OpRunner::OpRunner(OperatorDesc *opDesc) : opDesc_(opDesc) { + numInputs_ = opDesc->inputDesc.size(); + numOutputs_ = opDesc->outputDesc.size(); + } + + OpRunner::~OpRunner() { + for (size_t i = 0; i < numInputs_; ++i) { + (void) aclDestroyTensor(inputTensor_[i]); + (void) aclDestroyDataBuffer(inputBuffers_[i]); + (void) aclrtFree(devInputs_[i]); + if (g_isDevice) { + (void) aclrtFree(hostInputs_[i]); + } else { + (void) aclrtFreeHost(hostInputs_[i]); + } + } + for (size_t i = 0; i < numOutputs_; ++i) { + if (g_isDevice) { + (void) aclrtFree(hostOutputs_[i]); + } else { + (void) aclrtFreeHost(hostOutputs_[i]); + } + } + } + + bool OpRunner::Init() { + for (size_t i = 0; i < numInputs_; ++i) { + auto size = GetInputSize(i); + void *devMem = nullptr; + if (aclrtMalloc(&devMem, size, ACL_MEM_MALLOC_NORMAL_ONLY) != ACL_SUCCESS) { + ERROR_LOG("Malloc device memory for input[%zu] failed", i); + return false; + } + devInputs_.emplace_back(devMem); + inputBuffers_.emplace_back(aclCreateDataBuffer(devMem, size)); + + void *hostInput = nullptr; + if (g_isDevice) { + if (aclrtMalloc(&hostInput, size, ACL_MEM_MALLOC_NORMAL_ONLY) != ACL_SUCCESS) { + ERROR_LOG("Malloc device memory for input[%zu] failed", i); + return false; + } + } else { + if (aclrtMallocHost(&hostInput, size) != ACL_SUCCESS) { + ERROR_LOG("Malloc device memory for input[%zu] failed", i); + return false; + } + } + if (hostInput == nullptr) { + ERROR_LOG("Malloc memory for input[%zu] failed", i); + return false; + } + hostInputs_.emplace_back(hostInput); + + aclTensor *inputTensor = aclCreateTensor(GetInputShape(i).data(), GetInputNumDims(i), GetInputDataType(i), + nullptr, 0, GetInputFormat(i), GetInputShape(i).data(), + GetInputNumDims(i), devInputs_[i]); + if (inputTensor == nullptr) { + ERROR_LOG("Create Tensor for input[%zu] failed", i); + return false; + } + inputTensor_.emplace_back(inputTensor); + } + + // 手动修改输出数据实现,仅申请host上的输出数据空间,析构出需同时适配 + numOutputs_ = OUTPUT_SIZE; + for (size_t i = 0; i < numOutputs_; ++i) { + int inputTensorIndex = i + INPUT_TENSOR_OFFSET; + auto size = GetInputSize(inputTensorIndex); + + void *hostOutput = nullptr; + if (g_isDevice) { + if (aclrtMalloc(&hostOutput, size, ACL_MEM_MALLOC_NORMAL_ONLY) != ACL_SUCCESS) { + ERROR_LOG("Malloc device memory for output[%zu] failed", i); + return false; + } + } else { + if (aclrtMallocHost(&hostOutput, size) != ACL_SUCCESS) { + ERROR_LOG("Malloc device memory for output[%zu] failed", i); + return false; + } + } + if (hostOutput == nullptr) { + ERROR_LOG("Malloc host memory for output[%zu] failed", i); + return false; + } + hostOutputs_.emplace_back(hostOutput); + } + return true; + } + + const size_t OpRunner::NumInputs() { + return numInputs_; + } + + const size_t OpRunner::NumOutputs() { + return numOutputs_; + } + + const size_t OpRunner::GetInputSize(size_t index) const { + if (index >= numInputs_) { + ERROR_LOG("index out of range. 
index = %zu, numInputs = %zu", index, numInputs_); + return 0; + } + return aclGetTensorDescSize(opDesc_->inputDesc[index]); + } + + const size_t OpRunner::GetInputNumDims(size_t index) const { + if (index >= numInputs_) { + ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); + return 0; + } + return aclGetTensorDescNumDims(opDesc_->inputDesc[index]); + } + + aclDataType OpRunner::GetInputDataType(size_t index) const { + if (index >= numInputs_) { + ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); + return ACL_DT_UNDEFINED; + } + return aclGetTensorDescType(opDesc_->inputDesc[index]); + } + + aclFormat OpRunner::GetInputFormat(size_t index) const { + if (index >= numInputs_) { + ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); + return ACL_FORMAT_UNDEFINED; + } + return aclGetTensorDescFormat(opDesc_->inputDesc[index]); + } + + std::vector OpRunner::GetInputShape(size_t index) const { + std::vector ret; + if (index >= numInputs_) { + ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); + return ret; + } + + auto desc = opDesc_->inputDesc[index]; + for (size_t i = 0; i < aclGetTensorDescNumDims(desc); ++i) { + int64_t dimSize; + if (aclGetTensorDescDimV2(desc, i, &dimSize) != ACL_SUCCESS) { + ERROR_LOG("get dims from tensor desc failed. dims index = %zu", i); + ret.clear(); + return ret; + } + ret.emplace_back(dimSize); + } + return ret; + } + + size_t OpRunner::GetOutputSize(size_t index) const { + if (index >= numOutputs_) { + ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); + return 0; + } + return aclGetTensorDescSize(opDesc_->outputDesc[index]); + } + + const size_t OpRunner::GetOutputNumDims(size_t index) const { + if (index >= numOutputs_) { + ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); + return 0; + } + return aclGetTensorDescNumDims(opDesc_->outputDesc[index]); + } + + aclDataType OpRunner::GetOutputDataType(size_t index) const { + if (index >= numOutputs_) { + ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); + return ACL_DT_UNDEFINED; + } + return aclGetTensorDescType(opDesc_->outputDesc[index]); + } + + + aclFormat OpRunner::GetOutputFormat(size_t index) const { + if (index >= numOutputs_) { + ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); + return ACL_FORMAT_UNDEFINED; + } + + return aclGetTensorDescFormat(opDesc_->outputDesc[index]); + } + + std::vector OpRunner::GetOutputShape(size_t index) const { + std::vector ret; + if (index >= numOutputs_) { + ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); + return ret; + } + + auto desc = opDesc_->outputDesc[index]; + for (size_t i = 0; i < aclGetTensorDescNumDims(desc); ++i) { + int64_t dimSize; + if (aclGetTensorDescDimV2(desc, i, &dimSize) != ACL_SUCCESS) { + ERROR_LOG("get dims from tensor desc failed. dims index = %zu", i); + ret.clear(); + return ret; + } + ret.emplace_back(dimSize); + } + return ret; + } + + size_t OpRunner::GetInputElementCount(size_t index) const { + if (index >= opDesc_->inputDesc.size()) { + ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); + return 0; + } + + return aclGetTensorDescElementCount(opDesc_->inputDesc[index]); + } + + size_t OpRunner::GetOutputElementCount(size_t index) const { + if (index >= opDesc_->outputDesc.size()) { + ERROR_LOG("index out of range. 
index = %zu, numOutputs = %zu", index, numOutputs_); + return 0; + } + return aclGetTensorDescElementCount(opDesc_->outputDesc[index]); + } + + bool OpRunner::RunOp() { + for (size_t i = 0; i < numInputs_; ++i) { + auto size = GetInputSize(i); + aclrtMemcpyKind kind = ACL_MEMCPY_HOST_TO_DEVICE; + if (g_isDevice) { + kind = ACL_MEMCPY_DEVICE_TO_DEVICE; + } + if (aclrtMemcpy(devInputs_[i], size, hostInputs_[i], size, kind) != ACL_SUCCESS) { + ERROR_LOG("Copy input[%zu] failed", i); + return false; + } + INFO_LOG("Copy input[%zu] success", i); + } + + aclrtStream stream = nullptr; + if (aclrtCreateStream(&stream) != ACL_SUCCESS) { + ERROR_LOG("Create stream failed"); + return false; + } + INFO_LOG("Create stream success"); + + size_t workspaceSize = 0; + aclOpExecutor *handle = nullptr; + auto ret = aclnnLazyAdamGetWorkspaceSize(inputTensor_[0], inputTensor_[1], + inputTensor_[2], inputTensor_[3], inputTensor_[4], inputTensor_[5], + opDesc_->beta1, opDesc_->beta2, opDesc_->epsilon, + &workspaceSize, &handle); + if (ret != ACL_SUCCESS) { + (void) aclrtDestroyStream(stream); + ERROR_LOG("Get Operator Workspace failed. error code is %d", static_cast(ret)); + return false; + } + INFO_LOG("Execute aclnnAddCustomGetWorkspaceSize success, workspace size %lu", workspaceSize); + + void *workspace = nullptr; + if (workspaceSize != 0) { + if (aclrtMalloc(&workspace, workspaceSize, ACL_MEM_MALLOC_NORMAL_ONLY) != ACL_SUCCESS) { + ERROR_LOG("Malloc device memory failed"); + } + } + + ret = aclnnLazyAdam(workspace, workspaceSize, handle, stream); + if (ret != ACL_SUCCESS) { + (void) aclrtDestroyStream(stream); + ERROR_LOG("Execute Operator failed. error code is %d", static_cast(ret)); + return false; + } + INFO_LOG("Execute aclnnAddCustom success"); + + ret = aclrtSynchronizeStreamWithTimeout(stream, STREAM_TIMEOUT); + if (ret != SUCCESS) { + ERROR_LOG("Synchronize stream failed. error code is %d", static_cast(ret)); + (void) aclrtDestroyStream(stream); + return false; + } + INFO_LOG("Synchronize stream success"); + + // 把输入数据:inputM inputV inputVar 作为输出数据拷贝出来 + for (size_t i = 0; i < OUTPUT_SIZE; ++i) { + int inputTensorIndex = i + INPUT_TENSOR_OFFSET; // 加上输入tensor偏移值 + auto size = GetInputSize(inputTensorIndex); + aclrtMemcpyKind kind = ACL_MEMCPY_DEVICE_TO_HOST; + if (g_isDevice) { + kind = ACL_MEMCPY_DEVICE_TO_DEVICE; + } + if (aclrtMemcpy(hostOutputs_[i], size, devInputs_[inputTensorIndex], size, kind) != ACL_SUCCESS) { + INFO_LOG("Copy output[%zu] success", i); + (void) aclrtDestroyStream(stream); + return false; + } + INFO_LOG("Copy output[%zu] success", i); + } + + (void) aclrtDestroyStream(stream); + return true; + } + + + template + void DoPrintData(const T *data, size_t count, size_t elementsPerRow) { + assert(elementsPerRow != 0); + for (size_t i = 0; i < count; ++i) { + std::cout << std::setw(PRINT_OUT_WIDTH) << data[i]; + if (i % elementsPerRow == elementsPerRow - 1) { + std::cout << std::endl; + } + } + } + + void DoPrintFp16Data(const aclFloat16 *data, size_t count, size_t elementsPerRow) { + assert(elementsPerRow != 0); + for (size_t i = 0; i < count; ++i) { + std::cout << std::setw(PRINT_OUT_WIDTH) << std::setprecision(PRINT_OUT_PRECISION) + << aclFloat16ToFloat(data[i]); + if (i % elementsPerRow == elementsPerRow - 1) { + std::cout << std::endl; + } + } + } + + void PrintData(const void *data, size_t count, aclDataType dataType, size_t elementsPerRow) { + if (data == nullptr) { + ERROR_LOG("Print data failed. 
data is nullptr"); + return; + } + + switch (dataType) { + case ACL_BOOL: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case ACL_INT8: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case ACL_UINT8: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case ACL_INT16: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case ACL_UINT16: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case ACL_INT32: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case ACL_UINT32: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case ACL_INT64: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case ACL_UINT64: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case ACL_FLOAT16: + DoPrintFp16Data(reinterpret_cast(data), count, elementsPerRow); + break; + case ACL_FLOAT: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case ACL_DOUBLE: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + default: + ERROR_LOG("Unsupported type: %d", dataType); + } + } + + void OpRunner::PrintInput(size_t index, size_t numElementsPerRow) { + if (index >= numInputs_) { + ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numInputs_); + return; + } + + auto desc = opDesc_->inputDesc[index]; + PrintData(hostInputs_[index], GetInputElementCount(index), aclGetTensorDescType(desc), numElementsPerRow); + } + + void OpRunner::PrintOutput(size_t index, size_t numElementsPerRow) { + if (index >= numOutputs_) { + ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); + return; + } + + auto desc = opDesc_->outputDesc[index]; + PrintData(hostOutputs_[index], GetOutputElementCount(index), aclGetTensorDescType(desc), numElementsPerRow); + } +} \ No newline at end of file diff --git a/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/operator_desc.cpp b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/operator_desc.cpp new file mode 100644 index 00000000..826de46a --- /dev/null +++ b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/operator_desc.cpp @@ -0,0 +1,53 @@ +/* Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and + limitations under the License. 
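PrintData above dispatches on aclDataType to render the host buffers; the same inspection can be done offline on the dumped .bin files with a dtype map. A sketch, where the ACL-to-numpy pairing is assumed for the types this harness touches and the formatting mirrors PRINT_OUT_WIDTH and PRINT_OUT_PRECISION:

import numpy as np

# Assumed mapping for the types used in this test; extend as needed.
ACL_TO_NUMPY = {
    "ACL_FLOAT": np.float32,
    "ACL_FLOAT16": np.float16,
    "ACL_INT32": np.int32,
    "ACL_INT64": np.int64,
}

def dump_bin(path, acl_dtype, per_row=16):
    """Offline equivalent of OpRunner::PrintOutput for a dumped buffer."""
    data = np.fromfile(path, dtype=ACL_TO_NUMPY[acl_dtype])
    fmt = "{:10.4f}" if data.dtype.kind == "f" else "{:10d}"
    for i in range(0, data.size, per_row):
        print(" ".join(fmt.format(x) for x in data[i:i + per_row]))
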
+==============================================================================*/ + +#include "common.h" +#include "operator_desc.h" +namespace AclnnLazyAdam { + using namespace std; + + OperatorDesc::OperatorDesc() {} + + OperatorDesc::~OperatorDesc() { + for (auto *desc: inputDesc) { + aclDestroyTensorDesc(desc); + } + for (auto *desc: outputDesc) { + aclDestroyTensorDesc(desc); + } + } + + OperatorDesc &OperatorDesc::AddInputTensorDesc(aclDataType dataType, int numDims, const int64_t *dims, + aclFormat format) { + aclTensorDesc *desc = aclCreateTensorDesc(dataType, numDims, dims, format); + if (desc == nullptr) { + ERROR_LOG("create tensor failed"); + return *this; + } + inputDesc.emplace_back(desc); + return *this; + } + + OperatorDesc &OperatorDesc::AddOutputTensorDesc(aclDataType dataType, int numDims, + const int64_t *dims, aclFormat format) { + aclTensorDesc *desc = aclCreateTensorDesc(dataType, numDims, dims, format); + if (desc == nullptr) { + ERROR_LOG("create tensor failed"); + return *this; + } + outputDesc.emplace_back(desc); + return *this; + } +} \ No newline at end of file diff --git a/cust_op/fused_lazy_adam/lazy_adam.json b/cust_op/fused_lazy_adam/lazy_adam.json new file mode 100644 index 00000000..e6fc2c00 --- /dev/null +++ b/cust_op/fused_lazy_adam/lazy_adam.json @@ -0,0 +1,117 @@ +[ + { + "op": "LazyAdam", + "language": "cpp", + "input_desc": [ + { + "name": "gradient", + "param_type": "required", + "format": [ + "ND" + ], + "type": [ + "fp32" + ] + }, + { + "name": "indices", + "param_type": "required", + "format": [ + "ND" + ], + "type": [ + "int32" + ] + }, + { + "name": "inputM", + "param_type": "required", + "format": [ + "ND" + ], + "type": [ + "fp32" + ] + }, + { + "name": "inputV", + "param_type": "required", + "format": [ + "ND" + ], + "type": [ + "fp32" + ] + }, + { + "name": "inputVar", + "param_type": "required", + "format": [ + "ND" + ], + "type": [ + "fp32" + ] + }, + { + "name": "lr", + "param_type": "required", + "format": [ + "ND" + ], + "type": [ + "fp32" + ] + } + ], + "output_desc": [ + { + "name": "inputM", + "param_type": "required", + "format": [ + "ND" + ], + "type": [ + "fp32" + ] + }, + { + "name": "inputV", + "param_type": "required", + "format": [ + "ND" + ], + "type": [ + "fp32" + ] + }, + { + "name": "inputVar", + "param_type": "required", + "format": [ + "ND" + ], + "type": [ + "fp32" + ] + } + ], + "attr": [ + { + "name": "beta1", + "param_type": "required", + "type": "float" + }, + { + "name": "beta2", + "param_type": "required", + "type": "float" + }, + { + "name": "epsilon", + "param_type": "required", + "type": "float" + } + ] + } +] \ No newline at end of file diff --git a/cust_op/fused_lazy_adam/op_host/lazy_adam.cpp b/cust_op/fused_lazy_adam/op_host/lazy_adam.cpp new file mode 100644 index 00000000..1a147912 --- /dev/null +++ b/cust_op/fused_lazy_adam/op_host/lazy_adam.cpp @@ -0,0 +1,223 @@ +/* Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and + limitations under the License. 
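lazy_adam.json above is the single source of truth for the operator signature the op generator consumes: six required ND inputs, three outputs aliasing the optimizer state, and three float attributes. A small sanity check that a local copy still matches that contract (the path is assumed relative to the working directory):

import json

with open("lazy_adam.json") as f:
    (op,) = json.load(f)   # the file holds a one-element list

assert op["op"] == "LazyAdam" and op["language"] == "cpp"
assert [d["name"] for d in op["input_desc"]] == [
    "gradient", "indices", "inputM", "inputV", "inputVar", "lr"]
assert [d["name"] for d in op["output_desc"]] == ["inputM", "inputV", "inputVar"]
assert [a["name"] for a in op["attr"]] == ["beta1", "beta2", "epsilon"]
assert all(d["param_type"] == "required"
           for d in op["input_desc"] + op["output_desc"])
print("lazy_adam.json matches the expected LazyAdam signature")
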
+==============================================================================*/ + +#include "lazy_adam_tiling.h" +#include "register/op_def_registry.h" +#include "tiling/platform/platform_ascendc.h" + +namespace optiling { + constexpr int BLOCK_SIZE = 32; + constexpr int RESERVE_UB_SIZE = 20 * 1024; + constexpr int DATA_NUM_PER_COMPUTE = 8; + constexpr int32_t USR_SIZE = 256; + constexpr int32_t SYS_WORKSPACE_SIZE = 16 * 1024 * 1024; + + template + static ge::graphStatus CheckNullPointer(T* pointer, const char* errorMessage) + { + if (pointer == nullptr) { + printf("%s nullptr\n", errorMessage); + return ge::GRAPH_FAILED; + } + + return ge::GRAPH_SUCCESS; + } + + static ge::graphStatus LazyAdamTilingFunc(gert::TilingContext* context) + { + size_t* currentWorkspace = context->GetWorkspaceSizes(1); + if (CheckNullPointer(currentWorkspace, "currentWorkspace") != ge::GRAPH_SUCCESS) { + return ge::GRAPH_FAILED; + } + currentWorkspace[0] = SYS_WORKSPACE_SIZE + USR_SIZE; + + LazyAdamTilingData tiling; + const gert::StorageShape* indicesShape = context->GetInputShape(1); + const gert::StorageShape* inputMShape = context->GetInputShape(2); + uint64_t dim0 = inputMShape->GetStorageShape().GetDim(0); + uint64_t dim1 = indicesShape->GetStorageShape().GetDim(0); + uint64_t dim2 = inputMShape->GetStorageShape().GetDim(1); + ge::DataType inputMDtype = context->GetInputDesc(2)->GetDataType(); + int inputMDtypeSize = ge::GetSizeByDataType(inputMDtype); + ge::DataType indicesDtype = context->GetInputDesc(1)->GetDataType(); + int indicesDtypeSize = ge::GetSizeByDataType(indicesDtype); + + tiling.SaveToBuffer(context->GetRawTilingData()->GetData(), context->GetRawTilingData()->GetCapacity()); + context->GetRawTilingData()->SetDataSize(tiling.GetDataSize()); + auto attrs = context->GetAttrs(); + + float beta1 = *attrs->GetAttrPointer(0); + float beta2 = *attrs->GetAttrPointer(1); + float epsilon = *attrs->GetAttrPointer(2); + + auto platformInfo = platform_ascendc::PlatformAscendC(context->GetPlatformInfo()); + uint32_t coreNum = platformInfo.GetCoreNum(); + uint64_t ub; + platformInfo.GetCoreMemSize(platform_ascendc::CoreMemType::UB, ub); + ub = ub - RESERVE_UB_SIZE; + // ub大小除以每行的数据大小,得到每次处理的行数 + uint64_t row = ub / (dim2 * inputMDtypeSize * DATA_NUM_PER_COMPUTE + 1 * indicesDtypeSize); + if (row > dim1) { + row = dim1; + } + + // 保证申请的内存是32的倍数并且向上取整 计算方式:(num+31)/32*32 + uint64_t indicesAllocSize = (row * indicesDtypeSize + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE; + uint64_t otherAllocSize = (row * inputMDtypeSize * dim2 + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE; + // 前 CORE_NUM - 1 个核分配的任务量 + uint64_t batch = dim1 / coreNum; + // 实际使用的核数 + context->SetBlockDim(coreNum); + uint64_t loopCount = batch / row; // CORE_NUM - 1 个核的任务量,除以UB每一次能处理的数据,得到处理次数 + uint64_t rowLeft = batch - row * loopCount; // UB处理 loopCount 那么多次后,分给当前core剩下的数据量 + + // 最后一个核分配的任务量 + uint64_t batchTail = dim1 - batch * (coreNum - 1); // phy 该写法适配了dim1刚好整除coreNum的情况 + uint64_t loopCountTail = batchTail / row; + uint64_t rowLeftTail = batchTail - row * loopCountTail; + + tiling.set_beta1(beta1); + tiling.set_beta2(beta2); + tiling.set_epsilon(epsilon); + tiling.set_dim0(dim0); + tiling.set_dim1(dim1); + tiling.set_dim2(dim2); + tiling.set_row(row); // 每个ai core一次能分配的数据行数 + tiling.set_indicesAllocSize(indicesAllocSize); // indices大小,用于申请空间 + tiling.set_otherAllocSize(otherAllocSize); // 入参中非indices要申请的空间大小 + tiling.set_batch(batch); // 前CORE_NUM - 1个核分配的任务量 + tiling.set_loopCount(loopCount); // 前CORE_NUM - 1 个核内循环处理次数 + 
tiling.set_rowLeft(rowLeft); // 前CORE_NUM - 1 个核, 核内处理 loopCount 次后,分给当前core剩下的数据量 + tiling.set_loopCountTail(loopCountTail); // 最后一个核,核内循环次数 + tiling.set_rowLeftTail(rowLeftTail); // 最后一个核,核内循环loopCountTail次后,剩余数据量 + tiling.set_coreNum(coreNum); + + tiling.SaveToBuffer(context->GetRawTilingData()->GetData(), context->GetRawTilingData()->GetCapacity()); + context->GetRawTilingData()->SetDataSize(tiling.GetDataSize()); + + return ge::GRAPH_SUCCESS; + } +} + +namespace ge { + static ge::graphStatus LazyAdamInferShape(gert::InferShapeContext* context) + { + if (optiling::CheckNullPointer(context, "context") != ge::GRAPH_SUCCESS) { + return ge::GRAPH_FAILED; + } + + gert::Shape* outputMShape = context->GetOutputShape(0); + if (optiling::CheckNullPointer(outputMShape, "outputMShape") != ge::GRAPH_SUCCESS) { + return ge::GRAPH_FAILED; + } + const gert::Shape* inputMShape = context->GetInputShape(2); + if (optiling::CheckNullPointer(inputMShape, "inputMShape") != ge::GRAPH_SUCCESS) { + return ge::GRAPH_FAILED; + } + *outputMShape = *inputMShape; + + gert::Shape* outputVShape = context->GetOutputShape(1); + if (optiling::CheckNullPointer(outputVShape, "outputVShape") != ge::GRAPH_SUCCESS) { + return ge::GRAPH_FAILED; + } + const gert::Shape* inputVShape = context->GetInputShape(3); + if (optiling::CheckNullPointer(inputVShape, "inputVShape") != ge::GRAPH_SUCCESS) { + return ge::GRAPH_FAILED; + } + *outputVShape = *inputVShape; + + gert::Shape* outputVarShape = context->GetOutputShape(2); + if (optiling::CheckNullPointer(outputVarShape, "outputVarShape") != ge::GRAPH_SUCCESS) { + return ge::GRAPH_FAILED; + } + const gert::Shape* inputVarShape = context->GetInputShape(4); + if (optiling::CheckNullPointer(inputVarShape, "inputVarShape") != ge::GRAPH_SUCCESS) { + return ge::GRAPH_FAILED; + } + *outputVarShape = *inputVarShape; + + return GRAPH_SUCCESS; + } + + static ge::graphStatus LazyAdamInferDataType(gert::InferDataTypeContext* context) + { + return GRAPH_SUCCESS; + } +} + + +namespace ops { + class LazyAdam : public OpDef { + public: + explicit LazyAdam(const char* name) : OpDef(name) + { + this->Input("gradient") + .ParamType(REQUIRED) + .DataType({ge::DT_FLOAT}) + .Format({ge::FORMAT_ND}) + .UnknownShapeFormat({ge::FORMAT_ND}); + this->Input("indices") + .ParamType(REQUIRED) + .DataType({ge::DT_INT32}) + .Format({ge::FORMAT_ND}) + .UnknownShapeFormat({ge::FORMAT_ND}); + this->Input("inputM") + .ParamType(REQUIRED) + .DataType({ge::DT_FLOAT}) + .Format({ge::FORMAT_ND}) + .UnknownShapeFormat({ge::FORMAT_ND}); + this->Input("inputV") + .ParamType(REQUIRED) + .DataType({ge::DT_FLOAT}) + .Format({ge::FORMAT_ND}) + .UnknownShapeFormat({ge::FORMAT_ND}); + this->Input("inputVar") + .ParamType(REQUIRED) + .DataType({ge::DT_FLOAT}) + .Format({ge::FORMAT_ND}) + .UnknownShapeFormat({ge::FORMAT_ND}); + this->Input("lr") + .ParamType(REQUIRED) + .DataType({ge::DT_FLOAT}) + .Format({ge::FORMAT_ND}) + .UnknownShapeFormat({ge::FORMAT_ND}); + this->Output("inputM") + .ParamType(REQUIRED) + .DataType({ge::DT_FLOAT}) + .Format({ge::FORMAT_ND}) + .UnknownShapeFormat({ge::FORMAT_ND}); + this->Output("inputV") + .ParamType(REQUIRED) + .DataType({ge::DT_FLOAT}) + .Format({ge::FORMAT_ND}) + .UnknownShapeFormat({ge::FORMAT_ND}); + this->Output("inputVar") + .ParamType(REQUIRED) + .DataType({ge::DT_FLOAT}) + .Format({ge::FORMAT_ND}) + .UnknownShapeFormat({ge::FORMAT_ND}); + this->Attr("beta1").Float(); + this->Attr("beta2").Float(); + this->Attr("epsilon").Float(); + this->SetInferShape(ge::LazyAdamInferShape) + 
.SetInferDataType(ge::LazyAdamInferDataType); + + this->AICore().SetTiling(optiling::LazyAdamTilingFunc); + this->AICore().AddConfig("ascend910b"); + } + }; + + OP_ADD(LazyAdam); +} diff --git a/cust_op/fused_lazy_adam/op_host/lazy_adam_tiling.h b/cust_op/fused_lazy_adam/op_host/lazy_adam_tiling.h new file mode 100644 index 00000000..10b11a9a --- /dev/null +++ b/cust_op/fused_lazy_adam/op_host/lazy_adam_tiling.h @@ -0,0 +1,41 @@ +/* Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and + limitations under the License. +==============================================================================*/ + +#ifndef LAZY_ADAM_TILING_H +#define LAZY_ADAM_TILING_H +#include "register/tilingdata_base.h" + +namespace optiling { +BEGIN_TILING_DATA_DEF(LazyAdamTilingData) + TILING_DATA_FIELD_DEF(float, beta1); + TILING_DATA_FIELD_DEF(float, beta2); + TILING_DATA_FIELD_DEF(float, epsilon); + TILING_DATA_FIELD_DEF(int32_t, dim0); + TILING_DATA_FIELD_DEF(int32_t, dim1); + TILING_DATA_FIELD_DEF(int32_t, dim2); + TILING_DATA_FIELD_DEF(int32_t, row); + TILING_DATA_FIELD_DEF(int32_t, indicesAllocSize); + TILING_DATA_FIELD_DEF(int32_t, otherAllocSize); + TILING_DATA_FIELD_DEF(int32_t, batch); + TILING_DATA_FIELD_DEF(int32_t, loopCount); + TILING_DATA_FIELD_DEF(int32_t, rowLeft); + TILING_DATA_FIELD_DEF(int32_t, loopCountTail); + TILING_DATA_FIELD_DEF(int32_t, rowLeftTail); + TILING_DATA_FIELD_DEF(int32_t, coreNum); +END_TILING_DATA_DEF; + +REGISTER_TILING_DATA_CLASS(LazyAdam, LazyAdamTilingData) +} +#endif // LAZY_ADAM_TILING_H \ No newline at end of file -- Gitee From 3f57fbbc9877bbfbe22e55d0b53314dab87a2f38 Mon Sep 17 00:00:00 2001 From: penghuiyang <1060916628@qq.com> Date: Mon, 6 May 2024 14:41:04 +0800 Subject: [PATCH 088/302] =?UTF-8?q?=E5=87=BA=E5=8C=85=E8=84=9A=E6=9C=AC?= =?UTF-8?q?=E8=BF=98=E5=8E=9F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- build/gen_mxrec_tar_pkg.sh | 3 --- 1 file changed, 3 deletions(-) diff --git a/build/gen_mxrec_tar_pkg.sh b/build/gen_mxrec_tar_pkg.sh index 1f9045b3..72ccfe49 100644 --- a/build/gen_mxrec_tar_pkg.sh +++ b/build/gen_mxrec_tar_pkg.sh @@ -51,14 +51,11 @@ function gen_tar_file() chmod 550 ./build/"${pkg_dir}"/tf2_whl/mx_rec*.whl chmod 550 ./build/"${pkg_dir}"/cust_op/ chmod 550 ./build/"${pkg_dir}"/cust_op/cust_op_by_addr - chmod 550 ./build/"${pkg_dir}"/cust_op/fused_lazy_adam cd ./build/"${pkg_dir}"/cust_op/cust_op_by_addr chmod 550 *.sh chmod 640 *.json chmod 550 op_host op_kernel op_host/* op_kernel/* cd - - cd ./build/"${pkg_dir}"/cust_op/fused_lazy_adam - cd ./build tar -zvcf "${release_tar}" "${pkg_dir}" || { warn "compression failed, packages might be broken" -- Gitee From d5cdcf92b4039531285e5575a82ed32b3448aeb9 Mon Sep 17 00:00:00 2001 From: penghuiyang <1060916628@qq.com> Date: Mon, 6 May 2024 14:54:26 +0800 Subject: [PATCH 089/302] =?UTF-8?q?=E9=97=A8=E7=A6=81=E4=BF=AE=E6=94=B91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- 
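For reference while reviewing the gate fixes below: the UB-driven tiling in patch 087's LazyAdamTilingFunc reduces to a few integer divisions. Usable UB divided by the per-row footprint (dim2 fp32 values times DATA_NUM_PER_COMPUTE, plus one int32 index) gives the rows one pass can hold; dim1 divided by the core count gives the per-core batch; the last core absorbs the remainder. Replayed on the test shapes — the 40-core count and 192 KB UB are assumptions here, since the real tiling queries both from the platform:

DIM1, DIM2 = 564_096, 32             # indices count, row width
FP32 = INT32 = 4
CORE_NUM = 40                        # assumed; GetCoreNum() at runtime
UB = 192 * 1024 - 20 * 1024          # assumed UB minus RESERVE_UB_SIZE

row = min(UB // (DIM2 * FP32 * 8 + 1 * INT32), DIM1)   # rows per UB pass
indices_alloc = (row * INT32 + 31) // 32 * 32          # 32-byte aligned
batch = DIM1 // CORE_NUM                        # load on cores 0..N-2
loop_count, row_left = divmod(batch, row)
batch_tail = DIM1 - batch * (CORE_NUM - 1)      # last core takes the rest
loop_tail, row_left_tail = divmod(batch_tail, row)
print(row, batch, loop_count, row_left, batch_tail, loop_tail, row_left_tail)
# -> 171 14102 82 80 14118 82 96 under these assumptions
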
.../aclnn_lazy_adam_test/src/op_runner.cpp | 79 +++++++++++++------ .../src/operator_desc.cpp | 25 +++--- cust_op/fused_lazy_adam/op_host/lazy_adam.cpp | 3 + 3 files changed, 71 insertions(+), 36 deletions(-) diff --git a/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/op_runner.cpp b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/op_runner.cpp index fb2ccd19..c1a732e1 100644 --- a/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/op_runner.cpp +++ b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/op_runner.cpp @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include #include +#include #include "acl/acl_op_compiler.h" #include "aclnn_lazy_adam.h" @@ -31,12 +31,14 @@ namespace AclnnLazyAdam { constexpr int OUTPUT_SIZE = 3; constexpr int INPUT_TENSOR_OFFSET = 2; - OpRunner::OpRunner(OperatorDesc *opDesc) : opDesc_(opDesc) { + OpRunner::OpRunner(OperatorDesc* opDesc) : opDesc_(opDesc) + { numInputs_ = opDesc->inputDesc.size(); numOutputs_ = opDesc->outputDesc.size(); } - OpRunner::~OpRunner() { + OpRunner::~OpRunner() + { for (size_t i = 0; i < numInputs_; ++i) { (void) aclDestroyTensor(inputTensor_[i]); (void) aclDestroyDataBuffer(inputBuffers_[i]); @@ -56,7 +58,8 @@ namespace AclnnLazyAdam { } } - bool OpRunner::Init() { + bool OpRunner::Init() + { for (size_t i = 0; i < numInputs_; ++i) { auto size = GetInputSize(i); void *devMem = nullptr; @@ -122,15 +125,18 @@ namespace AclnnLazyAdam { return true; } - const size_t OpRunner::NumInputs() { + const size_t OpRunner::NumInputs() + { return numInputs_; } - const size_t OpRunner::NumOutputs() { + const size_t OpRunner::NumOutputs() + { return numOutputs_; } - const size_t OpRunner::GetInputSize(size_t index) const { + const size_t OpRunner::GetInputSize(size_t index) const + { if (index >= numInputs_) { ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); return 0; @@ -138,7 +144,8 @@ namespace AclnnLazyAdam { return aclGetTensorDescSize(opDesc_->inputDesc[index]); } - const size_t OpRunner::GetInputNumDims(size_t index) const { + const size_t OpRunner::GetInputNumDims(size_t index) const + { if (index >= numInputs_) { ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); return 0; @@ -146,7 +153,8 @@ namespace AclnnLazyAdam { return aclGetTensorDescNumDims(opDesc_->inputDesc[index]); } - aclDataType OpRunner::GetInputDataType(size_t index) const { + aclDataType OpRunner::GetInputDataType(size_t index) const + { if (index >= numInputs_) { ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); return ACL_DT_UNDEFINED; @@ -154,7 +162,8 @@ namespace AclnnLazyAdam { return aclGetTensorDescType(opDesc_->inputDesc[index]); } - aclFormat OpRunner::GetInputFormat(size_t index) const { + aclFormat OpRunner::GetInputFormat(size_t index) const + { if (index >= numInputs_) { ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); return ACL_FORMAT_UNDEFINED; @@ -162,7 +171,8 @@ namespace AclnnLazyAdam { return aclGetTensorDescFormat(opDesc_->inputDesc[index]); } - std::vector OpRunner::GetInputShape(size_t index) const { + std::vector OpRunner::GetInputShape(size_t index) const + { std::vector ret; if (index >= numInputs_) { ERROR_LOG("index out of range. 
index = %zu, numInputs = %zu", index, numInputs_); @@ -182,7 +192,8 @@ namespace AclnnLazyAdam { return ret; } - size_t OpRunner::GetOutputSize(size_t index) const { + size_t OpRunner::GetOutputSize(size_t index) const + { if (index >= numOutputs_) { ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); return 0; @@ -190,7 +201,8 @@ namespace AclnnLazyAdam { return aclGetTensorDescSize(opDesc_->outputDesc[index]); } - const size_t OpRunner::GetOutputNumDims(size_t index) const { + const size_t OpRunner::GetOutputNumDims(size_t index) const + { if (index >= numOutputs_) { ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); return 0; @@ -198,7 +210,8 @@ namespace AclnnLazyAdam { return aclGetTensorDescNumDims(opDesc_->outputDesc[index]); } - aclDataType OpRunner::GetOutputDataType(size_t index) const { + aclDataType OpRunner::GetOutputDataType(size_t index) const + { if (index >= numOutputs_) { ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); return ACL_DT_UNDEFINED; @@ -207,7 +220,8 @@ namespace AclnnLazyAdam { } - aclFormat OpRunner::GetOutputFormat(size_t index) const { + aclFormat OpRunner::GetOutputFormat(size_t index) const + { if (index >= numOutputs_) { ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); return ACL_FORMAT_UNDEFINED; @@ -216,7 +230,8 @@ namespace AclnnLazyAdam { return aclGetTensorDescFormat(opDesc_->outputDesc[index]); } - std::vector OpRunner::GetOutputShape(size_t index) const { + std::vector OpRunner::GetOutputShape(size_t index) const + { std::vector ret; if (index >= numOutputs_) { ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); @@ -236,7 +251,8 @@ namespace AclnnLazyAdam { return ret; } - size_t OpRunner::GetInputElementCount(size_t index) const { + size_t OpRunner::GetInputElementCount(size_t index) const + { if (index >= opDesc_->inputDesc.size()) { ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); return 0; @@ -245,7 +261,8 @@ namespace AclnnLazyAdam { return aclGetTensorDescElementCount(opDesc_->inputDesc[index]); } - size_t OpRunner::GetOutputElementCount(size_t index) const { + size_t OpRunner::GetOutputElementCount(size_t index) const + { if (index >= opDesc_->outputDesc.size()) { ERROR_LOG("index out of range. 
index = %zu, numOutputs = %zu", index, numOutputs_); return 0; @@ -253,7 +270,8 @@ namespace AclnnLazyAdam { return aclGetTensorDescElementCount(opDesc_->outputDesc[index]); } - bool OpRunner::RunOp() { + bool OpRunner::RunOp() + { for (size_t i = 0; i < numInputs_; ++i) { auto size = GetInputSize(i); aclrtMemcpyKind kind = ACL_MEMCPY_HOST_TO_DEVICE; @@ -332,8 +350,11 @@ namespace AclnnLazyAdam { template - void DoPrintData(const T *data, size_t count, size_t elementsPerRow) { - assert(elementsPerRow != 0); + void DoPrintData(const T *data, size_t count, size_t elementsPerRow) + { + if (elementsPerRow == 0) { + throw std::runtime_error("value must not be zero."); + } for (size_t i = 0; i < count; ++i) { std::cout << std::setw(PRINT_OUT_WIDTH) << data[i]; if (i % elementsPerRow == elementsPerRow - 1) { @@ -342,8 +363,11 @@ namespace AclnnLazyAdam { } } - void DoPrintFp16Data(const aclFloat16 *data, size_t count, size_t elementsPerRow) { - assert(elementsPerRow != 0); + void DoPrintFp16Data(const aclFloat16 *data, size_t count, size_t elementsPerRow) + { + if (elementsPerRow == 0) { + throw std::runtime_error("value must not be zero."); + } for (size_t i = 0; i < count; ++i) { std::cout << std::setw(PRINT_OUT_WIDTH) << std::setprecision(PRINT_OUT_PRECISION) << aclFloat16ToFloat(data[i]); @@ -353,7 +377,8 @@ namespace AclnnLazyAdam { } } - void PrintData(const void *data, size_t count, aclDataType dataType, size_t elementsPerRow) { + void PrintData(const void *data, size_t count, aclDataType dataType, size_t elementsPerRow) + { if (data == nullptr) { ERROR_LOG("Print data failed. data is nullptr"); return; @@ -401,7 +426,8 @@ namespace AclnnLazyAdam { } } - void OpRunner::PrintInput(size_t index, size_t numElementsPerRow) { + void OpRunner::PrintInput(size_t index, size_t numElementsPerRow) + { if (index >= numInputs_) { ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numInputs_); return; @@ -411,7 +437,8 @@ namespace AclnnLazyAdam { PrintData(hostInputs_[index], GetInputElementCount(index), aclGetTensorDescType(desc), numElementsPerRow); } - void OpRunner::PrintOutput(size_t index, size_t numElementsPerRow) { + void OpRunner::PrintOutput(size_t index, size_t numElementsPerRow) + { if (index >= numOutputs_) { ERROR_LOG("index out of range. 
index = %zu, numOutputs = %zu", index, numOutputs_); return; diff --git a/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/operator_desc.cpp b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/operator_desc.cpp index 826de46a..dad4ab0f 100644 --- a/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/operator_desc.cpp +++ b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/operator_desc.cpp @@ -15,23 +15,27 @@ See the License for the specific language governing permissions and #include "common.h" #include "operator_desc.h" + namespace AclnnLazyAdam { using namespace std; - OperatorDesc::OperatorDesc() {} + OperatorDesc::OperatorDesc() + {} - OperatorDesc::~OperatorDesc() { - for (auto *desc: inputDesc) { + OperatorDesc::~OperatorDesc() + { + for (auto* desc: inputDesc) { aclDestroyTensorDesc(desc); } - for (auto *desc: outputDesc) { + for (auto* desc: outputDesc) { aclDestroyTensorDesc(desc); } } - OperatorDesc &OperatorDesc::AddInputTensorDesc(aclDataType dataType, int numDims, const int64_t *dims, - aclFormat format) { - aclTensorDesc *desc = aclCreateTensorDesc(dataType, numDims, dims, format); + OperatorDesc& OperatorDesc::AddInputTensorDesc(aclDataType dataType, int numDims, const int64_t* dims, + aclFormat format) + { + aclTensorDesc* desc = aclCreateTensorDesc(dataType, numDims, dims, format); if (desc == nullptr) { ERROR_LOG("create tensor failed"); return *this; @@ -40,9 +44,10 @@ namespace AclnnLazyAdam { return *this; } - OperatorDesc &OperatorDesc::AddOutputTensorDesc(aclDataType dataType, int numDims, - const int64_t *dims, aclFormat format) { - aclTensorDesc *desc = aclCreateTensorDesc(dataType, numDims, dims, format); + OperatorDesc& OperatorDesc::AddOutputTensorDesc(aclDataType dataType, int numDims, + const int64_t* dims, aclFormat format) + { + aclTensorDesc* desc = aclCreateTensorDesc(dataType, numDims, dims, format); if (desc == nullptr) { ERROR_LOG("create tensor failed"); return *this; diff --git a/cust_op/fused_lazy_adam/op_host/lazy_adam.cpp b/cust_op/fused_lazy_adam/op_host/lazy_adam.cpp index 1a147912..b93fc0d2 100644 --- a/cust_op/fused_lazy_adam/op_host/lazy_adam.cpp +++ b/cust_op/fused_lazy_adam/op_host/lazy_adam.cpp @@ -64,6 +64,9 @@ namespace optiling { auto platformInfo = platform_ascendc::PlatformAscendC(context->GetPlatformInfo()); uint32_t coreNum = platformInfo.GetCoreNum(); + if (coreNum == 0) { + return ge::GRAPH_FAILED; + } uint64_t ub; platformInfo.GetCoreMemSize(platform_ascendc::CoreMemType::UB, ub); ub = ub - RESERVE_UB_SIZE; -- Gitee From 0c781001539ec2c9253c35f57e7d9aa2dc5d8f49 Mon Sep 17 00:00:00 2001 From: penghuiyang <1060916628@qq.com> Date: Mon, 6 May 2024 15:30:51 +0800 Subject: [PATCH 090/302] =?UTF-8?q?=E9=97=A8=E7=A6=81=E4=BF=AE=E6=94=B92?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../aclnn_lazy_adam_test/inc/op_runner.h | 5 ++ .../aclnn_lazy_adam_test/src/op_runner.cpp | 55 ++++++++++--------- cust_op/fused_lazy_adam/op_host/lazy_adam.cpp | 4 +- 3 files changed, 36 insertions(+), 28 deletions(-) diff --git a/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/inc/op_runner.h b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/inc/op_runner.h index 6f91f905..77f0aee5 100644 --- a/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/inc/op_runner.h +++ b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/inc/op_runner.h @@ -43,6 +43,11 @@ namespace AclnnLazyAdam { */ bool Init(); + /** + * @brief Init op runner output info + */ + bool InitOutputInfo(); + /** * @brief Get number of inputs * @return number of inputs 
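The op_runner.cpp hunk below completes this refactor: the output-buffer allocation moves out of Init() into the InitOutputInfo() declared above, presumably to satisfy the gate's function-length check. The moved block repeats one host/device allocation branch per buffer; a helper of the following shape would fold that repetition (a sketch only, not part of the patch: MallocHostVisible is a made-up name, while g_isDevice, aclrtMalloc and aclrtMallocHost are the flag and ACL calls this harness already uses):

    #include "acl/acl.h"

    extern bool g_isDevice;

    // Allocate memory the host-side test can touch: device memory when the
    // harness runs on-device, pinned host memory otherwise; false on failure.
    static bool MallocHostVisible(void** ptr, size_t size)
    {
        aclError rc = g_isDevice ? aclrtMalloc(ptr, size, ACL_MEM_MALLOC_NORMAL_ONLY)
                                 : aclrtMallocHost(ptr, size);
        return rc == ACL_SUCCESS && *ptr != nullptr;
    }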
diff --git a/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/op_runner.cpp b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/op_runner.cpp index c1a732e1..0f126212 100644 --- a/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/op_runner.cpp +++ b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/op_runner.cpp @@ -58,6 +58,35 @@ namespace AclnnLazyAdam { } } + bool OpRunner::InitOutputInfo() + { + // 手动修改输出数据实现,仅申请host上的输出数据空间,析构出需同时适配 + numOutputs_ = OUTPUT_SIZE; + for (size_t i = 0; i < numOutputs_; ++i) { + int inputTensorIndex = i + INPUT_TENSOR_OFFSET; + auto size = GetInputSize(inputTensorIndex); + + void *hostOutput = nullptr; + if (g_isDevice) { + if (aclrtMalloc(&hostOutput, size, ACL_MEM_MALLOC_NORMAL_ONLY) != ACL_SUCCESS) { + ERROR_LOG("Malloc device memory for output[%zu] failed", i); + return false; + } + } else { + if (aclrtMallocHost(&hostOutput, size) != ACL_SUCCESS) { + ERROR_LOG("Malloc device memory for output[%zu] failed", i); + return false; + } + } + if (hostOutput == nullptr) { + ERROR_LOG("Malloc host memory for output[%zu] failed", i); + return false; + } + hostOutputs_.emplace_back(hostOutput); + } + return true; + } + bool OpRunner::Init() { for (size_t i = 0; i < numInputs_; ++i) { @@ -98,31 +127,7 @@ namespace AclnnLazyAdam { inputTensor_.emplace_back(inputTensor); } - // 手动修改输出数据实现,仅申请host上的输出数据空间,析构出需同时适配 - numOutputs_ = OUTPUT_SIZE; - for (size_t i = 0; i < numOutputs_; ++i) { - int inputTensorIndex = i + INPUT_TENSOR_OFFSET; - auto size = GetInputSize(inputTensorIndex); - - void *hostOutput = nullptr; - if (g_isDevice) { - if (aclrtMalloc(&hostOutput, size, ACL_MEM_MALLOC_NORMAL_ONLY) != ACL_SUCCESS) { - ERROR_LOG("Malloc device memory for output[%zu] failed", i); - return false; - } - } else { - if (aclrtMallocHost(&hostOutput, size) != ACL_SUCCESS) { - ERROR_LOG("Malloc device memory for output[%zu] failed", i); - return false; - } - } - if (hostOutput == nullptr) { - ERROR_LOG("Malloc host memory for output[%zu] failed", i); - return false; - } - hostOutputs_.emplace_back(hostOutput); - } - return true; + return InitOutputInfo(); } const size_t OpRunner::NumInputs() diff --git a/cust_op/fused_lazy_adam/op_host/lazy_adam.cpp b/cust_op/fused_lazy_adam/op_host/lazy_adam.cpp index b93fc0d2..34fc9c7e 100644 --- a/cust_op/fused_lazy_adam/op_host/lazy_adam.cpp +++ b/cust_op/fused_lazy_adam/op_host/lazy_adam.cpp @@ -214,9 +214,7 @@ namespace ops { this->Attr("beta1").Float(); this->Attr("beta2").Float(); this->Attr("epsilon").Float(); - this->SetInferShape(ge::LazyAdamInferShape) - .SetInferDataType(ge::LazyAdamInferDataType); - + this->SetInferShape(ge::LazyAdamInferShape).SetInferDataType(ge::LazyAdamInferDataType); this->AICore().SetTiling(optiling::LazyAdamTilingFunc); this->AICore().AddConfig("ascend910b"); } -- Gitee From 2881bae3a7b1275596f5ec3a6a94a9d526d8278f Mon Sep 17 00:00:00 2001 From: penghuiyang <1060916628@qq.com> Date: Mon, 6 May 2024 16:00:19 +0800 Subject: [PATCH 091/302] =?UTF-8?q?clang-format=E6=96=87=E4=BB=B6=E6=A0=BC?= =?UTF-8?q?=E5=BC=8F=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../aclnn_lazy_adam_test/src/op_runner.cpp | 721 +++++++++--------- .../src/operator_desc.cpp | 67 +- cust_op/fused_lazy_adam/op_host/lazy_adam.cpp | 381 +++++---- .../op_host/lazy_adam_tiling.h | 34 +- 4 files changed, 599 insertions(+), 604 deletions(-) diff --git a/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/op_runner.cpp b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/op_runner.cpp index 
0f126212..3d737564 100644 --- a/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/op_runner.cpp +++ b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/op_runner.cpp @@ -13,443 +13,440 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#include "op_runner.h" + #include #include #include "acl/acl_op_compiler.h" #include "aclnn_lazy_adam.h" #include "common.h" -#include "op_runner.h" extern bool g_isDevice; namespace AclnnLazyAdam { - using namespace std; - constexpr int PRINT_OUT_WIDTH = 10; - constexpr int PRINT_OUT_PRECISION = 4; - constexpr int STREAM_TIMEOUT = 5000; // 等待Stream任务完成,超时时间单位:ms - constexpr int OUTPUT_SIZE = 3; - constexpr int INPUT_TENSOR_OFFSET = 2; - - OpRunner::OpRunner(OperatorDesc* opDesc) : opDesc_(opDesc) - { - numInputs_ = opDesc->inputDesc.size(); - numOutputs_ = opDesc->outputDesc.size(); - } - - OpRunner::~OpRunner() - { - for (size_t i = 0; i < numInputs_; ++i) { - (void) aclDestroyTensor(inputTensor_[i]); - (void) aclDestroyDataBuffer(inputBuffers_[i]); - (void) aclrtFree(devInputs_[i]); - if (g_isDevice) { - (void) aclrtFree(hostInputs_[i]); - } else { - (void) aclrtFreeHost(hostInputs_[i]); - } +using namespace std; +constexpr int PRINT_OUT_WIDTH = 10; +constexpr int PRINT_OUT_PRECISION = 4; +constexpr int STREAM_TIMEOUT = 5000; // 等待Stream任务完成,超时时间单位:ms +constexpr int OUTPUT_SIZE = 3; +constexpr int INPUT_TENSOR_OFFSET = 2; + +OpRunner::OpRunner(OperatorDesc* opDesc) : opDesc_(opDesc) +{ + numInputs_ = opDesc->inputDesc.size(); + numOutputs_ = opDesc->outputDesc.size(); +} + +OpRunner::~OpRunner() +{ + for (size_t i = 0; i < numInputs_; ++i) { + (void)aclDestroyTensor(inputTensor_[i]); + (void)aclDestroyDataBuffer(inputBuffers_[i]); + (void)aclrtFree(devInputs_[i]); + if (g_isDevice) { + (void)aclrtFree(hostInputs_[i]); + } else { + (void)aclrtFreeHost(hostInputs_[i]); } - for (size_t i = 0; i < numOutputs_; ++i) { - if (g_isDevice) { - (void) aclrtFree(hostOutputs_[i]); - } else { - (void) aclrtFreeHost(hostOutputs_[i]); - } + } + for (size_t i = 0; i < numOutputs_; ++i) { + if (g_isDevice) { + (void)aclrtFree(hostOutputs_[i]); + } else { + (void)aclrtFreeHost(hostOutputs_[i]); } } - - bool OpRunner::InitOutputInfo() - { - // 手动修改输出数据实现,仅申请host上的输出数据空间,析构出需同时适配 - numOutputs_ = OUTPUT_SIZE; - for (size_t i = 0; i < numOutputs_; ++i) { - int inputTensorIndex = i + INPUT_TENSOR_OFFSET; - auto size = GetInputSize(inputTensorIndex); - - void *hostOutput = nullptr; - if (g_isDevice) { - if (aclrtMalloc(&hostOutput, size, ACL_MEM_MALLOC_NORMAL_ONLY) != ACL_SUCCESS) { - ERROR_LOG("Malloc device memory for output[%zu] failed", i); - return false; - } - } else { - if (aclrtMallocHost(&hostOutput, size) != ACL_SUCCESS) { - ERROR_LOG("Malloc device memory for output[%zu] failed", i); - return false; - } +} + +bool OpRunner::InitOutputInfo() +{ + // 手动修改输出数据实现,仅申请host上的输出数据空间,析构出需同时适配 + numOutputs_ = OUTPUT_SIZE; + for (size_t i = 0; i < numOutputs_; ++i) { + int inputTensorIndex = i + INPUT_TENSOR_OFFSET; + auto size = GetInputSize(inputTensorIndex); + + void* hostOutput = nullptr; + if (g_isDevice) { + if (aclrtMalloc(&hostOutput, size, ACL_MEM_MALLOC_NORMAL_ONLY) != ACL_SUCCESS) { + ERROR_LOG("Malloc device memory for output[%zu] failed", i); + return false; } - if (hostOutput == nullptr) { - ERROR_LOG("Malloc host memory for output[%zu] failed", i); + } else { + if (aclrtMallocHost(&hostOutput, size) != ACL_SUCCESS) { + 
ERROR_LOG("Malloc device memory for output[%zu] failed", i); return false; } - hostOutputs_.emplace_back(hostOutput); } - return true; + if (hostOutput == nullptr) { + ERROR_LOG("Malloc host memory for output[%zu] failed", i); + return false; + } + hostOutputs_.emplace_back(hostOutput); } + return true; +} + +bool OpRunner::Init() +{ + for (size_t i = 0; i < numInputs_; ++i) { + auto size = GetInputSize(i); + void* devMem = nullptr; + if (aclrtMalloc(&devMem, size, ACL_MEM_MALLOC_NORMAL_ONLY) != ACL_SUCCESS) { + ERROR_LOG("Malloc device memory for input[%zu] failed", i); + return false; + } + devInputs_.emplace_back(devMem); + inputBuffers_.emplace_back(aclCreateDataBuffer(devMem, size)); - bool OpRunner::Init() - { - for (size_t i = 0; i < numInputs_; ++i) { - auto size = GetInputSize(i); - void *devMem = nullptr; - if (aclrtMalloc(&devMem, size, ACL_MEM_MALLOC_NORMAL_ONLY) != ACL_SUCCESS) { + void* hostInput = nullptr; + if (g_isDevice) { + if (aclrtMalloc(&hostInput, size, ACL_MEM_MALLOC_NORMAL_ONLY) != ACL_SUCCESS) { ERROR_LOG("Malloc device memory for input[%zu] failed", i); return false; } - devInputs_.emplace_back(devMem); - inputBuffers_.emplace_back(aclCreateDataBuffer(devMem, size)); - - void *hostInput = nullptr; - if (g_isDevice) { - if (aclrtMalloc(&hostInput, size, ACL_MEM_MALLOC_NORMAL_ONLY) != ACL_SUCCESS) { - ERROR_LOG("Malloc device memory for input[%zu] failed", i); - return false; - } - } else { - if (aclrtMallocHost(&hostInput, size) != ACL_SUCCESS) { - ERROR_LOG("Malloc device memory for input[%zu] failed", i); - return false; - } - } - if (hostInput == nullptr) { - ERROR_LOG("Malloc memory for input[%zu] failed", i); - return false; - } - hostInputs_.emplace_back(hostInput); - - aclTensor *inputTensor = aclCreateTensor(GetInputShape(i).data(), GetInputNumDims(i), GetInputDataType(i), - nullptr, 0, GetInputFormat(i), GetInputShape(i).data(), - GetInputNumDims(i), devInputs_[i]); - if (inputTensor == nullptr) { - ERROR_LOG("Create Tensor for input[%zu] failed", i); + } else { + if (aclrtMallocHost(&hostInput, size) != ACL_SUCCESS) { + ERROR_LOG("Malloc device memory for input[%zu] failed", i); return false; } - inputTensor_.emplace_back(inputTensor); } + if (hostInput == nullptr) { + ERROR_LOG("Malloc memory for input[%zu] failed", i); + return false; + } + hostInputs_.emplace_back(hostInput); - return InitOutputInfo(); + aclTensor* inputTensor = + aclCreateTensor(GetInputShape(i).data(), GetInputNumDims(i), GetInputDataType(i), nullptr, 0, + GetInputFormat(i), GetInputShape(i).data(), GetInputNumDims(i), devInputs_[i]); + if (inputTensor == nullptr) { + ERROR_LOG("Create Tensor for input[%zu] failed", i); + return false; + } + inputTensor_.emplace_back(inputTensor); } - const size_t OpRunner::NumInputs() - { - return numInputs_; - } + return InitOutputInfo(); +} - const size_t OpRunner::NumOutputs() - { - return numOutputs_; - } +const size_t OpRunner::NumInputs() +{ + return numInputs_; +} - const size_t OpRunner::GetInputSize(size_t index) const - { - if (index >= numInputs_) { - ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); - return 0; - } - return aclGetTensorDescSize(opDesc_->inputDesc[index]); - } +const size_t OpRunner::NumOutputs() +{ + return numOutputs_; +} - const size_t OpRunner::GetInputNumDims(size_t index) const - { - if (index >= numInputs_) { - ERROR_LOG("index out of range. 
index = %zu, numInputs = %zu", index, numInputs_); - return 0; - } - return aclGetTensorDescNumDims(opDesc_->inputDesc[index]); +const size_t OpRunner::GetInputSize(size_t index) const +{ + if (index >= numInputs_) { + ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); + return 0; } - - aclDataType OpRunner::GetInputDataType(size_t index) const - { - if (index >= numInputs_) { - ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); - return ACL_DT_UNDEFINED; - } - return aclGetTensorDescType(opDesc_->inputDesc[index]); + return aclGetTensorDescSize(opDesc_->inputDesc[index]); +} + +const size_t OpRunner::GetInputNumDims(size_t index) const +{ + if (index >= numInputs_) { + ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); + return 0; } - - aclFormat OpRunner::GetInputFormat(size_t index) const - { - if (index >= numInputs_) { - ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); - return ACL_FORMAT_UNDEFINED; - } - return aclGetTensorDescFormat(opDesc_->inputDesc[index]); + return aclGetTensorDescNumDims(opDesc_->inputDesc[index]); +} + +aclDataType OpRunner::GetInputDataType(size_t index) const +{ + if (index >= numInputs_) { + ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); + return ACL_DT_UNDEFINED; } - - std::vector OpRunner::GetInputShape(size_t index) const - { - std::vector ret; - if (index >= numInputs_) { - ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); - return ret; - } - - auto desc = opDesc_->inputDesc[index]; - for (size_t i = 0; i < aclGetTensorDescNumDims(desc); ++i) { - int64_t dimSize; - if (aclGetTensorDescDimV2(desc, i, &dimSize) != ACL_SUCCESS) { - ERROR_LOG("get dims from tensor desc failed. dims index = %zu", i); - ret.clear(); - return ret; - } - ret.emplace_back(dimSize); - } + return aclGetTensorDescType(opDesc_->inputDesc[index]); +} + +aclFormat OpRunner::GetInputFormat(size_t index) const +{ + if (index >= numInputs_) { + ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); + return ACL_FORMAT_UNDEFINED; + } + return aclGetTensorDescFormat(opDesc_->inputDesc[index]); +} + +std::vector OpRunner::GetInputShape(size_t index) const +{ + std::vector ret; + if (index >= numInputs_) { + ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); return ret; } - size_t OpRunner::GetOutputSize(size_t index) const - { - if (index >= numOutputs_) { - ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); - return 0; + auto desc = opDesc_->inputDesc[index]; + for (size_t i = 0; i < aclGetTensorDescNumDims(desc); ++i) { + int64_t dimSize; + if (aclGetTensorDescDimV2(desc, i, &dimSize) != ACL_SUCCESS) { + ERROR_LOG("get dims from tensor desc failed. dims index = %zu", i); + ret.clear(); + return ret; } - return aclGetTensorDescSize(opDesc_->outputDesc[index]); + ret.emplace_back(dimSize); } - - const size_t OpRunner::GetOutputNumDims(size_t index) const - { - if (index >= numOutputs_) { - ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); - return 0; - } - return aclGetTensorDescNumDims(opDesc_->outputDesc[index]); + return ret; +} + +size_t OpRunner::GetOutputSize(size_t index) const +{ + if (index >= numOutputs_) { + ERROR_LOG("index out of range. 
index = %zu, numOutputs = %zu", index, numOutputs_); + return 0; } - - aclDataType OpRunner::GetOutputDataType(size_t index) const - { - if (index >= numOutputs_) { - ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); - return ACL_DT_UNDEFINED; - } - return aclGetTensorDescType(opDesc_->outputDesc[index]); + return aclGetTensorDescSize(opDesc_->outputDesc[index]); +} + +const size_t OpRunner::GetOutputNumDims(size_t index) const +{ + if (index >= numOutputs_) { + ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); + return 0; + } + return aclGetTensorDescNumDims(opDesc_->outputDesc[index]); +} + +aclDataType OpRunner::GetOutputDataType(size_t index) const +{ + if (index >= numOutputs_) { + ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); + return ACL_DT_UNDEFINED; + } + return aclGetTensorDescType(opDesc_->outputDesc[index]); +} + +aclFormat OpRunner::GetOutputFormat(size_t index) const +{ + if (index >= numOutputs_) { + ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); + return ACL_FORMAT_UNDEFINED; } + return aclGetTensorDescFormat(opDesc_->outputDesc[index]); +} - aclFormat OpRunner::GetOutputFormat(size_t index) const - { - if (index >= numOutputs_) { - ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); - return ACL_FORMAT_UNDEFINED; - } - - return aclGetTensorDescFormat(opDesc_->outputDesc[index]); +std::vector OpRunner::GetOutputShape(size_t index) const +{ + std::vector ret; + if (index >= numOutputs_) { + ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); + return ret; } - std::vector OpRunner::GetOutputShape(size_t index) const - { - std::vector ret; - if (index >= numOutputs_) { - ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); + auto desc = opDesc_->outputDesc[index]; + for (size_t i = 0; i < aclGetTensorDescNumDims(desc); ++i) { + int64_t dimSize; + if (aclGetTensorDescDimV2(desc, i, &dimSize) != ACL_SUCCESS) { + ERROR_LOG("get dims from tensor desc failed. dims index = %zu", i); + ret.clear(); return ret; } - - auto desc = opDesc_->outputDesc[index]; - for (size_t i = 0; i < aclGetTensorDescNumDims(desc); ++i) { - int64_t dimSize; - if (aclGetTensorDescDimV2(desc, i, &dimSize) != ACL_SUCCESS) { - ERROR_LOG("get dims from tensor desc failed. dims index = %zu", i); - ret.clear(); - return ret; - } - ret.emplace_back(dimSize); - } - return ret; + ret.emplace_back(dimSize); + } + return ret; +} + +size_t OpRunner::GetInputElementCount(size_t index) const +{ + if (index >= opDesc_->inputDesc.size()) { + ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); + return 0; } - size_t OpRunner::GetInputElementCount(size_t index) const - { - if (index >= opDesc_->inputDesc.size()) { - ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); - return 0; - } + return aclGetTensorDescElementCount(opDesc_->inputDesc[index]); +} - return aclGetTensorDescElementCount(opDesc_->inputDesc[index]); +size_t OpRunner::GetOutputElementCount(size_t index) const +{ + if (index >= opDesc_->outputDesc.size()) { + ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); + return 0; } - - size_t OpRunner::GetOutputElementCount(size_t index) const - { - if (index >= opDesc_->outputDesc.size()) { - ERROR_LOG("index out of range. 
index = %zu, numOutputs = %zu", index, numOutputs_); - return 0; + return aclGetTensorDescElementCount(opDesc_->outputDesc[index]); +} + +bool OpRunner::RunOp() +{ + for (size_t i = 0; i < numInputs_; ++i) { + auto size = GetInputSize(i); + aclrtMemcpyKind kind = ACL_MEMCPY_HOST_TO_DEVICE; + if (g_isDevice) { + kind = ACL_MEMCPY_DEVICE_TO_DEVICE; + } + if (aclrtMemcpy(devInputs_[i], size, hostInputs_[i], size, kind) != ACL_SUCCESS) { + ERROR_LOG("Copy input[%zu] failed", i); + return false; } - return aclGetTensorDescElementCount(opDesc_->outputDesc[index]); + INFO_LOG("Copy input[%zu] success", i); } - bool OpRunner::RunOp() - { - for (size_t i = 0; i < numInputs_; ++i) { - auto size = GetInputSize(i); - aclrtMemcpyKind kind = ACL_MEMCPY_HOST_TO_DEVICE; - if (g_isDevice) { - kind = ACL_MEMCPY_DEVICE_TO_DEVICE; - } - if (aclrtMemcpy(devInputs_[i], size, hostInputs_[i], size, kind) != ACL_SUCCESS) { - ERROR_LOG("Copy input[%zu] failed", i); - return false; - } - INFO_LOG("Copy input[%zu] success", i); - } + aclrtStream stream = nullptr; + if (aclrtCreateStream(&stream) != ACL_SUCCESS) { + ERROR_LOG("Create stream failed"); + return false; + } + INFO_LOG("Create stream success"); + + size_t workspaceSize = 0; + aclOpExecutor* handle = nullptr; + auto ret = aclnnLazyAdamGetWorkspaceSize(inputTensor_[0], inputTensor_[1], inputTensor_[2], inputTensor_[3], + inputTensor_[4], inputTensor_[5], opDesc_->beta1, opDesc_->beta2, + opDesc_->epsilon, &workspaceSize, &handle); + if (ret != ACL_SUCCESS) { + (void)aclrtDestroyStream(stream); + ERROR_LOG("Get Operator Workspace failed. error code is %d", static_cast(ret)); + return false; + } + INFO_LOG("Execute aclnnAddCustomGetWorkspaceSize success, workspace size %lu", workspaceSize); - aclrtStream stream = nullptr; - if (aclrtCreateStream(&stream) != ACL_SUCCESS) { - ERROR_LOG("Create stream failed"); - return false; + void* workspace = nullptr; + if (workspaceSize != 0) { + if (aclrtMalloc(&workspace, workspaceSize, ACL_MEM_MALLOC_NORMAL_ONLY) != ACL_SUCCESS) { + ERROR_LOG("Malloc device memory failed"); } - INFO_LOG("Create stream success"); - - size_t workspaceSize = 0; - aclOpExecutor *handle = nullptr; - auto ret = aclnnLazyAdamGetWorkspaceSize(inputTensor_[0], inputTensor_[1], - inputTensor_[2], inputTensor_[3], inputTensor_[4], inputTensor_[5], - opDesc_->beta1, opDesc_->beta2, opDesc_->epsilon, - &workspaceSize, &handle); - if (ret != ACL_SUCCESS) { - (void) aclrtDestroyStream(stream); - ERROR_LOG("Get Operator Workspace failed. error code is %d", static_cast(ret)); - return false; - } - INFO_LOG("Execute aclnnAddCustomGetWorkspaceSize success, workspace size %lu", workspaceSize); + } - void *workspace = nullptr; - if (workspaceSize != 0) { - if (aclrtMalloc(&workspace, workspaceSize, ACL_MEM_MALLOC_NORMAL_ONLY) != ACL_SUCCESS) { - ERROR_LOG("Malloc device memory failed"); - } - } + ret = aclnnLazyAdam(workspace, workspaceSize, handle, stream); + if (ret != ACL_SUCCESS) { + (void)aclrtDestroyStream(stream); + ERROR_LOG("Execute Operator failed. error code is %d", static_cast(ret)); + return false; + } + INFO_LOG("Execute aclnnAddCustom success"); - ret = aclnnLazyAdam(workspace, workspaceSize, handle, stream); - if (ret != ACL_SUCCESS) { - (void) aclrtDestroyStream(stream); - ERROR_LOG("Execute Operator failed. error code is %d", static_cast(ret)); - return false; - } - INFO_LOG("Execute aclnnAddCustom success"); + ret = aclrtSynchronizeStreamWithTimeout(stream, STREAM_TIMEOUT); + if (ret != SUCCESS) { + ERROR_LOG("Synchronize stream failed. 
error code is %d", static_cast(ret)); + (void)aclrtDestroyStream(stream); + return false; + } + INFO_LOG("Synchronize stream success"); - ret = aclrtSynchronizeStreamWithTimeout(stream, STREAM_TIMEOUT); - if (ret != SUCCESS) { - ERROR_LOG("Synchronize stream failed. error code is %d", static_cast(ret)); - (void) aclrtDestroyStream(stream); - return false; + // 把输入数据:inputM inputV inputVar 作为输出数据拷贝出来 + for (size_t i = 0; i < OUTPUT_SIZE; ++i) { + int inputTensorIndex = i + INPUT_TENSOR_OFFSET; // 加上输入tensor偏移值 + auto size = GetInputSize(inputTensorIndex); + aclrtMemcpyKind kind = ACL_MEMCPY_DEVICE_TO_HOST; + if (g_isDevice) { + kind = ACL_MEMCPY_DEVICE_TO_DEVICE; } - INFO_LOG("Synchronize stream success"); - - // 把输入数据:inputM inputV inputVar 作为输出数据拷贝出来 - for (size_t i = 0; i < OUTPUT_SIZE; ++i) { - int inputTensorIndex = i + INPUT_TENSOR_OFFSET; // 加上输入tensor偏移值 - auto size = GetInputSize(inputTensorIndex); - aclrtMemcpyKind kind = ACL_MEMCPY_DEVICE_TO_HOST; - if (g_isDevice) { - kind = ACL_MEMCPY_DEVICE_TO_DEVICE; - } - if (aclrtMemcpy(hostOutputs_[i], size, devInputs_[inputTensorIndex], size, kind) != ACL_SUCCESS) { - INFO_LOG("Copy output[%zu] success", i); - (void) aclrtDestroyStream(stream); - return false; - } + if (aclrtMemcpy(hostOutputs_[i], size, devInputs_[inputTensorIndex], size, kind) != ACL_SUCCESS) { INFO_LOG("Copy output[%zu] success", i); + (void)aclrtDestroyStream(stream); + return false; } - - (void) aclrtDestroyStream(stream); - return true; + INFO_LOG("Copy output[%zu] success", i); } + (void)aclrtDestroyStream(stream); + return true; +} - template - void DoPrintData(const T *data, size_t count, size_t elementsPerRow) - { - if (elementsPerRow == 0) { - throw std::runtime_error("value must not be zero."); - } - for (size_t i = 0; i < count; ++i) { - std::cout << std::setw(PRINT_OUT_WIDTH) << data[i]; - if (i % elementsPerRow == elementsPerRow - 1) { - std::cout << std::endl; - } - } +template +void DoPrintData(const T* data, size_t count, size_t elementsPerRow) +{ + if (elementsPerRow == 0) { + throw std::runtime_error("value must not be zero."); } - - void DoPrintFp16Data(const aclFloat16 *data, size_t count, size_t elementsPerRow) - { - if (elementsPerRow == 0) { - throw std::runtime_error("value must not be zero."); - } - for (size_t i = 0; i < count; ++i) { - std::cout << std::setw(PRINT_OUT_WIDTH) << std::setprecision(PRINT_OUT_PRECISION) - << aclFloat16ToFloat(data[i]); - if (i % elementsPerRow == elementsPerRow - 1) { - std::cout << std::endl; - } + for (size_t i = 0; i < count; ++i) { + std::cout << std::setw(PRINT_OUT_WIDTH) << data[i]; + if (i % elementsPerRow == elementsPerRow - 1) { + std::cout << std::endl; } } +} - void PrintData(const void *data, size_t count, aclDataType dataType, size_t elementsPerRow) - { - if (data == nullptr) { - ERROR_LOG("Print data failed. 
data is nullptr"); - return; +void DoPrintFp16Data(const aclFloat16* data, size_t count, size_t elementsPerRow) +{ + if (elementsPerRow == 0) { + throw std::runtime_error("value must not be zero."); + } + for (size_t i = 0; i < count; ++i) { + std::cout << std::setw(PRINT_OUT_WIDTH) << std::setprecision(PRINT_OUT_PRECISION) << aclFloat16ToFloat(data[i]); + if (i % elementsPerRow == elementsPerRow - 1) { + std::cout << std::endl; } + } +} - switch (dataType) { - case ACL_BOOL: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case ACL_INT8: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case ACL_UINT8: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case ACL_INT16: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case ACL_UINT16: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case ACL_INT32: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case ACL_UINT32: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case ACL_INT64: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case ACL_UINT64: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case ACL_FLOAT16: - DoPrintFp16Data(reinterpret_cast(data), count, elementsPerRow); - break; - case ACL_FLOAT: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case ACL_DOUBLE: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - default: - ERROR_LOG("Unsupported type: %d", dataType); - } +void PrintData(const void* data, size_t count, aclDataType dataType, size_t elementsPerRow) +{ + if (data == nullptr) { + ERROR_LOG("Print data failed. data is nullptr"); + return; } - void OpRunner::PrintInput(size_t index, size_t numElementsPerRow) - { - if (index >= numInputs_) { - ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numInputs_); - return; - } + switch (dataType) { + case ACL_BOOL: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case ACL_INT8: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case ACL_UINT8: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case ACL_INT16: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case ACL_UINT16: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case ACL_INT32: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case ACL_UINT32: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case ACL_INT64: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case ACL_UINT64: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case ACL_FLOAT16: + DoPrintFp16Data(reinterpret_cast(data), count, elementsPerRow); + break; + case ACL_FLOAT: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case ACL_DOUBLE: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + default: + ERROR_LOG("Unsupported type: %d", dataType); + } +} - auto desc = opDesc_->inputDesc[index]; - PrintData(hostInputs_[index], GetInputElementCount(index), aclGetTensorDescType(desc), numElementsPerRow); +void OpRunner::PrintInput(size_t index, size_t numElementsPerRow) +{ + if (index >= numInputs_) { + ERROR_LOG("index out of range. 
index = %zu, numOutputs = %zu", index, numInputs_); + return; } - void OpRunner::PrintOutput(size_t index, size_t numElementsPerRow) - { - if (index >= numOutputs_) { - ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); - return; - } + auto desc = opDesc_->inputDesc[index]; + PrintData(hostInputs_[index], GetInputElementCount(index), aclGetTensorDescType(desc), numElementsPerRow); +} - auto desc = opDesc_->outputDesc[index]; - PrintData(hostOutputs_[index], GetOutputElementCount(index), aclGetTensorDescType(desc), numElementsPerRow); +void OpRunner::PrintOutput(size_t index, size_t numElementsPerRow) +{ + if (index >= numOutputs_) { + ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); + return; } -} \ No newline at end of file + + auto desc = opDesc_->outputDesc[index]; + PrintData(hostOutputs_[index], GetOutputElementCount(index), aclGetTensorDescType(desc), numElementsPerRow); +} +} // namespace AclnnLazyAdam \ No newline at end of file diff --git a/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/operator_desc.cpp b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/operator_desc.cpp index dad4ab0f..13602e17 100644 --- a/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/operator_desc.cpp +++ b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/operator_desc.cpp @@ -13,46 +13,45 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "common.h" #include "operator_desc.h" +#include "common.h" + namespace AclnnLazyAdam { - using namespace std; - - OperatorDesc::OperatorDesc() - {} - - OperatorDesc::~OperatorDesc() - { - for (auto* desc: inputDesc) { - aclDestroyTensorDesc(desc); - } - for (auto* desc: outputDesc) { - aclDestroyTensorDesc(desc); - } +using namespace std; + +OperatorDesc::OperatorDesc() {} + +OperatorDesc::~OperatorDesc() +{ + for (auto* desc : inputDesc) { + aclDestroyTensorDesc(desc); + } + for (auto* desc : outputDesc) { + aclDestroyTensorDesc(desc); } +} - OperatorDesc& OperatorDesc::AddInputTensorDesc(aclDataType dataType, int numDims, const int64_t* dims, - aclFormat format) - { - aclTensorDesc* desc = aclCreateTensorDesc(dataType, numDims, dims, format); - if (desc == nullptr) { - ERROR_LOG("create tensor failed"); - return *this; - } - inputDesc.emplace_back(desc); +OperatorDesc& OperatorDesc::AddInputTensorDesc(aclDataType dataType, int numDims, const int64_t* dims, aclFormat format) +{ + aclTensorDesc* desc = aclCreateTensorDesc(dataType, numDims, dims, format); + if (desc == nullptr) { + ERROR_LOG("create tensor failed"); return *this; } - - OperatorDesc& OperatorDesc::AddOutputTensorDesc(aclDataType dataType, int numDims, - const int64_t* dims, aclFormat format) - { - aclTensorDesc* desc = aclCreateTensorDesc(dataType, numDims, dims, format); - if (desc == nullptr) { - ERROR_LOG("create tensor failed"); - return *this; - } - outputDesc.emplace_back(desc); + inputDesc.emplace_back(desc); + return *this; +} + +OperatorDesc& OperatorDesc::AddOutputTensorDesc(aclDataType dataType, int numDims, const int64_t* dims, + aclFormat format) +{ + aclTensorDesc* desc = aclCreateTensorDesc(dataType, numDims, dims, format); + if (desc == nullptr) { + ERROR_LOG("create tensor failed"); return *this; } -} \ No newline at end of file + outputDesc.emplace_back(desc); + return *this; +} +} // namespace AclnnLazyAdam \ No newline at end of file diff --git 
a/cust_op/fused_lazy_adam/op_host/lazy_adam.cpp b/cust_op/fused_lazy_adam/op_host/lazy_adam.cpp index 34fc9c7e..77826029 100644 --- a/cust_op/fused_lazy_adam/op_host/lazy_adam.cpp +++ b/cust_op/fused_lazy_adam/op_host/lazy_adam.cpp @@ -18,207 +18,206 @@ See the License for the specific language governing permissions and #include "tiling/platform/platform_ascendc.h" namespace optiling { - constexpr int BLOCK_SIZE = 32; - constexpr int RESERVE_UB_SIZE = 20 * 1024; - constexpr int DATA_NUM_PER_COMPUTE = 8; - constexpr int32_t USR_SIZE = 256; - constexpr int32_t SYS_WORKSPACE_SIZE = 16 * 1024 * 1024; - - template - static ge::graphStatus CheckNullPointer(T* pointer, const char* errorMessage) - { - if (pointer == nullptr) { - printf("%s nullptr\n", errorMessage); - return ge::GRAPH_FAILED; - } - - return ge::GRAPH_SUCCESS; +constexpr int BLOCK_SIZE = 32; +constexpr int RESERVE_UB_SIZE = 20 * 1024; +constexpr int DATA_NUM_PER_COMPUTE = 8; +constexpr int32_t USR_SIZE = 256; +constexpr int32_t SYS_WORKSPACE_SIZE = 16 * 1024 * 1024; + +template +static ge::graphStatus CheckNullPointer(T* pointer, const char* errorMessage) +{ + if (pointer == nullptr) { + printf("%s nullptr\n", errorMessage); + return ge::GRAPH_FAILED; } - static ge::graphStatus LazyAdamTilingFunc(gert::TilingContext* context) - { - size_t* currentWorkspace = context->GetWorkspaceSizes(1); - if (CheckNullPointer(currentWorkspace, "currentWorkspace") != ge::GRAPH_SUCCESS) { - return ge::GRAPH_FAILED; - } - currentWorkspace[0] = SYS_WORKSPACE_SIZE + USR_SIZE; - - LazyAdamTilingData tiling; - const gert::StorageShape* indicesShape = context->GetInputShape(1); - const gert::StorageShape* inputMShape = context->GetInputShape(2); - uint64_t dim0 = inputMShape->GetStorageShape().GetDim(0); - uint64_t dim1 = indicesShape->GetStorageShape().GetDim(0); - uint64_t dim2 = inputMShape->GetStorageShape().GetDim(1); - ge::DataType inputMDtype = context->GetInputDesc(2)->GetDataType(); - int inputMDtypeSize = ge::GetSizeByDataType(inputMDtype); - ge::DataType indicesDtype = context->GetInputDesc(1)->GetDataType(); - int indicesDtypeSize = ge::GetSizeByDataType(indicesDtype); - - tiling.SaveToBuffer(context->GetRawTilingData()->GetData(), context->GetRawTilingData()->GetCapacity()); - context->GetRawTilingData()->SetDataSize(tiling.GetDataSize()); - auto attrs = context->GetAttrs(); - - float beta1 = *attrs->GetAttrPointer(0); - float beta2 = *attrs->GetAttrPointer(1); - float epsilon = *attrs->GetAttrPointer(2); - - auto platformInfo = platform_ascendc::PlatformAscendC(context->GetPlatformInfo()); - uint32_t coreNum = platformInfo.GetCoreNum(); - if (coreNum == 0) { - return ge::GRAPH_FAILED; - } - uint64_t ub; - platformInfo.GetCoreMemSize(platform_ascendc::CoreMemType::UB, ub); - ub = ub - RESERVE_UB_SIZE; - // ub大小除以每行的数据大小,得到每次处理的行数 - uint64_t row = ub / (dim2 * inputMDtypeSize * DATA_NUM_PER_COMPUTE + 1 * indicesDtypeSize); - if (row > dim1) { - row = dim1; - } - - // 保证申请的内存是32的倍数并且向上取整 计算方式:(num+31)/32*32 - uint64_t indicesAllocSize = (row * indicesDtypeSize + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE; - uint64_t otherAllocSize = (row * inputMDtypeSize * dim2 + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE; - // 前 CORE_NUM - 1 个核分配的任务量 - uint64_t batch = dim1 / coreNum; - // 实际使用的核数 - context->SetBlockDim(coreNum); - uint64_t loopCount = batch / row; // CORE_NUM - 1 个核的任务量,除以UB每一次能处理的数据,得到处理次数 - uint64_t rowLeft = batch - row * loopCount; // UB处理 loopCount 那么多次后,分给当前core剩下的数据量 - - // 最后一个核分配的任务量 - uint64_t batchTail = dim1 - batch * (coreNum - 
1); // phy 该写法适配了dim1刚好整除coreNum的情况 - uint64_t loopCountTail = batchTail / row; - uint64_t rowLeftTail = batchTail - row * loopCountTail; - - tiling.set_beta1(beta1); - tiling.set_beta2(beta2); - tiling.set_epsilon(epsilon); - tiling.set_dim0(dim0); - tiling.set_dim1(dim1); - tiling.set_dim2(dim2); - tiling.set_row(row); // 每个ai core一次能分配的数据行数 - tiling.set_indicesAllocSize(indicesAllocSize); // indices大小,用于申请空间 - tiling.set_otherAllocSize(otherAllocSize); // 入参中非indices要申请的空间大小 - tiling.set_batch(batch); // 前CORE_NUM - 1个核分配的任务量 - tiling.set_loopCount(loopCount); // 前CORE_NUM - 1 个核内循环处理次数 - tiling.set_rowLeft(rowLeft); // 前CORE_NUM - 1 个核, 核内处理 loopCount 次后,分给当前core剩下的数据量 - tiling.set_loopCountTail(loopCountTail); // 最后一个核,核内循环次数 - tiling.set_rowLeftTail(rowLeftTail); // 最后一个核,核内循环loopCountTail次后,剩余数据量 - tiling.set_coreNum(coreNum); - - tiling.SaveToBuffer(context->GetRawTilingData()->GetData(), context->GetRawTilingData()->GetCapacity()); - context->GetRawTilingData()->SetDataSize(tiling.GetDataSize()); - - return ge::GRAPH_SUCCESS; + return ge::GRAPH_SUCCESS; +} + +static ge::graphStatus LazyAdamTilingFunc(gert::TilingContext* context) +{ + size_t* currentWorkspace = context->GetWorkspaceSizes(1); + if (CheckNullPointer(currentWorkspace, "currentWorkspace") != ge::GRAPH_SUCCESS) { + return ge::GRAPH_FAILED; + } + currentWorkspace[0] = SYS_WORKSPACE_SIZE + USR_SIZE; + + LazyAdamTilingData tiling; + const gert::StorageShape* indicesShape = context->GetInputShape(1); + const gert::StorageShape* inputMShape = context->GetInputShape(2); + uint64_t dim0 = inputMShape->GetStorageShape().GetDim(0); + uint64_t dim1 = indicesShape->GetStorageShape().GetDim(0); + uint64_t dim2 = inputMShape->GetStorageShape().GetDim(1); + ge::DataType inputMDtype = context->GetInputDesc(2)->GetDataType(); + int inputMDtypeSize = ge::GetSizeByDataType(inputMDtype); + ge::DataType indicesDtype = context->GetInputDesc(1)->GetDataType(); + int indicesDtypeSize = ge::GetSizeByDataType(indicesDtype); + + tiling.SaveToBuffer(context->GetRawTilingData()->GetData(), context->GetRawTilingData()->GetCapacity()); + context->GetRawTilingData()->SetDataSize(tiling.GetDataSize()); + auto attrs = context->GetAttrs(); + + float beta1 = *attrs->GetAttrPointer(0); + float beta2 = *attrs->GetAttrPointer(1); + float epsilon = *attrs->GetAttrPointer(2); + + auto platformInfo = platform_ascendc::PlatformAscendC(context->GetPlatformInfo()); + uint32_t coreNum = platformInfo.GetCoreNum(); + if (coreNum == 0) { + return ge::GRAPH_FAILED; } + uint64_t ub; + platformInfo.GetCoreMemSize(platform_ascendc::CoreMemType::UB, ub); + ub = ub - RESERVE_UB_SIZE; + // ub大小除以每行的数据大小,得到每次处理的行数 + uint64_t row = ub / (dim2 * inputMDtypeSize * DATA_NUM_PER_COMPUTE + 1 * indicesDtypeSize); + if (row > dim1) { + row = dim1; + } + + // 保证申请的内存是32的倍数并且向上取整 计算方式:(num+31)/32*32 + uint64_t indicesAllocSize = (row * indicesDtypeSize + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE; + uint64_t otherAllocSize = (row * inputMDtypeSize * dim2 + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE; + // 前 CORE_NUM - 1 个核分配的任务量 + uint64_t batch = dim1 / coreNum; + // 实际使用的核数 + context->SetBlockDim(coreNum); + uint64_t loopCount = batch / row; // CORE_NUM - 1 个核的任务量,除以UB每一次能处理的数据,得到处理次数 + uint64_t rowLeft = batch - row * loopCount; // UB处理 loopCount 那么多次后,分给当前core剩下的数据量 + + // 最后一个核分配的任务量 + uint64_t batchTail = dim1 - batch * (coreNum - 1); // phy 该写法适配了dim1刚好整除coreNum的情况 + uint64_t loopCountTail = batchTail / row; + uint64_t rowLeftTail = batchTail - row * loopCountTail; + + 
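+    // Worked example of the split above (illustrative numbers, not from a real run):
+    // dim1 = 1000 indices, coreNum = 8, row = 64 rows per UB pass gives
+    //   batch         = 1000 / 8       = 125  rows on each leading core
+    //   loopCount     = 125 / 64       = 1,   rowLeft     = 125 - 64 = 61
+    //   batchTail     = 1000 - 125 * 7 = 125  rows on the last core
+    //   loopCountTail = 125 / 64       = 1,   rowLeftTail = 125 - 64 = 61
+    // i.e. every core makes one full 64-row pass plus one 61-row remainder pass.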
tiling.set_beta1(beta1); + tiling.set_beta2(beta2); + tiling.set_epsilon(epsilon); + tiling.set_dim0(dim0); + tiling.set_dim1(dim1); + tiling.set_dim2(dim2); + tiling.set_row(row); // 每个ai core一次能分配的数据行数 + tiling.set_indicesAllocSize(indicesAllocSize); // indices大小,用于申请空间 + tiling.set_otherAllocSize(otherAllocSize); // 入参中非indices要申请的空间大小 + tiling.set_batch(batch); // 前CORE_NUM - 1个核分配的任务量 + tiling.set_loopCount(loopCount); // 前CORE_NUM - 1 个核内循环处理次数 + tiling.set_rowLeft(rowLeft); // 前CORE_NUM - 1 个核, 核内处理 loopCount 次后,分给当前core剩下的数据量 + tiling.set_loopCountTail(loopCountTail); // 最后一个核,核内循环次数 + tiling.set_rowLeftTail(rowLeftTail); // 最后一个核,核内循环loopCountTail次后,剩余数据量 + tiling.set_coreNum(coreNum); + + tiling.SaveToBuffer(context->GetRawTilingData()->GetData(), context->GetRawTilingData()->GetCapacity()); + context->GetRawTilingData()->SetDataSize(tiling.GetDataSize()); + + return ge::GRAPH_SUCCESS; } +} // namespace optiling namespace ge { - static ge::graphStatus LazyAdamInferShape(gert::InferShapeContext* context) - { - if (optiling::CheckNullPointer(context, "context") != ge::GRAPH_SUCCESS) { - return ge::GRAPH_FAILED; - } - - gert::Shape* outputMShape = context->GetOutputShape(0); - if (optiling::CheckNullPointer(outputMShape, "outputMShape") != ge::GRAPH_SUCCESS) { - return ge::GRAPH_FAILED; - } - const gert::Shape* inputMShape = context->GetInputShape(2); - if (optiling::CheckNullPointer(inputMShape, "inputMShape") != ge::GRAPH_SUCCESS) { - return ge::GRAPH_FAILED; - } - *outputMShape = *inputMShape; - - gert::Shape* outputVShape = context->GetOutputShape(1); - if (optiling::CheckNullPointer(outputVShape, "outputVShape") != ge::GRAPH_SUCCESS) { - return ge::GRAPH_FAILED; - } - const gert::Shape* inputVShape = context->GetInputShape(3); - if (optiling::CheckNullPointer(inputVShape, "inputVShape") != ge::GRAPH_SUCCESS) { - return ge::GRAPH_FAILED; - } - *outputVShape = *inputVShape; - - gert::Shape* outputVarShape = context->GetOutputShape(2); - if (optiling::CheckNullPointer(outputVarShape, "outputVarShape") != ge::GRAPH_SUCCESS) { - return ge::GRAPH_FAILED; - } - const gert::Shape* inputVarShape = context->GetInputShape(4); - if (optiling::CheckNullPointer(inputVarShape, "inputVarShape") != ge::GRAPH_SUCCESS) { - return ge::GRAPH_FAILED; - } - *outputVarShape = *inputVarShape; - - return GRAPH_SUCCESS; +static ge::graphStatus LazyAdamInferShape(gert::InferShapeContext* context) +{ + if (optiling::CheckNullPointer(context, "context") != ge::GRAPH_SUCCESS) { + return ge::GRAPH_FAILED; } - static ge::graphStatus LazyAdamInferDataType(gert::InferDataTypeContext* context) - { - return GRAPH_SUCCESS; + gert::Shape* outputMShape = context->GetOutputShape(0); + if (optiling::CheckNullPointer(outputMShape, "outputMShape") != ge::GRAPH_SUCCESS) { + return ge::GRAPH_FAILED; + } + const gert::Shape* inputMShape = context->GetInputShape(2); + if (optiling::CheckNullPointer(inputMShape, "inputMShape") != ge::GRAPH_SUCCESS) { + return ge::GRAPH_FAILED; } + *outputMShape = *inputMShape; + + gert::Shape* outputVShape = context->GetOutputShape(1); + if (optiling::CheckNullPointer(outputVShape, "outputVShape") != ge::GRAPH_SUCCESS) { + return ge::GRAPH_FAILED; + } + const gert::Shape* inputVShape = context->GetInputShape(3); + if (optiling::CheckNullPointer(inputVShape, "inputVShape") != ge::GRAPH_SUCCESS) { + return ge::GRAPH_FAILED; + } + *outputVShape = *inputVShape; + + gert::Shape* outputVarShape = context->GetOutputShape(2); + if (optiling::CheckNullPointer(outputVarShape, "outputVarShape") != 
ge::GRAPH_SUCCESS) { + return ge::GRAPH_FAILED; + } + const gert::Shape* inputVarShape = context->GetInputShape(4); + if (optiling::CheckNullPointer(inputVarShape, "inputVarShape") != ge::GRAPH_SUCCESS) { + return ge::GRAPH_FAILED; + } + *outputVarShape = *inputVarShape; + + return GRAPH_SUCCESS; } +static ge::graphStatus LazyAdamInferDataType(gert::InferDataTypeContext* context) +{ + return GRAPH_SUCCESS; +} +} // namespace ge namespace ops { - class LazyAdam : public OpDef { - public: - explicit LazyAdam(const char* name) : OpDef(name) - { - this->Input("gradient") - .ParamType(REQUIRED) - .DataType({ge::DT_FLOAT}) - .Format({ge::FORMAT_ND}) - .UnknownShapeFormat({ge::FORMAT_ND}); - this->Input("indices") - .ParamType(REQUIRED) - .DataType({ge::DT_INT32}) - .Format({ge::FORMAT_ND}) - .UnknownShapeFormat({ge::FORMAT_ND}); - this->Input("inputM") - .ParamType(REQUIRED) - .DataType({ge::DT_FLOAT}) - .Format({ge::FORMAT_ND}) - .UnknownShapeFormat({ge::FORMAT_ND}); - this->Input("inputV") - .ParamType(REQUIRED) - .DataType({ge::DT_FLOAT}) - .Format({ge::FORMAT_ND}) - .UnknownShapeFormat({ge::FORMAT_ND}); - this->Input("inputVar") - .ParamType(REQUIRED) - .DataType({ge::DT_FLOAT}) - .Format({ge::FORMAT_ND}) - .UnknownShapeFormat({ge::FORMAT_ND}); - this->Input("lr") - .ParamType(REQUIRED) - .DataType({ge::DT_FLOAT}) - .Format({ge::FORMAT_ND}) - .UnknownShapeFormat({ge::FORMAT_ND}); - this->Output("inputM") - .ParamType(REQUIRED) - .DataType({ge::DT_FLOAT}) - .Format({ge::FORMAT_ND}) - .UnknownShapeFormat({ge::FORMAT_ND}); - this->Output("inputV") - .ParamType(REQUIRED) - .DataType({ge::DT_FLOAT}) - .Format({ge::FORMAT_ND}) - .UnknownShapeFormat({ge::FORMAT_ND}); - this->Output("inputVar") - .ParamType(REQUIRED) - .DataType({ge::DT_FLOAT}) - .Format({ge::FORMAT_ND}) - .UnknownShapeFormat({ge::FORMAT_ND}); - this->Attr("beta1").Float(); - this->Attr("beta2").Float(); - this->Attr("epsilon").Float(); - this->SetInferShape(ge::LazyAdamInferShape).SetInferDataType(ge::LazyAdamInferDataType); - this->AICore().SetTiling(optiling::LazyAdamTilingFunc); - this->AICore().AddConfig("ascend910b"); - } - }; - - OP_ADD(LazyAdam); -} +class LazyAdam : public OpDef { +public: + explicit LazyAdam(const char* name) : OpDef(name) + { + this->Input("gradient") + .ParamType(REQUIRED) + .DataType({ge::DT_FLOAT}) + .Format({ge::FORMAT_ND}) + .UnknownShapeFormat({ge::FORMAT_ND}); + this->Input("indices") + .ParamType(REQUIRED) + .DataType({ge::DT_INT32}) + .Format({ge::FORMAT_ND}) + .UnknownShapeFormat({ge::FORMAT_ND}); + this->Input("inputM") + .ParamType(REQUIRED) + .DataType({ge::DT_FLOAT}) + .Format({ge::FORMAT_ND}) + .UnknownShapeFormat({ge::FORMAT_ND}); + this->Input("inputV") + .ParamType(REQUIRED) + .DataType({ge::DT_FLOAT}) + .Format({ge::FORMAT_ND}) + .UnknownShapeFormat({ge::FORMAT_ND}); + this->Input("inputVar") + .ParamType(REQUIRED) + .DataType({ge::DT_FLOAT}) + .Format({ge::FORMAT_ND}) + .UnknownShapeFormat({ge::FORMAT_ND}); + this->Input("lr") + .ParamType(REQUIRED) + .DataType({ge::DT_FLOAT}) + .Format({ge::FORMAT_ND}) + .UnknownShapeFormat({ge::FORMAT_ND}); + this->Output("inputM") + .ParamType(REQUIRED) + .DataType({ge::DT_FLOAT}) + .Format({ge::FORMAT_ND}) + .UnknownShapeFormat({ge::FORMAT_ND}); + this->Output("inputV") + .ParamType(REQUIRED) + .DataType({ge::DT_FLOAT}) + .Format({ge::FORMAT_ND}) + .UnknownShapeFormat({ge::FORMAT_ND}); + this->Output("inputVar") + .ParamType(REQUIRED) + .DataType({ge::DT_FLOAT}) + .Format({ge::FORMAT_ND}) + .UnknownShapeFormat({ge::FORMAT_ND}); + 
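+        // NOTE: inputM/inputV/inputVar are declared as both inputs and outputs
+        // under the same names, so the optimizer state is updated in place rather
+        // than materialized into fresh tensors; this is also why the aclnn test
+        // copies its results back out of the input device buffers.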
this->Attr("beta1").Float(); + this->Attr("beta2").Float(); + this->Attr("epsilon").Float(); + this->SetInferShape(ge::LazyAdamInferShape).SetInferDataType(ge::LazyAdamInferDataType); + this->AICore().SetTiling(optiling::LazyAdamTilingFunc); + this->AICore().AddConfig("ascend910b"); + } +}; + +OP_ADD(LazyAdam); +} // namespace ops diff --git a/cust_op/fused_lazy_adam/op_host/lazy_adam_tiling.h b/cust_op/fused_lazy_adam/op_host/lazy_adam_tiling.h index 10b11a9a..4f1534a4 100644 --- a/cust_op/fused_lazy_adam/op_host/lazy_adam_tiling.h +++ b/cust_op/fused_lazy_adam/op_host/lazy_adam_tiling.h @@ -19,23 +19,23 @@ See the License for the specific language governing permissions and namespace optiling { BEGIN_TILING_DATA_DEF(LazyAdamTilingData) - TILING_DATA_FIELD_DEF(float, beta1); - TILING_DATA_FIELD_DEF(float, beta2); - TILING_DATA_FIELD_DEF(float, epsilon); - TILING_DATA_FIELD_DEF(int32_t, dim0); - TILING_DATA_FIELD_DEF(int32_t, dim1); - TILING_DATA_FIELD_DEF(int32_t, dim2); - TILING_DATA_FIELD_DEF(int32_t, row); - TILING_DATA_FIELD_DEF(int32_t, indicesAllocSize); - TILING_DATA_FIELD_DEF(int32_t, otherAllocSize); - TILING_DATA_FIELD_DEF(int32_t, batch); - TILING_DATA_FIELD_DEF(int32_t, loopCount); - TILING_DATA_FIELD_DEF(int32_t, rowLeft); - TILING_DATA_FIELD_DEF(int32_t, loopCountTail); - TILING_DATA_FIELD_DEF(int32_t, rowLeftTail); - TILING_DATA_FIELD_DEF(int32_t, coreNum); +TILING_DATA_FIELD_DEF(float, beta1); +TILING_DATA_FIELD_DEF(float, beta2); +TILING_DATA_FIELD_DEF(float, epsilon); +TILING_DATA_FIELD_DEF(int32_t, dim0); +TILING_DATA_FIELD_DEF(int32_t, dim1); +TILING_DATA_FIELD_DEF(int32_t, dim2); +TILING_DATA_FIELD_DEF(int32_t, row); +TILING_DATA_FIELD_DEF(int32_t, indicesAllocSize); +TILING_DATA_FIELD_DEF(int32_t, otherAllocSize); +TILING_DATA_FIELD_DEF(int32_t, batch); +TILING_DATA_FIELD_DEF(int32_t, loopCount); +TILING_DATA_FIELD_DEF(int32_t, rowLeft); +TILING_DATA_FIELD_DEF(int32_t, loopCountTail); +TILING_DATA_FIELD_DEF(int32_t, rowLeftTail); +TILING_DATA_FIELD_DEF(int32_t, coreNum); END_TILING_DATA_DEF; REGISTER_TILING_DATA_CLASS(LazyAdam, LazyAdamTilingData) -} -#endif // LAZY_ADAM_TILING_H \ No newline at end of file +} // namespace optiling +#endif // LAZY_ADAM_TILING_H \ No newline at end of file -- Gitee From cdd048be9cfb7133001fb8251dee42adea8a531e Mon Sep 17 00:00:00 2001 From: penghuiyang <1060916628@qq.com> Date: Mon, 6 May 2024 16:05:50 +0800 Subject: [PATCH 092/302] =?UTF-8?q?=E7=AE=97=E5=AD=90=E6=B3=A8=E5=86=8C?= =?UTF-8?q?=E9=85=8D=E7=BD=AE=E6=B7=BB=E5=8A=A0910c?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cust_op/fused_lazy_adam/op_host/lazy_adam.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/cust_op/fused_lazy_adam/op_host/lazy_adam.cpp b/cust_op/fused_lazy_adam/op_host/lazy_adam.cpp index 77826029..fb7f86b3 100644 --- a/cust_op/fused_lazy_adam/op_host/lazy_adam.cpp +++ b/cust_op/fused_lazy_adam/op_host/lazy_adam.cpp @@ -216,6 +216,7 @@ public: this->SetInferShape(ge::LazyAdamInferShape).SetInferDataType(ge::LazyAdamInferDataType); this->AICore().SetTiling(optiling::LazyAdamTilingFunc); this->AICore().AddConfig("ascend910b"); + this->AICore().AddConfig("ascend910c"); } }; -- Gitee From f1538624fb81c35472a4da4a9f340c668aa849cc Mon Sep 17 00:00:00 2001 From: penghuiyang <1060916628@qq.com> Date: Mon, 6 May 2024 09:04:49 +0000 Subject: [PATCH 093/302] =?UTF-8?q?!117=20=E8=9E=8D=E5=90=88=E7=AE=97?= =?UTF-8?q?=E5=AD=90aclnn=E9=AA=8C=E8=AF=81=EF=BC=8C=E7=AE=97=E5=AD=90host?= 
=?UTF-8?q?=E4=BE=A7=E5=AE=9E=E7=8E=B0-part2=20*=20=E7=AE=97=E5=AD=90?= =?UTF-8?q?=E6=B3=A8=E5=86=8C=E9=85=8D=E7=BD=AE=E6=B7=BB=E5=8A=A0910c=20*?= =?UTF-8?q?=20clang-format=E6=96=87=E4=BB=B6=E6=A0=BC=E5=BC=8F=E5=8C=96=20?= =?UTF-8?q?*=20=E9=97=A8=E7=A6=81=E4=BF=AE=E6=94=B92=20*=20=E9=97=A8?= =?UTF-8?q?=E7=A6=81=E4=BF=AE=E6=94=B91=20*=20=E5=87=BA=E5=8C=85=E8=84=9A?= =?UTF-8?q?=E6=9C=AC=E8=BF=98=E5=8E=9F=20*=20Merge=20branch=20'develop'=20?= =?UTF-8?q?of=20gitee.com:ascend/mxrec=20into=20develop=20*=20=E8=9E=8D?= =?UTF-8?q?=E5=90=88=E7=AE=97=E5=AD=90aclnn=E9=AA=8C=E8=AF=81-part2=20*=20?= =?UTF-8?q?=E9=97=A8=E7=A6=81=E4=BF=AE=E6=94=B93=20*=20=E9=97=A8=E7=A6=81?= =?UTF-8?q?=E4=BF=AE=E6=94=B92=20*=20aclnn=E6=B5=8B=E8=AF=95=E9=97=A8?= =?UTF-8?q?=E7=A6=81=E4=BF=AE=E6=94=B9=20*=20LazyAdam=E8=9E=8D=E5=90=88?= =?UTF-8?q?=E7=AE=97=E5=AD=90-aclnn=E9=83=A8=E5=88=86=E6=8F=90=E4=BA=A4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../aclnn_lazy_adam_test/inc/op_runner.h | 5 + .../aclnn_lazy_adam_test/src/op_runner.cpp | 452 ++++++++++++++++++ .../src/operator_desc.cpp | 57 +++ cust_op/fused_lazy_adam/lazy_adam.json | 117 +++++ cust_op/fused_lazy_adam/op_host/lazy_adam.cpp | 224 +++++++++ .../op_host/lazy_adam_tiling.h | 41 ++ 6 files changed, 896 insertions(+) create mode 100644 cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/op_runner.cpp create mode 100644 cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/operator_desc.cpp create mode 100644 cust_op/fused_lazy_adam/lazy_adam.json create mode 100644 cust_op/fused_lazy_adam/op_host/lazy_adam.cpp create mode 100644 cust_op/fused_lazy_adam/op_host/lazy_adam_tiling.h diff --git a/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/inc/op_runner.h b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/inc/op_runner.h index 6f91f905..77f0aee5 100644 --- a/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/inc/op_runner.h +++ b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/inc/op_runner.h @@ -43,6 +43,11 @@ namespace AclnnLazyAdam { */ bool Init(); + /** + * @brief Init op runner output info + */ + bool InitOutputInfo(); + /** * @brief Get number of inputs * @return number of inputs diff --git a/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/op_runner.cpp b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/op_runner.cpp new file mode 100644 index 00000000..3d737564 --- /dev/null +++ b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/op_runner.cpp @@ -0,0 +1,452 @@ +/* Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and + limitations under the License. 
+==============================================================================*/
+
+#include "op_runner.h"
+
+#include <iomanip>
+#include <iostream>
+
+#include "acl/acl_op_compiler.h"
+#include "aclnn_lazy_adam.h"
+#include "common.h"
+
+extern bool g_isDevice;
+
+namespace AclnnLazyAdam {
+using namespace std;
+constexpr int PRINT_OUT_WIDTH = 10;
+constexpr int PRINT_OUT_PRECISION = 4;
+constexpr int STREAM_TIMEOUT = 5000;  // timeout (ms) when waiting for stream tasks to finish
+constexpr int OUTPUT_SIZE = 3;
+constexpr int INPUT_TENSOR_OFFSET = 2;
+
+OpRunner::OpRunner(OperatorDesc* opDesc) : opDesc_(opDesc)
+{
+    numInputs_ = opDesc->inputDesc.size();
+    numOutputs_ = opDesc->outputDesc.size();
+}
+
+OpRunner::~OpRunner()
+{
+    for (size_t i = 0; i < numInputs_; ++i) {
+        (void)aclDestroyTensor(inputTensor_[i]);
+        (void)aclDestroyDataBuffer(inputBuffers_[i]);
+        (void)aclrtFree(devInputs_[i]);
+        if (g_isDevice) {
+            (void)aclrtFree(hostInputs_[i]);
+        } else {
+            (void)aclrtFreeHost(hostInputs_[i]);
+        }
+    }
+    for (size_t i = 0; i < numOutputs_; ++i) {
+        if (g_isDevice) {
+            (void)aclrtFree(hostOutputs_[i]);
+        } else {
+            (void)aclrtFreeHost(hostOutputs_[i]);
+        }
+    }
+}
+
+bool OpRunner::InitOutputInfo()
+{
+    // Hand-rolled output handling: only host-side output buffers are allocated
+    // (the op writes its results into the input device buffers); the destructor
+    // must stay in sync with this.
+    numOutputs_ = OUTPUT_SIZE;
+    for (size_t i = 0; i < numOutputs_; ++i) {
+        int inputTensorIndex = i + INPUT_TENSOR_OFFSET;
+        auto size = GetInputSize(inputTensorIndex);
+
+        void* hostOutput = nullptr;
+        if (g_isDevice) {
+            if (aclrtMalloc(&hostOutput, size, ACL_MEM_MALLOC_NORMAL_ONLY) != ACL_SUCCESS) {
+                ERROR_LOG("Malloc device memory for output[%zu] failed", i);
+                return false;
+            }
+        } else {
+            if (aclrtMallocHost(&hostOutput, size) != ACL_SUCCESS) {
+                ERROR_LOG("Malloc host memory for output[%zu] failed", i);
+                return false;
+            }
+        }
+        if (hostOutput == nullptr) {
+            ERROR_LOG("Malloc host memory for output[%zu] failed", i);
+            return false;
+        }
+        hostOutputs_.emplace_back(hostOutput);
+    }
+    return true;
+}
+
+bool OpRunner::Init()
+{
+    for (size_t i = 0; i < numInputs_; ++i) {
+        auto size = GetInputSize(i);
+        void* devMem = nullptr;
+        if (aclrtMalloc(&devMem, size, ACL_MEM_MALLOC_NORMAL_ONLY) != ACL_SUCCESS) {
+            ERROR_LOG("Malloc device memory for input[%zu] failed", i);
+            return false;
+        }
+        devInputs_.emplace_back(devMem);
+        inputBuffers_.emplace_back(aclCreateDataBuffer(devMem, size));
+
+        void* hostInput = nullptr;
+        if (g_isDevice) {
+            if (aclrtMalloc(&hostInput, size, ACL_MEM_MALLOC_NORMAL_ONLY) != ACL_SUCCESS) {
+                ERROR_LOG("Malloc device memory for input[%zu] failed", i);
+                return false;
+            }
+        } else {
+            if (aclrtMallocHost(&hostInput, size) != ACL_SUCCESS) {
+                ERROR_LOG("Malloc host memory for input[%zu] failed", i);
+                return false;
+            }
+        }
+        if (hostInput == nullptr) {
+            ERROR_LOG("Malloc memory for input[%zu] failed", i);
+            return false;
+        }
+        hostInputs_.emplace_back(hostInput);
+
+        aclTensor* inputTensor =
+            aclCreateTensor(GetInputShape(i).data(), GetInputNumDims(i), GetInputDataType(i), nullptr, 0,
+                            GetInputFormat(i), GetInputShape(i).data(), GetInputNumDims(i), devInputs_[i]);
+        if (inputTensor == nullptr) {
+            ERROR_LOG("Create Tensor for input[%zu] failed", i);
+            return false;
+        }
+        inputTensor_.emplace_back(inputTensor);
+    }
+
+    return InitOutputInfo();
+}
+
+const size_t OpRunner::NumInputs()
+{
+    return numInputs_;
+}
+
+const size_t OpRunner::NumOutputs()
+{
+    return numOutputs_;
+}
+
+const size_t OpRunner::GetInputSize(size_t index) const
+{
+    if (index >= numInputs_) {
+        ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_);
+        return 0;
+    }
+    return aclGetTensorDescSize(opDesc_->inputDesc[index]);
+}
+
+const size_t OpRunner::GetInputNumDims(size_t index) const
+{
+    if (index >= numInputs_) {
+        ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_);
+        return 0;
+    }
+    return aclGetTensorDescNumDims(opDesc_->inputDesc[index]);
+}
+
+aclDataType OpRunner::GetInputDataType(size_t index) const
+{
+    if (index >= numInputs_) {
+        ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_);
+        return ACL_DT_UNDEFINED;
+    }
+    return aclGetTensorDescType(opDesc_->inputDesc[index]);
+}
+
+aclFormat OpRunner::GetInputFormat(size_t index) const
+{
+    if (index >= numInputs_) {
+        ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_);
+        return ACL_FORMAT_UNDEFINED;
+    }
+    return aclGetTensorDescFormat(opDesc_->inputDesc[index]);
+}
+
+std::vector<int64_t> OpRunner::GetInputShape(size_t index) const
+{
+    std::vector<int64_t> ret;
+    if (index >= numInputs_) {
+        ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_);
+        return ret;
+    }
+
+    auto desc = opDesc_->inputDesc[index];
+    for (size_t i = 0; i < aclGetTensorDescNumDims(desc); ++i) {
+        int64_t dimSize;
+        if (aclGetTensorDescDimV2(desc, i, &dimSize) != ACL_SUCCESS) {
+            ERROR_LOG("get dims from tensor desc failed. dims index = %zu", i);
+            ret.clear();
+            return ret;
+        }
+        ret.emplace_back(dimSize);
+    }
+    return ret;
+}
+
+size_t OpRunner::GetOutputSize(size_t index) const
+{
+    if (index >= numOutputs_) {
+        ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_);
+        return 0;
+    }
+    return aclGetTensorDescSize(opDesc_->outputDesc[index]);
+}
+
+const size_t OpRunner::GetOutputNumDims(size_t index) const
+{
+    if (index >= numOutputs_) {
+        ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_);
+        return 0;
+    }
+    return aclGetTensorDescNumDims(opDesc_->outputDesc[index]);
+}
+
+aclDataType OpRunner::GetOutputDataType(size_t index) const
+{
+    if (index >= numOutputs_) {
+        ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_);
+        return ACL_DT_UNDEFINED;
+    }
+    return aclGetTensorDescType(opDesc_->outputDesc[index]);
+}
+
+aclFormat OpRunner::GetOutputFormat(size_t index) const
+{
+    if (index >= numOutputs_) {
+        ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_);
+        return ACL_FORMAT_UNDEFINED;
+    }
+
+    return aclGetTensorDescFormat(opDesc_->outputDesc[index]);
+}
+
+std::vector<int64_t> OpRunner::GetOutputShape(size_t index) const
+{
+    std::vector<int64_t> ret;
+    if (index >= numOutputs_) {
+        ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_);
+        return ret;
+    }
+
+    auto desc = opDesc_->outputDesc[index];
+    for (size_t i = 0; i < aclGetTensorDescNumDims(desc); ++i) {
+        int64_t dimSize;
+        if (aclGetTensorDescDimV2(desc, i, &dimSize) != ACL_SUCCESS) {
+            ERROR_LOG("get dims from tensor desc failed. dims index = %zu", i);
+            ret.clear();
+            return ret;
+        }
+        ret.emplace_back(dimSize);
+    }
+    return ret;
+}
+
+size_t OpRunner::GetInputElementCount(size_t index) const
+{
+    if (index >= opDesc_->inputDesc.size()) {
+        ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_);
+        return 0;
+    }
+
+    return aclGetTensorDescElementCount(opDesc_->inputDesc[index]);
+}
+
+size_t OpRunner::GetOutputElementCount(size_t index) const
+{
+    if (index >= opDesc_->outputDesc.size()) {
+        ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_);
+        return 0;
+    }
+    return aclGetTensorDescElementCount(opDesc_->outputDesc[index]);
+}
+
+bool OpRunner::RunOp()
+{
+    for (size_t i = 0; i < numInputs_; ++i) {
+        auto size = GetInputSize(i);
+        aclrtMemcpyKind kind = ACL_MEMCPY_HOST_TO_DEVICE;
+        if (g_isDevice) {
+            kind = ACL_MEMCPY_DEVICE_TO_DEVICE;
+        }
+        if (aclrtMemcpy(devInputs_[i], size, hostInputs_[i], size, kind) != ACL_SUCCESS) {
+            ERROR_LOG("Copy input[%zu] failed", i);
+            return false;
+        }
+        INFO_LOG("Copy input[%zu] success", i);
+    }
+
+    aclrtStream stream = nullptr;
+    if (aclrtCreateStream(&stream) != ACL_SUCCESS) {
+        ERROR_LOG("Create stream failed");
+        return false;
+    }
+    INFO_LOG("Create stream success");
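+
+    // aclnn execution is two-phase: first query the workspace size and obtain
+    // an executor handle, then launch the op on a stream with that handle.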
+    size_t workspaceSize = 0;
+    aclOpExecutor* handle = nullptr;
+    auto ret = aclnnLazyAdamGetWorkspaceSize(inputTensor_[0], inputTensor_[1], inputTensor_[2], inputTensor_[3],
+                                             inputTensor_[4], inputTensor_[5], opDesc_->beta1, opDesc_->beta2,
+                                             opDesc_->epsilon, &workspaceSize, &handle);
+    if (ret != ACL_SUCCESS) {
+        (void)aclrtDestroyStream(stream);
+        ERROR_LOG("Get Operator Workspace failed. error code is %d", static_cast<int>(ret));
+        return false;
+    }
+    INFO_LOG("Execute aclnnLazyAdamGetWorkspaceSize success, workspace size %lu", workspaceSize);
+
+    void* workspace = nullptr;
+    if (workspaceSize != 0) {
+        if (aclrtMalloc(&workspace, workspaceSize, ACL_MEM_MALLOC_NORMAL_ONLY) != ACL_SUCCESS) {
+            ERROR_LOG("Malloc device memory failed");
+            (void)aclrtDestroyStream(stream);
+            return false;
+        }
+    }
+
+    ret = aclnnLazyAdam(workspace, workspaceSize, handle, stream);
+    if (ret != ACL_SUCCESS) {
+        (void)aclrtDestroyStream(stream);
+        ERROR_LOG("Execute Operator failed. error code is %d", static_cast<int>(ret));
+        return false;
+    }
+    INFO_LOG("Execute aclnnLazyAdam success");
+
+    ret = aclrtSynchronizeStreamWithTimeout(stream, STREAM_TIMEOUT);
+    if (ret != SUCCESS) {
+        ERROR_LOG("Synchronize stream failed. error code is %d", static_cast<int>(ret));
+        (void)aclrtDestroyStream(stream);
+        return false;
+    }
+    INFO_LOG("Synchronize stream success");
+
+    // Copy the in-place results (inputM, inputV, inputVar) back out as the op outputs
+    for (size_t i = 0; i < OUTPUT_SIZE; ++i) {
+        int inputTensorIndex = i + INPUT_TENSOR_OFFSET;  // skip the gradient and indices tensors
+        auto size = GetInputSize(inputTensorIndex);
+        aclrtMemcpyKind kind = ACL_MEMCPY_DEVICE_TO_HOST;
+        if (g_isDevice) {
+            kind = ACL_MEMCPY_DEVICE_TO_DEVICE;
+        }
+        if (aclrtMemcpy(hostOutputs_[i], size, devInputs_[inputTensorIndex], size, kind) != ACL_SUCCESS) {
+            ERROR_LOG("Copy output[%zu] failed", i);
+            (void)aclrtDestroyStream(stream);
+            return false;
+        }
+        INFO_LOG("Copy output[%zu] success", i);
+    }
+
+    (void)aclrtDestroyStream(stream);
+    return true;
+}
+
+template <typename T>
+void DoPrintData(const T* data, size_t count, size_t elementsPerRow)
+{
+    if (elementsPerRow == 0) {
+        throw std::runtime_error("value must not be zero.");
+    }
+    for (size_t i = 0; i < count; ++i) {
+        std::cout << std::setw(PRINT_OUT_WIDTH) << data[i];
+        if (i % elementsPerRow == elementsPerRow - 1) {
+            std::cout << std::endl;
+        }
+    }
+}
+
+void DoPrintFp16Data(const aclFloat16* data, size_t count, size_t elementsPerRow)
+{
+    if (elementsPerRow == 0) {
+        throw std::runtime_error("value must not be zero.");
+    }
+    for (size_t i = 0; i < count; ++i) {
+        std::cout << std::setw(PRINT_OUT_WIDTH) << std::setprecision(PRINT_OUT_PRECISION) << aclFloat16ToFloat(data[i]);
+        if (i % elementsPerRow == elementsPerRow - 1) {
+            std::cout << std::endl;
+        }
+    }
+}
+
+void PrintData(const void* data, size_t count, aclDataType dataType, size_t elementsPerRow)
+{
+    if (data == nullptr) {
+        ERROR_LOG("Print data failed. data is nullptr");
+        return;
+    }
+
+    switch (dataType) {
+        case ACL_BOOL:
+            DoPrintData(reinterpret_cast<const bool*>(data), count, elementsPerRow);
+            break;
+        case ACL_INT8:
+            DoPrintData(reinterpret_cast<const int8_t*>(data), count, elementsPerRow);
+            break;
+        case ACL_UINT8:
+            DoPrintData(reinterpret_cast<const uint8_t*>(data), count, elementsPerRow);
+            break;
+        case ACL_INT16:
+            DoPrintData(reinterpret_cast<const int16_t*>(data), count, elementsPerRow);
+            break;
+        case ACL_UINT16:
+            DoPrintData(reinterpret_cast<const uint16_t*>(data), count, elementsPerRow);
+            break;
+        case ACL_INT32:
+            DoPrintData(reinterpret_cast<const int32_t*>(data), count, elementsPerRow);
+            break;
+        case ACL_UINT32:
+            DoPrintData(reinterpret_cast<const uint32_t*>(data), count, elementsPerRow);
+            break;
+        case ACL_INT64:
+            DoPrintData(reinterpret_cast<const int64_t*>(data), count, elementsPerRow);
+            break;
+        case ACL_UINT64:
+            DoPrintData(reinterpret_cast<const uint64_t*>(data), count, elementsPerRow);
+            break;
+        case ACL_FLOAT16:
+            DoPrintFp16Data(reinterpret_cast<const aclFloat16*>(data), count, elementsPerRow);
+            break;
+        case ACL_FLOAT:
+            DoPrintData(reinterpret_cast<const float*>(data), count, elementsPerRow);
+            break;
+        case ACL_DOUBLE:
+            DoPrintData(reinterpret_cast<const double*>(data), count, elementsPerRow);
+            break;
+        default:
+            ERROR_LOG("Unsupported type: %d", dataType);
+    }
+}
+
+void OpRunner::PrintInput(size_t index, size_t numElementsPerRow)
+{
+    if (index >= numInputs_) {
+        ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_);
+        return;
+    }
+
+    auto desc = opDesc_->inputDesc[index];
+    PrintData(hostInputs_[index], GetInputElementCount(index), aclGetTensorDescType(desc), numElementsPerRow);
+}
+
+void OpRunner::PrintOutput(size_t index, size_t numElementsPerRow)
+{
+    if (index >= numOutputs_) {
+        ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_);
+        return;
+    }
+
+    auto desc = opDesc_->outputDesc[index];
+    PrintData(hostOutputs_[index], GetOutputElementCount(index), aclGetTensorDescType(desc), numElementsPerRow);
+}
+} // namespace AclnnLazyAdam
\ No newline at end of file
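Taken together, op_runner.cpp and the operator_desc.cpp below provide everything the test's main.cpp needs. As an orientation aside, a minimal driver sketch follows; it is illustrative rather than part of the patch: the shapes, attribute values and the `RunTinyLazyAdam` wrapper are made up, and the `beta1`/`beta2`/`epsilon` members of OperatorDesc are assumed from their use in op_runner.cpp above. The real main.cpp builds its descriptors in CreateOpDesc and loads inputs from the generated bin files.

```cpp
// Illustrative driver (sketch): exercises the LazyAdam aclnn test classes
// with tiny made-up shapes. Requires the headers from aclnn_lazy_adam_test.
#include "op_runner.h"
#include "operator_desc.h"

namespace AclnnLazyAdam {
bool RunTinyLazyAdam()
{
    // dim1 = 4 rows to update, dim0 = 16 table rows, dim2 = 8
    // (the embedding dim must be a multiple of 8 per the op constraints).
    const int64_t gradDims[] = {4, 8};
    const int64_t idxDims[] = {4};
    const int64_t stateDims[] = {16, 8};
    const int64_t lrDims[] = {1};

    OperatorDesc desc;
    desc.AddInputTensorDesc(ACL_FLOAT, 2, gradDims, ACL_FORMAT_ND)   // gradient
        .AddInputTensorDesc(ACL_INT32, 1, idxDims, ACL_FORMAT_ND)    // indices
        .AddInputTensorDesc(ACL_FLOAT, 2, stateDims, ACL_FORMAT_ND)  // inputM
        .AddInputTensorDesc(ACL_FLOAT, 2, stateDims, ACL_FORMAT_ND)  // inputV
        .AddInputTensorDesc(ACL_FLOAT, 2, stateDims, ACL_FORMAT_ND)  // inputVar
        .AddInputTensorDesc(ACL_FLOAT, 1, lrDims, ACL_FORMAT_ND);    // lr
    desc.beta1 = 0.9f;
    desc.beta2 = 0.999f;
    desc.epsilon = 1e-8f;

    OpRunner runner(&desc);
    if (!runner.Init()) {          // allocates host and device buffers
        return false;
    }
    runner.GetInputBuffer<float>(5)[0] = 0.001f;  // learning rate
    // ... fill the other five host input buffers here ...
    return runner.RunOp();  // results land in GetOutputBuffer<float>(0..2)
}
} // namespace AclnnLazyAdam
```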
diff --git a/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/operator_desc.cpp b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/operator_desc.cpp
new file mode 100644
index 00000000..13602e17
--- /dev/null
+++ b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/operator_desc.cpp
@@ -0,0 +1,57 @@
+/* Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "operator_desc.h"
+
+#include "common.h"
+
+namespace AclnnLazyAdam {
+using namespace std;
+
+OperatorDesc::OperatorDesc() {}
+
+OperatorDesc::~OperatorDesc()
+{
+    for (auto* desc : inputDesc) {
+        aclDestroyTensorDesc(desc);
+    }
+    for (auto* desc : outputDesc) {
+        aclDestroyTensorDesc(desc);
+    }
+}
+
+OperatorDesc& OperatorDesc::AddInputTensorDesc(aclDataType dataType, int numDims, const int64_t* dims, aclFormat format)
+{
+    aclTensorDesc* desc = aclCreateTensorDesc(dataType, numDims, dims, format);
+    if (desc == nullptr) {
+        ERROR_LOG("create tensor failed");
+        return *this;
+    }
+    inputDesc.emplace_back(desc);
+    return *this;
+}
+
+OperatorDesc& OperatorDesc::AddOutputTensorDesc(aclDataType dataType, int numDims, const int64_t* dims,
+                                                aclFormat format)
+{
+    aclTensorDesc* desc = aclCreateTensorDesc(dataType, numDims, dims, format);
+    if (desc == nullptr) {
+        ERROR_LOG("create tensor failed");
+        return *this;
+    }
+    outputDesc.emplace_back(desc);
+    return *this;
+}
+} // namespace AclnnLazyAdam
\ No newline at end of file
"format": [ + "ND" + ], + "type": [ + "fp32" + ] + } + ], + "attr": [ + { + "name": "beta1", + "param_type": "required", + "type": "float" + }, + { + "name": "beta2", + "param_type": "required", + "type": "float" + }, + { + "name": "epsilon", + "param_type": "required", + "type": "float" + } + ] + } +] \ No newline at end of file diff --git a/cust_op/fused_lazy_adam/op_host/lazy_adam.cpp b/cust_op/fused_lazy_adam/op_host/lazy_adam.cpp new file mode 100644 index 00000000..fb7f86b3 --- /dev/null +++ b/cust_op/fused_lazy_adam/op_host/lazy_adam.cpp @@ -0,0 +1,224 @@ +/* Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and + limitations under the License. +==============================================================================*/ + +#include "lazy_adam_tiling.h" +#include "register/op_def_registry.h" +#include "tiling/platform/platform_ascendc.h" + +namespace optiling { +constexpr int BLOCK_SIZE = 32; +constexpr int RESERVE_UB_SIZE = 20 * 1024; +constexpr int DATA_NUM_PER_COMPUTE = 8; +constexpr int32_t USR_SIZE = 256; +constexpr int32_t SYS_WORKSPACE_SIZE = 16 * 1024 * 1024; + +template +static ge::graphStatus CheckNullPointer(T* pointer, const char* errorMessage) +{ + if (pointer == nullptr) { + printf("%s nullptr\n", errorMessage); + return ge::GRAPH_FAILED; + } + + return ge::GRAPH_SUCCESS; +} + +static ge::graphStatus LazyAdamTilingFunc(gert::TilingContext* context) +{ + size_t* currentWorkspace = context->GetWorkspaceSizes(1); + if (CheckNullPointer(currentWorkspace, "currentWorkspace") != ge::GRAPH_SUCCESS) { + return ge::GRAPH_FAILED; + } + currentWorkspace[0] = SYS_WORKSPACE_SIZE + USR_SIZE; + + LazyAdamTilingData tiling; + const gert::StorageShape* indicesShape = context->GetInputShape(1); + const gert::StorageShape* inputMShape = context->GetInputShape(2); + uint64_t dim0 = inputMShape->GetStorageShape().GetDim(0); + uint64_t dim1 = indicesShape->GetStorageShape().GetDim(0); + uint64_t dim2 = inputMShape->GetStorageShape().GetDim(1); + ge::DataType inputMDtype = context->GetInputDesc(2)->GetDataType(); + int inputMDtypeSize = ge::GetSizeByDataType(inputMDtype); + ge::DataType indicesDtype = context->GetInputDesc(1)->GetDataType(); + int indicesDtypeSize = ge::GetSizeByDataType(indicesDtype); + + tiling.SaveToBuffer(context->GetRawTilingData()->GetData(), context->GetRawTilingData()->GetCapacity()); + context->GetRawTilingData()->SetDataSize(tiling.GetDataSize()); + auto attrs = context->GetAttrs(); + + float beta1 = *attrs->GetAttrPointer(0); + float beta2 = *attrs->GetAttrPointer(1); + float epsilon = *attrs->GetAttrPointer(2); + + auto platformInfo = platform_ascendc::PlatformAscendC(context->GetPlatformInfo()); + uint32_t coreNum = platformInfo.GetCoreNum(); + if (coreNum == 0) { + return ge::GRAPH_FAILED; + } + uint64_t ub; + platformInfo.GetCoreMemSize(platform_ascendc::CoreMemType::UB, ub); + ub = ub - RESERVE_UB_SIZE; + // ub大小除以每行的数据大小,得到每次处理的行数 + uint64_t row = ub / (dim2 * inputMDtypeSize * DATA_NUM_PER_COMPUTE + 1 * indicesDtypeSize); + if (row > dim1) { + row = 
+
+namespace ge {
+static ge::graphStatus LazyAdamInferShape(gert::InferShapeContext* context)
+{
+    if (optiling::CheckNullPointer(context, "context") != ge::GRAPH_SUCCESS) {
+        return ge::GRAPH_FAILED;
+    }
+
+    gert::Shape* outputMShape = context->GetOutputShape(0);
+    if (optiling::CheckNullPointer(outputMShape, "outputMShape") != ge::GRAPH_SUCCESS) {
+        return ge::GRAPH_FAILED;
+    }
+    const gert::Shape* inputMShape = context->GetInputShape(2);
+    if (optiling::CheckNullPointer(inputMShape, "inputMShape") != ge::GRAPH_SUCCESS) {
+        return ge::GRAPH_FAILED;
+    }
+    *outputMShape = *inputMShape;
+
+    gert::Shape* outputVShape = context->GetOutputShape(1);
+    if (optiling::CheckNullPointer(outputVShape, "outputVShape") != ge::GRAPH_SUCCESS) {
+        return ge::GRAPH_FAILED;
+    }
+    const gert::Shape* inputVShape = context->GetInputShape(3);
+    if (optiling::CheckNullPointer(inputVShape, "inputVShape") != ge::GRAPH_SUCCESS) {
+        return ge::GRAPH_FAILED;
+    }
+    *outputVShape = *inputVShape;
+
+    gert::Shape* outputVarShape = context->GetOutputShape(2);
+    if (optiling::CheckNullPointer(outputVarShape, "outputVarShape") != ge::GRAPH_SUCCESS) {
+        return ge::GRAPH_FAILED;
+    }
+    const gert::Shape* inputVarShape = context->GetInputShape(4);
+    if (optiling::CheckNullPointer(inputVarShape, "inputVarShape") != ge::GRAPH_SUCCESS) {
+        return ge::GRAPH_FAILED;
+    }
+    *outputVarShape = *inputVarShape;
+
+    return GRAPH_SUCCESS;
+}
+
+static ge::graphStatus LazyAdamInferDataType(gert::InferDataTypeContext* context)
+{
+    return GRAPH_SUCCESS;
+}
+} // namespace ge
+
+namespace ops {
+class LazyAdam : public OpDef {
+public:
+    explicit LazyAdam(const char* name) : OpDef(name)
+    {
+        this->Input("gradient")
+            .ParamType(REQUIRED)
+            .DataType({ge::DT_FLOAT})
+            .Format({ge::FORMAT_ND})
+            .UnknownShapeFormat({ge::FORMAT_ND});
+        this->Input("indices")
+            .ParamType(REQUIRED)
.DataType({ge::DT_INT32}) + .Format({ge::FORMAT_ND}) + .UnknownShapeFormat({ge::FORMAT_ND}); + this->Input("inputM") + .ParamType(REQUIRED) + .DataType({ge::DT_FLOAT}) + .Format({ge::FORMAT_ND}) + .UnknownShapeFormat({ge::FORMAT_ND}); + this->Input("inputV") + .ParamType(REQUIRED) + .DataType({ge::DT_FLOAT}) + .Format({ge::FORMAT_ND}) + .UnknownShapeFormat({ge::FORMAT_ND}); + this->Input("inputVar") + .ParamType(REQUIRED) + .DataType({ge::DT_FLOAT}) + .Format({ge::FORMAT_ND}) + .UnknownShapeFormat({ge::FORMAT_ND}); + this->Input("lr") + .ParamType(REQUIRED) + .DataType({ge::DT_FLOAT}) + .Format({ge::FORMAT_ND}) + .UnknownShapeFormat({ge::FORMAT_ND}); + this->Output("inputM") + .ParamType(REQUIRED) + .DataType({ge::DT_FLOAT}) + .Format({ge::FORMAT_ND}) + .UnknownShapeFormat({ge::FORMAT_ND}); + this->Output("inputV") + .ParamType(REQUIRED) + .DataType({ge::DT_FLOAT}) + .Format({ge::FORMAT_ND}) + .UnknownShapeFormat({ge::FORMAT_ND}); + this->Output("inputVar") + .ParamType(REQUIRED) + .DataType({ge::DT_FLOAT}) + .Format({ge::FORMAT_ND}) + .UnknownShapeFormat({ge::FORMAT_ND}); + this->Attr("beta1").Float(); + this->Attr("beta2").Float(); + this->Attr("epsilon").Float(); + this->SetInferShape(ge::LazyAdamInferShape).SetInferDataType(ge::LazyAdamInferDataType); + this->AICore().SetTiling(optiling::LazyAdamTilingFunc); + this->AICore().AddConfig("ascend910b"); + this->AICore().AddConfig("ascend910c"); + } +}; + +OP_ADD(LazyAdam); +} // namespace ops diff --git a/cust_op/fused_lazy_adam/op_host/lazy_adam_tiling.h b/cust_op/fused_lazy_adam/op_host/lazy_adam_tiling.h new file mode 100644 index 00000000..4f1534a4 --- /dev/null +++ b/cust_op/fused_lazy_adam/op_host/lazy_adam_tiling.h @@ -0,0 +1,41 @@ +/* Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and + limitations under the License. 
+==============================================================================*/ + +#ifndef LAZY_ADAM_TILING_H +#define LAZY_ADAM_TILING_H +#include "register/tilingdata_base.h" + +namespace optiling { +BEGIN_TILING_DATA_DEF(LazyAdamTilingData) +TILING_DATA_FIELD_DEF(float, beta1); +TILING_DATA_FIELD_DEF(float, beta2); +TILING_DATA_FIELD_DEF(float, epsilon); +TILING_DATA_FIELD_DEF(int32_t, dim0); +TILING_DATA_FIELD_DEF(int32_t, dim1); +TILING_DATA_FIELD_DEF(int32_t, dim2); +TILING_DATA_FIELD_DEF(int32_t, row); +TILING_DATA_FIELD_DEF(int32_t, indicesAllocSize); +TILING_DATA_FIELD_DEF(int32_t, otherAllocSize); +TILING_DATA_FIELD_DEF(int32_t, batch); +TILING_DATA_FIELD_DEF(int32_t, loopCount); +TILING_DATA_FIELD_DEF(int32_t, rowLeft); +TILING_DATA_FIELD_DEF(int32_t, loopCountTail); +TILING_DATA_FIELD_DEF(int32_t, rowLeftTail); +TILING_DATA_FIELD_DEF(int32_t, coreNum); +END_TILING_DATA_DEF; + +REGISTER_TILING_DATA_CLASS(LazyAdam, LazyAdamTilingData) +} // namespace optiling +#endif // LAZY_ADAM_TILING_H \ No newline at end of file -- Gitee From a16bd07b5553d59befe30ac51ea8a611e5ef09d8 Mon Sep 17 00:00:00 2001 From: yxy1684 <2270320041@qq.com> Date: Mon, 6 May 2024 09:23:27 +0000 Subject: [PATCH 094/302] !115 cleancode * cleancode * cleancode * cleancode * cleancode --- .../op_host/embedding_lookup_by_address.cpp | 2 +- .../op_kernel/embedding_lookup_by_address.cpp | 9 ++++++--- src/core/emb_table/embedding_ddr.cpp | 6 +++++- src/core/emb_table/embedding_static.cpp | 1 + src/core/utils/config.h | 2 -- 5 files changed, 13 insertions(+), 7 deletions(-) diff --git a/cust_op/cust_op_by_addr/op_host/embedding_lookup_by_address.cpp b/cust_op/cust_op_by_addr/op_host/embedding_lookup_by_address.cpp index 41a5b33a..722914d3 100644 --- a/cust_op/cust_op_by_addr/op_host/embedding_lookup_by_address.cpp +++ b/cust_op/cust_op_by_addr/op_host/embedding_lookup_by_address.cpp @@ -28,7 +28,7 @@ namespace optiling constexpr int32_t SIZE_OF_HALF = 2; constexpr int32_t SIZE_OF_FLOAT_OR_INT = 4; constexpr int32_t MIN_BLOCK_SIZE = 32; // ub空间的数据都要按照32对齐 - constexpr int32_t UB_LIMIT = 175 * 1024; + constexpr uint32_t UB_LIMIT = 175 * 1024; constexpr int32_t USR_SIZE = 256; constexpr int32_t SYS_WORKSPACE_SIZE = 16 * 1024 * 1024; constexpr int32_t PING_PONG_NUM = 1; diff --git a/cust_op/cust_op_by_addr/op_kernel/embedding_lookup_by_address.cpp b/cust_op/cust_op_by_addr/op_kernel/embedding_lookup_by_address.cpp index 3fded632..cc45c5be 100644 --- a/cust_op/cust_op_by_addr/op_kernel/embedding_lookup_by_address.cpp +++ b/cust_op/cust_op_by_addr/op_kernel/embedding_lookup_by_address.cpp @@ -16,6 +16,8 @@ See the License for the specific language governing permissions and #include "kernel_operator.h" using namespace AscendC; +namespace AscendC { + constexpr int32_t SIZE_OF_HALF = 2; constexpr int32_t SIZE_OF_FLOAT_OR_INT = 4; constexpr int32_t PADDING_ZERO_NUM_PER_TIME = 8; @@ -180,6 +182,7 @@ private: GlobalTensor srcDataBufferGm, dstDataGm; GlobalTensor srcAddrGlobal; }; +} extern "C" __global__ __aicore__ void embedding_lookup_by_address(GM_ADDR address, GM_ADDR y, GM_ADDR usrWorkspace, GM_ADDR tiling) @@ -192,7 +195,7 @@ extern "C" __global__ __aicore__ void embedding_lookup_by_address(GM_ADDR addres { case 0: { - KernelEimtable op; + AscendC::KernelEimtable op; op.Init_param(tiling); op.Init(address, y); op.Process(); @@ -200,7 +203,7 @@ extern "C" __global__ __aicore__ void embedding_lookup_by_address(GM_ADDR addres break; case 2: { - KernelEimtable op; + AscendC::KernelEimtable op; op.Init_param(tiling); 
 op.Init(address, y);
 op.Process();
@@ -208,7 +211,7 @@ extern "C" __global__ __aicore__ void embedding_lookup_by_address(GM_ADDR addres
     break;
     default:
     {
-        KernelEimtable op;
+        AscendC::KernelEimtable op;
         op.Init_param(tiling);
         op.Init(address, y);
         op.Process();
diff --git a/src/core/emb_table/embedding_ddr.cpp b/src/core/emb_table/embedding_ddr.cpp
index 8f529646..24aa07a7 100644
--- a/src/core/emb_table/embedding_ddr.cpp
+++ b/src/core/emb_table/embedding_ddr.cpp
@@ -345,7 +345,11 @@ int EmbeddingDDR::LoadHashMap(const string& savePath)
         LOG_ERROR("malloc failed: {}", strerror(errno));
         return -1;
     }
-    fileSystemPtr->Read(ss.str(), reinterpret_cast<char *>(buf), fileSize);
+    ssize_t result = fileSystemPtr->Read(ss.str(), reinterpret_cast<char *>(buf), fileSize);
+    if (result == -1) {
+        free(static_cast<void *>(buf));
+        return -1;
+    }
 
     size_t loadKeySize = fileSize / sizeof(int64_t);
 
diff --git a/src/core/emb_table/embedding_static.cpp b/src/core/emb_table/embedding_static.cpp
index 225c90c9..dab8a195 100644
--- a/src/core/emb_table/embedding_static.cpp
+++ b/src/core/emb_table/embedding_static.cpp
@@ -153,6 +153,7 @@ int EmbeddingStatic::LoadKey(const string &savePath)
 
     if (loadOffset.size() > devVocabSize) {
         LOG_ERROR("load key size exceeds device vocab size: {}", strerror(errno));
+        free(static_cast<void *>(buf));
         return -1;
     }
 
diff --git a/src/core/utils/config.h b/src/core/utils/config.h
index 3ecb4c36..fc5536f6 100644
--- a/src/core/utils/config.h
+++ b/src/core/utils/config.h
@@ -16,8 +16,6 @@ See the License for the specific language governing permissions and
 #ifndef MXREC_CONFIG_H
 #define MXREC_CONFIG_H
 
-#include <string>
-
 namespace MxRec {
 namespace RecEnvNames {
 const char *const ACL_TIMEOUT = "AclTimeout";
-- 
Gitee

From 30bd34ebdd69e48b37e9a2c48f6d4755280c8a69 Mon Sep 17 00:00:00 2001
From: penghuiyang <1060916628@qq.com>
Date: Mon, 6 May 2024 17:44:05 +0800
Subject: [PATCH 095/302] =?UTF-8?q?LazyAdam=E8=9E=8D=E5=90=88=E7=AE=97?=
 =?UTF-8?q?=E5=AD=90-part3?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 cust_op/fused_lazy_adam/README.md           | 129 +++++++++
 cust_op/fused_lazy_adam/op_kernel/lazy_adam.cpp | 245 ++++++++++++++
 cust_op/fused_lazy_adam/run.sh              |  53 ++++
 src/ops_tf/hybrid_dataset_ops.cpp           |  18 ++
 4 files changed, 445 insertions(+)
 create mode 100644 cust_op/fused_lazy_adam/README.md
 create mode 100644 cust_op/fused_lazy_adam/op_kernel/lazy_adam.cpp
 create mode 100644 cust_op/fused_lazy_adam/run.sh

diff --git a/cust_op/fused_lazy_adam/README.md b/cust_op/fused_lazy_adam/README.md
new file mode 100644
index 00000000..e0e64d23
--- /dev/null
+++ b/cust_op/fused_lazy_adam/README.md
@@ -0,0 +1,129 @@
+# LazyAdam fused optimizer op and sample notes
+
+## File layout of the fused LazyAdam op
+```shell
+├── aclnn_lazy_adam_test   # standalone single-op test case
+├── lazy_adam.json         # op prototype definition
+├── op_host                # host-side implementation of the fused LazyAdam op
+├── op_kernel              # kernel-side implementation of the fused LazyAdam op
+├── README.md              # this document
+└── run.sh                 # build-and-install script for the fused LazyAdam op
+```
+
+## Ascend C reference material
+For more detail see the official CANN guide: [Ascend C operator development](https://www.hiascend.com/document/detail/zh/canncommercial/70RC1/operatordev/Ascendcopdevg/atlas_ascendc_10_0001.html).
+
+## The fused op lazy_adam (same name as the lazy_adam optimizer)
+
+1. Op analysis
+
+a) The op implements the m, v and variable updates performed by the lazy_adam optimizer during the backward pass;
+b) Parameters:
+* gradient: the gradient the optimizer consumes;
+* indices: indices of the rows to compute and update;
+* inputM: the optimizer's first-moment estimate; updated in place;
+* inputV: the optimizer's second-moment estimate; updated in place;
+* inputVar: the variable data of the embedding table; updated in place;
+c) Constraints:
+* supported hardware: Atlas A2 series;
+* supported input data type: float32;
+* the dim of the embedding table must be a multiple of 8;
+
+2. Host-side implementation
+
+The host side lives under fused_lazy_adam/op_host: lazy_adam.cpp and lazy_adam_tiling.h.
+
+a) Tiling
+
+LazyAdamTilingFunc in namespace optiling reads the call information (input pointers, shape info) from the context and validates it, computes the data-split parameters the kernel needs (row, loopCount, batch and so on; see the comments in the tiling file), sets the block dim, and passes the attributes through TilingData.
+
+b) Shape inference
+
+Because the results are written back into the inputs in place, the InferShape and InferDataType bodies in namespace ge are empty.
+
+c) Prototype registration
+
+The LazyAdam class in namespace ops defines the op prototype and registers the op with GE.
+
+3. Kernel-side implementation
+
+The kernel side lives under fused_lazy_adam/op_kernel: lazy_adam.cpp.
+
+a) Kernel entry point: extern "C" __global__ __aicore__ void lazy_adam
+
+b) Tiling parameters: GET_TILING_DATA(tilingData, tiling) fetches the data passed in from the host side
+
+c) Init sets up the data the op runs on;
+
+d) Process moves the data in and computes, then writes the results back into the corresponding inputs;
+
+## AclNN single-op test reference
+
+For more detail see the official CANN overview: [Ascend C single-op invocation](https://www.hiascend.com/document/detail/zh/canncommercial/70RC1/operatordev/Ascendcopdevg/atlas_ascendc_10_0036.html).
+
+A single op can be invoked in two ways: through the single-op API or through model execution. mxRec provides the single-op API flow for reference.
+
+The test case lives under fused_lazy_adam/aclnn_lazy_adam_test, where:
+* inc holds the headers
+* scripts holds the Python scripts that generate and verify the data
+* input holds the bin files with the op inputs
+* output holds the generated executable execute_op, the op output bin files and the golden bin files used for verification
+* src holds the shared helpers (common), the input/output description class (operator_desc), the single-op invocation flow (op_runner) and the entry file (main)
+
+Run the single-op test with:
+```shell
+bash run.sh
+```
+
+### Prerequisites
+
+1. Create the op project per [creating an op project with msopgen](https://www.hiascend.com/document/detail/zh/canncommercial/70RC1/operatordev/Ascendcopdevg/atlas_ascendc_10_0023.html), and prepare the kernel- and host-side implementations per [kernel-side implementation](https://www.hiascend.com/document/detail/zh/canncommercial/70RC1/operatordev/Ascendcopdevg/atlas_ascendc_10_0024.html) and [host-side implementation](https://www.hiascend.com/document/detail/zh/canncommercial/70RC1/operatordev/Ascendcopdevg/atlas_ascendc_10_0026.html).
+2. Build and deploy the op per [op build and deployment](https://www.hiascend.com/document/detail/zh/canncommercial/70RC1/operatordev/Ascendcopdevg/atlas_ascendc_10_0031.html) with binary compilation enabled: in the project's CMakePresets.json set ENABLE_BINARY_PACKAGE to True. Deploying the op binary into the current environment makes later invocation easier.
+3. Check that the headers and libraries needed by API execution were generated: for mxRec, cust_op/fused_lazy_adam/lazy_adam/build_out/autogen should contain aclnn_lazy_adam.cpp, aclnn_lazy_adam.h and friends.
+
+Note: cust_op/fused_lazy_adam/run.sh deletes the build directory after installing the op. To run the single-op test, comment out its `rm -rf ./lazy_adam` step so prerequisite 3 still holds.
+
+### The fused op lazy_adam
+For the lazy_adam op, in src/main.cpp:
+
+1. InitResource: initializes AscendCL and claims runtime resources; no changes needed
+2. RunLookupOp runs the op:
+
+a) CreateOpDesc builds the op's input/output descriptions; the OperatorDesc class (inc/operator_desc.h) stores the op arguments as member variables so op_runner can use them later;
+
+b) create an OpRunner object and call, in order:
+* opRunner.Init(): allocate memory for the op's input and output data
+* SetInputData(): load the input bin files into the OpRunner buffers for the op to consume
+* opRunner.RunOp(): run the op — copy the input data over, create a stream, execute on the stream, copy the output data back, release the stream
+* ProcessOutputData(): post-process the op outputs and dump them to files for the later golden comparison
+
+3. DestroyResource: frees memory; no changes needed
+
+### The run script
+run.sh performs, in order:
+1. remove leftover generated files and logs
+2. generate the input data and the golden data
+3. build the acl executable
+4. run the executable
+5. compare against the golden files
+
+### The scripts directory
+* gen_data.py: generates the lazy_adam input data and the golden data used for accuracy checking; the dims used for testing can be changed as needed.
+* verify_result.py: compares the op outputs against the golden data and reports the result. The tolerance is loss = 1e-4, checked on:
+
+a) absolute error
+b) relative error
+c) the number of out-of-tolerance elements
+
+The op fails the accuracy bar only when all of the following hold: the absolute errors are not all below loss, the relative errors are not all below loss, and the counts of absolute and of relative errors above loss each exceed 1/10000 of the total element count (the "double one-in-ten-thousand" rule). In every other case the op passes.
+
+The tolerance loss can be adjusted as needed.
\ No newline at end of file
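Before the kernel source below, the update it computes can be written out as a small host-side reference. This is an illustrative sketch, not part of the patch: the function name is made up, it assumes float32 data, and it assumes the indices within one batch are unique (the kernel gathers, updates, then scatters, so duplicate indices would read stale values). A reference like this can serve as a starting point for a golden implementation alongside gen_data.py.

```cpp
// Reference sparse lazy-Adam update (sketch): rows with a negative index are
// skipped, matching the kernel's `index >= 0` check.
#include <cmath>
#include <cstdint>
#include <vector>

void LazyAdamReference(const std::vector<float>& grad,       // [dim1, dim2]
                       const std::vector<int32_t>& indices,  // [dim1]
                       std::vector<float>& m,                // [dim0, dim2]
                       std::vector<float>& v,                // [dim0, dim2]
                       std::vector<float>& var,              // [dim0, dim2]
                       float lr, float beta1, float beta2, float epsilon,
                       int64_t dim1, int64_t dim2)
{
    for (int64_t i = 0; i < dim1; ++i) {
        if (indices[i] < 0) {
            continue;  // padded / invalid row
        }
        const int64_t base = static_cast<int64_t>(indices[i]) * dim2;
        for (int64_t j = 0; j < dim2; ++j) {
            const float g = grad[i * dim2 + j];
            m[base + j] = beta1 * m[base + j] + (1.0f - beta1) * g;
            v[base + j] = beta2 * v[base + j] + (1.0f - beta2) * g * g;
            var[base + j] -= lr * m[base + j] / (std::sqrt(v[base + j]) + epsilon);
        }
    }
}
```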
diff --git a/cust_op/fused_lazy_adam/op_kernel/lazy_adam.cpp b/cust_op/fused_lazy_adam/op_kernel/lazy_adam.cpp
new file mode 100644
index 00000000..815e6567
--- /dev/null
+++ b/cust_op/fused_lazy_adam/op_kernel/lazy_adam.cpp
@@ -0,0 +1,245 @@
+/* Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "kernel_operator.h"
+
+using namespace AscendC;
+
+template <typename T>
+class LazyAdam {
+public:
+    __aicore__ inline LazyAdam()
+    {}
+
+    // Initialization: binds the GM buffers and fills in the tiling-derived members
+    __aicore__ inline void Init(GM_ADDR gradient, GM_ADDR indices,
+                                GM_ADDR inputM, GM_ADDR inputV, GM_ADDR inputVar, GM_ADDR lr,
+                                GM_ADDR inputMRef, GM_ADDR inputVRef, GM_ADDR inputVarRef,
+                                float beta1, float beta2, float epsilon,
+                                int32_t dim0, int32_t dim1, int32_t dim2,
+                                int32_t row, int32_t indicesAllocSize, int32_t otherAllocSize,
+                                int32_t batch, int32_t loopCount, int32_t rowLeft,
+                                int32_t loopCountTail, int32_t rowLeftTail, int32_t coreNum)
+    {
+        ASSERT(GetBlockNum() != 0 && "block dim can not be zero!");
+        // attributes
+        this->beta1 = beta1;
+        this->beta2 = beta2;
+        this->epsilon = epsilon;
+        // tiling data
+        this->dim0 = dim0;
+        this->dim1 = dim1;
+        this->dim2 = dim2;
+        this->row = row;
+        this->batch = batch;
+        this->loopCount = loopCount;
+        this->rowLeft = rowLeft;
+        this->loopCountTail = loopCountTail;
+        this->rowLeftTail = rowLeftTail;
+        this->coreNum = coreNum;
+        // GM shapes of the inputs
+        int32_t shape = this->dim0 * this->dim2;
+        int32_t shapeIndices = this->dim1 * 1;
+        int32_t shapeGradient = this->dim1 * this->dim2;
+        this->gmGradient.SetGlobalBuffer((__gm__ T *)gradient + this->batch * this->dim2 * get_block_idx(),
+                                         shapeGradient);
+        this->gmIndices.SetGlobalBuffer((__gm__ int32_t *)indices + this->batch * get_block_idx(), shapeIndices);
+
+        this->gmInputM.SetGlobalBuffer((__gm__ T *)inputM, shape);
+        this->gmInputV.SetGlobalBuffer((__gm__ T *)inputV, shape);
+        this->gmInputVar.SetGlobalBuffer((__gm__ T *)inputVar, shape);
+
+        this->gmLearningRate.SetGlobalBuffer((__gm__ T *)lr, sizeof(float));
+        this->lr = this->gmLearningRate.GetValue(0);
+
+        // alias the outputs to the inputs: results are written back in place
+        inputMRef = inputM;
+        inputVRef = inputV;
+        inputVarRef = inputVar;
+
+        // UB allocated per pass; sizes arrive from tiling already 32-byte aligned
+        this->pipe.InitBuffer(this->inQueGradient, 1, otherAllocSize);
+        this->pipe.InitBuffer(this->inQueIndices, 1, indicesAllocSize);
+        this->pipe.InitBuffer(this->queMSlice, 1, otherAllocSize);
+        this->pipe.InitBuffer(this->queVSlice, 1, otherAllocSize);
+        this->pipe.InitBuffer(this->queVarSlice, 1, otherAllocSize);
+
+        this->pipe.InitBuffer(this->calcBufM, otherAllocSize);
+        this->updateM = this->calcBufM.template Get<T>();
+
+        this->pipe.InitBuffer(this->calcBufV, otherAllocSize);
+        this->updateV = this->calcBufV.template Get<T>();
+
+        this->pipe.InitBuffer(this->calcBufVar, otherAllocSize);
+        this->updateVar = this->calcBufVar.template Get<T>();
+
+        this->pipe.InitBuffer(this->calcBuf, otherAllocSize);
+        this->temp = this->calcBuf.template Get<T>();
+    }
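+
+    // Work split (from tiling): cores 0..coreNum-2 each handle `batch` rows;
+    // the last core handles the remainder, batchTail = dim1 - batch * (coreNum - 1).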
+    // Core logic: drives the CopyIn and Compute stages of the vector pipeline
+    __aicore__ inline void Process()
+    {
+        if (get_block_idx() == this->coreNum - 1) {
+            for (int32_t i = 0; i < this->loopCountTail; i++) {
+                CopyIn(i, this->row);
+                Compute(i, this->row);
+            }
+            // tail rows on the last core
+            if (this->rowLeftTail > 0) {
+                CopyIn(this->loopCountTail, this->rowLeftTail);
+                Compute(this->loopCountTail, this->rowLeftTail);
+            }
+        } else {
+            for (int32_t i = 0; i < this->loopCount; i++) {
+                CopyIn(i, this->row);
+                Compute(i, this->row);
+            }
+            // tail rows
+            if (this->rowLeft > 0) {
+                CopyIn(this->loopCount, this->rowLeft);
+                Compute(this->loopCount, this->rowLeft);
+            }
+        }
+    }
+
+private:
+    // CopyIn stage: move one pass worth of gradient rows and indices into UB
+    __aicore__ inline void CopyIn(int32_t progress, int32_t row)
+    {
+        LocalTensor<T> localGradient = this->inQueGradient.template AllocTensor<T>();
+        uint32_t gradientDataLen = row * this->dim2 * sizeof(T);
+        // {1, len, 0, 0, 0}: one contiguous block of len bytes (unaligned copy); strides are reserved fields
+        DataCopyExtParams gradientParams{1, gradientDataLen, 0, 0, 0};
+        // padding parameters for the copy
+        DataCopyPadExtParams<T> gradientPadParams{true, 0, 2, 0};
+        DataCopyPad(localGradient, this->gmGradient[progress * this->row * this->dim2], gradientParams,
+                    gradientPadParams);
+
+        LocalTensor<int32_t> localIndices = this->inQueIndices.template AllocTensor<int32_t>();
+        uint32_t indicesDataLen = row * sizeof(int32_t);
+        DataCopyExtParams indicesParams{1, indicesDataLen, 0, 0, 0};
+        DataCopyPadExtParams<int32_t> indicesPadParams{true, 0, 2, 0};
+        DataCopyPad(localIndices, this->gmIndices[progress * this->row], indicesParams, indicesPadParams);
+
+        this->inQueGradient.EnQue(localGradient);
+        this->inQueIndices.EnQue(localIndices);
+    }
+
+    // Compute stage: gather the selected m/v/var rows, update them, scatter back
+    __aicore__ inline void Compute(int32_t progress, int32_t row)
+    {
+        LocalTensor<T> localGradient = this->inQueGradient.template DeQue<T>();
+        LocalTensor<int32_t> localIndices = this->inQueIndices.template DeQue<int32_t>();
+        Muls(localIndices, localIndices, this->dim2, row);  // turn row indices into element offsets
+        // gather the m/v/var slices selected by indices
+        LocalTensor<T> localMSlice = this->queMSlice.template AllocTensor<T>();
+        LocalTensor<T> localVSlice = this->queVSlice.template AllocTensor<T>();
+        LocalTensor<T> localVarSlice = this->queVarSlice.template AllocTensor<T>();
+
+        pipe_barrier(PIPE_ALL);
+
+        int32_t index = 0;
+        for (int32_t i = 0; i < row; i++) {
+            index = localIndices.GetValue(i);
+            if (index >= 0) {
+                DataCopy(localMSlice[i * this->dim2], gmInputM[index], this->dim2);
+                DataCopy(localVSlice[i * this->dim2], gmInputV[index], this->dim2);
+                DataCopy(localVarSlice[i * this->dim2], gmInputVar[index], this->dim2);
+            }
+        }
+
+        this->queMSlice.EnQue(localMSlice);
+        this->queVSlice.EnQue(localVSlice);
+        this->queVarSlice.EnQue(localVarSlice);
+        localMSlice = this->queMSlice.template DeQue<T>();
+        localVSlice = this->queVSlice.template DeQue<T>();
+        localVarSlice = this->queVarSlice.template DeQue<T>();
+
+        // update m: m = beta1 * m + (1 - beta1) * g
+        Muls(localMSlice, localMSlice, this->beta1, row * this->dim2);
+        Muls(this->updateM, localGradient, (1 - this->beta1), row * this->dim2);
+        this->updateM = localMSlice + this->updateM;
+
+        // update v: v = beta2 * v + (1 - beta2) * g * g
+        Muls(localVSlice, localVSlice, this->beta2, row * this->dim2);
+        Mul(this->updateV, localGradient, localGradient, row * this->dim2);
+        Muls(this->updateV, this->updateV, (1 - this->beta2), row * this->dim2);
+        this->updateV = localVSlice + this->updateV;
+
+        // update var: var -= lr * m / (sqrt(v) + epsilon)
+        Sqrt(this->updateVar, this->updateV, row * this->dim2);
+        Adds(this->updateVar, this->updateVar, this->epsilon, row * this->dim2);
+        Muls(this->temp, this->updateM, -this->lr, row * this->dim2);
+        Div(this->updateVar, this->temp, this->updateVar, row * this->dim2);
+        Add(this->updateVar, this->updateVar, localVarSlice, row * this->dim2);
+
+        pipe_barrier(PIPE_ALL);
+
+        // scatter the results back into the input tensors (in-place update)
+        for (int32_t i = 0; i < row; i++) {
+            index = localIndices.GetValue(i);
+            if (index >= 0) {
+                // the __GET_CODE_CHANNEL__ guard keeps these copies from being
+                // misidentified as matmul, which would raise an error
+#ifndef __GET_CODE_CHANNEL__
+                DataCopy(this->gmInputM[index], this->updateM[i * this->dim2], this->dim2);
+                DataCopy(this->gmInputV[index], this->updateV[i * this->dim2], this->dim2);
+                DataCopy(this->gmInputVar[index], this->updateVar[i * this->dim2], this->dim2);
+#endif
+            }
+        }
+        pipe_barrier(PIPE_ALL);
+
+        this->inQueGradient.FreeTensor(localGradient);
+        this->queMSlice.FreeTensor(localMSlice);
+        this->queVSlice.FreeTensor(localVSlice);
+        this->queVarSlice.FreeTensor(localVarSlice);
+        this->inQueIndices.FreeTensor(localIndices);
+    }
+
+private:
+    float lr, beta1, beta2, epsilon;
+    int32_t dim0, dim1, dim2, row, batch, loopCount, rowLeft, loopCountTail, rowLeftTail, coreNum;
+    LocalTensor<T> updateM, updateV, updateVar, temp;
+    LocalTensor<int32_t> localIndices;
+    GlobalTensor<T> gmGradient, gmInputM, gmInputV, gmInputVar;
+    GlobalTensor<int32_t> gmIndices;
+    GlobalTensor<T> gmLearningRate;
+    TPipe pipe;
+    TQue<QuePosition::VECIN, 1> inQueGradient, inQueIndices;
+    TQue<QuePosition::VECIN, 1> queMSlice, queVSlice, queVarSlice;
+    TBuf<TPosition::VECCALC> calcBufM;
+    TBuf<TPosition::VECCALC> calcBufV;
+    TBuf<TPosition::VECCALC> calcBufVar;
+    TBuf<TPosition::VECCALC> calcBuf;
+};
+
+extern "C" __global__ __aicore__ void lazy_adam(GM_ADDR gradient, GM_ADDR indices,
+                                                GM_ADDR inputM, GM_ADDR inputV, GM_ADDR inputVar, GM_ADDR lr,
+                                                GM_ADDR inputMRef, GM_ADDR inputVRef, GM_ADDR inputVarRef,
+                                                GM_ADDR workspace, GM_ADDR tiling)
+{
+    GET_TILING_DATA(tiling_data, tiling);
+    LazyAdam<float> op32;
+    op32.Init(gradient, indices,
+              inputM, inputV, inputVar, lr,
+              inputMRef, inputVRef, inputVarRef,
+              tiling_data.beta1, tiling_data.beta2, tiling_data.epsilon,
+              tiling_data.dim0, tiling_data.dim1, tiling_data.dim2,
+              tiling_data.row, tiling_data.indicesAllocSize, tiling_data.otherAllocSize,
+              tiling_data.batch, tiling_data.loopCount, tiling_data.rowLeft,
+              tiling_data.loopCountTail, tiling_data.rowLeftTail, tiling_data.coreNum);
+    op32.Process();
}
\ No newline at end of file
diff --git a/cust_op/fused_lazy_adam/run.sh b/cust_op/fused_lazy_adam/run.sh
new file mode 100644
index 00000000..c1e80ce5
--- /dev/null
+++ b/cust_op/fused_lazy_adam/run.sh
@@ -0,0 +1,53 @@
+#!/bin/bash
+# Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================

+source /etc/profile
+
+# locate msopgen and add its directory to PATH
+msopgen_path=$(find /usr/local/Ascend/ -name msopgen | grep bin)
+parent_dir=$(dirname "$msopgen_path")
+export PATH=$parent_dir:$PATH
+
+# generate the buildable op project with msopgen
+rm -rf ./lazy_adam
+msopgen gen -i lazy_adam.json -f tf -c ai_core-Ascend910B1 -lan cpp -out ./lazy_adam -m 0 -op LazyAdam
+
+cp -rf op_kernel lazy_adam/
+cp -rf op_host lazy_adam/
+
+cd lazy_adam
+
+# make sure CMakePresets.json exists in the generated project
+if [ ! -f "CMakePresets.json" ]; then
+    echo "ERROR: CMakePresets.json file does not exist."
+ exit 1 +fi + +# 禁止生成CRC校验和 +sed -i 's/--nomd5/--nomd5 --nocrc/g' ./cmake/makeself.cmake + +# 修改cann安装路径 +sed -i 's:"/usr/local/Ascend/latest":"/usr/local/Ascend/ascend-toolkit/latest":g' CMakePresets.json +# 修改vendor_name 防止覆盖之前vendor_name为customize的算子 +sed -i 's:"customize":"customize_lazy_adam":g' CMakePresets.json + +bash build.sh + +# 安装编译成功的算子包 +bash ./build_out/custom_opp*.run + +cd .. +rm -rf ./lazy_adam diff --git a/src/ops_tf/hybrid_dataset_ops.cpp b/src/ops_tf/hybrid_dataset_ops.cpp index c3687e8a..2eee8531 100644 --- a/src/ops_tf/hybrid_dataset_ops.cpp +++ b/src/ops_tf/hybrid_dataset_ops.cpp @@ -640,4 +640,22 @@ namespace tensorflow { }); REGISTER_KERNEL_BUILDER(Name("EmbeddingUpdateByAddress").Device(DEVICE_CPU), MxRec::CustOps); + + // ######################## tf注册LazyAdam融合算子同名算子 ######################## + REGISTER_OP("LazyAdam") + .Input("gradient: float32") + .Input("indices: int32") + .Input("input_m: float32") + .Input("input_v: float32") + .Input("input_var: float32") + .Input("lr: float32") + .Attr("beta1: float") + .Attr("beta2: float") + .Attr("epsilon: float") + .Output("output_m: float32") + .Output("output_v: float32") + .Output("output_var: float32") + .SetIsStateful() + .SetShapeFn(::tensorflow::shape_inference::UnknownShape); + REGISTER_KERNEL_BUILDER(Name("LazyAdam").Device(DEVICE_CPU), MxRec::CustOps); } \ No newline at end of file -- Gitee From b84fd8fbb628fd42138186575e872e92b6a8f3ce Mon Sep 17 00:00:00 2001 From: penghuiyang <1060916628@qq.com> Date: Mon, 6 May 2024 19:30:55 +0800 Subject: [PATCH 096/302] =?UTF-8?q?=E5=88=A0=E9=99=A4=E6=9C=AA=E4=BD=BF?= =?UTF-8?q?=E7=94=A8=E5=88=B0=E7=9A=84Print=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../aclnn_lazy_adam_test/inc/op_runner.h | 42 +- .../aclnn_lazy_adam_test/src/op_runner.cpp | 632 ++++++++---------- 2 files changed, 281 insertions(+), 393 deletions(-) diff --git a/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/inc/op_runner.h b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/inc/op_runner.h index 77f0aee5..2e25341f 100644 --- a/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/inc/op_runner.h +++ b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/inc/op_runner.h @@ -31,7 +31,7 @@ namespace AclnnLazyAdam { * @brief Constructor * @param [in] opDesc: op description */ - explicit OpRunner(OperatorDesc *opDesc); + explicit OpRunner(OperatorDesc* opDesc); /** * @brief Destructor @@ -121,13 +121,13 @@ namespace AclnnLazyAdam { * @return host address of the input */ template - T *GetInputBuffer(size_t index) + T* GetInputBuffer(size_t index) { if (index >= numInputs_) { ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); return nullptr; } - return reinterpret_cast(hostInputs_[index]); + return reinterpret_cast(hostInputs_[index]); } /** @@ -137,30 +137,16 @@ namespace AclnnLazyAdam { * @return host address of the output */ template - const T *GetOutputBuffer(size_t index) + const T* GetOutputBuffer(size_t index) { if (index >= numOutputs_) { ERROR_LOG("index out of range. 
index = %zu, numOutputs = %zu", index, numOutputs_); return nullptr; } - return reinterpret_cast(hostOutputs_[index]); + return reinterpret_cast(hostOutputs_[index]); } - /** - * @brief Print readable input by index - * @param [in] index: input index - * @param [in] numElementsPerRow: number of elements per row - */ - void PrintInput(size_t index, size_t numElementsPerRow = 16); - - /** - * @brief Print readable output by index - * @param [in] index: output index - * @param [in] numElementsPerRow: number of elements per row - */ - void PrintOutput(size_t index, size_t numElementsPerRow = 16); - /** * @brief Compile static op * @return compile result @@ -183,18 +169,18 @@ namespace AclnnLazyAdam { size_t numInputs_; size_t numOutputs_; - std::vector inputBuffers_; - std::vector outputBuffers_; + std::vector inputBuffers_; + std::vector outputBuffers_; - std::vector devInputs_; - std::vector devOutputs_; + std::vector devInputs_; + std::vector devOutputs_; - std::vector hostInputs_; - std::vector hostOutputs_; + std::vector hostInputs_; + std::vector hostOutputs_; - std::vector inputTensor_; - std::vector outputTensor_; - OperatorDesc *opDesc_; + std::vector inputTensor_; + std::vector outputTensor_; + OperatorDesc* opDesc_; }; } #endif // OP_RUNNER_H diff --git a/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/op_runner.cpp b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/op_runner.cpp index 3d737564..e9711379 100644 --- a/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/op_runner.cpp +++ b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/op_runner.cpp @@ -25,428 +25,330 @@ See the License for the specific language governing permissions and extern bool g_isDevice; namespace AclnnLazyAdam { -using namespace std; -constexpr int PRINT_OUT_WIDTH = 10; -constexpr int PRINT_OUT_PRECISION = 4; -constexpr int STREAM_TIMEOUT = 5000; // 等待Stream任务完成,超时时间单位:ms -constexpr int OUTPUT_SIZE = 3; -constexpr int INPUT_TENSOR_OFFSET = 2; - -OpRunner::OpRunner(OperatorDesc* opDesc) : opDesc_(opDesc) -{ - numInputs_ = opDesc->inputDesc.size(); - numOutputs_ = opDesc->outputDesc.size(); -} - -OpRunner::~OpRunner() -{ - for (size_t i = 0; i < numInputs_; ++i) { - (void)aclDestroyTensor(inputTensor_[i]); - (void)aclDestroyDataBuffer(inputBuffers_[i]); - (void)aclrtFree(devInputs_[i]); - if (g_isDevice) { - (void)aclrtFree(hostInputs_[i]); - } else { - (void)aclrtFreeHost(hostInputs_[i]); + using namespace std; + constexpr int PRINT_OUT_WIDTH = 10; + constexpr int PRINT_OUT_PRECISION = 4; + constexpr int STREAM_TIMEOUT = 5000; // 等待Stream任务完成,超时时间单位:ms + constexpr int OUTPUT_SIZE = 3; + constexpr int INPUT_TENSOR_OFFSET = 2; + + OpRunner::OpRunner(OperatorDesc* opDesc) : opDesc_(opDesc) + { + numInputs_ = opDesc->inputDesc.size(); + numOutputs_ = opDesc->outputDesc.size(); + } + + OpRunner::~OpRunner() + { + for (size_t i = 0; i < numInputs_; ++i) { + (void) aclDestroyTensor(inputTensor_[i]); + (void) aclDestroyDataBuffer(inputBuffers_[i]); + (void) aclrtFree(devInputs_[i]); + if (g_isDevice) { + (void) aclrtFree(hostInputs_[i]); + } else { + (void) aclrtFreeHost(hostInputs_[i]); + } } - } - for (size_t i = 0; i < numOutputs_; ++i) { - if (g_isDevice) { - (void)aclrtFree(hostOutputs_[i]); - } else { - (void)aclrtFreeHost(hostOutputs_[i]); + for (size_t i = 0; i < numOutputs_; ++i) { + if (g_isDevice) { + (void) aclrtFree(hostOutputs_[i]); + } else { + (void) aclrtFreeHost(hostOutputs_[i]); + } } } -} - -bool OpRunner::InitOutputInfo() -{ - // 手动修改输出数据实现,仅申请host上的输出数据空间,析构出需同时适配 - numOutputs_ = OUTPUT_SIZE; - 
for (size_t i = 0; i < numOutputs_; ++i) { - int inputTensorIndex = i + INPUT_TENSOR_OFFSET; - auto size = GetInputSize(inputTensorIndex); - - void* hostOutput = nullptr; - if (g_isDevice) { - if (aclrtMalloc(&hostOutput, size, ACL_MEM_MALLOC_NORMAL_ONLY) != ACL_SUCCESS) { - ERROR_LOG("Malloc device memory for output[%zu] failed", i); - return false; + + bool OpRunner::InitOutputInfo() + { + // 手动修改输出数据实现,仅申请host上的输出数据空间,析构出需同时适配 + numOutputs_ = OUTPUT_SIZE; + for (size_t i = 0; i < numOutputs_; ++i) { + int inputTensorIndex = i + INPUT_TENSOR_OFFSET; + auto size = GetInputSize(inputTensorIndex); + + void* hostOutput = nullptr; + if (g_isDevice) { + if (aclrtMalloc(&hostOutput, size, ACL_MEM_MALLOC_NORMAL_ONLY) != ACL_SUCCESS) { + ERROR_LOG("Malloc device memory for output[%zu] failed", i); + return false; + } + } else { + if (aclrtMallocHost(&hostOutput, size) != ACL_SUCCESS) { + ERROR_LOG("Malloc device memory for output[%zu] failed", i); + return false; + } } - } else { - if (aclrtMallocHost(&hostOutput, size) != ACL_SUCCESS) { - ERROR_LOG("Malloc device memory for output[%zu] failed", i); + if (hostOutput == nullptr) { + ERROR_LOG("Malloc host memory for output[%zu] failed", i); return false; } + hostOutputs_.emplace_back(hostOutput); } - if (hostOutput == nullptr) { - ERROR_LOG("Malloc host memory for output[%zu] failed", i); - return false; - } - hostOutputs_.emplace_back(hostOutput); + return true; } - return true; -} - -bool OpRunner::Init() -{ - for (size_t i = 0; i < numInputs_; ++i) { - auto size = GetInputSize(i); - void* devMem = nullptr; - if (aclrtMalloc(&devMem, size, ACL_MEM_MALLOC_NORMAL_ONLY) != ACL_SUCCESS) { - ERROR_LOG("Malloc device memory for input[%zu] failed", i); - return false; - } - devInputs_.emplace_back(devMem); - inputBuffers_.emplace_back(aclCreateDataBuffer(devMem, size)); - void* hostInput = nullptr; - if (g_isDevice) { - if (aclrtMalloc(&hostInput, size, ACL_MEM_MALLOC_NORMAL_ONLY) != ACL_SUCCESS) { + bool OpRunner::Init() + { + for (size_t i = 0; i < numInputs_; ++i) { + auto size = GetInputSize(i); + void* devMem = nullptr; + if (aclrtMalloc(&devMem, size, ACL_MEM_MALLOC_NORMAL_ONLY) != ACL_SUCCESS) { ERROR_LOG("Malloc device memory for input[%zu] failed", i); return false; } - } else { - if (aclrtMallocHost(&hostInput, size) != ACL_SUCCESS) { - ERROR_LOG("Malloc device memory for input[%zu] failed", i); + devInputs_.emplace_back(devMem); + inputBuffers_.emplace_back(aclCreateDataBuffer(devMem, size)); + + void* hostInput = nullptr; + if (g_isDevice) { + if (aclrtMalloc(&hostInput, size, ACL_MEM_MALLOC_NORMAL_ONLY) != ACL_SUCCESS) { + ERROR_LOG("Malloc device memory for input[%zu] failed", i); + return false; + } + } else { + if (aclrtMallocHost(&hostInput, size) != ACL_SUCCESS) { + ERROR_LOG("Malloc device memory for input[%zu] failed", i); + return false; + } + } + if (hostInput == nullptr) { + ERROR_LOG("Malloc memory for input[%zu] failed", i); return false; } - } - if (hostInput == nullptr) { - ERROR_LOG("Malloc memory for input[%zu] failed", i); - return false; - } - hostInputs_.emplace_back(hostInput); + hostInputs_.emplace_back(hostInput); - aclTensor* inputTensor = - aclCreateTensor(GetInputShape(i).data(), GetInputNumDims(i), GetInputDataType(i), nullptr, 0, - GetInputFormat(i), GetInputShape(i).data(), GetInputNumDims(i), devInputs_[i]); - if (inputTensor == nullptr) { - ERROR_LOG("Create Tensor for input[%zu] failed", i); - return false; + aclTensor* inputTensor = + aclCreateTensor(GetInputShape(i).data(), GetInputNumDims(i), 
GetInputDataType(i), nullptr, 0, + GetInputFormat(i), GetInputShape(i).data(), GetInputNumDims(i), devInputs_[i]); + if (inputTensor == nullptr) { + ERROR_LOG("Create Tensor for input[%zu] failed", i); + return false; + } + inputTensor_.emplace_back(inputTensor); } - inputTensor_.emplace_back(inputTensor); - } - - return InitOutputInfo(); -} - -const size_t OpRunner::NumInputs() -{ - return numInputs_; -} -const size_t OpRunner::NumOutputs() -{ - return numOutputs_; -} - -const size_t OpRunner::GetInputSize(size_t index) const -{ - if (index >= numInputs_) { - ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); - return 0; - } - return aclGetTensorDescSize(opDesc_->inputDesc[index]); -} - -const size_t OpRunner::GetInputNumDims(size_t index) const -{ - if (index >= numInputs_) { - ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); - return 0; - } - return aclGetTensorDescNumDims(opDesc_->inputDesc[index]); -} - -aclDataType OpRunner::GetInputDataType(size_t index) const -{ - if (index >= numInputs_) { - ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); - return ACL_DT_UNDEFINED; + return InitOutputInfo(); } - return aclGetTensorDescType(opDesc_->inputDesc[index]); -} - -aclFormat OpRunner::GetInputFormat(size_t index) const -{ - if (index >= numInputs_) { - ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); - return ACL_FORMAT_UNDEFINED; + + const size_t OpRunner::NumInputs() + { + return numInputs_; } - return aclGetTensorDescFormat(opDesc_->inputDesc[index]); -} - -std::vector OpRunner::GetInputShape(size_t index) const -{ - std::vector ret; - if (index >= numInputs_) { - ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); - return ret; + + const size_t OpRunner::NumOutputs() + { + return numOutputs_; } - auto desc = opDesc_->inputDesc[index]; - for (size_t i = 0; i < aclGetTensorDescNumDims(desc); ++i) { - int64_t dimSize; - if (aclGetTensorDescDimV2(desc, i, &dimSize) != ACL_SUCCESS) { - ERROR_LOG("get dims from tensor desc failed. dims index = %zu", i); - ret.clear(); - return ret; + const size_t OpRunner::GetInputSize(size_t index) const + { + if (index >= numInputs_) { + ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); + return 0; } - ret.emplace_back(dimSize); - } - return ret; -} - -size_t OpRunner::GetOutputSize(size_t index) const -{ - if (index >= numOutputs_) { - ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); - return 0; + return aclGetTensorDescSize(opDesc_->inputDesc[index]); } - return aclGetTensorDescSize(opDesc_->outputDesc[index]); -} - -const size_t OpRunner::GetOutputNumDims(size_t index) const -{ - if (index >= numOutputs_) { - ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); - return 0; + + const size_t OpRunner::GetInputNumDims(size_t index) const + { + if (index >= numInputs_) { + ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); + return 0; + } + return aclGetTensorDescNumDims(opDesc_->inputDesc[index]); } - return aclGetTensorDescNumDims(opDesc_->outputDesc[index]); -} - -aclDataType OpRunner::GetOutputDataType(size_t index) const -{ - if (index >= numOutputs_) { - ERROR_LOG("index out of range. 
index = %zu, numOutputs = %zu", index, numOutputs_); - return ACL_DT_UNDEFINED; + + aclDataType OpRunner::GetInputDataType(size_t index) const + { + if (index >= numInputs_) { + ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); + return ACL_DT_UNDEFINED; + } + return aclGetTensorDescType(opDesc_->inputDesc[index]); } - return aclGetTensorDescType(opDesc_->outputDesc[index]); -} - -aclFormat OpRunner::GetOutputFormat(size_t index) const -{ - if (index >= numOutputs_) { - ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); - return ACL_FORMAT_UNDEFINED; + + aclFormat OpRunner::GetInputFormat(size_t index) const + { + if (index >= numInputs_) { + ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); + return ACL_FORMAT_UNDEFINED; + } + return aclGetTensorDescFormat(opDesc_->inputDesc[index]); } - return aclGetTensorDescFormat(opDesc_->outputDesc[index]); -} + std::vector OpRunner::GetInputShape(size_t index) const + { + std::vector ret; + if (index >= numInputs_) { + ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); + return ret; + } -std::vector OpRunner::GetOutputShape(size_t index) const -{ - std::vector ret; - if (index >= numOutputs_) { - ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); + auto desc = opDesc_->inputDesc[index]; + for (size_t i = 0; i < aclGetTensorDescNumDims(desc); ++i) { + int64_t dimSize; + if (aclGetTensorDescDimV2(desc, i, &dimSize) != ACL_SUCCESS) { + ERROR_LOG("get dims from tensor desc failed. dims index = %zu", i); + ret.clear(); + return ret; + } + ret.emplace_back(dimSize); + } return ret; } - auto desc = opDesc_->outputDesc[index]; - for (size_t i = 0; i < aclGetTensorDescNumDims(desc); ++i) { - int64_t dimSize; - if (aclGetTensorDescDimV2(desc, i, &dimSize) != ACL_SUCCESS) { - ERROR_LOG("get dims from tensor desc failed. dims index = %zu", i); - ret.clear(); - return ret; + size_t OpRunner::GetOutputSize(size_t index) const + { + if (index >= numOutputs_) { + ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); + return 0; } - ret.emplace_back(dimSize); - } - return ret; -} - -size_t OpRunner::GetInputElementCount(size_t index) const -{ - if (index >= opDesc_->inputDesc.size()) { - ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); - return 0; + return aclGetTensorDescSize(opDesc_->outputDesc[index]); } - return aclGetTensorDescElementCount(opDesc_->inputDesc[index]); -} - -size_t OpRunner::GetOutputElementCount(size_t index) const -{ - if (index >= opDesc_->outputDesc.size()) { - ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); - return 0; - } - return aclGetTensorDescElementCount(opDesc_->outputDesc[index]); -} - -bool OpRunner::RunOp() -{ - for (size_t i = 0; i < numInputs_; ++i) { - auto size = GetInputSize(i); - aclrtMemcpyKind kind = ACL_MEMCPY_HOST_TO_DEVICE; - if (g_isDevice) { - kind = ACL_MEMCPY_DEVICE_TO_DEVICE; + const size_t OpRunner::GetOutputNumDims(size_t index) const + { + if (index >= numOutputs_) { + ERROR_LOG("index out of range. 
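The accessors in this file repeat the same bounds check and `ERROR_LOG` for every queried property. One way to factor the duplication is a checked descriptor lookup; the helper below is hypothetical (it is not declared in op_runner.h) and only illustrates the refactoring.

```cpp
// Hypothetical helper: returns the descriptor, or nullptr after logging.
const aclTensorDesc* OpRunner::InputDescAt(size_t index) const
{
    if (index >= numInputs_) {
        ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_);
        return nullptr;
    }
    return opDesc_->inputDesc[index];
}

// GetInputSize() would then reduce to:
//   const aclTensorDesc* desc = InputDescAt(index);
//   return desc != nullptr ? aclGetTensorDescSize(desc) : 0;
```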
index = %zu, numOutputs = %zu", index, numOutputs_); + return 0; } - if (aclrtMemcpy(devInputs_[i], size, hostInputs_[i], size, kind) != ACL_SUCCESS) { - ERROR_LOG("Copy input[%zu] failed", i); - return false; - } - INFO_LOG("Copy input[%zu] success", i); + return aclGetTensorDescNumDims(opDesc_->outputDesc[index]); } - aclrtStream stream = nullptr; - if (aclrtCreateStream(&stream) != ACL_SUCCESS) { - ERROR_LOG("Create stream failed"); - return false; - } - INFO_LOG("Create stream success"); - - size_t workspaceSize = 0; - aclOpExecutor* handle = nullptr; - auto ret = aclnnLazyAdamGetWorkspaceSize(inputTensor_[0], inputTensor_[1], inputTensor_[2], inputTensor_[3], - inputTensor_[4], inputTensor_[5], opDesc_->beta1, opDesc_->beta2, - opDesc_->epsilon, &workspaceSize, &handle); - if (ret != ACL_SUCCESS) { - (void)aclrtDestroyStream(stream); - ERROR_LOG("Get Operator Workspace failed. error code is %d", static_cast(ret)); - return false; + aclDataType OpRunner::GetOutputDataType(size_t index) const + { + if (index >= numOutputs_) { + ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); + return ACL_DT_UNDEFINED; + } + return aclGetTensorDescType(opDesc_->outputDesc[index]); } - INFO_LOG("Execute aclnnAddCustomGetWorkspaceSize success, workspace size %lu", workspaceSize); - void* workspace = nullptr; - if (workspaceSize != 0) { - if (aclrtMalloc(&workspace, workspaceSize, ACL_MEM_MALLOC_NORMAL_ONLY) != ACL_SUCCESS) { - ERROR_LOG("Malloc device memory failed"); + aclFormat OpRunner::GetOutputFormat(size_t index) const + { + if (index >= numOutputs_) { + ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); + return ACL_FORMAT_UNDEFINED; } - } - ret = aclnnLazyAdam(workspace, workspaceSize, handle, stream); - if (ret != ACL_SUCCESS) { - (void)aclrtDestroyStream(stream); - ERROR_LOG("Execute Operator failed. error code is %d", static_cast(ret)); - return false; + return aclGetTensorDescFormat(opDesc_->outputDesc[index]); } - INFO_LOG("Execute aclnnAddCustom success"); - ret = aclrtSynchronizeStreamWithTimeout(stream, STREAM_TIMEOUT); - if (ret != SUCCESS) { - ERROR_LOG("Synchronize stream failed. error code is %d", static_cast(ret)); - (void)aclrtDestroyStream(stream); - return false; - } - INFO_LOG("Synchronize stream success"); - - // 把输入数据:inputM inputV inputVar 作为输出数据拷贝出来 - for (size_t i = 0; i < OUTPUT_SIZE; ++i) { - int inputTensorIndex = i + INPUT_TENSOR_OFFSET; // 加上输入tensor偏移值 - auto size = GetInputSize(inputTensorIndex); - aclrtMemcpyKind kind = ACL_MEMCPY_DEVICE_TO_HOST; - if (g_isDevice) { - kind = ACL_MEMCPY_DEVICE_TO_DEVICE; + std::vector OpRunner::GetOutputShape(size_t index) const + { + std::vector ret; + if (index >= numOutputs_) { + ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); + return ret; } - if (aclrtMemcpy(hostOutputs_[i], size, devInputs_[inputTensorIndex], size, kind) != ACL_SUCCESS) { - INFO_LOG("Copy output[%zu] success", i); - (void)aclrtDestroyStream(stream); - return false; + + auto desc = opDesc_->outputDesc[index]; + for (size_t i = 0; i < aclGetTensorDescNumDims(desc); ++i) { + int64_t dimSize; + if (aclGetTensorDescDimV2(desc, i, &dimSize) != ACL_SUCCESS) { + ERROR_LOG("get dims from tensor desc failed. 
dims index = %zu", i); + ret.clear(); + return ret; + } + ret.emplace_back(dimSize); } - INFO_LOG("Copy output[%zu] success", i); + return ret; } - (void)aclrtDestroyStream(stream); - return true; -} + size_t OpRunner::GetInputElementCount(size_t index) const + { + if (index >= opDesc_->inputDesc.size()) { + ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); + return 0; + } -template -void DoPrintData(const T* data, size_t count, size_t elementsPerRow) -{ - if (elementsPerRow == 0) { - throw std::runtime_error("value must not be zero."); + return aclGetTensorDescElementCount(opDesc_->inputDesc[index]); } - for (size_t i = 0; i < count; ++i) { - std::cout << std::setw(PRINT_OUT_WIDTH) << data[i]; - if (i % elementsPerRow == elementsPerRow - 1) { - std::cout << std::endl; + + size_t OpRunner::GetOutputElementCount(size_t index) const + { + if (index >= opDesc_->outputDesc.size()) { + ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); + return 0; } + return aclGetTensorDescElementCount(opDesc_->outputDesc[index]); } -} -void DoPrintFp16Data(const aclFloat16* data, size_t count, size_t elementsPerRow) -{ - if (elementsPerRow == 0) { - throw std::runtime_error("value must not be zero."); - } - for (size_t i = 0; i < count; ++i) { - std::cout << std::setw(PRINT_OUT_WIDTH) << std::setprecision(PRINT_OUT_PRECISION) << aclFloat16ToFloat(data[i]); - if (i % elementsPerRow == elementsPerRow - 1) { - std::cout << std::endl; + bool OpRunner::RunOp() + { + for (size_t i = 0; i < numInputs_; ++i) { + auto size = GetInputSize(i); + aclrtMemcpyKind kind = ACL_MEMCPY_HOST_TO_DEVICE; + if (g_isDevice) { + kind = ACL_MEMCPY_DEVICE_TO_DEVICE; + } + if (aclrtMemcpy(devInputs_[i], size, hostInputs_[i], size, kind) != ACL_SUCCESS) { + ERROR_LOG("Copy input[%zu] failed", i); + return false; + } + INFO_LOG("Copy input[%zu] success", i); } - } -} -void PrintData(const void* data, size_t count, aclDataType dataType, size_t elementsPerRow) -{ - if (data == nullptr) { - ERROR_LOG("Print data failed. data is nullptr"); - return; - } + aclrtStream stream = nullptr; + if (aclrtCreateStream(&stream) != ACL_SUCCESS) { + ERROR_LOG("Create stream failed"); + return false; + } + INFO_LOG("Create stream success"); + + size_t workspaceSize = 0; + aclOpExecutor* handle = nullptr; + auto ret = aclnnLazyAdamGetWorkspaceSize(inputTensor_[0], inputTensor_[1], inputTensor_[2], inputTensor_[3], + inputTensor_[4], inputTensor_[5], opDesc_->beta1, opDesc_->beta2, + opDesc_->epsilon, &workspaceSize, &handle); + if (ret != ACL_SUCCESS) { + (void) aclrtDestroyStream(stream); + ERROR_LOG("Get Operator Workspace failed. 
error code is %d", static_cast(ret)); + return false; + } + INFO_LOG("Execute aclnnAddCustomGetWorkspaceSize success, workspace size %lu", workspaceSize); - switch (dataType) { - case ACL_BOOL: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case ACL_INT8: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case ACL_UINT8: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case ACL_INT16: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case ACL_UINT16: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case ACL_INT32: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case ACL_UINT32: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case ACL_INT64: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case ACL_UINT64: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case ACL_FLOAT16: - DoPrintFp16Data(reinterpret_cast(data), count, elementsPerRow); - break; - case ACL_FLOAT: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case ACL_DOUBLE: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - default: - ERROR_LOG("Unsupported type: %d", dataType); - } -} + void* workspace = nullptr; + if (workspaceSize != 0) { + if (aclrtMalloc(&workspace, workspaceSize, ACL_MEM_MALLOC_NORMAL_ONLY) != ACL_SUCCESS) { + ERROR_LOG("Malloc device memory failed"); + } + } -void OpRunner::PrintInput(size_t index, size_t numElementsPerRow) -{ - if (index >= numInputs_) { - ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numInputs_); - return; - } + ret = aclnnLazyAdam(workspace, workspaceSize, handle, stream); + if (ret != ACL_SUCCESS) { + (void) aclrtDestroyStream(stream); + ERROR_LOG("Execute Operator failed. error code is %d", static_cast(ret)); + return false; + } + INFO_LOG("Execute aclnnAddCustom success"); - auto desc = opDesc_->inputDesc[index]; - PrintData(hostInputs_[index], GetInputElementCount(index), aclGetTensorDescType(desc), numElementsPerRow); -} + ret = aclrtSynchronizeStreamWithTimeout(stream, STREAM_TIMEOUT); + if (ret != SUCCESS) { + ERROR_LOG("Synchronize stream failed. error code is %d", static_cast(ret)); + (void) aclrtDestroyStream(stream); + return false; + } + INFO_LOG("Synchronize stream success"); + + // 把输入数据:inputM inputV inputVar 作为输出数据拷贝出来 + for (size_t i = 0; i < OUTPUT_SIZE; ++i) { + int inputTensorIndex = i + INPUT_TENSOR_OFFSET; // 加上输入tensor偏移值 + auto size = GetInputSize(inputTensorIndex); + aclrtMemcpyKind kind = ACL_MEMCPY_DEVICE_TO_HOST; + if (g_isDevice) { + kind = ACL_MEMCPY_DEVICE_TO_DEVICE; + } + if (aclrtMemcpy(hostOutputs_[i], size, devInputs_[inputTensorIndex], size, kind) != ACL_SUCCESS) { + INFO_LOG("Copy output[%zu] success", i); + (void) aclrtDestroyStream(stream); + return false; + } + INFO_LOG("Copy output[%zu] success", i); + } -void OpRunner::PrintOutput(size_t index, size_t numElementsPerRow) -{ - if (index >= numOutputs_) { - ERROR_LOG("index out of range. 
index = %zu, numOutputs = %zu", index, numOutputs_); - return; + (void) aclrtDestroyStream(stream); + return true; } - - auto desc = opDesc_->outputDesc[index]; - PrintData(hostOutputs_[index], GetOutputElementCount(index), aclGetTensorDescType(desc), numElementsPerRow); -} } // namespace AclnnLazyAdam \ No newline at end of file -- Gitee From e86cc03685b2f4ff29ce297fd3b21ab8fcdcac4f Mon Sep 17 00:00:00 2001 From: penghuiyang <1060916628@qq.com> Date: Mon, 6 May 2024 19:56:29 +0800 Subject: [PATCH 097/302] =?UTF-8?q?clang=5Fformat=E6=A0=BC=E5=BC=8F?= =?UTF-8?q?=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../fused_lazy_adam/op_kernel/lazy_adam.cpp | 86 +++++++++---------- 1 file changed, 39 insertions(+), 47 deletions(-) diff --git a/cust_op/fused_lazy_adam/op_kernel/lazy_adam.cpp b/cust_op/fused_lazy_adam/op_kernel/lazy_adam.cpp index 815e6567..76164e50 100644 --- a/cust_op/fused_lazy_adam/op_kernel/lazy_adam.cpp +++ b/cust_op/fused_lazy_adam/op_kernel/lazy_adam.cpp @@ -17,21 +17,17 @@ See the License for the specific language governing permissions and using namespace AscendC; -template +template class LazyAdam { public: - __aicore__ inline LazyAdam() - {} + __aicore__ inline LazyAdam() {} // 初始化函数,完成内存初始化相关操作 - __aicore__ inline void Init(GM_ADDR gradient, GM_ADDR indices, - GM_ADDR inputM, GM_ADDR inputV, GM_ADDR inputVar, GM_ADDR lr, - GM_ADDR inputMRef, GM_ADDR inputVRef, GM_ADDR inputVarRef, - float beta1, float beta2, float epsilon, - int32_t dim0, int32_t dim1, int32_t dim2, - int32_t row, int32_t indicesAllocSize, int32_t otherAllocSize, - int32_t batch, int32_t loopCount, int32_t rowLeft, - int32_t loopCountTail, int32_t rowLeftTail, int32_t coreNum) + __aicore__ inline void Init(GM_ADDR gradient, GM_ADDR indices, GM_ADDR inputM, GM_ADDR inputV, GM_ADDR inputVar, + GM_ADDR lr, GM_ADDR inputMRef, GM_ADDR inputVRef, GM_ADDR inputVarRef, float beta1, + float beta2, float epsilon, int32_t dim0, int32_t dim1, int32_t dim2, int32_t row, + int32_t indicesAllocSize, int32_t otherAllocSize, int32_t batch, int32_t loopCount, + int32_t rowLeft, int32_t loopCountTail, int32_t rowLeftTail, int32_t coreNum) { ASSERT(GetBlockNum() != 0 && "block dim can not be zero!"); // 属性赋值 @@ -53,15 +49,15 @@ public: int32_t shape = this->dim0 * this->dim2; int32_t shapeIndices = this->dim1 * 1; int32_t shapeGradient = this->dim1 * this->dim2; - this->gmGradient.SetGlobalBuffer((__gm__ T *)gradient + this->batch * this->dim2 * get_block_idx(), + this->gmGradient.SetGlobalBuffer((__gm__ T*)gradient + this->batch * this->dim2 * get_block_idx(), shapeGradient); - this->gmIndices.SetGlobalBuffer((__gm__ int32_t *)indices + this->batch * get_block_idx(), shapeIndices); + this->gmIndices.SetGlobalBuffer((__gm__ int32_t*)indices + this->batch * get_block_idx(), shapeIndices); - this->gmInputM.SetGlobalBuffer((__gm__ T *)inputM, shape); - this->gmInputV.SetGlobalBuffer((__gm__ T *)inputV, shape); - this->gmInputVar.SetGlobalBuffer((__gm__ T *)inputVar, shape); + this->gmInputM.SetGlobalBuffer((__gm__ T*)inputM, shape); + this->gmInputV.SetGlobalBuffer((__gm__ T*)inputV, shape); + this->gmInputVar.SetGlobalBuffer((__gm__ T*)inputVar, shape); - this->gmLearningRate.SetGlobalBuffer((__gm__ T *)lr, sizeof(float)); + this->gmLearningRate.SetGlobalBuffer((__gm__ T*)lr, sizeof(float)); this->lr = this->gmLearningRate.GetValue(0); // 将输出地址指向输入地址 @@ -119,19 +115,19 @@ private: // 搬入函数,完成CopyIn阶段的处理,被核心Process函数调用 __aicore__ inline void CopyIn(int32_t progress, 
int32_t row) { - LocalTensor localGradient = this->inQueGradient.template AllocTensor(); + LocalTensor localGradient = this->inQueGradient.template AllocTensor(); uint32_t gradientDataLen = row * this->dim2 * sizeof(T); // 连续传输数据块个数;len:连续传输数据块长度,Byte,非对齐搬运;0, 0, 0:源/目标数据块间隔,保留字段 DataCopyExtParams gradientParams{1, gradientDataLen, 0, 0, 0}; // 搬运填充参数 - DataCopyPadExtParams gradientPadParams{true, 0, 2, 0}; + DataCopyPadExtParams gradientPadParams{true, 0, 2, 0}; DataCopyPad(localGradient, this->gmGradient[progress * this->row * this->dim2], gradientParams, gradientPadParams); - LocalTensor localIndices = this->inQueIndices.template AllocTensor(); + LocalTensor localIndices = this->inQueIndices.template AllocTensor(); uint32_t indicesDataLen = row * sizeof(int32_t); DataCopyExtParams indicesParams{1, indicesDataLen, 0, 0, 0}; - DataCopyPadExtParams indicesPadParams{true, 0, 2, 0}; + DataCopyPadExtParams indicesPadParams{true, 0, 2, 0}; DataCopyPad(localIndices, this->gmIndices[progress * this->row], indicesParams, indicesPadParams); this->inQueGradient.EnQue(localGradient); @@ -141,13 +137,13 @@ private: // 计算函数,完成Compute阶段的处理,被核心Process函数调用 __aicore__ inline void Compute(int32_t progress, int32_t row) { - LocalTensor localGradient = this->inQueGradient.template DeQue(); - LocalTensor localIndices = this->inQueIndices.template DeQue(); + LocalTensor localGradient = this->inQueGradient.template DeQue(); + LocalTensor localIndices = this->inQueIndices.template DeQue(); Muls(localIndices, localIndices, this->dim2, row); // 根据 indices 从 inputM 中切分出来 m_slice - LocalTensor localMSlice = this->queMSlice.template AllocTensor(); - LocalTensor localVSlice = this->queVSlice.template AllocTensor(); - LocalTensor localVarSlice = this->queVarSlice.template AllocTensor(); + LocalTensor localMSlice = this->queMSlice.template AllocTensor(); + LocalTensor localVSlice = this->queVSlice.template AllocTensor(); + LocalTensor localVarSlice = this->queVarSlice.template AllocTensor(); pipe_barrier(PIPE_ALL); @@ -212,34 +208,30 @@ private: private: float lr, beta1, beta2, epsilon; int32_t dim0, dim1, dim2, row, batch, loopCount, rowLeft, loopCountTail, rowLeftTail, coreNum; - LocalTensor updateM, updateV, updateVar, temp; - LocalTensor localIndices; - GlobalTensor gmGradient, gmInputM, gmInputV, gmInputVar; - GlobalTensor gmIndices; - GlobalTensor gmLearningRate; + LocalTensor updateM, updateV, updateVar, temp; + LocalTensor localIndices; + GlobalTensor gmGradient, gmInputM, gmInputV, gmInputVar; + GlobalTensor gmIndices; + GlobalTensor gmLearningRate; TPipe pipe; TQue inQueGradient, inQueIndices; TQue queMSlice, queVSlice, queVarSlice; - TBuf calcBufM; - TBuf calcBufV; - TBuf calcBufVar; - TBuf calcBuf; + TBuf calcBufM; + TBuf calcBufV; + TBuf calcBufVar; + TBuf calcBuf; }; -extern "C" __global__ __aicore__ void lazy_adam(GM_ADDR gradient, GM_ADDR indices, - GM_ADDR inputM, GM_ADDR inputV, GM_ADDR inputVar, GM_ADDR lr, - GM_ADDR inputMRef, GM_ADDR inputVRef, GM_ADDR inputVarRef, - GM_ADDR workspace, GM_ADDR tiling) +extern "C" __global__ __aicore__ void lazy_adam(GM_ADDR gradient, GM_ADDR indices, GM_ADDR inputM, GM_ADDR inputV, + GM_ADDR inputVar, GM_ADDR lr, GM_ADDR inputMRef, GM_ADDR inputVRef, + GM_ADDR inputVarRef, GM_ADDR workspace, GM_ADDR tiling) { GET_TILING_DATA(tiling_data, tiling); LazyAdam op32; - op32.Init(gradient, indices, - inputM, inputV, inputVar, lr, - inputMRef, inputVRef, inputVarRef, - tiling_data.beta1, tiling_data.beta2, tiling_data.epsilon, - tiling_data.dim0, tiling_data.dim1, 
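For readers cross-checking the Compute stage against the golden data, a host-side scalar reference of one common lazy-Adam row update is given below; "lazy" means only the rows named by `indices` are touched. This is a sketch under assumptions: the authoritative formula is whatever scripts/gen_data.py produces, and details such as bias correction are not visible in this hunk.

```cpp
#include <cmath>

// Reference update for one embedding row of width `dim` (no bias correction).
void LazyAdamRowUpdate(float* m, float* v, float* var, const float* grad,
                       int dim, float lr, float beta1, float beta2, float epsilon)
{
    for (int j = 0; j < dim; ++j) {
        m[j] = beta1 * m[j] + (1.0f - beta1) * grad[j];
        v[j] = beta2 * v[j] + (1.0f - beta2) * grad[j] * grad[j];
        var[j] -= lr * m[j] / (std::sqrt(v[j]) + epsilon);
    }
}
```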
tiling_data.dim2, - tiling_data.row, tiling_data.indicesAllocSize, tiling_data.otherAllocSize, - tiling_data.batch, tiling_data.loopCount, tiling_data.rowLeft, - tiling_data.loopCountTail, tiling_data.rowLeftTail, tiling_data.coreNum); + op32.Init(gradient, indices, inputM, inputV, inputVar, lr, inputMRef, inputVRef, inputVarRef, tiling_data.beta1, + tiling_data.beta2, tiling_data.epsilon, tiling_data.dim0, tiling_data.dim1, tiling_data.dim2, + tiling_data.row, tiling_data.indicesAllocSize, tiling_data.otherAllocSize, tiling_data.batch, + tiling_data.loopCount, tiling_data.rowLeft, tiling_data.loopCountTail, tiling_data.rowLeftTail, + tiling_data.coreNum); op32.Process(); } \ No newline at end of file -- Gitee From 161c2f4595f09d0989ea4b5cfaad8d4ef9fd8cf9 Mon Sep 17 00:00:00 2001 From: penghuiyang <1060916628@qq.com> Date: Mon, 6 May 2024 20:09:47 +0800 Subject: [PATCH 098/302] =?UTF-8?q?=E4=BB=A3=E7=A0=81=E6=A3=80=E8=A7=86?= =?UTF-8?q?=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cust_op/fused_lazy_adam/README.md | 52 +++++++++++++++++++++---------- 1 file changed, 35 insertions(+), 17 deletions(-) diff --git a/cust_op/fused_lazy_adam/README.md b/cust_op/fused_lazy_adam/README.md index e0e64d23..e97ef46f 100644 --- a/cust_op/fused_lazy_adam/README.md +++ b/cust_op/fused_lazy_adam/README.md @@ -1,6 +1,7 @@ # LazyAdam优化器融合算子及样例说明 ## LazyAdam融合算子文件结构 + ```shell ├── aclnn_lazy_adam_test # 单算子测试用例 ├── lazy_adam.json # 算子原型配置 @@ -11,33 +12,36 @@ ``` ## Ascend C参考设计 -更多详情可以参考CANN官方的Ascend C算子开发手册[Ascend C算子开发](https://www.hiascend.com/document/detail/zh/canncommercial/70RC1/operatordev/Ascendcopdevg/atlas_ascendc_10_0001.html)。 +更多详情可以参考CANN官方的Ascend +C算子开发手册[Ascend C算子开发](https://www.hiascend.com/document/detail/zh/canncommercial/70RC1/operatordev/Ascendcopdevg/atlas_ascendc_10_0001.html)。 ## lazy_adam优化器同名融合算子lazy_adam 1. 算子分析 a) 算子的主要功能是实现lazy_adam优化器反向更新时m、v、variable三项数据的计算和更新; -b) 算子参数说明: +b) 算子参数说明: + * gradient: lazy_adam优化器计算时使用的梯度; * indices: 参与计算/更新的数据索引; * inputM: lazy_adam优化器一阶矩估计;计算结果原地更新; * inputV: lazy_adam优化器二阶矩估计;计算结果原地更新; * inputVar: embedding表对应的variable数据;计算结果原地更新; -c) 算子约束说明: + c) 算子约束说明: * 支持的型号:Atlas A2系列产品; -* 支持的输入数据类型:float32; -* embedding表的dim值需要时8的倍数; +* 支持的输入数据类型:float32; +* embedding表的dim值需要是8的倍数; 2. Host侧算子实现 Host侧算子实现在目录 fused_lazy_adam/op_host下,其中包括:lazy_adam.cpp和 lazy_adam_tiling.h。 -a) Tiling实现 +a) Tiling实现 -namespace optiling域中的LazyAdamTilingFunc函数,主要实现从context中获取外部入参信息(输入参数指针、shape信息),及校验有效性; +namespace +optiling域中的LazyAdamTilingFunc函数,主要实现从context中获取外部入参信息(输入参数指针、shape信息),及校验有效性; 并计算kernel侧需要的数据切分相关参数,包括row、loopCount、batch等(详情见tiling文件注释),设置BlockDim,最后通过TilingData传递属性信息。 b) Shape推导 @@ -67,6 +71,7 @@ d) Process方法,进行数据搬入和计算,并且计算完成后将计算 单算子调用分为两种方式:单算子API执行和模型执行。mxRec提供单算子API执行供参考。 单算子测试用例在目录fused_lazy_adam/aclnn_lazy_adam_test下,其中: + * inc是头文件目录 * scripts存放生成数据和验证数据的python脚本 * input是存放算子入参的bin文件 @@ -74,23 +79,32 @@ d) Process方法,进行数据搬入和计算,并且计算完成后将计算 * src是存放公共函数common、构造算子输入输出描述类oprator_desc、单算子调用主体流程实现op_runner文件和入口main文件 执行单算子测试: + ```shell bash run.sh ``` ### 前置条件 -1. 
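The single-op API execution described in this README follows the standard two-phase aclnn pattern. A minimal sketch mirroring the `aclnnLazyAdam` calls in op_runner.cpp is shown below; `grad`, `indices`, `m`, `v`, `var`, `lr` are pre-built `aclTensor*` handles and `stream` is an existing `aclrtStream`, all assumed for illustration.

```cpp
size_t workspaceSize = 0;
aclOpExecutor* handle = nullptr;
// Phase 1: query the workspace size and obtain an executor handle.
if (aclnnLazyAdamGetWorkspaceSize(grad, indices, m, v, var, lr,
                                  beta1, beta2, epsilon,
                                  &workspaceSize, &handle) != ACL_SUCCESS) { /* bail out */ }

void* workspace = nullptr;
if (workspaceSize != 0 &&
    aclrtMalloc(&workspace, workspaceSize, ACL_MEM_MALLOC_NORMAL_ONLY) != ACL_SUCCESS) { /* bail out */ }

// Phase 2: launch, then block until the stream drains (timeout in ms).
if (aclnnLazyAdam(workspace, workspaceSize, handle, stream) != ACL_SUCCESS) { /* bail out */ }
if (aclrtSynchronizeStreamWithTimeout(stream, 5000) != ACL_SUCCESS) { /* bail out */ }
```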
参考[基于msopgen工具创建算子工程](https://www.hiascend.com/document/detail/zh/canncommercial/70RC1/operatordev/Ascendcopdevg/atlas_ascendc_10_0023.html)完成算子工程的创建, -参考[kernel侧算子实现](https://www.hiascend.com/document/detail/zh/canncommercial/70RC1/operatordev/Ascendcopdevg/atlas_ascendc_10_0024.html)完成kernel侧实现的相关准备, -参考[host侧算子实现](https://www.hiascend.com/document/detail/zh/canncommercial/70RC1/operatordev/Ascendcopdevg/atlas_ascendc_10_0026.html)完成host侧实现相关准备。 -2. 参考[算子编译部署](https://www.hiascend.com/document/detail/zh/canncommercial/70RC1/operatordev/Ascendcopdevg/atlas_ascendc_10_0031.html)完成算子的编译部署,编译部署时需要开启算子的二进制编译功能:修改算子工程中的编译配置项文件CMakePresets.json,将 +1. +参考[基于msopgen工具创建算子工程](https://www.hiascend.com/document/detail/zh/canncommercial/70RC1/operatordev/Ascendcopdevg/atlas_ascendc_10_0023.html) +完成算子工程的创建, +参考[kernel侧算子实现](https://www.hiascend.com/document/detail/zh/canncommercial/70RC1/operatordev/Ascendcopdevg/atlas_ascendc_10_0024.html) +完成kernel侧实现的相关准备, +参考[host侧算子实现](https://www.hiascend.com/document/detail/zh/canncommercial/70RC1/operatordev/Ascendcopdevg/atlas_ascendc_10_0026.html) +完成host侧实现相关准备。 +2. +参考[算子编译部署](https://www.hiascend.com/document/detail/zh/canncommercial/70RC1/operatordev/Ascendcopdevg/atlas_ascendc_10_0031.html) +完成算子的编译部署,编译部署时需要开启算子的二进制编译功能:修改算子工程中的编译配置项文件CMakePresets.json,将 ENABLE_BINARY_PACKAGE设置为True。编译部署时可将算子的二进制部署到当前环境,便于后续算子的调用。 3. 检查API执行需要的头文件和库文件是否自动生成,针对mxRec,检查cust_op/fused_lazy_adam/lazy_adam/build_out/autogen目录下,是否有 -aclnn_lazy_adam.cpp和aclnn_lazy_adam.h等。 + aclnn_lazy_adam.cpp和aclnn_lazy_adam.h等。 -注意:对于cust_op/fused_lazy_adam/run.sh脚本,安装算子后会删除构建目录。运行单算子测试时,需要屏蔽掉删除rm rf ./lazy_adam这一步,以确保前置条件3。 +注意:对于cust_op/fused_lazy_adam/run.sh脚本,安装算子后会删除构建目录。运行单算子测试时,需要屏蔽掉删除rm rf +./lazy_adam这一步,以确保前置条件3。 ### 融合算子 lazy_adam + 针对lazy_adam算子,入口src/main.cpp中: 1. InitResource函数:初始化AscendCL并运行管理资源申请,不用修改 @@ -100,6 +114,7 @@ a) 创建算子输入输出描述CreateOpDesc,OperatorDesc对象定义(inc/ope op_runner中使用; b) 创建OpRunner的对象,并依次执行: + * opRunner.Init():申请内存存放执行算子的输入输出数据 * SetInputData():加载数据输入bin文件并传输给OpRunner的Buffer供后续算子执行使用 * opRunner.RunOp():算子执行,主要流程为:入参数据拷贝,创建Stream,执行Stream,输出数据拷贝,释放Stream资源 @@ -108,14 +123,17 @@ b) 创建OpRunner的对象,并依次执行: 3. DestroyResource函数:释放内存,不用修改 ### 运行脚本 + run.sh脚本依次执行: -1. 清除遗留生成文件和日志文件 -2. 生成输入数据和真值数据 -3. 编译acl可执行文件 -4. 运行可执行文件 + +1. 清除遗留生成文件和日志文件 +2. 生成输入数据和真值数据 +3. 编译acl可执行文件 +4. 运行可执行文件 5. 
比较真值文件 ### scripts脚本 + * gen_data.py:生成lazy_adam算子的输入数据和用于精度校验的golden数据,可自行修改测试相关dim参数。 * verify_result.py:将算子的输出和脚本生成的golden数据进行精度比对,并输出比较结果。比对规则为:允许误差精度loss:1e-4 -- Gitee From 69e2d86f47dfcfae9e2d7d876250dd3a702398b4 Mon Sep 17 00:00:00 2001 From: penghuiyang <1060916628@qq.com> Date: Mon, 6 May 2024 20:52:28 +0800 Subject: [PATCH 099/302] =?UTF-8?q?=E7=AE=97=E5=AD=90vendor=5Fname?= =?UTF-8?q?=E5=90=8D=E7=A7=B0=E4=BF=AE=E6=94=B9=EF=BC=8C=E8=A7=A3=E5=86=B3?= =?UTF-8?q?=E5=A4=9A=E7=AE=97=E5=AD=90=E5=9C=BA=E6=99=AF=E4=B8=8B=E7=AE=97?= =?UTF-8?q?=E5=AD=90=E8=A6=86=E7=9B=96=E9=97=AE=E9=A2=98=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cust_op/fused_lazy_adam/run.sh | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/cust_op/fused_lazy_adam/run.sh b/cust_op/fused_lazy_adam/run.sh index c1e80ce5..6f51d7a7 100644 --- a/cust_op/fused_lazy_adam/run.sh +++ b/cust_op/fused_lazy_adam/run.sh @@ -41,8 +41,10 @@ sed -i 's/--nomd5/--nomd5 --nocrc/g' ./cmake/makeself.cmake # 修改cann安装路径 sed -i 's:"/usr/local/Ascend/latest":"/usr/local/Ascend/ascend-toolkit/latest":g' CMakePresets.json -# 修改vendor_name 防止覆盖之前vendor_name为customize的算子 -sed -i 's:"customize":"customize_lazy_adam":g' CMakePresets.json +# 修改vendor_name 防止覆盖之前vendor_name为customize的算子; +# vendor_name需要和aclnn中的CMakeLists.txt中的CUST_PKG_PATH值同步,不同步aclnn会调用失败; +# vendor_name字段值不能包含customize;包含会导致多算子部署场景CANN的vendors路径下config.ini文件内容截取错误,部署工具bug; +sed -i 's:"customize":"mxrec_fused_lazy_adam":g' CMakePresets.json bash build.sh -- Gitee From 554b60add9f01e6b3a307824c4d0869c6ff2cf78 Mon Sep 17 00:00:00 2001 From: penghuiyang <1060916628@qq.com> Date: Mon, 6 May 2024 20:54:09 +0800 Subject: [PATCH 100/302] =?UTF-8?q?aclnn=E4=B8=ADvendor=5Fname=E5=90=8D?= =?UTF-8?q?=E7=A7=B0=E4=BF=AE=E6=94=B9=EF=BC=8C=E8=A7=A3=E5=86=B3=E5=A4=9A?= =?UTF-8?q?=E7=AE=97=E5=AD=90=E5=9C=BA=E6=99=AF=E4=B8=8B=E7=AE=97=E5=AD=90?= =?UTF-8?q?=E8=A6=86=E7=9B=96=E9=97=AE=E9=A2=98=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../aclnn_lazy_adam_test/src/CMakeLists.txt | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/CMakeLists.txt b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/CMakeLists.txt index 1642e3ca..c4a727bf 100644 --- a/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/CMakeLists.txt +++ b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/CMakeLists.txt @@ -19,9 +19,9 @@ if (NOT DEFINED ENV{DDK_PATH}) message(STATUS "set default INC_PATH: ${INC_PATH}") else () message(STATUS "env INC_PATH: ${INC_PATH}") -endif() +endif () -set(CUST_PKG_PATH "${INC_PATH}/opp/vendors/customize_lazy_adam/op_api") +set(CUST_PKG_PATH "${INC_PATH}/opp/vendors/mxrec_fused_lazy_adam/op_api") set(LIB_PATH $ENV{NPU_HOST_LIB}) @@ -32,23 +32,23 @@ if (NOT DEFINED ENV{NPU_HOST_LIB}) message(STATUS "set default LIB_PATH: ${LIB_PATH}") else () message(STATUS "env LIB_PATH: ${LIB_PATH}") -endif() +endif () set(AUTO_GEN_PATH "../../lazy_adam/build_out/autogen") # Header path include_directories( - ${INC_PATH}/runtime/include - ${INC_PATH}/atc/include - ../inc - ${CUST_PKG_PATH}/include - ${AUTO_GEN_PATH} + ${INC_PATH}/runtime/include + ${INC_PATH}/atc/include + ../inc + ${CUST_PKG_PATH}/include + ${AUTO_GEN_PATH} ) # add host lib path link_directories( - ${LIB_PATH} - ${LIB_PATH1} - ${CUST_PKG_PATH}/lib + ${LIB_PATH} + ${LIB_PATH1} + ${CUST_PKG_PATH}/lib ) add_executable(execute_op @@ 
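The verify_result.py comparison described above admits a loss of 1e-4. A sketch of an absolute-tolerance check is given for reference; the authoritative rule (absolute vs. relative error, element counting) lives in scripts/verify_result.py, so treat this as illustrative only.

```cpp
#include <cmath>
#include <cstddef>

// Sketch of an absolute-tolerance check mirroring the documented 1e-4 rule.
bool AllClose(const float* actual, const float* golden, size_t n, float tol = 1e-4f)
{
    for (size_t i = 0; i < n; ++i) {
        if (std::fabs(actual[i] - golden[i]) > tol) {
            return false;
        }
    }
    return true;
}
```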
-57,11 +57,11 @@ add_executable(execute_op ) target_link_libraries(execute_op - ascendcl - cust_opapi - acl_op_compiler - nnopbase - stdc++ + ascendcl + cust_opapi + acl_op_compiler + nnopbase + stdc++ ) install(TARGETS execute_op DESTINATION ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}) -- Gitee From aa732bccd3d2f52c3ea0da4d73d1a632ec18d1f1 Mon Sep 17 00:00:00 2001 From: penghuiyang <1060916628@qq.com> Date: Mon, 6 May 2024 21:30:52 +0800 Subject: [PATCH 101/302] =?UTF-8?q?=E7=AE=97=E5=AD=90run.sh=E8=84=9A?= =?UTF-8?q?=E6=9C=AC=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cust_op/fused_lazy_adam/run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cust_op/fused_lazy_adam/run.sh b/cust_op/fused_lazy_adam/run.sh index 6f51d7a7..ff604cea 100644 --- a/cust_op/fused_lazy_adam/run.sh +++ b/cust_op/fused_lazy_adam/run.sh @@ -43,7 +43,7 @@ sed -i 's/--nomd5/--nomd5 --nocrc/g' ./cmake/makeself.cmake sed -i 's:"/usr/local/Ascend/latest":"/usr/local/Ascend/ascend-toolkit/latest":g' CMakePresets.json # 修改vendor_name 防止覆盖之前vendor_name为customize的算子; # vendor_name需要和aclnn中的CMakeLists.txt中的CUST_PKG_PATH值同步,不同步aclnn会调用失败; -# vendor_name字段值不能包含customize;包含会导致多算子部署场景CANN的vendors路径下config.ini文件内容截取错误,部署工具bug; +# vendor_name字段值不能包含customize;包含会导致多算子部署场景CANN的vendors路径下config.ini文件内容截取错误 sed -i 's:"customize":"mxrec_fused_lazy_adam":g' CMakePresets.json bash build.sh -- Gitee From d66828edaa2dd649a1785125b3713c17aead80b6 Mon Sep 17 00:00:00 2001 From: penghuiyang <1060916628@qq.com> Date: Mon, 6 May 2024 23:14:11 +0800 Subject: [PATCH 102/302] =?UTF-8?q?readme=E8=84=9A=E6=9C=AC=E6=9B=B4?= =?UTF-8?q?=E6=96=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cust_op/fused_lazy_adam/README.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/cust_op/fused_lazy_adam/README.md b/cust_op/fused_lazy_adam/README.md index e97ef46f..42f5bfc9 100644 --- a/cust_op/fused_lazy_adam/README.md +++ b/cust_op/fused_lazy_adam/README.md @@ -16,6 +16,16 @@ 更多详情可以参考CANN官方的Ascend C算子开发手册[Ascend C算子开发](https://www.hiascend.com/document/detail/zh/canncommercial/70RC1/operatordev/Ascendcopdevg/atlas_ascendc_10_0001.html)。 +## lazy_adam融合算子使用 + +1. 进入当前目录,执行指令进行编译和部署lazy_adam融合算子 + +``` +bash run.sh +``` + +2. 模型py脚本中导入mxRec中的lazy_adam优化器。lazy_adam优化器使用知道参考mxRec用户指南。 + ## lazy_adam优化器同名融合算子lazy_adam 1. 算子分析 @@ -87,16 +97,20 @@ bash run.sh ### 前置条件 1. + 参考[基于msopgen工具创建算子工程](https://www.hiascend.com/document/detail/zh/canncommercial/70RC1/operatordev/Ascendcopdevg/atlas_ascendc_10_0023.html) 完成算子工程的创建, 参考[kernel侧算子实现](https://www.hiascend.com/document/detail/zh/canncommercial/70RC1/operatordev/Ascendcopdevg/atlas_ascendc_10_0024.html) 完成kernel侧实现的相关准备, 参考[host侧算子实现](https://www.hiascend.com/document/detail/zh/canncommercial/70RC1/operatordev/Ascendcopdevg/atlas_ascendc_10_0026.html) 完成host侧实现相关准备。 + 2. + 参考[算子编译部署](https://www.hiascend.com/document/detail/zh/canncommercial/70RC1/operatordev/Ascendcopdevg/atlas_ascendc_10_0031.html) 完成算子的编译部署,编译部署时需要开启算子的二进制编译功能:修改算子工程中的编译配置项文件CMakePresets.json,将 ENABLE_BINARY_PACKAGE设置为True。编译部署时可将算子的二进制部署到当前环境,便于后续算子的调用。 + 3. 
检查API执行需要的头文件和库文件是否自动生成,针对mxRec,检查cust_op/fused_lazy_adam/lazy_adam/build_out/autogen目录下,是否有 aclnn_lazy_adam.cpp和aclnn_lazy_adam.h等。 -- Gitee From 25d94cd481cf5bdcd9a523be2d75ccd897fe643d Mon Sep 17 00:00:00 2001 From: sihaixianyu Date: Tue, 7 May 2024 00:58:57 +0000 Subject: [PATCH 103/302] =?UTF-8?q?!119=20=E3=80=90CleanCode=E3=80=91?= =?UTF-8?q?=E8=B0=83=E6=95=B4=E6=8A=BD=E8=B1=A1=E6=96=B9=E6=B3=95=E5=92=8C?= =?UTF-8?q?=E9=9D=99=E6=80=81=E6=96=B9=E6=B3=95=E7=9A=84=E9=A1=BA=E5=BA=8F?= =?UTF-8?q?=E3=80=82=20*=20=E3=80=90CleanCode=E3=80=91=E8=B0=83=E6=95=B4?= =?UTF-8?q?=E6=8A=BD=E8=B1=A1=E6=96=B9=E6=B3=95=E5=92=8C=E9=9D=99=E6=80=81?= =?UTF-8?q?=E6=96=B9=E6=B3=95=E7=9A=84=E9=A1=BA=E5=BA=8F=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mx_rec/graph/slicers.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/mx_rec/graph/slicers.py b/mx_rec/graph/slicers.py index 3999cdd4..a4014195 100644 --- a/mx_rec/graph/slicers.py +++ b/mx_rec/graph/slicers.py @@ -61,14 +61,6 @@ class NoGradSubgraphSlicer(metaclass=abc.ABCMeta): os.makedirs(info_dir) self._info_dir = info_dir - @abc.abstractmethod - def summarize(self) -> None: - pass - - @abc.abstractmethod - def slice(self) -> None: - pass - @staticmethod def _find_min_dep_ops( tgt_ops: Set[Operation], @@ -289,6 +281,14 @@ class NoGradSubgraphSlicer(metaclass=abc.ABCMeta): return consumers + @abc.abstractmethod + def summarize(self) -> None: + pass + + @abc.abstractmethod + def slice(self) -> None: + pass + def _slice_ops(self, sliceable_ops: Set[Operation], is_training: bool) -> None: """Slice the minimum dependency graph of given operation set. -- Gitee From 99c68d2f16fd91c3fb4a40579cba5f1fbcf161df Mon Sep 17 00:00:00 2001 From: yxy1684 <2270320041@qq.com> Date: Tue, 7 May 2024 00:59:37 +0000 Subject: [PATCH 104/302] !120 cleancode * cleancode --- src/core/emb_table/embedding_ddr.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/core/emb_table/embedding_ddr.cpp b/src/core/emb_table/embedding_ddr.cpp index 24aa07a7..be5fab22 100644 --- a/src/core/emb_table/embedding_ddr.cpp +++ b/src/core/emb_table/embedding_ddr.cpp @@ -363,6 +363,7 @@ int EmbeddingDDR::LoadHashMap(const string& savePath) } if (keyCount > devVocabSize + hostVocabSize) { LOG_ERROR("load key size exceeds the sum of device vocab size and host vocab size: {}", strerror(errno)); + free(static_cast(buf)); return -1; } else if (keyCount < devVocabSize) { loadOffset.push_back(i); -- Gitee From 7ae9e65543a3a5bae9406a37f23102280c844b91 Mon Sep 17 00:00:00 2001 From: longfeifei <962977793@qq.com> Date: Tue, 7 May 2024 09:48:47 +0800 Subject: [PATCH 105/302] =?UTF-8?q?hdfs=E4=B8=ADread=E3=80=81write?= =?UTF-8?q?=E5=87=BD=E6=95=B0=E5=8A=A0=E5=9B=BA=EF=BC=8C=E4=BC=98=E5=8C=96?= =?UTF-8?q?=E5=8A=A0=E8=BD=BD=E4=BF=9D=E5=AD=98=E6=97=A5=E5=BF=97?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mx_rec/saver/saver.py | 12 +- src/core/checkpoint/checkpoint.cpp | 18 ++- src/core/emb_table/embedding_ddr.cpp | 120 +++++++++----- src/core/emb_table/embedding_ddr.h | 4 +- src/core/emb_table/embedding_dynamic.cpp | 54 +++---- src/core/emb_table/embedding_dynamic.h | 4 +- src/core/emb_table/embedding_static.cpp | 54 +++---- src/core/emb_table/embedding_static.h | 4 +- src/core/file_system/file_system.h | 6 + .../hdfs_file_system/hdfs_file_system.cpp | 152 +++++++++--------- .../hdfs_file_system/hdfs_wrapper.h | 25 ++- src/core/utils/common.h | 3 + 12 files changed, 
259 insertions(+), 197 deletions(-) diff --git a/mx_rec/saver/saver.py b/mx_rec/saver/saver.py index d776b699..e2e58340 100644 --- a/mx_rec/saver/saver.py +++ b/mx_rec/saver/saver.py @@ -233,7 +233,9 @@ class Saver(object): attribute = attribute.astype(np.int64) attribute_dir = os.path.join(upper_dir, "slice.attribute") - attribute.tofile(attribute_dir) + with tf.io.gfile.GFile(attribute_dir, "wb") as file: + attribute = attribute.tostring() + file.write(attribute) @performance("_save") def _save(self, sess, root_dir): @@ -445,8 +447,9 @@ def write_binary_data(writing_path, suffix, data, attributes=None): raise RuntimeError(f"make dir {writing_path} for writing data failed!") from err data_file, attribute_file = generate_file_name(suffix) target_data_dir = os.path.join(writing_path, data_file) - - with tf.io.gfile.GFile(target_data_dir, "ab") as file: + # append mode of hdfs system supports not well when the file not exists. + file_mode = "wb" if not tf.io.gfile.exists(target_data_dir) else "ab" + with tf.io.gfile.GFile(target_data_dir, file_mode) as file: data = data.tostring() file.write(data) @@ -470,7 +473,8 @@ def read_binary_data(reading_path: str, data_name: str, table_name: str, load_of with tf.io.gfile.GFile(target_attribute_dir, "rb") as fin: validate_read_file(target_attribute_dir) - attributes = np.fromfile(target_attribute_dir, dtype=np.int64) + attributes = fin.read() + attributes = np.fromstring(attributes, dtype=np.int64) with tf.io.gfile.GFile(target_data_dir, "rb") as file: validate_read_file(target_data_dir) diff --git a/src/core/checkpoint/checkpoint.cpp b/src/core/checkpoint/checkpoint.cpp index 673c7ce3..b4ce187e 100644 --- a/src/core/checkpoint/checkpoint.cpp +++ b/src/core/checkpoint/checkpoint.cpp @@ -210,8 +210,13 @@ void Checkpoint::WriteStream(CkptTransData& transData, const string& dataDir, si } if (writeBytesNum == -1) { - LOG_ERROR("error happened when writing data to file."); - throw runtime_error("error happened when writing data to file."); + throw runtime_error("Error: Save data failed. data type: {} .An error occurred while writing file: {}.", + dataType, dataDir); + } + if (writeBytesNum != dataSize) { + throw runtime_error( + "Error: Save data failed. data type: {} .Expected to write {} bytes, but actually write {} bytes to file {}.", + dataType, dataSize, writeBytesNum, dataDir); } } @@ -330,8 +335,13 @@ void Checkpoint::ReadStream(CkptTransData& transData, } if (readBytesNum == -1) { - LOG_ERROR("error happened when reading data from file."); - throw runtime_error("error happened when reading data from file."); + throw runtime_error("Error: Load data failed. data type: {} .An error occurred while reading file: {}.", + dataType, dataDir); + } + if (readBytesNum != datasetSize) { + throw runtime_error( + "Error: Load data failed. 
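A note on the saver.py hunks in this commit: routing the attribute and data I/O through `tf.io.gfile.GFile` is what makes `hdfs://` paths work at all, since `numpy.tofile`/`numpy.fromfile` only operate on local files. Separately, `ndarray.tostring` and `np.fromstring` are deprecated aliases in NumPy (since 1.19 and 1.14 respectively); `tobytes()` and `np.frombuffer` are the supported spellings and behave identically here. The checkpoint.cpp changes below complement this by distinguishing a -1 I/O error from a short read or write, so truncated checkpoints now fail loudly instead of silently.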
data type: {} .Expected to read {} bytes, but actually read {} bytes to file {}.", + dataType, datasetSize, readBytesNum, dataDir); } } diff --git a/src/core/emb_table/embedding_ddr.cpp b/src/core/emb_table/embedding_ddr.cpp index 02d7c116..f8820a7d 100644 --- a/src/core/emb_table/embedding_ddr.cpp +++ b/src/core/emb_table/embedding_ddr.cpp @@ -316,47 +316,42 @@ void EmbeddingDDR::SetStartCount() freeSize_ = devVocabSize; } -void EmbeddingDDR::Load(const string& savePath) -{ - int res = LoadHashMap(savePath); - if (res == -1) { - throw std::runtime_error("load key failed!"); - } +void EmbeddingDDR::Load(const string& savePath) { + LoadKey(savePath); LoadEmbAndOptim(savePath); } -void EmbeddingDDR::Save(const string& savePath) -{ +void EmbeddingDDR::Save(const string& savePath) { SaveKey(savePath); SaveEmbAndOptim(savePath); } -int EmbeddingDDR::LoadHashMap(const string& savePath) -{ +void EmbeddingDDR::LoadKey(const string& savePath) { stringstream ss; ss << savePath << "/" << name << "/key/slice.data"; unique_ptr fileSystemHandler = make_unique(); unique_ptr fileSystemPtr = fileSystemHandler->Create(ss.str()); - size_t fileSize = 0; - try { - fileSize = fileSystemPtr->GetFileSize(ss.str()); - } catch (exception& e) { - LOG_ERROR("open file {} failed:{}", ss.str(), strerror(errno)); - return -1; - } + size_t fileSize = fileSystemPtr->GetFileSize(ss.str()); if (fileSize >= FILE_MAX_SIZE) { - LOG_ERROR("file {} size = {} is too big", ss.str(), fileSize); - return -1; + throw runtime_error("Error: Load keys failed. file {} size {} is too big.", ss.str(), fileSize); } int64_t* buf = static_cast(malloc(fileSize)); if (buf == nullptr) { - LOG_ERROR("malloc failed: {}", strerror(errno)); - return -1; + throw runtime_error("Error: Load keys failed. failed to allocate {} bytes using malloc.", fileSize); + } + + ssize_t res = fileSystemPtr->Read(ss.str(), reinterpret_cast(buf), fileSize); + if (res == -1) { + throw runtime_error("Error: Load keys failed. An error occurred while reading file: {}.", ss.str()); + } + if (res != fileSize) { + throw runtime_error( + "Error: Load keys failed. Expected to read {} bytes, but actually read {} bytes to file {}.", + fileSize, res, ss.str()); } - fileSystemPtr->Read(ss.str(), reinterpret_cast(buf), fileSize); size_t loadKeySize = fileSize / sizeof(int64_t); @@ -369,8 +364,9 @@ int EmbeddingDDR::LoadHashMap(const string& savePath) continue; } if (keyCount > devVocabSize + hostVocabSize) { - LOG_ERROR("load key size exceeds the sum of device vocab size and host vocab size: {}", strerror(errno)); - return -1; + throw runtime_error( + "Error: Load keys failed. Load key size :{} exceeds the sum of device vocab size and host vocab size: {}.", + keyCount, devVocabSize + hostVocabSize); } else if (keyCount < devVocabSize) { loadOffset.push_back(i); devOffset2Key[keyCount] = buf[i]; @@ -381,9 +377,7 @@ int EmbeddingDDR::LoadHashMap(const string& savePath) keyCount++; } maxOffset = keyOffsetMap.size(); - free(static_cast(buf)); - return 0; } void EmbeddingDDR::LoadEmbAndOptim(const string& savePath) @@ -404,21 +398,41 @@ void EmbeddingDDR::LoadEmbAndOptim(const string& savePath) // 读embedding stringstream embedStream; embedStream << ss.str() << "/" << "embedding/slice.data"; + + size_t readSize = hostLoadOffset.size() * embSize_ * sizeof(float); ssize_t res = fileSystemPtr->Read(embedStream.str(), table.embData, 0, hostLoadOffset, embSize_); + if (res == -1) { + throw runtime_error("Error: Load embeddings failed. 
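The throws added here (and in checkpoint.cpp above) pass fmt-style `{}` placeholders plus extra arguments straight to `runtime_error`. `std::runtime_error` has no such constructor, so this only compiles if `runtime_error` resolves to a project-local formatting wrapper; if it does not, each site needs to format the message first. One option is the `StringFormat` helper this codebase already uses in embedding_dynamic.cpp; its exact signature is an assumption here.

```cpp
// Assumes StringFormat(fmt, ...) -> std::string with printf-style placeholders,
// as in the existing "aclrtMalloc failed, ret=%d" call site.
throw std::runtime_error(
    StringFormat("Error: Load keys failed. file %s size %zu is too big.",
                 ss.str().c_str(), fileSize));
```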
An error occurred while reading file: {}.", + embedStream.str()); + } + if (res != readSize) { + throw runtime_error( + "Error: Load embeddings failed. Expected to read {} bytes, but actually read {} bytes to file {}.", + readSize, res, embedStream.str()); + } // 读optim int64_t optimIndex = 1; for (const auto ¶m: optimParams) { stringstream paramStream; paramStream << ss.str() << "/" << optimName + "_" + param << "/slice.data"; + ssize_t res = fileSystemPtr->Read(paramStream.str(), table.embData, optimIndex, hostLoadOffset, embSize_); - optimIndex ++; + if (res == -1) { + throw runtime_error("Error: Load optimizers failed. An error occurred while reading file: {}.", + paramStream.str()); + } + if (res != readSize) { + throw runtime_error( + "Error: Load embeddings failed. Expected to read {} bytes, but actually read {} bytes to file {}.", + readSize, res, paramStream.str()); + } + optimIndex++; } } -int EmbeddingDDR::SaveKey(const string& savePath) -{ +void EmbeddingDDR::SaveKey(const string& savePath) { stringstream ss; ss << savePath << "/" << name << "/key/"; MakeDir(ss.str()); @@ -442,19 +456,17 @@ int EmbeddingDDR::SaveKey(const string& savePath) } } - ssize_t res = fileSystemPtr->Write(ss.str(), reinterpret_cast(hostKey.data()), - static_cast(hostKey.size() * sizeof(int64_t))); + hostKey.insert(hostKey.end(), deviceKey.begin(), deviceKey.end()); + size_t writeSize = static_cast(hostKey.size() * sizeof(int64_t)); + ssize_t res = fileSystemPtr->Write(ss.str(), reinterpret_cast(hostKey.data()), writeSize); if (res == -1) { - return -1; + throw runtime_error("Error: Save keys failed. An error occurred while writing file: {}.", ss.str()); } - ssize_t res2 = fileSystemPtr->Write( - ss.str(), reinterpret_cast(deviceKey.data()), - static_cast(deviceKey.size() * sizeof(int64_t)) - ); - if (res2 == -1) { - return -1; + if (res != writeSize) { + throw runtime_error( + "Error: Save keys failed. Expected to write {} bytes, but actually write {} bytes to file {}.", + writeSize, res, ss.str()); } - return 0; } void EmbeddingDDR::SaveEmbData(const string& savePath) @@ -466,8 +478,17 @@ void EmbeddingDDR::SaveEmbData(const string& savePath) unique_ptr fileSystemHandler = make_unique(); unique_ptr fileSystemPtr = fileSystemHandler->Create(ss.str()); - vector attribute; - fileSystemPtr->Write(ss.str(), embContent, embSize_ * sizeof(float)); + + size_t writeSize = embSize_ * sizeof(float) * embContent.size(); + ssize_t res = fileSystemPtr->Write(ss.str(), embContent, embSize_ * sizeof(float)); + if (res == -1) { + throw runtime_error("Error: Save embeddings failed. An error occurred while writing file: {}.", ss.str()); + } + if (res != writeSize) { + throw runtime_error( + "Error: Save embeddings failed. Expected to write {} bytes, but actually write {} bytes to file {}.", + writeSize, res, ss.str()); + } } void EmbeddingDDR::SaveOptimData(const string& savePath) @@ -480,8 +501,18 @@ void EmbeddingDDR::SaveOptimData(const string& savePath) unique_ptr fileSystemHandler = make_unique(); unique_ptr fileSystemPtr = fileSystemHandler->Create(ss.str()); - vector attribute; - fileSystemPtr->Write(ss.str(), content.second, embSize_ * sizeof(float)); + + size_t writeSize = embSize_ * sizeof(float) * content.second.size(); + ssize_t res = fileSystemPtr->Write(ss.str(), content.second, embSize_ * sizeof(float)); + + if (res == -1) { + throw runtime_error("Error: Save optimizers failed. 
An error occurred while writing file: {}.", ss.str()); + } + if (res != writeSize) { + throw runtime_error( + "Error: Save optimizers failed. Expected to write {} bytes, but actually write {} bytes to file {}.", + writeSize, res, ss.str()); + } } } @@ -501,7 +532,7 @@ void EmbeddingDDR::SaveEmbAndOptim(const string& savePath) int optim_param_count = 1; for (const string ¶m: optimParams) { optimContentMap[param].push_back(table.embData[offset - devVocabSize].data() + - sizeof(float) * embSize_ * optim_param_count); + sizeof(float) * embSize_ * optim_param_count); optim_param_count++; } } @@ -523,6 +554,7 @@ void EmbeddingDDR::SetOptimizerInfo(OptimizerInfo& optimizerInfo) optimContentMap[param] = vector{}; } } + void EmbeddingDDR::SetCacheManager(CacheManager *cm) { cacheManager_ = cm; diff --git a/src/core/emb_table/embedding_ddr.h b/src/core/emb_table/embedding_ddr.h index b2a461d8..ab7cc3fb 100644 --- a/src/core/emb_table/embedding_ddr.h +++ b/src/core/emb_table/embedding_ddr.h @@ -74,10 +74,10 @@ public: GTEST_PRIVATE: - int LoadHashMap(const string& savePath); + void LoadKey(const string& savePath); void LoadEmbAndOptim(const string& savePath); - int SaveKey(const string& savePath); + void SaveKey(const string& savePath); void SaveEmbData(const string &savePath); void SaveOptimData(const string& savePath); void SaveEmbAndOptim(const string& savePath); diff --git a/src/core/emb_table/embedding_dynamic.cpp b/src/core/emb_table/embedding_dynamic.cpp index 9fd26546..f81f2ab7 100644 --- a/src/core/emb_table/embedding_dynamic.cpp +++ b/src/core/emb_table/embedding_dynamic.cpp @@ -128,14 +128,11 @@ void EmbeddingDynamic::RandomInit(void* addr, size_t embNum) void EmbeddingDynamic::Save(const string& savePath) { - int res = SaveKey(savePath); - if (res == -1) { - throw std::runtime_error("save key failed!"); - } + SaveKey(savePath); SaveEmbAndOptim(savePath); } -int EmbeddingDynamic::SaveKey(const string& savePath) +void EmbeddingDynamic::SaveKey(const string& savePath) { stringstream ss; ss << savePath << "/" << name << "/key/"; @@ -153,12 +150,16 @@ int EmbeddingDynamic::SaveKey(const string& savePath) embAddress.push_back(it.second); } - ssize_t res = fileSystemPtr->Write(ss.str(), reinterpret_cast(deviceKey.data()), - static_cast(deviceKey.size() * sizeof(int64_t))); + size_t writeSize = static_cast(deviceKey.size() * sizeof(int64_t)); + ssize_t res = fileSystemPtr->Write(ss.str(), reinterpret_cast(deviceKey.data()), writeSize); if (res == -1) { - return -1; + throw runtime_error("Error: Save keys failed. An error occurred while writing file: {}.", ss.str()); + } + if (res != writeSize) { + throw runtime_error( + "Error: Save keys failed. 
Expected to write {} bytes, but actually write {} bytes to file {}.", + writeSize, res, ss.str()); } - return 0; } void EmbeddingDynamic::SaveEmbAndOptim(const string& savePath) @@ -215,10 +216,7 @@ void EmbeddingDynamic::SaveOptimData(const string &savePath) void EmbeddingDynamic::Load(const string& savePath) { - int res = LoadKey(savePath); - if (res == -1) { - throw std::runtime_error("load key failed!"); - } + LoadKey(savePath); LoadEmbAndOptim(savePath); } @@ -234,7 +232,7 @@ void EmbeddingDynamic::LoadEmbAndOptim(const string& savePath) stringstream embedStream; embedStream << ss.str() << "/" << "embedding/slice.data"; EmbeddingSizeInfo embeddingSizeInfo = {embSize_, extEmbSize_}; - fileSystemPtr->ReadEmbedding(savePath, embeddingSizeInfo, firstAddress, rankId_, loadOffset); + fileSystemPtr->ReadEmbedding(embedStream.str(), embeddingSizeInfo, firstAddress, rankId_, loadOffset); // 读optim int optimIndex = 1; @@ -255,24 +253,25 @@ int EmbeddingDynamic::LoadKey(const string& savePath) unique_ptr fileSystemHandler = make_unique(); unique_ptr fileSystemPtr = fileSystemHandler->Create(ss.str()); - size_t fileSize = 0; - try { - fileSize = fileSystemPtr->GetFileSize(ss.str()); - } catch (exception& e) { - LOG_ERROR("open file {} failed:{}", ss.str(), strerror(errno)); - return -1; - } + size_t fileSize = fileSystemPtr->GetFileSize(ss.str()); if (fileSize >= FILE_MAX_SIZE) { - LOG_ERROR("file {} size = {} is too big", ss.str(), fileSize); - return -1; + throw runtime_error("Error: Load keys failed. file {} size {} is too big.", ss.str(), fileSize); } int64_t* buf = static_cast(malloc(fileSize)); if (buf == nullptr) { - LOG_ERROR("malloc failed: {}", strerror(errno)); - return -1; + throw runtime_error("Error: Load keys failed. failed to allocate {} bytes using malloc.", fileSize); + } + + ssize_t res = fileSystemPtr->Read(ss.str(), reinterpret_cast(buf), fileSize); + if (res == -1) { + throw runtime_error("Error: Load keys failed. An error occurred while reading file: {}.", ss.str()); + } + if (res != fileSize) { + throw runtime_error( + "Error: Load keys failed. 
Expected to read {} bytes, but actually read {} bytes to file {}.", + fileSize, res, ss.str()); } - fileSystemPtr->Read(ss.str(), reinterpret_cast(buf), fileSize); size_t loadKeySize = fileSize / sizeof(int64_t); @@ -289,7 +288,7 @@ int EmbeddingDynamic::LoadKey(const string& savePath) void *newBlock = nullptr; aclError ret = aclrtMalloc(&newBlock, static_cast(datasetSize), ACL_MEM_MALLOC_HUGE_FIRST); if (ret != ACL_SUCCESS) { - throw runtime_error(StringFormat("aclrtMalloc failed, ret=%d", ret).c_str()); + throw runtime_error("Error: in dynamic expansion mode, aclrtMalloc failed, malloc size: {}.", datasetSize); } // 此处的 newBlock -> first address; // 对key_offset map 进行一个恢复操作 @@ -303,5 +302,4 @@ int EmbeddingDynamic::LoadKey(const string& savePath) maxOffset = keyOffsetMap.size(); free(static_cast(buf)); - return 0; } diff --git a/src/core/emb_table/embedding_dynamic.h b/src/core/emb_table/embedding_dynamic.h index 2c867530..59418229 100644 --- a/src/core/emb_table/embedding_dynamic.h +++ b/src/core/emb_table/embedding_dynamic.h @@ -48,13 +48,13 @@ private: void MallocEmbeddingBlock(int embNum); - int SaveKey(const string& savePath); + void SaveKey(const string& savePath); void SaveEmbAndOptim(const string& savePath); void SetOptimizerInfo(OptimizerInfo& optimizerInfo); - int LoadKey(const string& savePath); + void LoadKey(const string& savePath); void LoadEmbAndOptim(const string& savePath); diff --git a/src/core/emb_table/embedding_static.cpp b/src/core/emb_table/embedding_static.cpp index 225c90c9..2ff5a49e 100644 --- a/src/core/emb_table/embedding_static.cpp +++ b/src/core/emb_table/embedding_static.cpp @@ -73,10 +73,7 @@ int64_t EmbeddingStatic::capacity() const void EmbeddingStatic::Save(const string& savePath) { - int res = SaveKey(savePath); - if (res == -1) { - throw std::runtime_error("save embedding table failed!"); - } + SaveKey(savePath); } int EmbeddingStatic::SaveKey(const string& savePath) @@ -97,23 +94,24 @@ int EmbeddingStatic::SaveKey(const string& savePath) deviceOffset.push_back(it.second); } - ssize_t res = fileSystemPtr->Write(ss.str(), reinterpret_cast(deviceKey.data()), - static_cast(deviceKey.size() * sizeof(int64_t))); + size_t writeSize = static_cast(deviceKey.size() * sizeof(int64_t)); + ssize_t res = fileSystemPtr->Write(ss.str(), reinterpret_cast(deviceKey.data()), writeSize); if (res == -1) { - return -1; + throw runtime_error("Error: Save keys failed. An error occurred while writing file: {}.", ss.str()); + } + if (res != writeSize) { + throw runtime_error( + "Error: Save keys failed. 
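All of the load/save paths above obtain their backend through a `FileSystemHandler` that creates a `FileSystem` from the target path, so the same code serves POSIX and `hdfs://` checkpoints. The factory itself is not shown in this patch; the sketch below only illustrates the scheme-based dispatch it implies, and `LocalFileSystem` is a placeholder name, not a class confirmed by this diff.

```cpp
#include <memory>
#include <string>

// Sketch only: pick a backend from the URI scheme.
std::unique_ptr<FileSystem> FileSystemHandler::Create(const std::string& path)
{
    if (path.rfind("hdfs://", 0) == 0) {
        return std::make_unique<HdfsFileSystem>();
    }
    return std::make_unique<LocalFileSystem>(); // hypothetical local backend
}
```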
Expected to write {} bytes, but actually write {} bytes to file {}.", + writeSize, res, ss.str()); } - return 0; } void EmbeddingStatic::Load(const string& savePath) { - int res = LoadKey(savePath); - if (res == -1) { - throw std::runtime_error("load embedding table failed!"); - } + LoadKey(savePath); } -int EmbeddingStatic::LoadKey(const string &savePath) +void EmbeddingStatic::LoadKey(const string &savePath) { stringstream ss; ss << savePath << "/" << name << "/key/slice.data"; @@ -121,24 +119,25 @@ int EmbeddingStatic::LoadKey(const string &savePath) unique_ptr fileSystemHandler = make_unique(); unique_ptr fileSystemPtr = fileSystemHandler->Create(ss.str()); - size_t fileSize = 0; - try { - fileSize = fileSystemPtr->GetFileSize(ss.str()); - } catch (exception &e) { - LOG_ERROR("open file {} failed:{}", ss.str(), strerror(errno)); - return -1; - } + size_t fileSize = fileSystemPtr->GetFileSize(ss.str()); if (fileSize >= FILE_MAX_SIZE) { - LOG_ERROR("file {} size = {} is too big", ss.str(), fileSize); - return -1; + throw runtime_error("Error: Load keys failed. file {} size {} is too big.", ss.str(), fileSize); } int64_t* buf = static_cast(malloc(fileSize)); if (buf == nullptr) { - LOG_ERROR("malloc failed: {}", strerror(errno)); - return -1; + throw runtime_error("Error: Load keys failed. failed to allocate {} bytes using malloc.", fileSize); + } + + ssize_t res = fileSystemPtr->Read(ss.str(), reinterpret_cast(buf), fileSize); + if (res == -1) { + throw runtime_error("Error: Load keys failed. An error occurred while reading file: {}.", ss.str()); + } + if (res != fileSize) { + throw runtime_error( + "Error: Load keys failed. Expected to read {} bytes, but actually read {} bytes to file {}.", + fileSize, res, ss.str()); } - fileSystemPtr->Read(ss.str(), reinterpret_cast(buf), fileSize); size_t loadKeySize = fileSize / sizeof(int64_t); loadOffset.clear(); @@ -152,14 +151,13 @@ int EmbeddingStatic::LoadKey(const string &savePath) } if (loadOffset.size() > devVocabSize) { - LOG_ERROR("load key size exceeds device vocab size: {}", strerror(errno)); - return -1; + throw runtime_error("Error: Load keys failed. Load key size :{} exceeds device vocab size: {}.", + loadOffset.size(), devVocabSize); } maxOffset = keyOffsetMap.size(); free(static_cast(buf)); - return 0; } vector EmbeddingStatic::GetDeviceOffset() diff --git a/src/core/emb_table/embedding_static.h b/src/core/emb_table/embedding_static.h index 06e24efa..965bce0e 100644 --- a/src/core/emb_table/embedding_static.h +++ b/src/core/emb_table/embedding_static.h @@ -42,9 +42,9 @@ public: vector GetDeviceOffset(); GTEST_PRIVATE: - int SaveKey(const string& savePath); + void SaveKey(const string& savePath); - int LoadKey(const string& savePath); + void LoadKey(const string& savePath); vector deviceKey; vector deviceOffset; diff --git a/src/core/file_system/file_system.h b/src/core/file_system/file_system.h index 2f7d3b62..66c142db 100644 --- a/src/core/file_system/file_system.h +++ b/src/core/file_system/file_system.h @@ -32,12 +32,18 @@ namespace MxRec { virtual ssize_t Write(const string& filePath, const char* fileContent, size_t dataSize) = 0; virtual ssize_t Write(const string& filePath, vector fileContent, size_t dataSize) = 0; + + // In the dynamic expansion mode, embedding is transported to the host side from the device side + // and written into a file. 
virtual void WriteEmbedding(const string& filePath, const int& embeddingSize, const vector& addressArr, int deviceId) = 0; virtual ssize_t Read(const string& filePath, char* fileContent, size_t datasetSize) = 0; virtual ssize_t Read(const string& filePath, vector>& fileContent, int64_t contentOffset, vector offsetArr, const size_t& embeddingSize) = 0; + + // In the dynamic expansion mode, embedding is read from the file + // and transported from the host side to the device side. virtual void ReadEmbedding(const string& filePath, EmbeddingSizeInfo& embedSizeInfo, int64_t firstAddress, int deviceId, vector offsetArr) = 0; diff --git a/src/core/file_system/hdfs_file_system/hdfs_file_system.cpp b/src/core/file_system/hdfs_file_system/hdfs_file_system.cpp index 999f2fa9..ec9e9bac 100644 --- a/src/core/file_system/hdfs_file_system/hdfs_file_system.cpp +++ b/src/core/file_system/hdfs_file_system/hdfs_file_system.cpp @@ -60,7 +60,7 @@ size_t HdfsFileSystem::GetFileSize(const string& filePath) hdfsFileInfo* fileInfo = hdfs->GetPathInfo(fs, filePath.c_str()); hdfs->Disconnect(fs); if (fileInfo == nullptr) { - return 0; + throw runtime_error("Error: Unable to get hdfs file info : {}.", filePath.c_str()); } auto fileSize = static_cast(fileInfo->mSize); return fileSize; @@ -69,35 +69,25 @@ size_t HdfsFileSystem::GetFileSize(const string& filePath) ssize_t HdfsFileSystem::Write(const string& filePath, const char* fileContent, size_t dataSize) { hdfsFS fs = ConnectHdfs(); - - hdfsFile file = hdfs->OpenFile(fs, filePath.c_str(), O_WRONLY | O_CREAT, 0, 0, 0); + int flag = O_WRONLY | O_CREAT; + hdfsFileInfo* fileInfo = hdfs->GetPathInfo(fs, filePath.c_str()); + if (fileInfo) { + flag = O_WRONLY | O_APPEND; + } + hdfsFile file = hdfs->OpenFile(fs, filePath.c_str(), flag, 0, 0, 0); if (!file) { hdfs->Disconnect(fs); - throw runtime_error("Error writing to hdfs file."); + throw runtime_error("Error: Unable to open hdfs file : {}.", filePath.c_str()); } - size_t dataCol = dataSize; - size_t writeSize = 0; - size_t idx = 0; tSize writeBytesNum = 0; - - while (dataCol != 0) { - if (dataCol > oneTimeReadWriteLen) { - writeSize = oneTimeReadWriteLen; - } else { - writeSize = dataCol; - } - - tSize res = hdfs->Write(fs, file, fileContent + idx, writeSize); - if (res == -1) { - hdfs->CloseFile(fs, file); - hdfs->Disconnect(fs); - return static_cast(res); - } - dataCol -= writeSize; - idx += writeSize; - writeBytesNum += res; + tSize res = hdfs->Write(fs, file, fileContent, dataSize, sizeof(char)); + if (res == -1) { + hdfs->CloseFile(fs, file); + hdfs->Disconnect(fs); + return static_cast(res); } + writeBytesNum += res; hdfs->CloseFile(fs, file); hdfs->Disconnect(fs); @@ -111,31 +101,19 @@ ssize_t HdfsFileSystem::Write(const string& filePath, vector fileContent hdfsFile file = hdfs->OpenFile(fs, filePath.c_str(), O_WRONLY | O_CREAT, 0, 0, 0); if (!file) { hdfs->Disconnect(fs); - throw runtime_error("Error writing to hdfs file."); + throw runtime_error("Error: Unable to open hdfs file : {}.", filePath.c_str()); } tSize writeBytesNum = 0; size_t loops = fileContent.size(); for (size_t i = 0; i < loops; i++) { - size_t dataCol = dataSize; - size_t writeSize = 0; - size_t idx = 0; - while (dataCol != 0) { - if (dataCol > oneTimeReadWriteLen) { - writeSize = oneTimeReadWriteLen; - } else { - writeSize = dataCol; - } - tSize res = hdfs->Write(fs, file, fileContent[i] + idx, writeSize); - if (res == -1) { - hdfs->CloseFile(fs, file); - hdfs->Disconnect(fs); - return static_cast(res); - } - dataCol -= writeSize; - idx += 
writeSize; - writeBytesNum += res; + tSize res = hdfs->Write(fs, file, fileContent[i], dataSize, sizeof(float)); + if (res == -1) { + hdfs->CloseFile(fs, file); + hdfs->Disconnect(fs); + return static_cast(res); } + writeBytesNum += res; } hdfs->CloseFile(fs, file); hdfs->Disconnect(fs); @@ -156,11 +134,10 @@ void HdfsFileSystem::WriteEmbedding(const string& filePath, const int& embedding hdfsFile file = hdfs->OpenFile(fs, filePath.c_str(), O_WRONLY | O_CREAT, 0, 0, 0); if (!file) { hdfs->Disconnect(fs); - throw runtime_error("Error writing to hdfs file."); + throw runtime_error("Error: Unable to open hdfs file : {}.", filePath.c_str()); } #ifndef GTEST - for (size_t i = 0; i < addressArr.size(); i += embHashNum) { vector row(embeddingSize); int64_t address = addressArr.at(i); @@ -172,14 +149,21 @@ void HdfsFileSystem::WriteEmbedding(const string& filePath, const int& embedding if (ret != ACL_SUCCESS) { hdfs->CloseFile(fs, file); hdfs->Disconnect(fs); - throw runtime_error("aclrtMemcpy failed"); + throw runtime_error("Error: Execute aclrtmemcpy from device to host failed."); } - auto numBytesWritten = hdfs->Write(fs, file, row.data(), embeddingSize * sizeof(float)); - if (numBytesWritten != embeddingSize * sizeof(float)) { + tSize res = hdfs->Write(fs, file, row.data(), embeddingSize * sizeof(float), sizeof(float)); + if (res == -1) { hdfs->CloseFile(fs, file); hdfs->Disconnect(fs); - throw runtime_error("Error writing to hdfs file."); + throw runtime_error("Error: An error occurred while writing file: {}.", filePath.c_str()); + } + + if (res != embeddingSize * sizeof(float)) { + hdfs->CloseFile(fs, file); + hdfs->Disconnect(fs); + throw runtime_error("Error: Expected to write {} bytes, but actually write {} bytes to file {}.", + embeddingSize * sizeof(float), res, filePath.c_str()); } } #endif @@ -194,29 +178,17 @@ ssize_t HdfsFileSystem::Read(const string& filePath, char* fileContent, size_t d hdfsFile file = hdfs->OpenFile(fs, filePath.c_str(), O_RDONLY, 0, 0, 0); if (!file) { hdfs->Disconnect(fs); - throw runtime_error("open hdfs file failed."); + throw runtime_error("Error: Unable to open hdfs file : {}.", filePath.c_str()); } - size_t dataCol = datasetSize; - size_t idx = 0; - size_t readSize = 0; tSize readBytesNum = 0; - while (dataCol != 0) { - if (dataCol > oneTimeReadWriteLen) { - readSize = oneTimeReadWriteLen; - } else { - readSize = dataCol; - } - tSize res = hdfs->Read(fs, file, fileContent + idx, readSize); - if (res == -1) { - hdfs->CloseFile(fs, file); - hdfs->Disconnect(fs); - return static_cast(res); - } - dataCol -= readSize; - idx += readSize; - readBytesNum += res; + tSize res = hdfs->Read(fs, file, fileContent, datasetSize); + if (res == -1) { + hdfs->CloseFile(fs, file); + hdfs->Disconnect(fs); + return static_cast(res); } + readBytesNum += res; hdfs->CloseFile(fs, file); hdfs->Disconnect(fs); @@ -231,7 +203,7 @@ ssize_t HdfsFileSystem::Read(const string& filePath, vector>& file hdfsFile file = hdfs->OpenFile(fs, filePath.c_str(), O_RDONLY, 0, 0, 0); if (!file) { hdfs->Disconnect(fs); - throw runtime_error("open hdfs file failed."); + throw runtime_error("Error: Unable to open hdfs file : {}.", filePath.c_str()); } ssize_t readBytesNum = 0; @@ -241,9 +213,13 @@ ssize_t HdfsFileSystem::Read(const string& filePath, vector>& file tSize res = hdfs->Read(fs, file, fileContent[embeddingCount].data() + contentOffset * embeddingSize, embeddingSize * sizeof(float)); - + if (res == -1) { + hdfs->CloseFile(fs, file); + hdfs->Disconnect(fs); + return static_cast(res); + } 
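+        // A short read (res < embeddingSize * sizeof(float)) is not treated as an error here;
+        // callers compare the accumulated byte count with the expected size and throw on mismatch.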
embeddingCount++; - readBytesNum += embeddingSize * sizeof(float); + readBytesNum += res; } hdfs->CloseFile(fs, file); @@ -266,26 +242,44 @@ void HdfsFileSystem::ReadEmbedding(const string& filePath, EmbeddingSizeInfo& em hdfsFile file = hdfs->OpenFile(fs, filePath.c_str(), O_RDONLY, 0, 0, 0); if (!file) { hdfs->Disconnect(fs); - throw runtime_error("open hdfs file failed."); + throw runtime_error("Error: Unable to open hdfs file : {}.", filePath.c_str()); } float* floatPtr = reinterpret_cast(firstAddress); auto i = 0; for (const auto& offset: offsetArr) { vector row(embedSizeInfo.embeddingSize); - hdfs->Seek(fs, file, offset * embedSizeInfo.embeddingSize * sizeof(float)); + int seekRes = hdfs->Seek(fs, file, offset * embedSizeInfo.embeddingSize * sizeof(float)); + if (seekRes == -1) { + hdfs->CloseFile(fs, file); + hdfs->Disconnect(fs); + throw runtime_error("Error: hdfsSeek failed with error. file offset: {}", + offset * embedSizeInfo.embeddingSize * sizeof(float)); + } + tSize res = hdfs->Read(fs, file, row.data(), embedSizeInfo.embeddingSize * sizeof(float)); - try { - aclrtMemcpy(floatPtr + i * embedSizeInfo.extendEmbSize, embedSizeInfo.embeddingSize * sizeof(float), - row.data(), embedSizeInfo.embeddingSize * sizeof(float), ACL_MEMCPY_HOST_TO_DEVICE); - } catch (std::exception& e) { + if (res == -1) { + hdfs->CloseFile(fs, file); + hdfs->Disconnect(fs); + throw runtime_error("Error: An error occurred while reading file: {}.", filePath.c_str()); + } + if (res != embeddingSize * sizeof(float)) { hdfs->CloseFile(fs, file); hdfs->Disconnect(fs); - throw runtime_error(StringFormat("error happen when acl memory copy from host to device: %s", e.what())); + throw runtime_error("Error: Expected to read {} bytes, but actually read {} bytes from file {}.", + embeddingSize * sizeof(float), res, filePath.c_str()); + } + + aclError ret = aclrtMemcpy(floatPtr + i * embedSizeInfo.extendEmbSize, + embedSizeInfo.embeddingSize * sizeof(float), + row.data(), embedSizeInfo.embeddingSize * sizeof(float), ACL_MEMCPY_HOST_TO_DEVICE); + if (ret != ACL_SUCCESS) { + hdfs->CloseFile(fs, file); + hdfs->Disconnect(fs); + throw runtime_error("Error: Execute aclrtmemcpy from host to device failed."); } i++; } - hdfs->CloseFile(fs, file); hdfs->Disconnect(fs); #endif diff --git a/src/core/file_system/hdfs_file_system/hdfs_wrapper.h b/src/core/file_system/hdfs_file_system/hdfs_wrapper.h index 0f33934f..144f0a3a 100644 --- a/src/core/file_system/hdfs_file_system/hdfs_wrapper.h +++ b/src/core/file_system/hdfs_file_system/hdfs_wrapper.h @@ -134,20 +134,37 @@ namespace MxRec { return hdfsCloseFile(fs, file); } - tSize Read(hdfsFS fs, hdfsFile file, void* buffer, tSize length) const + tSize Read(hdfsFS fs, hdfsFile file, void* buffer, tSize length, tSize typeSize) const { if (hdfsRead == nullptr) { throw runtime_error("Failed to obtain the pointer of the function hdfsRead from the libhdfs."); } - return hdfsRead(fs, file, buffer, length); + return WrapperHdfsRead(fs, file, buffer, length, typeSize); } - tSize Write(hdfsFS fs, hdfsFile file, const void* buffer, tSize length) const + tSize WrapperHdfsRead(hdfsFS fs, hdfsFile file, void *buffer, tSize length, tSize typeSize) { + tSize reTryCount = 0; + tSize unReadLength = length; + tSize readBytes = 0; + + while (unReadLength != 0 && reTryCount < RETRY_COUNT) { + tSize offset = buffer + (length - unReadLength) / typeSize; + tSize res = hdfsRead(fs, file, buffer + offset, unReadLength); + if (res == -1) { + return res; + } + unReadLength -= res; + readBytes += res; + } + 
return readBytes; + } + + tSize Write(hdfsFS fs, hdfsFile file, const void* buffer, tSize length, tSize typeSize) const { if (hdfsWrite == nullptr) { throw runtime_error("Failed to obtain the pointer of the function hdfsWrite from the libhdfs."); } - return hdfsWrite(fs, file, buffer, length); + return WrapperHdfsWrite(fs, file, buffer, length, typeSize); } int Seek(hdfsFS fs, hdfsFile file, tOffset desiredPos) const diff --git a/src/core/utils/common.h b/src/core/utils/common.h index 95a76ca5..3839b725 100644 --- a/src/core/utils/common.h +++ b/src/core/utils/common.h @@ -77,6 +77,9 @@ namespace MxRec { constexpr int GLOG_TIME_WIDTH_6 = 6; constexpr char GLOG_STAT_FLAG[] = "statOn"; + // for file system + constexpr int RETRY_COUNT = 100; + // unique related config constexpr int UNIQUE_BUCKET = 6; constexpr int MIN_UNIQUE_THREAD_NUM = 1; -- Gitee From 36ca59798c7c9346596c4e79f7e660711436af92 Mon Sep 17 00:00:00 2001 From: penghuiyang <1060916628@qq.com> Date: Tue, 7 May 2024 10:26:39 +0800 Subject: [PATCH 106/302] =?UTF-8?q?1=E3=80=81readme=E8=84=9A=E6=9C=AC?= =?UTF-8?q?=E6=9B=B4=E6=96=B0=202=E3=80=81lazy=5Fadam=E4=BC=98=E5=8C=96?= =?UTF-8?q?=E5=99=A8=E5=AE=9E=E7=8E=B0=E9=80=82=E9=85=8D=E8=9E=8D=E5=90=88?= =?UTF-8?q?=E7=AE=97=E5=AD=90=203=E3=80=81=E6=89=93=E5=8C=85=E8=84=9A?= =?UTF-8?q?=E6=9C=AC=E6=9B=B4=E6=96=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- build/gen_mxrec_tar_pkg.sh | 4 ++++ cust_op/fused_lazy_adam/README.md | 14 +++++++++++--- mx_rec/optimizers/lazy_adam.py | 32 +++++++++++++++++++++++++------ 3 files changed, 41 insertions(+), 9 deletions(-) diff --git a/build/gen_mxrec_tar_pkg.sh b/build/gen_mxrec_tar_pkg.sh index 72ccfe49..3b6a9713 100644 --- a/build/gen_mxrec_tar_pkg.sh +++ b/build/gen_mxrec_tar_pkg.sh @@ -56,6 +56,10 @@ function gen_tar_file() chmod 640 *.json chmod 550 op_host op_kernel op_host/* op_kernel/* cd - + cd ./build/"${pkg_dir}"/cust_op/ + chmod 550 -R fused_lazy_adam + chmod 640 fused_lazy_adam/*.json + cd - cd ./build tar -zvcf "${release_tar}" "${pkg_dir}" || { warn "compression failed, packages might be broken" diff --git a/cust_op/fused_lazy_adam/README.md b/cust_op/fused_lazy_adam/README.md index 42f5bfc9..32167b43 100644 --- a/cust_op/fused_lazy_adam/README.md +++ b/cust_op/fused_lazy_adam/README.md @@ -18,13 +18,21 @@ C算子开发手册[Ascend C算子开发](https://www.hiascend.com/document/deta ## lazy_adam融合算子使用 -1. 进入当前目录,执行指令进行编译和部署lazy_adam融合算子 +1. 上次fused_lazy_adam文件夹到目标环境,并进入当前目录,执行指令进行编译和部署lazy_adam融合算子 -``` +```shell bash run.sh ``` -2. 模型py脚本中导入mxRec中的lazy_adam优化器。lazy_adam优化器使用知道参考mxRec用户指南。 +2. 
模型脚本中創建lazy_adam优化器并指定使用融合算子。创建使用融合算子的lazy_adam优化器示例: + +```python +from mx_rec.optimizers.lazy_adam import create_hash_optimizer + +# 创建lazy_adam优化器时增加"use_fusion_optim=True"参数,表示使用融合算子实现。use_fusion_optim参数默认值为False。 +# lazy_adam优化器详细使用指导请参考mxRec用户指南 +sparse_optimizer = create_hash_optimizer(learning_rate=0.001, use_fusion_optim=True) +``` ## lazy_adam优化器同名融合算子lazy_adam diff --git a/mx_rec/optimizers/lazy_adam.py b/mx_rec/optimizers/lazy_adam.py index 1f491d14..81c8ecba 100644 --- a/mx_rec/optimizers/lazy_adam.py +++ b/mx_rec/optimizers/lazy_adam.py @@ -32,7 +32,8 @@ from tensorflow.python.training import slot_creator from mx_rec.optimizers.base import CustomizedOptimizer from mx_rec.util.initialize import ConfigInitializer -from mx_rec.validator.validator import para_checker_decorator, StringValidator, FloatValidator +from mx_rec.util.ops import import_host_pipeline_ops +from mx_rec.validator.validator import para_checker_decorator, StringValidator, FloatValidator, ClassValidator @para_checker_decorator(check_option_list=[ @@ -40,9 +41,11 @@ from mx_rec.validator.validator import para_checker_decorator, StringValidator, ("beta1", FloatValidator, {"min_value": 0.0, "max_value": 1.0}, ["check_value_for_open_interval"]), ("beta2", FloatValidator, {"min_value": 0.0, "max_value": 1.0}, ["check_value"]), ("epsilon", FloatValidator, {"min_value": 0.0, "max_value": 1.0}, ["check_value_for_left_open_interval"]), - ("name", StringValidator, {"min_len": 1, "max_len": 200}, ["check_string_length"]) + ("name", StringValidator, {"min_len": 1, "max_len": 200}, ["check_string_length"]), + ("use_fusion_optim", ClassValidator, {"classes": (bool, type(None))}), ]) -def create_hash_optimizer(learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8, name="LazyAdam"): +def create_hash_optimizer(learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8, name="LazyAdam", + use_fusion_optim=False): """ Args: learning_rate: learning rate @@ -50,13 +53,14 @@ def create_hash_optimizer(learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1 beta2: epsilon: name: - + use_fusion_optim: if use fused optimizer Returns: a customized optimizer instance """ if ConfigInitializer.get_instance().use_dynamic_expansion: raise ValueError("dynamic expansion mode is not compatible with the optimizer, please config dynamic " "expansion mode and optimizer correctly") - optimizer = CustomizedLazyAdam(learning_rate=learning_rate, beta1=beta1, beta2=beta2, epsilon=epsilon, name=name) + optimizer = CustomizedLazyAdam(learning_rate=learning_rate, beta1=beta1, beta2=beta2, epsilon=epsilon, name=name, + use_fusion_optim=use_fusion_optim) ConfigInitializer.get_instance().optimizer_config.optimizer_instance = optimizer return optimizer @@ -64,10 +68,16 @@ def create_hash_optimizer(learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1 class CustomizedLazyAdam(adam.AdamOptimizer, CustomizedOptimizer): name_counter = defaultdict(int) - def __init__(self, learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8, use_locking=False, name="LazyAdam"): + def __init__(self, learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8, use_locking=False, name="LazyAdam", + use_fusion_optim=False): self.optimizer_type = "LazyAdam" self.optim_param_list = ["momentum", "velocity"] self.config_instance = ConfigInitializer.get_instance() + self.use_fusion_optim = use_fusion_optim + if self.use_fusion_optim: + self._custom_initial_beta1 = beta1 + self._custom_initial_beta2 = beta2 + self._custom_initial_epsilon = epsilon super(CustomizedLazyAdam, 
self)._get_name(name=name) super(CustomizedLazyAdam, self).__init__(learning_rate=learning_rate, beta1=beta1, beta2=beta2, epsilon=epsilon, use_locking=use_locking, name=self.unique_name) @@ -164,6 +174,16 @@ class CustomizedLazyAdam(adam.AdamOptimizer, CustomizedOptimizer): temp_epsilon = temp.get("temp_epsilon") learning_rate = tf.divide(temp_lr * math_ops.sqrt(1 - power_b2), (1 - power_b1)) + if self.use_fusion_optim: + nd_indices = tf.expand_dims(indices, 1) + slot_m = self.get_slot(var, "m") + slot_v = self.get_slot(var, "v") + output_m, output_v, output_var =\ + import_host_pipeline_ops().lazy_adam(grad, nd_indices, slot_m, slot_v, var, learning_rate, + self._custom_initial_beta1, self._custom_initial_beta2, + self._custom_initial_epsilon) + return control_flow_ops.group(output_m, output_v, output_var) + abs_indices = tf.math.maximum(indices, 0) nd_indices = tf.expand_dims(indices, 1) -- Gitee From d0f34e40bb9aa774de0c16a33dad9c29a6958f9f Mon Sep 17 00:00:00 2001 From: penghuiyang <1060916628@qq.com> Date: Tue, 7 May 2024 10:32:45 +0800 Subject: [PATCH 107/302] =?UTF-8?q?1=E3=80=81readme=E8=84=9A=E6=9C=AC?= =?UTF-8?q?=E6=9B=B4=E6=96=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cust_op/fused_lazy_adam/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cust_op/fused_lazy_adam/README.md b/cust_op/fused_lazy_adam/README.md index 32167b43..3c30a40a 100644 --- a/cust_op/fused_lazy_adam/README.md +++ b/cust_op/fused_lazy_adam/README.md @@ -18,19 +18,19 @@ C算子开发手册[Ascend C算子开发](https://www.hiascend.com/document/deta ## lazy_adam融合算子使用 -1. 上次fused_lazy_adam文件夹到目标环境,并进入当前目录,执行指令进行编译和部署lazy_adam融合算子 +1. 上传fused_lazy_adam文件夹到目标环境,并进入当前目录,执行指令对lazy_adam融合算子进行编译和部署 ```shell bash run.sh ``` -2. 模型脚本中創建lazy_adam优化器并指定使用融合算子。创建使用融合算子的lazy_adam优化器示例: +2. 
模型脚本中创建lazy_adam优化器并指定使用融合算子。代码示例: ```python from mx_rec.optimizers.lazy_adam import create_hash_optimizer # 创建lazy_adam优化器时增加"use_fusion_optim=True"参数,表示使用融合算子实现。use_fusion_optim参数默认值为False。 -# lazy_adam优化器详细使用指导请参考mxRec用户指南 +# lazy_adam优化器详细使用指导请参考mxRec用户指南。 sparse_optimizer = create_hash_optimizer(learning_rate=0.001, use_fusion_optim=True) ``` -- Gitee From 6584ea407ba256523d815c653a7c72b1bf5a05e7 Mon Sep 17 00:00:00 2001 From: longfeifei <962977793@qq.com> Date: Tue, 7 May 2024 11:03:32 +0800 Subject: [PATCH 108/302] =?UTF-8?q?hdfs=E4=B8=ADread=E3=80=81write?= =?UTF-8?q?=E5=87=BD=E6=95=B0=E5=8A=A0=E5=9B=BA=EF=BC=8C=E4=BC=98=E5=8C=96?= =?UTF-8?q?=E5=8A=A0=E8=BD=BD=E4=BF=9D=E5=AD=98=E6=97=A5=E5=BF=97?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../hdfs_file_system/hdfs_wrapper.h | 21 ++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/src/core/file_system/hdfs_file_system/hdfs_wrapper.h b/src/core/file_system/hdfs_file_system/hdfs_wrapper.h index 144f0a3a..78699b01 100644 --- a/src/core/file_system/hdfs_file_system/hdfs_wrapper.h +++ b/src/core/file_system/hdfs_file_system/hdfs_wrapper.h @@ -142,7 +142,8 @@ namespace MxRec { return WrapperHdfsRead(fs, file, buffer, length, typeSize); } - tSize WrapperHdfsRead(hdfsFS fs, hdfsFile file, void *buffer, tSize length, tSize typeSize) { + tSize WrapperHdfsRead(hdfsFS fs, hdfsFile file, void *buffer, tSize length, tSize typeSize) const + { tSize reTryCount = 0; tSize unReadLength = length; tSize readBytes = 0; @@ -167,6 +168,24 @@ namespace MxRec { return WrapperHdfsWrite(fs, file, buffer, length, typeSize); } + tSize WrapperHdfsWrite(hdfsFS fs, hdfsFile file, const void *buffer, tSize length, tSize typeSize) const + { + tSize reTryCount = 0; + tSize unWriteLength = length; + tSize writeBytes = 0; + + while (unWriteLength != 0 && reTryCount < RETRY_COUNT) { + tSize offset = buffer + (length - unWriteLength) / typeSize; + tSize res = hdfsWrite(fs, file, buffer + offset, unWriteLength); + if (res == -1) { + return res; + } + unWriteLength -= res; + writeBytes += res; + } + return writeBytes; + } + int Seek(hdfsFS fs, hdfsFile file, tOffset desiredPos) const { if (hdfsSeek == nullptr) { -- Gitee From ffbc239527973d61032d6d52dea59a469a4d182d Mon Sep 17 00:00:00 2001 From: longfeifei <962977793@qq.com> Date: Tue, 7 May 2024 11:03:32 +0800 Subject: [PATCH 109/302] =?UTF-8?q?hdfs=E4=B8=ADread=E3=80=81write?= =?UTF-8?q?=E5=87=BD=E6=95=B0=E5=8A=A0=E5=9B=BA=EF=BC=8C=E4=BC=98=E5=8C=96?= =?UTF-8?q?=E5=8A=A0=E8=BD=BD=E4=BF=9D=E5=AD=98=E6=97=A5=E5=BF=97?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../hdfs_file_system/hdfs_file_system.cpp | 6 +-- .../hdfs_file_system/hdfs_wrapper.h | 50 ++++++++++++++++--- 2 files changed, 45 insertions(+), 11 deletions(-) diff --git a/src/core/file_system/hdfs_file_system/hdfs_file_system.cpp b/src/core/file_system/hdfs_file_system/hdfs_file_system.cpp index ec9e9bac..7fde1a22 100644 --- a/src/core/file_system/hdfs_file_system/hdfs_file_system.cpp +++ b/src/core/file_system/hdfs_file_system/hdfs_file_system.cpp @@ -81,7 +81,7 @@ ssize_t HdfsFileSystem::Write(const string& filePath, const char* fileContent, s } tSize writeBytesNum = 0; - tSize res = hdfs->Write(fs, file, fileContent, dataSize, sizeof(char)); + tSize res = hdfs->Write(fs, file, fileContent, dataSize); if (res == -1) { hdfs->CloseFile(fs, file); hdfs->Disconnect(fs); @@ -107,7 +107,7 @@ ssize_t HdfsFileSystem::Write(const string& 
filePath, vector fileContent tSize writeBytesNum = 0; size_t loops = fileContent.size(); for (size_t i = 0; i < loops; i++) { - tSize res = hdfs->Write(fs, file, fileContent[i], dataSize, sizeof(float)); + tSize res = hdfs->Write(fs, file, fileContent[i], dataSize); if (res == -1) { hdfs->CloseFile(fs, file); hdfs->Disconnect(fs); @@ -152,7 +152,7 @@ void HdfsFileSystem::WriteEmbedding(const string& filePath, const int& embedding throw runtime_error("Error: Execute aclrtmemcpy from device to host failed."); } - tSize res = hdfs->Write(fs, file, row.data(), embeddingSize * sizeof(float), sizeof(float)); + tSize res = hdfs->Write(fs, file, row.data(), embeddingSize * sizeof(float)); if (res == -1) { hdfs->CloseFile(fs, file); hdfs->Disconnect(fs); diff --git a/src/core/file_system/hdfs_file_system/hdfs_wrapper.h b/src/core/file_system/hdfs_file_system/hdfs_wrapper.h index 78699b01..33f2738b 100644 --- a/src/core/file_system/hdfs_file_system/hdfs_wrapper.h +++ b/src/core/file_system/hdfs_file_system/hdfs_wrapper.h @@ -134,22 +134,40 @@ namespace MxRec { return hdfsCloseFile(fs, file); } - tSize Read(hdfsFS fs, hdfsFile file, void* buffer, tSize length, tSize typeSize) const + tSize Read(hdfsFS fs, hdfsFile file, char* buffer, tSize length) const { if (hdfsRead == nullptr) { throw runtime_error("Failed to obtain the pointer of the function hdfsRead from the libhdfs."); } - return WrapperHdfsRead(fs, file, buffer, length, typeSize); + + tSize reTryCount = 0; + tSize unReadLength = length; + tSize readBytes = 0; + + while (unReadLength != 0 && reTryCount < RETRY_COUNT) { + tSize offset = buffer + (length - unReadLength) / sizeof(char); + tSize res = hdfsRead(fs, file, buffer + offset, unReadLength); + if (res == -1) { + return res; + } + unReadLength -= res; + readBytes += res; + } + return readBytes; } - tSize WrapperHdfsRead(hdfsFS fs, hdfsFile file, void *buffer, tSize length, tSize typeSize) const + tSize Read(hdfsFS fs, hdfsFile file, float* buffer, tSize length) const { + if (hdfsRead == nullptr) { + throw runtime_error("Failed to obtain the pointer of the function hdfsRead from the libhdfs."); + } + tSize reTryCount = 0; tSize unReadLength = length; tSize readBytes = 0; while (unReadLength != 0 && reTryCount < RETRY_COUNT) { - tSize offset = buffer + (length - unReadLength) / typeSize; + tSize offset = buffer + (length - unReadLength) / sizeof(float); tSize res = hdfsRead(fs, file, buffer + offset, unReadLength); if (res == -1) { return res; @@ -160,22 +178,38 @@ namespace MxRec { return readBytes; } - tSize Write(hdfsFS fs, hdfsFile file, const void* buffer, tSize length, tSize typeSize) const + tSize Write(hdfsFS fs, hdfsFile file, const char* buffer, tSize length, tSize typeSize) const { if (hdfsWrite == nullptr) { throw runtime_error("Failed to obtain the pointer of the function hdfsWrite from the libhdfs."); } - return WrapperHdfsWrite(fs, file, buffer, length, typeSize); + tSize reTryCount = 0; + tSize unWriteLength = length; + tSize writeBytes = 0; + + while (unWriteLength != 0 && reTryCount < RETRY_COUNT) { + tSize offset = buffer + (length - unWriteLength) / sizeof(char); + tSize res = hdfsWrite(fs, file, buffer + offset, unWriteLength); + if (res == -1) { + return res; + } + unWriteLength -= res; + writeBytes += res; + } + return writeBytes; } - tSize WrapperHdfsWrite(hdfsFS fs, hdfsFile file, const void *buffer, tSize length, tSize typeSize) const + tSize Write(hdfsFS fs, hdfsFile file, const float* buffer, tSize length, tSize typeSize) const { + if (hdfsWrite == nullptr) { + 
throw runtime_error("Failed to obtain the pointer of the function hdfsWrite from the libhdfs."); + } tSize reTryCount = 0; tSize unWriteLength = length; tSize writeBytes = 0; while (unWriteLength != 0 && reTryCount < RETRY_COUNT) { - tSize offset = buffer + (length - unWriteLength) / typeSize; + tSize offset = buffer + (length - unWriteLength) / sizeof(float); tSize res = hdfsWrite(fs, file, buffer + offset, unWriteLength); if (res == -1) { return res; -- Gitee From a505ea1b0d403d263a82f9dfddf8b5abdc853565 Mon Sep 17 00:00:00 2001 From: penghuiyang <1060916628@qq.com> Date: Tue, 7 May 2024 13:06:22 +0800 Subject: [PATCH 110/302] =?UTF-8?q?1=E3=80=81aclnn=20cmake=E4=BF=AE?= =?UTF-8?q?=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/CMakeLists.txt b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/CMakeLists.txt index c4a727bf..112c0a8c 100644 --- a/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/CMakeLists.txt +++ b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/CMakeLists.txt @@ -52,6 +52,8 @@ link_directories( ) add_executable(execute_op + operator_desc.cpp + op_runner.cpp main.cpp common.cpp ) -- Gitee From 836e97bec2cc3beb902769601672333e34732efb Mon Sep 17 00:00:00 2001 From: penghuiyang <1060916628@qq.com> Date: Tue, 7 May 2024 14:47:00 +0800 Subject: [PATCH 111/302] =?UTF-8?q?1=E3=80=81readme=E8=84=9A=E6=9C=AC?= =?UTF-8?q?=E6=9B=B4=E6=96=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cust_op/fused_lazy_adam/README.md | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/cust_op/fused_lazy_adam/README.md b/cust_op/fused_lazy_adam/README.md index 3c30a40a..fb92806d 100644 --- a/cust_op/fused_lazy_adam/README.md +++ b/cust_op/fused_lazy_adam/README.md @@ -24,7 +24,7 @@ C算子开发手册[Ascend C算子开发](https://www.hiascend.com/document/deta bash run.sh ``` -2. 模型脚本中创建lazy_adam优化器并指定使用融合算子。代码示例: +2. 模型脚本中创建lazy_adam优化器并指定使用融合算子实现。代码示例: ```python from mx_rec.optimizers.lazy_adam import create_hash_optimizer @@ -34,7 +34,7 @@ from mx_rec.optimizers.lazy_adam import create_hash_optimizer sparse_optimizer = create_hash_optimizer(learning_rate=0.001, use_fusion_optim=True) ``` -## lazy_adam优化器同名融合算子lazy_adam +## LazyAdam融合算子 1. 算子分析 @@ -119,15 +119,17 @@ bash run.sh 完成算子的编译部署,编译部署时需要开启算子的二进制编译功能:修改算子工程中的编译配置项文件CMakePresets.json,将 ENABLE_BINARY_PACKAGE设置为True。编译部署时可将算子的二进制部署到当前环境,便于后续算子的调用。 -3. 检查API执行需要的头文件和库文件是否自动生成,针对mxRec,检查cust_op/fused_lazy_adam/lazy_adam/build_out/autogen目录下,是否有 - aclnn_lazy_adam.cpp和aclnn_lazy_adam.h等。 +3. + +检查API执行需要的头文件和库文件是否自动生成,针对融合算子,检查cust_op/fused_lazy_adam/lazy_adam/build_out/autogen目录下,是否有 +aclnn_lazy_adam.cpp和aclnn_lazy_adam.h等。 注意:对于cust_op/fused_lazy_adam/run.sh脚本,安装算子后会删除构建目录。运行单算子测试时,需要屏蔽掉删除rm rf ./lazy_adam这一步,以确保前置条件3。 -### 融合算子 lazy_adam +### LazyAdam融合算子de AclNN调用实现 -针对lazy_adam算子,入口src/main.cpp中: +针对LazyAdam融合算子,入口src/main.cpp中: 1. InitResource函数:初始化AscendCL并运行管理资源申请,不用修改 2. 
RunLookupOp运行算子: @@ -156,14 +158,14 @@ run.sh脚本依次执行: ### scripts脚本 -* gen_data.py:生成lazy_adam算子的输入数据和用于精度校验的golden数据,可自行修改测试相关dim参数。 -* verify_result.py:将算子的输出和脚本生成的golden数据进行精度比对,并输出比较结果。比对规则为:允许误差精度loss:1e-4 +* gen_data.py:生成LazyAdam融合算子的输入数据和用于精度校验的golden数据,可自行修改测试相关dim参数。 +* verify_result.py:将算子的输出和脚本生成的golden数据进行精度比对,并输出比较结果。比对规则为:允许误差精度loss:1e-6 a) 绝对误差 b) 相对误差 c) 误差相对个数 同时满足绝对误差不全小于loss,相对误差不全小于loss,且绝对误差和相对误差大于loss的个数都超过总数的1/loss,也就是 -1/10000(双万分之一),即认为算子精度不达标。其余情况均认为算子达标。 +1/1000000(百万分之一),即认为算子精度不达标。其余情况均认为算子达标。 用户可自行修改允许精度误差范围loss。 \ No newline at end of file -- Gitee From b55b2586c884d5c79fc07ec727bc6050d9e11bb3 Mon Sep 17 00:00:00 2001 From: penghuiyang <1060916628@qq.com> Date: Tue, 7 May 2024 15:10:52 +0800 Subject: [PATCH 112/302] =?UTF-8?q?1=E3=80=81=E8=9E=8D=E5=90=88=E7=AE=97?= =?UTF-8?q?=E5=AD=90readme=E8=84=9A=E6=9C=AC=E6=9B=B4=E6=96=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cust_op/fused_lazy_adam/README.md | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/cust_op/fused_lazy_adam/README.md b/cust_op/fused_lazy_adam/README.md index fb92806d..136a50e7 100644 --- a/cust_op/fused_lazy_adam/README.md +++ b/cust_op/fused_lazy_adam/README.md @@ -16,7 +16,7 @@ 更多详情可以参考CANN官方的Ascend C算子开发手册[Ascend C算子开发](https://www.hiascend.com/document/detail/zh/canncommercial/70RC1/operatordev/Ascendcopdevg/atlas_ascendc_10_0001.html)。 -## lazy_adam融合算子使用 +## LazyAdam融合算子使用 1. 上传fused_lazy_adam文件夹到目标环境,并进入当前目录,执行指令对lazy_adam融合算子进行编译和部署 @@ -24,6 +24,12 @@ C算子开发手册[Ascend C算子开发](https://www.hiascend.com/document/deta bash run.sh ``` +注:需先环境中设置CANN相关环境变量,再执行算子编译和安装指令。使用默认路径安装CANN时设置环境变量指令如下: + +```shell +source /usr/local/Ascend/ascend-toolkit/set_env.sh +``` + 2. 模型脚本中创建lazy_adam优化器并指定使用融合算子实现。代码示例: ```python @@ -34,7 +40,7 @@ from mx_rec.optimizers.lazy_adam import create_hash_optimizer sparse_optimizer = create_hash_optimizer(learning_rate=0.001, use_fusion_optim=True) ``` -## LazyAdam融合算子 +## LazyAdam融合算子介绍 1. 
算子分析 -- Gitee From 18cdbad6d302af3d35360d170cb0bb41ea2a071f Mon Sep 17 00:00:00 2001 From: penghuiyang <1060916628@qq.com> Date: Tue, 7 May 2024 15:14:25 +0800 Subject: [PATCH 113/302] =?UTF-8?q?=E4=BB=A3=E7=A0=81=E6=A3=80=E8=A7=86?= =?UTF-8?q?=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/CMakeLists.txt b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/CMakeLists.txt index 112c0a8c..c2366f4a 100644 --- a/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/CMakeLists.txt +++ b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/CMakeLists.txt @@ -52,9 +52,9 @@ link_directories( ) add_executable(execute_op + main.cpp operator_desc.cpp op_runner.cpp - main.cpp common.cpp ) -- Gitee From e842f98fe595d6b16ee9d15385cd8685a64fbd31 Mon Sep 17 00:00:00 2001 From: longfeifei <962977793@qq.com> Date: Tue, 7 May 2024 15:26:26 +0800 Subject: [PATCH 114/302] =?UTF-8?q?hdfs=E4=B8=ADread=E3=80=81write?= =?UTF-8?q?=E5=87=BD=E6=95=B0=E5=8A=A0=E5=9B=BA=EF=BC=8C=E4=BC=98=E5=8C=96?= =?UTF-8?q?=E5=8A=A0=E8=BD=BD=E4=BF=9D=E5=AD=98=E6=97=A5=E5=BF=97?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core/emb_table/embedding_ddr.cpp | 55 +++++++++++++----------- src/core/emb_table/embedding_dynamic.cpp | 24 ++++++----- src/core/emb_table/embedding_static.cpp | 24 ++++++----- 3 files changed, 57 insertions(+), 46 deletions(-) diff --git a/src/core/emb_table/embedding_ddr.cpp b/src/core/emb_table/embedding_ddr.cpp index 7226f849..ca48230b 100644 --- a/src/core/emb_table/embedding_ddr.cpp +++ b/src/core/emb_table/embedding_ddr.cpp @@ -324,24 +324,26 @@ void EmbeddingDDR::LoadKey(const string& savePath) { size_t fileSize = fileSystemPtr->GetFileSize(ss.str()); if (fileSize >= FILE_MAX_SIZE) { - throw runtime_error("Error: Load keys failed. file {} size {} is too big.", ss.str(), fileSize); + throw runtime_error(StringFormat("Error: Load keys failed. file {} size {} is too big.", ss.str(), fileSize)); } int64_t* buf = static_cast(malloc(fileSize)); if (buf == nullptr) { - throw runtime_error("Error: Load keys failed. failed to allocate {} bytes using malloc.", fileSize); + throw runtime_error( + StringFormat("Error: Load keys failed. failed to allocate {} bytes using malloc.", fileSize)); } ssize_t res = fileSystemPtr->Read(ss.str(), reinterpret_cast(buf), fileSize); if (res == -1) { free(static_cast(buf)); - throw runtime_error("Error: Load keys failed. An error occurred while reading file: {}.", ss.str()); + throw runtime_error( + StringFormat("Error: Load keys failed. An error occurred while reading file: {}.", ss.str())); } if (res != fileSize) { free(static_cast(buf)); - throw runtime_error( - "Error: Load keys failed. Expected to read {} bytes, but actually read {} bytes to file {}.", - fileSize, res, ss.str()); + throw runtime_error(StringFormat( + "Error: Load keys failed. Expected to read {} bytes, but actually read {} bytes to file {}.", fileSize, + res, ss.str())); } size_t loadKeySize = fileSize / sizeof(int64_t); @@ -356,9 +358,9 @@ void EmbeddingDDR::LoadKey(const string& savePath) { } if (keyCount > devVocabSize + hostVocabSize) { free(static_cast(buf)); - throw runtime_error( + throw runtime_error(StringFormat( "Error: Load keys failed. 
Load key size :{} exceeds the sum of device vocab size and host vocab size: {}.", - keyCount, devVocabSize + hostVocabSize); + keyCount, devVocabSize + hostVocabSize)); } else if (keyCount < devVocabSize) { loadOffset.push_back(i); devOffset2Key[keyCount] = buf[i]; @@ -394,13 +396,13 @@ void EmbeddingDDR::LoadEmbAndOptim(const string& savePath) size_t readSize = hostLoadOffset.size() * embSize_ * sizeof(float); ssize_t res = fileSystemPtr->Read(embedStream.str(), table.embData, 0, hostLoadOffset, embSize_); if (res == -1) { - throw runtime_error("Error: Load embeddings failed. An error occurred while reading file: {}.", - embedStream.str()); + throw runtime_error(StringFormat("Error: Load embeddings failed. An error occurred while reading file: {}.", + embedStream.str())); } if (res != readSize) { - throw runtime_error( + throw runtime_error(StringFormat( "Error: Load embeddings failed. Expected to read {} bytes, but actually read {} bytes to file {}.", - readSize, res, embedStream.str()); + readSize, res, embedStream.str())); } // 读optim @@ -411,13 +413,13 @@ void EmbeddingDDR::LoadEmbAndOptim(const string& savePath) ssize_t res = fileSystemPtr->Read(paramStream.str(), table.embData, optimIndex, hostLoadOffset, embSize_); if (res == -1) { - throw runtime_error("Error: Load optimizers failed. An error occurred while reading file: {}.", - paramStream.str()); + throw runtime_error(StringFormat("Error: Load optimizers failed. An error occurred while reading file: {}.", + paramStream.str())); } if (res != readSize) { - throw runtime_error( + throw runtime_error(StringFormat( "Error: Load embeddings failed. Expected to read {} bytes, but actually read {} bytes to file {}.", - readSize, res, paramStream.str()); + readSize, res, paramStream.str())); } optimIndex++; } @@ -452,12 +454,13 @@ void EmbeddingDDR::SaveKey(const string& savePath) { size_t writeSize = static_cast(hostKey.size() * sizeof(int64_t)); ssize_t res = fileSystemPtr->Write(ss.str(), reinterpret_cast(hostKey.data()), writeSize); if (res == -1) { - throw runtime_error("Error: Save keys failed. An error occurred while writing file: {}.", ss.str()); + throw runtime_error( + StringFormat("Error: Save keys failed. An error occurred while writing file: {}.", ss.str())); } if (res != writeSize) { - throw runtime_error( + throw runtime_error(StringFormat( "Error: Save keys failed. Expected to write {} bytes, but actually write {} bytes to file {}.", - writeSize, res, ss.str()); + writeSize, res, ss.str())); } } @@ -474,12 +477,13 @@ void EmbeddingDDR::SaveEmbData(const string& savePath) size_t writeSize = embSize_ * sizeof(float) * embContent.size(); ssize_t res = fileSystemPtr->Write(ss.str(), embContent, embSize_ * sizeof(float)); if (res == -1) { - throw runtime_error("Error: Save embeddings failed. An error occurred while writing file: {}.", ss.str()); + throw runtime_error( + StringFormat("Error: Save embeddings failed. An error occurred while writing file: {}.", ss.str())); } if (res != writeSize) { - throw runtime_error( + throw runtime_error(StringFormat( "Error: Save embeddings failed. Expected to write {} bytes, but actually write {} bytes to file {}.", - writeSize, res, ss.str()); + writeSize, res, ss.str())); } } @@ -498,12 +502,13 @@ void EmbeddingDDR::SaveOptimData(const string& savePath) ssize_t res = fileSystemPtr->Write(ss.str(), content.second, embSize_ * sizeof(float)); if (res == -1) { - throw runtime_error("Error: Save optimizers failed. 
An error occurred while writing file: {}.", ss.str()); + throw runtime_error( + StringFormat("Error: Save optimizers failed. An error occurred while writing file: {}.", ss.str())); } if (res != writeSize) { - throw runtime_error( + throw runtime_error(StringFormat( "Error: Save optimizers failed. Expected to write {} bytes, but actually write {} bytes to file {}.", - writeSize, res, ss.str()); + writeSize, res, ss.str())); } } } diff --git a/src/core/emb_table/embedding_dynamic.cpp b/src/core/emb_table/embedding_dynamic.cpp index f81f2ab7..a4562d10 100644 --- a/src/core/emb_table/embedding_dynamic.cpp +++ b/src/core/emb_table/embedding_dynamic.cpp @@ -153,12 +153,13 @@ void EmbeddingDynamic::SaveKey(const string& savePath) size_t writeSize = static_cast(deviceKey.size() * sizeof(int64_t)); ssize_t res = fileSystemPtr->Write(ss.str(), reinterpret_cast(deviceKey.data()), writeSize); if (res == -1) { - throw runtime_error("Error: Save keys failed. An error occurred while writing file: {}.", ss.str()); + throw runtime_error( + StringFormat("Error: Save keys failed. An error occurred while writing file: {}.", ss.str())); } if (res != writeSize) { - throw runtime_error( + throw runtime_error(StringFormat( "Error: Save keys failed. Expected to write {} bytes, but actually write {} bytes to file {}.", - writeSize, res, ss.str()); + writeSize, res, ss.str())); } } @@ -255,22 +256,24 @@ int EmbeddingDynamic::LoadKey(const string& savePath) size_t fileSize = fileSystemPtr->GetFileSize(ss.str()); if (fileSize >= FILE_MAX_SIZE) { - throw runtime_error("Error: Load keys failed. file {} size {} is too big.", ss.str(), fileSize); + throw runtime_error(StringFormat("Error: Load keys failed. file {} size {} is too big.", ss.str(), fileSize)); } int64_t* buf = static_cast(malloc(fileSize)); if (buf == nullptr) { - throw runtime_error("Error: Load keys failed. failed to allocate {} bytes using malloc.", fileSize); + throw runtime_error( + StringFormat("Error: Load keys failed. failed to allocate {} bytes using malloc.", fileSize)); } ssize_t res = fileSystemPtr->Read(ss.str(), reinterpret_cast(buf), fileSize); if (res == -1) { - throw runtime_error("Error: Load keys failed. An error occurred while reading file: {}.", ss.str()); + throw runtime_error( + StringFormat("Error: Load keys failed. An error occurred while reading file: {}.", ss.str())); } if (res != fileSize) { - throw runtime_error( - "Error: Load keys failed. Expected to read {} bytes, but actually read {} bytes to file {}.", - fileSize, res, ss.str()); + throw runtime_error(StringFormat( + "Error: Load keys failed. 
Expected to read {} bytes, but actually read {} bytes to file {}.", fileSize, + res, ss.str())); } size_t loadKeySize = fileSize / sizeof(int64_t); @@ -288,7 +291,8 @@ int EmbeddingDynamic::LoadKey(const string& savePath) void *newBlock = nullptr; aclError ret = aclrtMalloc(&newBlock, static_cast(datasetSize), ACL_MEM_MALLOC_HUGE_FIRST); if (ret != ACL_SUCCESS) { - throw runtime_error("Error: in dynamic expansion mode, aclrtMalloc failed, malloc size: {}.", datasetSize); + throw runtime_error( + StringFormat("Error: in dynamic expansion mode, aclrtMalloc failed, malloc size: {}.", datasetSize)); } // 此处的 newBlock -> first address; // 对key_offset map 进行一个恢复操作 diff --git a/src/core/emb_table/embedding_static.cpp b/src/core/emb_table/embedding_static.cpp index 3c741e46..caf15e7c 100644 --- a/src/core/emb_table/embedding_static.cpp +++ b/src/core/emb_table/embedding_static.cpp @@ -97,12 +97,13 @@ int EmbeddingStatic::SaveKey(const string& savePath) size_t writeSize = static_cast(deviceKey.size() * sizeof(int64_t)); ssize_t res = fileSystemPtr->Write(ss.str(), reinterpret_cast(deviceKey.data()), writeSize); if (res == -1) { - throw runtime_error("Error: Save keys failed. An error occurred while writing file: {}.", ss.str()); + throw runtime_error( + StringFormat("Error: Save keys failed. An error occurred while writing file: {}.", ss.str())); } if (res != writeSize) { - throw runtime_error( + throw runtime_error(StringFormat( "Error: Save keys failed. Expected to write {} bytes, but actually write {} bytes to file {}.", - writeSize, res, ss.str()); + writeSize, res, ss.str())); } } @@ -121,22 +122,23 @@ void EmbeddingStatic::LoadKey(const string &savePath) size_t fileSize = fileSystemPtr->GetFileSize(ss.str()); if (fileSize >= FILE_MAX_SIZE) { - throw runtime_error("Error: Load keys failed. file {} size {} is too big.", ss.str(), fileSize); + throw runtime_error(StringFormat("Error: Load keys failed. file {} size {} is too big.", ss.str(), fileSize)); } int64_t* buf = static_cast(malloc(fileSize)); if (buf == nullptr) { - throw runtime_error("Error: Load keys failed. failed to allocate {} bytes using malloc.", fileSize); + throw runtime_error(StringFormat("Error: Load keys failed. failed to allocate {} bytes using malloc.", fileSize)); } ssize_t res = fileSystemPtr->Read(ss.str(), reinterpret_cast(buf), fileSize); if (res == -1) { - throw runtime_error("Error: Load keys failed. An error occurred while reading file: {}.", ss.str()); + throw runtime_error( + StringFormat("Error: Load keys failed. An error occurred while reading file: {}.", ss.str())); } if (res != fileSize) { - throw runtime_error( - "Error: Load keys failed. Expected to read {} bytes, but actually read {} bytes to file {}.", - fileSize, res, ss.str()); + throw runtime_error(StringFormat( + "Error: Load keys failed. Expected to read {} bytes, but actually read {} bytes to file {}.", fileSize, + res, ss.str())); } size_t loadKeySize = fileSize / sizeof(int64_t); @@ -152,8 +154,8 @@ void EmbeddingStatic::LoadKey(const string &savePath) if (loadOffset.size() > devVocabSize) { free(static_cast(buf)); - throw runtime_error("Error: Load keys failed. Load key size :{} exceeds device vocab size: {}.", - loadOffset.size(), devVocabSize); + throw runtime_error(StringFormat("Error: Load keys failed. 
Load key size :{} exceeds device vocab size: {}.", + loadOffset.size(), devVocabSize)); } maxOffset = keyOffsetMap.size(); -- Gitee From 741a8b70eaa547e407be3e49615f61a6666f196c Mon Sep 17 00:00:00 2001 From: longfeifei <962977793@qq.com> Date: Tue, 7 May 2024 15:26:26 +0800 Subject: [PATCH 115/302] =?UTF-8?q?hdfs=E4=B8=ADread=E3=80=81write?= =?UTF-8?q?=E5=87=BD=E6=95=B0=E5=8A=A0=E5=9B=BA=EF=BC=8C=E4=BC=98=E5=8C=96?= =?UTF-8?q?=E5=8A=A0=E8=BD=BD=E4=BF=9D=E5=AD=98=E6=97=A5=E5=BF=97?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core/emb_table/embedding_ddr.cpp | 55 ++++++++++--------- src/core/emb_table/embedding_dynamic.cpp | 24 ++++---- src/core/emb_table/embedding_static.cpp | 24 ++++---- .../hdfs_file_system/hdfs_wrapper.h | 8 +-- 4 files changed, 61 insertions(+), 50 deletions(-) diff --git a/src/core/emb_table/embedding_ddr.cpp b/src/core/emb_table/embedding_ddr.cpp index 7226f849..ca48230b 100644 --- a/src/core/emb_table/embedding_ddr.cpp +++ b/src/core/emb_table/embedding_ddr.cpp @@ -324,24 +324,26 @@ void EmbeddingDDR::LoadKey(const string& savePath) { size_t fileSize = fileSystemPtr->GetFileSize(ss.str()); if (fileSize >= FILE_MAX_SIZE) { - throw runtime_error("Error: Load keys failed. file {} size {} is too big.", ss.str(), fileSize); + throw runtime_error(StringFormat("Error: Load keys failed. file {} size {} is too big.", ss.str(), fileSize)); } int64_t* buf = static_cast(malloc(fileSize)); if (buf == nullptr) { - throw runtime_error("Error: Load keys failed. failed to allocate {} bytes using malloc.", fileSize); + throw runtime_error( + StringFormat("Error: Load keys failed. failed to allocate {} bytes using malloc.", fileSize)); } ssize_t res = fileSystemPtr->Read(ss.str(), reinterpret_cast(buf), fileSize); if (res == -1) { free(static_cast(buf)); - throw runtime_error("Error: Load keys failed. An error occurred while reading file: {}.", ss.str()); + throw runtime_error( + StringFormat("Error: Load keys failed. An error occurred while reading file: {}.", ss.str())); } if (res != fileSize) { free(static_cast(buf)); - throw runtime_error( - "Error: Load keys failed. Expected to read {} bytes, but actually read {} bytes to file {}.", - fileSize, res, ss.str()); + throw runtime_error(StringFormat( + "Error: Load keys failed. Expected to read {} bytes, but actually read {} bytes to file {}.", fileSize, + res, ss.str())); } size_t loadKeySize = fileSize / sizeof(int64_t); @@ -356,9 +358,9 @@ void EmbeddingDDR::LoadKey(const string& savePath) { } if (keyCount > devVocabSize + hostVocabSize) { free(static_cast(buf)); - throw runtime_error( + throw runtime_error(StringFormat( "Error: Load keys failed. Load key size :{} exceeds the sum of device vocab size and host vocab size: {}.", - keyCount, devVocabSize + hostVocabSize); + keyCount, devVocabSize + hostVocabSize)); } else if (keyCount < devVocabSize) { loadOffset.push_back(i); devOffset2Key[keyCount] = buf[i]; @@ -394,13 +396,13 @@ void EmbeddingDDR::LoadEmbAndOptim(const string& savePath) size_t readSize = hostLoadOffset.size() * embSize_ * sizeof(float); ssize_t res = fileSystemPtr->Read(embedStream.str(), table.embData, 0, hostLoadOffset, embSize_); if (res == -1) { - throw runtime_error("Error: Load embeddings failed. An error occurred while reading file: {}.", - embedStream.str()); + throw runtime_error(StringFormat("Error: Load embeddings failed. 
An error occurred while reading file: {}.", + embedStream.str())); } if (res != readSize) { - throw runtime_error( + throw runtime_error(StringFormat( "Error: Load embeddings failed. Expected to read {} bytes, but actually read {} bytes to file {}.", - readSize, res, embedStream.str()); + readSize, res, embedStream.str())); } // 读optim @@ -411,13 +413,13 @@ void EmbeddingDDR::LoadEmbAndOptim(const string& savePath) ssize_t res = fileSystemPtr->Read(paramStream.str(), table.embData, optimIndex, hostLoadOffset, embSize_); if (res == -1) { - throw runtime_error("Error: Load optimizers failed. An error occurred while reading file: {}.", - paramStream.str()); + throw runtime_error(StringFormat("Error: Load optimizers failed. An error occurred while reading file: {}.", + paramStream.str())); } if (res != readSize) { - throw runtime_error( + throw runtime_error(StringFormat( "Error: Load embeddings failed. Expected to read {} bytes, but actually read {} bytes to file {}.", - readSize, res, paramStream.str()); + readSize, res, paramStream.str())); } optimIndex++; } @@ -452,12 +454,13 @@ void EmbeddingDDR::SaveKey(const string& savePath) { size_t writeSize = static_cast(hostKey.size() * sizeof(int64_t)); ssize_t res = fileSystemPtr->Write(ss.str(), reinterpret_cast(hostKey.data()), writeSize); if (res == -1) { - throw runtime_error("Error: Save keys failed. An error occurred while writing file: {}.", ss.str()); + throw runtime_error( + StringFormat("Error: Save keys failed. An error occurred while writing file: {}.", ss.str())); } if (res != writeSize) { - throw runtime_error( + throw runtime_error(StringFormat( "Error: Save keys failed. Expected to write {} bytes, but actually write {} bytes to file {}.", - writeSize, res, ss.str()); + writeSize, res, ss.str())); } } @@ -474,12 +477,13 @@ void EmbeddingDDR::SaveEmbData(const string& savePath) size_t writeSize = embSize_ * sizeof(float) * embContent.size(); ssize_t res = fileSystemPtr->Write(ss.str(), embContent, embSize_ * sizeof(float)); if (res == -1) { - throw runtime_error("Error: Save embeddings failed. An error occurred while writing file: {}.", ss.str()); + throw runtime_error( + StringFormat("Error: Save embeddings failed. An error occurred while writing file: {}.", ss.str())); } if (res != writeSize) { - throw runtime_error( + throw runtime_error(StringFormat( "Error: Save embeddings failed. Expected to write {} bytes, but actually write {} bytes to file {}.", - writeSize, res, ss.str()); + writeSize, res, ss.str())); } } @@ -498,12 +502,13 @@ void EmbeddingDDR::SaveOptimData(const string& savePath) ssize_t res = fileSystemPtr->Write(ss.str(), content.second, embSize_ * sizeof(float)); if (res == -1) { - throw runtime_error("Error: Save optimizers failed. An error occurred while writing file: {}.", ss.str()); + throw runtime_error( + StringFormat("Error: Save optimizers failed. An error occurred while writing file: {}.", ss.str())); } if (res != writeSize) { - throw runtime_error( + throw runtime_error(StringFormat( "Error: Save optimizers failed. 
Expected to write {} bytes, but actually write {} bytes to file {}.", - writeSize, res, ss.str()); + writeSize, res, ss.str())); } } } diff --git a/src/core/emb_table/embedding_dynamic.cpp b/src/core/emb_table/embedding_dynamic.cpp index f81f2ab7..a4562d10 100644 --- a/src/core/emb_table/embedding_dynamic.cpp +++ b/src/core/emb_table/embedding_dynamic.cpp @@ -153,12 +153,13 @@ void EmbeddingDynamic::SaveKey(const string& savePath) size_t writeSize = static_cast(deviceKey.size() * sizeof(int64_t)); ssize_t res = fileSystemPtr->Write(ss.str(), reinterpret_cast(deviceKey.data()), writeSize); if (res == -1) { - throw runtime_error("Error: Save keys failed. An error occurred while writing file: {}.", ss.str()); + throw runtime_error( + StringFormat("Error: Save keys failed. An error occurred while writing file: {}.", ss.str())); } if (res != writeSize) { - throw runtime_error( + throw runtime_error(StringFormat( "Error: Save keys failed. Expected to write {} bytes, but actually write {} bytes to file {}.", - writeSize, res, ss.str()); + writeSize, res, ss.str())); } } @@ -255,22 +256,24 @@ int EmbeddingDynamic::LoadKey(const string& savePath) size_t fileSize = fileSystemPtr->GetFileSize(ss.str()); if (fileSize >= FILE_MAX_SIZE) { - throw runtime_error("Error: Load keys failed. file {} size {} is too big.", ss.str(), fileSize); + throw runtime_error(StringFormat("Error: Load keys failed. file {} size {} is too big.", ss.str(), fileSize)); } int64_t* buf = static_cast(malloc(fileSize)); if (buf == nullptr) { - throw runtime_error("Error: Load keys failed. failed to allocate {} bytes using malloc.", fileSize); + throw runtime_error( + StringFormat("Error: Load keys failed. failed to allocate {} bytes using malloc.", fileSize)); } ssize_t res = fileSystemPtr->Read(ss.str(), reinterpret_cast(buf), fileSize); if (res == -1) { - throw runtime_error("Error: Load keys failed. An error occurred while reading file: {}.", ss.str()); + throw runtime_error( + StringFormat("Error: Load keys failed. An error occurred while reading file: {}.", ss.str())); } if (res != fileSize) { - throw runtime_error( - "Error: Load keys failed. Expected to read {} bytes, but actually read {} bytes to file {}.", - fileSize, res, ss.str()); + throw runtime_error(StringFormat( + "Error: Load keys failed. Expected to read {} bytes, but actually read {} bytes to file {}.", fileSize, + res, ss.str())); } size_t loadKeySize = fileSize / sizeof(int64_t); @@ -288,7 +291,8 @@ int EmbeddingDynamic::LoadKey(const string& savePath) void *newBlock = nullptr; aclError ret = aclrtMalloc(&newBlock, static_cast(datasetSize), ACL_MEM_MALLOC_HUGE_FIRST); if (ret != ACL_SUCCESS) { - throw runtime_error("Error: in dynamic expansion mode, aclrtMalloc failed, malloc size: {}.", datasetSize); + throw runtime_error( + StringFormat("Error: in dynamic expansion mode, aclrtMalloc failed, malloc size: {}.", datasetSize)); } // 此处的 newBlock -> first address; // 对key_offset map 进行一个恢复操作 diff --git a/src/core/emb_table/embedding_static.cpp b/src/core/emb_table/embedding_static.cpp index 3c741e46..caf15e7c 100644 --- a/src/core/emb_table/embedding_static.cpp +++ b/src/core/emb_table/embedding_static.cpp @@ -97,12 +97,13 @@ int EmbeddingStatic::SaveKey(const string& savePath) size_t writeSize = static_cast(deviceKey.size() * sizeof(int64_t)); ssize_t res = fileSystemPtr->Write(ss.str(), reinterpret_cast(deviceKey.data()), writeSize); if (res == -1) { - throw runtime_error("Error: Save keys failed. 
An error occurred while writing file: {}.", ss.str()); + throw runtime_error( + StringFormat("Error: Save keys failed. An error occurred while writing file: {}.", ss.str())); } if (res != writeSize) { - throw runtime_error( + throw runtime_error(StringFormat( "Error: Save keys failed. Expected to write {} bytes, but actually write {} bytes to file {}.", - writeSize, res, ss.str()); + writeSize, res, ss.str())); } } @@ -121,22 +122,23 @@ void EmbeddingStatic::LoadKey(const string &savePath) size_t fileSize = fileSystemPtr->GetFileSize(ss.str()); if (fileSize >= FILE_MAX_SIZE) { - throw runtime_error("Error: Load keys failed. file {} size {} is too big.", ss.str(), fileSize); + throw runtime_error(StringFormat("Error: Load keys failed. file {} size {} is too big.", ss.str(), fileSize)); } int64_t* buf = static_cast(malloc(fileSize)); if (buf == nullptr) { - throw runtime_error("Error: Load keys failed. failed to allocate {} bytes using malloc.", fileSize); + throw runtime_error(StringFormat("Error: Load keys failed. failed to allocate {} bytes using malloc.", fileSize)); } ssize_t res = fileSystemPtr->Read(ss.str(), reinterpret_cast(buf), fileSize); if (res == -1) { - throw runtime_error("Error: Load keys failed. An error occurred while reading file: {}.", ss.str()); + throw runtime_error( + StringFormat("Error: Load keys failed. An error occurred while reading file: {}.", ss.str())); } if (res != fileSize) { - throw runtime_error( - "Error: Load keys failed. Expected to read {} bytes, but actually read {} bytes to file {}.", - fileSize, res, ss.str()); + throw runtime_error(StringFormat( + "Error: Load keys failed. Expected to read {} bytes, but actually read {} bytes to file {}.", fileSize, + res, ss.str())); } size_t loadKeySize = fileSize / sizeof(int64_t); @@ -152,8 +154,8 @@ void EmbeddingStatic::LoadKey(const string &savePath) if (loadOffset.size() > devVocabSize) { free(static_cast(buf)); - throw runtime_error("Error: Load keys failed. Load key size :{} exceeds device vocab size: {}.", - loadOffset.size(), devVocabSize); + throw runtime_error(StringFormat("Error: Load keys failed. 
Load key size :{} exceeds device vocab size: {}.", + loadOffset.size(), devVocabSize)); } maxOffset = keyOffsetMap.size(); diff --git a/src/core/file_system/hdfs_file_system/hdfs_wrapper.h b/src/core/file_system/hdfs_file_system/hdfs_wrapper.h index 33f2738b..205e5365 100644 --- a/src/core/file_system/hdfs_file_system/hdfs_wrapper.h +++ b/src/core/file_system/hdfs_file_system/hdfs_wrapper.h @@ -145,7 +145,7 @@ namespace MxRec { tSize readBytes = 0; while (unReadLength != 0 && reTryCount < RETRY_COUNT) { - tSize offset = buffer + (length - unReadLength) / sizeof(char); + tSize offset = (length - unReadLength) / sizeof(char); tSize res = hdfsRead(fs, file, buffer + offset, unReadLength); if (res == -1) { return res; @@ -167,7 +167,7 @@ namespace MxRec { tSize readBytes = 0; while (unReadLength != 0 && reTryCount < RETRY_COUNT) { - tSize offset = buffer + (length - unReadLength) / sizeof(float); + tSize offset = (length - unReadLength) / sizeof(float); tSize res = hdfsRead(fs, file, buffer + offset, unReadLength); if (res == -1) { return res; @@ -188,7 +188,7 @@ namespace MxRec { tSize writeBytes = 0; while (unWriteLength != 0 && reTryCount < RETRY_COUNT) { - tSize offset = buffer + (length - unWriteLength) / sizeof(char); + tSize offset = (length - unWriteLength) / sizeof(char); tSize res = hdfsWrite(fs, file, buffer + offset, unWriteLength); if (res == -1) { return res; @@ -209,7 +209,7 @@ namespace MxRec { tSize writeBytes = 0; while (unWriteLength != 0 && reTryCount < RETRY_COUNT) { - tSize offset = buffer + (length - unWriteLength) / sizeof(float); + tSize offset = (length - unWriteLength) / sizeof(float); tSize res = hdfsWrite(fs, file, buffer + offset, unWriteLength); if (res == -1) { return res; -- Gitee From 61525ff68b3798bf86f46fae48b3b4a964acffea Mon Sep 17 00:00:00 2001 From: penghuiyang <1060916628@qq.com> Date: Tue, 7 May 2024 15:38:52 +0800 Subject: [PATCH 116/302] =?UTF-8?q?=E5=8F=82=E6=95=B0=E6=A0=A1=E9=AA=8C?= =?UTF-8?q?=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mx_rec/optimizers/lazy_adam.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mx_rec/optimizers/lazy_adam.py b/mx_rec/optimizers/lazy_adam.py index 81c8ecba..875f350f 100644 --- a/mx_rec/optimizers/lazy_adam.py +++ b/mx_rec/optimizers/lazy_adam.py @@ -42,7 +42,7 @@ from mx_rec.validator.validator import para_checker_decorator, StringValidator, ("beta2", FloatValidator, {"min_value": 0.0, "max_value": 1.0}, ["check_value"]), ("epsilon", FloatValidator, {"min_value": 0.0, "max_value": 1.0}, ["check_value_for_left_open_interval"]), ("name", StringValidator, {"min_len": 1, "max_len": 200}, ["check_string_length"]), - ("use_fusion_optim", ClassValidator, {"classes": (bool, type(None))}), + ("use_fusion_optim", ClassValidator, {"classes": (bool,)}), ]) def create_hash_optimizer(learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8, name="LazyAdam", use_fusion_optim=False): @@ -178,7 +178,7 @@ class CustomizedLazyAdam(adam.AdamOptimizer, CustomizedOptimizer): nd_indices = tf.expand_dims(indices, 1) slot_m = self.get_slot(var, "m") slot_v = self.get_slot(var, "v") - output_m, output_v, output_var =\ + output_m, output_v, output_var = \ import_host_pipeline_ops().lazy_adam(grad, nd_indices, slot_m, slot_v, var, learning_rate, self._custom_initial_beta1, self._custom_initial_beta2, self._custom_initial_epsilon) -- Gitee From 199e06dd2506248e9830d81edb3d0720c080be64 Mon Sep 17 00:00:00 2001 From: penghuiyang <1060916628@qq.com> 
Date: Tue, 7 May 2024 15:44:19 +0800 Subject: [PATCH 117/302] =?UTF-8?q?readme=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cust_op/fused_lazy_adam/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cust_op/fused_lazy_adam/README.md b/cust_op/fused_lazy_adam/README.md index 136a50e7..c42d1bfe 100644 --- a/cust_op/fused_lazy_adam/README.md +++ b/cust_op/fused_lazy_adam/README.md @@ -127,15 +127,15 @@ ENABLE_BINARY_PACKAGE设置为True。编译部署时可将算子的二进制部 3. -检查API执行需要的头文件和库文件是否自动生成,针对融合算子,检查cust_op/fused_lazy_adam/lazy_adam/build_out/autogen目录下,是否有 +检查API执行需要的头文件和库文件是否自动生成,检查cust_op/fused_lazy_adam/lazy_adam/build_out/autogen目录下,是否有 aclnn_lazy_adam.cpp和aclnn_lazy_adam.h等。 注意:对于cust_op/fused_lazy_adam/run.sh脚本,安装算子后会删除构建目录。运行单算子测试时,需要屏蔽掉删除rm rf ./lazy_adam这一步,以确保前置条件3。 -### LazyAdam融合算子de AclNN调用实现 +### LazyAdam融合算子的AclNN调用实现 -针对LazyAdam融合算子,入口src/main.cpp中: +调用入口在src/main.cpp中: 1. InitResource函数:初始化AscendCL并运行管理资源申请,不用修改 2. RunLookupOp运行算子: -- Gitee From fb792fec233d602971287ccbd7f5c06ff5ea0139 Mon Sep 17 00:00:00 2001 From: penghuiyang <1060916628@qq.com> Date: Tue, 7 May 2024 08:05:17 +0000 Subject: [PATCH 118/302] =?UTF-8?q?!121=20=E8=9E=8D=E5=90=88=E7=AE=97?= =?UTF-8?q?=E5=AD=90=E9=80=82=E9=85=8D=20*=20readme=E4=BF=AE=E6=94=B9=20*?= =?UTF-8?q?=20=E5=8F=82=E6=95=B0=E6=A0=A1=E9=AA=8C=E4=BF=AE=E6=94=B9=20*?= =?UTF-8?q?=20=E4=BB=A3=E7=A0=81=E6=A3=80=E8=A7=86=E4=BF=AE=E6=94=B9=20*?= =?UTF-8?q?=201=E3=80=81=E8=9E=8D=E5=90=88=E7=AE=97=E5=AD=90readme?= =?UTF-8?q?=E8=84=9A=E6=9C=AC=E6=9B=B4=E6=96=B0=20*=201=E3=80=81readme?= =?UTF-8?q?=E8=84=9A=E6=9C=AC=E6=9B=B4=E6=96=B0=20*=201=E3=80=81aclnn=20cm?= =?UTF-8?q?ake=E4=BF=AE=E6=94=B9=20*=201=E3=80=81readme=E8=84=9A=E6=9C=AC?= =?UTF-8?q?=E6=9B=B4=E6=96=B0=20*=201=E3=80=81readme=E8=84=9A=E6=9C=AC?= =?UTF-8?q?=E6=9B=B4=E6=96=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- build/gen_mxrec_tar_pkg.sh | 4 ++ cust_op/fused_lazy_adam/README.md | 40 +++++++++++++------ .../aclnn_lazy_adam_test/src/CMakeLists.txt | 2 + mx_rec/optimizers/lazy_adam.py | 32 ++++++++++++--- 4 files changed, 60 insertions(+), 18 deletions(-) diff --git a/build/gen_mxrec_tar_pkg.sh b/build/gen_mxrec_tar_pkg.sh index 72ccfe49..3b6a9713 100644 --- a/build/gen_mxrec_tar_pkg.sh +++ b/build/gen_mxrec_tar_pkg.sh @@ -56,6 +56,10 @@ function gen_tar_file() chmod 640 *.json chmod 550 op_host op_kernel op_host/* op_kernel/* cd - + cd ./build/"${pkg_dir}"/cust_op/ + chmod 550 -R fused_lazy_adam + chmod 640 fused_lazy_adam/*.json + cd - cd ./build tar -zvcf "${release_tar}" "${pkg_dir}" || { warn "compression failed, packages might be broken" diff --git a/cust_op/fused_lazy_adam/README.md b/cust_op/fused_lazy_adam/README.md index 42f5bfc9..c42d1bfe 100644 --- a/cust_op/fused_lazy_adam/README.md +++ b/cust_op/fused_lazy_adam/README.md @@ -16,17 +16,31 @@ 更多详情可以参考CANN官方的Ascend C算子开发手册[Ascend C算子开发](https://www.hiascend.com/document/detail/zh/canncommercial/70RC1/operatordev/Ascendcopdevg/atlas_ascendc_10_0001.html)。 -## lazy_adam融合算子使用 +## LazyAdam融合算子使用 -1. 进入当前目录,执行指令进行编译和部署lazy_adam融合算子 +1. 上传fused_lazy_adam文件夹到目标环境,并进入当前目录,执行指令对lazy_adam融合算子进行编译和部署 -``` +```shell bash run.sh ``` -2. 模型py脚本中导入mxRec中的lazy_adam优化器。lazy_adam优化器使用知道参考mxRec用户指南。 +注:需先环境中设置CANN相关环境变量,再执行算子编译和安装指令。使用默认路径安装CANN时设置环境变量指令如下: + +```shell +source /usr/local/Ascend/ascend-toolkit/set_env.sh +``` + +2. 
模型脚本中创建lazy_adam优化器并指定使用融合算子实现。代码示例: + +```python +from mx_rec.optimizers.lazy_adam import create_hash_optimizer -## lazy_adam优化器同名融合算子lazy_adam +# 创建lazy_adam优化器时增加"use_fusion_optim=True"参数,表示使用融合算子实现。use_fusion_optim参数默认值为False。 +# lazy_adam优化器详细使用指导请参考mxRec用户指南。 +sparse_optimizer = create_hash_optimizer(learning_rate=0.001, use_fusion_optim=True) +``` + +## LazyAdam融合算子介绍 1. 算子分析 @@ -111,15 +125,17 @@ bash run.sh 完成算子的编译部署,编译部署时需要开启算子的二进制编译功能:修改算子工程中的编译配置项文件CMakePresets.json,将 ENABLE_BINARY_PACKAGE设置为True。编译部署时可将算子的二进制部署到当前环境,便于后续算子的调用。 -3. 检查API执行需要的头文件和库文件是否自动生成,针对mxRec,检查cust_op/fused_lazy_adam/lazy_adam/build_out/autogen目录下,是否有 - aclnn_lazy_adam.cpp和aclnn_lazy_adam.h等。 +3. + +检查API执行需要的头文件和库文件是否自动生成,检查cust_op/fused_lazy_adam/lazy_adam/build_out/autogen目录下,是否有 +aclnn_lazy_adam.cpp和aclnn_lazy_adam.h等。 注意:对于cust_op/fused_lazy_adam/run.sh脚本,安装算子后会删除构建目录。运行单算子测试时,需要屏蔽掉删除rm rf ./lazy_adam这一步,以确保前置条件3。 -### 融合算子 lazy_adam +### LazyAdam融合算子的AclNN调用实现 -针对lazy_adam算子,入口src/main.cpp中: +调用入口在src/main.cpp中: 1. InitResource函数:初始化AscendCL并运行管理资源申请,不用修改 2. RunLookupOp运行算子: @@ -148,14 +164,14 @@ run.sh脚本依次执行: ### scripts脚本 -* gen_data.py:生成lazy_adam算子的输入数据和用于精度校验的golden数据,可自行修改测试相关dim参数。 -* verify_result.py:将算子的输出和脚本生成的golden数据进行精度比对,并输出比较结果。比对规则为:允许误差精度loss:1e-4 +* gen_data.py:生成LazyAdam融合算子的输入数据和用于精度校验的golden数据,可自行修改测试相关dim参数。 +* verify_result.py:将算子的输出和脚本生成的golden数据进行精度比对,并输出比较结果。比对规则为:允许误差精度loss:1e-6 a) 绝对误差 b) 相对误差 c) 误差相对个数 同时满足绝对误差不全小于loss,相对误差不全小于loss,且绝对误差和相对误差大于loss的个数都超过总数的1/loss,也就是 -1/10000(双万分之一),即认为算子精度不达标。其余情况均认为算子达标。 +1/1000000(百万分之一),即认为算子精度不达标。其余情况均认为算子达标。 用户可自行修改允许精度误差范围loss。 \ No newline at end of file diff --git a/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/CMakeLists.txt b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/CMakeLists.txt index c4a727bf..c2366f4a 100644 --- a/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/CMakeLists.txt +++ b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/CMakeLists.txt @@ -53,6 +53,8 @@ link_directories( add_executable(execute_op main.cpp + operator_desc.cpp + op_runner.cpp common.cpp ) diff --git a/mx_rec/optimizers/lazy_adam.py b/mx_rec/optimizers/lazy_adam.py index 1f491d14..875f350f 100644 --- a/mx_rec/optimizers/lazy_adam.py +++ b/mx_rec/optimizers/lazy_adam.py @@ -32,7 +32,8 @@ from tensorflow.python.training import slot_creator from mx_rec.optimizers.base import CustomizedOptimizer from mx_rec.util.initialize import ConfigInitializer -from mx_rec.validator.validator import para_checker_decorator, StringValidator, FloatValidator +from mx_rec.util.ops import import_host_pipeline_ops +from mx_rec.validator.validator import para_checker_decorator, StringValidator, FloatValidator, ClassValidator @para_checker_decorator(check_option_list=[ @@ -40,9 +41,11 @@ from mx_rec.validator.validator import para_checker_decorator, StringValidator, ("beta1", FloatValidator, {"min_value": 0.0, "max_value": 1.0}, ["check_value_for_open_interval"]), ("beta2", FloatValidator, {"min_value": 0.0, "max_value": 1.0}, ["check_value"]), ("epsilon", FloatValidator, {"min_value": 0.0, "max_value": 1.0}, ["check_value_for_left_open_interval"]), - ("name", StringValidator, {"min_len": 1, "max_len": 200}, ["check_string_length"]) + ("name", StringValidator, {"min_len": 1, "max_len": 200}, ["check_string_length"]), + ("use_fusion_optim", ClassValidator, {"classes": (bool,)}), ]) -def create_hash_optimizer(learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8, name="LazyAdam"): +def create_hash_optimizer(learning_rate=0.001, beta1=0.9, beta2=0.999, 
epsilon=1e-8, name="LazyAdam", + use_fusion_optim=False): """ Args: learning_rate: learning rate @@ -50,13 +53,14 @@ def create_hash_optimizer(learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1 beta2: epsilon: name: - + use_fusion_optim: if use fused optimizer Returns: a customized optimizer instance """ if ConfigInitializer.get_instance().use_dynamic_expansion: raise ValueError("dynamic expansion mode is not compatible with the optimizer, please config dynamic " "expansion mode and optimizer correctly") - optimizer = CustomizedLazyAdam(learning_rate=learning_rate, beta1=beta1, beta2=beta2, epsilon=epsilon, name=name) + optimizer = CustomizedLazyAdam(learning_rate=learning_rate, beta1=beta1, beta2=beta2, epsilon=epsilon, name=name, + use_fusion_optim=use_fusion_optim) ConfigInitializer.get_instance().optimizer_config.optimizer_instance = optimizer return optimizer @@ -64,10 +68,16 @@ def create_hash_optimizer(learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1 class CustomizedLazyAdam(adam.AdamOptimizer, CustomizedOptimizer): name_counter = defaultdict(int) - def __init__(self, learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8, use_locking=False, name="LazyAdam"): + def __init__(self, learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8, use_locking=False, name="LazyAdam", + use_fusion_optim=False): self.optimizer_type = "LazyAdam" self.optim_param_list = ["momentum", "velocity"] self.config_instance = ConfigInitializer.get_instance() + self.use_fusion_optim = use_fusion_optim + if self.use_fusion_optim: + self._custom_initial_beta1 = beta1 + self._custom_initial_beta2 = beta2 + self._custom_initial_epsilon = epsilon super(CustomizedLazyAdam, self)._get_name(name=name) super(CustomizedLazyAdam, self).__init__(learning_rate=learning_rate, beta1=beta1, beta2=beta2, epsilon=epsilon, use_locking=use_locking, name=self.unique_name) @@ -164,6 +174,16 @@ class CustomizedLazyAdam(adam.AdamOptimizer, CustomizedOptimizer): temp_epsilon = temp.get("temp_epsilon") learning_rate = tf.divide(temp_lr * math_ops.sqrt(1 - power_b2), (1 - power_b1)) + if self.use_fusion_optim: + nd_indices = tf.expand_dims(indices, 1) + slot_m = self.get_slot(var, "m") + slot_v = self.get_slot(var, "v") + output_m, output_v, output_var = \ + import_host_pipeline_ops().lazy_adam(grad, nd_indices, slot_m, slot_v, var, learning_rate, + self._custom_initial_beta1, self._custom_initial_beta2, + self._custom_initial_epsilon) + return control_flow_ops.group(output_m, output_v, output_var) + abs_indices = tf.math.maximum(indices, 0) nd_indices = tf.expand_dims(indices, 1) -- Gitee From 5e6bd96e69ca5893cf07511522b6a3f68ce4c59f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=BD=97=E5=B9=B8=E8=BF=90?= Date: Tue, 7 May 2024 13:46:16 +0000 Subject: [PATCH 119/302] =?UTF-8?q?!81=20create=5Ftable=E6=8E=A5=E5=8F=A3?= =?UTF-8?q?=E4=B8=8E=E4=BC=98=E5=8C=96=E5=99=A8=E5=88=9B=E5=BB=BA=E8=A7=A3?= =?UTF-8?q?=E8=80=A6=EF=BC=88=E4=B8=8D=E4=BC=A0=E5=85=A5=E4=BC=98=E5=8C=96?= =?UTF-8?q?=E5=99=A8=E5=8F=82=E6=95=B0=EF=BC=89=20*=20=E3=80=90=E4=BF=AE?= =?UTF-8?q?=E6=94=B9=E8=AF=B4=E6=98=8E=20Modification=E3=80=91clean=20code?= =?UTF-8?q?=E5=92=8C=E8=85=BE=E8=AE=AFeval=E9=83=A8=E5=88=86=E6=94=B9?= =?UTF-8?q?=E5=9B=BE=E7=9A=84=E4=BF=AE=E6=94=B9=20*=20Merge=20remote-track?= =?UTF-8?q?ing=20branch=20'upstream/develop'=20into=20develop-ddr-witho?= =?UTF-8?q?=E2=80=A6=20*=20Merge=20remote-tracking=20branch=20'upstream/de?= =?UTF-8?q?velop'=20into=20develop-ddr-witho=E2=80=A6=20*=20=E3=80=90?= 
=?UTF-8?q?=E4=BF=AE=E6=94=B9=E8=AF=B4=E6=98=8E=20Modification=E3=80=91slo?= =?UTF-8?q?t=E5=92=8Cderivative=E7=A7=BB=E8=87=B3=E4=B8=8A=E5=B1=82base=20?= =?UTF-8?q?*=20=E3=80=90=E4=BF=AE=E6=94=B9=E8=AF=B4=E6=98=8E=20Modificatio?= =?UTF-8?q?n=E3=80=91create=5Ftable=E6=8E=A5=E5=8F=A3=E4=B8=8E=E4=BC=98?= =?UTF-8?q?=E5=8C=96=E5=99=A8=E5=88=9B=E5=BB=BA=E8=A7=A3=E8=80=A6=20*=20Me?= =?UTF-8?q?rge=20remote-tracking=20branch=20'origin/develop-global-unique'?= =?UTF-8?q?=20into=20devel=E2=80=A6=20*=20Merge=20remote-tracking=20branch?= =?UTF-8?q?=20'upstream/develop'=20into=20develop-ddr-witho=E2=80=A6=20*?= =?UTF-8?q?=20=E3=80=90=E4=BF=AE=E6=94=B9=E8=AF=B4=E6=98=8E=20Modification?= =?UTF-8?q?=E3=80=91ddr=20without=20optimizer=20for=20fp=20*=20=E3=80=90?= =?UTF-8?q?=E4=BF=AE=E6=94=B9=E8=AF=B4=E6=98=8E=20Modification=E3=80=91ddr?= =?UTF-8?q?=20without=20optimizer=20for=20fp=20*=20=E3=80=90=E4=BF=AE?= =?UTF-8?q?=E6=94=B9=E8=AF=B4=E6=98=8E=20Modification=E3=80=91ddr=20withou?= =?UTF-8?q?t=20optimizer=20for=20fp=20*=20=E3=80=90=E4=BF=AE=E6=94=B9?= =?UTF-8?q?=E8=AF=B4=E6=98=8E=20Modification=E3=80=91ddr=20without=20optim?= =?UTF-8?q?izer=20for=20fp=20*=20=E3=80=90=E4=BF=AE=E6=94=B9=E8=AF=B4?= =?UTF-8?q?=E6=98=8E=20Modification=E3=80=91ddr=20without=20optimizer=20fo?= =?UTF-8?q?r=20fp=20*=20=E3=80=90=E4=BF=AE=E6=94=B9=E8=AF=B4=E6=98=8E=20Mo?= =?UTF-8?q?dification=E3=80=91ddr=20without=20optimizer=20for=20fp=20*=20?= =?UTF-8?q?=E3=80=90=E4=BF=AE=E6=94=B9=E8=AF=B4=E6=98=8E=20Modification?= =?UTF-8?q?=E3=80=91ddr=20without=20optimizer=20for=20fp=20*=20Merge=20rem?= =?UTF-8?q?ote-tracking=20branch=20'upstream/develop'=20into=20develop-ddr?= =?UTF-8?q?-witho=E2=80=A6=20*=20=E3=80=90=E4=BF=AE=E6=94=B9=E8=AF=B4?= =?UTF-8?q?=E6=98=8E=20Modification=E3=80=91ddr=20without=20optimizer=20fo?= =?UTF-8?q?r=20fp=20*=20=E3=80=90=E4=BF=AE=E6=94=B9=E8=AF=B4=E6=98=8E=20Mo?= =?UTF-8?q?dification=E3=80=91ddr=20without=20optimizer=20for=20fp=20*=20?= =?UTF-8?q?=E3=80=90=E4=BF=AE=E6=94=B9=E8=AF=B4=E6=98=8E=20Modification?= =?UTF-8?q?=E3=80=91ddr=20without=20optimizer=20for=20fp=20*=20=E3=80=90?= =?UTF-8?q?=E4=BF=AE=E6=94=B9=E8=AF=B4=E6=98=8E=20Modification=E3=80=91ddr?= =?UTF-8?q?=20without=20optimizer=20for=20fp=20*=20=E3=80=90=E4=BF=AE?= =?UTF-8?q?=E6=94=B9=E8=AF=B4=E6=98=8E=20Modification=E3=80=91ddr=20withou?= =?UTF-8?q?t=20optimizer=20for=20fp=20*=20Merge=20remote-tracking=20branch?= =?UTF-8?q?=20'upstream/develop'=20into=20develop-ddr-witho=E2=80=A6=20*?= =?UTF-8?q?=20Merge=20remote-tracking=20branch=20'origin/develop-ddr-witho?= =?UTF-8?q?ut-optimizer'=20in=E2=80=A6=20*=20=E3=80=90=E4=BF=AE=E6=94=B9?= =?UTF-8?q?=E8=AF=B4=E6=98=8E=20Modification=E3=80=91ddr=20without=20optim?= =?UTF-8?q?izer=20for=20fp=20*=20=E3=80=90=E4=BF=AE=E6=94=B9=E8=AF=B4?= =?UTF-8?q?=E6=98=8E=20Modification=E3=80=91ddr=20without=20optimizer=20fo?= =?UTF-8?q?r=20fp?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/DCNv2/main_mxrec.py | 2 - examples/demo/little_demo/main.py | 5 +- examples/demo/little_demo/run.sh | 2 +- .../little_demo_estimator/nn_model_build.py | 9 +- .../demo/little_demo_estimator/nn_optim.py | 12 -- examples/dlrm/model/gradient_descent_w.py | 8 -- examples/dlrm/model/main_mxrec.py | 2 - mx_rec/core/asc/build_graph.py | 58 ++------ mx_rec/core/asc/manager.py | 11 +- mx_rec/core/asc/swap_args.py | 57 ++++++++ mx_rec/core/emb/base_sparse_embedding.py | 61 ++++---- mx_rec/core/emb/dynamic_sparse_embedding.py | 35 ----- mx_rec/core/emb/sparse_embedding.py | 76 ---------- 
mx_rec/core/embedding.py | 6 +- mx_rec/graph/modifier.py | 131 +++++++++++++++++- mx_rec/graph/utils.py | 49 +++++++ mx_rec/optimizers/adagrad.py | 25 ---- mx_rec/optimizers/base.py | 6 - mx_rec/optimizers/emb_optimizer.py | 76 ---------- mx_rec/optimizers/ftrl.py | 25 ---- mx_rec/optimizers/gradient_descent.py | 3 - mx_rec/optimizers/gradient_descent_by_addr.py | 3 - mx_rec/optimizers/lazy_adam.py | 29 ---- mx_rec/util/variable.py | 7 +- tests/mx_rec/core/mock_class.py | 19 --- tests/mx_rec/core/test_build_graph.py | 88 ++---------- tests/mx_rec/core/test_embedding.py | 25 +--- tests/mx_rec/core/test_manager.py | 4 - tests/mx_rec/graph/test_modifier.py | 21 ++- tests/mx_rec/saver/sparse_embedding_mock.py | 7 - tests/mx_rec/saver/test_saver.py | 5 +- tests/mx_rec/util/test_variable.py | 11 +- tools/atomic/sparse_lookup_with_grad.py | 1 - 33 files changed, 318 insertions(+), 561 deletions(-) create mode 100644 mx_rec/core/asc/swap_args.py delete mode 100644 mx_rec/optimizers/emb_optimizer.py diff --git a/examples/DCNv2/main_mxrec.py b/examples/DCNv2/main_mxrec.py index 5e4efe02..eb1d91ea 100644 --- a/examples/DCNv2/main_mxrec.py +++ b/examples/DCNv2/main_mxrec.py @@ -294,7 +294,6 @@ if __name__ == "__main__": cfg.dev_vocab_size = cfg.dev_vocab_size // 2 optimizer_list = [get_dense_and_sparse_optimizer(cfg)] - sparse_optimizer_list = [sparse_optimizer for dense_optimizer, sparse_optimizer in optimizer_list] # note: variance_scaling_initializer only support HBM mode emb_initializer = tf.compat.v1.truncated_normal_initializer(stddev=0.05, seed=SPARSE_HASHTABLE_SEED) \ @@ -305,7 +304,6 @@ if __name__ == "__main__": dim=tf.TensorShape([cfg.emb_dim]), name="sparse_embeddings", emb_initializer=emb_initializer, - optimizer_list=[sparse_optimizer_list[0]._optimizer], **cfg.get_emb_table_cfg() ) if use_faae: diff --git a/examples/demo/little_demo/main.py b/examples/demo/little_demo/main.py index 14b2e065..a6ef96fc 100644 --- a/examples/demo/little_demo/main.py +++ b/examples/demo/little_demo/main.py @@ -246,7 +246,6 @@ if __name__ == "__main__": eval_feature_spec_list = create_feature_spec_list(use_timestamp=USE_TIMESTAMP) optimizer_list = [create_dense_and_sparse_optimizer(cfg)] - sparse_optimizer_list = [sparse_optimizer for dense_optimizer, sparse_optimizer in optimizer_list] # 如需验证DDR模式,请按照key数量、batch unique数量合理设置device与host表大小。 # 验证DDR的配置参考:建议跑dynamic避免调参。数据集key总量大于device表,小于device+host;一个batch的unique key数量小于device表。 @@ -273,7 +272,6 @@ if __name__ == "__main__": dim=tf.TensorShape([cfg.user_hashtable_dim]), name='user_table', emb_initializer=emb_initializer, - optimizer_list=sparse_optimizer_list, all2all_gradients_op="sum_gradients_and_div_by_ranksize", **cache_mode_dict[cache_mode]) @@ -281,7 +279,6 @@ if __name__ == "__main__": dim=tf.TensorShape([cfg.item_hashtable_dim]), name='item_table', emb_initializer=emb_initializer, - optimizer_list=sparse_optimizer_list, **cache_mode_dict[cache_mode]) # 在predict的场景下,train model不需要被执行 @@ -300,7 +297,7 @@ if __name__ == "__main__": batch_number=MAX_DATASET_GENERATE * get_rank_size()) dense_variables, sparse_variables = get_dense_and_sparse_variable() - params = {"train_batch": train_batch, "eval_batch": eval_batch, "use_one_shot": USE_ONE_SHOT, + params = {"train_batch": train_batch, "eval_batch": eval_batch, "use_one_shot": USE_ONE_SHOT, "use_deterministic": USE_DETERMINISTIC} run_mode = RunMode( MODIFY_GRAPH_FLAG, USE_TIMESTAMP, table_list, optimizer_list, train_model, eval_model, train_iterator, diff --git a/examples/demo/little_demo/run.sh 
b/examples/demo/little_demo/run.sh index 9462a0cb..d585be02 100644 --- a/examples/demo/little_demo/run.sh +++ b/examples/demo/little_demo/run.sh @@ -106,7 +106,7 @@ export USE_DYNAMIC=1 # 0:静态shape;1:动态shape export USE_DYNAMIC_EXPANSION=0 # 0:关闭动态扩容;1: 开启动态扩容 export USE_MULTI_LOOKUP=1 # 0:一表一查;1:一表多查 export MULTI_LOOKUP_TIMES=2 # 一表多查次数:默认2,上限127(因为一表已经有一查);仅当export USE_MULTI_LOOKUP=1时生效 -export USE_MODIFY_GRAPH=0 # 0:feature spec模式;1:自动改图模式 +export USE_MODIFY_GRAPH=1 # 0:feature spec模式;1:自动改图模式 export USE_TIMESTAMP=0 # 0:关闭特征准入淘汰;1:开启特征准入淘汰 export USE_ONE_SHOT=0 # 0:MakeIterator;1:OneShotIterator export UpdateEmb_V2=1 # 0: UpdateEmb同步更新;1:UpdateEmb_V2异步更新 diff --git a/examples/demo/little_demo_estimator/nn_model_build.py b/examples/demo/little_demo_estimator/nn_model_build.py index 11faadf1..aeeab8f8 100644 --- a/examples/demo/little_demo_estimator/nn_model_build.py +++ b/examples/demo/little_demo_estimator/nn_model_build.py @@ -21,7 +21,6 @@ from mx_rec.util.tf_version_adapter import npu_ops from mx_rec.core.embedding import create_table, sparse_lookup from mx_rec.constants.constants import ASCEND_TIMESTAMP -from nn_optim import get_dense_and_sparse_optimizer from utils import FeatureSpecIns @@ -137,22 +136,18 @@ class LittleModel: return logit_list def _get_embedding_list(self): - optimizer_list = [get_dense_and_sparse_optimizer(self.cfg)] - sparse_optimizer_list = [sparse_optimizer for dense_optimizer, sparse_optimizer in optimizer_list] user_hashtable = create_table(key_dtype=tf.int64, dim=tf.TensorShape([self.cfg.user_hashtable_dim]), name='user_table', emb_initializer=tf.compat.v1.truncated_normal_initializer(), device_vocabulary_size=self.cfg.user_vocab_size * 10, - host_vocabulary_size=self.cfg.user_vocab_size * 0, - optimizer_list=sparse_optimizer_list) + host_vocabulary_size=self.cfg.user_vocab_size * 0) item_hashtable = create_table(key_dtype=tf.int64, dim=tf.TensorShape([self.cfg.item_hashtable_dim]), name='item_table', emb_initializer=tf.compat.v1.truncated_normal_initializer(), device_vocabulary_size=self.cfg.item_vocab_size * 10, - host_vocabulary_size=self.cfg.item_vocab_size * 0, - optimizer_list=sparse_optimizer_list) + host_vocabulary_size=self.cfg.item_vocab_size * 0) if self.params.modify_graph: if not self.params.enable_slicer_test: diff --git a/examples/demo/little_demo_estimator/nn_optim.py b/examples/demo/little_demo_estimator/nn_optim.py index 4d519366..d07556a6 100644 --- a/examples/demo/little_demo_estimator/nn_optim.py +++ b/examples/demo/little_demo_estimator/nn_optim.py @@ -28,18 +28,6 @@ from mx_rec.optimizers.gradient_descent_by_addr import create_hash_optimizer_by_ from mx_rec.util.log import logger -def get_dense_and_sparse_optimizer(cfg): - dense_optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=cfg.learning_rate) - if ConfigInitializer.get_instance().use_dynamic_expansion: - sparse_optimizer = create_hash_optimizer_by_addr(learning_rate=cfg.learning_rate) - logger.info("optimizer create_hash_optimizer_by_addr") - else: - sparse_optimizer = create_hash_optimizer(learning_rate=cfg.learning_rate) - logger.info("optimizer create_hash_optimizer") - - return dense_optimizer, sparse_optimizer - - def get_train_op_list(losses, learning_rate): train_ops_list = [] update_ops = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.UPDATE_OPS) diff --git a/examples/dlrm/model/gradient_descent_w.py b/examples/dlrm/model/gradient_descent_w.py index a2a5635a..53adb996 100644 --- a/examples/dlrm/model/gradient_descent_w.py +++ 
b/examples/dlrm/model/gradient_descent_w.py @@ -50,14 +50,6 @@ class CustomizedGradientDescentWithWeighDecay(gradient_descent.GradientDescentOp self._slot_num = 0 self._derivative = 1 - def initialize_slots(self, var, table_instance): - logger.info("no slot for gradient descent") - return [] - - def insert_slot(self, slot, named_slots_key, slot_name): - logger.info("no slot for gradient descent") - return dict() - def get_slot_init_values(self): logger.info("no slot for gradient descent") return [] diff --git a/examples/dlrm/model/main_mxrec.py b/examples/dlrm/model/main_mxrec.py index 8c4cdd7e..3464f84e 100644 --- a/examples/dlrm/model/main_mxrec.py +++ b/examples/dlrm/model/main_mxrec.py @@ -298,7 +298,6 @@ if __name__ == "__main__": cfg.dev_vocab_size = cfg.dev_vocab_size // 2 optimizer_list = [get_dense_and_sparse_optimizer(cfg)] - sparse_optimizer_list = [sparse_optimizer for dense_optimizer, sparse_optimizer in optimizer_list] # note: variance_scaling_initializer only support HBM mode emb_initializer = tf.compat.v1.truncated_normal_initializer(stddev=0.05, seed=sparse_hashtable_seed) \ @@ -309,7 +308,6 @@ if __name__ == "__main__": dim=tf.TensorShape([cfg.emb_dim]), name="sparse_embeddings", emb_initializer=emb_initializer, - optimizer_list=[sparse_optimizer_list[0]._optimizer], **cfg.get_emb_table_cfg() ) if use_faae: diff --git a/mx_rec/core/asc/build_graph.py b/mx_rec/core/asc/build_graph.py index 2bb72621..82e40b29 100644 --- a/mx_rec/core/asc/build_graph.py +++ b/mx_rec/core/asc/build_graph.py @@ -23,6 +23,7 @@ import mxrec_pybind from mx_rec.util.initialize import ConfigInitializer from mx_rec.util.tf_version_adapter import npu_ops from mx_rec.util.log import logger +from mx_rec.core.asc.swap_args import SwapArgs, SwapDataType def get_restore_vector(config): @@ -38,7 +39,7 @@ def get_restore_vector(config): raise TypeError("ext_emb_size must be a int") if config.get("ext_emb_size") < 1: raise ValueError("ext_emb_size is less than 1") - emb_size = config.get("ext_emb_size") + emb_size = None if ConfigInitializer.get_instance().use_static: restore_size = config.get("batch_size") * config.get("feat_cnt") @@ -46,8 +47,7 @@ def get_restore_vector(config): restore_size = None with tf.compat.v1.variable_scope(config.get("table_name"), reuse=tf.compat.v1.AUTO_REUSE): - device_id = int(config.get("device_id")) - hot_size = int(mxrec_pybind.get_ub_hot_size(device_id) / emb_size) + hot_size = None restore_vector, hot_pos = npu_ops.gen_npu_ops.get_next( output_types=[tf.int32, tf.int32], output_shapes=[restore_size, [hot_size]], @@ -103,49 +103,6 @@ def get_all2all_args(use_static: bool, config: dict) -> Optional[list]: return all2all_args -def get_swap_info(config: dict, swap_len: int, swap_pos: list, table: tf.Variable) -> list: - """ - Get swap info if threshold is configured. 
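A note on the get_restore_vector changes above: because the optimizer is now bound to a table only at apply-gradients/modify-graph time (see the later hunks in this patch), the extended row width and UB hot size cannot be resolved while the lookup graph is first built, so those shapes are left dynamic. A small sketch of the remaining shape contract, with illustrative numbers:

```python
def restore_vector_shape(use_static: bool, batch_size: int, feat_cnt: int):
    # Static graphs pin the restore vector to batch_size * feat_cnt entries;
    # dynamic graphs pass None and let the channel deliver variable lengths.
    return batch_size * feat_cnt if use_static else None

assert restore_vector_shape(True, 1024, 26) == 26624
assert restore_vector_shape(False, 1024, 26) is None
```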
- :param config: training job config - :param swap_len: swap length - :param swap_pos: swap position - :param table: the instance to do swap - :return: swap info - """ - use_static = ConfigInitializer.get_instance().use_static - max_lookup_vec_size = None - if use_static: - max_lookup_vec_size = config.get("send_count") * config.get("rank_size") - - if config.get("is_hbm"): - swap_in = [tf.no_op()] - else: - with tf.compat.v1.variable_scope("h2d_emb"): - logger.debug('Channel %s_h2d_%s was built for getnext', config.get("table_name"), config.get("channel_id")) - h2d_emb = npu_ops.gen_npu_ops.get_next( - output_types=[tf.float32], - output_shapes=[[max_lookup_vec_size, config.get("ext_emb_size")]], - channel_name=f'{config.get("table_name")}_h2d_{config.get("channel_id")}')[0] - logger.debug("h2d_emb shape: %s", h2d_emb) - if not isinstance(table, list): - raise RuntimeError("When enable emb_transfer, optimizer should have slots") - if use_static: - swap_pos = swap_pos[0:swap_len] - h2d_emb = h2d_emb[0:swap_len, :] - swap_outs = [tf.gather(one_table, swap_pos) for one_table in table] - swap_out = tf.concat(swap_outs, axis=1) - logger.debug('Channel %s_d2h_%s was built for op outfeed.', config.get("table_name"), config.get("channel_id")) - swap_out_op = npu_ops.outfeed_enqueue_op( - channel_name=f'{config.get("table_name")}_d2h_{config.get("channel_id")}', inputs=[swap_out]) - with tf.control_dependencies([swap_out_op]): - nd_swap_pos = tf.expand_dims(swap_pos, 1) - table_num = len(table) - h2d_emb_split = tf.split(h2d_emb, table_num, axis=1) - swap_in = [tf.compat.v1.scatter_nd_update(table[i], nd_swap_pos, h2d_emb_split[i]) - for i in range(len(table))] - return swap_in - - def get_preprocessed_tensor_for_asc(table, config): use_static = ConfigInitializer.get_instance().use_static max_lookup_vec_size = None @@ -158,15 +115,18 @@ def get_preprocessed_tensor_for_asc(table, config): with tf.compat.v1.variable_scope("id_offsets"): id_offsets, swap_pos, swap_len = get_id_offsets(max_lookup_vec_size, config) - all2all_args = get_all2all_args(use_static, config) + if not config.get("is_hbm"): + # 一表多查时,会多次进入get_preprocessed_tensor_for_asc,最后一次大查询替换map的key-value即可 + swap_args = SwapArgs() + swap_args.set_data(SwapDataType.CONFIG.value, var_name=config.get("table_name"), + var_channel=config.get("channel_id"), config=config, swap_pos=swap_pos, swap_len=swap_len) - swap_in = get_swap_info(config, swap_len, swap_pos, table) + all2all_args = get_all2all_args(use_static, config) result = { 'restore_vector': restore_vector, 'hot_pos': hot_pos, 'id_offsets': id_offsets, - 'swap_in': swap_in, 'all2all_args': all2all_args, } diff --git a/mx_rec/core/asc/manager.py b/mx_rec/core/asc/manager.py index 64611295..8b62b66b 100644 --- a/mx_rec/core/asc/manager.py +++ b/mx_rec/core/asc/manager.py @@ -37,16 +37,10 @@ def generate_table_info_list(): raise ValueError(f"The DDR mode of all tables must be used or not used at the same time. 
However, is_hbm " f"of each table `{table_instance_dict.keys()}` is `{is_hbm_list}`.") - optimizer = ConfigInitializer.get_instance().optimizer_config.optimizer_instance # generate table info dangling_table = check_dangling_table() for _, table_instance in ConfigInitializer.get_instance().sparse_embed_config.table_instance_dict.items(): - # When dynamic expansion mode, ext_emb_size is set by optimizer - if ConfigInitializer.get_instance().use_dynamic_expansion and optimizer: - table_instance.ext_emb_size = table_instance.emb_size * (1 + optimizer.slot_num) - logger.debug("ext_emb_size is reset to be %s for EmbInfo", table_instance.ext_emb_size) - skip = should_skip(table_instance.table_name) if table_instance.table_name in dangling_table or skip: logger.info("skip table %s: %s which does not need to be provided to the EmbInfo.", @@ -158,9 +152,8 @@ def matched_opt_slot_initializers(table_instance): slot_initializers.append(slot_initializer) start_index += table_instance.emb_size - logger.debug("matched_opt_slot_initializers, ext emb size:%s, optimizer_instance_list size:%s, " - "slot_initializers size:%s", table_instance.ext_emb_size, len(table_instance.optimizer_instance_list), - len(slot_initializers)) + logger.debug("matched_opt_slot_initializers, ext emb size:%s, slot_initializers size:%s", + table_instance.ext_emb_size, len(slot_initializers)) return slot_initializers diff --git a/mx_rec/core/asc/swap_args.py b/mx_rec/core/asc/swap_args.py new file mode 100644 index 00000000..4494cc26 --- /dev/null +++ b/mx_rec/core/asc/swap_args.py @@ -0,0 +1,57 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +import functools +from collections import defaultdict +from enum import Enum + + +class SwapDataType(Enum): + CONFIG = "config" + CONTROL = "control" + + +def singleton(cls): + _instance = {} + + def inner(): + if cls not in _instance: + _instance[cls] = cls() + return _instance[cls] + + return inner + + +@singleton +class SwapArgs: + def __init__(self): + self.swap_config_dict = defaultdict(dict) + self.swap_control_dict = defaultdict(dict) + + def set_data(self, data_type: str, **kwargs): + if "var_name" not in kwargs: + raise ValueError("Missing Required key: var_name") + if "var_channel" not in kwargs: + raise ValueError("Missing Required key: var_channel") + var_name = kwargs.pop("var_name") + var_channel = kwargs.pop("var_channel") + + if data_type == SwapDataType.CONFIG.value: + self.swap_config_dict[var_name][var_channel] = kwargs + elif data_type == SwapDataType.CONTROL.value: + self.swap_control_dict[var_name][var_channel] = kwargs + else: + raise ValueError(f"Error data type in swap args: {data_type}") diff --git a/mx_rec/core/emb/base_sparse_embedding.py b/mx_rec/core/emb/base_sparse_embedding.py index 07dc70f7..2a52b3a6 100644 --- a/mx_rec/core/emb/base_sparse_embedding.py +++ b/mx_rec/core/emb/base_sparse_embedding.py @@ -10,7 +10,9 @@ import tensorflow as tf from tensorflow.python.ops import array_ops from mx_rec.constants.constants import All2allGradientsOp, ASCEND_SPARSE_LOOKUP_ENTRANCE, ASCAnchorAttr +from mx_rec.core.asc.build_graph import get_preprocessed_tensor_for_asc from mx_rec.core.asc.feature_spec import set_temporary_feature_spec_attribute, get_feature_spec, FeatureSpec +from mx_rec.core.asc.swap_args import SwapArgs, SwapDataType from mx_rec.util.communication.hccl_ops import get_rank_size, get_rank_id, get_device_id from mx_rec.util.tf_version_adapter import hccl_ops from mx_rec.util.initialize import ConfigInitializer @@ -81,14 +83,6 @@ class BaseSparseEmbedding(metaclass=abc.ABCMeta): ConfigInitializer.get_instance().train_params_config.ascend_global_hashtable_collection, self._variable) self._set_ext_emb_size() - @property - def optimizer_instance_list(self): - return [] - - @property - def optimizer(self): - return dict() - @property def embedding_size(self): return self._embedding_size @@ -117,6 +111,10 @@ class BaseSparseEmbedding(metaclass=abc.ABCMeta): def send_count(self): return self._send_count + @property + def rank_size(self): + return self._rank_size + @property def slice_device_vocabulary_size(self): return self._slice_device_vocabulary_size @@ -201,35 +199,11 @@ class BaseSparseEmbedding(metaclass=abc.ABCMeta): """ pass - @abc.abstractmethod - def set_optimizer(self, key: str, state_dict: dict): - """ - 设置optimizer state. 
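The new swap_args.py above is a process-wide registry: lookup construction records per-table swap metadata keyed by (table name, channel id), and the graph modifier reads it back later. A hedged usage sketch, assuming the mx_rec package from this patch is importable; the table name and values are made up:

```python
from mx_rec.core.asc.swap_args import SwapArgs, SwapDataType

args = SwapArgs()  # singleton: every call returns the same instance
args.set_data(SwapDataType.CONFIG.value,
              var_name="user_table", var_channel=0,
              swap_pos=[0, 3, 7], swap_len=3)

# Later, at modify-graph time, the same instance yields the recorded entry.
entry = SwapArgs().swap_config_dict["user_table"][0]
assert entry["swap_len"] == 3
```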
- - Args: - key: 优化器名字 - state_dict: optimizer state - - Returns: None - """ - pass @abc.abstractmethod def _set_slice_vocab_size(self): pass - @abc.abstractmethod - def _set_ext_emb_size(self): - pass - - @abc.abstractmethod - def _build_optimizer_states(self): - pass - - @abc.abstractmethod - def _get_preprocessed_tensor(self, feature_spec: FeatureSpec, is_training: bool, send_count: Optional[int]) -> dict: - pass - @abc.abstractmethod def _get_update_grad(self, local_grad: tf.Tensor, result: dict, table: Union[tf.compat.v1.Variable, tf.Tensor]) -> Union[tf.IndexedSlices, tf.Tensor]: @@ -289,6 +263,19 @@ class BaseSparseEmbedding(metaclass=abc.ABCMeta): """ self._multi_lookup_times[is_training] = self._multi_lookup_times.get(is_training) + 1 + def _set_ext_emb_size(self): + # 初始设置_ext_emb_size等于_emb_size,改图阶段会根据优化器的不同而exchange该值 + self._ext_emb_size = self._emb_size * self._ext_coefficient + logger.debug("Init table, ext_emb_size is set to be %s.", self._ext_emb_size) + + def _get_preprocessed_tensor(self, feature_spec: FeatureSpec, channel_id: int, send_count: Optional[int]) -> dict: + config = dict(batch_size=feature_spec.batch_size, feat_cnt=feature_spec.feat_cnt, send_count=send_count, + rank_size=self._rank_size, channel_id=channel_id, table_name=self._table_name, + is_hbm=self._is_hbm, ext_emb_size=self._ext_emb_size, emb_size=self._emb_size, + use_dynamic_expansion=ConfigInitializer.get_instance().use_dynamic_expansion) + + return get_preprocessed_tensor_for_asc(self._variable, config) + def lookup(self, ids: tf.Tensor, send_count: Optional[int], **kwargs) -> tf.Tensor: """ 稀疏表的lookup,自动改图模式. @@ -409,7 +396,8 @@ class BaseSparseEmbedding(metaclass=abc.ABCMeta): check_emb_lookup_params(hashtable_params, feature_spec, send_count, is_training) if ConfigInitializer.get_instance().use_static: self._send_count = send_count - result = self._get_preprocessed_tensor(feature_spec, is_training, send_count) + channel_id = ConfigInitializer.get_instance().train_params_config.get_training_mode_channel_id(is_training) + result = self._get_preprocessed_tensor(feature_spec, channel_id, send_count) @tf.custom_gradient def sparse_forward(table): @@ -469,7 +457,11 @@ class BaseSparseEmbedding(metaclass=abc.ABCMeta): return array_ops.reshape(embeddings, dest_shape), grad - with tf.control_dependencies(result.get("swap_in")): + ddr_control_ops = tf.no_op(name="place_holder_swap_op") + swap_args = SwapArgs() + swap_args.set_data(SwapDataType.CONTROL.value, var_name=self._table_name, var_channel=channel_id, + control_ops=ddr_control_ops) + with tf.control_dependencies([ddr_control_ops]): return self._get_sparse_forward_result(sparse_forward, self._variable, result, is_training) def __initialize_variables(self): @@ -481,7 +473,6 @@ class BaseSparseEmbedding(metaclass=abc.ABCMeta): ConfigInitializer.get_instance().sparse_embed_config.insert_removing_var_list(self._variable.name) self.__record() - self._build_optimizer_states() def __record(self, eval_flag=False): ConfigInitializer.get_instance().sparse_embed_config.insert_table_instance( diff --git a/mx_rec/core/emb/dynamic_sparse_embedding.py b/mx_rec/core/emb/dynamic_sparse_embedding.py index 671c593e..49979261 100644 --- a/mx_rec/core/emb/dynamic_sparse_embedding.py +++ b/mx_rec/core/emb/dynamic_sparse_embedding.py @@ -10,7 +10,6 @@ import tensorflow as tf from mx_rec.constants.constants import ASCEND_TABLE_NAME_MUST_CONTAIN, ASCEND_SPARSE_LOOKUP_LOCAL_EMB, \ ASCEND_SPARSE_LOOKUP_ID_OFFSET from mx_rec.core.asc.feature_spec import FeatureSpec -from 
mx_rec.core.asc.build_graph import get_preprocessed_tensor_for_asc from mx_rec.core.emb.base_sparse_embedding import BaseSparseEmbedding from mx_rec.util.initialize import ConfigInitializer from mx_rec.util.log import logger @@ -28,26 +27,10 @@ class DynamicSparseEmbedding(BaseSparseEmbedding): def capacity(self) -> int: return ConfigInitializer.get_instance().hybrid_manager_config.asc_manager.get_table_capacity(self._table_name) - @abc.abstractmethod - def set_optimizer(self, key: str, state_dict: dict): - pass - - @abc.abstractmethod - def _build_optimizer_states(self): - pass - - @abc.abstractmethod - def _set_ext_emb_size(self): - pass - @abc.abstractmethod def _set_slice_vocab_size(self): pass - @abc.abstractmethod - def _get_preprocessed_tensor(self, feature_spec: FeatureSpec, is_training: bool, send_count: Optional[int]) -> dict: - pass - def _get_update_grad(self, local_grad: tf.Tensor, result: dict, table: Union[tf.compat.v1.Variable, tf.Tensor]) -> Union[tf.IndexedSlices, tf.Tensor]: return local_grad @@ -81,25 +64,7 @@ class HBMDynamicSparseEmbedding(DynamicSparseEmbedding): def __init__(self, config: dict): super(DynamicSparseEmbedding, self).__init__(config) - def set_optimizer(self, key: str, state_dict: dict): - pass - - def _build_optimizer_states(self): - pass - - def _set_ext_emb_size(self): - self._ext_emb_size = self._emb_size * self._ext_coefficient - logger.debug("init table, ext_emb_size is set to be %s.", self._ext_emb_size) - def _set_slice_vocab_size(self): # 动态扩容模式下,保留device侧variable,大小设置为1 self._slice_device_vocabulary_size = 1 - def _get_preprocessed_tensor(self, feature_spec: FeatureSpec, is_training: bool, send_count: Optional[int]) -> dict: - channel_id = ConfigInitializer.get_instance().train_params_config.get_training_mode_channel_id(is_training) - config = dict(batch_size=feature_spec.batch_size, feat_cnt=feature_spec.feat_cnt, send_count=send_count, - rank_size=self._rank_size, channel_id=channel_id, table_name=self._table_name, - is_hbm=self._is_hbm, ext_emb_size=self._ext_emb_size, - emb_size=self._emb_size, device_id=self._device_id, use_dynamic_expansion=True) - - return get_preprocessed_tensor_for_asc(self._variable, config) diff --git a/mx_rec/core/emb/sparse_embedding.py b/mx_rec/core/emb/sparse_embedding.py index 938f917d..071f4506 100644 --- a/mx_rec/core/emb/sparse_embedding.py +++ b/mx_rec/core/emb/sparse_embedding.py @@ -11,10 +11,7 @@ from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from mx_rec.core.asc.feature_spec import FeatureSpec -from mx_rec.core.asc.build_graph import get_preprocessed_tensor_for_asc from mx_rec.core.emb.base_sparse_embedding import BaseSparseEmbedding -from mx_rec.optimizers.emb_optimizer import EmbOptimizer -from mx_rec.util.initialize import ConfigInitializer from mx_rec.util.log import logger @@ -30,22 +27,6 @@ class SparseEmbedding(BaseSparseEmbedding): def capacity(self) -> int: pass - @abc.abstractmethod - def set_optimizer(self, key: str, state_dict: dict): - pass - - @abc.abstractmethod - def _set_ext_emb_size(self): - pass - - @abc.abstractmethod - def _build_optimizer_states(self): - pass - - @abc.abstractmethod - def _get_preprocessed_tensor(self, feature_spec: FeatureSpec, is_training: bool, send_count: Optional[int]) -> dict: - pass - def _set_slice_vocab_size(self): self._slice_device_vocabulary_size = math.ceil(self._device_vocabulary_size / self._rank_size) self._slice_host_vocabulary_size = math.ceil(self._host_vocabulary_size / self._rank_size) @@ -84,25 +65,6 
@@ class HBMSparseEmbedding(SparseEmbedding): def capacity(self) -> int: return self._device_vocabulary_size - def set_optimizer(self, key: str, state_dict: dict): - pass - - def _build_optimizer_states(self): - pass - - def _set_ext_emb_size(self): - self._ext_emb_size = self._emb_size * self._ext_coefficient - logger.debug("Init table, ext_emb_size is set to be %s.", self._ext_emb_size) - - def _get_preprocessed_tensor(self, feature_spec: FeatureSpec, is_training: bool, send_count: Optional[int]) -> dict: - channel_id = ConfigInitializer.get_instance().train_params_config.get_training_mode_channel_id(is_training) - config = dict(batch_size=feature_spec.batch_size, feat_cnt=feature_spec.feat_cnt, send_count=send_count, - rank_size=self._rank_size, channel_id=channel_id, table_name=self._table_name, - is_hbm=self._is_hbm, ext_emb_size=self._ext_emb_size, - emb_size=self._emb_size, device_id=self._device_id) - - return get_preprocessed_tensor_for_asc(self._variable, config) - class ExternalStorageSparseEmbedding(SparseEmbedding): """ @@ -110,19 +72,8 @@ class ExternalStorageSparseEmbedding(SparseEmbedding): """ def __init__(self, config: dict): - self.emb_optimizer = EmbOptimizer(config.get("optimizer_list")) - self.emb_optimizer.check_optimizer_instance_list() - super(ExternalStorageSparseEmbedding, self).__init__(config) - @property - def optimizer(self): - return self.emb_optimizer.optimizer - - @property - def optimizer_instance_list(self): - return self.emb_optimizer.optimizer_instance_list - def capacity(self) -> int: # DDR if not self._ssd_vocabulary_size: @@ -130,33 +81,6 @@ class ExternalStorageSparseEmbedding(SparseEmbedding): # SSD return self._device_vocabulary_size + self._host_vocabulary_size + self._ssd_vocabulary_size - def set_optimizer(self, key: str, state_dict: dict): - self.emb_optimizer.set_optimizer(key, state_dict, self._table_name) - - def _set_ext_emb_size(self): - self._ext_coefficient += len(self.emb_optimizer.optimizer_slot_info_list) - self._ext_emb_size = self._emb_size * self._ext_coefficient - logger.debug("Init table, ext_emb_size is set to be %s.", self._ext_emb_size) - - def _build_optimizer_states(self): - for sparse_optimizer_instance in self.emb_optimizer.optimizer_instance_list: - slot_info_list = sparse_optimizer_instance.initialize_slots(self._variable, self) - self.emb_optimizer.optimizer_slot_info_list.extend(slot_info_list) - - for slot_info in self.emb_optimizer.optimizer_slot_info_list: - self.emb_optimizer.set_optimizer_slot(slot_info) - - def _get_preprocessed_tensor(self, feature_spec: FeatureSpec, is_training: bool, send_count: Optional[int]) -> dict: - channel_id = ConfigInitializer.get_instance().train_params_config.get_training_mode_channel_id(is_training) - config = dict(batch_size=feature_spec.batch_size, feat_cnt=feature_spec.feat_cnt, send_count=send_count, - rank_size=self._rank_size, channel_id=channel_id, table_name=self._table_name, - is_hbm=self._is_hbm, ext_emb_size=self._ext_emb_size, - emb_size=self._emb_size, device_id=self._device_id) - - variable_list = [self._variable] + \ - [slot_info.get("slot") for slot_info in self.emb_optimizer.optimizer_slot_info_list] - return get_preprocessed_tensor_for_asc(variable_list, config) - def _set_specific_value_for_non_valid_key(id_offsets: Optional[tf.Tensor], embeddings: Optional[tf.Tensor], diff --git a/mx_rec/core/embedding.py b/mx_rec/core/embedding.py index f90efcf6..16f19d04 100644 --- a/mx_rec/core/embedding.py +++ b/mx_rec/core/embedding.py @@ -43,7 +43,6 @@ from 
mx_rec.util.log import logger ("dim", NumValidator, {"min_value": 1, "max_value": 8192}, ["check_value"]), ("name", StringValidator, {"min_len": 1, "max_len": 100}, ["check_string_length", "check_whitelist"]), ("emb_initializer", ClassValidator, {"classes": (InitializerV1, InitializerV2)}), - ("optimizer_list", ClassValidator, {"classes": (list, type(None))}), (["ssd_vocabulary_size", "ssd_data_path", "host_vocabulary_size"], SSDFeatureValidator), ("device_vocabulary_size", IntValidator, {"min_value": 1, "max_value": MAX_DEVICE_VOCABULARY_SIZE}, ["check_value"]), @@ -59,7 +58,6 @@ from mx_rec.util.log import logger ("hashtable_threshold", IntValidator, {"min_value": 0, "max_value": MAX_INT32}, ["check_value"]) ]) def create_table(key_dtype, dim, name, emb_initializer, - optimizer_list: Optional[list] = None, device_vocabulary_size=1, host_vocabulary_size=0, ssd_vocabulary_size=0, @@ -77,7 +75,6 @@ def create_table(key_dtype, dim, name, emb_initializer, dim: embedding vector size name: hash table name emb_initializer: the initializer for embedding values - optimizer_list: specify the optimizers to use for current hash table device_vocabulary_size: embedding vector numbers on device host_vocabulary_size: embedding vector numbers on ddr ssd_vocabulary_size: embedding vector numbers on ssd @@ -95,8 +92,7 @@ def create_table(key_dtype, dim, name, emb_initializer, config = dict(key_dtype=key_dtype, embedding_size=dim, table_name=name, emb_initializer=emb_initializer, device_vocabulary_size=device_vocabulary_size, host_vocabulary_size=host_vocabulary_size, ssd_vocabulary_size=ssd_vocabulary_size, ssd_data_path=ssd_data_path, - optimizer_list=optimizer_list, init_param=init_param, is_save=is_save, - all2all_gradients_op=all2all_gradients_op) + init_param=init_param, is_save=is_save, all2all_gradients_op=all2all_gradients_op) # 动态扩容 if ConfigInitializer.get_instance().use_dynamic_expansion: return HBMDynamicSparseEmbeddingFactory().create_embedding(config) diff --git a/mx_rec/graph/modifier.py b/mx_rec/graph/modifier.py index e0b4bdeb..72772c5f 100644 --- a/mx_rec/graph/modifier.py +++ b/mx_rec/graph/modifier.py @@ -31,15 +31,17 @@ from mx_rec.constants.constants import ASCEND_CUTTING_POINT_INITIALIZER, ASCEND_ from mx_rec.core.asc.feature_spec import FeatureSpec from mx_rec.core.asc.helper import get_asc_insert_func from mx_rec.core.asc.manager import start_asc_pipeline +from mx_rec.core.asc.swap_args import SwapArgs from mx_rec.core.emb.base_sparse_embedding import BaseSparseEmbedding from mx_rec.graph.merge_lookup import do_merge_lookup -from mx_rec.graph.utils import check_input_list, find_parent_op, check_cutting_points, \ -record_ops_to_replace, export_pb_graph, make_sorted_key_to_tensor_list +from mx_rec.graph.utils import check_input_list, find_parent_op, check_cutting_points, record_ops_to_replace, \ + export_pb_graph, make_sorted_key_to_tensor_list, replace_anchor_control from mx_rec.graph.constants import DeprecatedOp, AnchorDatasetOp, AnchorIteratorOp from mx_rec.util.initialize import ConfigInitializer from mx_rec.util.log import logger from mx_rec.util.ops import import_host_pipeline_ops from mx_rec.util.perf import performance +from mx_rec.util.tf_version_adapter import hccl_ops, npu_ops from mx_rec.validator.validator import para_checker_decorator, ClassValidator @@ -381,6 +383,14 @@ def get_dataset_tensor_count(dataset: DatasetV1Adapter) -> int: return len(src_sorted_keys) +def change_ext_emb_size_by_opt(optimizer): + for _, table_instance in 
ConfigInitializer.get_instance().sparse_embed_config.table_instance_dict.items(): + # When dynamic expansion mode, ext_emb_size is set by optimizer + if ConfigInitializer.get_instance().use_dynamic_expansion or not table_instance.is_hbm: + table_instance.ext_emb_size = table_instance.emb_size * (1 + optimizer.slot_num) + logger.debug("ext_emb_size is reset to be %s for EmbInfo", table_instance.ext_emb_size) + + @para_checker_decorator( check_option_list=[("dump_graph", ClassValidator, {"classes": (bool,)})] ) @@ -457,6 +467,9 @@ def get_src_dataset(get_next_op: Operation, is_training: bool) -> DatasetV1Adapt elif is_training and len(dataset_op_list) == 2: prefetch_dataset_op_list = sorted(dataset_op_list, key=lambda op: op.name) target_op = prefetch_dataset_op_list[0] + elif not is_training and len(dataset_op_list) == 2: + prefetch_dataset_op_list = sorted(dataset_op_list, key=lambda op: op.name) + target_op = prefetch_dataset_op_list[1] elif not is_training and len(dataset_op_list) == 3: prefetch_dataset_op_list = sorted(dataset_op_list, key=lambda op: op.name) target_op = prefetch_dataset_op_list[1] @@ -567,6 +580,118 @@ def update_iterator_getnext(get_next_op: Operation, update_input_tensor_with_new_batch(record.replacement_spec, new_get_next_op_name, new_batch) +def get_swap_info(table_instance: BaseSparseEmbedding, variable_and_slot_list: list, swap_len: int, swap_pos: list, + channel_id: int) -> list: + """ + Get swap info if threshold is configured. + :param table_instance: BaseSparseEmbedding + :param variable_and_slot_list: [var + slots] + :param swap_len: swap length + :param swap_pos: swap position + :param channel_id: train or predict + :return: swap info + """ + use_static = ConfigInitializer.get_instance().use_static + max_lookup_vec_size = None + if use_static: + max_lookup_vec_size = table_instance.send_count * table_instance.rank_size + + if table_instance.is_hbm: + swap_in = [tf.no_op()] + else: + with tf.compat.v1.variable_scope("h2d_emb"): + logger.debug('Channel %s_h2d_%s was built for getnext', table_instance.table_name, channel_id) + h2d_emb = npu_ops.gen_npu_ops.get_next( + output_types=[tf.float32], + output_shapes=[[max_lookup_vec_size, table_instance.ext_emb_size]], + channel_name=f'{table_instance.table_name}_h2d_{channel_id}')[0] + logger.debug("h2d_emb shape: %s", h2d_emb) + if not isinstance(variable_and_slot_list, list): + raise RuntimeError("When enable emb_transfer, optimizer should have slots") + if use_static: + swap_pos = swap_pos[0:swap_len] + h2d_emb = h2d_emb[0:swap_len, :] + swap_outs = [tf.gather(one_table, swap_pos) for one_table in variable_and_slot_list] + swap_out = tf.concat(swap_outs, axis=1) + logger.debug('Channel %s_d2h_%s was built for op outfeed.', table_instance.table_name, channel_id) + swap_out_op = npu_ops.outfeed_enqueue_op( + channel_name=f'{table_instance.table_name}_d2h_{channel_id}', inputs=[swap_out]) + with tf.control_dependencies([swap_out_op]): + nd_swap_pos = tf.expand_dims(swap_pos, 1) + table_num = len(variable_and_slot_list) + h2d_emb_split = tf.split(h2d_emb, table_num, axis=1) + optimizer = ConfigInitializer.get_instance().optimizer_config.get_optimizer_by_table_name( + table_instance.table_name) + if optimizer is None and channel_id == 1: + swap_in = [tf.compat.v1.scatter_nd_update(variable_and_slot_list[0], nd_swap_pos, h2d_emb_split[0])] + else: + swap_in = [tf.compat.v1.scatter_nd_update(variable_and_slot_list[i], nd_swap_pos, h2d_emb_split[i]) + for i in range(len(variable_and_slot_list))] + return swap_in + + 
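get_swap_info now lives in the graph modifier: the lookup only emits a placeholder tf.no_op and registers it through SwapArgs, and modify_graph_for_ddr (defined just below) later rewires every control edge that pointed at the placeholder onto the real swap-in ops via replace_anchor_control. A condensed sketch of that two-phase handshake in graph mode; the table name and channel id are illustrative:

```python
import tensorflow as tf
from mx_rec.core.asc.swap_args import SwapArgs, SwapDataType

with tf.Graph().as_default():
    # Phase 1 (lookup build): a no-op stands in for the future swap ops.
    placeholder = tf.no_op(name="place_holder_swap_op")
    SwapArgs().set_data(SwapDataType.CONTROL.value,
                        var_name="user_table", var_channel=0,
                        control_ops=placeholder)

    # Phase 2 (modify_graph_for_ddr): once the optimizer and its slot count
    # are known, the recorded placeholder is fetched and its control edges
    # are rewired onto the real swap-in ops.
    recorded = SwapArgs().swap_control_dict["user_table"][0]["control_ops"]
    assert recorded is placeholder
```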
+def get_variable_and_slot_list(each_var, slot_num, table_name, channel_id): + variable_and_slot_list = [each_var] + if slot_num == 0: + return variable_and_slot_list + + # 通过apply_gradients创建optimizer + optimizer = ConfigInitializer.get_instance().optimizer_config.get_optimizer_by_table_name(table_name) + if optimizer is None and channel_id == 0: + raise RuntimeError("In training mode, table_instance should have been set_optimizer_for_table " + "before modify_graph, please check whether apply_gradients is performed") + + # predict不需要传优化器,但是如果客户创建了优化器,ddr模式加载的是维度ext_size的emb用作换入换出,所以需要给slot零值占位 + if optimizer is None and channel_id == 1: + slot_place_holder = tf.zeros_like(each_var) + for i in range(slot_num): + variable_and_slot_list.append(slot_place_holder) + else: + # opt name to slot dict + for slot_dict in optimizer.values(): + for slot_val in slot_dict.values(): + variable_and_slot_list.append(slot_val) + + return variable_and_slot_list + + +def modify_graph_for_ddr(get_next_op_map): + # 通过create_hash_optimizer创建optimizer_instance + optimizer_instance = ConfigInitializer.get_instance().optimizer_config.optimizer_instance + # predict + if optimizer_instance is None: + slot_num = 0 + else: + # ddr和扩容需要在获取优化器后重置ext + change_ext_emb_size_by_opt(optimizer_instance) + slot_num = optimizer_instance.slot_num + + for _, record in get_next_op_map.items(): + is_training = record.is_training + channel_id = 0 if is_training else 1 + + swap_args = SwapArgs() + sparse_variables = tf.compat.v1.get_collection( + ConfigInitializer.get_instance().train_params_config.ascend_global_hashtable_collection) + + for each_var in sparse_variables: + table_instance = ConfigInitializer.get_instance().sparse_embed_config.get_table_instance(each_var) + if table_instance.is_hbm: + continue + swap_args_dict = swap_args.swap_config_dict[table_instance.table_name][channel_id] + swap_pos = swap_args_dict['swap_pos'] + swap_len = swap_args_dict['swap_len'] + variable_and_slot_list = get_variable_and_slot_list(each_var, slot_num, table_instance.table_name, + channel_id) + + swap_op = get_swap_info(table_instance, variable_and_slot_list, swap_len, swap_pos, channel_id) + swap_control_dict = swap_args.swap_control_dict[table_instance.table_name][channel_id] + if "control_ops" not in swap_control_dict: + raise ValueError("Missing Required key in modify_graph_for_asc: control_ops") + control_ops = swap_control_dict['control_ops'] + replace_anchor_control(control_ops, swap_op) + + @performance("graph_modifier") def modify_graph_for_asc(dump_graph: bool = False, prefetch: int = 10): cutting_point_list = tf.compat.v1.get_collection(ASCEND_SPARSE_LOOKUP_ENTRANCE) @@ -612,6 +737,8 @@ def modify_graph_for_asc(dump_graph: bool = False, prefetch: int = 10): if is_training and not ConfigInitializer.get_instance().train_params_config.get_merged_multi_lookup(True): raise RuntimeError("In training mode, `do_merge_lookup` should have been executed in compute gradients " "phase. 
Please check whether compute gradients is performed.") + # ddr + modify_graph_for_ddr(get_next_op_map) logger.info("Graph has been revised.") export_pb_graph("new_graph.pb", dump_graph) diff --git a/mx_rec/graph/utils.py b/mx_rec/graph/utils.py index 8ffc8bc6..ca328ae3 100644 --- a/mx_rec/graph/utils.py +++ b/mx_rec/graph/utils.py @@ -23,11 +23,13 @@ import tensorflow as tf from tensorflow import Operation, Tensor from tensorflow.core.framework.graph_pb2 import GraphDef from tensorflow.python.framework.errors_impl import InvalidArgumentError +from tensorflow.python.ops import control_flow_ops from mx_rec.graph.slicers import OrphanLookupKeySlicer from mx_rec.graph.constants import AnchorIteratorOp from mx_rec.constants.constants import ASCAnchorAttr, DUMP_MIDIFY_GRAPH_FILE_MODE from mx_rec.core.embedding import BaseSparseEmbedding +from mx_rec.core.asc.swap_args import SwapArgs, SwapDataType from mx_rec.util.log import logger @@ -90,6 +92,32 @@ def replace_anchor(replacement_specs: DefaultDict[Tensor, List[Tuple[int, Operat f"new tensor: {new_tensor_list[tensor_idx]}.") from err +def record_control_to_replace(src_op: Operation) -> DefaultDict[Tensor, List[Tuple[int, Operation]]]: + replacement_specs = defaultdict(list) + op_list = tf.compat.v1.get_default_graph().get_operations() + for operator in op_list: + if src_op in operator.control_inputs: + input_index = operator.control_inputs.index(src_op) + replacement_specs[src_op].append((input_index, operator)) + + return replacement_specs + + +def replace_control_anchor(replacement_specs: DefaultDict[Tensor, List[Tuple[int, Operation]]], + new_tensor_list: List[Tensor]): + + for tensor_idx, (old_tensor, items) in enumerate(replacement_specs.items()): + for _, operator in items: + try: + control_op = control_flow_ops.group(new_tensor_list) + operator._add_control_input(control_op) + except InvalidArgumentError as err: + logger.info("The replacement control specs keys (old batch) is: %s. \n\t\t The new_tensor_list is: %s.", + replacement_specs.keys(), new_tensor_list) + raise RuntimeError(f"Cannot update edge, old tensor: {old_tensor}, " + f"new tensor: {new_tensor_list[tensor_idx]}.") from err + + def export_pb_graph(file_name: str, dump_graph: bool = False, graph_def: GraphDef = None, @@ -165,6 +193,27 @@ def replace_anchor_vec(cutting_point: Tensor, attribute: ASCAnchorAttr, anchor: replace_anchor(replacement_specs_for_anchor_vec, [anchor]) +def replace_anchor_control(place_holder_control: tf.Operation, real_anchor: Tensor): + """ + 将place_holder_control替换为入参real_anchor. + + Args: + place_holder_control: control op + real_anchor: 用来替换打桩节点的tensor + + Returns: None + + """ + + if place_holder_control is None: + raise RuntimeError(f"Node place_holder_control does not exist. 
Check whether the sparse lookup interface " + f"is correctly invoked.") + # find the op with stub node as the input + replacement_specs_for_anchor_vec = record_control_to_replace(place_holder_control) + # replace anchor_vec with anchor + replace_control_anchor(replacement_specs_for_anchor_vec, real_anchor) + + def mark_orphan_lookup_key(lookup_key: Tensor) -> Tensor: graph_def = tf.compat.v1.get_default_graph().as_graph_def() subgraph = tf.compat.v1.graph_util.extract_sub_graph(graph_def, [lookup_key.op.name]) diff --git a/mx_rec/optimizers/adagrad.py b/mx_rec/optimizers/adagrad.py index 125346b9..9998ec1f 100644 --- a/mx_rec/optimizers/adagrad.py +++ b/mx_rec/optimizers/adagrad.py @@ -25,7 +25,6 @@ from tensorflow.python.framework import ops from tensorflow.python.ops import init_ops from tensorflow.python.ops import math_ops from tensorflow.python.training import adagrad, training_ops -from tensorflow.python.training import slot_creator from mx_rec.optimizers.base import CustomizedOptimizer from mx_rec.util.initialize import ConfigInitializer @@ -80,30 +79,6 @@ class CustomizedAdagrad(adagrad.AdagradOptimizer, CustomizedOptimizer): self._slot_num = 1 self._derivative = 2 - def initialize_slots(self, var, table_instance): - # Create slots for the first and second moments. - def creat_one_single_slot(var, op_name): - new_slot_variable = slot_creator.create_zeros_slot(var, op_name) - # make sure sparse optimizer statements will not be saved and restored within tf checkpoint. - return new_slot_variable - - accumulator = creat_one_single_slot(var, self._name + "/" + "accumulator") - ConfigInitializer.get_instance().sparse_embed_config.insert_removing_var_list(accumulator.name) - named_slot_key = (var.op.graph, var.op.name) - table_instance = ConfigInitializer.get_instance().sparse_embed_config.get_table_instance(var) - ConfigInitializer.get_instance().optimizer_config.set_optimizer_for_table(table_instance.table_name, - self.optimizer_type, - {"accumulator": accumulator}) - return [{"slot": accumulator, "named_slot_key": named_slot_key, "slot_name": "acc", "optimizer": self}] - - def insert_slot(self, slot, named_slots_key, slot_name): - named_slots = self._slot_dict(slot_name) - if named_slots_key in named_slots: - raise EnvironmentError(f"named_slots_key should be global unique, but it has been in use now, " - f"please double check.") - - named_slots[named_slots_key] = slot - def get_slot_init_values(self): # return state value list of adagrad that needs to initialize in ASC DDR. 
initial_accumulator_value = 0.0 diff --git a/mx_rec/optimizers/base.py b/mx_rec/optimizers/base.py index 696406f8..fbc63193 100644 --- a/mx_rec/optimizers/base.py +++ b/mx_rec/optimizers/base.py @@ -122,12 +122,6 @@ class CustomizedOptimizer: array_ops.shape(unique_keys)[0]) return unique_local_grad, unique_keys - def initialize_slots(self, var, table_instance): - raise NotImplementedError(f"Please define a specific realization on {self.__class__.__name__}") - - def insert_slot(self, slot, named_slots_key, slot_name): - raise NotImplementedError(f"Please define a specific realization on {self.__class__.__name__}") - def get_slot_init_values(self): raise NotImplementedError(f"Please define a specific realization on {self.__class__.__name__}") diff --git a/mx_rec/optimizers/emb_optimizer.py b/mx_rec/optimizers/emb_optimizer.py deleted file mode 100644 index 9e6a80e1..00000000 --- a/mx_rec/optimizers/emb_optimizer.py +++ /dev/null @@ -1,76 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -# Copyright (c) Huawei Technologies Co., Ltd. 2023-2023. All rights reserved. - -from mx_rec.optimizers.base import CustomizedOptimizer -from mx_rec.util.tf_version_adapter import NPULossScaleOptimizer - - -class EmbOptimizer: - """ - 稀疏表的优化器. - """ - - def __init__(self, optimizer_list): - self._optimizer_instance_list = optimizer_list - self._optimizer_slot_info_list = [] - self._optimizer = dict() - - @property - def optimizer_instance_list(self): - return self._optimizer_instance_list - - @property - def optimizer_slot_info_list(self): - return self._optimizer_slot_info_list - - @property - def optimizer(self): - return self._optimizer - - @staticmethod - def set_optimizer_slot(slot_info: dict): - """ - 设置稀疏表优化器的slot信息. - - Args: - slot_info: 优化器slot信息 - - Returns: None - """ - slot = slot_info.get("slot") - slot_name = slot_info.get("slot_name") - optimizer = slot_info.get("optimizer") - named_slot_key = slot_info.get("named_slot_key") - - optimizer.insert_slot(slot, named_slot_key, slot_name) - - def set_optimizer(self, key: str, state_dict: dict, table_name: str): - """ - 设置optimizer state. - - Args: - key: 优化器名字 - state_dict: optimizer state - table_name: 稀疏表名 - - Returns: None - """ - if key in self._optimizer: - raise ValueError(f"optimizer {key} has been set for hash table {table_name}.") - self._optimizer[key] = state_dict - - def check_optimizer_instance_list(self): - """ - 校验优化器实例列表. 
- """ - if not self._optimizer_instance_list: - raise ValueError("External storage mode should config optimizers before instantiating sparse table, " - "but nothing was configured.") - - for optimizer_instance in self._optimizer_instance_list: - if isinstance(optimizer_instance, NPULossScaleOptimizer): - optimizer_instance = getattr(optimizer_instance, '_opt') - - if not isinstance(optimizer_instance, CustomizedOptimizer): - raise TypeError("the optimizer instance must be an instance of CustomizedOptimizer.") diff --git a/mx_rec/optimizers/ftrl.py b/mx_rec/optimizers/ftrl.py index ef617c2d..30287abd 100644 --- a/mx_rec/optimizers/ftrl.py +++ b/mx_rec/optimizers/ftrl.py @@ -29,7 +29,6 @@ from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import gen_state_ops from tensorflow.python.training import ftrl -from tensorflow.python.training import slot_creator from mx_rec.optimizers.base import CustomizedOptimizer from mx_rec.util.initialize import ConfigInitializer @@ -82,30 +81,6 @@ class CustomizedFtrl(ftrl.FtrlOptimizer, CustomizedOptimizer): self._slot_num = 2 self._derivative = 2 - def initialize_slots(self, var, table_instance): - val = constant_op.constant( - self._initial_accumulator_value, dtype=var.dtype, shape=var.get_shape()) - - accum = slot_creator.create_slot(var, val, self._name + "/" + "accum") - linear = slot_creator.create_zeros_slot(var, self._name + "/" + "linear") - ConfigInitializer.get_instance().sparse_embed_config.insert_removing_var_list(accum.name) - ConfigInitializer.get_instance().sparse_embed_config.insert_removing_var_list(linear.name) - named_slot_key = (var.op.graph, var.op.name) - table_instance = ConfigInitializer.get_instance().sparse_embed_config.get_table_instance(var) - ConfigInitializer.get_instance().optimizer_config.set_optimizer_for_table(table_instance.table_name, - self.optimizer_type, - {"accum": accum, "linear": linear}) - return [{"slot": accum, "named_slot_key": named_slot_key, "slot_name": "accum", "optimizer": self}, - {"slot": linear, "named_slot_key": named_slot_key, "slot_name": "linear", "optimizer": self}] - - def insert_slot(self, slot, named_slots_key, slot_name): - named_slots = self._slot_dict(slot_name) - if named_slots_key in named_slots: - raise EnvironmentError(f"named_slots_key should be global unique, but it has been in use now, " - f"please double check.") - - named_slots[named_slots_key] = slot - def get_slot_init_values(self): # return state value list of ftrl that needs to initialize in ASC DDR. 
initial_linear_value = 0.0 diff --git a/mx_rec/optimizers/gradient_descent.py b/mx_rec/optimizers/gradient_descent.py index d021f69f..89d67d89 100644 --- a/mx_rec/optimizers/gradient_descent.py +++ b/mx_rec/optimizers/gradient_descent.py @@ -57,9 +57,6 @@ class CustomizedGradientDescent(gradient_descent.GradientDescentOptimizer, Custo self._slot_num = 0 self._derivative = 1 - def initialize_slots(self, var, table_instance): - return [] - def get_slot_init_values(self): return [] diff --git a/mx_rec/optimizers/gradient_descent_by_addr.py b/mx_rec/optimizers/gradient_descent_by_addr.py index 9db7c2ae..8cf9257e 100644 --- a/mx_rec/optimizers/gradient_descent_by_addr.py +++ b/mx_rec/optimizers/gradient_descent_by_addr.py @@ -62,9 +62,6 @@ class CustomizedGradientDescentByAddr(gradient_descent.GradientDescentOptimizer, self._slot_num = 0 self._derivative = 1 - def initialize_slots(self, var, table_instance): - return [] - def get_slot_init_values(self): return [] diff --git a/mx_rec/optimizers/lazy_adam.py b/mx_rec/optimizers/lazy_adam.py index 875f350f..9aee0204 100644 --- a/mx_rec/optimizers/lazy_adam.py +++ b/mx_rec/optimizers/lazy_adam.py @@ -28,7 +28,6 @@ from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import gen_state_ops from tensorflow.python.ops import math_ops from tensorflow.python.training import adam -from tensorflow.python.training import slot_creator from mx_rec.optimizers.base import CustomizedOptimizer from mx_rec.util.initialize import ConfigInitializer @@ -84,34 +83,6 @@ class CustomizedLazyAdam(adam.AdamOptimizer, CustomizedOptimizer): self._slot_num = 2 self._derivative = 2 - def initialize_slots(self, var, table_instance): - # Create slots for the first and second moments. - def creat_one_single_slot(var, op_name): - new_slot_variable = slot_creator.create_zeros_slot(var, op_name) - # make sure sparse optimizer statements will not be saved and restored within tf checkpoint. - return new_slot_variable - - momentum = creat_one_single_slot(var, self._name + "/" + "momentum") - velocity = creat_one_single_slot(var, self._name + "/" + "velocity") - self.config_instance.sparse_embed_config.insert_removing_var_list(momentum.name) - self.config_instance.sparse_embed_config.insert_removing_var_list(velocity.name) - named_slot_key = (var.op.graph, var.op.name) - table_instance = self.config_instance.sparse_embed_config.get_table_instance(var) - ConfigInitializer.get_instance().optimizer_config.set_optimizer_for_table(table_instance.table_name, - self.optimizer_type, - {"momentum": momentum, - "velocity": velocity}) - return [{"slot": momentum, "named_slot_key": named_slot_key, "slot_name": "m", "optimizer": self}, - {"slot": velocity, "named_slot_key": named_slot_key, "slot_name": "v", "optimizer": self}] - - def insert_slot(self, slot, named_slots_key, slot_name): - named_slots = self._slot_dict(slot_name) - if named_slots_key in named_slots: - raise EnvironmentError(f"named_slots_key should be global unique, but it has been in use now, " - f"please double check.") - - named_slots[named_slots_key] = slot - def get_slot_init_values(self): # return state value list of adam that needs to initialize in ASC DDR. 
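# LazyAdam reports two zero-initialized slots (_slot_num = 2): the first
# moment "momentum" (m) and the second moment "velocity" (v), presumably in
# that order. Storing both moments next to each embedding row is consistent
# with the ext_emb_size = 24 used for an 8-dim table in the unit tests later
# in this series, i.e. emb_size * (1 + _slot_num).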
initial_momentum_value = 0.0 diff --git a/mx_rec/util/variable.py b/mx_rec/util/variable.py index 2c9f49a9..0040e2b5 100644 --- a/mx_rec/util/variable.py +++ b/mx_rec/util/variable.py @@ -27,11 +27,6 @@ def get_dense_and_sparse_variable(): return dense_variables, sparse_variables -def check_and_get_config_via_var(variable, optimizer_type: str): +def get_config_via_var(variable): table_instance = ConfigInitializer.get_instance().sparse_embed_config.get_table_instance(variable) - - if not table_instance.is_hbm and not table_instance.optimizer: - raise EnvironmentError(f"When ASC with DDR, you must pass the '{optimizer_type}' optimizer instances to the" - f" init method of SparseEmbedding.") - return table_instance diff --git a/tests/mx_rec/core/mock_class.py b/tests/mx_rec/core/mock_class.py index 6fb2ef38..e02f6257 100644 --- a/tests/mx_rec/core/mock_class.py +++ b/tests/mx_rec/core/mock_class.py @@ -20,8 +20,6 @@ import tensorflow as tf from tensorflow_core.python.training import slot_creator from mx_rec import ASCEND_GLOBAL_HASHTABLE_COLLECTION -from mx_rec.optimizers.lazy_adam import CustomizedLazyAdam -from mx_rec.util.config_utils.embedding_utils import SparseEmbedConfig from mx_rec.util.config_utils.feature_spec_utils import FeatureSpecConfig from mx_rec.util.config_utils.optimizer_utils import OptimizerConfig @@ -209,23 +207,6 @@ class MockOptimizer: self.slot_num = 2 self.derivative = 2 - def initialize_slots(self, var, table_instance): - # Create slots for the first and second moments. - def creat_one_single_slot(var, op_name): - new_slot_variable = slot_creator.create_zeros_slot(var, op_name) - return new_slot_variable - - momentum = creat_one_single_slot(var, self._name + "/" + "momentum") - velocity = creat_one_single_slot(var, self._name + "/" + "velocity") - named_slot_key = (var.op.graph, var.op.name) - - table_instance.set_optimizer(self._name, {"momentum": momentum, "velocity": velocity}) - return [{"slot": momentum, "named_slot_key": named_slot_key, "slot_name": "m", "optimizer": self}, - {"slot": velocity, "named_slot_key": named_slot_key, "slot_name": "v", "optimizer": self}] - - def insert_slot(self, slot, named_slots_key, slot_name): - pass - def get_slot_init_values(self): initial_momentum_value = 0.0 initial_velocity_value = 0.0 diff --git a/tests/mx_rec/core/test_build_graph.py b/tests/mx_rec/core/test_build_graph.py index dd17afec..c5766179 100644 --- a/tests/mx_rec/core/test_build_graph.py +++ b/tests/mx_rec/core/test_build_graph.py @@ -32,14 +32,12 @@ class TestGetRestoreVectorFunc(unittest.TestCase): def setUp(self): # 默认动态扩容、hot emb、HBM self.config = dict(table_name="test_table", channel_id=0, is_hbm=True, emb_size=8, ext_emb_size=8, - feat_cnt=8, batch_size=32, rank_size=8, send_count=1, device_id=0, - use_dynamic_expansion=True) + feat_cnt=8, batch_size=32, rank_size=8, send_count=1, use_dynamic_expansion=True) def tearDown(self): # 恢复config self.config = dict(table_name="test_table", channel_id=0, is_hbm=True, emb_size=8, ext_emb_size=8, - feat_cnt=8, batch_size=32, rank_size=8, send_count=1, device_id=0, - use_dynamic_expansion=True) + feat_cnt=8, batch_size=32, rank_size=8, send_count=1, use_dynamic_expansion=True) def test_get_restore_vector_case1(self): """ @@ -114,15 +112,13 @@ class TestGetIdOffsetsFunc(unittest.TestCase): def setUp(self): # 默认动态扩容、hot emb、HBM self.config = dict(table_name="test_table", channel_id=0, is_hbm=True, emb_size=8, ext_emb_size=8, - feat_cnt=8, batch_size=32, rank_size=8, send_count=1, device_id=0, - 
use_dynamic_expansion=True) + feat_cnt=8, batch_size=32, rank_size=8, send_count=1, use_dynamic_expansion=True) self.max_lookup_vec_size = self.config.get("send_count") * self.config.get("rank_size") def tearDown(self): # 恢复config self.config = dict(table_name="test_table", channel_id=0, is_hbm=True, emb_size=8, ext_emb_size=8, - feat_cnt=8, batch_size=32, rank_size=8, send_count=1, device_id=0, - use_dynamic_expansion=True) + feat_cnt=8, batch_size=32, rank_size=8, send_count=1, use_dynamic_expansion=True) @mock.patch("mx_rec.core.asc.build_graph.npu_ops.gen_npu_ops.get_next") def test_get_id_offsets_case1(self, mock_get_next): @@ -164,14 +160,12 @@ class TestGetAll2allArgsFunc(unittest.TestCase): def setUp(self): # 默认动态扩容、hot emb、HBM self.config = dict(table_name="test_table", channel_id=0, is_hbm=True, emb_size=8, ext_emb_size=8, - feat_cnt=8, batch_size=32, rank_size=8, send_count=1, device_id=0, - use_dynamic_expansion=True) + feat_cnt=8, batch_size=32, rank_size=8, send_count=1, use_dynamic_expansion=True) def tearDown(self): # 恢复config self.config = dict(table_name="test_table", channel_id=0, is_hbm=True, emb_size=8, ext_emb_size=8, - feat_cnt=8, batch_size=32, rank_size=8, send_count=1, device_id=0, - use_dynamic_expansion=True) + feat_cnt=8, batch_size=32, rank_size=8, send_count=1, use_dynamic_expansion=True) def test_get_all2all_args_case1(self): """ @@ -198,60 +192,6 @@ class TestGetAll2allArgsFunc(unittest.TestCase): self.assertEqual(all2all_args, 0) -class TestGetSwapInfoFunc(unittest.TestCase): - """ - Test for 'mx_rec.core.asc.build_graph.get_swap_info'. - """ - - def setUp(self): - # 默认动态扩容、hot emb、HBM - self.config = dict(table_name="test_table", channel_id=0, is_hbm=True, emb_size=8, ext_emb_size=8, - feat_cnt=8, batch_size=32, rank_size=8, send_count=1, device_id=0, - use_dynamic_expansion=True) - - def tearDown(self): - # 恢复config - self.config = dict(table_name="test_table", channel_id=0, is_hbm=True, emb_size=8, ext_emb_size=8, - feat_cnt=8, batch_size=32, rank_size=8, send_count=1, device_id=0, - use_dynamic_expansion=True) - - @mock.patch("mx_rec.core.asc.build_graph.ConfigInitializer") - def test_get_swap_info_case1(self, build_graph_config_initializer): - """ - case1: 静态shape,HBM - """ - - from mx_rec.core.asc.build_graph import get_swap_info - - with tf.Graph().as_default(): - mock_config_initializer = MockConfigInitializer(use_static=True) - build_graph_config_initializer.get_instance = mock.Mock(return_value=mock_config_initializer) - - swap_in = get_swap_info(self.config, None, None, None) - self.assertIsInstance(swap_in[0], type(tf.no_op())) - - @mock.patch("mx_rec.core.asc.build_graph.ConfigInitializer") - @mock.patch("mx_rec.core.asc.build_graph.npu_ops.gen_npu_ops.get_next") - def test_get_swap_info_case2(self, mock_get_next, build_graph_config_initializer): - """ - case2: 静态shape,非HBM,table传入非list,抛出异常 - """ - - from mx_rec.core.asc.build_graph import get_swap_info - - with tf.Graph().as_default(): - mock_config_initializer = MockConfigInitializer(use_static=True) - build_graph_config_initializer.get_instance = mock.Mock(return_value=mock_config_initializer) - - mock_get_next.return_value = tf.ones(shape=[8, 8], dtype=tf.float32) - swap_pos = tf.constant([8, 9], dtype=tf.int32) - swap_len = tf.constant(2, dtype=tf.int32) - table = tf.compat.v1.get_variable("test_table", shape=[10, 8], initializer=tf.ones_initializer()) - self.config["is_hbm"] = False - with self.assertRaises(RuntimeError): - get_swap_info(self.config, swap_len, swap_pos, table) - - class 
TestGetPreProcessedTensorForAscFunc(unittest.TestCase): """ Test for 'mx_rec.core.asc.build_graph.get_preprocessed_tensor_for_asc'. @@ -260,21 +200,17 @@ class TestGetPreProcessedTensorForAscFunc(unittest.TestCase): def setUp(self): # 默认动态扩容、hot emb、HBM self.config = dict(table_name="test_table", channel_id=0, is_hbm=True, emb_size=8, ext_emb_size=8, - feat_cnt=8, batch_size=32, rank_size=8, send_count=1, device_id=0, - use_dynamic_expansion=True) + feat_cnt=8, batch_size=32, rank_size=8, send_count=1, use_dynamic_expansion=True) def tearDown(self): # 恢复config self.config = dict(table_name="test_table", channel_id=0, is_hbm=True, emb_size=8, ext_emb_size=8, - feat_cnt=8, batch_size=32, rank_size=8, send_count=1, device_id=0, - use_dynamic_expansion=True) - global_env.apply_gradients_strategy = "direct_apply" + feat_cnt=8, batch_size=32, rank_size=8, send_count=1, use_dynamic_expansion=True) @mock.patch.multiple("mx_rec.core.asc.build_graph", get_restore_vector=mock.MagicMock(return_value=[0, 0]), get_id_offsets=mock.MagicMock(return_value=[0, 0, 0]), - get_all2all_args=mock.MagicMock(return_value=0), - get_swap_info=mock.MagicMock(return_value=0)) + get_all2all_args=mock.MagicMock(return_value=0)) @mock.patch("mx_rec.core.asc.build_graph.ConfigInitializer") def test_get_preprocessed_tensor_for_asc_case1(self, build_graph_config_initializer): """ @@ -293,8 +229,7 @@ class TestGetPreProcessedTensorForAscFunc(unittest.TestCase): @mock.patch.multiple("mx_rec.core.asc.build_graph", get_restore_vector=mock.MagicMock(return_value=[0, 0]), get_id_offsets=mock.MagicMock(return_value=[0, 0, 0]), - get_all2all_args=mock.MagicMock(return_value=0), - get_swap_info=mock.MagicMock(return_value=0)) + get_all2all_args=mock.MagicMock(return_value=0)) @mock.patch("mx_rec.core.asc.build_graph.ConfigInitializer") def test_get_preprocessed_tensor_for_asc_case2(self, build_graph_config_initializer): """ @@ -313,8 +248,7 @@ class TestGetPreProcessedTensorForAscFunc(unittest.TestCase): @mock.patch.multiple("mx_rec.core.asc.build_graph", get_restore_vector=mock.MagicMock(return_value=[0, 0]), get_id_offsets=mock.MagicMock(return_value=[0, 0, 0]), - get_all2all_args=mock.MagicMock(return_value=0), - get_swap_info=mock.MagicMock(return_value=0)) + get_all2all_args=mock.MagicMock(return_value=0)) @mock.patch("mx_rec.core.asc.build_graph.ConfigInitializer") def test_get_preprocessed_tensor_for_asc_case3(self, build_graph_config_initializer): """ diff --git a/tests/mx_rec/core/test_embedding.py b/tests/mx_rec/core/test_embedding.py index bf7d9240..5bc762f4 100644 --- a/tests/mx_rec/core/test_embedding.py +++ b/tests/mx_rec/core/test_embedding.py @@ -120,8 +120,7 @@ class TestCreateTableFunc(unittest.TestCase): dim=8, name='test_table', emb_initializer=tf.compat.v1.truncated_normal_initializer(), - host_vocabulary_size=8, - optimizer_list=[create_hash_optimizer(learning_rate=0.01)]) + host_vocabulary_size=8) self.assertIsInstance(test_table, ExternalStorageSparseEmbedding) @@ -134,12 +133,11 @@ class TestSparseLookupFunc(unittest.TestCase): get_rank_size=mock.MagicMock(return_value=8), get_rank_id=mock.MagicMock(return_value=0), get_device_id=mock.MagicMock(return_value=0)) - @mock.patch("mx_rec.core.emb.sparse_embedding.get_preprocessed_tensor_for_asc") + @mock.patch("mx_rec.core.emb.base_sparse_embedding.get_preprocessed_tensor_for_asc") @mock.patch("mx_rec.core.embedding.ConfigInitializer") @mock.patch("mx_rec.core.emb.base_sparse_embedding.ConfigInitializer") 
@mock.patch("mx_rec.validator.emb_validator.ConfigInitializer") - @mock.patch("mx_rec.core.emb.sparse_embedding.ConfigInitializer") - def test_sparse_lookup_case1(self, embedding_config_initializer, base_sparse_embedding_config_initializer, + def test_sparse_lookup_case1(self, base_sparse_embedding_config_initializer, emb_validator_config_initializer, sparse_embedding_config_initializer, mock_get_preprocessed_tensor_for_asc): """ @@ -154,7 +152,6 @@ class TestSparseLookupFunc(unittest.TestCase): # mock mock_config_initializer = MockConfigInitializer(use_dynamic_expansion=False) - embedding_config_initializer.get_instance = mock.Mock(return_value=mock_config_initializer) base_sparse_embedding_config_initializer.get_instance = mock.Mock(return_value=mock_config_initializer) emb_validator_config_initializer.get_instance = mock.Mock(return_value=mock_config_initializer) sparse_embedding_config_initializer.get_instance = mock.Mock(return_value=mock_config_initializer) @@ -166,12 +163,9 @@ class TestSparseLookupFunc(unittest.TestCase): batch = {"case1_feat": tf.ones(shape=[8, 8], dtype=tf.int64)} mock_get_preprocessed_tensor_for_asc.return_value = { "restore_vector": tf.ones(shape=[8, 8], dtype=tf.int64), - "restore_vector_second": tf.ones(shape=[8, ], dtype=tf.int64), - "unique_keys": tf.ones(shape=[8, ], dtype=tf.int64), "hot_pos": tf.ones(shape=[8, ], dtype=tf.int64), "id_offsets": tf.ones(shape=[8, ], dtype=tf.int64), - "all2all_args": tf.ones(shape=[8, 8], dtype=tf.int64), - "swap_in": [tf.no_op()] + "all2all_args": tf.ones(shape=[8, 8], dtype=tf.int64) } # test @@ -190,12 +184,11 @@ class TestSparseLookupFunc(unittest.TestCase): get_rank_id=mock.MagicMock(return_value=0), get_device_id=mock.MagicMock(return_value=0)) @mock.patch("mx_rec.core.asc.feature_spec.ConfigInitializer") - @mock.patch("mx_rec.core.emb.sparse_embedding.get_preprocessed_tensor_for_asc") + @mock.patch("mx_rec.core.emb.base_sparse_embedding.get_preprocessed_tensor_for_asc") @mock.patch("mx_rec.core.embedding.ConfigInitializer") @mock.patch("mx_rec.core.emb.base_sparse_embedding.ConfigInitializer") @mock.patch("mx_rec.validator.emb_validator.ConfigInitializer") - @mock.patch("mx_rec.core.emb.sparse_embedding.ConfigInitializer") - def test_sparse_lookup_case2(self, embedding_config_initializer, base_sparse_embedding_config_initializer, + def test_sparse_lookup_case2(self, base_sparse_embedding_config_initializer, emb_validator_config_initializer, sparse_embedding_config_initializer, mock_get_preprocessed_tensor_for_asc, feature_spec_config_initializer): """ @@ -210,7 +203,6 @@ class TestSparseLookupFunc(unittest.TestCase): # mock mock_config_initializer = MockConfigInitializer(use_dynamic_expansion=False) - embedding_config_initializer.get_instance = mock.Mock(return_value=mock_config_initializer) base_sparse_embedding_config_initializer.get_instance = mock.Mock(return_value=mock_config_initializer) emb_validator_config_initializer.get_instance = mock.Mock(return_value=mock_config_initializer) sparse_embedding_config_initializer.get_instance = mock.Mock(return_value=mock_config_initializer) @@ -219,12 +211,9 @@ class TestSparseLookupFunc(unittest.TestCase): case2_feat = tf.ones(shape=[8, 8], dtype=tf.int64) mock_get_preprocessed_tensor_for_asc.return_value = { "restore_vector": tf.ones(shape=[8, 8], dtype=tf.int64), - "restore_vector_second": tf.ones(shape=[8, ], dtype=tf.int64), - "unique_keys": tf.ones(shape=[8, ], dtype=tf.int64), "hot_pos": tf.ones(shape=[8, ], dtype=tf.int64), "id_offsets": tf.ones(shape=[8, ], 
dtype=tf.int64), - "all2all_args": tf.ones(shape=[8, 8], dtype=tf.int64), - "swap_in": [tf.no_op()] + "all2all_args": tf.ones(shape=[8, 8], dtype=tf.int64) } # test diff --git a/tests/mx_rec/core/test_manager.py b/tests/mx_rec/core/test_manager.py index b08a6a6f..70c2f150 100644 --- a/tests/mx_rec/core/test_manager.py +++ b/tests/mx_rec/core/test_manager.py @@ -74,7 +74,6 @@ class TestGenerateTableInfoListFunc(unittest.TestCase): mock_opt = MockOptimizer() manager_config_initializer.get_instance().optimizer_config.optimizer_instance = mock_opt - test_table.optimizer_instance_list = [mock_opt] table_info_list = generate_table_info_list() self.assertListEqual(table_info_list, []) @@ -100,7 +99,6 @@ class TestGenerateTableInfoListFunc(unittest.TestCase): mock_opt = MockOptimizer() manager_config_initializer.get_instance().optimizer_config.optimizer_instance = mock_opt - test_table.optimizer_instance_list = [mock_opt] table_info_list = generate_table_info_list() self.assertListEqual(table_info_list, []) @@ -139,7 +137,6 @@ class TestGenerateTableInfoListFunc(unittest.TestCase): mock_opt = MockOptimizer() manager_config_initializer.get_instance().optimizer_config.optimizer_instance = mock_opt - test_table.optimizer_instance_list = [mock_opt] table_info_list = generate_table_info_list() self.assertListEqual(table_info_list, ["test_table_info"]) @@ -338,7 +335,6 @@ class TestMatchedOptSlotInitializersFunc(unittest.TestCase): table_instance.ext_emb_size = 24 mock_opt = MockOptimizer() manager_config_initializer.get_instance().optimizer_config.optimizer_instance = mock_opt - table_instance.optimizer_instance_list = [mock_opt] slot_initializers = matched_opt_slot_initializers(table_instance) self.assertListEqual(slot_initializers, ["slot_initializer", "slot_initializer"]) diff --git a/tests/mx_rec/graph/test_modifier.py b/tests/mx_rec/graph/test_modifier.py index 2a9af10d..ff9a6664 100644 --- a/tests/mx_rec/graph/test_modifier.py +++ b/tests/mx_rec/graph/test_modifier.py @@ -18,7 +18,7 @@ import unittest from collections import defaultdict from unittest import TestCase -from unittest.mock import patch, Mock +from unittest.mock import patch, Mock, MagicMock from typing import Union, Callable import tensorflow as tf @@ -47,7 +47,7 @@ from mx_rec.graph.modifier import ( get_timestamp_index, modify_graph_for_asc, ) -from tests.mx_rec.core.mock_class import MockConfigInitializer +from tests.mx_rec.core.mock_class import MockConfigInitializer, MockSparseEmbedding, MockOptimizer from tests.mx_rec.graph.mock_dataset import gen_mock_dataset @@ -257,6 +257,9 @@ class ModifyGraphForAscTest(TestCase): get_asc_insert_func=Mock(return_value=lambda x, y: x), ) @patch.multiple("mx_rec.graph.modifier.BaseSparseEmbedding", get_anchor_attribute=_gen_mock_get_anchor_attribute()) + @patch.multiple("mx_rec.core.asc.manager", + should_skip=MagicMock(return_value=True), + check_dangling_table=MagicMock(return_value=["test_table"])) @patch("mx_rec.graph.modifier.ConfigInitializer") def test_ok_train_mode(self, modifier_config_initializer): mock_config_initializer = MockConfigInitializer(modify_graph=True, merged_multi_lookup=True) @@ -268,6 +271,13 @@ class ModifyGraphForAscTest(TestCase): mock_ids = mock_batch.get("mock_ids") mock_cutting_point = tf.identity(mock_ids) + test_table = MockSparseEmbedding("test_table") + test_table.is_hbm = True + mock_config_initializer.get_instance().sparse_embed_config.table_instance_dict = dict(test_table=test_table) + + mock_opt = MockOptimizer() + 
modifier_config_initializer.get_instance().optimizer_config.optimizer_instance = mock_opt + tf.compat.v1.add_to_collection(ASCEND_SPARSE_LOOKUP_ENTRANCE, mock_cutting_point) modify_graph_for_asc() @@ -293,6 +303,13 @@ class ModifyGraphForAscTest(TestCase): mock_ids = mock_batch.get("mock_ids") mock_cutting_point = tf.identity(mock_ids) + test_table = MockSparseEmbedding("test_table") + test_table.is_hbm = True + mock_config_initializer.get_instance().sparse_embed_config.table_instance_dict = dict(test_table=test_table) + + mock_opt = MockOptimizer() + modifier_config_initializer.get_instance().optimizer_config.optimizer_instance = mock_opt + tf.compat.v1.add_to_collection(ASCEND_SPARSE_LOOKUP_ENTRANCE, mock_cutting_point) modify_graph_for_asc() diff --git a/tests/mx_rec/saver/sparse_embedding_mock.py b/tests/mx_rec/saver/sparse_embedding_mock.py index 83507e63..7f7d437d 100644 --- a/tests/mx_rec/saver/sparse_embedding_mock.py +++ b/tests/mx_rec/saver/sparse_embedding_mock.py @@ -29,11 +29,4 @@ class SparseEmbeddingMock: self.emb_size = 4 self.is_hbm = host_vocab_size == 0 self.host_vocabulary_size = host_vocab_size - self.optimizer = dict() self.use_dynamic_expansion = False - - def set_optimizer(self, key, state_dict): - if key in self.optimizer: - raise ValueError(f"optimizer {key} has been set for hash table {self.table_name}") - - self.optimizer[key] = state_dict diff --git a/tests/mx_rec/saver/test_saver.py b/tests/mx_rec/saver/test_saver.py index 60c40a21..c0436a72 100644 --- a/tests/mx_rec/saver/test_saver.py +++ b/tests/mx_rec/saver/test_saver.py @@ -23,6 +23,7 @@ import tensorflow as tf from mx_rec.saver.saver import Saver from mx_rec.constants.constants import ASCEND_GLOBAL_HASHTABLE_COLLECTION +from mx_rec.util.initialize import ConfigInitializer from tests.mx_rec.core.mock_class import MockConfigInitializer from tests.mx_rec.saver.sparse_embedding_mock import SparseEmbeddingMock @@ -40,8 +41,7 @@ class TestSaver(unittest.TestCase): @mock.patch.multiple("mx_rec.saver.saver", get_rank_id=mock.MagicMock(return_value=0), - get_local_rank_size=mock.MagicMock(return_value=1), - set_optimizer_info=mock.MagicMock(return_value=None)) + get_local_rank_size=mock.MagicMock(return_value=1)) @mock.patch("mx_rec.saver.saver.ConfigInitializer") def test_save_and_load_is_consistent(self, saver_config_initializer): mock_config_initializer = \ @@ -86,7 +86,6 @@ class TestSaver(unittest.TestCase): optim_v_tensor = emb_initializer(self.shape) self.optimizer_v = tf.compat.v1.get_variable(self.optim_v_name, trainable=False, initializer=optim_v_tensor) - table_instance.set_optimizer("LazyAdam", {"momentum": self.optimizer_m, "velocity": self.optimizer_v}) tf.compat.v1.add_to_collection(ASCEND_GLOBAL_HASHTABLE_COLLECTION, self.var) return self.graph diff --git a/tests/mx_rec/util/test_variable.py b/tests/mx_rec/util/test_variable.py index f8cd2725..a3370e84 100644 --- a/tests/mx_rec/util/test_variable.py +++ b/tests/mx_rec/util/test_variable.py @@ -21,7 +21,7 @@ from unittest.mock import patch import tensorflow as tf from mx_rec.util.global_env_conf import global_env -from mx_rec.util.variable import check_and_get_config_via_var +from mx_rec.util.variable import get_config_via_var from mx_rec.util.variable import get_dense_and_sparse_variable from tests.mx_rec.core.mock_class import MockConfigInitializer @@ -29,7 +29,6 @@ from tests.mx_rec.core.mock_class import MockConfigInitializer class MockTableInstance: def __init__(self): self.is_hbm = False - self.optimizer = False @patch.multiple( @@ -73,14 
+72,6 @@ class VariableTest(unittest.TestCase): self.assertTrue(result_run) tf.reset_default_graph() - @mock.patch("mx_rec.util.variable.ConfigInitializer") - def test_check_and_get_config_via_var_when_environment_error(self, variable_config_initializer): - mock_config_initializer = MockConfigInitializer(var=MockTableInstance()) - variable_config_initializer.get_instance = mock.Mock(return_value=mock_config_initializer) - - with self.assertRaises(EnvironmentError): - self.assertEqual(MockTableInstance(), check_and_get_config_via_var("1", "optimize")) - if __name__ == '__main__': unittest.main() diff --git a/tools/atomic/sparse_lookup_with_grad.py b/tools/atomic/sparse_lookup_with_grad.py index 26633abe..ea80bce3 100644 --- a/tools/atomic/sparse_lookup_with_grad.py +++ b/tools/atomic/sparse_lookup_with_grad.py @@ -203,7 +203,6 @@ if __name__ == '__main__': emb_initializer=tf.variance_scaling_initializer(mode="fan_avg", distribution='normal', seed=0), device_vocabulary_size=dev_vocab_size * local_rank_size, - optimizer_list=sparse_optimizer_list, mode=MxRecMode.mapping("ASC")) sparse_variables = tf.compat.v1.get_collection(get_ascend_global_hashtable_collection()) -- Gitee From d2676a117aaba5fed51519e5999c50e951978456 Mon Sep 17 00:00:00 2001 From: longfeifei <962977793@qq.com> Date: Tue, 7 May 2024 21:53:54 +0800 Subject: [PATCH 120/302] Merge remote-tracking branch 'origin/hdfs_dev_dts' into hdfs_dev_dts --- src/core/file_system/hdfs_file_system/hdfs_file_system.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/file_system/hdfs_file_system/hdfs_file_system.cpp b/src/core/file_system/hdfs_file_system/hdfs_file_system.cpp index 715107d3..704a89b5 100644 --- a/src/core/file_system/hdfs_file_system/hdfs_file_system.cpp +++ b/src/core/file_system/hdfs_file_system/hdfs_file_system.cpp @@ -267,7 +267,7 @@ void HdfsFileSystem::ReadEmbedding(const string& filePath, EmbeddingSizeInfo& em if (res != embedSizeInfo.embeddingSize * sizeof(float)) { hdfs->CloseFile(fs, file); hdfs->Disconnect(fs); - throw runtime_error(StringFormat( + throw runtime_error( StringFormat("Error: Expected to read {} bytes, but actually read {} bytes from file {}.", embedSizeInfo.embeddingSize * sizeof(float), res, filePath.c_str())); } -- Gitee From cfd97d0f41f6cd1e21164c3187e4cd713f619d13 Mon Sep 17 00:00:00 2001 From: longfeifei <962977793@qq.com> Date: Tue, 7 May 2024 22:11:02 +0800 Subject: [PATCH 121/302] Merge remote-tracking branch 'origin/hdfs_dev_dts' into hdfs_dev_dts --- src/core/emb_table/embedding_ddr.cpp | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/src/core/emb_table/embedding_ddr.cpp b/src/core/emb_table/embedding_ddr.cpp index ca48230b..2a8f1548 100644 --- a/src/core/emb_table/embedding_ddr.cpp +++ b/src/core/emb_table/embedding_ddr.cpp @@ -450,7 +450,6 @@ void EmbeddingDDR::SaveKey(const string& savePath) { } } - hostKey.insert(hostKey.end(), deviceKey.begin(), deviceKey.end()); size_t writeSize = static_cast(hostKey.size() * sizeof(int64_t)); ssize_t res = fileSystemPtr->Write(ss.str(), reinterpret_cast(hostKey.data()), writeSize); if (res == -1) { @@ -462,6 +461,20 @@ void EmbeddingDDR::SaveKey(const string& savePath) { "Error: Save keys failed. 
Expected to write {} bytes, but actually write {} bytes to file {}.", writeSize, res, ss.str())); } + + writeSize = static_cast(deviceKey.size() * sizeof(int64_t)); + res = fileSystemPtr->Write(ss.str(), reinterpret_cast(deviceKey.data()), writeSize); + if (res == -1) { + throw runtime_error( + StringFormat("Error: Save keys failed. An error occurred while writing file: {}.", ss.str())); + } + if (res != writeSize) { + throw runtime_error(StringFormat( + "Error: Save keys failed. Expected to write {} bytes, but actually write {} bytes to file {}.", + writeSize, res, ss.str())); + } + + } void EmbeddingDDR::SaveEmbData(const string& savePath) -- Gitee From 1c468f72a5acf5c4e4bd814f6c27d7c41b81a7f8 Mon Sep 17 00:00:00 2001 From: penghuiyang <1060916628@qq.com> Date: Wed, 8 May 2024 11:31:46 +0800 Subject: [PATCH 122/302] =?UTF-8?q?=E8=9E=8D=E5=90=88=E7=AE=97=E5=AD=90rea?= =?UTF-8?q?dme=E5=92=8Crun=E8=84=9A=E6=9C=AC=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cust_op/fused_lazy_adam/README.md | 2 +- cust_op/fused_lazy_adam/aclnn_lazy_adam_test/run.sh | 8 ++++---- cust_op/fused_lazy_adam/run.sh | 2 ++ 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/cust_op/fused_lazy_adam/README.md b/cust_op/fused_lazy_adam/README.md index c42d1bfe..994a7153 100644 --- a/cust_op/fused_lazy_adam/README.md +++ b/cust_op/fused_lazy_adam/README.md @@ -24,7 +24,7 @@ C算子开发手册[Ascend C算子开发](https://www.hiascend.com/document/deta bash run.sh ``` -注:需先环境中设置CANN相关环境变量,再执行算子编译和安装指令。使用默认路径安装CANN时设置环境变量指令如下: +注:需先在环境中设置CANN相关环境变量,再执行算子编译和安装指令。使用默认路径安装CANN时设置环境变量指令如下: ```shell source /usr/local/Ascend/ascend-toolkit/set_env.sh diff --git a/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/run.sh b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/run.sh index 3d4af97c..37b00b42 100644 --- a/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/run.sh +++ b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/run.sh @@ -59,9 +59,9 @@ export NPU_HOST_LIB=$ASCEND_HOME_DIR/${arch}-linux/lib64 function main { # 1. 清除遗留生成文件和日志文件 - rm -rf $HOME/ascend/log/* - rm ./input/*.bin - rm ./output/*.bin + rm -rf $HOME/ascend/log/* > /dev/null 2>&1 + rm ./input/*.bin > /dev/null 2>&1 + rm ./output/*.bin > /dev/null 2>&1 # 2. 生成输入数据和真值数据 cd $CURRENT_DIR @@ -76,7 +76,7 @@ function main { cd $CURRENT_DIR; rm -rf build; mkdir -p build; cd build cmake ../src if [ $? -ne 0 ]; then - echo "ERROR: cmake failed!" + echo "ERROR: cmake f ailed!" return 1 fi echo "INFO: cmake success!" diff --git a/cust_op/fused_lazy_adam/run.sh b/cust_op/fused_lazy_adam/run.sh index ff604cea..63bf7af4 100644 --- a/cust_op/fused_lazy_adam/run.sh +++ b/cust_op/fused_lazy_adam/run.sh @@ -14,6 +14,8 @@ # limitations under the License. 
# ============================================================================== +set -e + source /etc/profile # 查找msopgen的路径,加入到环境变量PATH中 -- Gitee From 56be32ba3fe81b86a73b3cc181986af9bd9c6ecd Mon Sep 17 00:00:00 2001 From: longfeifei <962977793@qq.com> Date: Wed, 8 May 2024 10:16:07 +0800 Subject: [PATCH 123/302] =?UTF-8?q?hdfs=E4=B8=AD=E7=9A=84read=E3=80=81writ?= =?UTF-8?q?e=E5=87=BD=E6=95=B0=E5=8A=A0=E5=9B=BA=EF=BC=8C=E4=BC=98?= =?UTF-8?q?=E5=8C=96=E5=8A=A0=E8=BD=BD=E4=BF=9D=E5=AD=98=E6=97=A5=E5=BF=97?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mx_rec/saver/saver.py | 15 ++-- src/core/checkpoint/checkpoint.cpp | 22 +++-- src/core/emb_table/embedding_ddr.cpp | 84 +++++++++---------- src/core/emb_table/embedding_dynamic.cpp | 26 +++--- src/core/emb_table/embedding_static.cpp | 25 +++--- .../hdfs_file_system/hdfs_file_system.cpp | 12 +-- .../hdfs_file_system/hdfs_wrapper.h | 4 + .../file_system/hdfs_file_system_test.cpp | 22 ----- 8 files changed, 92 insertions(+), 118 deletions(-) diff --git a/mx_rec/saver/saver.py b/mx_rec/saver/saver.py index e2e58340..d6c1d9e4 100644 --- a/mx_rec/saver/saver.py +++ b/mx_rec/saver/saver.py @@ -395,7 +395,7 @@ def save_embedding_data(root_dir, table_name, dump_data_dict, suffix): attribute = dict() attribute[DataAttr.DATATYPE.value] = data_to_write.dtype.name attribute[DataAttr.SHAPE.value] = data_to_write.shape - write_binary_data(target_path, suffix, data_to_write, attributes=attribute) + write_binary_data(target_path, suffix, data_to_write) def save_feature_mapping_data(root_dir, table_name, dump_data_dict, suffix): @@ -407,7 +407,7 @@ def save_feature_mapping_data(root_dir, table_name, dump_data_dict, suffix): attribute = dict() attribute[DataAttr.DATATYPE.value] = data_to_write.dtype.name attribute[DataName.THRESHOLD.value] = int(dump_data_dict.get(DataName.THRESHOLD.value)) - write_binary_data(target_path, suffix, data_to_write, attributes=attribute) + write_binary_data(target_path, suffix, data_to_write) def save_offset_data(root_dir, table_name, dump_data_dict, suffix): @@ -418,7 +418,7 @@ def save_offset_data(root_dir, table_name, dump_data_dict, suffix): attribute = dict() attribute[DataAttr.DATATYPE.value] = data_to_write.dtype.name - write_binary_data(target_path, suffix, data_to_write, attributes=attribute) + write_binary_data(target_path, suffix, data_to_write) def save_optimizer_state_data(root_dir, table_name, optimizer_name, dump_optimizer_data, suffix): @@ -429,7 +429,7 @@ def save_optimizer_state_data(root_dir, table_name, optimizer_name, dump_optimiz attribute = dict() attribute[DataAttr.DATATYPE.value] = data_to_write.dtype.name attribute[DataAttr.SHAPE.value] = data_to_write.shape - write_binary_data(target_path, suffix, data_to_write, attributes=attribute) + write_binary_data(target_path, suffix, data_to_write) def generate_path(*args): @@ -440,7 +440,7 @@ def generate_file_name(suffix): return "slice_%d.data" % suffix, "slice_%d.attribute" % suffix -def write_binary_data(writing_path, suffix, data, attributes=None): +def write_binary_data(writing_path: str, suffix: int, data: np.ndarray): try: tf.io.gfile.makedirs(writing_path) except Exception as err: @@ -474,7 +474,10 @@ def read_binary_data(reading_path: str, data_name: str, table_name: str, load_of with tf.io.gfile.GFile(target_attribute_dir, "rb") as fin: validate_read_file(target_attribute_dir) attributes = fin.read() - attributes = np.fromstring(attributes, dtype=np.int64) + try: + attributes = np.fromstring(attributes, 
dtype=np.int64) + except ValueError as err: + raise RuntimeError(f"Failed to parse attributes from file {target_attribute_dir}.") from err with tf.io.gfile.GFile(target_data_dir, "rb") as file: validate_read_file(target_data_dir) diff --git a/src/core/checkpoint/checkpoint.cpp b/src/core/checkpoint/checkpoint.cpp index bbb1fd6c..0fc03feb 100644 --- a/src/core/checkpoint/checkpoint.cpp +++ b/src/core/checkpoint/checkpoint.cpp @@ -210,14 +210,13 @@ void Checkpoint::WriteStream(CkptTransData& transData, const string& dataDir, si } if (writeBytesNum == -1) { - throw runtime_error( - StringFormat("Error: Save data failed. data type: {} .An error occurred while writing file: {}.", - dataType, dataDir)); + throw runtime_error(StringFormat("Error: Save data failed. data type: {}. " + "An error occurred while writing file: {}.", dataType, dataDir)); } if (writeBytesNum != dataSize) { - throw runtime_error(StringFormat( - "Error: Save data failed. data type: {} .Expected to write {} bytes, but actually write {} bytes to file {}.", - dataType, dataSize, writeBytesNum, dataDir)); + throw runtime_error(StringFormat("Error: Save data failed. data type: {}. " + "Expected to write {} bytes, but actually write {} bytes to file {}.", + dataType, dataSize, writeBytesNum, dataDir)); } } @@ -336,14 +335,13 @@ void Checkpoint::ReadStream(CkptTransData& transData, } if (readBytesNum == -1) { - throw runtime_error( - StringFormat("Error: Load data failed. data type: {} .An error occurred while reading file: {}.", - dataType, dataDir)); + throw runtime_error(StringFormat("Error: Load data failed. data type: {}. " + "An error occurred while reading file: {}.", dataType, dataDir)); } if (readBytesNum != datasetSize) { - throw runtime_error(StringFormat( - "Error: Load data failed. data type: {} .Expected to read {} bytes, but actually read {} bytes to file {}.", - dataType, datasetSize, readBytesNum, dataDir)); + throw runtime_error(StringFormat("Error: Load data failed. data type: {}. " + "Expected to read {} bytes, but actually read {} bytes from file {}.", + dataType, datasetSize, readBytesNum, dataDir)); } } diff --git a/src/core/emb_table/embedding_ddr.cpp b/src/core/emb_table/embedding_ddr.cpp index 2a8f1548..3d2b77e7 100644 --- a/src/core/emb_table/embedding_ddr.cpp +++ b/src/core/emb_table/embedding_ddr.cpp @@ -305,17 +305,20 @@ void EmbeddingDDR::SetStartCount() freeSize_ = devVocabSize; } -void EmbeddingDDR::Load(const string& savePath) { +void EmbeddingDDR::Load(const string& savePath) +{ LoadKey(savePath); LoadEmbAndOptim(savePath); } -void EmbeddingDDR::Save(const string& savePath) { +void EmbeddingDDR::Save(const string& savePath) +{ SaveKey(savePath); SaveEmbAndOptim(savePath); } -void EmbeddingDDR::LoadKey(const string& savePath) { +void EmbeddingDDR::LoadKey(const string& savePath) +{ stringstream ss; ss << savePath << "/" << name << "/key/slice.data"; @@ -324,26 +327,25 @@ void EmbeddingDDR::LoadKey(const string& savePath) { size_t fileSize = fileSystemPtr->GetFileSize(ss.str()); if (fileSize >= FILE_MAX_SIZE) { - throw runtime_error(StringFormat("Error: Load keys failed. file {} size {} is too big.", ss.str(), fileSize)); + throw runtime_error(StringFormat("Error: Load keys failed. file {} size {} is too big.", ss.str(), fileSize)); } int64_t* buf = static_cast(malloc(fileSize)); if (buf == nullptr) { - throw runtime_error( - StringFormat("Error: Load keys failed. failed to allocate {} bytes using malloc.", fileSize)); + throw runtime_error(StringFormat("Error: Load keys failed. 
" + "failed to allocate {} bytes using malloc.", fileSize)); } ssize_t res = fileSystemPtr->Read(ss.str(), reinterpret_cast(buf), fileSize); if (res == -1) { free(static_cast(buf)); - throw runtime_error( - StringFormat("Error: Load keys failed. An error occurred while reading file: {}.", ss.str())); + throw runtime_error(StringFormat("Error: Load keys failed. " + "An error occurred while reading file: {}.", ss.str())); } if (res != fileSize) { free(static_cast(buf)); - throw runtime_error(StringFormat( - "Error: Load keys failed. Expected to read {} bytes, but actually read {} bytes to file {}.", fileSize, - res, ss.str())); + throw runtime_error(StringFormat("Error: Load keys failed. Expected to read {} bytes, " + "but actually read {} bytes to file {}.", fileSize, res, ss.str())); } size_t loadKeySize = fileSize / sizeof(int64_t); @@ -358,9 +360,9 @@ void EmbeddingDDR::LoadKey(const string& savePath) { } if (keyCount > devVocabSize + hostVocabSize) { free(static_cast(buf)); - throw runtime_error(StringFormat( - "Error: Load keys failed. Load key size :{} exceeds the sum of device vocab size and host vocab size: {}.", - keyCount, devVocabSize + hostVocabSize)); + throw runtime_error(StringFormat("Error: Load keys failed. Load key size :{} , " + "exceeds the sum of device vocab size and host vocab size: {}.", + keyCount, devVocabSize + hostVocabSize)); } else if (keyCount < devVocabSize) { loadOffset.push_back(i); devOffset2Key[keyCount] = buf[i]; @@ -400,9 +402,8 @@ void EmbeddingDDR::LoadEmbAndOptim(const string& savePath) embedStream.str())); } if (res != readSize) { - throw runtime_error(StringFormat( - "Error: Load embeddings failed. Expected to read {} bytes, but actually read {} bytes to file {}.", - readSize, res, embedStream.str())); + throw runtime_error(StringFormat("Error: Load embeddings failed. Expected to read {} bytes, " + "but actually read {} bytes to file {}.", readSize, res, embedStream.str())); } // 读optim @@ -417,16 +418,16 @@ void EmbeddingDDR::LoadEmbAndOptim(const string& savePath) paramStream.str())); } if (res != readSize) { - throw runtime_error(StringFormat( - "Error: Load embeddings failed. Expected to read {} bytes, but actually read {} bytes to file {}.", - readSize, res, paramStream.str())); + throw runtime_error(StringFormat("Error: Load embeddings failed. Expected to read {} bytes, " + "but actually read {} bytes to file {}.", + readSize, res, paramStream.str())); } optimIndex++; } } - -void EmbeddingDDR::SaveKey(const string& savePath) { +void EmbeddingDDR::SaveKey(const string& savePath) +{ stringstream ss; ss << savePath << "/" << name << "/key/"; MakeDir(ss.str()); @@ -453,28 +454,24 @@ void EmbeddingDDR::SaveKey(const string& savePath) { size_t writeSize = static_cast(hostKey.size() * sizeof(int64_t)); ssize_t res = fileSystemPtr->Write(ss.str(), reinterpret_cast(hostKey.data()), writeSize); if (res == -1) { - throw runtime_error( - StringFormat("Error: Save keys failed. An error occurred while writing file: {}.", ss.str())); + throw runtime_error(StringFormat("Error: Save keys failed. " + "An error occurred while writing file: {}.", ss.str())); } if (res != writeSize) { - throw runtime_error(StringFormat( - "Error: Save keys failed. Expected to write {} bytes, but actually write {} bytes to file {}.", - writeSize, res, ss.str())); + throw runtime_error(StringFormat("Error: Save keys failed. 
Expected to write {} bytes, " + "but actually write {} bytes to file {}.", writeSize, res, ss.str())); } writeSize = static_cast(deviceKey.size() * sizeof(int64_t)); res = fileSystemPtr->Write(ss.str(), reinterpret_cast(deviceKey.data()), writeSize); if (res == -1) { - throw runtime_error( - StringFormat("Error: Save keys failed. An error occurred while writing file: {}.", ss.str())); + throw runtime_error(StringFormat("Error: Save keys failed. " + "An error occurred while writing file: {}.", ss.str())); } if (res != writeSize) { - throw runtime_error(StringFormat( - "Error: Save keys failed. Expected to write {} bytes, but actually write {} bytes to file {}.", - writeSize, res, ss.str())); + throw runtime_error(StringFormat("Error: Save keys failed. Expected to write {} bytes, " + "but actually write {} bytes to file {}.", writeSize, res, ss.str())); } - - } void EmbeddingDDR::SaveEmbData(const string& savePath) @@ -490,13 +487,12 @@ void EmbeddingDDR::SaveEmbData(const string& savePath) size_t writeSize = embSize_ * sizeof(float) * embContent.size(); ssize_t res = fileSystemPtr->Write(ss.str(), embContent, embSize_ * sizeof(float)); if (res == -1) { - throw runtime_error( - StringFormat("Error: Save embeddings failed. An error occurred while writing file: {}.", ss.str())); + throw runtime_error(StringFormat("Error: Save embeddings failed. " + "An error occurred while writing file: {}.", ss.str())); } if (res != writeSize) { - throw runtime_error(StringFormat( - "Error: Save embeddings failed. Expected to write {} bytes, but actually write {} bytes to file {}.", - writeSize, res, ss.str())); + throw runtime_error(StringFormat("Error: Save embeddings failed. Expected to write {} bytes, " + "but actually write {} bytes to file {}.", writeSize, res, ss.str())); } } @@ -513,15 +509,13 @@ void EmbeddingDDR::SaveOptimData(const string& savePath) size_t writeSize = embSize_ * sizeof(float) * content.second.size(); ssize_t res = fileSystemPtr->Write(ss.str(), content.second, embSize_ * sizeof(float)); - if (res == -1) { - throw runtime_error( - StringFormat("Error: Save optimizers failed. An error occurred while writing file: {}.", ss.str())); + throw runtime_error(StringFormat("Error: Save optimizers failed. " + "An error occurred while writing file: {}.", ss.str())); } if (res != writeSize) { - throw runtime_error(StringFormat( - "Error: Save optimizers failed. Expected to write {} bytes, but actually write {} bytes to file {}.", - writeSize, res, ss.str())); + throw runtime_error(StringFormat("Error: Save optimizers failed. Expected to write {} bytes, " + "but actually write {} bytes to file {}.", writeSize, res, ss.str())); } } } diff --git a/src/core/emb_table/embedding_dynamic.cpp b/src/core/emb_table/embedding_dynamic.cpp index 706f399e..bca77178 100644 --- a/src/core/emb_table/embedding_dynamic.cpp +++ b/src/core/emb_table/embedding_dynamic.cpp @@ -153,13 +153,12 @@ void EmbeddingDynamic::SaveKey(const string& savePath) size_t writeSize = static_cast(deviceKey.size() * sizeof(int64_t)); ssize_t res = fileSystemPtr->Write(ss.str(), reinterpret_cast(deviceKey.data()), writeSize); if (res == -1) { - throw runtime_error( - StringFormat("Error: Save keys failed. An error occurred while writing file: {}.", ss.str())); + throw runtime_error(StringFormat("Error: Save keys failed. " + "An error occurred while writing file: {}.", ss.str())); } if (res != writeSize) { - throw runtime_error(StringFormat( - "Error: Save keys failed. 
Expected to write {} bytes, but actually write {} bytes to file {}.", - writeSize, res, ss.str())); + throw runtime_error(StringFormat("Error: Save keys failed. Expected to write {} bytes, " + "but actually write {} bytes to file {}.", writeSize, res, ss.str())); } } @@ -261,19 +260,18 @@ void EmbeddingDynamic::LoadKey(const string& savePath) int64_t* buf = static_cast(malloc(fileSize)); if (buf == nullptr) { - throw runtime_error( - StringFormat("Error: Load keys failed. failed to allocate {} bytes using malloc.", fileSize)); + throw runtime_error(StringFormat("Error: Load keys failed. " + "failed to allocate {} bytes using malloc.", fileSize)); } ssize_t res = fileSystemPtr->Read(ss.str(), reinterpret_cast(buf), fileSize); if (res == -1) { - throw runtime_error( - StringFormat("Error: Load keys failed. An error occurred while reading file: {}.", ss.str())); + throw runtime_error(StringFormat("Error: Load keys failed. " + "An error occurred while reading file: {}.", ss.str())); } if (res != fileSize) { - throw runtime_error(StringFormat( - "Error: Load keys failed. Expected to read {} bytes, but actually read {} bytes to file {}.", fileSize, - res, ss.str())); + throw runtime_error(StringFormat("Error: Load keys failed. Expected to read {} bytes, " + "but actually read {} bytes to file {}.", fileSize, res, ss.str())); } size_t loadKeySize = fileSize / sizeof(int64_t); @@ -291,8 +289,8 @@ void EmbeddingDynamic::LoadKey(const string& savePath) void *newBlock = nullptr; aclError ret = aclrtMalloc(&newBlock, static_cast(datasetSize), ACL_MEM_MALLOC_HUGE_FIRST); if (ret != ACL_SUCCESS) { - throw runtime_error( - StringFormat("Error: in dynamic expansion mode, aclrtMalloc failed, malloc size: {}.", datasetSize)); + throw runtime_error(StringFormat("Error: in dynamic expansion mode, " + "aclrtMalloc failed, malloc size: {}.", datasetSize)); } // 此处的 newBlock -> first address; // 对key_offset map 进行一个恢复操作 diff --git a/src/core/emb_table/embedding_static.cpp b/src/core/emb_table/embedding_static.cpp index f80f076a..312b8a77 100644 --- a/src/core/emb_table/embedding_static.cpp +++ b/src/core/emb_table/embedding_static.cpp @@ -97,13 +97,12 @@ void EmbeddingStatic::SaveKey(const string& savePath) size_t writeSize = static_cast(deviceKey.size() * sizeof(int64_t)); ssize_t res = fileSystemPtr->Write(ss.str(), reinterpret_cast(deviceKey.data()), writeSize); if (res == -1) { - throw runtime_error( - StringFormat("Error: Save keys failed. An error occurred while writing file: {}.", ss.str())); + throw runtime_error(StringFormat("Error: Save keys failed. " + "An error occurred while writing file: {}.", ss.str())); } if (res != writeSize) { - throw runtime_error(StringFormat( - "Error: Save keys failed. Expected to write {} bytes, but actually write {} bytes to file {}.", - writeSize, res, ss.str())); + throw runtime_error(StringFormat("Error: Save keys failed. Expected to write {} bytes, " + "but actually write {} bytes to file {}.", writeSize, res, ss.str())); } } @@ -112,7 +111,7 @@ void EmbeddingStatic::Load(const string& savePath) LoadKey(savePath); } -void EmbeddingStatic::LoadKey(const string &savePath) +void EmbeddingStatic::LoadKey(const string& savePath) { stringstream ss; ss << savePath << "/" << name << "/key/slice.data"; @@ -125,20 +124,20 @@ void EmbeddingStatic::LoadKey(const string &savePath) throw runtime_error(StringFormat("Error: Load keys failed. 
file {} size {} is too big.", ss.str(), fileSize)); } - int64_t* buf = static_cast(malloc(fileSize)); + int64_t* buf = static_cast(malloc(fileSize)); if (buf == nullptr) { - throw runtime_error(StringFormat("Error: Load keys failed. failed to allocate {} bytes using malloc.", fileSize)); + throw runtime_error(StringFormat("Error: Load keys failed. " + "failed to allocate {} bytes using malloc.", fileSize)); } ssize_t res = fileSystemPtr->Read(ss.str(), reinterpret_cast(buf), fileSize); if (res == -1) { - throw runtime_error( - StringFormat("Error: Load keys failed. An error occurred while reading file: {}.", ss.str())); + throw runtime_error(StringFormat("Error: Load keys failed. " + "An error occurred while reading file: {}.", ss.str())); } if (res != fileSize) { - throw runtime_error(StringFormat( - "Error: Load keys failed. Expected to read {} bytes, but actually read {} bytes to file {}.", fileSize, - res, ss.str())); + throw runtime_error(StringFormat("Error: Load keys failed. Expected to read {} bytes, " + "but actually read {} bytes to file {}.", fileSize, res, ss.str())); } size_t loadKeySize = fileSize / sizeof(int64_t); diff --git a/src/core/file_system/hdfs_file_system/hdfs_file_system.cpp b/src/core/file_system/hdfs_file_system/hdfs_file_system.cpp index 704a89b5..2c463115 100644 --- a/src/core/file_system/hdfs_file_system/hdfs_file_system.cpp +++ b/src/core/file_system/hdfs_file_system/hdfs_file_system.cpp @@ -162,9 +162,9 @@ void HdfsFileSystem::WriteEmbedding(const string& filePath, const int& embedding if (res != embeddingSize * sizeof(float)) { hdfs->CloseFile(fs, file); hdfs->Disconnect(fs); - throw runtime_error( - StringFormat("Error: Expected to write {} bytes, but actually write {} bytes to file {}.", - embeddingSize * sizeof(float), res, filePath.c_str())); + throw runtime_error(StringFormat("Error: Expected to write {} bytes, " + "but actually write {} bytes to file {}.", + embeddingSize * sizeof(float), res, filePath.c_str())); } } #endif @@ -267,9 +267,9 @@ void HdfsFileSystem::ReadEmbedding(const string& filePath, EmbeddingSizeInfo& em if (res != embedSizeInfo.embeddingSize * sizeof(float)) { hdfs->CloseFile(fs, file); hdfs->Disconnect(fs); - throw runtime_error( - StringFormat("Error: Expected to read {} bytes, but actually read {} bytes from file {}.", - embedSizeInfo.embeddingSize * sizeof(float), res, filePath.c_str())); + throw runtime_error(StringFormat("Error: Expected to read {} bytes, " + "but actually read {} bytes from file {}.", + embedSizeInfo.embeddingSize * sizeof(float), res, filePath.c_str())); } aclError ret = aclrtMemcpy(floatPtr + i * embedSizeInfo.extendEmbSize, diff --git a/src/core/file_system/hdfs_file_system/hdfs_wrapper.h b/src/core/file_system/hdfs_file_system/hdfs_wrapper.h index 6ba0d7bb..6b9fe19c 100644 --- a/src/core/file_system/hdfs_file_system/hdfs_wrapper.h +++ b/src/core/file_system/hdfs_file_system/hdfs_wrapper.h @@ -152,6 +152,7 @@ namespace MxRec { } unReadLength -= res; readBytes += res; + reTryCount++; } return readBytes; } @@ -174,6 +175,7 @@ namespace MxRec { } unReadLength -= res; readBytes += res; + reTryCount++; } return readBytes; } @@ -195,6 +197,7 @@ namespace MxRec { } unWriteLength -= res; writeBytes += res; + reTryCount++; } return writeBytes; } @@ -216,6 +219,7 @@ namespace MxRec { } unWriteLength -= res; writeBytes += res; + reTryCount++; } return writeBytes; } diff --git a/src/tests/file_system/hdfs_file_system_test.cpp b/src/tests/file_system/hdfs_file_system_test.cpp index a8c8bbf5..3794d14d 100644 --- 
a/src/tests/file_system/hdfs_file_system_test.cpp +++ b/src/tests/file_system/hdfs_file_system_test.cpp @@ -38,8 +38,6 @@ void MockHdfs() EMOCK(&HdfsWrapper::FreeFileInfo).stubs().will(ignoreReturnValue()); EMOCK(&HdfsWrapper::OpenFile).stubs().will(returnValue(hdfsFileHandler)); EMOCK(&HdfsWrapper::CloseFile).stubs().will(returnValue(1)); - EMOCK(&HdfsWrapper::Write).stubs().will(returnValue(1)); - EMOCK(&HdfsWrapper::Read).stubs().will(returnValue(1)); EMOCK(&HdfsWrapper::Seek).stubs().will(returnValue(1)); } @@ -86,23 +84,3 @@ TEST_F(HdfsFileSystemTest, GetFileSize) EXPECT_NO_THROW(fileSystemPtr->GetFileSize(filePath)); } -TEST_F(HdfsFileSystemTest, testCase) -{ - string filePath = "hdfs://master:9000/test_dir/"; - auto fileSystemHandler = make_unique(); - auto fileSystemPtr = fileSystemHandler->Create(filePath); - - vector dirs; - dirs = fileSystemPtr->ListDir(filePath); - EXPECT_EQ(dirs.size(), 0); - - vector writeData = {0, 1, 2, 3, 4, 5}; - size_t testDataSize = writeData.size() * sizeof(int64_t); - EXPECT_NO_THROW(fileSystemPtr->Write(filePath, reinterpret_cast(writeData.data()), testDataSize)); - float p[5] = {1.1, 2.2, 3.3, 4.4, 5.5}; - vector writeData1 = {p, p+1, p+2, p+3, p+4}; - EXPECT_NO_THROW(fileSystemPtr->Write(filePath, writeData1, sizeof(float))); - - vector readData = {}; - EXPECT_NO_THROW(fileSystemPtr->Read(filePath, reinterpret_cast(readData.data()), 1)); -} \ No newline at end of file -- Gitee From 4b7de4c286ad018e5b0fa830b93ee3b47928b0ca Mon Sep 17 00:00:00 2001 From: penghuiyang <1060916628@qq.com> Date: Wed, 8 May 2024 17:52:50 +0800 Subject: [PATCH 124/302] =?UTF-8?q?=E8=9E=8D=E5=90=88=E7=AE=97=E5=AD=90rea?= =?UTF-8?q?dme=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cust_op/fused_lazy_adam/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/cust_op/fused_lazy_adam/README.md b/cust_op/fused_lazy_adam/README.md index 994a7153..7aa64218 100644 --- a/cust_op/fused_lazy_adam/README.md +++ b/cust_op/fused_lazy_adam/README.md @@ -54,6 +54,7 @@ b) 算子参数说明: * inputVar: embedding表对应的variable数据;计算结果原地更新; c) 算子约束说明: * 支持的型号:Atlas A2系列产品; +* 支持的CANN版本:8.0.RC1及之后版本; * 支持的输入数据类型:float32; * embedding表的dim值需要是8的倍数; -- Gitee From 57116797d207dd8a730a13f07f643268dbfc9abb Mon Sep 17 00:00:00 2001 From: penghuiyang <1060916628@qq.com> Date: Wed, 8 May 2024 17:54:43 +0800 Subject: [PATCH 125/302] =?UTF-8?q?=E8=9E=8D=E5=90=88=E7=AE=97=E5=AD=90rea?= =?UTF-8?q?dme=E4=BF=AE=E6=94=B92?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cust_op/fused_lazy_adam/aclnn_lazy_adam_test/run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/run.sh b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/run.sh index 37b00b42..b44855df 100644 --- a/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/run.sh +++ b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/run.sh @@ -76,7 +76,7 @@ function main { cd $CURRENT_DIR; rm -rf build; mkdir -p build; cd build cmake ../src if [ $? -ne 0 ]; then - echo "ERROR: cmake f ailed!" + echo "ERROR: cmake failed!" return 1 fi echo "INFO: cmake success!" 
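# A minimal end-to-end invocation of this test, assuming the default CANN
# install path documented in the fused_lazy_adam README (adjust if the
# toolkit lives elsewhere):
#     source /usr/local/Ascend/ascend-toolkit/set_env.sh
#     bash run.sh
# The numbered steps above then clean stale logs and .bin files, regenerate
# the input and golden data, and rebuild the aclnn test via cmake.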
-- Gitee From 0989313f37ad2fb2dfb95df19bbfd77d8069e1a2 Mon Sep 17 00:00:00 2001 From: yxy1684 <2270320041@qq.com> Date: Wed, 8 May 2024 19:51:07 +0800 Subject: [PATCH 126/302] cleancode --- mx_rec/core/asc/swap_args.py | 1 + mx_rec/core/emb/base_sparse_embedding.py | 26 ++++++++++++------------ mx_rec/graph/modifier.py | 2 +- 3 files changed, 15 insertions(+), 14 deletions(-) diff --git a/mx_rec/core/asc/swap_args.py b/mx_rec/core/asc/swap_args.py index 4494cc26..5bcba234 100644 --- a/mx_rec/core/asc/swap_args.py +++ b/mx_rec/core/asc/swap_args.py @@ -27,6 +27,7 @@ class SwapDataType(Enum): def singleton(cls): _instance = {} + @functools.wraps(cls) def inner(): if cls not in _instance: _instance[cls] = cls() diff --git a/mx_rec/core/emb/base_sparse_embedding.py b/mx_rec/core/emb/base_sparse_embedding.py index 2a52b3a6..2c29f9c8 100644 --- a/mx_rec/core/emb/base_sparse_embedding.py +++ b/mx_rec/core/emb/base_sparse_embedding.py @@ -263,19 +263,6 @@ class BaseSparseEmbedding(metaclass=abc.ABCMeta): """ self._multi_lookup_times[is_training] = self._multi_lookup_times.get(is_training) + 1 - def _set_ext_emb_size(self): - # 初始设置_ext_emb_size等于_emb_size,改图阶段会根据优化器的不同而exchange该值 - self._ext_emb_size = self._emb_size * self._ext_coefficient - logger.debug("Init table, ext_emb_size is set to be %s.", self._ext_emb_size) - - def _get_preprocessed_tensor(self, feature_spec: FeatureSpec, channel_id: int, send_count: Optional[int]) -> dict: - config = dict(batch_size=feature_spec.batch_size, feat_cnt=feature_spec.feat_cnt, send_count=send_count, - rank_size=self._rank_size, channel_id=channel_id, table_name=self._table_name, - is_hbm=self._is_hbm, ext_emb_size=self._ext_emb_size, emb_size=self._emb_size, - use_dynamic_expansion=ConfigInitializer.get_instance().use_dynamic_expansion) - - return get_preprocessed_tensor_for_asc(self._variable, config) - def lookup(self, ids: tf.Tensor, send_count: Optional[int], **kwargs) -> tf.Tensor: """ 稀疏表的lookup,自动改图模式. 
@@ -388,6 +375,19 @@ class BaseSparseEmbedding(metaclass=abc.ABCMeta): return tf.stop_gradient(self._lookup_result.get(spec_name).get(is_training), name="stop_grad_lookup_res") return self._lookup_result.get(spec_name).get(is_training) + def _set_ext_emb_size(self): + # 初始设置_ext_emb_size等于_emb_size,改图阶段会根据优化器的不同而exchange该值 + self._ext_emb_size = self._emb_size * self._ext_coefficient + logger.debug("Init table, ext_emb_size is set to be %s.", self._ext_emb_size) + + def _get_preprocessed_tensor(self, feature_spec: FeatureSpec, channel_id: int, send_count: Optional[int]) -> dict: + config = dict(batch_size=feature_spec.batch_size, feat_cnt=feature_spec.feat_cnt, send_count=send_count, + rank_size=self._rank_size, channel_id=channel_id, table_name=self._table_name, + is_hbm=self._is_hbm, ext_emb_size=self._ext_emb_size, emb_size=self._emb_size, + use_dynamic_expansion=ConfigInitializer.get_instance().use_dynamic_expansion) + + return get_preprocessed_tensor_for_asc(self._variable, config) + def _lookup_forward(self, feature_spec: FeatureSpec, send_count: Optional[int], **kwargs) -> tf.Tensor: is_training = kwargs.get("is_train") hashtable_params = dict(slice_device_vocabulary_size=self._slice_device_vocabulary_size, diff --git a/mx_rec/graph/modifier.py b/mx_rec/graph/modifier.py index 72772c5f..33c4b958 100644 --- a/mx_rec/graph/modifier.py +++ b/mx_rec/graph/modifier.py @@ -644,7 +644,7 @@ def get_variable_and_slot_list(each_var, slot_num, table_name, channel_id): # predict不需要传优化器,但是如果客户创建了优化器,ddr模式加载的是维度ext_size的emb用作换入换出,所以需要给slot零值占位 if optimizer is None and channel_id == 1: slot_place_holder = tf.zeros_like(each_var) - for i in range(slot_num): + for _ in range(slot_num): variable_and_slot_list.append(slot_place_holder) else: # opt name to slot dict -- Gitee From 2c6746af79208e34df9848f74f1b621acb22758c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=BD=97=E5=B9=B8=E8=BF=90?= Date: Wed, 8 May 2024 13:42:28 +0000 Subject: [PATCH 127/302] =?UTF-8?q?!127=20hot=E6=A8=A1=E5=BC=8F=E7=9A=84?= =?UTF-8?q?=E9=9D=99=E6=80=81shape=E4=BF=AE=E5=A4=8D=20*=20=E3=80=90?= =?UTF-8?q?=E4=BF=AE=E6=94=B9=E8=AF=B4=E6=98=8E=20Modification=E3=80=91hot?= =?UTF-8?q?=20size=E9=9D=99=E6=80=81=E4=BF=AE=E5=A4=8D=20*=20=E3=80=90?= =?UTF-8?q?=E4=BF=AE=E6=94=B9=E8=AF=B4=E6=98=8E=20Modification=E3=80=91hot?= =?UTF-8?q?=20size=E9=9D=99=E6=80=81=E4=BF=AE=E5=A4=8D=20*=20=E3=80=90?= =?UTF-8?q?=E4=BF=AE=E6=94=B9=E8=AF=B4=E6=98=8E=20Modification=E3=80=91hot?= =?UTF-8?q?=20size=E9=9D=99=E6=80=81=E4=BF=AE=E5=A4=8D=20*=20=E3=80=90?= =?UTF-8?q?=E4=BF=AE=E6=94=B9=E8=AF=B4=E6=98=8E=20Modification=E3=80=91hot?= =?UTF-8?q?=20size=E9=9D=99=E6=80=81=E4=BF=AE=E5=A4=8D=20*=20=E3=80=90?= =?UTF-8?q?=E4=BF=AE=E6=94=B9=E8=AF=B4=E6=98=8E=20Modification=E3=80=91hot?= =?UTF-8?q?=20size=E9=9D=99=E6=80=81=E4=BF=AE=E5=A4=8D=20*=20Merge=20remot?= =?UTF-8?q?e-tracking=20branch=20'upstream/develop'=20into=20develop-ddr-w?= =?UTF-8?q?itho=E2=80=A6=20*=20=E3=80=90=E4=BF=AE=E6=94=B9=E8=AF=B4?= =?UTF-8?q?=E6=98=8E=20Modification=E3=80=91hot=20size=E9=9D=99=E6=80=81?= =?UTF-8?q?=E4=BF=AE=E5=A4=8D=20*=20=E3=80=90=E4=BF=AE=E6=94=B9=E8=AF=B4?= =?UTF-8?q?=E6=98=8E=20Modification=E3=80=91clean=20code=E5=92=8C=E8=85=BE?= =?UTF-8?q?=E8=AE=AFeval=E9=83=A8=E5=88=86=E6=94=B9=E5=9B=BE=E7=9A=84?= =?UTF-8?q?=E4=BF=AE=E6=94=B9=20*=20Merge=20remote-tracking=20branch=20'up?= =?UTF-8?q?stream/develop'=20into=20develop-ddr-witho=E2=80=A6=20*=20Merge?= =?UTF-8?q?=20remote-tracking=20branch=20'upstream/develop'=20into=20devel?= 
=?UTF-8?q?op-ddr-witho=E2=80=A6=20*=20=E3=80=90=E4=BF=AE=E6=94=B9?= =?UTF-8?q?=E8=AF=B4=E6=98=8E=20Modification=E3=80=91slot=E5=92=8Cderivati?= =?UTF-8?q?ve=E7=A7=BB=E8=87=B3=E4=B8=8A=E5=B1=82base=20*=20=E3=80=90?= =?UTF-8?q?=E4=BF=AE=E6=94=B9=E8=AF=B4=E6=98=8E=20Modification=E3=80=91cre?= =?UTF-8?q?ate=5Ftable=E6=8E=A5=E5=8F=A3=E4=B8=8E=E4=BC=98=E5=8C=96?= =?UTF-8?q?=E5=99=A8=E5=88=9B=E5=BB=BA=E8=A7=A3=E8=80=A6=20*=20Merge=20rem?= =?UTF-8?q?ote-tracking=20branch=20'origin/develop-global-unique'=20into?= =?UTF-8?q?=20devel=E2=80=A6=20*=20Merge=20remote-tracking=20branch=20'ups?= =?UTF-8?q?tream/develop'=20into=20develop-ddr-witho=E2=80=A6=20*=20?= =?UTF-8?q?=E3=80=90=E4=BF=AE=E6=94=B9=E8=AF=B4=E6=98=8E=20Modification?= =?UTF-8?q?=E3=80=91ddr=20without=20optimizer=20for=20fp=20*=20=E3=80=90?= =?UTF-8?q?=E4=BF=AE=E6=94=B9=E8=AF=B4=E6=98=8E=20Modification=E3=80=91ddr?= =?UTF-8?q?=20without=20optimizer=20for=20fp=20*=20=E3=80=90=E4=BF=AE?= =?UTF-8?q?=E6=94=B9=E8=AF=B4=E6=98=8E=20Modification=E3=80=91ddr=20withou?= =?UTF-8?q?t=20optimizer=20for=20fp=20*=20=E3=80=90=E4=BF=AE=E6=94=B9?= =?UTF-8?q?=E8=AF=B4=E6=98=8E=20Modification=E3=80=91ddr=20without=20optim?= =?UTF-8?q?izer=20for=20fp=20*=20=E3=80=90=E4=BF=AE=E6=94=B9=E8=AF=B4?= =?UTF-8?q?=E6=98=8E=20Modification=E3=80=91ddr=20without=20optimizer=20fo?= =?UTF-8?q?r=20fp=20*=20=E3=80=90=E4=BF=AE=E6=94=B9=E8=AF=B4=E6=98=8E=20Mo?= =?UTF-8?q?dification=E3=80=91ddr=20without=20optimizer=20for=20fp=20*=20?= =?UTF-8?q?=E3=80=90=E4=BF=AE=E6=94=B9=E8=AF=B4=E6=98=8E=20Modification?= =?UTF-8?q?=E3=80=91ddr=20without=20optimizer=20for=20fp=20*=20Merge=20rem?= =?UTF-8?q?ote-tracking=20branch=20'upstream/develop'=20into=20develop-ddr?= =?UTF-8?q?-witho=E2=80=A6=20*=20=E3=80=90=E4=BF=AE=E6=94=B9=E8=AF=B4?= =?UTF-8?q?=E6=98=8E=20Modification=E3=80=91ddr=20without=20optimizer=20fo?= =?UTF-8?q?r=20fp=20*=20=E3=80=90=E4=BF=AE=E6=94=B9=E8=AF=B4=E6=98=8E=20Mo?= =?UTF-8?q?dification=E3=80=91ddr=20without=20optimizer=20for=20fp=20*=20?= =?UTF-8?q?=E3=80=90=E4=BF=AE=E6=94=B9=E8=AF=B4=E6=98=8E=20Modification?= =?UTF-8?q?=E3=80=91ddr=20without=20optimizer=20for=20fp=20*=20=E3=80=90?= =?UTF-8?q?=E4=BF=AE=E6=94=B9=E8=AF=B4=E6=98=8E=20Modification=E3=80=91ddr?= =?UTF-8?q?=20without=20optimizer=20for=20fp=20*=20=E3=80=90=E4=BF=AE?= =?UTF-8?q?=E6=94=B9=E8=AF=B4=E6=98=8E=20Modification=E3=80=91ddr=20withou?= =?UTF-8?q?t=20optimizer=20for=20fp=20*=20Merge=20remote-tracking=20branch?= =?UTF-8?q?=20'upstream/develop'=20into=20develop-ddr-witho=E2=80=A6=20*?= =?UTF-8?q?=20Merge=20remote-tracking=20branch=20'origin/develop-ddr-witho?= =?UTF-8?q?ut-optimizer'=20in=E2=80=A6=20*=20=E3=80=90=E4=BF=AE=E6=94=B9?= =?UTF-8?q?=E8=AF=B4=E6=98=8E=20Modification=E3=80=91ddr=20without=20optim?= =?UTF-8?q?izer=20for=20fp=20*=20=E3=80=90=E4=BF=AE=E6=94=B9=E8=AF=B4?= =?UTF-8?q?=E6=98=8E=20Modification=E3=80=91ddr=20without=20optimizer=20fo?= =?UTF-8?q?r=20fp?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mx_rec/core/asc/build_graph.py | 6 +- mx_rec/core/emb/base_sparse_embedding.py | 3 +- src/core/key_process/key_process.cpp | 6 +- src/tests/key_process/key_process_test.cpp | 174 --------------------- tests/mx_rec/core/test_build_graph.py | 24 ++- 5 files changed, 23 insertions(+), 190 deletions(-) diff --git a/mx_rec/core/asc/build_graph.py b/mx_rec/core/asc/build_graph.py index 82e40b29..46dbf193 100644 --- a/mx_rec/core/asc/build_graph.py +++ b/mx_rec/core/asc/build_graph.py @@ -39,15 +39,17 @@ def get_restore_vector(config): raise 
TypeError("ext_emb_size must be a int") if config.get("ext_emb_size") < 1: raise ValueError("ext_emb_size is less than 1") - emb_size = None + emb_size = config.get("emb_size") if ConfigInitializer.get_instance().use_static: restore_size = config.get("batch_size") * config.get("feat_cnt") + device_id = int(config.get("device_id")) + hot_size = int(mxrec_pybind.get_ub_hot_size(device_id) / emb_size) else: restore_size = None + hot_size = None with tf.compat.v1.variable_scope(config.get("table_name"), reuse=tf.compat.v1.AUTO_REUSE): - hot_size = None restore_vector, hot_pos = npu_ops.gen_npu_ops.get_next( output_types=[tf.int32, tf.int32], output_shapes=[restore_size, [hot_size]], diff --git a/mx_rec/core/emb/base_sparse_embedding.py b/mx_rec/core/emb/base_sparse_embedding.py index 2a52b3a6..a654629a 100644 --- a/mx_rec/core/emb/base_sparse_embedding.py +++ b/mx_rec/core/emb/base_sparse_embedding.py @@ -272,7 +272,8 @@ class BaseSparseEmbedding(metaclass=abc.ABCMeta): config = dict(batch_size=feature_spec.batch_size, feat_cnt=feature_spec.feat_cnt, send_count=send_count, rank_size=self._rank_size, channel_id=channel_id, table_name=self._table_name, is_hbm=self._is_hbm, ext_emb_size=self._ext_emb_size, emb_size=self._emb_size, - use_dynamic_expansion=ConfigInitializer.get_instance().use_dynamic_expansion) + use_dynamic_expansion=ConfigInitializer.get_instance().use_dynamic_expansion, + device_id=self._device_id) return get_preprocessed_tensor_for_asc(self._variable, config) diff --git a/src/core/key_process/key_process.cpp b/src/core/key_process/key_process.cpp index b2dfab04..22148581 100644 --- a/src/core/key_process/key_process.cpp +++ b/src/core/key_process/key_process.cpp @@ -129,12 +129,8 @@ int KeyProcess::Start() void KeyProcess::InitHotEmbTotCount(const EmbInfo& info, const RankInfo& rInfo) { - int embeddingSize = info.extEmbeddingSize; - if (rankInfo.useDynamicExpansion) { - embeddingSize = info.embeddingSize; - } hotEmbTotCount[info.name] = static_cast(static_cast(GetUBSize(rInfo.deviceId) / sizeof(float)) * - HOT_EMB_CACHE_PCT / static_cast(embeddingSize)); + HOT_EMB_CACHE_PCT / static_cast(info.embeddingSize)); } OffsetMemT KeyProcess::GetMaxOffset() diff --git a/src/tests/key_process/key_process_test.cpp b/src/tests/key_process/key_process_test.cpp index 86ec3f80..a68f4787 100644 --- a/src/tests/key_process/key_process_test.cpp +++ b/src/tests/key_process/key_process_test.cpp @@ -428,34 +428,6 @@ TEST_F(KeyProcessTest, PaddingHashSplitWithFAAE) } } -TEST_F(KeyProcessTest, HotHashSplit) -{ - PrepareBatch(); - ASSERT_EQ(process.Initialize(rankInfo, embInfos), true); - LOG_INFO("CPU Core Num: %{}", sysconf(_SC_NPROCESSORS_CONF)); // 查看CPU核数 - - auto fn = [this](int channel, int id) { - auto embName = embInfos[0].name; - process.hotEmbTotCount[embName] = 10; - vector splitKeys; - vector restore; - vector hotPos; - unique_ptr batch; - batch = process.GetBatchData(channel, id); // get batch data from SingletonQueue - LOG_INFO("rankid :{},batchid: {}", rankInfo.rankId, batch->batchId); - tie(splitKeys, restore, hotPos) = process.HotHashSplit(batch); - LOG_INFO("rankid :{},batchid: {}, hotPos {}", rankInfo.rankId, batch->batchId, VectorToString(hotPos)); - }; // for clean code - for (int channel = 0; channel < 1; ++channel) { - for (int id = 0; id < 1; ++id) { - // use lambda expression initialize thread - process.procThreads.emplace_back(std::make_unique(fn, channel, id)); - } - } - this_thread::sleep_for(10s); - process.Destroy(); -} - TEST_F(KeyProcessTest, GetScAll) { vector 
keyScLocal(worldSize, worldRank + 1); // 用worldRank+1初始化发送数据量 @@ -527,38 +499,6 @@ TEST_F(KeyProcessTest, BuildRestoreVec_4cpu) ASSERT_THAT(restore, ElementsAreArray(allExpectRestore[worldRank])); } -// hot模式,batch随机数,ProcessSplitKeys后人为校验lookupKeys、scAll、restore -TEST_F(KeyProcessTest, BuildRestoreVec_rebuilt) -{ - PrepareBatch(); - ASSERT_EQ(process.Initialize(rankInfo, embInfos), true); - LOG_INFO("CPU Core Num: {}", sysconf(_SC_NPROCESSORS_CONF)); // 查看CPU核数 - - auto fn = [this](int channel, int id) { - auto embName = embInfos[0].name; - vector splitKeys; - vector restore; - vector hotPos; - unique_ptr batch; - batch = process.GetBatchData(channel, id); // get batch data from SingletonQueue - LOG_INFO("rankid :{}, batchid: {}", rankInfo.rankId, batch->batchId); - tie(splitKeys, restore, hotPos) = process.HotHashSplit(batch); - auto [lookupKeys, scAll, ss] = process.ProcessSplitKeys(batch, id, splitKeys); - process.BuildRestoreVec(batch, ss, restore, hotPos.size()); - LOG_INFO("rankid :{}, batchid: {}, lookupKeys: {}, scAll: {}, restore after build {}", - rankInfo.rankId, batch->batchId, VectorToString(lookupKeys), - VectorToString(scAll), VectorToString(restore)); - }; // for clean code - for (int channel = 0; channel < 1; ++channel) { - for (int id = 0; id < KEY_PROCESS_THREAD; ++id) { - // use lambda expression initialize thread - process.procThreads.emplace_back(std::make_unique(fn, channel, id)); - } - } - this_thread::sleep_for(10s); - process.Destroy(); -} - // 准入模式,batch随机数,ProcessSplitKeys后人为校验lookupKeys、scAll、count TEST_F(KeyProcessTest, GetCountRecv) { @@ -638,120 +578,6 @@ TEST_F(KeyProcessTest, GetUniqueConfig) process.GetUniqueConfig(uniqueConf); } -// HBM端到端测试,动态shape,固定batch输入 -TEST_F(KeyProcessTest, KeyProcessTaskHelper) -{ - rankInfo.isDDR = false; - rankInfo.useStatic = false; - rankInfo.useDynamicExpansion = false; - EmbeddingMgmt::Instance()->Init(rankInfo, embInfos); - ASSERT_EQ(process.Initialize(rankInfo, embInfos), true); - ASSERT_EQ(process.isRunning, true); - int batchId = 0; - int channelId = 0; - auto batch = GenBatch(embInfos[0].name, batchId, channelId); // 测试一个表 - - LOG_INFO("KeyProcessTaskHelper, rankid: {}, batchid: {}, batchSize: {}", - rankInfo.rankId, batch->batchId, batch->sample.size()); - - ASSERT_EQ(process.KeyProcessTaskHelper(batch, channelId, 0), true); // threadId = 0 - auto infoVecs = process.GetInfoVec(batchId, embInfos[0].name, channelId, ProcessedInfo::RESTORE); - ASSERT_NE(infoVecs, nullptr); - auto all2all = process.GetInfoVec(batchId, embInfos[0].name, channelId, ProcessedInfo::ALL2ALL); - ASSERT_NE(all2all, nullptr); - - ASSERT_EQ(CheckMatrixTensor(*all2all, allExpectAll2all), true); - ASSERT_EQ(CheckFlatTensor({infoVecs->back()}, allExpectOffset[worldRank]), true); - infoVecs->pop_back(); - int64_t hotPosition = process.hotEmbTotCount[batch->name]; - vector expectRestore(allExpectRestore[worldRank].size()); - for (int i = 0; i < expectRestore.size(); i++) { - expectRestore[i] = allExpectRestore[worldRank][i] + hotPosition; - } - ASSERT_EQ(CheckFlatTensor(*infoVecs, expectRestore), true); - LOG_INFO("KeyProcessTaskHelper, rankid: {}, batchid: {}, normal status success", rankInfo.rankId, batch->batchId); - // 测试batchId错误 - HybridMgmtBlock* hybridMgmtBlock = Singleton::GetInstance(); - hybridMgmtBlock->hybridBatchId[0] = 1; - ASSERT_EQ(process.GetInfoVec(batchId, embInfos[0].name, channelId, ProcessedInfo::RESTORE), nullptr); - LOG_INFO("KeyProcessTaskHelper, rankid: {}, batchid: {}, batchId exception success", - rankInfo.rankId, 
batch->batchId); - // 测试empty场景 - hybridMgmtBlock->pythonBatchId[1] = 1; - hybridMgmtBlock->hybridBatchId[1] = 1; - hybridMgmtBlock->readEmbedBatchId[1] = 1; - hybridMgmtBlock->loop[1] = 1; - ASSERT_EQ(process.GetInfoVec(batchId + 1, embInfos[0].name, channelId + 1, ProcessedInfo::RESTORE), nullptr); - LOG_INFO("KeyProcessTaskHelper, rankid: {}, batchid: {}, batch empty success", rankInfo.rankId, batch->batchId); - // eos - process.SetEos(1, 1); - ASSERT_EQ(process.GetInfoVec(batchId + 1, embInfos[0].name, channelId + 1, ProcessedInfo::RESTORE), nullptr); - LOG_INFO("KeyProcessTaskHelper, rankid: {}, batchid: {}, eos status success", rankInfo.rankId, batch->batchId); - this_thread::sleep_for(10s); - process.Destroy(); -} - -// DDR端到端测试,静态shape,固定batch输入 -TEST_F(KeyProcessTest, KeyProcessTaskHelperDDR) -{ - rankInfo.isDDR = true; - rankInfo.useStatic = true; - rankInfo.useDynamicExpansion = false; - EmbeddingMgmt::Instance()->Init(rankInfo, embInfos); - ASSERT_EQ(process.Initialize(rankInfo, embInfos), true); - ASSERT_EQ(process.isRunning, true); - int batchId = 0; - int channelId = 0; - auto batch = GenBatch(embInfos[0].name, batchId, channelId); // 测试第一个表 - HybridMgmtBlock* hybridMgmtBlock = Singleton::GetInstance(); - hybridMgmtBlock->hybridBatchId[0] = 0; - LOG_INFO("KeyProcessTaskHelperDDR, rankid: {}, batchid: {}", rankInfo.rankId, batch->batchId); - - ASSERT_EQ(process.KeyProcessTaskHelper(batch, channelId, 0), true); // threadId = 0 - - auto lookupKeys = process.GetLookupKeys(batchId, embInfos[0].name, channelId); // lookup list返回的不是tensor - ASSERT_EQ(lookupKeys.size(), sendCount * worldSize); - LOG_INFO("KeyProcessTaskHelperDDR, rankid: {}, batchid: {}, lookupKeys: {}", - rankInfo.rankId, batch->batchId, VectorToString(lookupKeys)); - ASSERT_EQ(CheckPaddingVec(lookupKeys, allExpectLookupKeys[worldRank]), true); - - auto infoVecs = process.GetInfoVec(batchId, embInfos[0].name, channelId, ProcessedInfo::RESTORE); - ASSERT_NE(infoVecs, nullptr); - int col = allExpectRestore[worldRank].size(); - auto tmpTensor = (*infoVecs).at(0); - auto tmpData = tmpTensor.flat(); - - int64_t hotPosition = process.hotEmbTotCount[batch->name]; - vector actualGetRestore(col); - for (int j = 0; j < col; j++) { - actualGetRestore[j] = tmpData(j)-hotPosition; - } - LOG_INFO("KeyProcessTaskHelperDDR, rankid: {}, batchid: {}, Restore: {}", - rankInfo.rankId, batch->batchId, VectorToString(actualGetRestore)); - ASSERT_THAT(actualGetRestore, ElementsAreArray(allExpectRestoreStatic[worldRank])); - LOG_INFO("KeyProcessTaskHelperDDR, rankid: {}, batchid: {}, normal status success", - rankInfo.rankId, batch->batchId); - - // 测试batchId错误 - hybridMgmtBlock->hybridBatchId[0] = 1; - ASSERT_EQ(process.GetLookupKeys(batchId, embInfos[0].name, channelId).empty(), true); - LOG_INFO("KeyProcessTaskHelper, rankid: {}, batchid: {}, batchId exception success", - rankInfo.rankId, batch->batchId); - // 测试empty场景 - hybridMgmtBlock->pythonBatchId[1] = 1; - hybridMgmtBlock->hybridBatchId[1] = 1; - hybridMgmtBlock->readEmbedBatchId[1] = 1; - hybridMgmtBlock->loop[1] = 1; - ASSERT_EQ(process.GetLookupKeys(batchId + 1, embInfos[0].name, channelId + 1).empty(), true); - LOG_INFO("KeyProcessTaskHelper, rankid: {}, batchid: {}, batch empty success", rankInfo.rankId, batch->batchId); - // eos - process.SetEos(1, 1); - ASSERT_EQ(process.GetLookupKeys(batchId + 1, embInfos[0].name, channelId + 1).empty(), true); - LOG_INFO("KeyProcessTaskHelper, rankid: {}, batchid: {}, eos status success", rankInfo.rankId, batch->batchId); - 
this_thread::sleep_for(10s); - process.Destroy(); -} - TEST_F(KeyProcessTest, InitializeUnique) { ASSERT_EQ(ock::ctr::Factory::Create(factory), -1); diff --git a/tests/mx_rec/core/test_build_graph.py b/tests/mx_rec/core/test_build_graph.py index c5766179..5360f908 100644 --- a/tests/mx_rec/core/test_build_graph.py +++ b/tests/mx_rec/core/test_build_graph.py @@ -32,12 +32,14 @@ class TestGetRestoreVectorFunc(unittest.TestCase): def setUp(self): # 默认动态扩容、hot emb、HBM self.config = dict(table_name="test_table", channel_id=0, is_hbm=True, emb_size=8, ext_emb_size=8, - feat_cnt=8, batch_size=32, rank_size=8, send_count=1, use_dynamic_expansion=True) + feat_cnt=8, batch_size=32, rank_size=8, send_count=1, device_id=0, + use_dynamic_expansion=True) def tearDown(self): # 恢复config self.config = dict(table_name="test_table", channel_id=0, is_hbm=True, emb_size=8, ext_emb_size=8, - feat_cnt=8, batch_size=32, rank_size=8, send_count=1, use_dynamic_expansion=True) + feat_cnt=8, batch_size=32, rank_size=8, send_count=1, device_id=0, + use_dynamic_expansion=True) def test_get_restore_vector_case1(self): """ @@ -112,13 +114,15 @@ class TestGetIdOffsetsFunc(unittest.TestCase): def setUp(self): # 默认动态扩容、hot emb、HBM self.config = dict(table_name="test_table", channel_id=0, is_hbm=True, emb_size=8, ext_emb_size=8, - feat_cnt=8, batch_size=32, rank_size=8, send_count=1, use_dynamic_expansion=True) + feat_cnt=8, batch_size=32, rank_size=8, send_count=1, device_id=0, + use_dynamic_expansion=True) self.max_lookup_vec_size = self.config.get("send_count") * self.config.get("rank_size") def tearDown(self): # 恢复config self.config = dict(table_name="test_table", channel_id=0, is_hbm=True, emb_size=8, ext_emb_size=8, - feat_cnt=8, batch_size=32, rank_size=8, send_count=1, use_dynamic_expansion=True) + feat_cnt=8, batch_size=32, rank_size=8, send_count=1, device_id=0, + use_dynamic_expansion=True) @mock.patch("mx_rec.core.asc.build_graph.npu_ops.gen_npu_ops.get_next") def test_get_id_offsets_case1(self, mock_get_next): @@ -160,12 +164,14 @@ class TestGetAll2allArgsFunc(unittest.TestCase): def setUp(self): # 默认动态扩容、hot emb、HBM self.config = dict(table_name="test_table", channel_id=0, is_hbm=True, emb_size=8, ext_emb_size=8, - feat_cnt=8, batch_size=32, rank_size=8, send_count=1, use_dynamic_expansion=True) + feat_cnt=8, batch_size=32, rank_size=8, send_count=1, device_id=0, + use_dynamic_expansion=True) def tearDown(self): # 恢复config self.config = dict(table_name="test_table", channel_id=0, is_hbm=True, emb_size=8, ext_emb_size=8, - feat_cnt=8, batch_size=32, rank_size=8, send_count=1, use_dynamic_expansion=True) + feat_cnt=8, batch_size=32, rank_size=8, send_count=1, device_id=0, + use_dynamic_expansion=True) def test_get_all2all_args_case1(self): """ @@ -200,12 +206,14 @@ class TestGetPreProcessedTensorForAscFunc(unittest.TestCase): def setUp(self): # 默认动态扩容、hot emb、HBM self.config = dict(table_name="test_table", channel_id=0, is_hbm=True, emb_size=8, ext_emb_size=8, - feat_cnt=8, batch_size=32, rank_size=8, send_count=1, use_dynamic_expansion=True) + feat_cnt=8, batch_size=32, rank_size=8, send_count=1, device_id=0, + use_dynamic_expansion=True) def tearDown(self): # 恢复config self.config = dict(table_name="test_table", channel_id=0, is_hbm=True, emb_size=8, ext_emb_size=8, - feat_cnt=8, batch_size=32, rank_size=8, send_count=1, use_dynamic_expansion=True) + feat_cnt=8, batch_size=32, rank_size=8, send_count=1, device_id=0, + use_dynamic_expansion=True) @mock.patch.multiple("mx_rec.core.asc.build_graph", 
get_restore_vector=mock.MagicMock(return_value=[0, 0]), -- Gitee From c8355ad1fde2e9828da0ed8edb85aaa99f154b86 Mon Sep 17 00:00:00 2001 From: penghuiyang <1060916628@qq.com> Date: Thu, 9 May 2024 15:34:19 +0800 Subject: [PATCH 128/302] =?UTF-8?q?dockerfile=E5=8F=96=E6=B6=88=E8=AE=BE?= =?UTF-8?q?=E7=BD=AECC=E7=8E=AF=E5=A2=83=E5=8F=98=E9=87=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/build_mxRec_images/centos_build/Dockerfile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/build_mxRec_images/centos_build/Dockerfile b/docs/build_mxRec_images/centos_build/Dockerfile index ee1d98e8..4e21166c 100644 --- a/docs/build_mxRec_images/centos_build/Dockerfile +++ b/docs/build_mxRec_images/centos_build/Dockerfile @@ -130,6 +130,9 @@ RUN pip3.7 install -U pip && \ pip3.7 install h5py==3.1.0 && \ rm -rf /root/.cache/pip +# 安装mpi4py时使用该环境变了,安装完成后取消 +RUN unset CC + # 10.设置驱动路径环境变量 ARG ASCEND_BASE=/usr/local/Ascend ENV LD_LIBRARY_PATH=$ASCEND_BASE/driver/lib64:$ASCEND_BASE/driver/lib64/common:$ASCEND_BASE/driver/lib64/driver:$LD_LIBRARY_PATH -- Gitee From c9561815ded4b64c83863c09e247f1b966aa39e5 Mon Sep 17 00:00:00 2001 From: penghuiyang <1060916628@qq.com> Date: Thu, 9 May 2024 15:38:45 +0800 Subject: [PATCH 129/302] =?UTF-8?q?dockerfile=E5=8F=96=E6=B6=88=E8=AE=BE?= =?UTF-8?q?=E7=BD=AECC=E7=8E=AF=E5=A2=83=E5=8F=98=E9=87=8F-=E6=8F=8F?= =?UTF-8?q?=E8=BF=B0=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/build_mxRec_images/centos_build/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/build_mxRec_images/centos_build/Dockerfile b/docs/build_mxRec_images/centos_build/Dockerfile index 4e21166c..3e93704e 100644 --- a/docs/build_mxRec_images/centos_build/Dockerfile +++ b/docs/build_mxRec_images/centos_build/Dockerfile @@ -130,7 +130,7 @@ RUN pip3.7 install -U pip && \ pip3.7 install h5py==3.1.0 && \ rm -rf /root/.cache/pip -# 安装mpi4py时使用该环境变了,安装完成后取消 +# 安装mpi4py时使用该环境变,安装完成后取消 RUN unset CC # 10.设置驱动路径环境变量 -- Gitee From de83703090bfd967a0dc09a29f95052195aa4aa9 Mon Sep 17 00:00:00 2001 From: penghuiyang <1060916628@qq.com> Date: Thu, 9 May 2024 15:51:57 +0800 Subject: [PATCH 130/302] =?UTF-8?q?dockerfile=E5=8F=96=E6=B6=88=E8=AE=BE?= =?UTF-8?q?=E7=BD=AECC=E7=8E=AF=E5=A2=83=E5=8F=98=E9=87=8F-=E6=8F=8F?= =?UTF-8?q?=E8=BF=B0=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/build_mxRec_images/centos_build/Dockerfile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/build_mxRec_images/centos_build/Dockerfile b/docs/build_mxRec_images/centos_build/Dockerfile index 3e93704e..16fd0688 100644 --- a/docs/build_mxRec_images/centos_build/Dockerfile +++ b/docs/build_mxRec_images/centos_build/Dockerfile @@ -130,7 +130,7 @@ RUN pip3.7 install -U pip && \ pip3.7 install h5py==3.1.0 && \ rm -rf /root/.cache/pip -# 安装mpi4py时使用该环境变,安装完成后取消 +# 安装mpi4py时使用该环境变量,安装完成后取消 RUN unset CC # 10.设置驱动路径环境变量 @@ -139,6 +139,8 @@ ENV LD_LIBRARY_PATH=$ASCEND_BASE/driver/lib64:$ASCEND_BASE/driver/lib64/common:$ # 11.CANN相关参数 ARG TOOLKIT_PKG=Ascend-cann-toolkit*.run + + ARG TOOLKIT_PATH=$ASCEND_BASE/ascend-toolkit/latest # 12.TF相关 -- Gitee From 3df4015a148ec9544d0e13ef1cd4fd32dee48ba5 Mon Sep 17 00:00:00 2001 From: penghuiyang <1060916628@qq.com> Date: Thu, 9 May 2024 15:52:52 +0800 Subject: [PATCH 131/302] =?UTF-8?q?dockerfile=E5=8F=96=E6=B6=88=E8=AE=BE?= 
=?UTF-8?q?=E7=BD=AECC=E7=8E=AF=E5=A2=83=E5=8F=98=E9=87=8F-=E6=8F=8F?= =?UTF-8?q?=E8=BF=B0=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/build_mxRec_images/centos_build/Dockerfile | 2 -- 1 file changed, 2 deletions(-) diff --git a/docs/build_mxRec_images/centos_build/Dockerfile b/docs/build_mxRec_images/centos_build/Dockerfile index 16fd0688..2d2b3579 100644 --- a/docs/build_mxRec_images/centos_build/Dockerfile +++ b/docs/build_mxRec_images/centos_build/Dockerfile @@ -139,8 +139,6 @@ ENV LD_LIBRARY_PATH=$ASCEND_BASE/driver/lib64:$ASCEND_BASE/driver/lib64/common:$ # 11.CANN相关参数 ARG TOOLKIT_PKG=Ascend-cann-toolkit*.run - - ARG TOOLKIT_PATH=$ASCEND_BASE/ascend-toolkit/latest # 12.TF相关 -- Gitee From 14af0e7af544001db46f5612e599f044480bea5a Mon Sep 17 00:00:00 2001 From: yxy1684 <2270320041@qq.com> Date: Fri, 10 May 2024 09:17:19 +0800 Subject: [PATCH 132/302] bugfix --- mx_rec/core/emb/base_sparse_embedding.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mx_rec/core/emb/base_sparse_embedding.py b/mx_rec/core/emb/base_sparse_embedding.py index 2c29f9c8..f984697c 100644 --- a/mx_rec/core/emb/base_sparse_embedding.py +++ b/mx_rec/core/emb/base_sparse_embedding.py @@ -384,7 +384,8 @@ class BaseSparseEmbedding(metaclass=abc.ABCMeta): config = dict(batch_size=feature_spec.batch_size, feat_cnt=feature_spec.feat_cnt, send_count=send_count, rank_size=self._rank_size, channel_id=channel_id, table_name=self._table_name, is_hbm=self._is_hbm, ext_emb_size=self._ext_emb_size, emb_size=self._emb_size, - use_dynamic_expansion=ConfigInitializer.get_instance().use_dynamic_expansion) + use_dynamic_expansion=ConfigInitializer.get_instance().use_dynamic_expansion, + device_id=self._device_id) return get_preprocessed_tensor_for_asc(self._variable, config) -- Gitee From b85aaa314050ce38b48b1c2cb0b7facf4ec02c79 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=BD=97=E5=B9=B8=E8=BF=90?= Date: Fri, 10 May 2024 08:59:39 +0000 Subject: [PATCH 133/302] =?UTF-8?q?!133=20=E5=85=A8=E5=B1=80=E5=8E=BB?= =?UTF-8?q?=E9=87=8D+=E6=89=A9=E5=AE=B9=E6=A8=A1=E5=BC=8F=EF=BC=8C?= =?UTF-8?q?=E8=A1=A8=E5=90=8D=E5=B8=A6=E6=9C=89=E2=80=9C/=E2=80=9D?= =?UTF-8?q?=E5=AD=97=E6=A0=B7=E9=9A=90=E6=82=A3=E4=BF=AE=E5=A4=8D=20*=20?= =?UTF-8?q?=E3=80=90=E4=BF=AE=E6=94=B9=E8=AF=B4=E6=98=8E=20Modification?= =?UTF-8?q?=E3=80=91=E5=85=A8=E5=B1=80unique=E5=8A=9F=E8=83=BD=E5=9C=A8?= =?UTF-8?q?=E6=89=A9=E5=AE=B9=E6=A8=A1=E5=BC=8F=E4=B8=8B=EF=BC=8C=E8=A1=A8?= =?UTF-8?q?=E5=90=8D=E5=AD=97=E2=80=9C/=E2=80=9D=E9=9A=90=E6=82=A3?= =?UTF-8?q?=E4=BF=AE=E5=A4=8D=20*=20=E3=80=90=E4=BF=AE=E6=94=B9=E8=AF=B4?= =?UTF-8?q?=E6=98=8E=20Modification=E3=80=91=E5=85=A8=E5=B1=80unique?= =?UTF-8?q?=E5=8A=9F=E8=83=BD=E5=9C=A8=E6=89=A9=E5=AE=B9=E6=A8=A1=E5=BC=8F?= =?UTF-8?q?=E4=B8=8B=EF=BC=8C=E8=A1=A8=E5=90=8D=E5=AD=97=E2=80=9C/?= =?UTF-8?q?=E2=80=9D=E9=9A=90=E6=82=A3=E4=BF=AE=E5=A4=8D=20*=20=E3=80=90?= =?UTF-8?q?=E4=BF=AE=E6=94=B9=E8=AF=B4=E6=98=8E=20Modification=E3=80=91?= =?UTF-8?q?=E5=85=A8=E5=B1=80unique=E5=8A=9F=E8=83=BD=E5=9C=A8=E6=89=A9?= =?UTF-8?q?=E5=AE=B9=E6=A8=A1=E5=BC=8F=E4=B8=8B=EF=BC=8C=E8=A1=A8=E5=90=8D?= =?UTF-8?q?=E5=AD=97=E2=80=9C/=E2=80=9D=E9=9A=90=E6=82=A3=E4=BF=AE?= =?UTF-8?q?=E5=A4=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mx_rec/core/asc/manager.py | 6 ++++++ mx_rec/core/emb/dynamic_sparse_embedding.py | 4 +++- mx_rec/graph/modifier.py | 2 +- mx_rec/optimizers/base.py | 5 ++--- 
mx_rec/util/config_utils/embedding_utils.py | 14 ++++++++++++++ 5 files changed, 26 insertions(+), 5 deletions(-) diff --git a/mx_rec/core/asc/manager.py b/mx_rec/core/asc/manager.py index 8b62b66b..97a71a4d 100644 --- a/mx_rec/core/asc/manager.py +++ b/mx_rec/core/asc/manager.py @@ -37,10 +37,16 @@ def generate_table_info_list(): raise ValueError(f"The DDR mode of all tables must be used or not used at the same time. However, is_hbm " f"of each table `{table_instance_dict.keys()}` is `{is_hbm_list}`.") + # 通过create_hash_optimizer创建optimizer_instance + optimizer_instance = ConfigInitializer.get_instance().optimizer_config.optimizer_instance # generate table info dangling_table = check_dangling_table() for _, table_instance in ConfigInitializer.get_instance().sparse_embed_config.table_instance_dict.items(): + # FS模式扩容场景 + if ConfigInitializer.get_instance().use_dynamic_expansion and optimizer_instance: + table_instance.ext_emb_size = table_instance.emb_size * (1 + optimizer_instance.slot_num) + logger.info("ext_emb_size is reset to be %s in generate_table_info_list.", table_instance.ext_emb_size) skip = should_skip(table_instance.table_name) if table_instance.table_name in dangling_table or skip: logger.info("skip table %s: %s which does not need to be provided to the EmbInfo.", diff --git a/mx_rec/core/emb/dynamic_sparse_embedding.py b/mx_rec/core/emb/dynamic_sparse_embedding.py index 49979261..a7616991 100644 --- a/mx_rec/core/emb/dynamic_sparse_embedding.py +++ b/mx_rec/core/emb/dynamic_sparse_embedding.py @@ -50,7 +50,9 @@ class DynamicSparseEmbedding(BaseSparseEmbedding): self._table_name, ASCEND_TABLE_NAME_MUST_CONTAIN) if not add_collection_condition: return sparse_forward_fn(local_embeddings) - + # 创建扩容查询tensor和table_instance的映射关系,以便优化器中使用 + ConfigInitializer.get_instance().sparse_embed_config.insert_table_instance_to_tensor_dict( + result.get("id_offsets"), self) tf.compat.v1.add_to_collection(ASCEND_SPARSE_LOOKUP_LOCAL_EMB, local_embeddings) tf.compat.v1.add_to_collection(ASCEND_SPARSE_LOOKUP_ID_OFFSET, result.get("id_offsets")) return sparse_forward_fn(local_embeddings) diff --git a/mx_rec/graph/modifier.py b/mx_rec/graph/modifier.py index 33c4b958..6b6013d8 100644 --- a/mx_rec/graph/modifier.py +++ b/mx_rec/graph/modifier.py @@ -388,7 +388,7 @@ def change_ext_emb_size_by_opt(optimizer): # When dynamic expansion mode, ext_emb_size is set by optimizer if ConfigInitializer.get_instance().use_dynamic_expansion or not table_instance.is_hbm: table_instance.ext_emb_size = table_instance.emb_size * (1 + optimizer.slot_num) - logger.debug("ext_emb_size is reset to be %s for EmbInfo", table_instance.ext_emb_size) + logger.info("ext_emb_size is reset to be %s in change_ext_emb_size_by_opt", table_instance.ext_emb_size) @para_checker_decorator( diff --git a/mx_rec/optimizers/base.py b/mx_rec/optimizers/base.py index fbc63193..f74e9778 100644 --- a/mx_rec/optimizers/base.py +++ b/mx_rec/optimizers/base.py @@ -97,9 +97,8 @@ class CustomizedOptimizer: @staticmethod def sum_same_id_gradients(grad, var, is_expansion): if isinstance(var, ops.Tensor): - # 扩容模式从scope获取表名,偏移是-2 - table_name = var.op.name.split('/')[-2] - table_instance = ConfigInitializer.get_instance().sparse_embed_config.get_table_instance_by_name(table_name) + table_instance = ConfigInitializer.get_instance().sparse_embed_config.get_table_instance_by_tensor(var) + table_name = table_instance.table_name else: table_instance = ConfigInitializer.get_instance().sparse_embed_config.get_table_instance(var) table_name = 
table_instance.table_name diff --git a/mx_rec/util/config_utils/embedding_utils.py b/mx_rec/util/config_utils/embedding_utils.py index 68ceef3a..e13d9d51 100644 --- a/mx_rec/util/config_utils/embedding_utils.py +++ b/mx_rec/util/config_utils/embedding_utils.py @@ -3,6 +3,7 @@ # Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved. from typing import Optional +from tensorflow.python.framework import ops from tensorflow import Variable from mx_rec.util.log import logger @@ -18,6 +19,7 @@ class SparseEmbedConfig: self._table_name_set = set() self._removing_var_list = [] self._name_to_var_dict = dict() + self._tensor_to_table_instance_dict = dict() @property def table_instance_dict(self): @@ -45,6 +47,12 @@ class SparseEmbedConfig: return self._table_instance_dict.get(key) + def get_table_instance_by_tensor(self, tensor) -> object: + if tensor not in self._tensor_to_table_instance_dict: + raise KeyError(f"Given tensor does not exist.") + + return self._tensor_to_table_instance_dict.get(tensor) + def get_table_instance_by_name(self, table_name: Optional[str]) -> object: if table_name not in self._name_to_var_dict: raise KeyError(f"Given table name does not exist.") @@ -74,5 +82,11 @@ class SparseEmbedConfig: self._name_to_var_dict[name] = key self._table_instance_dict[key] = instance + def insert_table_instance_to_tensor_dict(self, tensor: ops.Tensor, instance: object) -> None: + if tensor in self._tensor_to_table_instance_dict: + raise KeyError(f"Given tensor {tensor} has been used.") + logger.debug("Record one hash table for expansion mode, with tensor: %s.", tensor) + self._tensor_to_table_instance_dict[tensor] = instance + def export_table_num(self) -> int: return len(self.table_instance_dict) if self.table_instance_dict else 0 -- Gitee From 0bd44e8ea30d2450de60aedd35cf50cc4dc68524 Mon Sep 17 00:00:00 2001 From: penghuiyang <1060916628@qq.com> Date: Sat, 11 May 2024 10:45:52 +0800 Subject: [PATCH 134/302] =?UTF-8?q?=E8=9E=8D=E5=90=88=E7=AE=97=E5=AD=90rea?= =?UTF-8?q?dme=E6=9B=B4=E6=96=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cust_op/fused_lazy_adam/README.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/cust_op/fused_lazy_adam/README.md b/cust_op/fused_lazy_adam/README.md index 7aa64218..13ed6994 100644 --- a/cust_op/fused_lazy_adam/README.md +++ b/cust_op/fused_lazy_adam/README.md @@ -52,7 +52,13 @@ b) 算子参数说明: * inputM: lazy_adam优化器一阶矩估计;计算结果原地更新; * inputV: lazy_adam优化器二阶矩估计;计算结果原地更新; * inputVar: embedding表对应的variable数据;计算结果原地更新; - c) 算子约束说明: +* lr: 学习率; +* beta1: 一阶矩估计的指数衰减率; +* beta2: 二阶矩估计的指数衰减率; +* epsilon: 极小值; + +c) 算子约束说明: + * 支持的型号:Atlas A2系列产品; * 支持的CANN版本:8.0.RC1及之后版本; * 支持的输入数据类型:float32; -- Gitee From 7ea23a49d9d6691517d7079c62b9b1ba20260d28 Mon Sep 17 00:00:00 2001 From: longfeifei <962977793@qq.com> Date: Wed, 15 May 2024 10:53:14 +0800 Subject: [PATCH 135/302] =?UTF-8?q?warm=20start=20=E5=BC=80=E5=8F=91?= =?UTF-8?q?=E8=A1=A5=E5=85=85?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mx_rec/saver/saver.py | 2 +- mx_rec/saver/warm_start.py | 4 ++-- mx_rec/util/config_utils/hybrid_mgmt_utils.py | 4 ++-- src/core/emb_table/embedding_mgmt.cpp | 6 ++++++ src/core/emb_table/embedding_mgmt.h | 5 +++++ src/core/hybrid_mgmt/hybrid_mgmt.cpp | 11 +++++++++-- src/core/hybrid_mgmt/hybrid_mgmt.h | 2 +- src/pybind/module_main.cpp | 2 +- 8 files changed, 27 insertions(+), 9 deletions(-) diff --git a/mx_rec/saver/saver.py b/mx_rec/saver/saver.py 
index dc545822..4f789a30 100644 --- a/mx_rec/saver/saver.py +++ b/mx_rec/saver/saver.py @@ -343,7 +343,7 @@ class Saver(object): set_optimizer_info(optimizer_instance, table_name) if self.config_instance.hybrid_manager_config.asc_manager: - self.config_instance.hybrid_manager_config.restore_host_data(reading_path) + self.config_instance.hybrid_manager_config.restore_host_data(reading_path, warm_start_tables) logger.info("host data was restored.") if self.config_instance.use_dynamic_expansion: diff --git a/mx_rec/saver/warm_start.py b/mx_rec/saver/warm_start.py index 520c3df3..b5df5887 100644 --- a/mx_rec/saver/warm_start.py +++ b/mx_rec/saver/warm_start.py @@ -95,7 +95,7 @@ def patch_for_estimator_train(func): hooks = kwargs.get('hooks', []) if WarmStartController().get_elements(): hooks.append(SparseRestoreHook()) - return func(*args, *kwargs) + return func(*args, **kwargs) return wrapper @@ -243,4 +243,4 @@ class SparseRestoreHook(tf.estimator.SessionRunHook): for path, restore_tables in self._warm_start_dict.items(): restore_path = get_latest_ckpt(path) self._saver.restore(session, restore_path, restore_tables) - self._is_warm_start = False + self._is_warm_start = True diff --git a/mx_rec/util/config_utils/hybrid_mgmt_utils.py b/mx_rec/util/config_utils/hybrid_mgmt_utils.py index 737ce7cb..89ba16cf 100644 --- a/mx_rec/util/config_utils/hybrid_mgmt_utils.py +++ b/mx_rec/util/config_utils/hybrid_mgmt_utils.py @@ -83,11 +83,11 @@ class HybridManagerConfig: self.asc_manager.save(root_dir) logger.debug("Data from host pipeline has been saved.") - def restore_host_data(self, root_dir: Optional[str]) -> None: + def restore_host_data(self, root_dir: Optional[str], warm_start_tables=None) -> None: if self.asc_manager is None: raise RuntimeError("ASC manager does not exist.") - if not self.asc_manager.load(root_dir): + if not self.asc_manager.load(root_dir, warm_start_tables): raise TypeError("Asc load data does not match usr setups, \ please re-consider if you want to restore from this dir") logger.debug("Data from host pipeline has been restored.") diff --git a/src/core/emb_table/embedding_mgmt.cpp b/src/core/emb_table/embedding_mgmt.cpp index 2c2f9e39..f850e254 100644 --- a/src/core/emb_table/embedding_mgmt.cpp +++ b/src/core/emb_table/embedding_mgmt.cpp @@ -142,6 +142,12 @@ std::shared_ptr EmbeddingMgmt::GetTable(const string& name) { return std::dynamic_pointer_cast(it->second); } +void EmbeddingMgmt::Load(const string& name, const string& filePath) +{ + return embeddings[name]->Load(filePath); +} + + void EmbeddingMgmt::Load(const string& filePath) { for (auto& tablePair: embeddings) { diff --git a/src/core/emb_table/embedding_mgmt.h b/src/core/emb_table/embedding_mgmt.h index 11ed2325..d091bdef 100644 --- a/src/core/emb_table/embedding_mgmt.h +++ b/src/core/emb_table/embedding_mgmt.h @@ -83,6 +83,11 @@ public: std::shared_ptr GetTable(const string& name); + /** * 加载单个表 */ + void Load(const string& name, const string& filePath); + /** * 加载所有表 */ diff --git a/src/core/hybrid_mgmt/hybrid_mgmt.cpp b/src/core/hybrid_mgmt/hybrid_mgmt.cpp index 894dc230..be12dd53 100644 --- a/src/core/hybrid_mgmt/hybrid_mgmt.cpp +++ b/src/core/hybrid_mgmt/hybrid_mgmt.cpp @@ -279,7 +279,7 @@ bool HybridMgmt::Save(const string savePath) /// 加载模型 /// \param loadPath /// \return -bool HybridMgmt::Load(const string& loadPath) +bool HybridMgmt::Load(const string& loadPath, vector<string> warmStartTables) { #ifndef GTEST if (!isInitialized) { @@ -296,7 +296,14 @@ vector
loadFeatures; SetFeatureTypeForLoad(loadFeatures); - EmbeddingMgmt::Instance()->Load(loadPath); + if(warmStartTables.size() == 0) { + EmbeddingMgmt::Instance()->Load(loadPath); + } else { + for (auto& tableName: warmStartTables) { + EmbeddingMgmt::Instance()->Load(tableName, loadPath); + } + } + loadOffsetToSend = EmbeddingMgmt::Instance()->GetLoadOffsets(); // 执行加载操作 diff --git a/src/core/hybrid_mgmt/hybrid_mgmt.h b/src/core/hybrid_mgmt/hybrid_mgmt.h index 0251eb91..a7bdcee6 100644 --- a/src/core/hybrid_mgmt/hybrid_mgmt.h +++ b/src/core/hybrid_mgmt/hybrid_mgmt.h @@ -61,7 +61,7 @@ namespace MxRec { bool Save(const string savePath); - bool Load(const string& loadPath); + bool Load(const string& loadPath, vector warmStartTables); OffsetT SendHostMap(const string tableName); diff --git a/src/pybind/module_main.cpp b/src/pybind/module_main.cpp index 0df47092..acb914f2 100644 --- a/src/pybind/module_main.cpp +++ b/src/pybind/module_main.cpp @@ -214,7 +214,7 @@ namespace { py::arg("seed") = DEFAULT_RANDOM_SEED, py::arg("threshold_values") = vector {}, py::arg("if_load") = false) .def("save", &MxRec::HybridMgmt::Save, py::arg("save_path") = "") - .def("load", &MxRec::HybridMgmt::Load, py::arg("load_path") = "") + .def("load", &MxRec::HybridMgmt::Load, py::arg("load_path") = "", py::arg("warm_start_tables") = vector {}) .def("destroy", &MxRec::HybridMgmt::Destroy) .def("evict", &MxRec::HybridMgmt::Evict) .def("send", &MxRec::HybridMgmt::SendHostMap, py::arg("table_name") = "") -- Gitee From ca6369f6718e213efc4e8474cb5089da94f60242 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=BD=97=E5=B9=B8=E8=BF=90?= Date: Thu, 16 May 2024 06:29:15 +0000 Subject: [PATCH 136/302] =?UTF-8?q?!136=20=E3=80=90=E4=BF=AE=E6=94=B9?= =?UTF-8?q?=E8=AF=B4=E6=98=8E=20Modification=E3=80=91=E5=A2=9E=E5=8A=A0?= =?UTF-8?q?=E5=BC=82=E5=B8=B8=E6=8D=95=E8=8E=B7=EF=BC=8C=E9=9D=9Ehbm?= =?UTF-8?q?=E6=A8=A1=E5=BC=8F=E4=B8=8B=E5=BF=85=E9=A1=BB=E4=BD=BF=E7=94=A8?= =?UTF-8?q?=E6=94=B9=E5=9B=BE=20*=20=E3=80=90=E4=BF=AE=E6=94=B9=E8=AF=B4?= =?UTF-8?q?=E6=98=8E=20Modification=E3=80=91=E5=A2=9E=E5=8A=A0=E5=BC=82?= =?UTF-8?q?=E5=B8=B8=E6=8D=95=E8=8E=B7=EF=BC=8C=E9=9D=9Ehbm=E6=A8=A1?= =?UTF-8?q?=E5=BC=8F=E4=B8=8B=E5=BF=85=E9=A1=BB=E4=BD=BF=E7=94=A8=E6=94=B9?= =?UTF-8?q?=E5=9B=BE=20*=20=E3=80=90=E4=BF=AE=E6=94=B9=E8=AF=B4=E6=98=8E?= =?UTF-8?q?=20Modification=E3=80=91=E5=A2=9E=E5=8A=A0=E5=BC=82=E5=B8=B8?= =?UTF-8?q?=E6=8D=95=E8=8E=B7=EF=BC=8C=E9=9D=9Ehbm=E6=A8=A1=E5=BC=8F?= =?UTF-8?q?=E4=B8=8B=E5=BF=85=E9=A1=BB=E4=BD=BF=E7=94=A8=E6=94=B9=E5=9B=BE?= =?UTF-8?q?=20*=20Merge=20remote-tracking=20branch=20'upstream/develop'=20?= =?UTF-8?q?into=20develop-bugfix=20*=20=E3=80=90=E4=BF=AE=E6=94=B9?= =?UTF-8?q?=E8=AF=B4=E6=98=8E=20Modification=E3=80=91=E5=85=A8=E5=B1=80uni?= =?UTF-8?q?que=E5=8A=9F=E8=83=BD=E5=9C=A8=E6=89=A9=E5=AE=B9=E6=A8=A1?= =?UTF-8?q?=E5=BC=8F=E4=B8=8B=EF=BC=8C=E8=A1=A8=E5=90=8D=E5=AD=97=E2=80=9C?= =?UTF-8?q?/=E2=80=9D=E9=9A=90=E6=82=A3=E4=BF=AE=E5=A4=8D=20*=20=E3=80=90?= =?UTF-8?q?=E4=BF=AE=E6=94=B9=E8=AF=B4=E6=98=8E=20Modification=E3=80=91?= =?UTF-8?q?=E5=85=A8=E5=B1=80unique=E5=8A=9F=E8=83=BD=E5=9C=A8=E6=89=A9?= =?UTF-8?q?=E5=AE=B9=E6=A8=A1=E5=BC=8F=E4=B8=8B=EF=BC=8C=E8=A1=A8=E5=90=8D?= =?UTF-8?q?=E5=AD=97=E2=80=9C/=E2=80=9D=E9=9A=90=E6=82=A3=E4=BF=AE?= =?UTF-8?q?=E5=A4=8D=20*=20=E3=80=90=E4=BF=AE=E6=94=B9=E8=AF=B4=E6=98=8E?= =?UTF-8?q?=20Modification=E3=80=91=E5=85=A8=E5=B1=80unique=E5=8A=9F?= =?UTF-8?q?=E8=83=BD=E5=9C=A8=E6=89=A9=E5=AE=B9=E6=A8=A1=E5=BC=8F=E4=B8=8B?= =?UTF-8?q?=EF=BC=8C=E8=A1=A8=E5=90=8D=E5=AD=97=E2=80=9C/=E2=80=9D?= 
=?UTF-8?q?=E9=9A=90=E6=82=A3=E4=BF=AE=E5=A4=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mx_rec/core/emb/base_sparse_embedding.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/mx_rec/core/emb/base_sparse_embedding.py b/mx_rec/core/emb/base_sparse_embedding.py index f984697c..1a59bd24 100644 --- a/mx_rec/core/emb/base_sparse_embedding.py +++ b/mx_rec/core/emb/base_sparse_embedding.py @@ -296,6 +296,8 @@ class BaseSparseEmbedding(metaclass=abc.ABCMeta): # set modify graph self._modify_graph = kwargs.get("modify_graph", True) + if not self._modify_graph and not self._is_hbm: + raise RuntimeError("when the 'ddr or ssd' mode are used, the 'modify graph' is required") # return the stub tensor of the lookup result if not self._use_static: @@ -328,7 +330,9 @@ class BaseSparseEmbedding(metaclass=abc.ABCMeta): return lookup_result if not self._use_static and not self._modify_graph and kwargs.get("batch") is None: - raise RuntimeError("When the 'feature spec' mode and 'dynamic shape' are used, the 'batch' is required.") + raise RuntimeError("when the 'feature spec' mode and 'dynamic shape' are used, the 'batch' is required") + if not self._modify_graph and not self._is_hbm: + raise RuntimeError("when the 'ddr or ssd' mode are used, the 'modify graph' is required") table_name = feature_spec.table_name same_table_feature_spec = \ ConfigInitializer.get_instance().feature_spec_config.table_name_to_feature_spec[table_name][is_training] -- Gitee From 84324b354023eb6f05051ae9c2749c071832e21d Mon Sep 17 00:00:00 2001 From: penghuiyang <1060916628@qq.com> Date: Thu, 16 May 2024 15:15:25 +0800 Subject: [PATCH 137/302] =?UTF-8?q?=E9=80=82=E9=85=8Dno=20ranktable?= =?UTF-8?q?=E5=90=AF=E5=8A=A8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/DCNv2/main_mxrec.py | 2 +- examples/dlrm/model/config.py | 6 +-- examples/dlrm/model/main_mxrec.py | 3 +- examples/dlrm/model/run.sh | 63 +++++++++++++++++++++---------- 4 files changed, 48 insertions(+), 26 deletions(-) diff --git a/examples/DCNv2/main_mxrec.py b/examples/DCNv2/main_mxrec.py index eb1d91ea..a1e38897 100644 --- a/examples/DCNv2/main_mxrec.py +++ b/examples/DCNv2/main_mxrec.py @@ -249,7 +249,7 @@ if __name__ == "__main__": warnings.filterwarnings("ignore") rank_id = int(os.getenv("RANK_ID")) if os.getenv("RANK_ID") else None - rank_size = int(os.getenv("RANK_SIZE")) if os.getenv("RANK_SIZE") else None + rank_size = int(os.getenv("TRAIN_RANK_SIZE")) if os.getenv("TRAIN_RANK_SIZE") else None interval = int(os.getenv("INTERVAL")) if os.getenv("INTERVAL") else None train_steps = 10000 eval_steps = 1360 diff --git a/examples/dlrm/model/config.py b/examples/dlrm/model/config.py index 23b042c2..fd38276d 100644 --- a/examples/dlrm/model/config.py +++ b/examples/dlrm/model/config.py @@ -89,10 +89,10 @@ class LearningRateScheduler: class Config: def __init__(self, ): - self.rank_id = int(os.getenv("RANK_ID")) if os.getenv("RANK_ID") else None - tmp = os.getenv("RANK_SIZE") + self.rank_id = int(os.getenv("OMPI_COMM_WORLD_RANK")) if os.getenv("OMPI_COMM_WORLD_RANK") else None + tmp = os.getenv("TRAIN_RANK_SIZE") if tmp is None: - raise ValueError("please export RANK_SIZE") + raise ValueError("please export TRAIN_RANK_SIZE") self.rank_size = int(tmp) self.data_path = os.getenv("DLRM_CRITEO_DATA_PATH") diff --git a/examples/dlrm/model/main_mxrec.py b/examples/dlrm/model/main_mxrec.py index 3464f84e..6fda4f0a 100644 --- 
a/examples/dlrm/model/main_mxrec.py +++ b/examples/dlrm/model/main_mxrec.py @@ -41,7 +41,6 @@ from mx_rec.util.variable import get_dense_and_sparse_variable from mx_rec.util.log import logger from npu_bridge.npu_init import * - npu_plugin.set_device_sat_mode(0) dense_hashtable_seed = 128 @@ -253,7 +252,7 @@ if __name__ == "__main__": warnings.filterwarnings("ignore") rank_id = int(os.getenv("RANK_ID")) if os.getenv("RANK_ID") else None - rank_size = int(os.getenv("RANK_SIZE")) if os.getenv("RANK_SIZE") else None + rank_size = int(os.getenv("TRAIN_RANK_SIZE")) if os.getenv("TRAIN_RANK_SIZE") else None interval = int(os.getenv("INTERVAL")) if os.getenv("INTERVAL") else None train_steps = 10000 eval_steps = 1360 diff --git a/examples/dlrm/model/run.sh b/examples/dlrm/model/run.sh index f5cb4449..be509608 100644 --- a/examples/dlrm/model/run.sh +++ b/examples/dlrm/model/run.sh @@ -20,10 +20,25 @@ so_path=$1 mx_rec_package_path=$2 hccl_cfg_json=$3 dlrm_criteo_data_path=$4 +ip=$5 # no ranktable时传入该参数 -export RANK_SIZE=8 -echo "RANK_SIZE=${RANK_SIZE}, please make sure hccl configuration json file match this parameter" -export RANK_TABLE_FILE=${hccl_cfg_json} +interface="lo" +num_server=1 +local_rank_size=8 +num_process=$((num_server * local_rank_size)) +export TRAIN_RANK_SIZE=$num_process + +# 删除数据 +echo "CACHE_MODE:${CACHE_MODE}" +if [ ${CACHE_MODE} = "SSD" ]; then + echo "SSD train mode not allow file exist before training, + deleting dir ${cur_path}/ssd_data then create for SSD use case" + rm -rf ssd_data + mkdir ssd_data +fi +rm -rf kernel* +rm -rf /root/ascend/log/* +rm -rf model_dir_rank* op_cache ################# 参数配置 ###################### export USE_DYNAMIC=0 # 0:静态shape;1:动态shape @@ -34,25 +49,11 @@ export USE_MULTI_LOOKUP=0 # 0:一表一查;1:一表多查 export USE_MODIFY_GRAPH=0 # 0:feature spec模式;1:自动改图模式 ################################################ -echo "CACHE_MODE:${CACHE_MODE}" -if [ ${CACHE_MODE} = "SSD" ]; then - echo "SSD train mode not allow file exist before training, - deleting dir ${cur_path}/ssd_data then create for SSD use case" - rm -rf ssd_data - mkdir ssd_data -fi - export HCCL_CONNECT_TIMEOUT=1200 - export DLRM_CRITEO_DATA_PATH=${dlrm_criteo_data_path} export PYTHONPATH=${mx_rec_package_path}:${so_path}:$PYTHONPATH export LD_PRELOAD=/usr/lib64/libgomp.so.1 export LD_LIBRARY_PATH=${so_path}:/usr/local/lib:$LD_LIBRARY_PATH - -rm -rf kernel* -rm -rf /root/ascend/log/* -rm -rf model_dir_rank* op_cache - export ASCEND_DEVICE_ID=0 export RANK_ID_START=0 export JOB_ID=10086 @@ -78,10 +79,32 @@ echo "MXREC_MODE is $MXREC_MODE" export py=main_mxrec.py echo "py is $py" +# 区分ranktable和no ranktable +if [ -n "$ip" ]; then + # no ranktable分支 + echo "Current is no ranktable solution." + echo "Input node ip: $ip, please make sure this ip is available." 
+ export CM_CHIEF_IP=$ip # 主节点ip + export CM_CHIEF_PORT=60001 # 主节点监听端口 + export CM_CHIEF_DEVICE=0 # 主节点device id + export CM_WORKER_IP=$ip # 当前节点ip + export CM_WORKER_SIZE=$num_process # 参与集群训练的device数量 + echo "CM_CHIEF_IP=$CM_CHIEF_IP" + echo "CM_CHIEF_PORT=$CM_CHIEF_PORT" + echo "CM_CHIEF_DEVICE=$CM_CHIEF_DEVICE" + echo "CM_WORKER_IP=$CM_WORKER_IP" + echo "CM_WORKER_SIZE=$CM_WORKER_SIZE" +else + # ranktable分支 + echo "Current is ranktable solution, hccl json file:${hccl_cfg_json}" + export RANK_SIZE=$num_process + echo "RANK_SIZE=${RANK_SIZE}, please make sure hccl configuration json file match this parameter" + export RANK_TABLE_FILE=${hccl_cfg_json} +fi + echo "use horovod to start tasks" # GLOG_stderrthreshold -2:TRACE -1:DEBUG 0:INFO 1:WARN 2.ERROR, 默认为INFO mpi_args='-x BIND_INFO="0:12 12:48 60:48" -x GLOG_stderrthreshold=2 -x GLOG_logtostderr=true -bind-to none -x NCCL_SOCKET_IFNAME=docker0 -mca btl_tcp_if_exclude docker0' -interface="lo" -horovodrun --network-interface ${interface} -np ${RANK_SIZE} --mpi-args "${mpi_args}" --mpi -H localhost:${RANK_SIZE} \ -python3.7 ${py} 2>&1 | tee temp_${CACHE_MODE}_${RANK_SIZE}p.log +horovodrun --network-interface ${interface} -np ${num_process} --mpi-args "${mpi_args}" --mpi -H localhost:${local_rank_size} \ +python3.7 ${py} 2>&1 | tee temp_${CACHE_MODE}_${num_process}p.log -- Gitee From ae23ba5eed96ec82762ba14be9890e2bb6ab401b Mon Sep 17 00:00:00 2001 From: penghuiyang <1060916628@qq.com> Date: Thu, 16 May 2024 15:21:16 +0800 Subject: [PATCH 138/302] =?UTF-8?q?=E5=A2=9E=E5=8A=A0pandas=E6=A8=A1?= =?UTF-8?q?=E5=9D=97?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/build_mxRec_images/centos_build/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/build_mxRec_images/centos_build/Dockerfile b/docs/build_mxRec_images/centos_build/Dockerfile index 2d2b3579..190ec21b 100644 --- a/docs/build_mxRec_images/centos_build/Dockerfile +++ b/docs/build_mxRec_images/centos_build/Dockerfile @@ -114,6 +114,7 @@ RUN pip3.7 install -U pip && \ pip3.7 install cffi==1.12.3 && \ pip3.7 install pyyaml && \ pip3.7 install pathlib2 && \ + pip3.7 install pandas && \ pip3.7 install grpcio && \ pip3.7 install grpcio-tools && \ pip3.7 install protobuf==3.20.0 && \ -- Gitee From c0a1b74b3146047143d9de2ff4085fff7e367e7c Mon Sep 17 00:00:00 2001 From: penghuiyang <1060916628@qq.com> Date: Thu, 16 May 2024 15:24:34 +0800 Subject: [PATCH 139/302] =?UTF-8?q?=E5=A2=9E=E5=8A=A0pandas=E6=A8=A1?= =?UTF-8?q?=E5=9D=97?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index fd3b0691..44481bee 100644 --- a/README.md +++ b/README.md @@ -17,17 +17,20 @@ mxRec作为面向互联网市场搜索推荐广告的应用使能SDK产品,对 安装前,请参考《CANN 软件安装指南》安装CANN开发套件软件包和TensorFlow适配昇腾插件。 CANN软件提供进程级环境变量设置脚本,供用户在进程中引用,以自动完成环境变量设置。用户进程结束后自动失效。可在程序启动的Shell脚本中使用如下命令设置CANN的相关环境变量,也可通过命令行执行如下命令(以root用户默认安装路径“/usr/local/Ascend”为例): + ```shell source /usr/local/Ascend/ascend-toolkit/set_env.sh source /usr/local/Ascend/tfplugin/set_env.sh ``` 安装依赖,若未构建镜像,直接在物理机上进行开发,则须安装以下Python依赖 + ```shell -pip3 install numpy decorator sympy==1.4 cffi==1.12.3 pyyaml pathlib2 grpcio grpcio-tools protobuf==3.20.0 scipy requests mpi4py easydict scikit-learn==0.20.0 attrs +pip3 install numpy decorator sympy==1.4 cffi==1.12.3 pyyaml pathlib2 pandas grpcio grpcio-tools protobuf==3.20.0 scipy requests mpi4py easydict 
scikit-learn==0.20.0 attrs ``` horovod依赖安装前需配置“HOROVOD_WITH_MPI”、“HOROVOD_WITH_TENSORFLOW”,依赖安装命令参考如下。 + ```shell HOROVOD_WITH_MPI=1 HOROVOD_WITH_TENSORFLOW=1 pip3.7 install horovod --no-cache-dir ``` @@ -35,6 +38,7 @@ HOROVOD_WITH_MPI=1 HOROVOD_WITH_TENSORFLOW=1 pip3.7 install horovod --no-cache-d ### 二进制包安装 从昇腾开源社区直接获取编译打包后的产品包。解压后包含tf1和tf2两个版本的whl安装包,使用pip命令安装whl包(请根据实际需求,选取对应TensorFlow版本匹配的Wheel包): + ```shell pip3 install mx_rec-{version}-py3-none-linux_{arch}.whl ``` @@ -46,6 +50,7 @@ export PYTHONPATH={mxrec_install_path}:{mxrec_install_path}/mxRec:$PYTHONPATH ``` 如需使用动态扩容功能,进入已解压的mxRec软件包“mindxsdk-mxrec/cust_op/cust_op_by_addr”目录中。参考以下命令编译并安装动态扩容算子包。 + ```shell bash run.sh ``` @@ -53,11 +58,13 @@ bash run.sh ### 源码编译安装 编译环境依赖: + - Python3.7.5 - GCC 7.3.0 - CMake 3.20.6 开源依赖: + - [pybind11 v2.10.3](https://github.com/pybind/pybind11/archive/refs/tags/v2.10.3.zip) - [securec](https://github.com/huaweicloud/huaweicloud-sdk-c-obs/archive/refs/tags/v3.23.9.zip) - [openmpi 4.1.5](https://download.open-mpi.org/release/open-mpi/v4.1/openmpi-4.1.5.tar.gz): 请参考软件文档在编译环境完成安装 @@ -68,11 +75,18 @@ bash run.sh 为了构建多个版本的whl包,编译脚本在python虚拟环境完成对应tensorflow版本的安装。用户可以根据实际情况调整编译脚本,指定tensorflow的安装路径。编译方法: 进入mxRec代码目录: -- setup.py:执行脚本setup.py,比如:**python3.7 setup.py**完成tf1和tf2版本whl包的构建和打包,构建成功后,whl包在build/mindxsdk-mxrec/目录下,其中tf1_whl和tf2_whl目录下存在对应的whl包。执行脚本前,请参考build/build_tf1.sh、build/build_tf2.sh创建对应的虚拟环境,在虚拟环境中完成对应tensorflow版本的安装,并修改对应的激活命令。 -- setup_tf1.py:执行脚本setup_tf1.py,比如:**python3.7 setup_tf1.py bdist_wheel**完成tf1版本whl包的构建,构建成功后,whl包在build/mindxsdk-mxrec/tf1_whl子目录下。执行脚本前,请参考build/build_tf1.sh创建tf1虚拟环境,在虚拟环境中完成tensorflow 1.15.0版本的安装,并修改对应的激活命令。 -- setup_tf2.py:执行脚本setup_tf2.py,比如:**python3.7 setup_tf2.py bdist_wheel**完成tf2版本whl包的构建,构建成功后,whl包在build/mindxsdk-mxrec/tf2_whl子目录下。执行脚本前,请参考build/build_tf2.sh创建tf2虚拟环境,在虚拟环境中完成tensorflow 2.6.5版本的安装,并修改对应的激活命令。 + +- setup.py:执行脚本setup.py,比如:**python3.7 setup.py** + 完成tf1和tf2版本whl包的构建和打包,构建成功后,whl包在build/mindxsdk-mxrec/目录下,其中tf1_whl和tf2_whl目录下存在对应的whl包。执行脚本前,请参考build/build_tf1.sh、build/build_tf2.sh创建对应的虚拟环境,在虚拟环境中完成对应tensorflow版本的安装,并修改对应的激活命令。 +- setup_tf1.py:执行脚本setup_tf1.py,比如:**python3.7 setup_tf1.py bdist_wheel** + 完成tf1版本whl包的构建,构建成功后,whl包在build/mindxsdk-mxrec/tf1_whl子目录下。执行脚本前,请参考build/build_tf1.sh创建tf1虚拟环境,在虚拟环境中完成tensorflow + 1.15.0版本的安装,并修改对应的激活命令。 +- setup_tf2.py:执行脚本setup_tf2.py,比如:**python3.7 setup_tf2.py bdist_wheel** + 完成tf2版本whl包的构建,构建成功后,whl包在build/mindxsdk-mxrec/tf2_whl子目录下。执行脚本前,请参考build/build_tf2.sh创建tf2虚拟环境,在虚拟环境中完成tensorflow + 2.6.5版本的安装,并修改对应的激活命令。 如需使用动态扩容功能,进入“./cust_op/cust_op_by_addr”目录中。参考以下命令编译并安装动态扩容算子包。 + ```shell bash run.sh ``` @@ -88,6 +102,7 @@ bash run.sh - pytest-html 如需使用python测试用例,需要先安装上述依赖以及能够在tf1环境下进行源码编译,然后进入tests目录中。参考以下命令执行python侧测试用例: + ```shell bash run_python_dt.sh ``` @@ -108,11 +123,13 @@ emock-0.9.0.zip、pybind11-2.10.3.zip、 huaweicloud-sdk-c-obs-3.23.9.zip。如 如需使用C++测试用例,需要按照上述描述准备需要的依赖,准备好之后,进入src目录中。参考以下命令执行C++测试用例: tf1环境下使用如下命令: + ```shell bash test_ut.sh tf1 ``` tf2环境下使用如下命令: + ```shell bash test_ut.sh tf2 ``` -- Gitee From 80cc0503448850b5d0c3c9a289480647f3cb0df3 Mon Sep 17 00:00:00 2001 From: penghuiyang <1060916628@qq.com> Date: Thu, 16 May 2024 15:42:59 +0800 Subject: [PATCH 140/302] =?UTF-8?q?readme=E5=A2=9E=E5=8A=A0pandas?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 23 +++-------------------- 1 file changed, 3 insertions(+), 20 deletions(-) diff --git a/README.md b/README.md index 44481bee..17d38fcd 100644 --- a/README.md +++ 
b/README.md @@ -17,20 +17,17 @@ mxRec作为面向互联网市场搜索推荐广告的应用使能SDK产品,对 安装前,请参考《CANN 软件安装指南》安装CANN开发套件软件包和TensorFlow适配昇腾插件。 CANN软件提供进程级环境变量设置脚本,供用户在进程中引用,以自动完成环境变量设置。用户进程结束后自动失效。可在程序启动的Shell脚本中使用如下命令设置CANN的相关环境变量,也可通过命令行执行如下命令(以root用户默认安装路径“/usr/local/Ascend”为例): - ```shell source /usr/local/Ascend/ascend-toolkit/set_env.sh source /usr/local/Ascend/tfplugin/set_env.sh ``` 安装依赖,若未构建镜像,直接在物理机上进行开发,则须安装以下Python依赖 - ```shell pip3 install numpy decorator sympy==1.4 cffi==1.12.3 pyyaml pathlib2 pandas grpcio grpcio-tools protobuf==3.20.0 scipy requests mpi4py easydict scikit-learn==0.20.0 attrs ``` horovod依赖安装前需配置“HOROVOD_WITH_MPI”、“HOROVOD_WITH_TENSORFLOW”,依赖安装命令参考如下。 - ```shell HOROVOD_WITH_MPI=1 HOROVOD_WITH_TENSORFLOW=1 pip3.7 install horovod --no-cache-dir ``` @@ -38,7 +35,6 @@ HOROVOD_WITH_MPI=1 HOROVOD_WITH_TENSORFLOW=1 pip3.7 install horovod --no-cache-d ### 二进制包安装 从昇腾开源社区直接获取编译打包后的产品包。解压后包含tf1和tf2两个版本的whl安装包,使用pip命令安装whl包(请根据实际需求,选取对应TensorFlow版本匹配的Wheel包): - ```shell pip3 install mx_rec-{version}-py3-none-linux_{arch}.whl ``` @@ -50,7 +46,6 @@ export PYTHONPATH={mxrec_install_path}:{mxrec_install_path}/mxRec:$PYTHONPATH ``` 如需使用动态扩容功能,进入已解压的mxRec软件包“mindxsdk-mxrec/cust_op/cust_op_by_addr”目录中。参考以下命令编译并安装动态扩容算子包。 - ```shell bash run.sh ``` @@ -58,13 +53,11 @@ bash run.sh ### 源码编译安装 编译环境依赖: - - Python3.7.5 - GCC 7.3.0 - CMake 3.20.6 开源依赖: - - [pybind11 v2.10.3](https://github.com/pybind/pybind11/archive/refs/tags/v2.10.3.zip) - [securec](https://github.com/huaweicloud/huaweicloud-sdk-c-obs/archive/refs/tags/v3.23.9.zip) - [openmpi 4.1.5](https://download.open-mpi.org/release/open-mpi/v4.1/openmpi-4.1.5.tar.gz): 请参考软件文档在编译环境完成安装 @@ -75,18 +68,11 @@ bash run.sh 为了构建多个版本的whl包,编译脚本在python虚拟环境完成对应tensorflow版本的安装。用户可以根据实际情况调整编译脚本,指定tensorflow的安装路径。编译方法: 进入mxRec代码目录: - -- setup.py:执行脚本setup.py,比如:**python3.7 setup.py** - 完成tf1和tf2版本whl包的构建和打包,构建成功后,whl包在build/mindxsdk-mxrec/目录下,其中tf1_whl和tf2_whl目录下存在对应的whl包。执行脚本前,请参考build/build_tf1.sh、build/build_tf2.sh创建对应的虚拟环境,在虚拟环境中完成对应tensorflow版本的安装,并修改对应的激活命令。 -- setup_tf1.py:执行脚本setup_tf1.py,比如:**python3.7 setup_tf1.py bdist_wheel** - 完成tf1版本whl包的构建,构建成功后,whl包在build/mindxsdk-mxrec/tf1_whl子目录下。执行脚本前,请参考build/build_tf1.sh创建tf1虚拟环境,在虚拟环境中完成tensorflow - 1.15.0版本的安装,并修改对应的激活命令。 -- setup_tf2.py:执行脚本setup_tf2.py,比如:**python3.7 setup_tf2.py bdist_wheel** - 完成tf2版本whl包的构建,构建成功后,whl包在build/mindxsdk-mxrec/tf2_whl子目录下。执行脚本前,请参考build/build_tf2.sh创建tf2虚拟环境,在虚拟环境中完成tensorflow - 2.6.5版本的安装,并修改对应的激活命令。 +- setup.py:执行脚本setup.py,比如:**python3.7 setup.py**完成tf1和tf2版本whl包的构建和打包,构建成功后,whl包在build/mindxsdk-mxrec/目录下,其中tf1_whl和tf2_whl目录下存在对应的whl包。执行脚本前,请参考build/build_tf1.sh、build/build_tf2.sh创建对应的虚拟环境,在虚拟环境中完成对应tensorflow版本的安装,并修改对应的激活命令。 +- setup_tf1.py:执行脚本setup_tf1.py,比如:**python3.7 setup_tf1.py bdist_wheel**完成tf1版本whl包的构建,构建成功后,whl包在build/mindxsdk-mxrec/tf1_whl子目录下。执行脚本前,请参考build/build_tf1.sh创建tf1虚拟环境,在虚拟环境中完成tensorflow 1.15.0版本的安装,并修改对应的激活命令。 +- setup_tf2.py:执行脚本setup_tf2.py,比如:**python3.7 setup_tf2.py bdist_wheel**完成tf2版本whl包的构建,构建成功后,whl包在build/mindxsdk-mxrec/tf2_whl子目录下。执行脚本前,请参考build/build_tf2.sh创建tf2虚拟环境,在虚拟环境中完成tensorflow 2.6.5版本的安装,并修改对应的激活命令。 如需使用动态扩容功能,进入“./cust_op/cust_op_by_addr”目录中。参考以下命令编译并安装动态扩容算子包。 - ```shell bash run.sh ``` @@ -102,7 +88,6 @@ bash run.sh - pytest-html 如需使用python测试用例,需要先安装上述依赖以及能够在tf1环境下进行源码编译,然后进入tests目录中。参考以下命令执行python侧测试用例: - ```shell bash run_python_dt.sh ``` @@ -123,13 +108,11 @@ emock-0.9.0.zip、pybind11-2.10.3.zip、 huaweicloud-sdk-c-obs-3.23.9.zip。如 如需使用C++测试用例,需要按照上述描述准备需要的依赖,准备好之后,进入src目录中。参考以下命令执行C++测试用例: tf1环境下使用如下命令: - ```shell bash 
test_ut.sh tf1 ``` tf2环境下使用如下命令: - ```shell bash test_ut.sh tf2 ``` -- Gitee From 75539d487923064b20dce016beeffa4a92f1cdad Mon Sep 17 00:00:00 2001 From: penghuiyang <1060916628@qq.com> Date: Thu, 16 May 2024 21:37:35 +0800 Subject: [PATCH 141/302] =?UTF-8?q?=E4=BF=AE=E6=94=B9main=E8=84=9A?= =?UTF-8?q?=E6=9C=AC=E5=86=85=E5=88=A0=E9=99=A4=E5=B7=B2=E4=BF=9D=E5=AD=98?= =?UTF-8?q?=E6=96=87=E4=BB=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/DCNv2/main_mxrec.py | 23 +++++++++-- examples/demo/little_demo/main.py | 42 +++++++++++++-------- examples/demo/little_demo/run.sh | 16 +------- examples/demo/little_demo_estimator/main.py | 24 ++++++++++++ examples/demo/little_demo_estimator/run.sh | 7 +--- 5 files changed, 72 insertions(+), 40 deletions(-) diff --git a/examples/DCNv2/main_mxrec.py b/examples/DCNv2/main_mxrec.py index a1e38897..fb2efdee 100644 --- a/examples/DCNv2/main_mxrec.py +++ b/examples/DCNv2/main_mxrec.py @@ -13,13 +13,15 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== +import numpy as np +import os +import random +import shutil import time import warnings -import random from glob import glob - from sklearn.metrics import roc_auc_score -import numpy as np + from npu_bridge.npu_init import * from model import MyModel @@ -244,11 +246,24 @@ def create_feature_spec_list(use_timestamp=False): return feature_spec_list +def _del_related_dir(del_path: str) -> None: + if not os.path.isabs(del_path): + del_path = os.path.join(os.getcwd(), del_path) + dirs = glob(del_path) + for sub_dir in dirs: + shutil.rmtree(sub_dir, ignore_errors=True) + logger.info(f"delete dir:{sub_dir}") + + +def _clear_saved_model() -> None: + _del_related_dir("/root/ascend/log/*") + + if __name__ == "__main__": tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) warnings.filterwarnings("ignore") + _clear_saved_model() - rank_id = int(os.getenv("RANK_ID")) if os.getenv("RANK_ID") else None rank_size = int(os.getenv("TRAIN_RANK_SIZE")) if os.getenv("TRAIN_RANK_SIZE") else None interval = int(os.getenv("INTERVAL")) if os.getenv("INTERVAL") else None train_steps = 10000 diff --git a/examples/demo/little_demo/main.py b/examples/demo/little_demo/main.py index a6ef96fc..426eb64b 100644 --- a/examples/demo/little_demo/main.py +++ b/examples/demo/little_demo/main.py @@ -148,23 +148,34 @@ def create_feature_spec_list(use_timestamp=False): return feature_spec_list -def clear_saved_model(): +def _del_related_dir(del_path: str) -> None: + if not os.path.isabs(del_path): + del_path = os.path.join(os.getcwd(), del_path) + dirs = glob(del_path) + for sub_dir in dirs: + shutil.rmtree(sub_dir, ignore_errors=True) + logger.info(f"delete dir:{sub_dir}") + + +def _clear_saved_model() -> None: + _del_related_dir("/root/ascend/log/*") + _del_related_dir("kernel*") + _del_related_dir("export_graph") + mode = UseMode.mapping(os.getenv("USE_MODE")) - if mode == UseMode.TRAIN: - logger.info("current mode is train, will delete previous saved model data if exist.") - save_model_path = os.path.join(os.getcwd(), "saved-model") - shutil.rmtree(save_model_path, ignore_errors=True) - if not (os.getenv("CACHE_MODE", "") == CacheModeEnum.SSD.value and mode == UseMode.TRAIN): + if mode != UseMode.TRAIN: return + logger.info("current mode is train, will delete previous saved model data if exist.") + _del_related_dir("saved-model") - # ssd not 
allow overwrite file, should clear it before training - logger.info("current cache mode is SSD, will delete previous saved ssd data if exist.") - for part_path in _SSD_SAVE_PATH: - if "/" not in part_path and "\\" not in part_path: - part_path = os.path.join(os.getcwd(), part_path) - shutil.rmtree(part_path, ignore_errors=True) + if not (os.getenv("CACHE_MODE", "") == CacheModeEnum.SSD.value): + return + logger.info("current cache mode is SSD, and file overwrite is not allowed in SSD mode, deleting exist directory" + " then create empty directory for this use case.") + for sub_path in _SSD_SAVE_PATH: + _del_related_dir(sub_path) try: - os.mkdir(part_path) + os.mkdir(sub_path) except OSError: logger.warning("ssd path has exist") # 多进程并行,忽略异常 @@ -172,6 +183,7 @@ def clear_saved_model(): if __name__ == "__main__": tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) warnings.filterwarnings("ignore") + _clear_saved_model() use_mode = UseMode.mapping(os.getenv("USE_MODE")) # 最大数据集生成数量 @@ -217,7 +229,7 @@ if __name__ == "__main__": if len(model_file) == 0: raise ValueError(f"get USE_MODE:{use_mode}, but no model file exist at:{load_path_pattern}") if_load = True - + # nbatch function needs to be used together with the prefetch and host_vocabulary_size != 0 init(train_steps=TRAIN_STEPS, eval_steps=EVAL_STEPS, @@ -267,7 +279,7 @@ if __name__ == "__main__": if cache_mode in ["DDR", "SSD"] and not use_dynamic: logger.warning("when cache_mode in [DDR, SSD], suggest use_dynamic=true to avoid tuning size parameter") emb_initializer = tf.compat.v1.constant_initializer(0) if USE_DETERMINISTIC \ - else tf.compat.v1.truncated_normal_initializer() + else tf.compat.v1.truncated_normal_initializer() user_hashtable = create_table(key_dtype=tf.int64, dim=tf.TensorShape([cfg.user_hashtable_dim]), name='user_table', diff --git a/examples/demo/little_demo/run.sh b/examples/demo/little_demo/run.sh index d585be02..5b45af84 100644 --- a/examples/demo/little_demo/run.sh +++ b/examples/demo/little_demo/run.sh @@ -15,26 +15,12 @@ # ============================================================================== kill -9 `ps -ef | grep python | grep -v grep | awk '{print $2}'` > /dev/null 2>&1 -rm -rf /root/ascend/log/* -rm -rf ./kernel* -rm -rf ./export_graph/* # 支持[train, load_and_train, predict] -export USE_MODE="train" -if [ $USE_MODE = "train" ]; then - echo "train mode: saved-model will be deleted" - rm -rf ./saved-model -fi +export USE_MODE="train" # if train mode, will remove dir ./saved-model # cache mode support: HBM, DDR, SSD export CACHE_MODE="HBM" -if [ $CACHE_MODE = "SSD" ] && [ $USE_MODE = "train" ]; then - echo "SSD train mode not allow file exist in directory when training a model from stratch in case overwrite, - deleting directory ssd_data then create for this use case" - rm -rf ssd_data - mkdir ssd_data -fi - # 获取输入参数:py、ip if [ $# -ge 1 ]; then diff --git a/examples/demo/little_demo_estimator/main.py b/examples/demo/little_demo_estimator/main.py index cca5a7a5..20b7381d 100644 --- a/examples/demo/little_demo_estimator/main.py +++ b/examples/demo/little_demo_estimator/main.py @@ -17,6 +17,8 @@ import argparse import os +import shutil +from glob import glob import tensorflow as tf from mx_rec.util.initialize import init, terminate_config_initializer @@ -142,6 +144,27 @@ def create_feature_spec_list(use_timestamp=False): return feature_spec_list +def _del_related_dir(del_path: str) -> None: + if not os.path.isabs(del_path): + del_path = os.path.join(os.getcwd(), del_path) + dirs = glob(del_path) 
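+    # glob() returns an empty list when nothing matches, so rerunning the
+    # cleanup on an already-clean workspace is a harmless no-op.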
+ for sub_dir in dirs: + shutil.rmtree(sub_dir, ignore_errors=True) + logger.info(f"delete dir:{sub_dir}") + + +def _clear_saved_model() -> None: + _del_related_dir("/root/ascend/log/*") + _del_related_dir("kernel*") + _del_related_dir("export_graph") + + mode = args.run_mode + if not mode.startswith("train"): + return + logger.info("current mode contains train, will delete previous saved model data if exist.") + _del_related_dir("_rank*") + + if __name__ == '__main__': parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser.add_argument('--run_mode', type=str, default='train_and_evaluate') # 运行模式,在run.sh中进行配置 @@ -185,6 +208,7 @@ if __name__ == '__main__': args.eval_steps = -1 elif args.run_mode == 'train_and_evaluate': args.save_checkpoints_steps = args.train_steps + _clear_saved_model() # set init init(train_steps=args.train_steps, diff --git a/examples/demo/little_demo_estimator/run.sh b/examples/demo/little_demo_estimator/run.sh index f3d34c82..79fdc3f4 100644 --- a/examples/demo/little_demo_estimator/run.sh +++ b/examples/demo/little_demo_estimator/run.sh @@ -83,12 +83,7 @@ export TF_CPP_MIN_LOG_LEVEL=3 # tensorflow日志级别,3对应FATAL # 设置应用类日志的全局日志级别及各模块日志级别,具体请参考昇腾官网CANN文档 export ASCEND_GLOBAL_LOG_LEVEL=3 # “设置日志级别”章节0:debug, 1:info, 2:warning, 3:error, 4:NULL export MXREC_MODE="ASC" -export USE_MODE="train_and_evaluate" # 支持[train, predict, train_and_evaluate] - -if [ $USE_MODE = "train" ] || [ $USE_MODE = "train_and_evaluate" ];then - echo "train mode: saved-model will be deleted" - rm -rf ./_rank* -fi +export USE_MODE="train_and_evaluate" # 支持[train, predict, train_and_evaluate],train相关模式将删除./_rank*目录 ################# 参数配置 ###################### export USE_DYNAMIC=1 # 0:静态shape;1:动态shape -- Gitee From 639e33ad4eab81762d607ae00368406e9dc97df7 Mon Sep 17 00:00:00 2001 From: penghuiyang <1060916628@qq.com> Date: Thu, 16 May 2024 21:47:23 +0800 Subject: [PATCH 142/302] =?UTF-8?q?=E4=BF=AE=E6=94=B9main=E8=84=9A?= =?UTF-8?q?=E6=9C=AC=E5=86=85=E5=88=A0=E9=99=A4=E5=B7=B2=E4=BF=9D=E5=AD=98?= =?UTF-8?q?=E6=96=87=E4=BB=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/demo/little_demo/main.py | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/demo/little_demo/main.py b/examples/demo/little_demo/main.py index 426eb64b..d8dd851a 100644 --- a/examples/demo/little_demo/main.py +++ b/examples/demo/little_demo/main.py @@ -176,6 +176,7 @@ def _clear_saved_model() -> None: _del_related_dir(sub_path) try: os.mkdir(sub_path) + logger.info(f"mkdir dir:{sub_path}") except OSError: logger.warning("ssd path has exist") # 多进程并行,忽略异常 -- Gitee From d9e866bb86c9654e24294fdb0a9e27202aaba036 Mon Sep 17 00:00:00 2001 From: penghuiyang <1060916628@qq.com> Date: Thu, 16 May 2024 22:02:51 +0800 Subject: [PATCH 143/302] =?UTF-8?q?=E9=97=A8=E7=A6=81=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/DCNv2/main_mxrec.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/examples/DCNv2/main_mxrec.py b/examples/DCNv2/main_mxrec.py index fb2efdee..f789b2c5 100644 --- a/examples/DCNv2/main_mxrec.py +++ b/examples/DCNv2/main_mxrec.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== -import numpy as np + import os import random import shutil @@ -22,6 +22,8 @@ import warnings from glob import glob from sklearn.metrics import roc_auc_score +import numpy as np + from npu_bridge.npu_init import * from model import MyModel -- Gitee From b05c404ddf42e4593d83d57c72a762ad9fe95c88 Mon Sep 17 00:00:00 2001 From: penghuiyang <1060916628@qq.com> Date: Fri, 17 May 2024 10:34:53 +0800 Subject: [PATCH 144/302] =?UTF-8?q?=E5=88=A0=E9=99=A4run=E8=84=9A=E6=9C=AC?= =?UTF-8?q?=E5=86=97=E4=BD=99=E6=8C=87=E4=BB=A4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/demo/little_demo_estimator/run.sh | 3 --- 1 file changed, 3 deletions(-) diff --git a/examples/demo/little_demo_estimator/run.sh b/examples/demo/little_demo_estimator/run.sh index 79fdc3f4..8bb43b19 100644 --- a/examples/demo/little_demo_estimator/run.sh +++ b/examples/demo/little_demo_estimator/run.sh @@ -15,9 +15,6 @@ # ============================================================================== kill -9 `ps -ef | grep python | grep -v grep | awk '{print $2}'` > /dev/null 2>&1 -rm -rf /root/ascend/log/* -rm -rf ./kernel* -rm -rf ./export_graph/* # 获取输入参数:py、ip if [ $# -ge 1 ]; then -- Gitee From 1cef4c2bdfc0431d9027702071ef4d3a94028273 Mon Sep 17 00:00:00 2001 From: penghuiyang <1060916628@qq.com> Date: Fri, 17 May 2024 15:40:35 +0800 Subject: [PATCH 145/302] =?UTF-8?q?=E5=BC=95=E7=94=A8=E5=BD=93=E5=89=8D?= =?UTF-8?q?=E7=9B=AE=E5=BD=95=E4=B8=8Bconfig=E6=96=87=E4=BB=B6=E5=92=8Cdlr?= =?UTF-8?q?m=E8=A7=A3=E9=99=A4=E8=80=A6=E5=90=88?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/DCNv2/config.py | 230 +++++++++++++++++++++++++++++++++++ examples/DCNv2/main_mxrec.py | 2 +- 2 files changed, 231 insertions(+), 1 deletion(-) create mode 100644 examples/DCNv2/config.py diff --git a/examples/DCNv2/config.py b/examples/DCNv2/config.py new file mode 100644 index 00000000..fd38276d --- /dev/null +++ b/examples/DCNv2/config.py @@ -0,0 +1,230 @@ +# coding=utf-8 +# Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +import os + +import tensorflow as tf +from tensorflow.core.protobuf.rewriter_config_pb2 import RewriterConfig +from npu_bridge.estimator.npu.npu_config import NPURunConfig + + +class LearningRateScheduler: + """ + LR Scheduler combining Polynomial Decay with Warmup at the beginning. + TF-based cond operations necessary for performance in graph mode. 
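+
+    Schedule, as a function of the global step:
+      1. warmup:   the factor ramps linearly from 0 to 1 over `warmup_steps`;
+      2. constant: the factor holds at 1.0 until `decay_start_step`;
+      3. decay:    the factor follows
+                   ((decay_end_step - step) / decay_steps) ** poly_power with
+                   poly_power = 2.0, settling at 1 / decay_steps once the
+                   decay window has passed.
+    The same factor scales both base rates, so `calc` returns the pair
+    (lr_dense, lr_sparse).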
+ """ + + def __init__(self, base_lr_dense, base_lr_sparse, warmup_steps, decay_start_step, decay_steps): + self.warmup_steps = tf.constant(warmup_steps, dtype=tf.int32) + self.decay_start_step = tf.constant(decay_start_step, dtype=tf.int32) + self.decay_steps = tf.constant(decay_steps) + self.decay_end_step = decay_start_step + decay_steps # 65041 + self.poly_power = 2.0 + self.base_lr_dense = base_lr_dense + self.base_lr_sparse = base_lr_sparse + + def calc(self, global_step): + # used for the warmup stage + warmup_step = tf.cast(1 / self.warmup_steps, tf.float32) + lr_factor_warmup = 1 - tf.cast(self.warmup_steps - global_step, tf.float32) * warmup_step + lr_factor_warmup = tf.cast(lr_factor_warmup, tf.float32) + # used for the constant stage + lr_factor_constant = tf.cast(1.0, tf.float32) + + # used for the decay stage + lr_factor_decay = (self.decay_end_step - global_step) / self.decay_steps + lr_factor_decay = tf.math.pow(lr_factor_decay, self.poly_power) + lr_factor_decay = tf.cast(lr_factor_decay, tf.float32) + sparse_after_decay = tf.cast(1 / self.decay_steps, tf.float32) + + lr_factor_decay_sparse = tf.cond( + global_step < self.decay_end_step, + lambda: lr_factor_decay, + lambda: sparse_after_decay, + ) + + lr_factor_decay_dense = tf.cond( + global_step < self.decay_end_step, + lambda: lr_factor_decay, + lambda: sparse_after_decay, + ) + + poly_schedule_sparse = tf.cond( + global_step < self.decay_start_step, + lambda: lr_factor_constant, + lambda: lr_factor_decay_sparse, + ) + + poly_schedule_dense = tf.cond( + global_step < self.decay_start_step, + lambda: lr_factor_constant, + lambda: lr_factor_decay_dense, + ) + + lr_factor_sparse = tf.cond( + global_step < self.warmup_steps, lambda: lr_factor_warmup, lambda: poly_schedule_sparse + ) + + lr_factor_dense = tf.cond( + global_step < self.warmup_steps, lambda: lr_factor_warmup, lambda: poly_schedule_dense + ) + + lr_sparse = self.base_lr_sparse * lr_factor_sparse + lr_dense = self.base_lr_dense * lr_factor_dense + return lr_dense, lr_sparse + + +class Config: + def __init__(self, ): + self.rank_id = int(os.getenv("OMPI_COMM_WORLD_RANK")) if os.getenv("OMPI_COMM_WORLD_RANK") else None + tmp = os.getenv("TRAIN_RANK_SIZE") + if tmp is None: + raise ValueError("please export TRAIN_RANK_SIZE") + self.rank_size = int(tmp) + + self.data_path = os.getenv("DLRM_CRITEO_DATA_PATH") + self.train_file_pattern = "train" + self.test_file_pattern = "test" + + self.batch_size = 8192 + self.line_per_sample = 1024 + self.train_epoch = 3 + self.test_epoch = 1 + self.perform_shuffle = False + + self.key_type = tf.int64 + self.label_type = tf.float32 + self.value_type = tf.int64 + + self.feat_cnt = 26 + self.__set_emb_table_size() + + self.field_num = 26 + self.send_count = 46000 // self.rank_size + + self.emb_dim = 128 + self.hashtable_threshold = 1 + + self.USE_PIPELINE_TEST = False + + # 动态学习率 + GLOBAL_BATCH_SIZE = 8192 * 8 + LR_SCHEDULE_STEPS = [ + int(2750 * 55296 / GLOBAL_BATCH_SIZE), + int(49315 * 55296 / GLOBAL_BATCH_SIZE), + int(27772 * 55296 / GLOBAL_BATCH_SIZE), + ] + self.global_step = tf.Variable(0, trainable=False) + _lr_scheduler = LearningRateScheduler( + 28.443, + 33.71193, + LR_SCHEDULE_STEPS[0], + LR_SCHEDULE_STEPS[1], + LR_SCHEDULE_STEPS[2], + ) + self.learning_rate = _lr_scheduler.calc(self.global_step) + + def __set_emb_table_size(self): + self.cache_mode = os.getenv("CACHE_MODE") + if self.cache_mode is None: + raise ValueError("please export CACHE_MODE environment variable, support:[HBM, DDR, SSD]") + + if self.cache_mode == 
"HBM": + self.dev_vocab_size = 24_000_000 * self.rank_size + self.host_vocab_size = 0 + elif self.cache_mode == "DDR": + self.dev_vocab_size = 500_000 * self.rank_size + self.host_vocab_size = 24_000_000 * self.rank_size + elif self.cache_mode == "SSD": + self.dev_vocab_size = 100_000 * self.rank_size + self.host_vocab_size = 2_000_000 * self.rank_size + self.ssd_vocab_size = 24_000_000 * self.rank_size + else: + raise ValueError(f"get CACHE_MODE:{self.cache_mode}, expect in [HBM, DDR, SSD]") + + def get_emb_table_cfg(self) -> dict: + if self.cache_mode == "HBM": + return {"device_vocabulary_size": self.dev_vocab_size} + elif self.cache_mode == "DDR": + return {"device_vocabulary_size": self.dev_vocab_size, + "host_vocabulary_size": self.host_vocab_size} + elif self.cache_mode == "SSD": + return {"device_vocabulary_size": self.dev_vocab_size, + "host_vocabulary_size": self.host_vocab_size, + "ssd_vocabulary_size": self.ssd_vocab_size, + "ssd_data_path": ["ssd_data"]} + else: + raise RuntimeError(f"get CACHE_MODE:{self.cache_mode}, check Config.__set_emb_table_size implementation") + + +def sess_config(dump_data=False, dump_path="./dump_output", dump_steps="0|1|2"): + session_config = tf.ConfigProto(allow_soft_placement=False, + log_device_placement=False) + session_config.gpu_options.allow_growth = True + custom_op = session_config.graph_options.rewrite_options.custom_optimizers.add() + custom_op.name = "NpuOptimizer" + custom_op.parameter_map["mix_compile_mode"].b = False + custom_op.parameter_map["use_off_line"].b = True + custom_op.parameter_map["min_group_size"].b = 1 + # 可选配置level0:pairwise;level1:pairwise + custom_op.parameter_map["HCCL_algorithm"].s = tf.compat.as_bytes("level0:fullmesh;level1:fullmesh") + custom_op.parameter_map["enable_data_pre_proc"].b = True + custom_op.parameter_map["iterations_per_loop"].i = 10 + custom_op.parameter_map["precision_mode"].s = tf.compat.as_bytes("allow_mix_precision") + custom_op.parameter_map["hcom_parallel"].b = False + custom_op.parameter_map["op_precision_mode"].s = tf.compat.as_bytes("op_impl_mode.ini") + custom_op.parameter_map["op_execute_timeout"].i = 2000 + custom_op.parameter_map["variable_memory_max_size"].s = tf.compat.as_bytes( + str(13 * 1024 * 1024 * 1024)) # total 31 need 13; + custom_op.parameter_map["graph_memory_max_size"].s = tf.compat.as_bytes(str(18 * 1024 * 1024 * 1024)) # need 25 + custom_op.parameter_map["stream_max_parallel_num"].s = tf.compat.as_bytes("DNN_VM_AICPU:3,AIcoreEngine:3") + + if dump_data: + custom_op.parameter_map["enable_dump"].b = True + custom_op.parameter_map["dump_path"].s = tf.compat.as_bytes(dump_path) + custom_op.parameter_map["dump_step"].s = tf.compat.as_bytes(dump_steps) + custom_op.parameter_map["dump_mode"].s = tf.compat.as_bytes("all") + + session_config.graph_options.rewrite_options.remapping = RewriterConfig.OFF + session_config.graph_options.rewrite_options.memory_optimization = RewriterConfig.OFF + + return session_config + + +def get_npu_run_config(): + session_config = tf.ConfigProto(allow_soft_placement=False, + log_device_placement=False) + + session_config.gpu_options.allow_growth = True + custom_op = session_config.graph_options.rewrite_options.custom_optimizers.add() + custom_op.name = "NpuOptimizer" + session_config.graph_options.rewrite_options.remapping = RewriterConfig.OFF + session_config.graph_options.rewrite_options.memory_optimization = RewriterConfig.OFF + + run_config = NPURunConfig( + save_summary_steps=1000, + save_checkpoints_steps=100, + keep_checkpoint_max=5, + 
session_config=session_config, + log_step_count_steps=20, + precision_mode='allow_mix_precision', + enable_data_pre_proc=True, + iterations_per_loop=1, + jit_compile=False, + op_compiler_cache_mode="enable", + HCCL_algorithm="level0:fullmesh;level1:fullmesh" # 可选配置:level0:pairwise;level1:pairwise + ) + return run_config diff --git a/examples/DCNv2/main_mxrec.py b/examples/DCNv2/main_mxrec.py index f789b2c5..18ab273e 100644 --- a/examples/DCNv2/main_mxrec.py +++ b/examples/DCNv2/main_mxrec.py @@ -27,7 +27,7 @@ import numpy as np from npu_bridge.npu_init import * from model import MyModel -from dlrm.model.config import sess_config, Config +from config import sess_config, Config from optimizer import get_dense_and_sparse_optimizer from mx_rec.core.asc.helper import FeatureSpec, get_asc_insert_func from mx_rec.core.asc.manager import start_asc_pipeline -- Gitee From 7a77b33512a726a4dff6e70b5aa6cd2f6dbef67f Mon Sep 17 00:00:00 2001 From: penghuiyang <1060916628@qq.com> Date: Fri, 17 May 2024 15:47:37 +0800 Subject: [PATCH 146/302] =?UTF-8?q?=E5=88=A0=E9=99=A4config=E6=96=87?= =?UTF-8?q?=E4=BB=B6=E4=B8=AD=E5=86=97=E4=BD=99=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/DCNv2/config.py | 27 --------------------------- 1 file changed, 27 deletions(-) diff --git a/examples/DCNv2/config.py b/examples/DCNv2/config.py index fd38276d..73ab2797 100644 --- a/examples/DCNv2/config.py +++ b/examples/DCNv2/config.py @@ -18,7 +18,6 @@ import os import tensorflow as tf from tensorflow.core.protobuf.rewriter_config_pb2 import RewriterConfig -from npu_bridge.estimator.npu.npu_config import NPURunConfig class LearningRateScheduler: @@ -202,29 +201,3 @@ def sess_config(dump_data=False, dump_path="./dump_output", dump_steps="0|1|2"): session_config.graph_options.rewrite_options.memory_optimization = RewriterConfig.OFF return session_config - - -def get_npu_run_config(): - session_config = tf.ConfigProto(allow_soft_placement=False, - log_device_placement=False) - - session_config.gpu_options.allow_growth = True - custom_op = session_config.graph_options.rewrite_options.custom_optimizers.add() - custom_op.name = "NpuOptimizer" - session_config.graph_options.rewrite_options.remapping = RewriterConfig.OFF - session_config.graph_options.rewrite_options.memory_optimization = RewriterConfig.OFF - - run_config = NPURunConfig( - save_summary_steps=1000, - save_checkpoints_steps=100, - keep_checkpoint_max=5, - session_config=session_config, - log_step_count_steps=20, - precision_mode='allow_mix_precision', - enable_data_pre_proc=True, - iterations_per_loop=1, - jit_compile=False, - op_compiler_cache_mode="enable", - HCCL_algorithm="level0:fullmesh;level1:fullmesh" # 可选配置:level0:pairwise;level1:pairwise - ) - return run_config -- Gitee From a610ddbf53660ac128bb1122a43941013a44d4f5 Mon Sep 17 00:00:00 2001 From: steepcurve Date: Fri, 17 May 2024 08:35:38 +0000 Subject: [PATCH 147/302] =?UTF-8?q?!138=20=E3=80=90=E9=9C=80=E6=B1=82?= =?UTF-8?q?=E3=80=91=E6=96=B0=E5=A2=9E=E5=8A=A8=E6=80=81=E6=89=A9=E5=AE=B9?= =?UTF-8?q?=E5=9C=BA=E6=99=AFadagrad=E5=AE=9E=E7=8E=B0=20*=20update=20mx?= =?UTF-8?q?=5Frec/optimizers/adagrad=5Fby=5Faddr.py.=20*=20update=20mx=5Fr?= =?UTF-8?q?ec/optimizers/adagrad=5Fby=5Faddr.py.=20*=20add=20mx=5Frec/opti?= =?UTF-8?q?mizers/adagrad=5Fby=5Faddr.py.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mx_rec/optimizers/adagrad_by_addr.py | 125 +++++++++++++++++++++++++++ 1 file changed, 125 
insertions(+) create mode 100644 mx_rec/optimizers/adagrad_by_addr.py diff --git a/mx_rec/optimizers/adagrad_by_addr.py b/mx_rec/optimizers/adagrad_by_addr.py new file mode 100644 index 00000000..72f1d86e --- /dev/null +++ b/mx_rec/optimizers/adagrad_by_addr.py @@ -0,0 +1,125 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +from __future__ import absolute_import, division, print_function + +from typing import List + +import tensorflow as tf +from tensorflow.python.ops import math_ops +from tensorflow.python.training import adagrad +from tensorflow.python.training.optimizer import Optimizer + +from mx_rec.optimizers.base import CustomizedOptimizer +from mx_rec.util.initialize import ConfigInitializer +from mx_rec.util.ops import import_host_pipeline_ops +from mx_rec.validator.validator import ( + FloatValidator, + StringValidator, + para_checker_decorator, +) + + +@para_checker_decorator( + check_option_list=[ + ("learning_rate", FloatValidator, {"min_value": 0.0, "max_value": 10.0}, ["check_value"]), + ( + "initial_accumulator_value", + FloatValidator, + {"min_value": 0.0, "max_value": 1.0}, + ["check_value_for_left_open_interval"], + ), + ("name", StringValidator, {"min_len": 1, "max_len": 200}, ["check_string_length"]), + ] +) +def create_hash_optimizer_by_address(learning_rate=0.001, initial_accumulator_value=0.9, name="Adagrad") -> Optimizer: + """Create an instance of adagrad hash optimizer. + + Args: + learning_rate: A `Tensor` or a floating point value. The learning rate. + initial_accumulator_value: A floating point value. Starting value for the accumulators, must be positive. + name: Optional name prefix for the operations created when applying gradients. Defaults to "Adagrad". + + Returns: + Adagrad hash optimizer instance + + Raises: + ValueError: If `use_dynamic_expansion` was not set. 
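+
+    Note:
+        Each table row stores the embedding weights followed by their Adagrad
+        accumulator, so `_apply_sparse` looks up 2 * dim values per address,
+        computes s_t = s_{t-1} + grad ** 2, and writes back the combined delta
+        [-learning_rate * grad / sqrt(s_t + epsilon), s_t - s_{t-1}] in a
+        single embedding_update_by_address call (epsilon = 1e-7).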
+ """ + if not ConfigInitializer.get_instance().use_dynamic_expansion: + raise ValueError( + "dynamic expansion mode is not compatible with the optimizer, please config dynamic " + "expansion mode and optimizer correctly" + ) + optimizer = CustomizedAdagradByAddress( + learning_rate=learning_rate, + initial_accumulator_value=initial_accumulator_value, + name=name, + ) + ConfigInitializer.get_instance().optimizer_config.optimizer_instance = optimizer + return optimizer + + +class CustomizedAdagradByAddress(adagrad.AdagradOptimizer, CustomizedOptimizer): + def __init__( + self, + learning_rate: float, + initial_accumulator_value: float, + name="Adagrad", + ): + self.optimizer_type = "Adagrad" + self.optim_param_list = ["accumulator"] + super(CustomizedAdagradByAddress, self)._get_name(name=name) + super(CustomizedAdagradByAddress, self).__init__( + learning_rate=learning_rate, + initial_accumulator_value=initial_accumulator_value, + name=self.unique_name, + ) + self._epsilon = 1e-7 + self._slot_num = 1 + self._derivative = 2 + + def get_slot_init_values(self) -> List[float]: + # return state value list of adagrad that needs to initialize in ASC DDR. + return [self._initial_accumulator_value] + + def _apply_sparse(self, grad: tf.Tensor, var: tf.Tensor) -> tf.Operation: + grad, var = self.sum_same_id_gradients(grad=grad, var=var, is_expansion=True) + learning_rate_tensor = math_ops.cast(self._learning_rate_tensor, grad.dtype.base_dtype) + epsilon = math_ops.cast(self._epsilon, grad.dtype.base_dtype) + + host_pipeline_ops = import_host_pipeline_ops() + dim = grad.shape.as_list()[-1] + + combined_tensor = host_pipeline_ops.embedding_lookup_by_address(var, embedding_dim=2 * dim, embedding_type=1) + split_length = [dim] + [dim] + split_tensors = tf.split(combined_tensor, split_length, axis=1) + + old_s_slice = split_tensors[1] + s_t_slice = old_s_slice + math_ops.square(grad) + + denominator_slice = math_ops.sqrt(s_t_slice + epsilon) + + update_list = [tf.divide(-learning_rate_tensor * grad, denominator_slice)] + [s_t_slice - old_s_slice] + update_tensor = tf.concat(update_list, axis=1) + var_update_op = host_pipeline_ops.embedding_update_by_address(var, update_tensor, update_type=0) + + return var_update_op + + def _create_slots(self, var_list: List[tf.Variable]): + # slot变量由lookup算子控制 跳过父类的实现 + pass -- Gitee From ddbfce3d7bffa17f05ccb56dad65550ac8b80618 Mon Sep 17 00:00:00 2001 From: penghuiyang <1060916628@qq.com> Date: Mon, 20 May 2024 08:53:07 +0000 Subject: [PATCH 148/302] =?UTF-8?q?!142=20=E6=94=AF=E6=8C=81no=20ranktable?= =?UTF-8?q?=EF=BC=8Cmain=E8=84=9A=E6=9C=AC=E5=86=85=E5=88=A0=E9=99=A4?= =?UTF-8?q?=E5=B7=B2=E4=BF=9D=E5=AD=98=E6=95=B0=E6=8D=AE=20*=20=E5=88=A0?= =?UTF-8?q?=E9=99=A4config=E6=96=87=E4=BB=B6=E4=B8=AD=E5=86=97=E4=BD=99?= =?UTF-8?q?=E4=BB=A3=E7=A0=81=20*=20=E5=BC=95=E7=94=A8=E5=BD=93=E5=89=8D?= =?UTF-8?q?=E7=9B=AE=E5=BD=95=E4=B8=8Bconfig=E6=96=87=E4=BB=B6=E5=92=8Cdlr?= =?UTF-8?q?m=E8=A7=A3=E9=99=A4=E8=80=A6=E5=90=88=20*=20=E5=88=A0=E9=99=A4r?= =?UTF-8?q?un=E8=84=9A=E6=9C=AC=E5=86=97=E4=BD=99=E6=8C=87=E4=BB=A4=20*=20?= =?UTF-8?q?=E9=97=A8=E7=A6=81=E4=BF=AE=E6=94=B9=20*=20=E4=BF=AE=E6=94=B9ma?= =?UTF-8?q?in=E8=84=9A=E6=9C=AC=E5=86=85=E5=88=A0=E9=99=A4=E5=B7=B2?= =?UTF-8?q?=E4=BF=9D=E5=AD=98=E6=96=87=E4=BB=B6=20*=20=E4=BF=AE=E6=94=B9ma?= =?UTF-8?q?in=E8=84=9A=E6=9C=AC=E5=86=85=E5=88=A0=E9=99=A4=E5=B7=B2?= =?UTF-8?q?=E4=BF=9D=E5=AD=98=E6=96=87=E4=BB=B6=20*=20=E9=80=82=E9=85=8Dno?= =?UTF-8?q?=20ranktable=E5=90=AF=E5=8A=A8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit --- examples/DCNv2/config.py | 203 ++++++++++++++++++++ examples/DCNv2/main_mxrec.py | 27 ++- examples/demo/little_demo/main.py | 43 +++-- examples/demo/little_demo/run.sh | 16 +- examples/demo/little_demo_estimator/main.py | 24 +++ examples/demo/little_demo_estimator/run.sh | 10 +- examples/dlrm/model/config.py | 6 +- examples/dlrm/model/main_mxrec.py | 3 +- examples/dlrm/model/run.sh | 63 ++++-- 9 files changed, 326 insertions(+), 69 deletions(-) create mode 100644 examples/DCNv2/config.py diff --git a/examples/DCNv2/config.py b/examples/DCNv2/config.py new file mode 100644 index 00000000..73ab2797 --- /dev/null +++ b/examples/DCNv2/config.py @@ -0,0 +1,203 @@ +# coding=utf-8 +# Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +import os + +import tensorflow as tf +from tensorflow.core.protobuf.rewriter_config_pb2 import RewriterConfig + + +class LearningRateScheduler: + """ + LR Scheduler combining Polynomial Decay with Warmup at the beginning. + TF-based cond operations necessary for performance in graph mode. + """ + + def __init__(self, base_lr_dense, base_lr_sparse, warmup_steps, decay_start_step, decay_steps): + self.warmup_steps = tf.constant(warmup_steps, dtype=tf.int32) + self.decay_start_step = tf.constant(decay_start_step, dtype=tf.int32) + self.decay_steps = tf.constant(decay_steps) + self.decay_end_step = decay_start_step + decay_steps # 65041 + self.poly_power = 2.0 + self.base_lr_dense = base_lr_dense + self.base_lr_sparse = base_lr_sparse + + def calc(self, global_step): + # used for the warmup stage + warmup_step = tf.cast(1 / self.warmup_steps, tf.float32) + lr_factor_warmup = 1 - tf.cast(self.warmup_steps - global_step, tf.float32) * warmup_step + lr_factor_warmup = tf.cast(lr_factor_warmup, tf.float32) + # used for the constant stage + lr_factor_constant = tf.cast(1.0, tf.float32) + + # used for the decay stage + lr_factor_decay = (self.decay_end_step - global_step) / self.decay_steps + lr_factor_decay = tf.math.pow(lr_factor_decay, self.poly_power) + lr_factor_decay = tf.cast(lr_factor_decay, tf.float32) + sparse_after_decay = tf.cast(1 / self.decay_steps, tf.float32) + + lr_factor_decay_sparse = tf.cond( + global_step < self.decay_end_step, + lambda: lr_factor_decay, + lambda: sparse_after_decay, + ) + + lr_factor_decay_dense = tf.cond( + global_step < self.decay_end_step, + lambda: lr_factor_decay, + lambda: sparse_after_decay, + ) + + poly_schedule_sparse = tf.cond( + global_step < self.decay_start_step, + lambda: lr_factor_constant, + lambda: lr_factor_decay_sparse, + ) + + poly_schedule_dense = tf.cond( + global_step < self.decay_start_step, + lambda: lr_factor_constant, + lambda: lr_factor_decay_dense, + ) + + lr_factor_sparse = tf.cond( + global_step < self.warmup_steps, lambda: lr_factor_warmup, lambda: poly_schedule_sparse + ) + + lr_factor_dense = tf.cond( + global_step < 
self.warmup_steps, lambda: lr_factor_warmup, lambda: poly_schedule_dense + ) + + lr_sparse = self.base_lr_sparse * lr_factor_sparse + lr_dense = self.base_lr_dense * lr_factor_dense + return lr_dense, lr_sparse + + +class Config: + def __init__(self, ): + self.rank_id = int(os.getenv("OMPI_COMM_WORLD_RANK")) if os.getenv("OMPI_COMM_WORLD_RANK") else None + tmp = os.getenv("TRAIN_RANK_SIZE") + if tmp is None: + raise ValueError("please export TRAIN_RANK_SIZE") + self.rank_size = int(tmp) + + self.data_path = os.getenv("DLRM_CRITEO_DATA_PATH") + self.train_file_pattern = "train" + self.test_file_pattern = "test" + + self.batch_size = 8192 + self.line_per_sample = 1024 + self.train_epoch = 3 + self.test_epoch = 1 + self.perform_shuffle = False + + self.key_type = tf.int64 + self.label_type = tf.float32 + self.value_type = tf.int64 + + self.feat_cnt = 26 + self.__set_emb_table_size() + + self.field_num = 26 + self.send_count = 46000 // self.rank_size + + self.emb_dim = 128 + self.hashtable_threshold = 1 + + self.USE_PIPELINE_TEST = False + + # 动态学习率 + GLOBAL_BATCH_SIZE = 8192 * 8 + LR_SCHEDULE_STEPS = [ + int(2750 * 55296 / GLOBAL_BATCH_SIZE), + int(49315 * 55296 / GLOBAL_BATCH_SIZE), + int(27772 * 55296 / GLOBAL_BATCH_SIZE), + ] + self.global_step = tf.Variable(0, trainable=False) + _lr_scheduler = LearningRateScheduler( + 28.443, + 33.71193, + LR_SCHEDULE_STEPS[0], + LR_SCHEDULE_STEPS[1], + LR_SCHEDULE_STEPS[2], + ) + self.learning_rate = _lr_scheduler.calc(self.global_step) + + def __set_emb_table_size(self): + self.cache_mode = os.getenv("CACHE_MODE") + if self.cache_mode is None: + raise ValueError("please export CACHE_MODE environment variable, support:[HBM, DDR, SSD]") + + if self.cache_mode == "HBM": + self.dev_vocab_size = 24_000_000 * self.rank_size + self.host_vocab_size = 0 + elif self.cache_mode == "DDR": + self.dev_vocab_size = 500_000 * self.rank_size + self.host_vocab_size = 24_000_000 * self.rank_size + elif self.cache_mode == "SSD": + self.dev_vocab_size = 100_000 * self.rank_size + self.host_vocab_size = 2_000_000 * self.rank_size + self.ssd_vocab_size = 24_000_000 * self.rank_size + else: + raise ValueError(f"get CACHE_MODE:{self.cache_mode}, expect in [HBM, DDR, SSD]") + + def get_emb_table_cfg(self) -> dict: + if self.cache_mode == "HBM": + return {"device_vocabulary_size": self.dev_vocab_size} + elif self.cache_mode == "DDR": + return {"device_vocabulary_size": self.dev_vocab_size, + "host_vocabulary_size": self.host_vocab_size} + elif self.cache_mode == "SSD": + return {"device_vocabulary_size": self.dev_vocab_size, + "host_vocabulary_size": self.host_vocab_size, + "ssd_vocabulary_size": self.ssd_vocab_size, + "ssd_data_path": ["ssd_data"]} + else: + raise RuntimeError(f"get CACHE_MODE:{self.cache_mode}, check Config.__set_emb_table_size implementation") + + +def sess_config(dump_data=False, dump_path="./dump_output", dump_steps="0|1|2"): + session_config = tf.ConfigProto(allow_soft_placement=False, + log_device_placement=False) + session_config.gpu_options.allow_growth = True + custom_op = session_config.graph_options.rewrite_options.custom_optimizers.add() + custom_op.name = "NpuOptimizer" + custom_op.parameter_map["mix_compile_mode"].b = False + custom_op.parameter_map["use_off_line"].b = True + custom_op.parameter_map["min_group_size"].b = 1 + # 可选配置level0:pairwise;level1:pairwise + custom_op.parameter_map["HCCL_algorithm"].s = tf.compat.as_bytes("level0:fullmesh;level1:fullmesh") + custom_op.parameter_map["enable_data_pre_proc"].b = True + 
custom_op.parameter_map["iterations_per_loop"].i = 10 + custom_op.parameter_map["precision_mode"].s = tf.compat.as_bytes("allow_mix_precision") + custom_op.parameter_map["hcom_parallel"].b = False + custom_op.parameter_map["op_precision_mode"].s = tf.compat.as_bytes("op_impl_mode.ini") + custom_op.parameter_map["op_execute_timeout"].i = 2000 + custom_op.parameter_map["variable_memory_max_size"].s = tf.compat.as_bytes( + str(13 * 1024 * 1024 * 1024)) # total 31 need 13; + custom_op.parameter_map["graph_memory_max_size"].s = tf.compat.as_bytes(str(18 * 1024 * 1024 * 1024)) # need 25 + custom_op.parameter_map["stream_max_parallel_num"].s = tf.compat.as_bytes("DNN_VM_AICPU:3,AIcoreEngine:3") + + if dump_data: + custom_op.parameter_map["enable_dump"].b = True + custom_op.parameter_map["dump_path"].s = tf.compat.as_bytes(dump_path) + custom_op.parameter_map["dump_step"].s = tf.compat.as_bytes(dump_steps) + custom_op.parameter_map["dump_mode"].s = tf.compat.as_bytes("all") + + session_config.graph_options.rewrite_options.remapping = RewriterConfig.OFF + session_config.graph_options.rewrite_options.memory_optimization = RewriterConfig.OFF + + return session_config diff --git a/examples/DCNv2/main_mxrec.py b/examples/DCNv2/main_mxrec.py index eb1d91ea..18ab273e 100644 --- a/examples/DCNv2/main_mxrec.py +++ b/examples/DCNv2/main_mxrec.py @@ -13,17 +13,21 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== + +import os +import random +import shutil import time import warnings -import random from glob import glob - from sklearn.metrics import roc_auc_score + import numpy as np + from npu_bridge.npu_init import * from model import MyModel -from dlrm.model.config import sess_config, Config +from config import sess_config, Config from optimizer import get_dense_and_sparse_optimizer from mx_rec.core.asc.helper import FeatureSpec, get_asc_insert_func from mx_rec.core.asc.manager import start_asc_pipeline @@ -244,12 +248,25 @@ def create_feature_spec_list(use_timestamp=False): return feature_spec_list +def _del_related_dir(del_path: str) -> None: + if not os.path.isabs(del_path): + del_path = os.path.join(os.getcwd(), del_path) + dirs = glob(del_path) + for sub_dir in dirs: + shutil.rmtree(sub_dir, ignore_errors=True) + logger.info(f"delete dir:{sub_dir}") + + +def _clear_saved_model() -> None: + _del_related_dir("/root/ascend/log/*") + + if __name__ == "__main__": tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) warnings.filterwarnings("ignore") + _clear_saved_model() - rank_id = int(os.getenv("RANK_ID")) if os.getenv("RANK_ID") else None - rank_size = int(os.getenv("RANK_SIZE")) if os.getenv("RANK_SIZE") else None + rank_size = int(os.getenv("TRAIN_RANK_SIZE")) if os.getenv("TRAIN_RANK_SIZE") else None interval = int(os.getenv("INTERVAL")) if os.getenv("INTERVAL") else None train_steps = 10000 eval_steps = 1360 diff --git a/examples/demo/little_demo/main.py b/examples/demo/little_demo/main.py index a6ef96fc..d8dd851a 100644 --- a/examples/demo/little_demo/main.py +++ b/examples/demo/little_demo/main.py @@ -148,23 +148,35 @@ def create_feature_spec_list(use_timestamp=False): return feature_spec_list -def clear_saved_model(): +def _del_related_dir(del_path: str) -> None: + if not os.path.isabs(del_path): + del_path = os.path.join(os.getcwd(), del_path) + dirs = glob(del_path) + for sub_dir in dirs: + shutil.rmtree(sub_dir, ignore_errors=True) + 
logger.info(f"delete dir:{sub_dir}") + + +def _clear_saved_model() -> None: + _del_related_dir("/root/ascend/log/*") + _del_related_dir("kernel*") + _del_related_dir("export_graph") + mode = UseMode.mapping(os.getenv("USE_MODE")) - if mode == UseMode.TRAIN: - logger.info("current mode is train, will delete previous saved model data if exist.") - save_model_path = os.path.join(os.getcwd(), "saved-model") - shutil.rmtree(save_model_path, ignore_errors=True) - if not (os.getenv("CACHE_MODE", "") == CacheModeEnum.SSD.value and mode == UseMode.TRAIN): + if mode != UseMode.TRAIN: return + logger.info("current mode is train, will delete previous saved model data if exist.") + _del_related_dir("saved-model") - # ssd not allow overwrite file, should clear it before training - logger.info("current cache mode is SSD, will delete previous saved ssd data if exist.") - for part_path in _SSD_SAVE_PATH: - if "/" not in part_path and "\\" not in part_path: - part_path = os.path.join(os.getcwd(), part_path) - shutil.rmtree(part_path, ignore_errors=True) + if not (os.getenv("CACHE_MODE", "") == CacheModeEnum.SSD.value): + return + logger.info("current cache mode is SSD, and file overwrite is not allowed in SSD mode, deleting exist directory" + " then create empty directory for this use case.") + for sub_path in _SSD_SAVE_PATH: + _del_related_dir(sub_path) try: - os.mkdir(part_path) + os.mkdir(sub_path) + logger.info(f"mkdir dir:{sub_path}") except OSError: logger.warning("ssd path has exist") # 多进程并行,忽略异常 @@ -172,6 +184,7 @@ def clear_saved_model(): if __name__ == "__main__": tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) warnings.filterwarnings("ignore") + _clear_saved_model() use_mode = UseMode.mapping(os.getenv("USE_MODE")) # 最大数据集生成数量 @@ -217,7 +230,7 @@ if __name__ == "__main__": if len(model_file) == 0: raise ValueError(f"get USE_MODE:{use_mode}, but no model file exist at:{load_path_pattern}") if_load = True - + # nbatch function needs to be used together with the prefetch and host_vocabulary_size != 0 init(train_steps=TRAIN_STEPS, eval_steps=EVAL_STEPS, @@ -267,7 +280,7 @@ if __name__ == "__main__": if cache_mode in ["DDR", "SSD"] and not use_dynamic: logger.warning("when cache_mode in [DDR, SSD], suggest use_dynamic=true to avoid tuning size parameter") emb_initializer = tf.compat.v1.constant_initializer(0) if USE_DETERMINISTIC \ - else tf.compat.v1.truncated_normal_initializer() + else tf.compat.v1.truncated_normal_initializer() user_hashtable = create_table(key_dtype=tf.int64, dim=tf.TensorShape([cfg.user_hashtable_dim]), name='user_table', diff --git a/examples/demo/little_demo/run.sh b/examples/demo/little_demo/run.sh index d585be02..5b45af84 100644 --- a/examples/demo/little_demo/run.sh +++ b/examples/demo/little_demo/run.sh @@ -15,26 +15,12 @@ # ============================================================================== kill -9 `ps -ef | grep python | grep -v grep | awk '{print $2}'` > /dev/null 2>&1 -rm -rf /root/ascend/log/* -rm -rf ./kernel* -rm -rf ./export_graph/* # 支持[train, load_and_train, predict] -export USE_MODE="train" -if [ $USE_MODE = "train" ]; then - echo "train mode: saved-model will be deleted" - rm -rf ./saved-model -fi +export USE_MODE="train" # if train mode, will remove dir ./saved-model # cache mode support: HBM, DDR, SSD export CACHE_MODE="HBM" -if [ $CACHE_MODE = "SSD" ] && [ $USE_MODE = "train" ]; then - echo "SSD train mode not allow file exist in directory when training a model from stratch in case overwrite, - deleting directory ssd_data then 
create for this use case" - rm -rf ssd_data - mkdir ssd_data -fi - # 获取输入参数:py、ip if [ $# -ge 1 ]; then diff --git a/examples/demo/little_demo_estimator/main.py b/examples/demo/little_demo_estimator/main.py index cca5a7a5..20b7381d 100644 --- a/examples/demo/little_demo_estimator/main.py +++ b/examples/demo/little_demo_estimator/main.py @@ -17,6 +17,8 @@ import argparse import os +import shutil +from glob import glob import tensorflow as tf from mx_rec.util.initialize import init, terminate_config_initializer @@ -142,6 +144,27 @@ def create_feature_spec_list(use_timestamp=False): return feature_spec_list +def _del_related_dir(del_path: str) -> None: + if not os.path.isabs(del_path): + del_path = os.path.join(os.getcwd(), del_path) + dirs = glob(del_path) + for sub_dir in dirs: + shutil.rmtree(sub_dir, ignore_errors=True) + logger.info(f"delete dir:{sub_dir}") + + +def _clear_saved_model() -> None: + _del_related_dir("/root/ascend/log/*") + _del_related_dir("kernel*") + _del_related_dir("export_graph") + + mode = args.run_mode + if not mode.startswith("train"): + return + logger.info("current mode contains train, will delete previous saved model data if exist.") + _del_related_dir("_rank*") + + if __name__ == '__main__': parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser.add_argument('--run_mode', type=str, default='train_and_evaluate') # 运行模式,在run.sh中进行配置 @@ -185,6 +208,7 @@ if __name__ == '__main__': args.eval_steps = -1 elif args.run_mode == 'train_and_evaluate': args.save_checkpoints_steps = args.train_steps + _clear_saved_model() # set init init(train_steps=args.train_steps, diff --git a/examples/demo/little_demo_estimator/run.sh b/examples/demo/little_demo_estimator/run.sh index f3d34c82..8bb43b19 100644 --- a/examples/demo/little_demo_estimator/run.sh +++ b/examples/demo/little_demo_estimator/run.sh @@ -15,9 +15,6 @@ # ============================================================================== kill -9 `ps -ef | grep python | grep -v grep | awk '{print $2}'` > /dev/null 2>&1 -rm -rf /root/ascend/log/* -rm -rf ./kernel* -rm -rf ./export_graph/* # 获取输入参数:py、ip if [ $# -ge 1 ]; then @@ -83,12 +80,7 @@ export TF_CPP_MIN_LOG_LEVEL=3 # tensorflow日志级别,3对应FATAL # 设置应用类日志的全局日志级别及各模块日志级别,具体请参考昇腾官网CANN文档 export ASCEND_GLOBAL_LOG_LEVEL=3 # “设置日志级别”章节0:debug, 1:info, 2:warning, 3:error, 4:NULL export MXREC_MODE="ASC" -export USE_MODE="train_and_evaluate" # 支持[train, predict, train_and_evaluate] - -if [ $USE_MODE = "train" ] || [ $USE_MODE = "train_and_evaluate" ];then - echo "train mode: saved-model will be deleted" - rm -rf ./_rank* -fi +export USE_MODE="train_and_evaluate" # 支持[train, predict, train_and_evaluate],train相关模式将删除./_rank*目录 ################# 参数配置 ###################### export USE_DYNAMIC=1 # 0:静态shape;1:动态shape diff --git a/examples/dlrm/model/config.py b/examples/dlrm/model/config.py index 23b042c2..fd38276d 100644 --- a/examples/dlrm/model/config.py +++ b/examples/dlrm/model/config.py @@ -89,10 +89,10 @@ class LearningRateScheduler: class Config: def __init__(self, ): - self.rank_id = int(os.getenv("RANK_ID")) if os.getenv("RANK_ID") else None - tmp = os.getenv("RANK_SIZE") + self.rank_id = int(os.getenv("OMPI_COMM_WORLD_RANK")) if os.getenv("OMPI_COMM_WORLD_RANK") else None + tmp = os.getenv("TRAIN_RANK_SIZE") if tmp is None: - raise ValueError("please export RANK_SIZE") + raise ValueError("please export TRAIN_RANK_SIZE") self.rank_size = int(tmp) self.data_path = os.getenv("DLRM_CRITEO_DATA_PATH") diff --git 
a/examples/dlrm/model/main_mxrec.py b/examples/dlrm/model/main_mxrec.py index 3464f84e..6fda4f0a 100644 --- a/examples/dlrm/model/main_mxrec.py +++ b/examples/dlrm/model/main_mxrec.py @@ -41,7 +41,6 @@ from mx_rec.util.variable import get_dense_and_sparse_variable from mx_rec.util.log import logger from npu_bridge.npu_init import * - npu_plugin.set_device_sat_mode(0) dense_hashtable_seed = 128 @@ -253,7 +252,7 @@ if __name__ == "__main__": warnings.filterwarnings("ignore") rank_id = int(os.getenv("RANK_ID")) if os.getenv("RANK_ID") else None - rank_size = int(os.getenv("RANK_SIZE")) if os.getenv("RANK_SIZE") else None + rank_size = int(os.getenv("TRAIN_RANK_SIZE")) if os.getenv("TRAIN_RANK_SIZE") else None interval = int(os.getenv("INTERVAL")) if os.getenv("INTERVAL") else None train_steps = 10000 eval_steps = 1360 diff --git a/examples/dlrm/model/run.sh b/examples/dlrm/model/run.sh index f5cb4449..be509608 100644 --- a/examples/dlrm/model/run.sh +++ b/examples/dlrm/model/run.sh @@ -20,10 +20,25 @@ so_path=$1 mx_rec_package_path=$2 hccl_cfg_json=$3 dlrm_criteo_data_path=$4 +ip=$5 # no ranktable时传入该参数 -export RANK_SIZE=8 -echo "RANK_SIZE=${RANK_SIZE}, please make sure hccl configuration json file match this parameter" -export RANK_TABLE_FILE=${hccl_cfg_json} +interface="lo" +num_server=1 +local_rank_size=8 +num_process=$((num_server * local_rank_size)) +export TRAIN_RANK_SIZE=$num_process + +# 删除数据 +echo "CACHE_MODE:${CACHE_MODE}" +if [ ${CACHE_MODE} = "SSD" ]; then + echo "SSD train mode not allow file exist before training, + deleting dir ${cur_path}/ssd_data then create for SSD use case" + rm -rf ssd_data + mkdir ssd_data +fi +rm -rf kernel* +rm -rf /root/ascend/log/* +rm -rf model_dir_rank* op_cache ################# 参数配置 ###################### export USE_DYNAMIC=0 # 0:静态shape;1:动态shape @@ -34,25 +49,11 @@ export USE_MULTI_LOOKUP=0 # 0:一表一查;1:一表多查 export USE_MODIFY_GRAPH=0 # 0:feature spec模式;1:自动改图模式 ################################################ -echo "CACHE_MODE:${CACHE_MODE}" -if [ ${CACHE_MODE} = "SSD" ]; then - echo "SSD train mode not allow file exist before training, - deleting dir ${cur_path}/ssd_data then create for SSD use case" - rm -rf ssd_data - mkdir ssd_data -fi - export HCCL_CONNECT_TIMEOUT=1200 - export DLRM_CRITEO_DATA_PATH=${dlrm_criteo_data_path} export PYTHONPATH=${mx_rec_package_path}:${so_path}:$PYTHONPATH export LD_PRELOAD=/usr/lib64/libgomp.so.1 export LD_LIBRARY_PATH=${so_path}:/usr/local/lib:$LD_LIBRARY_PATH - -rm -rf kernel* -rm -rf /root/ascend/log/* -rm -rf model_dir_rank* op_cache - export ASCEND_DEVICE_ID=0 export RANK_ID_START=0 export JOB_ID=10086 @@ -78,10 +79,32 @@ echo "MXREC_MODE is $MXREC_MODE" export py=main_mxrec.py echo "py is $py" +# 区分ranktable和no ranktable +if [ -n "$ip" ]; then + # no ranktable分支 + echo "Current is no ranktable solution." + echo "Input node ip: $ip, please make sure this ip is available." 
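+    # Without a ranktable json, the cluster is discovered from the CM_*
+    # variables below: one chief endpoint (ip, listen port, device id) plus
+    # this node's worker ip and the total number of participating devices.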
+ export CM_CHIEF_IP=$ip # 主节点ip + export CM_CHIEF_PORT=60001 # 主节点监听端口 + export CM_CHIEF_DEVICE=0 # 主节点device id + export CM_WORKER_IP=$ip # 当前节点ip + export CM_WORKER_SIZE=$num_process # 参与集群训练的device数量 + echo "CM_CHIEF_IP=$CM_CHIEF_IP" + echo "CM_CHIEF_PORT=$CM_CHIEF_PORT" + echo "CM_CHIEF_DEVICE=$CM_CHIEF_DEVICE" + echo "CM_WORKER_IP=$CM_WORKER_IP" + echo "CM_WORKER_SIZE=$CM_WORKER_SIZE" +else + # ranktable分支 + echo "Current is ranktable solution, hccl json file:${hccl_cfg_json}" + export RANK_SIZE=$num_process + echo "RANK_SIZE=${RANK_SIZE}, please make sure hccl configuration json file match this parameter" + export RANK_TABLE_FILE=${hccl_cfg_json} +fi + echo "use horovod to start tasks" # GLOG_stderrthreshold -2:TRACE -1:DEBUG 0:INFO 1:WARN 2.ERROR, 默认为INFO mpi_args='-x BIND_INFO="0:12 12:48 60:48" -x GLOG_stderrthreshold=2 -x GLOG_logtostderr=true -bind-to none -x NCCL_SOCKET_IFNAME=docker0 -mca btl_tcp_if_exclude docker0' -interface="lo" -horovodrun --network-interface ${interface} -np ${RANK_SIZE} --mpi-args "${mpi_args}" --mpi -H localhost:${RANK_SIZE} \ -python3.7 ${py} 2>&1 | tee temp_${CACHE_MODE}_${RANK_SIZE}p.log +horovodrun --network-interface ${interface} -np ${num_process} --mpi-args "${mpi_args}" --mpi -H localhost:${local_rank_size} \ +python3.7 ${py} 2>&1 | tee temp_${CACHE_MODE}_${num_process}p.log -- Gitee From ff62ae878e8738095b0cf0808686f2475df9509e Mon Sep 17 00:00:00 2001 From: longfeifei <962977793@qq.com> Date: Mon, 20 May 2024 19:22:47 +0800 Subject: [PATCH 149/302] =?UTF-8?q?warm=20start=20=E5=BC=80=E5=8F=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mx_rec/saver/saver.py | 7 +++---- mx_rec/saver/warm_start.py | 22 ++++++++++------------ src/core/hybrid_mgmt/hybrid_mgmt.cpp | 2 +- src/pybind/module_main.cpp | 3 ++- 4 files changed, 16 insertions(+), 18 deletions(-) diff --git a/mx_rec/saver/saver.py b/mx_rec/saver/saver.py index 0dc28a99..f1ce6ea3 100644 --- a/mx_rec/saver/saver.py +++ b/mx_rec/saver/saver.py @@ -67,7 +67,7 @@ class Saver(object): ("prefix_name", ClassValidator, {"classes": (str, type(None))}), ("prefix_name", OptionalStringValidator, {"min_len": 1, "max_len": 50}, ["check_string_length"]), ]) - def __init__(self, var_list=None, max_to_keep=3, prefix_name="checkpoint", warm_start_tables = None): + def __init__(self, var_list=None, max_to_keep=3, prefix_name="checkpoint", warm_start_tables=None): self.max_to_keep = max_to_keep self._prefix_name = prefix_name self.var_list = var_list @@ -297,7 +297,7 @@ class Saver(object): table_instance.emb_size], name=DataName.EMBEDDING.value) assign_op = var.assign(variable) - self.restore_fetch_dict[table_instance.table_name]= [assign_op] + self.restore_fetch_dict[table_instance.table_name] = [assign_op] optimizer = ConfigInitializer.get_instance().optimizer_config.get_optimizer_by_table_name( table_instance.table_name) if optimizer: @@ -330,8 +330,7 @@ class Saver(object): logger.warning("no tables can be warm start restored.") return placeholder_dict, restore_fetch_list - def _restore(self, sess, reading_path , warm_start_tables=None): - # todo:这里增加新的参数,table_list + def _restore(self, sess, reading_path, warm_start_tables=None): # 根据table_list去改造 if warm_start_tables: placeholder_dict, restore_fetch_list = self.get_warm_start_dict(warm_start_tables) diff --git a/mx_rec/saver/warm_start.py b/mx_rec/saver/warm_start.py index b5df5887..c6040316 100644 --- a/mx_rec/saver/warm_start.py +++ b/mx_rec/saver/warm_start.py @@ -14,25 +14,23 @@ # See the License 
for the specific language governing permissions and # limitations under the License. # ============================================================================== +import os import logging - -import six import re -import os from typing import List +import six import tensorflow as tf from tensorflow.python.estimator import estimator as estimator_lib from tensorflow.python.training import warm_starting_util - -from mx_rec.util.log import logger -from mx_rec.saver.saver import Saver - if tf.__version__.startswith("1"): from npu_bridge.npu_init import NPUEstimator else: from npu_device.compat.v1.npu_init import NPUEstimator +from mx_rec.util.log import logger +from mx_rec.saver.saver import Saver + class WarmStartController: _instance = None @@ -81,7 +79,8 @@ def patch_for_func_warm_start(func): if isinstance(ckpt_to_initialize_from, (list, tuple)): vars_to_warm_start_list = args[1] var_name_to_prev_var_name_list = args[3] - for i in range(len(ckpt_to_initialize_from)): + warm_start_num = len(ckpt_to_initialize_from) + for i in range(warm_start_num): f = func(ckpt_to_initialize_from[i], vars_to_warm_start_list[i], args[2], var_name_to_prev_var_name_list[i], **kwargs) return f @@ -117,13 +116,10 @@ def warm_settings_filter(warm_start_from): filter_setting = _warm_settings_filter(warm_start_from) if filter_setting: return filter_setting - return None elif isinstance(warm_start_from, (six.string_types, six.binary_type)): table_name_list = get_table_name_set_by_ckpt_path(warm_start_from) WarmStartController().add_element(warm_start_from, table_name_list) return warm_start_from - else: - pass def recover_warm_settings(setting_list): @@ -176,7 +172,7 @@ def _warm_settings_filter(warm_start_setting): if matching_tables: WarmStartController().add_element(warm_start_setting.ckpt_to_initialize_from, matching_tables) if vars_to_warm_start != ".*": - return None + return return warm_start_setting elif all(isinstance(v, str) for v in vars_to_warm_start): sparse_vars = [] @@ -232,6 +228,8 @@ class SparseRestoreHook(tf.estimator.SessionRunHook): def __init__(self): logging.info("In warm start mode, SparseRestoreHook has been initialized.") self._is_warm_start = False + self._saver = None + self._warm_start_dict = {} def begin(self): self._saver = Saver() diff --git a/src/core/hybrid_mgmt/hybrid_mgmt.cpp b/src/core/hybrid_mgmt/hybrid_mgmt.cpp index 8eca48ba..78621829 100644 --- a/src/core/hybrid_mgmt/hybrid_mgmt.cpp +++ b/src/core/hybrid_mgmt/hybrid_mgmt.cpp @@ -296,7 +296,7 @@ bool HybridMgmt::Load(const string& loadPath, vector warmStartTables) vector loadFeatures; SetFeatureTypeForLoad(loadFeatures); - if(warmStartTables.size() == 0) { + if (warmStartTables.size() == 0) { EmbeddingMgmt::Instance()->Load(loadPath); } else { for (auto& tableName: warmStartTables) { diff --git a/src/pybind/module_main.cpp b/src/pybind/module_main.cpp index 4de10fc8..351d19a4 100644 --- a/src/pybind/module_main.cpp +++ b/src/pybind/module_main.cpp @@ -216,7 +216,8 @@ namespace { py::arg("seed") = DEFAULT_RANDOM_SEED, py::arg("threshold_values") = vector {}, py::arg("if_load") = false) .def("save", &MxRec::HybridMgmt::Save, py::arg("save_path") = "") - .def("load", &MxRec::HybridMgmt::Load, py::arg("load_path") = "", py::arg("warm_start_tables") = vector {}) + .def("load", &MxRec::HybridMgmt::Load, py::arg("load_path") = "", + py::arg("warm_start_tables") = vector {}) .def("destroy", &MxRec::HybridMgmt::Destroy) .def("evict", &MxRec::HybridMgmt::Evict) .def("send", &MxRec::HybridMgmt::SendHostMap, py::arg("table_name") = "") 
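Taken together, this patch threads a table whitelist from Python down to C++: `Saver` accepts a `warm_start_tables` argument, `_restore` branches into `get_warm_start_dict` when the list is non-empty, and the pybind `load` binding forwards the list to `HybridMgmt::Load`, which loads only the named tables instead of every table in the checkpoint. A minimal usage sketch, assuming hypothetical table names ("user_emb", "item_emb" are placeholders for names from your model definition):

    from mx_rec.saver.saver import Saver

    # Restore only the listed embedding tables; passing None (the default)
    # keeps the original behaviour of loading every table in the checkpoint.
    saver = Saver(max_to_keep=3, prefix_name="checkpoint",
                  warm_start_tables=["user_emb", "item_emb"])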
-- Gitee From 3878e4cffc106ec5aff6b28b7b895017a7e18365 Mon Sep 17 00:00:00 2001 From: longfeifei <962977793@qq.com> Date: Mon, 20 May 2024 19:22:47 +0800 Subject: [PATCH 150/302] =?UTF-8?q?warm=20start=20=E5=BC=80=E5=8F=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mx_rec/saver/saver.py | 7 +++--- mx_rec/saver/warm_start.py | 34 ++++++++++++++-------------- src/core/hybrid_mgmt/hybrid_mgmt.cpp | 2 +- src/pybind/module_main.cpp | 3 ++- 4 files changed, 23 insertions(+), 23 deletions(-) diff --git a/mx_rec/saver/saver.py b/mx_rec/saver/saver.py index 0dc28a99..f1ce6ea3 100644 --- a/mx_rec/saver/saver.py +++ b/mx_rec/saver/saver.py @@ -67,7 +67,7 @@ class Saver(object): ("prefix_name", ClassValidator, {"classes": (str, type(None))}), ("prefix_name", OptionalStringValidator, {"min_len": 1, "max_len": 50}, ["check_string_length"]), ]) - def __init__(self, var_list=None, max_to_keep=3, prefix_name="checkpoint", warm_start_tables = None): + def __init__(self, var_list=None, max_to_keep=3, prefix_name="checkpoint", warm_start_tables=None): self.max_to_keep = max_to_keep self._prefix_name = prefix_name self.var_list = var_list @@ -297,7 +297,7 @@ class Saver(object): table_instance.emb_size], name=DataName.EMBEDDING.value) assign_op = var.assign(variable) - self.restore_fetch_dict[table_instance.table_name]= [assign_op] + self.restore_fetch_dict[table_instance.table_name] = [assign_op] optimizer = ConfigInitializer.get_instance().optimizer_config.get_optimizer_by_table_name( table_instance.table_name) if optimizer: @@ -330,8 +330,7 @@ class Saver(object): logger.warning("no tables can be warm start restored.") return placeholder_dict, restore_fetch_list - def _restore(self, sess, reading_path , warm_start_tables=None): - # todo:这里增加新的参数,table_list + def _restore(self, sess, reading_path, warm_start_tables=None): # 根据table_list去改造 if warm_start_tables: placeholder_dict, restore_fetch_list = self.get_warm_start_dict(warm_start_tables) diff --git a/mx_rec/saver/warm_start.py b/mx_rec/saver/warm_start.py index b5df5887..6f1e637b 100644 --- a/mx_rec/saver/warm_start.py +++ b/mx_rec/saver/warm_start.py @@ -14,25 +14,23 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== +import os import logging - -import six import re -import os from typing import List +import six import tensorflow as tf from tensorflow.python.estimator import estimator as estimator_lib from tensorflow.python.training import warm_starting_util - -from mx_rec.util.log import logger -from mx_rec.saver.saver import Saver - if tf.__version__.startswith("1"): from npu_bridge.npu_init import NPUEstimator else: from npu_device.compat.v1.npu_init import NPUEstimator +from mx_rec.util.log import logger +from mx_rec.saver.saver import Saver + class WarmStartController: _instance = None @@ -81,7 +79,8 @@ def patch_for_func_warm_start(func): if isinstance(ckpt_to_initialize_from, (list, tuple)): vars_to_warm_start_list = args[1] var_name_to_prev_var_name_list = args[3] - for i in range(len(ckpt_to_initialize_from)): + warm_start_num = len(ckpt_to_initialize_from) + for i in range(warm_start_num): f = func(ckpt_to_initialize_from[i], vars_to_warm_start_list[i], args[2], var_name_to_prev_var_name_list[i], **kwargs) return f @@ -100,6 +99,7 @@ def patch_for_estimator_train(func): def warm_settings_filter(warm_start_from): + warm_start_from_res = None if isinstance(warm_start_from, estimator_lib.WarmStartSettings): if isinstance(warm_start_from.ckpt_to_initialize_from, (list, tuple)): out_setting_list = [] @@ -110,20 +110,19 @@ def warm_settings_filter(warm_start_from): if filter_setting: out_setting_list.append(filter_setting) if out_setting_list: - warm_start_from = recover_warm_settings(out_setting_list) - return warm_start_from + warm_start_from_res = recover_warm_settings(out_setting_list) elif isinstance(warm_start_from.ckpt_to_initialize_from, (six.string_types, six.binary_type)): logger.info("According to warm_start_settings, warm start will load from only one checkpoint path.") filter_setting = _warm_settings_filter(warm_start_from) if filter_setting: - return filter_setting - return None + warm_start_from_res = filter_setting elif isinstance(warm_start_from, (six.string_types, six.binary_type)): table_name_list = get_table_name_set_by_ckpt_path(warm_start_from) WarmStartController().add_element(warm_start_from, table_name_list) - return warm_start_from + warm_start_from_res = warm_start_from else: - pass + raise ValueError("Invalid parameter: warm_start_from. 
") + return warm_start_from_res def recover_warm_settings(setting_list): @@ -176,7 +175,7 @@ def _warm_settings_filter(warm_start_setting): if matching_tables: WarmStartController().add_element(warm_start_setting.ckpt_to_initialize_from, matching_tables) if vars_to_warm_start != ".*": - return None + return return warm_start_setting elif all(isinstance(v, str) for v in vars_to_warm_start): sparse_vars = [] @@ -215,7 +214,7 @@ def get_table_name_set_by_ckpt_path(warm_start_path: str) -> List[str]: return table_name_list -def get_latest_ckpt(warm_start_path) -> str: +def get_latest_ckpt(warm_start_path: str) -> str: ckpt_path = os.path.join(warm_start_path, "checkpoint") if not tf.io.gfile.exists(ckpt_path): raise FileNotFoundError(f"Checkpoint file is missing under the warm start model path {warm_start_path}") @@ -223,7 +222,6 @@ def get_latest_ckpt(warm_start_path) -> str: latest_ckpt = f.readline().rstrip() latest_ckpt = latest_ckpt.split(":")[1].strip(' ').replace('"', '') latest_ckpt = latest_ckpt.split("/")[-1] - path = os.path.join(warm_start_path, latest_ckpt) return path @@ -232,6 +230,8 @@ class SparseRestoreHook(tf.estimator.SessionRunHook): def __init__(self): logging.info("In warm start mode, SparseRestoreHook has been initialized.") self._is_warm_start = False + self._saver = None + self._warm_start_dict = {} def begin(self): self._saver = Saver() diff --git a/src/core/hybrid_mgmt/hybrid_mgmt.cpp b/src/core/hybrid_mgmt/hybrid_mgmt.cpp index 8eca48ba..78621829 100644 --- a/src/core/hybrid_mgmt/hybrid_mgmt.cpp +++ b/src/core/hybrid_mgmt/hybrid_mgmt.cpp @@ -296,7 +296,7 @@ bool HybridMgmt::Load(const string& loadPath, vector warmStartTables) vector loadFeatures; SetFeatureTypeForLoad(loadFeatures); - if(warmStartTables.size() == 0) { + if (warmStartTables.size() == 0) { EmbeddingMgmt::Instance()->Load(loadPath); } else { for (auto& tableName: warmStartTables) { diff --git a/src/pybind/module_main.cpp b/src/pybind/module_main.cpp index 4de10fc8..351d19a4 100644 --- a/src/pybind/module_main.cpp +++ b/src/pybind/module_main.cpp @@ -216,7 +216,8 @@ namespace { py::arg("seed") = DEFAULT_RANDOM_SEED, py::arg("threshold_values") = vector {}, py::arg("if_load") = false) .def("save", &MxRec::HybridMgmt::Save, py::arg("save_path") = "") - .def("load", &MxRec::HybridMgmt::Load, py::arg("load_path") = "", py::arg("warm_start_tables") = vector {}) + .def("load", &MxRec::HybridMgmt::Load, py::arg("load_path") = "", + py::arg("warm_start_tables") = vector {}) .def("destroy", &MxRec::HybridMgmt::Destroy) .def("evict", &MxRec::HybridMgmt::Evict) .def("send", &MxRec::HybridMgmt::SendHostMap, py::arg("table_name") = "") -- Gitee From 85419a98b260a950a2a0e6c37302cf022bec8c78 Mon Sep 17 00:00:00 2001 From: longfeifei <962977793@qq.com> Date: Tue, 21 May 2024 10:54:45 +0800 Subject: [PATCH 151/302] =?UTF-8?q?warm=20start=20=E8=A1=A5=E5=85=85?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mx_rec/saver/warm_start.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/mx_rec/saver/warm_start.py b/mx_rec/saver/warm_start.py index 9e2b2ba9..a29dc4ba 100644 --- a/mx_rec/saver/warm_start.py +++ b/mx_rec/saver/warm_start.py @@ -186,11 +186,11 @@ def _warm_settings_filter(warm_start_setting): sparse_vars.append(v) WarmStartController().add_element(warm_start_setting.ckpt_to_initialize_from, matching_tables) vars_to_warm_start_res = [v for v in vars_to_warm_start if v not in sparse_vars] - if not vars_to_warm_start_res: - 
warm_start_setting = None - else: - warm_start_setting.vars_to_warm_start = vars_to_warm_start_res - warm_start_setting_res = warm_start_setting + if vars_to_warm_start_res: + warm_start_setting_res = estimator_lib.WarmStartSettings( + ckpt_to_initialize_from=warm_start_setting.ckpt_to_initialize_from, + vars_to_warm_start=vars_to_warm_start_res, + var_name_to_prev_var_name=warm_start_setting.var_name_to_prev_var_name) else: raise ValueError("vars_to_warm_start must be list or str!") return warm_start_setting_res -- Gitee From e8674ed6b2527eeeec3d635257c177ab52c17978 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=BD=97=E5=B9=B8=E8=BF=90?= Date: Tue, 21 May 2024 03:49:04 +0000 Subject: [PATCH 152/302] =?UTF-8?q?!147=20cleancode=EF=BC=8C=E4=BD=BF?= =?UTF-8?q?=E7=94=A8SCAnchorAttr.ID=5FOFFSETS=20*=20=E3=80=90=E4=BF=AE?= =?UTF-8?q?=E6=94=B9=E8=AF=B4=E6=98=8E=20Modification=E3=80=91cleancode=20?= =?UTF-8?q?*=20Merge=20remote-tracking=20branch=20'upstream/develop'=20int?= =?UTF-8?q?o=20develop-bugfix=20*=20=E3=80=90=E4=BF=AE=E6=94=B9=E8=AF=B4?= =?UTF-8?q?=E6=98=8E=20Modification=E3=80=91=E5=A2=9E=E5=8A=A0=E5=BC=82?= =?UTF-8?q?=E5=B8=B8=E6=8D=95=E8=8E=B7=EF=BC=8C=E9=9D=9Ehbm=E6=A8=A1?= =?UTF-8?q?=E5=BC=8F=E4=B8=8B=E5=BF=85=E9=A1=BB=E4=BD=BF=E7=94=A8=E6=94=B9?= =?UTF-8?q?=E5=9B=BE=20*=20=E3=80=90=E4=BF=AE=E6=94=B9=E8=AF=B4=E6=98=8E?= =?UTF-8?q?=20Modification=E3=80=91=E5=A2=9E=E5=8A=A0=E5=BC=82=E5=B8=B8?= =?UTF-8?q?=E6=8D=95=E8=8E=B7=EF=BC=8C=E9=9D=9Ehbm=E6=A8=A1=E5=BC=8F?= =?UTF-8?q?=E4=B8=8B=E5=BF=85=E9=A1=BB=E4=BD=BF=E7=94=A8=E6=94=B9=E5=9B=BE?= =?UTF-8?q?=20*=20Merge=20remote-tracking=20branch=20'upstream/develop'=20?= =?UTF-8?q?into=20develop-bugfix=20*=20=E3=80=90=E4=BF=AE=E6=94=B9?= =?UTF-8?q?=E8=AF=B4=E6=98=8E=20Modification=E3=80=91=E5=85=A8=E5=B1=80uni?= =?UTF-8?q?que=E5=8A=9F=E8=83=BD=E5=9C=A8=E6=89=A9=E5=AE=B9=E6=A8=A1?= =?UTF-8?q?=E5=BC=8F=E4=B8=8B=EF=BC=8C=E8=A1=A8=E5=90=8D=E5=AD=97=E2=80=9C?= =?UTF-8?q?/=E2=80=9D=E9=9A=90=E6=82=A3=E4=BF=AE=E5=A4=8D=20*=20=E3=80=90?= =?UTF-8?q?=E4=BF=AE=E6=94=B9=E8=AF=B4=E6=98=8E=20Modification=E3=80=91?= =?UTF-8?q?=E5=85=A8=E5=B1=80unique=E5=8A=9F=E8=83=BD=E5=9C=A8=E6=89=A9?= =?UTF-8?q?=E5=AE=B9=E6=A8=A1=E5=BC=8F=E4=B8=8B=EF=BC=8C=E8=A1=A8=E5=90=8D?= =?UTF-8?q?=E5=AD=97=E2=80=9C/=E2=80=9D=E9=9A=90=E6=82=A3=E4=BF=AE?= =?UTF-8?q?=E5=A4=8D=20*=20=E3=80=90=E4=BF=AE=E6=94=B9=E8=AF=B4=E6=98=8E?= =?UTF-8?q?=20Modification=E3=80=91=E5=85=A8=E5=B1=80unique=E5=8A=9F?= =?UTF-8?q?=E8=83=BD=E5=9C=A8=E6=89=A9=E5=AE=B9=E6=A8=A1=E5=BC=8F=E4=B8=8B?= =?UTF-8?q?=EF=BC=8C=E8=A1=A8=E5=90=8D=E5=AD=97=E2=80=9C/=E2=80=9D?= =?UTF-8?q?=E9=9A=90=E6=82=A3=E4=BF=AE=E5=A4=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mx_rec/core/emb/dynamic_sparse_embedding.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mx_rec/core/emb/dynamic_sparse_embedding.py b/mx_rec/core/emb/dynamic_sparse_embedding.py index a7616991..4781491c 100644 --- a/mx_rec/core/emb/dynamic_sparse_embedding.py +++ b/mx_rec/core/emb/dynamic_sparse_embedding.py @@ -8,7 +8,7 @@ from typing import Optional, Union, Callable import tensorflow as tf from mx_rec.constants.constants import ASCEND_TABLE_NAME_MUST_CONTAIN, ASCEND_SPARSE_LOOKUP_LOCAL_EMB, \ - ASCEND_SPARSE_LOOKUP_ID_OFFSET + ASCEND_SPARSE_LOOKUP_ID_OFFSET, ASCAnchorAttr from mx_rec.core.asc.feature_spec import FeatureSpec from mx_rec.core.emb.base_sparse_embedding import BaseSparseEmbedding from mx_rec.util.initialize import ConfigInitializer @@ -42,7 +42,7 @@ class 
DynamicSparseEmbedding(BaseSparseEmbedding): def _get_sparse_forward_result(self, sparse_forward_fn: Callable, table: Union[tf.compat.v1.Variable, tf.Tensor], result: dict, is_training: bool) -> tf.Tensor: local_embeddings = import_host_pipeline_ops().embedding_lookup_by_address( - result.get("id_offsets"), embedding_dim=self._emb_size, embedding_type=1) + result.get(str(ASCAnchorAttr.ID_OFFSETS)), embedding_dim=self._emb_size, embedding_type=1) add_collection_condition = is_training and ( ASCEND_TABLE_NAME_MUST_CONTAIN is None or ASCEND_TABLE_NAME_MUST_CONTAIN in self._table_name) @@ -52,9 +52,9 @@ class DynamicSparseEmbedding(BaseSparseEmbedding): return sparse_forward_fn(local_embeddings) # 创建扩容查询tensor和table_instance的映射关系,以便优化器中使用 ConfigInitializer.get_instance().sparse_embed_config.insert_table_instance_to_tensor_dict( - result.get("id_offsets"), self) + result.get(str(ASCAnchorAttr.ID_OFFSETS)), self) tf.compat.v1.add_to_collection(ASCEND_SPARSE_LOOKUP_LOCAL_EMB, local_embeddings) - tf.compat.v1.add_to_collection(ASCEND_SPARSE_LOOKUP_ID_OFFSET, result.get("id_offsets")) + tf.compat.v1.add_to_collection(ASCEND_SPARSE_LOOKUP_ID_OFFSET, result.get(str(ASCAnchorAttr.ID_OFFSETS))) return sparse_forward_fn(local_embeddings) -- Gitee From 436b753b27260440560c9400e3fcfb3407b73b43 Mon Sep 17 00:00:00 2001 From: longfeifei <962977793@qq.com> Date: Tue, 21 May 2024 15:58:19 +0800 Subject: [PATCH 153/302] =?UTF-8?q?warm=20start=20=E8=A1=A5=E5=85=85typing?= =?UTF-8?q?=E5=92=8C=E5=87=BD=E6=95=B0=E6=B3=A8=E9=87=8A?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mx_rec/saver/warm_start.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/mx_rec/saver/warm_start.py b/mx_rec/saver/warm_start.py index a29dc4ba..baf01ce8 100644 --- a/mx_rec/saver/warm_start.py +++ b/mx_rec/saver/warm_start.py @@ -125,7 +125,10 @@ def warm_settings_filter(warm_start_from): return warm_start_from_res -def recover_warm_settings(setting_list): +def recover_warm_settings(setting_list: List[tf.estimator.WarmStartSettings]) -> tf.estimator.WarmStartSettings: + """ + Recover WarmStartSettings from a list of custom-defined WarmStartSettings. + """ ckpt_to_initialize_from_list = [] vars_to_warm_start_list = [] var_name_to_prev_var_name_list = [] @@ -140,7 +143,10 @@ def recover_warm_settings(setting_list): var_name_to_prev_var_name=var_name_to_prev_var_name_list) -def _build_warm_settings_list(warm_start_from): +def _build_warm_settings_list(warm_start_from: tf.estimator.WarmStartSettings) -> List[tf.estimator.WarmStartSettings]: + """ + Converts custom-defined WarmStartSettings into a list of TensorFlow-native WarmStartSettings. + """ ckpt_to_initialize_from = warm_start_from.ckpt_to_initialize_from vars_to_warm_start = warm_start_from.vars_to_warm_start var_name_to_prev_var_name = warm_start_from.var_name_to_prev_var_name @@ -165,7 +171,10 @@ def _build_warm_settings_list(warm_start_from): return warm_start_settings_list -def _warm_settings_filter(warm_start_setting): +def _warm_settings_filter(warm_start_setting: tf.estimator.WarmStartSettings) -> tf.estimator.WarmStartSettings: + """ + Filter the vars_to_warm_start parameter to remove sparse table parameters. 
+ """ vars_to_warm_start = warm_start_setting.vars_to_warm_start var_name_to_prev_var_name = warm_start_setting.var_name_to_prev_var_name vars_to_warm_start_res = [] @@ -175,8 +184,8 @@ def _warm_settings_filter(warm_start_setting): matching_tables = [table for table in table_name_list if re.match(vars_to_warm_start, table)] if matching_tables: WarmStartController().add_element(warm_start_setting.ckpt_to_initialize_from, matching_tables) - if vars_to_warm_start != ".*": - return warm_start_setting_res + if vars_to_warm_start != ".*": + return warm_start_setting_res warm_start_setting_res = warm_start_setting elif all(isinstance(v, str) for v in vars_to_warm_start): sparse_vars = [] -- Gitee From ea4c5f0a7aae9f68810398d11591809ada52a5b0 Mon Sep 17 00:00:00 2001 From: sihaixianyu Date: Tue, 21 May 2024 11:10:48 +0000 Subject: [PATCH 154/302] =?UTF-8?q?!144=20=E6=94=B9=E5=9B=BE=E6=94=AF?= =?UTF-8?q?=E6=8C=81=E4=BC=A0=E5=85=A5TF=E5=9B=BE=E5=AE=9E=E4=BE=8B=20*=20?= =?UTF-8?q?Adapt=20unit=20test=20for=20modifier.=20*=20Add=20inference=20m?= =?UTF-8?q?ode.=20*=20Add=20matrix=20factorization=20model.=20*=20Init=20f?= =?UTF-8?q?eat=20branch.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mx_rec/constants/constants.py | 4 +- mx_rec/core/embedding.py | 19 +- mx_rec/graph/__init__.py | 4 +- mx_rec/graph/constants.py | 1 - mx_rec/graph/hooks.py | 12 +- mx_rec/graph/merge_lookup.py | 3 +- mx_rec/graph/modifier.py | 1110 +++++++++++++-------------- mx_rec/graph/slicers.py | 68 +- mx_rec/graph/utils.py | 293 ++++--- tests/mx_rec/graph/test_modifier.py | 158 ++-- tests/mx_rec/graph/test_utils.py | 132 +++- 11 files changed, 923 insertions(+), 881 deletions(-) diff --git a/mx_rec/constants/constants.py b/mx_rec/constants/constants.py index f69f32c8..13b3d583 100644 --- a/mx_rec/constants/constants.py +++ b/mx_rec/constants/constants.py @@ -29,8 +29,8 @@ EMPTY_STR = "" # 获取ConfigInitializer对象实例失败提示信息 GET_CONFIG_INSTANCE_ERR_MSG = "Please init the environment for mx_rec at first." -# 自动改图模式下从计算图中寻找dataset的锚点名称 -ANCHOR_DATASET_NAME = "PrefetchDataset" +# Used for slicer finding the orphan lookup key. 
+ORPHAN_LOOKUP_KEY_PREFIX = "orphan" # the name of the embedding table merged by third party ASCEND_TABLE_NAME_MUST_CONTAIN = None diff --git a/mx_rec/core/embedding.py b/mx_rec/core/embedding.py index 16f19d04..348ab9d6 100644 --- a/mx_rec/core/embedding.py +++ b/mx_rec/core/embedding.py @@ -19,15 +19,17 @@ import os from typing import Optional, Union import tensorflow as tf +from tensorflow import Tensor from tensorflow.python.ops.init_ops import Initializer as InitializerV1 from tensorflow.python.ops.init_ops_v2 import Initializer as InitializerV2 +from mx_rec.constants import constants from mx_rec.core.asc.feature_spec import FeatureSpec from mx_rec.core.emb.base_sparse_embedding import BaseSparseEmbedding from mx_rec.core.emb.emb_factory import HBMDynamicSparseEmbeddingFactory, HBMSparseEmbeddingFactory, \ ExternalStorageSparseEmbeddingFactory from mx_rec.constants.constants import MAX_INT32, All2allGradientsOp, MAX_VOCABULARY_SIZE, MAX_DEVICE_VOCABULARY_SIZE -from mx_rec.graph.utils import mark_orphan_lookup_key +from mx_rec.graph.constants import AnchorIteratorOp from mx_rec.util.initialize import ConfigInitializer from mx_rec.validator.validator import ClassValidator, StringValidator, SSDFeatureValidator, \ para_checker_decorator, IntValidator, NumValidator, OptionValidator, OptionalIntValidator, \ @@ -184,3 +186,18 @@ def sparse_lookup(hashtable: BaseSparseEmbedding, ConfigInitializer.get_instance().modify_graph = modify_graph return hashtable.lookup(ids, send_count, **kwargs) + + +def mark_orphan_lookup_key(lookup_key: Tensor) -> Tensor: + graph_def = tf.compat.v1.get_default_graph().as_graph_def() + subgraph = tf.compat.v1.graph_util.extract_sub_graph(graph_def, [lookup_key.op.name]) + + for node in subgraph.node: + if node.op == AnchorIteratorOp.ITERATOR_GET_NEXT.value: + return lookup_key + + name_prefix = constants.ORPHAN_LOOKUP_KEY_PREFIX + marked_lookup_key = tf.identity(lookup_key, name="{}/{}".format(name_prefix, lookup_key.op.name)) + + logger.info('Mark orphan lookup key %s as %s.', lookup_key, marked_lookup_key) + return marked_lookup_key diff --git a/mx_rec/graph/__init__.py b/mx_rec/graph/__init__.py index 687e78ff..f1465971 100644 --- a/mx_rec/graph/__init__.py +++ b/mx_rec/graph/__init__.py @@ -16,13 +16,11 @@ # ============================================================================== __all__ = [ - "modify_graph_and_start_emb_cache", "GraphModifierHook", - "run", "LookupSubgraphSlicerHook", "OrphanLookupKeySlicerHook", + "modify_graph_and_start_emb_cache", ] from mx_rec.graph.modifier import GraphModifierHook, modify_graph_and_start_emb_cache -from mx_rec.graph.patch import run from mx_rec.graph.hooks import LookupSubgraphSlicerHook, OrphanLookupKeySlicerHook diff --git a/mx_rec/graph/constants.py b/mx_rec/graph/constants.py index 077405d6..6c67b201 100644 --- a/mx_rec/graph/constants.py +++ b/mx_rec/graph/constants.py @@ -15,7 +15,6 @@ # limitations under the License. 
# ============================================================================== - from enum import Enum diff --git a/mx_rec/graph/hooks.py b/mx_rec/graph/hooks.py index 5cf64b15..c97ae299 100644 --- a/mx_rec/graph/hooks.py +++ b/mx_rec/graph/hooks.py @@ -28,17 +28,15 @@ from mx_rec.validator.validator import ClassValidator, para_checker_decorator @para_checker_decorator( check_option_list=[ ("op_types", ClassValidator, {"classes": (list)}), - ("full_graph", ClassValidator, {"classes": (Graph, type(None))}), ] ) class LookupSubgraphSlicerHook(tf.estimator.SessionRunHook): - def __init__(self, op_types: List[Operation], full_graph: Graph = None) -> None: + def __init__(self, op_types: List[Operation]) -> None: super().__init__() self._op_types = op_types - self._full_graph = full_graph def begin(self) -> None: - slicer = LookupSubgraphSlicer(self._op_types, self._full_graph) + slicer = LookupSubgraphSlicer(self._op_types) logger.info("Starts to summarize sliceable specific operations in lookup subgraph!") slicer.summarize() @@ -47,14 +45,12 @@ class LookupSubgraphSlicerHook(tf.estimator.SessionRunHook): slicer.slice() -@para_checker_decorator(check_option_list=[("full_graph", ClassValidator, {"classes": (Graph, type(None))})]) class OrphanLookupKeySlicerHook(tf.estimator.SessionRunHook): - def __init__(self, full_graph: Graph = None) -> None: + def __init__(self) -> None: super().__init__() - self._full_graph = full_graph def begin(self) -> None: - slicer = OrphanLookupKeySlicer(self._full_graph) + slicer = OrphanLookupKeySlicer() logger.info("Starts to summarize sliceable orphan lookup keys!") slicer.summarize() diff --git a/mx_rec/graph/merge_lookup.py b/mx_rec/graph/merge_lookup.py index b28872e4..0b646cab 100644 --- a/mx_rec/graph/merge_lookup.py +++ b/mx_rec/graph/merge_lookup.py @@ -91,7 +91,8 @@ def do_merge_lookup(is_train: bool = True): if not ConfigInitializer.get_instance().use_static: kwargs["feature_spec_name_ids_dict"] = feature_spec_name_ids_dict lookup_result = table_instance.lookup_for_feat_spec(feature_spec, send_count, **kwargs) - replace_anchor_vec(cutting_point, ASCAnchorAttr.MOCK_LOOKUP_RESULT, lookup_result) + graph = tf.compat.v1.get_default_graph() + replace_anchor_vec(graph, cutting_point, ASCAnchorAttr.MOCK_LOOKUP_RESULT, lookup_result) logger.debug("The mock lookup result of %s for %s was replaced.", feature_spec.name, table_instance.table_name) # records whether the current mode has been merged or restored lookup diff --git a/mx_rec/graph/modifier.py b/mx_rec/graph/modifier.py index 6b6013d8..179de09f 100644 --- a/mx_rec/graph/modifier.py +++ b/mx_rec/graph/modifier.py @@ -21,32 +21,71 @@ from collections.abc import Callable from typing import Any, List, Dict, Tuple, DefaultDict import tensorflow as tf -from tensorflow import Operation, Tensor +from tensorflow import Operation, Tensor, Graph from tensorflow.core.framework.graph_pb2 import GraphDef from tensorflow.python.data.ops.dataset_ops import DatasetV1Adapter from tensorflow.python.framework.errors_impl import InvalidArgumentError -from mx_rec.constants.constants import ASCEND_CUTTING_POINT_INITIALIZER, ASCEND_SPARSE_LOOKUP_ENTRANCE, \ - ASCAnchorAttr, ASCEND_TIMESTAMP, MAX_WHILE_SIZE, LIBREC_EOS_OPS_SO +from mx_rec.graph import utils +from mx_rec.constants.constants import ( + ASCEND_CUTTING_POINT_INITIALIZER, + ASCEND_SPARSE_LOOKUP_ENTRANCE, + ASCAnchorAttr, + ASCEND_TIMESTAMP, + MAX_WHILE_SIZE, + LIBREC_EOS_OPS_SO, +) from mx_rec.core.asc.feature_spec import FeatureSpec from mx_rec.core.asc.helper import 
get_asc_insert_func from mx_rec.core.asc.manager import start_asc_pipeline from mx_rec.core.asc.swap_args import SwapArgs from mx_rec.core.emb.base_sparse_embedding import BaseSparseEmbedding from mx_rec.graph.merge_lookup import do_merge_lookup -from mx_rec.graph.utils import check_input_list, find_parent_op, check_cutting_points, record_ops_to_replace, \ - export_pb_graph, make_sorted_key_to_tensor_list, replace_anchor_control +from mx_rec.graph.utils import check_and_force_list, export_pb_graph from mx_rec.graph.constants import DeprecatedOp, AnchorDatasetOp, AnchorIteratorOp from mx_rec.util.initialize import ConfigInitializer from mx_rec.util.log import logger from mx_rec.util.ops import import_host_pipeline_ops from mx_rec.util.perf import performance -from mx_rec.util.tf_version_adapter import hccl_ops, npu_ops +from mx_rec.util.tf_version_adapter import npu_ops from mx_rec.validator.validator import para_checker_decorator, ClassValidator +class GraphModifierHook(tf.estimator.SessionRunHook): + @para_checker_decorator( + check_option_list=[ + ("dump_graph", ClassValidator, {"classes": (bool,)}), + ("modify_graph", ClassValidator, {"classes": (bool,)}), + ] + ) + def __init__(self, dump_graph: bool = False, modify_graph: bool = True): + self._dump_graph = dump_graph + self._modify_graph = modify_graph + self._iterator_type = None + + ConfigInitializer.get_instance().train_params_config.is_graph_modify_hook_running = True + + def begin(self): + if self._modify_graph: + modify_graph_and_start_emb_cache(dump_graph=self._dump_graph) + else: + start_asc_pipeline() + + self._iterator_type = ConfigInitializer.get_instance().train_params_config.iterator_type + if self._modify_graph and self._iterator_type not in ( + AnchorIteratorOp.MAKE_ITERATOR.value, + AnchorIteratorOp.ONE_SHOT_ITERATOR.value, + ): + raise ValueError("the value of iterator type should be like `MakeIterator` or `OneShotIterator`.") + logger.debug("In GraphModifierHook, iterator type is `%s`.", self._iterator_type) + + def after_create_session(self, session, coord): + if self._modify_graph and self._iterator_type == AnchorIteratorOp.MAKE_ITERATOR.value: + session.run(tf.compat.v1.get_collection(ASCEND_CUTTING_POINT_INITIALIZER)) + + @dataclasses.dataclass -class AnchorRecord: +class _AnchorRecord: replacement_spec: DefaultDict[Tensor, List[Tuple[int, Operation]]] passing_tensors: List[Tensor] batch_tensor_indexs: List[int] @@ -58,56 +97,437 @@ class AnchorRecord: input_indexs: List[int] = None -def get_preprocessing_map_func( +class _GraphModifier: + @para_checker_decorator( + check_option_list=[ + ("dump_graph", ClassValidator, {"classes": (bool,)}), + ("modify_graph", ClassValidator, {"classes": (bool,)}), + ] + ) + def __init__(self, full_graph: Graph = None, dump_graph: bool = False): + if not full_graph: + full_graph = tf.compat.v1.get_default_graph() + self._full_graph = full_graph + self._dump_graph = dump_graph + + @staticmethod + def _get_preprocessing_map_func( graph_def: GraphDef, input_names: List[str], output_names: List[str], batch_tensor_names: List[str] = None, - pipeline_input_indexes: List[int] = None -) -> Callable: - input_names = check_input_list(input_names, str) - output_names = check_input_list(output_names, str) - batch_tensor_names = check_input_list(batch_tensor_names, str) - pipeline_input_indexes = check_input_list(pipeline_input_indexes, int) - both_is_none = batch_tensor_names is None and pipeline_input_indexes is None - both_not_none = batch_tensor_names is not None and pipeline_input_indexes 
is not None - if both_is_none or both_not_none: - raise ValueError("It is legal when and only when one of the parameters 'batch_tensor_names' and " - "'pipeline_input_indexes' was given.") - - def map_func(*args): - logger.debug("In get_preprocessing_map_func, the old batch is: %s.", args) - batch = dict() - parse_batch(args, batch, key=None) - logger.debug("In get_preprocessing_map_func, the parse batch is: %s.", batch) - - input_tensors = [] - if batch_tensor_names is not None: - for tensor_name in batch_tensor_names: - tensor = batch.get(tensor_name) - if tensor is None: - raise ValueError(f"Given input_tensor_name '{tensor_name}' is invalid.") - - input_tensors.append(tensor) + pipeline_input_indexes: List[int] = None, + ) -> Callable: + input_names = check_and_force_list(input_names, str) + output_names = check_and_force_list(output_names, str) + batch_tensor_names = check_and_force_list(batch_tensor_names, str) + pipeline_input_indexes = check_and_force_list(pipeline_input_indexes, int) + both_is_none = batch_tensor_names is None and pipeline_input_indexes is None + both_not_none = batch_tensor_names is not None and pipeline_input_indexes is not None + if both_is_none or both_not_none: + raise ValueError( + "It is legal when and only when one of the parameters 'batch_tensor_names' and " + "'pipeline_input_indexes' was given." + ) + + def map_func(*args): + logger.debug("In get_preprocessing_map_func, the old batch is: %s.", args) + batch = dict() + _parse_batch(args, batch, key=None) + logger.debug("In get_preprocessing_map_func, the parse batch is: %s.", batch) + + input_tensors = [] + if batch_tensor_names is not None: + for tensor_name in batch_tensor_names: + tensor = batch.get(tensor_name) + if tensor is None: + raise ValueError(f"Given input_tensor_name '{tensor_name}' is invalid.") + + input_tensors.append(tensor) + + else: + graph = tf.compat.v1.get_default_graph() + for index in pipeline_input_indexes: + tensor = graph.get_tensor_by_name("args_%d:0" % index) + input_tensors.append(tensor) + + # 以tf.import_graph_def()作为read emb key的输入,保证数据读取到传入lookup的ids过程中的特征处理关系能够保留在子图中。 + output_list = tf.import_graph_def( + graph_def, input_map=dict(zip(input_names, input_tensors)), return_elements=output_names + ) + + output_batch = [batch, tuple(output_list)] + logger.debug("In get_preprocessing_map_func, the output batch is: %s.", output_batch) + return tuple(output_batch) + + return map_func + + @performance("graph_modifier") + def modify_graph_for_asc(self, prefetch: int = 10): + cutting_point_list = self._full_graph.get_collection(ASCEND_SPARSE_LOOKUP_ENTRANCE) + utils.check_cutting_points(cutting_point_list) + if not cutting_point_list: + logger.warning("Nothing to revise.") + return + + export_pb_graph("old_graph.pbtxt", self._dump_graph, graph_def=self._full_graph.as_graph_def()) + get_next_op_map = self._generate_get_next_op_specs(cutting_point_list) + logger.debug( + "In modify_graph_for_asc function, get_next_op_map.len: %d, get_next_op_map.key: %s.", + len(get_next_op_map), + get_next_op_map.keys(), + ) + + for get_next_op, record in get_next_op_map.items(): + is_training = record.is_training + + # get source dataset + src_dataset = self._get_src_dataset(get_next_op, is_training) + + # generate target dataset + timestamp_index = _get_timestamp_index(self._full_graph, get_next_op, is_training) + original_batch_tensor_count = _get_dataset_tensor_count(src_dataset) + sub_cutting_points = record.sub_cutting_points + input_index_list = _get_input_index_list( + sub_cutting_points, 
+ record.replacement_spec, + record.output_names, + original_batch_tensor_count, + timestamp_index=timestamp_index, + ) + record.input_indexs = input_index_list + + with self._full_graph.as_default(): + tgt_dataset = self._get_tgt_dataset(src_dataset, sub_cutting_points, record, prefetch=prefetch) + self._update_iterator_getnext(get_next_op, tgt_dataset, is_training, record) + + # In eval mode, backward is not required. In addition, compute gradients is not executed when + # only eval is used. Therefore, `do_merge_lookup` needs to be invoked during modify graph. + if not is_training: + with self._full_graph.as_default(): + do_merge_lookup(is_train=False) + if "evaluate" in ConfigInitializer.get_instance().train_params_config.bool_gauge_set: + logger.debug("In estimator mode, eval re-creates graph each time, so the flag needs to be cleared.") + ConfigInitializer.get_instance().train_params_config.insert_merged_multi_lookup(is_training, False) + # In training mode, `do_merge_lookup` should have been executed in compute gradients phase. + if is_training and not ConfigInitializer.get_instance().train_params_config.get_merged_multi_lookup(True): + raise RuntimeError( + "In training mode, `do_merge_lookup` should have been executed in compute gradients " + "phase. Please check whether compute gradients is performed." + ) + + self._modify_graph_for_ddr(get_next_op_map) + + logger.info("Graph has been revised.") + export_pb_graph("new_graph.pbtxt", self._dump_graph, graph_def=self._full_graph.as_graph_def()) + + def _modify_graph_for_ddr(self, get_next_op_map: Dict[Tensor, _AnchorRecord]): + # 通过create_hash_optimizer创建optimizer_instance + optimizer_instance = ConfigInitializer.get_instance().optimizer_config.optimizer_instance + # Predict mode + if optimizer_instance is None: + slot_num = 0 + else: + # DDR和扩容需要在获取优化器后重置ext + _change_ext_emb_size_by_opt(optimizer_instance) + slot_num = optimizer_instance.slot_num + + for _, record in get_next_op_map.items(): + is_training = record.is_training + channel_id = 0 if is_training else 1 + + swap_args = SwapArgs() + sparse_variables = self._full_graph.get_collection( + ConfigInitializer.get_instance().train_params_config.ascend_global_hashtable_collection + ) + + for each_var in sparse_variables: + table_instance = ConfigInitializer.get_instance().sparse_embed_config.get_table_instance(each_var) + if table_instance.is_hbm: + continue + swap_args_dict = swap_args.swap_config_dict[table_instance.table_name][channel_id] + swap_pos = swap_args_dict["swap_pos"] + swap_len = swap_args_dict["swap_len"] + variable_and_slot_list = _get_variable_and_slot_list( + each_var, slot_num, table_instance.table_name, channel_id + ) + + swap_op = _get_swap_info(table_instance, variable_and_slot_list, swap_len, swap_pos, channel_id) + swap_control_dict = swap_args.swap_control_dict[table_instance.table_name][channel_id] + if "control_ops" not in swap_control_dict: + raise ValueError("Missing Required key in modify_graph_for_asc: control_ops") + control_ops = swap_control_dict["control_ops"] + utils.replace_anchor_control(self._full_graph, control_ops, swap_op) + + def _generate_get_next_op_specs(self, cutting_point_list: List[Tensor]) -> Dict[Tensor, _AnchorRecord]: + get_next_op_map = defaultdict(dict) + + for input_tensor in cutting_point_list: + get_next_op = utils.upward_bfs_op(input_tensor.op, AnchorIteratorOp.ITERATOR_GET_NEXT.value) + if get_next_op not in get_next_op_map: + logger.debug("find a new get_next_op named '%s'", get_next_op.name) + + replacement_specs = 
utils.record_ops_to_replace(self._full_graph, get_next_op) + passing_tensors, batch_tensor_indexs, sub_cutting_points = _get_passing_tensor_list( + cutting_point_list, get_next_op + ) + sub_graph_def, input_names, output_names = self._get_sub_graph(passing_tensors, sub_cutting_points) + is_training = BaseSparseEmbedding.get_anchor_attribute(input_tensor, ASCAnchorAttr.IS_TRAINING) + + record = _AnchorRecord( + replacement_specs, + passing_tensors, + batch_tensor_indexs, + sub_cutting_points, + sub_graph_def, + input_names, + output_names, + is_training, + ) + get_next_op_map[get_next_op] = record + + export_pb_graph(f"cut_graph_{get_next_op.name}.pbtxt", self._dump_graph, graph_def=sub_graph_def) + + return get_next_op_map + + def _get_sub_graph( + self, input_tensors: List[Tensor], output_tensors: List[Tensor] + ) -> Tuple[GraphDef, List[str], List[str]]: + input_tensors = check_and_force_list(input_tensors, tf.Tensor) + output_tensors = check_and_force_list(output_tensors, tf.Tensor) + input_op_name_list = [tensor.op.name for tensor in input_tensors] + output_op_name_list = [tensor.op.name for tensor in output_tensors] + + graph_def = self._full_graph.as_graph_def() + cut_graph_input = tf.compat.v1.graph_util.extract_sub_graph(graph_def, input_op_name_list) + cut_graph_output = tf.compat.v1.graph_util.extract_sub_graph(graph_def, output_op_name_list) + + node_list = [] + node_list_input = cut_graph_input.node + node_list_output = cut_graph_output.node + for node in node_list_output: + if node not in node_list_input: + node_list.append(node) + + sub_graph_def = tf.compat.v1.GraphDef() + sub_graph_def.node.extend(node_list) + + input_name_list = [tensor.name for tensor in input_tensors] + output_name_list = [tensor.name for tensor in output_tensors] + + return sub_graph_def, input_name_list, output_name_list + + def _get_src_dataset(self, get_next_op: Operation, is_training: bool) -> DatasetV1Adapter: + """ + 根据`IteratorGetNext`算子在计算图中找出原始dataset. + + Args: + get_next_op: `IteratorGetNext`算子 + is_training: 当前是否为训练模式,训练模式为True,否则为False + + Returns: 原始数据集 + + """ + + try: + target_op = utils.find_trans_dataset(self._full_graph, get_next_op) + except (ValueError, TypeError, RuntimeError) as err: + logger.warning("The dataset op was not found, the error is `%s`. Start to traverse the operations.", err) + graph = self._full_graph + dataset_op_list = [op for op in graph.get_operations() if AnchorDatasetOp.PREFETCH_DATASET.value in op.name] + + # WARN: Couple with NoGradSubGraphSlicer::_find_old_dataset. + dataset_op_list = list( + filter( + lambda op: op not in self._full_graph.get_collection(DeprecatedOp.DEPRECATED_PREFETCH_DATASET), + dataset_op_list, + ) + ) + dataset_op_list = sorted(dataset_op_list, key=lambda op: op.name) + + logger.debug( + "In get_src_dataset function, current mode(train: True, eval: False): %s, dataset_op_list: %s.", + is_training, + dataset_op_list, + ) + + if len(dataset_op_list) == 1: + target_op = dataset_op_list[0] + elif is_training and len(dataset_op_list) == 2: + prefetch_dataset_op_list = sorted(dataset_op_list, key=lambda op: op.name) + target_op = prefetch_dataset_op_list[0] + elif not is_training and len(dataset_op_list) == 3: + prefetch_dataset_op_list = sorted(dataset_op_list, key=lambda op: op.name) + target_op = prefetch_dataset_op_list[1] + else: + raise RuntimeError( + f"'{AnchorDatasetOp.PREFETCH_DATASET.value}' not found, got transformation datasets: " + f"{dataset_op_list}." 
+ ) from err + except Exception as err: + raise RuntimeError(f"The dataset was not found, the error is `{err}`.") from err + + if not target_op.outputs: + raise ValueError(f"The length of the outputs of target op `{target_op}` is 0.") + logger.debug("Find target op `%s`, and output is `%s`.", target_op.name, target_op.outputs) + src_dataset = utils.find_target_instance_dataset(self._full_graph, target_op.outputs[0]) + return src_dataset + + def _get_tgt_dataset( + self, + src_dataset: DatasetV1Adapter, + sub_cutting_point_list: List[Tensor], + record: _AnchorRecord, + prefetch: int = 10, + ) -> DatasetV1Adapter: + """ + 根据原始数据集生成新的数据集实例. + + Args: + src_dataset: 原始数据集实例 + sub_cutting_point_list: 打桩的lookup ids列表 + records: 记录被打桩ids对应输入/输出算子、子图关系等信息的字典 + dump_graph: 是否dump计算图,默认为False + prefetch: dataset预取数据量,默认为10 + + Returns: 新数据集实例 + + """ + + librec = import_host_pipeline_ops(LIBREC_EOS_OPS_SO) + channel_id = ConfigInitializer.get_instance().train_params_config.get_training_mode_channel_id( + record.is_training + ) + # 在数据读取完时,通过EosDataset向acl数据通道发送end_of_sequence + max_train_steps = ConfigInitializer.get_instance().max_steps + max_eval_steps = ConfigInitializer.get_instance().eval_steps + src_dataset = src_dataset.eos_map(librec, channel_id, max_train_steps, max_eval_steps) + + tgt_dataset = src_dataset.map( + self._get_preprocessing_map_func( + record.sub_graph_def, + record.input_names, + record.output_names, + pipeline_input_indexes=record.batch_tensor_indexs, + ) + ) + + feature_numbers = [ + BaseSparseEmbedding.get_anchor_attribute(cutting_point, ASCAnchorAttr.FEATURE_SPEC).feat_cnt + for cutting_point in sub_cutting_point_list + ] + table_names = [ + BaseSparseEmbedding.get_anchor_attribute(cutting_point, ASCAnchorAttr.FEATURE_SPEC).table_name + for cutting_point in sub_cutting_point_list + ] + tgt_dataset = tgt_dataset.map( + get_asc_insert_func( + feature_numbers=feature_numbers, + table_names=table_names, + args_index_list=record.input_indexs, + is_training=record.is_training, + dump_graph=self._dump_graph, + ) + ) + + tgt_dataset = tgt_dataset.prefetch(prefetch) + return tgt_dataset + + def _update_iterator_getnext( + self, get_next_op: Operation, tgt_dataset: DatasetV1Adapter, is_training: bool, record: _AnchorRecord + ) -> None: + """ + 用新数据集中的`IteratorGetNext`算子替换计算图中原始数据集的`IteratorGetNext`算子,即用新数据集的batch替换原始数据集的batch. + Args: + get_next_op: `IteratorGetNext`算子 + tgt_dataset: 新数据集 + is_training: 当前是否为训练模式,训练模式为True,否则为False + records: 记录被打桩ids对应输入/输出算子、子图关系等信息的字典 + + Returns: None + + """ + if not get_next_op.outputs: + raise RuntimeError("there is no tensor in the dataset. Please check the dataset and data processing.") + iterator_type = "" + if get_next_op.outputs[0].op.inputs: + iterator_type = get_next_op.outputs[0].op.inputs[0].op.type + if iterator_type == "IteratorV2": + iterator_type = utils.find_make_iterator_op(self._full_graph, get_next_op.outputs[0]).type + if iterator_type not in (AnchorIteratorOp.MAKE_ITERATOR.value, AnchorIteratorOp.ONE_SHOT_ITERATOR.value): + raise RuntimeError( + f"Only iterators `MakeIterator` and `OneShotIterator` are supported in `graph modify` mode, " + f"but the current iterator is `{iterator_type}`." 
+ ) + ConfigInitializer.get_instance().train_params_config.iterator_type = iterator_type + logger.info("The iterator type of dataset is `%s`.", iterator_type) + + if iterator_type == AnchorIteratorOp.MAKE_ITERATOR.value: + new_iterator = tgt_dataset.make_initializable_iterator() + tf.compat.v1.add_to_collection(ASCEND_CUTTING_POINT_INITIALIZER, new_iterator.initializer) + ConfigInitializer.get_instance().train_params_config.set_initializer(is_training, new_iterator.initializer) else: - graph = tf.compat.v1.get_default_graph() - for index in pipeline_input_indexes: - tensor = graph.get_tensor_by_name("args_%d:0" % index) - input_tensors.append(tensor) + new_iterator = tgt_dataset.make_one_shot_iterator() + new_batch = new_iterator.get_next() + ConfigInitializer.get_instance().train_params_config.set_target_batch(is_training, new_batch) + + try: + new_batch_tensor = list(new_batch.values())[0] + except IndexError as err: + raise IndexError("Cannot find a tensor from given batch.") from err + new_get_next_op_name = utils.upward_bfs_op(new_batch_tensor.op, AnchorIteratorOp.ITERATOR_GET_NEXT.value).name + self._update_input_tensor_with_new_batch(record.replacement_spec, new_get_next_op_name, new_batch) + + def _update_input_tensor_with_new_batch( + self, + replacement_specs: DefaultDict[Tensor, List[Tuple[int, Operation]]], + new_get_next_op_name: str, + new_batch: Dict[str, Tensor], + ) -> None: + """ + 用新batch中的IteratorGetNext替换计算图中老batch的IteratorGetNext. - # 以tf.import_graph_def()作为read emb key的输入,保证数据读取到传入lookup的ids过程中的特征处理关系能够保留在子图中。 - output_list = tf.import_graph_def(graph_def, input_map=dict(zip(input_names, input_tensors)), - return_elements=output_names) + Args: + replacement_specs: 记录待替换算子的dict,key为老batch的IteratorGetNext,value为以老batch作为输入的算子 + new_get_next_op_name: 新数据集的get_next算子名称 + new_batch: 新数据集的batch + + Returns: None + + """ - output_batch = [batch, tuple(output_list)] - logger.debug("In get_preprocessing_map_func, the output batch is: %s.", output_batch) - return tuple(output_batch) + for old_tensor, item in replacement_specs.items(): + for idx, operator in item: + old_tensor_name = old_tensor.name + output_index = old_tensor_name.split(":")[-1] + new_tensor_name = f"{new_get_next_op_name}:{output_index}" + new_tensor = self._full_graph.get_tensor_by_name(new_tensor_name) + try: + operator._update_input(idx, new_tensor) + except InvalidArgumentError as err: + logger.info( + "The replacement specs keys (old batch) is: %s. \n\t\t The new batch is: %s.", + replacement_specs.keys(), + new_batch, + ) + raise RuntimeError( + f"Cannot update edge, old tensor: {old_tensor}, new tensor: {new_tensor}." + ) from err - return map_func + +@para_checker_decorator( + check_option_list=[ + ("dump_graph", ClassValidator, {"classes": (bool,)}), + ] +) +def modify_graph_and_start_emb_cache(full_graph: Graph = None, dump_graph: bool = False): + modifier = _GraphModifier(full_graph=full_graph, dump_graph=dump_graph) + modifier.modify_graph_for_asc() + start_asc_pipeline() -def parse_batch(data_args: Any, data_batch: dict, key: str = None): +def _parse_batch(data_args: Any, data_batch: dict, key: str = None): """ 解析原始数据集中的batch,并将非dict格式的batch转为dict格式. 
Args: @@ -131,7 +551,7 @@ def parse_batch(data_args: Any, data_batch: dict, key: str = None): """ - if key is not None: + if key: data_batch[key] = data_tensor return @@ -141,11 +561,11 @@ def parse_batch(data_args: Any, data_batch: dict, key: str = None): # 开始解析old batch if isinstance(data_args, dict): for key, data_tensor in data_args.items(): - parse_batch(data_tensor, data_batch, key) + _parse_batch(data_tensor, data_batch, key) return if isinstance(data_args, (list, tuple)): for data_arg in data_args: - parse_batch(data_arg, data_batch, key) + _parse_batch(data_arg, data_batch, key) return if isinstance(data_args, Tensor): # 将old batch中的tensor加入到dict中 @@ -155,12 +575,12 @@ def parse_batch(data_args: Any, data_batch: dict, key: str = None): raise ValueError(f"Invalid batch type, expected: (dict, list, tuple, Tensor), got: {type(data_args)}.") -def get_input_index_list( - cutting_point_list: List[Tensor], - replacement_specs: DefaultDict[Tensor, List[Tuple[int, Operation]]], - mapping_name_list: List[str], - base_count: int, - timestamp_index: int = None +def _get_input_index_list( + cutting_point_list: List[Tensor], + replacement_specs: DefaultDict[Tensor, List[Tuple[int, Operation]]], + mapping_name_list: List[str], + base_count: int, + timestamp_index: int = None, ) -> List[int]: input_index_list = [] for cutting_point in cutting_point_list: @@ -179,78 +599,8 @@ def get_input_index_list( return input_index_list -def find_make_iterator_op(batch_tensor: Tensor) -> Operation: - graph = tf.compat.v1.get_default_graph() - operations = graph.get_operations() - for each_op in operations: - for input_tensor in batch_tensor.op.inputs: - if input_tensor.op.outputs and input_tensor.op.outputs[0] in list( - each_op.inputs) and each_op.type == AnchorIteratorOp.MAKE_ITERATOR.value: - logger.debug("Op MakeIterator '%s' was found.", each_op.name) - return each_op - - raise ValueError(f"op MakeIterator was not found.") - - -@performance("find_target_dataset_op") -def find_target_dataset_op(base_ops: Operation, op_type: str) -> Operation: - base_ops = check_input_list(base_ops, tf.Operation) - parent_ops = base_ops - - while_num = 0 - while True: - while_num += 1 - if while_num > MAX_WHILE_SIZE: - raise RuntimeError(f"In find_target_dataset_op function, the maximum cycle depth is greater " - f"than {MAX_WHILE_SIZE}.") - for parent_op in parent_ops: - if parent_op.type == op_type: - return parent_op - - base_ops = parent_ops - parent_ops = [] - for base_op in base_ops: - parent_ops.extend(find_parent_op(base_op)) - - if not parent_ops: - raise ValueError(f"op {op_type} was not found.") - - -def get_dataset_op(get_next_op: Operation) -> Operation: - """ - 根据`IteratorGetNext`算子从图中找到`OptimizeDataset`的dataset op. - 注: TF2没有`OptimizeDataset`,则找的是dataset的默认锚点. 
- - Args: - get_next_op: `IteratorGetNext`算子 - - Returns: TF1返回`OptimizeDataset`算子,TF2返回dataset默认锚点的算子 - - """ - - if get_next_op.type != AnchorIteratorOp.ITERATOR_GET_NEXT.value: - raise TypeError(f"op '{get_next_op}' must be one instance of IteratorGetNext.") - - # looking for the MakeIterator operator which corresponds to given batch_tensor - base_op = find_make_iterator_op(get_next_op.outputs[0]) - # looking for the op which is the one before OptimizeDataset operator - if tf.__version__.startswith("1"): - optimize_dataset_op = find_target_dataset_op(base_op, AnchorDatasetOp.MODEL_DATASET.value) - target_op = find_parent_op(optimize_dataset_op) - if not target_op: - raise RuntimeError("the parent op for 'ModelDataset' op was not found.") - if target_op[0].type != AnchorDatasetOp.OPTIMIZE_DATASET.value: - raise TypeError("op OptimizeDataset was not found.") - target_op = target_op[0] - else: - # 'OptimizeDataset' is not available in TensorFlow2.X - target_op = find_target_dataset_op(base_op, AnchorDatasetOp.PREFETCH_DATASET.value) - return target_op - - -def get_passing_tensor_list( - src_tensors: List[Tensor], - target_op: Operation +def _get_passing_tensor_list( + src_tensors: List[Tensor], target_op: Operation ) -> Tuple[List[Tensor], List[int], List[Tensor]]: def get_passing_tensors(src_tensor): passing_tensors = [] @@ -259,8 +609,9 @@ def get_passing_tensor_list( while tensor_list: while_num += 1 if while_num > MAX_WHILE_SIZE: - raise RuntimeError(f"In get_passing_tensors function, the maximum cycle depth is greater " - f"than {MAX_WHILE_SIZE}.") + raise RuntimeError( + f"In get_passing_tensors function, the maximum cycle depth is greater " f"than {MAX_WHILE_SIZE}." + ) last_tensor = tensor_list.pop() if last_tensor.op is target_op: passing_tensors.append(last_tensor) @@ -269,7 +620,7 @@ def get_passing_tensor_list( return passing_tensors - src_tensors = check_input_list(src_tensors, Tensor) + src_tensors = check_and_force_list(src_tensors, Tensor) passing_tensor_list = [] sub_src_tensors = [] for tensor in src_tensors: @@ -288,83 +639,7 @@ def get_passing_tensor_list( return passing_tensor_list, output_index_list, sub_src_tensors -def find_target_instance_dataset(variant_tensor: Tensor) -> DatasetV1Adapter: - dataset_instance_list = tf.compat.v1.get_collection("dataset_group") - for ins in dataset_instance_list: - if ins._variant_tensor == variant_tensor: - if not isinstance(ins, DatasetV1Adapter): - ins = ins._input_dataset - logger.debug("Find target instance '%s', whose variant_tensor is '%s'.", ins, variant_tensor) - if not isinstance(ins.element_spec, dict) and not ( - isinstance(ins.element_spec, (list, tuple)) and len(ins.element_spec) == 2 and isinstance( - ins.element_spec[0], dict)): - raise NotImplementedError("the found dataset does not return a valid layout.") - - return ins - - raise LookupError(f"Can not find target instance, whose variant_tensor is '{variant_tensor}' respectively.") - - -def get_sub_graph( - input_tensors: List[Tensor], - output_tensors: List[Tensor] -) -> Tuple[GraphDef, List[str], List[str]]: - input_tensors = check_input_list(input_tensors, tf.Tensor) - output_tensors = check_input_list(output_tensors, tf.Tensor) - input_op_name_list = [tensor.op.name for tensor in input_tensors] - output_op_name_list = [tensor.op.name for tensor in output_tensors] - - graph_def = tf.compat.v1.get_default_graph().as_graph_def() - cut_graph_input = tf.compat.v1.graph_util.extract_sub_graph(graph_def, input_op_name_list) - cut_graph_output = 
tf.compat.v1.graph_util.extract_sub_graph(graph_def, output_op_name_list) - - node_list = [] - node_list_input = cut_graph_input.node - node_list_output = cut_graph_output.node - for node in node_list_output: - if node not in node_list_input: - node_list.append(node) - - sub_graph_def = tf.compat.v1.GraphDef() - sub_graph_def.node.extend(node_list) - - input_name_list = [tensor.name for tensor in input_tensors] - output_name_list = [tensor.name for tensor in output_tensors] - - return sub_graph_def, input_name_list, output_name_list - - -def update_input_tensor_with_new_batch(replacement_specs: DefaultDict[Tensor, List[Tuple[int, Operation]]], - new_get_next_op_name: str, - new_batch: Dict[str, Tensor]): - """ - 用新batch中的IteratorGetNext替换计算图中老batch的IteratorGetNext. - - Args: - replacement_specs: 记录待替换算子的dict,key为老batch的IteratorGetNext,value为以老batch作为输入的算子 - new_get_next_op_name: 新数据集的get_next算子名称 - new_batch: 新数据集的batch - - Returns: None - - """ - - graph = tf.compat.v1.get_default_graph() - for old_tensor, item in replacement_specs.items(): - for idx, operator in item: - old_tensor_name = old_tensor.name - output_index = old_tensor_name.split(":")[-1] - new_tensor_name = f"{new_get_next_op_name}:{output_index}" - new_tensor = graph.get_tensor_by_name(new_tensor_name) - try: - operator._update_input(idx, new_tensor) - except InvalidArgumentError as err: - logger.info("The replacement specs keys (old batch) is: %s. \n\t\t The new batch is: %s.", - replacement_specs.keys(), new_batch) - raise RuntimeError(f"Cannot update edge, old tensor: {old_tensor}, new tensor: {new_tensor}.") from err - - -def get_dataset_tensor_count(dataset: DatasetV1Adapter) -> int: +def _get_dataset_tensor_count(dataset: DatasetV1Adapter) -> int: """ 获取数据集中batch的tensor数量. @@ -378,12 +653,37 @@ def get_dataset_tensor_count(dataset: DatasetV1Adapter) -> int: src_element_spec = dataset.element_spec if not isinstance(src_element_spec, (list, tuple)): src_element_spec = [src_element_spec] - src_sorted_keys = make_sorted_key_to_tensor_list(src_element_spec, []) + src_sorted_keys = utils.make_sorted_key_to_tensor_list(src_element_spec, []) return len(src_sorted_keys) -def change_ext_emb_size_by_opt(optimizer): +def _get_timestamp_index(graph: Graph, get_next_op: Operation, is_training: bool) -> int: + timestamp_tensor_list = graph.get_collection(ASCEND_TIMESTAMP) + timestamp_index = None + for timestamp in timestamp_tensor_list: + if timestamp in get_next_op.outputs: + timestamp_index = int(timestamp.name.split(":")[1]) + timestamp_feature_spec = ConfigInitializer.get_instance().feature_spec_config.get_feature_spec("timestamp") + if timestamp_feature_spec is None: + timestamp_feature_spec = FeatureSpec("timestamp", index_key=timestamp_index, is_timestamp=True) + timestamp_feature_spec.include_timestamp(is_training) + ConfigInitializer.get_instance().feature_spec_config.insert_feature_spec( + timestamp_feature_spec, is_training + ) + break + + if timestamp_feature_spec.index_key != timestamp_index: + raise ValueError( + f"Given timestamp_index, which is {timestamp_index}, does not match index " + f"key. Please double check." 
+ ) + timestamp_feature_spec.include_timestamp(is_training) + break + return timestamp_index + + +def _change_ext_emb_size_by_opt(optimizer): for _, table_instance in ConfigInitializer.get_instance().sparse_embed_config.table_instance_dict.items(): # When dynamic expansion mode, ext_emb_size is set by optimizer if ConfigInitializer.get_instance().use_dynamic_expansion or not table_instance.is_hbm: @@ -391,197 +691,36 @@ def change_ext_emb_size_by_opt(optimizer): logger.info("ext_emb_size is reset to be %s in change_ext_emb_size_by_opt", table_instance.ext_emb_size) -@para_checker_decorator( - check_option_list=[("dump_graph", ClassValidator, {"classes": (bool,)})] -) -def modify_graph_and_start_emb_cache(dump_graph: bool = False): - modify_graph_for_asc(dump_graph=dump_graph) - start_asc_pipeline() - - -def generate_get_next_op_specs( - cutting_point_list: List[Tensor], - dump_graph: bool = False -) -> Dict[Tensor, AnchorRecord]: - get_next_op_map = defaultdict(dict) - - for input_tensor in cutting_point_list: - get_next_op = find_target_dataset_op(input_tensor.op, AnchorIteratorOp.ITERATOR_GET_NEXT.value) - if get_next_op not in get_next_op_map: - logger.debug("find a new get_next_op named '%s'", get_next_op.name) - - replacement_specs = record_ops_to_replace(get_next_op) - passing_tensors, batch_tensor_indexs, sub_cutting_points = \ - get_passing_tensor_list(cutting_point_list, get_next_op) - sub_graph_def, input_names, output_names = get_sub_graph(passing_tensors, sub_cutting_points) - is_training = BaseSparseEmbedding.get_anchor_attribute(input_tensor, ASCAnchorAttr.IS_TRAINING) - - record = AnchorRecord( - replacement_specs, - passing_tensors, - batch_tensor_indexs, - sub_cutting_points, - sub_graph_def, - input_names, - output_names, - is_training - ) - get_next_op_map[get_next_op] = record - - export_pb_graph(f"cut_graph_{get_next_op.name}.pb", dump_graph, graph_def=sub_graph_def) - - return get_next_op_map - - -def get_src_dataset(get_next_op: Operation, is_training: bool) -> DatasetV1Adapter: - """ - 根据`IteratorGetNext`算子在计算图中找出原始dataset. - - Args: - get_next_op: `IteratorGetNext`算子 - is_training: 当前是否为训练模式,训练模式为True,否则为False - - Returns: 原始数据集 - - """ +def _get_variable_and_slot_list(each_var, slot_num, table_name, channel_id): + variable_and_slot_list = [each_var] + if slot_num == 0: + return variable_and_slot_list - try: - target_op = get_dataset_op(get_next_op) - except (ValueError, TypeError, RuntimeError) as err: - logger.warning("The dataset op was not found, the error is `%s`. Start to traverse the operations.", err) - graph = tf.compat.v1.get_default_graph() - dataset_op_list = [op for op in graph.get_operations() if AnchorDatasetOp.PREFETCH_DATASET.value in op.name] - - # WARN: Couple with NoGradSubGraphSlicer::_find_old_dataset. 
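# (The fallback below scans every op whose name contains 'PrefetchDataset',
# drops the ones already deprecated by the slicer, and sorts the survivors by
# name. The selection rule is then, roughly: one candidate, take it; two
# candidates, take the first in training mode and the second in eval mode;
# three candidates in eval mode, take the middle one; anything else raises
# RuntimeError.)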
- dataset_op_list = list( - filter(lambda op: op not in tf.compat.v1.get_collection(DeprecatedOp.DEPRECATED_PREFETCH_DATASET), - dataset_op_list) + # 通过apply_gradients创建optimizer + optimizer = ConfigInitializer.get_instance().optimizer_config.get_optimizer_by_table_name(table_name) + if optimizer is None and channel_id == 0: + raise RuntimeError( + "In training mode, table_instance should have been set_optimizer_for_table " + "before modify_graph, please check whether apply_gradients is performed" ) - dataset_op_list = sorted(dataset_op_list, key=lambda op: op.name) - - logger.debug("In get_src_dataset function, current mode(train: True, eval: False): %s, dataset_op_list: %s.", - is_training, dataset_op_list) - - if len(dataset_op_list) == 1: - target_op = dataset_op_list[0] - elif is_training and len(dataset_op_list) == 2: - prefetch_dataset_op_list = sorted(dataset_op_list, key=lambda op: op.name) - target_op = prefetch_dataset_op_list[0] - elif not is_training and len(dataset_op_list) == 2: - prefetch_dataset_op_list = sorted(dataset_op_list, key=lambda op: op.name) - target_op = prefetch_dataset_op_list[1] - elif not is_training and len(dataset_op_list) == 3: - prefetch_dataset_op_list = sorted(dataset_op_list, key=lambda op: op.name) - target_op = prefetch_dataset_op_list[1] - else: - raise RuntimeError(f"'{AnchorDatasetOp.PREFETCH_DATASET.value}' not found, got transformation datasets: " - f"{dataset_op_list}.") from err - except Exception as err: - raise RuntimeError(f"The dataset was not found, the error is `{err}`.") from err - - if not target_op.outputs: - raise ValueError(f"The length of the outputs of target op `{target_op}` is 0.") - logger.debug("Find target op `%s`, and output is `%s`.", target_op.name, target_op.outputs) - src_dataset = find_target_instance_dataset(target_op.outputs[0]) - return src_dataset - - -def get_tgt_dataset( - src_dataset: DatasetV1Adapter, - sub_cutting_point_list: List[Tensor], - record: AnchorRecord, - dump_graph: bool = False, - prefetch: int = 10 -) -> DatasetV1Adapter: - """ - 根据原始数据集生成新的数据集实例. 
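    (That is: build the new dataset instance from the original one. The body
    below chains, in order, an eos_map that sends end_of_sequence to the ACL
    data channel once the data is exhausted, the sliced preprocessing subgraph
    applied as a map function, the ASC insert function for the stubbed lookup
    ids, and a trailing prefetch.)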
- - Args: - src_dataset: 原始数据集实例 - sub_cutting_point_list: 打桩的lookup ids列表 - records: 记录被打桩ids对应输入/输出算子、子图关系等信息的字典 - dump_graph: 是否dump计算图,默认为False - prefetch: dataset预取数据量,默认为10 - Returns: 新数据集实例 - - """ - - librec = import_host_pipeline_ops(LIBREC_EOS_OPS_SO) - channel_id = ConfigInitializer.get_instance().train_params_config.get_training_mode_channel_id( - record.is_training) - # 在数据读取完时,通过EosDataset向acl数据通道发送end_of_sequence - max_train_steps = ConfigInitializer.get_instance().max_steps - max_eval_steps = ConfigInitializer.get_instance().eval_steps - src_dataset = src_dataset.eos_map(librec, channel_id, max_train_steps, max_eval_steps) - - tgt_dataset = src_dataset.map(get_preprocessing_map_func(record.sub_graph_def, - record.input_names, - record.output_names, - pipeline_input_indexes=record.batch_tensor_indexs)) - - feature_numbers = [BaseSparseEmbedding.get_anchor_attribute(cutting_point, ASCAnchorAttr.FEATURE_SPEC).feat_cnt for - cutting_point in sub_cutting_point_list] - table_names = [BaseSparseEmbedding.get_anchor_attribute(cutting_point, ASCAnchorAttr.FEATURE_SPEC).table_name for - cutting_point in sub_cutting_point_list] - tgt_dataset = tgt_dataset.map(get_asc_insert_func(feature_numbers=feature_numbers, - table_names=table_names, - args_index_list=record.input_indexs, - is_training=record.is_training, - dump_graph=dump_graph)) - - tgt_dataset = tgt_dataset.prefetch(prefetch) - return tgt_dataset - - -def update_iterator_getnext(get_next_op: Operation, - tgt_dataset: DatasetV1Adapter, - is_training: bool, - record: AnchorRecord): - """ - 用新数据集中的`IteratorGetNext`算子替换计算图中原始数据集的`IteratorGetNext`算子,即用新数据集的batch替换原始数据集的batch. - - Args: - get_next_op: `IteratorGetNext`算子 - tgt_dataset: 新数据集 - is_training: 当前是否为训练模式,训练模式为True,否则为False - records: 记录被打桩ids对应输入/输出算子、子图关系等信息的字典 - - Returns: None - - """ - if not get_next_op.outputs: - raise RuntimeError("there is no tensor in the dataset. 
Please check the dataset and data processing.") - iterator_type = "" - if get_next_op.outputs[0].op.inputs: - iterator_type = get_next_op.outputs[0].op.inputs[0].op.type - if iterator_type == "IteratorV2": - iterator_type = find_make_iterator_op(get_next_op.outputs[0]).type - if iterator_type not in (AnchorIteratorOp.MAKE_ITERATOR.value, AnchorIteratorOp.ONE_SHOT_ITERATOR.value): - raise RuntimeError(f"Only iterators `MakeIterator` and `OneShotIterator` are supported in `graph modify` mode, " - f"but the current iterator is `{iterator_type}`.") - ConfigInitializer.get_instance().train_params_config.iterator_type = iterator_type - logger.info("The iterator type of dataset is `%s`.", iterator_type) - - if iterator_type == AnchorIteratorOp.MAKE_ITERATOR.value: - new_iterator = tgt_dataset.make_initializable_iterator() - tf.compat.v1.add_to_collection(ASCEND_CUTTING_POINT_INITIALIZER, new_iterator.initializer) - ConfigInitializer.get_instance().train_params_config.set_initializer(is_training, new_iterator.initializer) + # predict不需要传优化器,但是如果客户创建了优化器,ddr模式加载的是维度ext_size的emb用作换入换出,所以需要给slot零值占位 + if optimizer is None and channel_id == 1: + slot_place_holder = tf.zeros_like(each_var) + for _ in range(slot_num): + variable_and_slot_list.append(slot_place_holder) else: - new_iterator = tgt_dataset.make_one_shot_iterator() - new_batch = new_iterator.get_next() - ConfigInitializer.get_instance().train_params_config.set_target_batch(is_training, new_batch) + # opt name to slot dict + for slot_dict in optimizer.values(): + for slot_val in slot_dict.values(): + variable_and_slot_list.append(slot_val) - try: - new_batch_tensor = list(new_batch.values())[0] - except IndexError as err: - raise IndexError("Cannot find a tensor from given batch.") from err - new_get_next_op_name = find_target_dataset_op(new_batch_tensor.op, AnchorIteratorOp.ITERATOR_GET_NEXT.value).name - update_input_tensor_with_new_batch(record.replacement_spec, new_get_next_op_name, new_batch) + return variable_and_slot_list -def get_swap_info(table_instance: BaseSparseEmbedding, variable_and_slot_list: list, swap_len: int, swap_pos: list, - channel_id: int) -> list: +def _get_swap_info( + table_instance: BaseSparseEmbedding, variable_and_slot_list: list, swap_len: int, swap_pos: list, channel_id: int +) -> list: """ Get swap info if threshold is configured. 
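    (Rough flow of the swap path built below: host-side embedding rows arrive
    through the '{table}_h2d_{channel}' get_next channel and are truncated to
    swap_len rows where necessary; the current device rows at swap_pos are
    gathered from the table and every optimizer slot, concatenated, and pushed
    back through the '{table}_d2h_{channel}' outfeed; the incoming rows are
    then split per table/slot and written in place with scatter_nd_update.)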
:param table_instance: BaseSparseEmbedding @@ -600,11 +739,12 @@ def get_swap_info(table_instance: BaseSparseEmbedding, variable_and_slot_list: l swap_in = [tf.no_op()] else: with tf.compat.v1.variable_scope("h2d_emb"): - logger.debug('Channel %s_h2d_%s was built for getnext', table_instance.table_name, channel_id) + logger.debug("Channel %s_h2d_%s was built for getnext", table_instance.table_name, channel_id) h2d_emb = npu_ops.gen_npu_ops.get_next( output_types=[tf.float32], output_shapes=[[max_lookup_vec_size, table_instance.ext_emb_size]], - channel_name=f'{table_instance.table_name}_h2d_{channel_id}')[0] + channel_name=f"{table_instance.table_name}_h2d_{channel_id}", + )[0] logger.debug("h2d_emb shape: %s", h2d_emb) if not isinstance(variable_and_slot_list, list): raise RuntimeError("When enable emb_transfer, optimizer should have slots") @@ -613,184 +753,22 @@ def get_swap_info(table_instance: BaseSparseEmbedding, variable_and_slot_list: l h2d_emb = h2d_emb[0:swap_len, :] swap_outs = [tf.gather(one_table, swap_pos) for one_table in variable_and_slot_list] swap_out = tf.concat(swap_outs, axis=1) - logger.debug('Channel %s_d2h_%s was built for op outfeed.', table_instance.table_name, channel_id) + logger.debug("Channel %s_d2h_%s was built for op outfeed.", table_instance.table_name, channel_id) swap_out_op = npu_ops.outfeed_enqueue_op( - channel_name=f'{table_instance.table_name}_d2h_{channel_id}', inputs=[swap_out]) + channel_name=f"{table_instance.table_name}_d2h_{channel_id}", inputs=[swap_out] + ) with tf.control_dependencies([swap_out_op]): nd_swap_pos = tf.expand_dims(swap_pos, 1) table_num = len(variable_and_slot_list) h2d_emb_split = tf.split(h2d_emb, table_num, axis=1) optimizer = ConfigInitializer.get_instance().optimizer_config.get_optimizer_by_table_name( - table_instance.table_name) + table_instance.table_name + ) if optimizer is None and channel_id == 1: swap_in = [tf.compat.v1.scatter_nd_update(variable_and_slot_list[0], nd_swap_pos, h2d_emb_split[0])] else: - swap_in = [tf.compat.v1.scatter_nd_update(variable_and_slot_list[i], nd_swap_pos, h2d_emb_split[i]) - for i in range(len(variable_and_slot_list))] + swap_in = [ + tf.compat.v1.scatter_nd_update(variable_and_slot_list[i], nd_swap_pos, h2d_emb_split[i]) + for i in range(len(variable_and_slot_list)) + ] return swap_in - - -def get_variable_and_slot_list(each_var, slot_num, table_name, channel_id): - variable_and_slot_list = [each_var] - if slot_num == 0: - return variable_and_slot_list - - # 通过apply_gradients创建optimizer - optimizer = ConfigInitializer.get_instance().optimizer_config.get_optimizer_by_table_name(table_name) - if optimizer is None and channel_id == 0: - raise RuntimeError("In training mode, table_instance should have been set_optimizer_for_table " - "before modify_graph, please check whether apply_gradients is performed") - - # predict不需要传优化器,但是如果客户创建了优化器,ddr模式加载的是维度ext_size的emb用作换入换出,所以需要给slot零值占位 - if optimizer is None and channel_id == 1: - slot_place_holder = tf.zeros_like(each_var) - for _ in range(slot_num): - variable_and_slot_list.append(slot_place_holder) - else: - # opt name to slot dict - for slot_dict in optimizer.values(): - for slot_val in slot_dict.values(): - variable_and_slot_list.append(slot_val) - - return variable_and_slot_list - - -def modify_graph_for_ddr(get_next_op_map): - # 通过create_hash_optimizer创建optimizer_instance - optimizer_instance = ConfigInitializer.get_instance().optimizer_config.optimizer_instance - # predict - if optimizer_instance is None: - slot_num = 0 - else: - # 
ddr和扩容需要在获取优化器后重置ext - change_ext_emb_size_by_opt(optimizer_instance) - slot_num = optimizer_instance.slot_num - - for _, record in get_next_op_map.items(): - is_training = record.is_training - channel_id = 0 if is_training else 1 - - swap_args = SwapArgs() - sparse_variables = tf.compat.v1.get_collection( - ConfigInitializer.get_instance().train_params_config.ascend_global_hashtable_collection) - - for each_var in sparse_variables: - table_instance = ConfigInitializer.get_instance().sparse_embed_config.get_table_instance(each_var) - if table_instance.is_hbm: - continue - swap_args_dict = swap_args.swap_config_dict[table_instance.table_name][channel_id] - swap_pos = swap_args_dict['swap_pos'] - swap_len = swap_args_dict['swap_len'] - variable_and_slot_list = get_variable_and_slot_list(each_var, slot_num, table_instance.table_name, - channel_id) - - swap_op = get_swap_info(table_instance, variable_and_slot_list, swap_len, swap_pos, channel_id) - swap_control_dict = swap_args.swap_control_dict[table_instance.table_name][channel_id] - if "control_ops" not in swap_control_dict: - raise ValueError("Missing Required key in modify_graph_for_asc: control_ops") - control_ops = swap_control_dict['control_ops'] - replace_anchor_control(control_ops, swap_op) - - -@performance("graph_modifier") -def modify_graph_for_asc(dump_graph: bool = False, prefetch: int = 10): - cutting_point_list = tf.compat.v1.get_collection(ASCEND_SPARSE_LOOKUP_ENTRANCE) - check_cutting_points(cutting_point_list) - if not cutting_point_list: - logger.warning("Nothing to revise.") - return - - export_pb_graph("old_graph.pb", dump_graph) - get_next_op_map = generate_get_next_op_specs(cutting_point_list, dump_graph) - logger.debug("In modify_graph_for_asc function, get_next_op_map.len: %d, get_next_op_map.key: %s.", - len(get_next_op_map), get_next_op_map.keys()) - - for get_next_op, record in get_next_op_map.items(): - is_training = record.is_training - - # get source dataset - src_dataset = get_src_dataset(get_next_op, is_training) - - # generate target dataset - timestamp_index = get_timestamp_index(get_next_op, is_training) - original_batch_tensor_count = get_dataset_tensor_count(src_dataset) - sub_cutting_points = record.sub_cutting_points - input_index_list = get_input_index_list(sub_cutting_points, - record.replacement_spec, - record.output_names, - original_batch_tensor_count, timestamp_index=timestamp_index) - record.input_indexs = input_index_list - tgt_dataset = get_tgt_dataset(src_dataset, sub_cutting_points, record, - dump_graph=dump_graph, prefetch=prefetch) - - # update the batch of dataset - update_iterator_getnext(get_next_op, tgt_dataset, is_training, record) - - # In eval mode, backward is not required. In addition, compute gradients is not executed when - # only eval is used. Therefore, `do_merge_lookup` needs to be invoked during modify graph. - if not is_training: - do_merge_lookup(is_train=False) - if 'evaluate' in ConfigInitializer.get_instance().train_params_config.bool_gauge_set: - logger.debug("In estimator mode, eval re-creates graph each time, so the flag needs to be cleared.") - ConfigInitializer.get_instance().train_params_config.insert_merged_multi_lookup(is_training, False) - # In training mode, `do_merge_lookup` should have been executed in compute gradients phase. - if is_training and not ConfigInitializer.get_instance().train_params_config.get_merged_multi_lookup(True): - raise RuntimeError("In training mode, `do_merge_lookup` should have been executed in compute gradients " - "phase. 
Please check whether compute gradients is performed.") - # ddr - modify_graph_for_ddr(get_next_op_map) - - logger.info("Graph has been revised.") - export_pb_graph("new_graph.pb", dump_graph) - - -def get_timestamp_index(get_next_op: Operation, is_training: bool) -> int: - timestamp_tensor_list = tf.compat.v1.get_collection(ASCEND_TIMESTAMP) - timestamp_index = None - for timestamp in timestamp_tensor_list: - if timestamp in get_next_op.outputs: - timestamp_index = int(timestamp.name.split(":")[1]) - timestamp_feature_spec = ConfigInitializer.get_instance().feature_spec_config.get_feature_spec("timestamp") - if timestamp_feature_spec is None: - timestamp_feature_spec = FeatureSpec("timestamp", index_key=timestamp_index, is_timestamp=True) - timestamp_feature_spec.include_timestamp(is_training) - ConfigInitializer.get_instance().feature_spec_config.insert_feature_spec(timestamp_feature_spec, - is_training) - break - - if timestamp_feature_spec.index_key != timestamp_index: - raise ValueError(f"Given timestamp_index, which is {timestamp_index}, does not match index " - f"key. Please double check.") - timestamp_feature_spec.include_timestamp(is_training) - break - return timestamp_index - - -class GraphModifierHook(tf.estimator.SessionRunHook): - @para_checker_decorator( - check_option_list=[ - ("dump_graph", ClassValidator, {"classes": (bool,)}), - ("modify_graph", ClassValidator, {"classes": (bool,)}) - ] - ) - def __init__(self, dump_graph=False, modify_graph=True): - self._dump_graph = dump_graph - self._modify_graph = modify_graph - self._iterator_type = "" - ConfigInitializer.get_instance().train_params_config.is_graph_modify_hook_running = True - - def begin(self): - if self._modify_graph: - modify_graph_and_start_emb_cache(dump_graph=self._dump_graph) - else: - start_asc_pipeline() - - self._iterator_type = ConfigInitializer.get_instance().train_params_config.iterator_type - if self._modify_graph and self._iterator_type not in (AnchorIteratorOp.MAKE_ITERATOR.value, - AnchorIteratorOp.ONE_SHOT_ITERATOR.value): - raise ValueError("the value of iterator type should be like `MakeIterator` or `OneShotIterator`.") - logger.debug("In GraphModifierHook, iterator type is `%s`.", self._iterator_type) - - def after_create_session(self, session, coord): - if self._modify_graph and self._iterator_type == AnchorIteratorOp.MAKE_ITERATOR.value: - session.run(tf.compat.v1.get_collection(ASCEND_CUTTING_POINT_INITIALIZER)) diff --git a/mx_rec/graph/slicers.py b/mx_rec/graph/slicers.py index a4014195..c86e60f1 100644 --- a/mx_rec/graph/slicers.py +++ b/mx_rec/graph/slicers.py @@ -24,14 +24,15 @@ import tensorflow as tf from tensorflow import Operation, Tensor, SparseTensor, Graph, variant, resource from tensorflow.python.data.ops.dataset_ops import DatasetV1Adapter -from mx_rec.graph import utils, modifier +from mx_rec.graph import utils from mx_rec.util.log import logger from mx_rec.validator.validator import ClassValidator, para_checker_decorator from mx_rec.constants.constants import ( + ASCAnchorAttr, ASCEND_TIMESTAMP, MAX_WHILE_SIZE, - ASCAnchorAttr, ASCEND_SPARSE_LOOKUP_ENTRANCE, + ORPHAN_LOOKUP_KEY_PREFIX ) from mx_rec.graph.constants import DeprecatedOp, AnchorDatasetOp, AnchorIteratorOp from mx_rec.core.emb.base_sparse_embedding import BaseSparseEmbedding @@ -145,22 +146,6 @@ class NoGradSubgraphSlicer(metaclass=abc.ABCMeta): res.add(base_ops) out_op_to_edge_ops[output_consumer] = res - @staticmethod - def _upward_bfs_op(base_ops: Union[Operation, Set[Operation], List[Operation]], tgt_op_type: 
str) -> Operation: - if not isinstance(base_ops, (set, list)): - base_ops = [base_ops] - - parent_ops = base_ops - while True: - for parent_op in parent_ops: - if parent_op.type == tgt_op_type: - return parent_op - base_ops = parent_ops - parent_ops = [] - for base_op in base_ops: - parent_ops.extend(utils.find_parent_op(base_op)) - if not parent_ops: - raise ValueError(f"target operation '{tgt_op_type}'' was not found.") @staticmethod def _topo_sort_sliced_ops(sliced_ops: Set[Operation]) -> List[Operation]: @@ -386,9 +371,9 @@ class NoGradSubgraphSlicer(metaclass=abc.ABCMeta): old_get_next: The old 'IteratorGetNext' operation. """ - old_get_next = self._upward_bfs_op(sliceable_ops, AnchorIteratorOp.ITERATOR_GET_NEXT.value) + old_get_next = utils.upward_bfs_op(sliceable_ops, AnchorIteratorOp.ITERATOR_GET_NEXT.value) - tf.compat.v1.add_to_collection(DeprecatedOp.DEPRECATED_ITERATOR_GET_NEXT, old_get_next) + self._full_graph.add_to_collection(DeprecatedOp.DEPRECATED_ITERATOR_GET_NEXT, old_get_next) logger.info("Old 'IteratorGetNext' operation has been deprecated now.") return old_get_next @@ -412,7 +397,7 @@ class NoGradSubgraphSlicer(metaclass=abc.ABCMeta): tgt_trans_dataset = None try: - tgt_trans_dataset = self._find_trans_dataset(get_next) + tgt_trans_dataset = utils.find_trans_dataset(self._full_graph, get_next) except (ValueError, TypeError, RuntimeError) as err: trans_datasets = [ op for op in self._full_graph.get_operations() if AnchorDatasetOp.PREFETCH_DATASET.value in op.name @@ -442,39 +427,10 @@ class NoGradSubgraphSlicer(metaclass=abc.ABCMeta): # WARN: Couple with modifier module, global collection used for filtering deprecated prefetch dataset. self._full_graph.add_to_collection(DeprecatedOp.DEPRECATED_PREFETCH_DATASET, tgt_trans_dataset) - old_dataset = modifier.find_target_instance_dataset(tgt_trans_dataset.outputs[0]) + old_dataset = utils.find_target_instance_dataset(self._full_graph, tgt_trans_dataset.outputs[0]) return old_dataset - def _find_trans_dataset(self, get_next: Operation) -> Operation: - """Find the transformation dataset through 'get_next'. - - Args: - get_next: The old 'IteratorGetNext' operation. - - Returns: - trans_dataset: The target transformation dataset. 
- """ - - if get_next.type != AnchorIteratorOp.ITERATOR_GET_NEXT.value: - raise TypeError(f"operation '{get_next}' must be one instance of 'IteratorGetNext'.") - - make_iter = modifier.find_make_iterator_op(get_next.outputs[0]) - - trans_dataset = None - if tf.__version__.startswith("1"): - optimize_dataset_op = self._upward_bfs_op(make_iter, AnchorDatasetOp.MODEL_DATASET.value) - trans_dataset = utils.find_parent_op(optimize_dataset_op) - if not trans_dataset: - raise RuntimeError("parent operation of 'ModelDataset' was not found.") - if trans_dataset[0].type != AnchorDatasetOp.OPTIMIZE_DATASET.value: - raise TypeError(f"operation 'OptimizeDataset' was not found.") - trans_dataset = trans_dataset[0] - else: - trans_dataset = self._upward_bfs_op(make_iter, AnchorDatasetOp.PREFETCH_DATASET.value) - - return trans_dataset - def _clone_subgraph_into_funcgraph( self, sliced_ops: Set[Operation], @@ -546,7 +502,7 @@ class NoGradSubgraphSlicer(metaclass=abc.ABCMeta): if old_get_next.inputs: iter_type = old_get_next.inputs[0].op.type if iter_type == AnchorIteratorOp.ITERATOR_V2.value: - iter_type = modifier.find_make_iterator_op(old_get_next.outputs[0]).type + iter_type = utils.find_make_iterator_op(self._full_graph, old_get_next.outputs[0]).type if iter_type not in (AnchorIteratorOp.MAKE_ITERATOR.value, AnchorIteratorOp.ONE_SHOT_ITERATOR.value): raise RuntimeError( f"only iterators `MakeIterator` and `OneShotIterator` are supported in `graph modify` mode, " @@ -585,7 +541,7 @@ class NoGradSubgraphSlicer(metaclass=abc.ABCMeta): except IndexError as err: raise IndexError("cannot find a tensor from given batch.") from err - new_get_next = self._upward_bfs_op(new_batch_tensor.op, AnchorIteratorOp.ITERATOR_GET_NEXT.value) + new_get_next = utils.upward_bfs_op(new_batch_tensor.op, AnchorIteratorOp.ITERATOR_GET_NEXT.value) logger.info("Got old_new_get_next: %s.", new_get_next) return new_get_next @@ -824,8 +780,6 @@ class LookupSubgraphSlicer(NoGradSubgraphSlicer): ] ) class OrphanLookupKeySlicer(NoGradSubgraphSlicer): - SLICEABLE_ORPHAN_LOOKUP_KEY_PREFIX = "orphan" - def __init__(self, full_graph: Graph = None, info_dir: str = "orphan_slicing") -> None: """Initialize OrphanLookupKeySlicer. 
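        (An "orphan" lookup key is a lookup-ids tensor that is not produced by
        'IteratorGetNext'; such tensors are marked with the orphan name prefix
        when created, and this slicer collects sliceable ops by matching that
        prefix, now taken from the shared ORPHAN_LOOKUP_KEY_PREFIX constant.)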
Args: @@ -887,7 +841,7 @@ class OrphanLookupKeySlicer(NoGradSubgraphSlicer): ] alive_get_nexts = list( filter( - lambda op: op not in tf.compat.v1.get_collection(DeprecatedOp.DEPRECATED_ITERATOR_GET_NEXT), + lambda op: op not in self._full_graph.get_collection(DeprecatedOp.DEPRECATED_ITERATOR_GET_NEXT), all_get_nexts, ) ) @@ -928,7 +882,7 @@ class OrphanLookupKeySlicer(NoGradSubgraphSlicer): for op in min_dep_ops: if not self._validate_op(op): continue - if OrphanLookupKeySlicer.SLICEABLE_ORPHAN_LOOKUP_KEY_PREFIX not in op.name: + if ORPHAN_LOOKUP_KEY_PREFIX not in op.name: continue sliceable_ops.add(op) diff --git a/mx_rec/graph/utils.py b/mx_rec/graph/utils.py index ca328ae3..17f071ac 100644 --- a/mx_rec/graph/utils.py +++ b/mx_rec/graph/utils.py @@ -17,32 +17,64 @@ import os from collections import defaultdict -from typing import List, Dict, Union, DefaultDict, Tuple +from typing import List, Dict, Set, Union, DefaultDict, Tuple import tensorflow as tf -from tensorflow import Operation, Tensor +from tensorflow import Operation, Tensor, Graph from tensorflow.core.framework.graph_pb2 import GraphDef +from tensorflow.python.data.ops.dataset_ops import DatasetV1Adapter from tensorflow.python.framework.errors_impl import InvalidArgumentError from tensorflow.python.ops import control_flow_ops -from mx_rec.graph.slicers import OrphanLookupKeySlicer -from mx_rec.graph.constants import AnchorIteratorOp +from mx_rec.graph.constants import AnchorDatasetOp, AnchorIteratorOp from mx_rec.constants.constants import ASCAnchorAttr, DUMP_MIDIFY_GRAPH_FILE_MODE from mx_rec.core.embedding import BaseSparseEmbedding -from mx_rec.core.asc.swap_args import SwapArgs, SwapDataType from mx_rec.util.log import logger -def check_input_list(objs: Union[object, List[object]], obj_type: type) -> Union[object, List[object]]: - if isinstance(objs, obj_type): - objs = [objs] +def find_trans_dataset(graph: Graph, get_next: Operation) -> Operation: + """Find the transformation dataset through 'get_next'. - if isinstance(objs, list): - for tensor in objs: - if not isinstance(tensor, obj_type): - raise ValueError(f"Given input parameter must be a {obj_type} or a list of {obj_type}") + Args: + get_next: The old 'IteratorGetNext' operation. + + Returns: + trans_dataset: The target transformation dataset. 
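+
+        Note: the lookup is TF-version dependent. On TF1 the walk goes upward
+        from the MakeIterator op to the 'ModelDataset' node and returns that
+        node's 'OptimizeDataset' parent; on TF2, where 'OptimizeDataset' does
+        not exist, the nearest 'PrefetchDataset' ancestor is returned instead.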
+    """
+
+    if get_next.type != AnchorIteratorOp.ITERATOR_GET_NEXT.value:
+        raise TypeError(f"operation '{get_next}' must be one instance of 'IteratorGetNext'.")
+
+    make_iter = find_make_iterator_op(graph, get_next.outputs[0])
+
+    trans_dataset = None
+    if tf.__version__.startswith("1"):
+        optimize_dataset_op = upward_bfs_op(make_iter, AnchorDatasetOp.MODEL_DATASET.value)
+        trans_dataset = find_parent_op(optimize_dataset_op)
+        if not trans_dataset:
+            raise RuntimeError("parent operation of 'ModelDataset' was not found.")
+        if trans_dataset[0].type != AnchorDatasetOp.OPTIMIZE_DATASET.value:
+            raise TypeError("operation 'OptimizeDataset' was not found.")
+        trans_dataset = trans_dataset[0]
+    else:
+        trans_dataset = upward_bfs_op(make_iter, AnchorDatasetOp.PREFETCH_DATASET.value)

-    return objs
+    return trans_dataset
+
+
+def find_make_iterator_op(graph: Graph, batch_tensor: Tensor) -> Operation:
+    operations = graph.get_operations()
+    for each_op in operations:
+        for input_tensor in batch_tensor.op.inputs:
+            if (
+                input_tensor.op.outputs
+                and input_tensor.op.outputs[0] in list(each_op.inputs)
+                and each_op.type == AnchorIteratorOp.MAKE_ITERATOR.value
+            ):
+                logger.debug("Op MakeIterator '%s' was found.", each_op.name)
+                return each_op
+
+    raise ValueError("operation `MakeIterator` cannot be found.")


 def find_parent_op(operator: Operation) -> List[Operation]:
@@ -54,6 +86,54 @@ def find_parent_op(operator: Operation) -> List[Operation]:
     return parent_ops


+def upward_bfs_op(base_ops: Union[Operation, Set[Operation], List[Operation]], tgt_op_type: str) -> Operation:
+    if not isinstance(base_ops, (set, list)):
+        base_ops = [base_ops]
+
+    parent_ops = base_ops
+    while True:
+        for parent_op in parent_ops:
+            if parent_op.type == tgt_op_type:
+                return parent_op
+        base_ops = parent_ops
+        parent_ops = []
+        for base_op in base_ops:
+            parent_ops.extend(find_parent_op(base_op))
+        if not parent_ops:
+            raise ValueError(f"target operation '{tgt_op_type}' was not found.")
+
+
+def find_target_instance_dataset(graph: Graph, variant_tensor: Tensor) -> DatasetV1Adapter:
+    dataset_instance_list = graph.get_collection("dataset_group")
+    for ins in dataset_instance_list:
+        if ins._variant_tensor == variant_tensor:
+            if not isinstance(ins, DatasetV1Adapter):
+                ins = ins._input_dataset
+            logger.debug("Find target instance '%s', whose variant_tensor is '%s'.", ins, variant_tensor)
+            if not isinstance(ins.element_spec, dict) and not (
+                isinstance(ins.element_spec, (list, tuple))
+                and len(ins.element_spec) == 2
+                and isinstance(ins.element_spec[0], dict)
+            ):
+                raise NotImplementedError("the found dataset does not return a valid layout.")
+
+            return ins
+
+    raise LookupError(f"Cannot find target instance whose variant_tensor is '{variant_tensor}'.")
+
+
+def check_and_force_list(obj: Union[object, List[object]], obj_type: type) -> Union[object, List[object]]:
+    if isinstance(obj, obj_type):
+        obj = [obj]
+
+    if isinstance(obj, list):
+        for tensor in obj:
+            if not isinstance(tensor, obj_type):
+                raise ValueError(f"Given input parameter must be a {obj_type} or a list of {obj_type}")
+
+    return obj
+
+
 def check_cutting_points(cutting_point_list: List[Tensor]):
     for tensor in cutting_point_list:
         if not isinstance(tensor, Tensor):
@@ -63,10 +143,10 @@ def check_cutting_points(cutting_point_list: List[Tensor]):
         raise ValueError(f"Cutting point can only be the output of an Operator 'Identity'.")


-def record_ops_to_replace(src_op: Operation) -> DefaultDict[Tensor, List[Tuple[int, Operation]]]:
+def 
record_ops_to_replace(graph: Graph, src_op: Operation) -> DefaultDict[Tensor, List[Tuple[int, Operation]]]: replacement_specs = defaultdict(list) output_list = src_op.outputs - op_list = tf.compat.v1.get_default_graph().get_operations() + op_list = graph.get_operations() for tensor in output_list: for operator in op_list: if tensor in operator.inputs: @@ -78,23 +158,52 @@ def record_ops_to_replace(src_op: Operation) -> DefaultDict[Tensor, List[Tuple[i def replace_anchor(replacement_specs: DefaultDict[Tensor, List[Tuple[int, Operation]]], new_tensor_list: List[Tensor]): if len(replacement_specs) != len(new_tensor_list): - raise ValueError(f"Given replacement_specs and new_tensor_list must have the same length. " - f"replacement_specs: {replacement_specs}, new_tensor_list: {new_tensor_list}") + raise ValueError( + f"Given replacement_specs and new_tensor_list must have the same length. " + f"replacement_specs: {replacement_specs}, new_tensor_list: {new_tensor_list}" + ) for tensor_idx, (old_tensor, items) in enumerate(replacement_specs.items()): for input_idx, operator in items: try: operator._update_input(input_idx, new_tensor_list[tensor_idx]) except InvalidArgumentError as err: - logger.info("The replacement specs keys (old batch) is: %s. \n\t\t The new_tensor_list is: %s.", - replacement_specs.keys(), new_tensor_list) - raise RuntimeError(f"Cannot update edge, old tensor: {old_tensor}, " - f"new tensor: {new_tensor_list[tensor_idx]}.") from err + logger.info( + "The replacement specs keys (old batch) is: %s. \n\t\t The new_tensor_list is: %s.", + replacement_specs.keys(), + new_tensor_list, + ) + raise RuntimeError( + f"Cannot update edge, old tensor: {old_tensor}, " f"new tensor: {new_tensor_list[tensor_idx]}." + ) from err + + +def replace_anchor_control(graph: Graph, place_holder_control: tf.Operation, real_anchor: Tensor): + """ + 将place_holder_control替换为入参real_anchor. + + Args: + place_holder_control: control op + real_anchor: 用来替换打桩节点的tensor + Returns: None -def record_control_to_replace(src_op: Operation) -> DefaultDict[Tensor, List[Tuple[int, Operation]]]: + """ + + if place_holder_control is None: + raise RuntimeError( + f"Node place_holder_control does not exist. Check whether the sparse lookup interface " + f"is correctly invoked." 
+ ) + # find the op with stub node as the input + replacement_specs_for_anchor_vec = record_control_to_replace(graph, place_holder_control) + # replace anchor_vec with anchor + replace_control_anchor(replacement_specs_for_anchor_vec, real_anchor) + + +def record_control_to_replace(graph: Graph, src_op: Operation) -> DefaultDict[Tensor, List[Tuple[int, Operation]]]: replacement_specs = defaultdict(list) - op_list = tf.compat.v1.get_default_graph().get_operations() + op_list = graph.get_operations() for operator in op_list: if src_op in operator.control_inputs: input_index = operator.control_inputs.index(src_op) @@ -103,8 +212,9 @@ def record_control_to_replace(src_op: Operation) -> DefaultDict[Tensor, List[Tup return replacement_specs -def replace_control_anchor(replacement_specs: DefaultDict[Tensor, List[Tuple[int, Operation]]], - new_tensor_list: List[Tensor]): +def replace_control_anchor( + replacement_specs: DefaultDict[Tensor, List[Tuple[int, Operation]]], new_tensor_list: List[Tensor] +): for tensor_idx, (old_tensor, items) in enumerate(replacement_specs.items()): for _, operator in items: @@ -112,43 +222,48 @@ def replace_control_anchor(replacement_specs: DefaultDict[Tensor, List[Tuple[int control_op = control_flow_ops.group(new_tensor_list) operator._add_control_input(control_op) except InvalidArgumentError as err: - logger.info("The replacement control specs keys (old batch) is: %s. \n\t\t The new_tensor_list is: %s.", - replacement_specs.keys(), new_tensor_list) - raise RuntimeError(f"Cannot update edge, old tensor: {old_tensor}, " - f"new tensor: {new_tensor_list[tensor_idx]}.") from err + logger.info( + "The replacement control specs keys (old batch) is: %s. \n\t\t The new_tensor_list is: %s.", + replacement_specs.keys(), + new_tensor_list, + ) + raise RuntimeError( + f"Cannot update edge, old tensor: {old_tensor}, " f"new tensor: {new_tensor_list[tensor_idx]}." + ) from err -def export_pb_graph(file_name: str, - dump_graph: bool = False, - graph_def: GraphDef = None, - export_path: str = "./export_graph", - as_text: bool = True): +def replace_anchor_vec(graph: Graph, cutting_point: Tensor, attribute: ASCAnchorAttr, anchor: Tensor): """ - Save tensorflow graph before and after modifier graph - :param file_name: FileName of the graph - :param dump_graph: Is serialize graph or not - :param graph_def: A Graph or a GraphDef protocol buffer. - :param export_path: Directory where to write the graph. - This can refer to remote filesystems, such as Google Cloud Storage (GCS). - :param as_text: If True, writes the graph as an ASCII proto - :return: None + 根据打桩节点的名字找到以此为输入的op,并将该op的输入替换为入参anchor. + + Args: + cutting_point: sparse lookup查询的ids + attribute: 被替换的打桩节点的名字 + anchor: 用来替换打桩节点的tensor + + Returns: None + """ - if dump_graph: - dir_path = os.path.dirname(os.path.join(export_path, file_name)) - os.makedirs(dir_path, mode=DUMP_MIDIFY_GRAPH_FILE_MODE, exist_ok=True) - graph_def = graph_def if graph_def else tf.compat.v1.get_default_graph().as_graph_def() - tf.io.write_graph(graph_def, export_path, file_name, as_text) + + # get stub node + anchor_vec = BaseSparseEmbedding.get_anchor_attribute(cutting_point, attribute) + if anchor_vec is None: + raise RuntimeError( + f"Node `{attribute.value}` does not exist. Check whether the sparse lookup interface " + f"is correctly invoked." 
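+            # Rewiring mechanics (see the helpers above): record_ops_to_replace
+            # collects, per output tensor of the stub op, every
+            # (input_index, consumer_op) pair in the graph; replace_anchor then
+            # calls consumer._update_input(index, new_tensor) for each record,
+            # splicing the stub out without rebuilding the graph.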
+ ) + # find the op with stub node as the input + replacement_specs_for_anchor_vec = record_ops_to_replace(graph, anchor_vec.op) + # replace anchor_vec with anchor + replace_anchor(replacement_specs_for_anchor_vec, [anchor]) def make_sorted_key_to_tensor_list( - element_spec: List[Dict[str, Tensor]], - sorted_keys: List[str], - prefix: str = "" + element_spec: List[Dict[str, Tensor]], sorted_keys: List[str], prefix: str = "" ) -> List[str]: if isinstance(element_spec, tf.TensorSpec): sorted_keys.append(prefix) return sorted_keys - elif isinstance(element_spec, dict): for key, item in element_spec.items(): if not isinstance(key, str): @@ -169,61 +284,25 @@ def make_sorted_key_to_tensor_list( raise TypeError(f"Given element_spec, whose type is {type(element_spec)}, is invalid.") -def replace_anchor_vec(cutting_point: Tensor, attribute: ASCAnchorAttr, anchor: Tensor): +def export_pb_graph( + file_name: str, + dump_graph: bool = False, + graph_def: GraphDef = None, + export_path: str = "./export_graph", + as_text: bool = True, +): """ - 根据打桩节点的名字找到以此为输入的op,并将该op的输入替换为入参anchor. - - Args: - cutting_point: sparse lookup查询的ids - attribute: 被替换的打桩节点的名字 - anchor: 用来替换打桩节点的tensor - - Returns: None - - """ - - # get stub node - anchor_vec = BaseSparseEmbedding.get_anchor_attribute(cutting_point, attribute) - if anchor_vec is None: - raise RuntimeError(f"Node `{attribute.value}` does not exist. Check whether the sparse lookup interface " - f"is correctly invoked.") - # find the op with stub node as the input - replacement_specs_for_anchor_vec = record_ops_to_replace(anchor_vec.op) - # replace anchor_vec with anchor - replace_anchor(replacement_specs_for_anchor_vec, [anchor]) - - -def replace_anchor_control(place_holder_control: tf.Operation, real_anchor: Tensor): - """ - 将place_holder_control替换为入参real_anchor. - - Args: - place_holder_control: control op - real_anchor: 用来替换打桩节点的tensor - - Returns: None - + Save tensorflow graph before and after modifier graph + :param file_name: FileName of the graph + :param dump_graph: Is serialize graph or not + :param graph_def: A Graph or a GraphDef protocol buffer. + :param export_path: Directory where to write the graph. + This can refer to remote filesystems, such as Google Cloud Storage (GCS). + :param as_text: If True, writes the graph as an ASCII proto + :return: None """ - - if place_holder_control is None: - raise RuntimeError(f"Node place_holder_control does not exist. 
Check whether the sparse lookup interface " - f"is correctly invoked.") - # find the op with stub node as the input - replacement_specs_for_anchor_vec = record_control_to_replace(place_holder_control) - # replace anchor_vec with anchor - replace_control_anchor(replacement_specs_for_anchor_vec, real_anchor) - - -def mark_orphan_lookup_key(lookup_key: Tensor) -> Tensor: - graph_def = tf.compat.v1.get_default_graph().as_graph_def() - subgraph = tf.compat.v1.graph_util.extract_sub_graph(graph_def, [lookup_key.op.name]) - - for node in subgraph.node: - if node.op == AnchorIteratorOp.ITERATOR_GET_NEXT.value: - return lookup_key - - name_prefix = OrphanLookupKeySlicer.SLICEABLE_ORPHAN_LOOKUP_KEY_PREFIX - marked_lookup_key = tf.identity(lookup_key, name="{}/{}".format(name_prefix, lookup_key.op.name)) - - logger.info('Mark orphan lookup key %s as %s.', lookup_key, marked_lookup_key) - return marked_lookup_key + if dump_graph: + dir_path = os.path.dirname(os.path.join(export_path, file_name)) + os.makedirs(dir_path, mode=DUMP_MIDIFY_GRAPH_FILE_MODE, exist_ok=True) + graph_def = graph_def if graph_def else tf.compat.v1.get_default_graph().as_graph_def() + tf.io.write_graph(graph_def, export_path, file_name, as_text) diff --git a/tests/mx_rec/graph/test_modifier.py b/tests/mx_rec/graph/test_modifier.py index ff9a6664..25caf429 100644 --- a/tests/mx_rec/graph/test_modifier.py +++ b/tests/mx_rec/graph/test_modifier.py @@ -30,22 +30,13 @@ from mx_rec.constants.constants import ( ASCEND_TIMESTAMP, ASCAnchorAttr, ) -from mx_rec.core.asc import FeatureSpec from mx_rec.graph.modifier import ( GraphModifierHook, - AnchorRecord, - find_make_iterator_op, - find_target_dataset_op, - find_target_instance_dataset, - generate_get_next_op_specs, - get_dataset_op, - get_input_index_list, - get_passing_tensor_list, - get_preprocessing_map_func, - get_src_dataset, - get_tgt_dataset, - get_timestamp_index, - modify_graph_for_asc, + _GraphModifier, + _AnchorRecord, + _get_input_index_list, + _get_passing_tensor_list, + _get_timestamp_index, ) from tests.mx_rec.core.mock_class import MockConfigInitializer, MockSparseEmbedding, MockOptimizer from tests.mx_rec.graph.mock_dataset import gen_mock_dataset @@ -70,16 +61,19 @@ def _gen_mock_get_anchor_attribute(is_training: bool = True) -> Callable: class GetPreprocessingMapFuncTest(TestCase): + def setUp(self) -> None: + self._modifier = _GraphModifier() + def tearDown(self) -> None: tf.compat.v1.reset_default_graph() def test_err_none_names_and_indexes(self): - mock_graph_def = tf.compat.v1.GraphDef() + mock_graph_def = self._modifier._full_graph.as_graph_def() mock_input_names = [] mock_output_names = [] with self.assertRaises(ValueError): - get_preprocessing_map_func(mock_graph_def, mock_input_names, mock_output_names) + _GraphModifier._get_preprocessing_map_func(mock_graph_def, mock_input_names, mock_output_names) class GetInputIndexListTest(TestCase): @@ -93,70 +87,11 @@ class GetInputIndexListTest(TestCase): mock_base_count = 0 with self.assertRaises(ValueError): - get_input_index_list( + _get_input_index_list( mock_cutting_point_list, mock_replace_ment_specs, mock_mapping_name_list, mock_base_count ) -class FindMakeIteratorOpTest(TestCase): - def tearDown(self) -> None: - tf.compat.v1.reset_default_graph() - - def test_ok(self): - mock_dataset = gen_mock_dataset() - mock_iterator = mock_dataset.make_initializable_iterator() - mock_batch = mock_iterator.get_next() - mock_ids = mock_batch.get("mock_ids") - - found_iter_op = find_make_iterator_op(mock_ids) - 
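# (These lookup-helper tests are not dropped outright: graph-aware
# replacements appear in tests/mx_rec/graph/test_utils.py later in this patch,
# exercising the relocated helpers with calls such as
# find_make_iterator_op(self._graph, mock_ids).)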
self.assertEqual(found_iter_op.type, "MakeIterator") - - def test_err_no_tgt_dataset_op(self): - mock_ids = tf.zeros(shape=(4096, 8)) - with self.assertRaises(ValueError): - find_make_iterator_op(mock_ids) - - -class FindTargetDatasetOpTest(TestCase): - def tearDown(self) -> None: - tf.compat.v1.reset_default_graph() - - def test_ok(self): - mock_dataset = gen_mock_dataset() - mock_iterator = mock_dataset.make_initializable_iterator() - mock_batch = mock_iterator.get_next() - mock_ids = mock_batch.get("mock_ids") - mock_base_op = tf.identity(mock_ids).op - - found_tgt_dataset_op = find_target_dataset_op(base_ops=mock_base_op, op_type="IteratorGetNext") - self.assertEqual(found_tgt_dataset_op, mock_ids.op) - - def test_err_no_tgt_op_type(self): - mock_ids = tf.zeros(shape=(4096, 8)) - mock_base_op = mock_ids.op - with self.assertRaises(ValueError): - find_target_dataset_op(mock_base_op, "IteratorGetNext") - - -class GetDatasetOpTest(TestCase): - def tearDown(self) -> None: - tf.compat.v1.reset_default_graph() - - def test_ok(self): - mock_dataset = gen_mock_dataset() - mock_iterator = mock_dataset.make_initializable_iterator() - mock_batch = mock_iterator.get_next() - mock_ids = mock_batch.get("mock_ids") - mock_get_next_op = mock_ids.op - - found_dataset_op = get_dataset_op(mock_get_next_op) - self.assertEqual(found_dataset_op.type, "OptimizeDataset") - - def test_err_invalid_op_type(self): - mock_get_next_op = tf.zeros(shape=(4096, 8)).op - with self.assertRaises(TypeError): - get_dataset_op(mock_get_next_op) - class GetPassingTensorList(TestCase): def tearDown(self) -> None: @@ -176,7 +111,7 @@ class GetPassingTensorList(TestCase): "output_index_list": [0], "sub_src_tensors": mock_cutting_point_list, } - passing_tensor_list, output_index_list, sub_src_tensors = get_passing_tensor_list( + passing_tensor_list, output_index_list, sub_src_tensors = _get_passing_tensor_list( mock_cutting_point_list, mock_tgt_op ) self.assertEqual(passing_tensor_list, expected["passing_tensor_list"]) @@ -184,29 +119,23 @@ class GetPassingTensorList(TestCase): self.assertEqual(sub_src_tensors, expected["sub_src_tensors"]) -class FindTargetInstanceDatasetTest(TestCase): - def tearDown(self) -> None: - tf.compat.v1.reset_default_graph() - - def test_err_no_target_dataset_instance(self): - with self.assertRaises(LookupError): - find_target_instance_dataset(None) - class GetSrcDatasetTest(TestCase): + def setUp(self) -> None: + self._modifier = _GraphModifier() + def tearDown(self) -> None: tf.compat.v1.reset_default_graph() def test_ok_one_shot(self): mock_dataset = gen_mock_dataset() mock_prefetch_dataset = mock_dataset.prefetch(10) - mock_double_prefetch_dataset = mock_prefetch_dataset.prefetch(10) mock_iterator = mock_prefetch_dataset.make_one_shot_iterator() mock_batch = mock_iterator.get_next() mock_ids = mock_batch.get("mock_ids") mock_get_next_op = mock_ids.op - src_dataset = get_src_dataset(mock_get_next_op, is_training=True) + src_dataset = self._modifier._get_src_dataset(mock_get_next_op, is_training=True) self.assertEqual(src_dataset, mock_dataset) @@ -215,6 +144,9 @@ class GetSrcDatasetTest(TestCase): ConfigInitializer=Mock(return_value=MockConfigInitializer()), ) class GetTgtDatasetTest(TestCase): + def setUp(self) -> None: + self._modifier = _GraphModifier() + def tearDown(self) -> None: tf.compat.v1.reset_default_graph() @@ -233,22 +165,16 @@ class GetTgtDatasetTest(TestCase): mock_batch = mock_iterator.get_next() mock_ids = mock_batch.get("mock_ids") mock_sub_cutting_point_list = [mock_ids] - 
mock_records = AnchorRecord( - defaultdict(), - [], - [], - [], - tf.compat.v1.GraphDef(), - [], - [], - True - ) + mock_records = _AnchorRecord(defaultdict(), [], [], [], tf.compat.v1.GraphDef(), [], [], True) - tgt_dataset = get_tgt_dataset(mock_dataset, mock_sub_cutting_point_list, mock_records) + tgt_dataset = self._modifier._get_tgt_dataset(mock_dataset, mock_sub_cutting_point_list, mock_records) self.assertIsNotNone(tgt_dataset) class ModifyGraphForAscTest(TestCase): + def setUp(self) -> None: + self._modifier = _GraphModifier() + def tearDown(self) -> None: tf.compat.v1.reset_default_graph() @@ -257,9 +183,11 @@ class ModifyGraphForAscTest(TestCase): get_asc_insert_func=Mock(return_value=lambda x, y: x), ) @patch.multiple("mx_rec.graph.modifier.BaseSparseEmbedding", get_anchor_attribute=_gen_mock_get_anchor_attribute()) - @patch.multiple("mx_rec.core.asc.manager", - should_skip=MagicMock(return_value=True), - check_dangling_table=MagicMock(return_value=["test_table"])) + @patch.multiple( + "mx_rec.core.asc.manager", + should_skip=MagicMock(return_value=True), + check_dangling_table=MagicMock(return_value=["test_table"]), + ) @patch("mx_rec.graph.modifier.ConfigInitializer") def test_ok_train_mode(self, modifier_config_initializer): mock_config_initializer = MockConfigInitializer(modify_graph=True, merged_multi_lookup=True) @@ -280,7 +208,7 @@ class ModifyGraphForAscTest(TestCase): tf.compat.v1.add_to_collection(ASCEND_SPARSE_LOOKUP_ENTRANCE, mock_cutting_point) - modify_graph_for_asc() + self._modifier.modify_graph_for_asc() @patch.multiple( "mx_rec.graph.modifier", @@ -289,12 +217,13 @@ class ModifyGraphForAscTest(TestCase): ) @patch.multiple( "mx_rec.graph.modifier.BaseSparseEmbedding", - get_anchor_attribute=_gen_mock_get_anchor_attribute(is_training=False) + get_anchor_attribute=_gen_mock_get_anchor_attribute(is_training=False), ) @patch("mx_rec.graph.modifier.ConfigInitializer") def test_ok_eval_mode(self, modifier_config_initializer): - mock_config_initializer = MockConfigInitializer(modify_graph=True, merged_multi_lookup=True, - bool_gauge_set={"evaluate"}) + mock_config_initializer = MockConfigInitializer( + modify_graph=True, merged_multi_lookup=True, bool_gauge_set={"evaluate"} + ) modifier_config_initializer.get_instance = Mock(return_value=mock_config_initializer) mock_dataset = gen_mock_dataset() @@ -312,7 +241,7 @@ class ModifyGraphForAscTest(TestCase): tf.compat.v1.add_to_collection(ASCEND_SPARSE_LOOKUP_ENTRANCE, mock_cutting_point) - modify_graph_for_asc() + self._modifier.modify_graph_for_asc() @patch.multiple( "mx_rec.graph.modifier", @@ -333,10 +262,13 @@ class ModifyGraphForAscTest(TestCase): tf.compat.v1.add_to_collection(ASCEND_SPARSE_LOOKUP_ENTRANCE, mock_cutting_point) with self.assertRaises(RuntimeError): - modify_graph_for_asc() + self._modifier.modify_graph_for_asc() class GetTimestampIndexTest(TestCase): + def setUp(self) -> None: + self._graph = tf.compat.v1.get_default_graph() + def tearDown(self) -> None: tf.compat.v1.reset_default_graph() @@ -358,7 +290,7 @@ class GetTimestampIndexTest(TestCase): tf.compat.v1.add_to_collection(ASCEND_TIMESTAMP, mock_timestamp) - timestamp_index = get_timestamp_index(mock_get_next_op, is_training=True) + timestamp_index = _get_timestamp_index(self._graph, mock_get_next_op, is_training=True) self.assertEqual(timestamp_index, 2) @@ -382,8 +314,9 @@ class GraphModifierHookTest(TestCase): ) @patch("mx_rec.graph.modifier.ConfigInitializer") def test_ok(self, modifier_config_initializer): - mock_config_initializer = 
MockConfigInitializer(modify_graph=True, is_graph_modify_hook_running=True, - iterator_type="MakeIterator") + mock_config_initializer = MockConfigInitializer( + modify_graph=True, is_graph_modify_hook_running=True, iterator_type="MakeIterator" + ) modifier_config_initializer.get_instance = Mock(return_value=mock_config_initializer) mock_dataset = gen_mock_dataset() @@ -406,8 +339,9 @@ class GraphModifierHookTest(TestCase): ) @patch("mx_rec.graph.modifier.ConfigInitializer") def test_err_invalid_iterator_type(self, modifier_config_initializer): - mock_config_initializer = MockConfigInitializer(modify_graph=True, is_graph_modify_hook_running=True, - iterator_type="InvalidIterator") + mock_config_initializer = MockConfigInitializer( + modify_graph=True, is_graph_modify_hook_running=True, iterator_type="InvalidIterator" + ) modifier_config_initializer.get_instance = Mock(return_value=mock_config_initializer) mock_dataset = gen_mock_dataset() diff --git a/tests/mx_rec/graph/test_utils.py b/tests/mx_rec/graph/test_utils.py index 5a4efffc..7aead90e 100644 --- a/tests/mx_rec/graph/test_utils.py +++ b/tests/mx_rec/graph/test_utils.py @@ -15,7 +15,6 @@ # limitations under the License. # ============================================================================== -import sys import os import pathlib import shutil @@ -24,42 +23,45 @@ from unittest import TestCase import tensorflow as tf from tensorflow import Tensor, TensorSpec + from mx_rec.constants.constants import ASCAnchorAttr from mx_rec.core.emb.base_sparse_embedding import BaseSparseEmbedding from mx_rec.graph.utils import ( - check_input_list, + find_trans_dataset, find_parent_op, + find_make_iterator_op, + find_target_instance_dataset, + upward_bfs_op, + check_and_force_list, check_cutting_points, export_pb_graph, make_sorted_key_to_tensor_list, replace_anchor_vec, ) +from tests.mx_rec.graph.mock_dataset import gen_mock_dataset -class CheckInputListTest(TestCase): - def tearDown(self): - tf.compat.v1.reset_default_graph() - - def test_ok_single_object(self): - mock_obj = "obj" - obj_type = str - - checked_objs = check_input_list(mock_obj, obj_type) - self.assertEqual([mock_obj], checked_objs) +class FindTransDatasetTest(TestCase): + def setUp(self) -> None: + self._graph = tf.compat.v1.get_default_graph() - def test_ok_object_list(self): - mock_objs = ["obj1", "obj2", "ojb3"] - obj_type = str + def tearDown(self) -> None: + tf.compat.v1.reset_default_graph() - checked_cutting_points = check_input_list(mock_objs, obj_type) - self.assertEqual(mock_objs, checked_cutting_points) + def test_ok(self): + mock_dataset = gen_mock_dataset() + mock_iterator = mock_dataset.make_initializable_iterator() + mock_batch = mock_iterator.get_next() + mock_ids = mock_batch.get("mock_ids") + mock_get_next_op = mock_ids.op - def test_err_inconsistent_object_and_type(self): - mock_objs = ["obj1", "obj2", "ojb3"] - obj_type = Tensor + found_dataset_op = find_trans_dataset(self._graph, mock_get_next_op) + self.assertEqual(found_dataset_op.type, "OptimizeDataset") - with self.assertRaises(ValueError): - check_input_list(mock_objs, obj_type) + def test_err_invalid_op_type(self): + mock_get_next_op = tf.zeros(shape=(4096, 8)).op + with self.assertRaises(TypeError): + find_trans_dataset(self._graph, mock_get_next_op) class FindParentOpTest(TestCase): @@ -76,6 +78,64 @@ class FindParentOpTest(TestCase): self.assertEqual([mock_parent_op], parent_op) +class FindMakeIteratorOpTest(TestCase): + def setUp(self) -> None: + self._graph = tf.compat.v1.get_default_graph() + + 
def tearDown(self) -> None: + tf.compat.v1.reset_default_graph() + + def test_ok(self): + mock_dataset = gen_mock_dataset() + mock_iterator = mock_dataset.make_initializable_iterator() + mock_batch = mock_iterator.get_next() + mock_ids = mock_batch.get("mock_ids") + + found_iter_op = find_make_iterator_op(self._graph, mock_ids) + self.assertEqual(found_iter_op.type, "MakeIterator") + + def test_err_no_tgt_dataset_op(self): + mock_ids = tf.zeros(shape=(4096, 8)) + with self.assertRaises(ValueError): + find_make_iterator_op(self._graph, mock_ids) + + +class FindTargetInstanceDatasetTest(TestCase): + def setUp(self) -> None: + self._graph = tf.compat.v1.get_default_graph() + + def tearDown(self) -> None: + tf.compat.v1.reset_default_graph() + + def test_err_no_target_dataset_instance(self): + with self.assertRaises(LookupError): + find_target_instance_dataset(self._graph, None) + + +class UpwardBFSOpTest(TestCase): + def setUp(self) -> None: + self._graph = tf.compat.v1.get_default_graph() + + def tearDown(self) -> None: + tf.compat.v1.reset_default_graph() + + def test_ok(self): + mock_dataset = gen_mock_dataset() + mock_iterator = mock_dataset.make_initializable_iterator() + mock_batch = mock_iterator.get_next() + mock_ids = mock_batch.get("mock_ids") + mock_base_op = tf.identity(mock_ids).op + + found_tgt_dataset_op = upward_bfs_op(base_ops=mock_base_op, tgt_op_type="IteratorGetNext") + self.assertEqual(found_tgt_dataset_op, mock_ids.op) + + def test_err_no_tgt_op_type(self): + mock_ids = tf.zeros(shape=(4096, 8)) + mock_base_op = mock_ids.op + with self.assertRaises(ValueError): + upward_bfs_op(base_ops=mock_base_op, tgt_op_type="IteratorGetNext") + + class CheckCuttingPointsTest(TestCase): def setUp(self): self._generator_iter_times = 3 @@ -98,6 +158,32 @@ class CheckCuttingPointsTest(TestCase): check_cutting_points(mock_cutting_point_list) +class CheckAndForceListTest(TestCase): + def tearDown(self): + tf.compat.v1.reset_default_graph() + + def test_ok_single_object(self): + mock_obj = "obj" + obj_type = str + + checked_objs = check_and_force_list(mock_obj, obj_type) + self.assertEqual([mock_obj], checked_objs) + + def test_ok_object_list(self): + mock_objs = ["obj1", "obj2", "ojb3"] + obj_type = str + + checked_cutting_points = check_and_force_list(mock_objs, obj_type) + self.assertEqual(mock_objs, checked_cutting_points) + + def test_err_inconsistent_object_and_type(self): + mock_objs = ["obj1", "obj2", "ojb3"] + obj_type = Tensor + + with self.assertRaises(ValueError): + check_and_force_list(mock_objs, obj_type) + + class ExportPBGraphTest(TestCase): def setUp(self) -> None: self._dir_name = "./export_graph" @@ -162,7 +248,7 @@ class ReplaceAnchorVecTest(TestCase): anchor_vec_output = tf.identity(anchor_vec, name="anchor_vec_output") BaseSparseEmbedding.anchor_tensor_specs[mock_cutting_point][mock_attribute] = anchor_vec - replace_anchor_vec(mock_cutting_point, mock_attribute, mock_anchor) + replace_anchor_vec(tf.compat.v1.get_default_graph(), mock_cutting_point, mock_attribute, mock_anchor) self.assertEqual(anchor_vec_output.op.inputs[0], mock_anchor) -- Gitee From 61a3be346f100c067e06dcb2b70dd739d45c31a3 Mon Sep 17 00:00:00 2001 From: longfeifei <962977793@qq.com> Date: Tue, 21 May 2024 19:25:34 +0800 Subject: [PATCH 155/302] =?UTF-8?q?warm=20start=20=E4=BF=AE=E6=94=B9DT?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/mx_rec/core/test_feature_process.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/tests/mx_rec/core/test_feature_process.py b/tests/mx_rec/core/test_feature_process.py index b8bb0742..787648f4 100644 --- a/tests/mx_rec/core/test_feature_process.py +++ b/tests/mx_rec/core/test_feature_process.py @@ -78,13 +78,13 @@ class TestAfterRunFuncOfEvictHookClass(TestEvictHookClass): mock_get_next.return_value = [tf.constant([8, 9], dtype=tf.int32), tf.constant(2, dtype=tf.int32)] - evict_hook = EvictHook(evict_enable=True, evict_time_interval=1) + evict_hook = EvictHook(evict_enable=True, evict_time_interval=10) with tf.compat.v1.train.MonitoredSession(hooks=[evict_hook]) as sess: sess.graph._unsafe_unfinalize() sess.run(tf.compat.v1.global_variables_initializer()) # sleep 1s 等待淘汰时间evict_time_interval - time.sleep(1) + time.sleep(10) # 获取原variable,淘汰会发生在此session run之后 ori_variable = sess.run(test_table.variable) -- Gitee From 0b3043d3dbf6852aa7409a8326455e1d188de0ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=BD=97=E5=B9=B8=E8=BF=90?= Date: Tue, 21 May 2024 12:05:51 +0000 Subject: [PATCH 156/302] =?UTF-8?q?!150=20cleancode=20bug=20*=20=E3=80=90?= =?UTF-8?q?=E4=BF=AE=E6=94=B9=E8=AF=B4=E6=98=8E=20Modification=E3=80=91cle?= =?UTF-8?q?ancode?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mx_rec/core/emb/dynamic_sparse_embedding.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mx_rec/core/emb/dynamic_sparse_embedding.py b/mx_rec/core/emb/dynamic_sparse_embedding.py index 4781491c..8dfe504c 100644 --- a/mx_rec/core/emb/dynamic_sparse_embedding.py +++ b/mx_rec/core/emb/dynamic_sparse_embedding.py @@ -42,7 +42,7 @@ class DynamicSparseEmbedding(BaseSparseEmbedding): def _get_sparse_forward_result(self, sparse_forward_fn: Callable, table: Union[tf.compat.v1.Variable, tf.Tensor], result: dict, is_training: bool) -> tf.Tensor: local_embeddings = import_host_pipeline_ops().embedding_lookup_by_address( - result.get(str(ASCAnchorAttr.ID_OFFSETS)), embedding_dim=self._emb_size, embedding_type=1) + result.get(str(ASCAnchorAttr.ID_OFFSETS.value)), embedding_dim=self._emb_size, embedding_type=1) add_collection_condition = is_training and ( ASCEND_TABLE_NAME_MUST_CONTAIN is None or ASCEND_TABLE_NAME_MUST_CONTAIN in self._table_name) @@ -52,9 +52,9 @@ class DynamicSparseEmbedding(BaseSparseEmbedding): return sparse_forward_fn(local_embeddings) # 创建扩容查询tensor和table_instance的映射关系,以便优化器中使用 ConfigInitializer.get_instance().sparse_embed_config.insert_table_instance_to_tensor_dict( - result.get(str(ASCAnchorAttr.ID_OFFSETS)), self) + result.get(str(ASCAnchorAttr.ID_OFFSETS.value)), self) tf.compat.v1.add_to_collection(ASCEND_SPARSE_LOOKUP_LOCAL_EMB, local_embeddings) - tf.compat.v1.add_to_collection(ASCEND_SPARSE_LOOKUP_ID_OFFSET, result.get(str(ASCAnchorAttr.ID_OFFSETS))) + tf.compat.v1.add_to_collection(ASCEND_SPARSE_LOOKUP_ID_OFFSET, result.get(str(ASCAnchorAttr.ID_OFFSETS.value))) return sparse_forward_fn(local_embeddings) -- Gitee From 906acf0c55b5bdafc91d6ce1f7204489a06d2d86 Mon Sep 17 00:00:00 2001 From: longfeifei <962977793@qq.com> Date: Tue, 21 May 2024 21:15:51 +0800 Subject: [PATCH 157/302] =?UTF-8?q?warm=20start=20=E5=AF=B9=E5=8E=9F?= =?UTF-8?q?=E7=94=9FEstimator=E6=89=93patch?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mx_rec/saver/warm_start.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/mx_rec/saver/warm_start.py b/mx_rec/saver/warm_start.py index baf01ce8..b3d08ef4 100644 --- a/mx_rec/saver/warm_start.py +++ 
b/mx_rec/saver/warm_start.py @@ -23,10 +23,6 @@ import six import tensorflow as tf from tensorflow.python.estimator import estimator as estimator_lib from tensorflow.python.training import warm_starting_util -if tf.__version__.startswith("1"): - from npu_bridge.npu_init import NPUEstimator -else: - from npu_device.compat.v1.npu_init import NPUEstimator from mx_rec.util.log import logger from mx_rec.saver.saver import Saver @@ -61,7 +57,7 @@ class WarmStartController: def patch_for_warm_start(): estimator_lib.Estimator.__init__ = patch_estimator_init(estimator_lib.Estimator.__init__) warm_starting_util.warm_start = patch_for_func_warm_start(warm_starting_util.warm_start) - NPUEstimator.train = patch_for_estimator_train(NPUEstimator.train) + estimator_lib.Estimator.train = patch_for_estimator_train(estimator_lib.Estimator.train) def patch_estimator_init(func): -- Gitee From 76c3993f67ba17fbd75a5df68c479afa25a1c40a Mon Sep 17 00:00:00 2001 From: penghuiyang <1060916628@qq.com> Date: Wed, 22 May 2024 10:32:04 +0800 Subject: [PATCH 158/302] =?UTF-8?q?dcnv2=EF=BC=8Cdlrm=E6=A8=A1=E5=9E=8Bmai?= =?UTF-8?q?n=E8=84=9A=E6=9C=AC=E5=86=85=E5=88=AA=E9=99=A4=E4=BB=A5?= =?UTF-8?q?=E4=BF=9D=E5=AD=98=E6=95=B0=E6=8D=AE=EF=BC=8C=E9=80=82=E9=85=8D?= =?UTF-8?q?=E5=A4=9A=E6=9C=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/DCNv2/config.py | 23 ++++++++++++++++------- examples/DCNv2/main_mxrec.py | 10 +++++++++- examples/demo/little_demo/main.py | 7 ++----- examples/dlrm/model/config.py | 23 ++++++++++++++++------- examples/dlrm/model/main_mxrec.py | 29 ++++++++++++++++++++++++++++- examples/dlrm/model/run.sh | 13 +------------ 6 files changed, 72 insertions(+), 33 deletions(-) diff --git a/examples/DCNv2/config.py b/examples/DCNv2/config.py index 73ab2797..fab17d32 100644 --- a/examples/DCNv2/config.py +++ b/examples/DCNv2/config.py @@ -14,11 +14,14 @@ # limitations under the License. 
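[Editor's sketch] The warm-start patching above follows the usual capture-and-wrap pattern: keep a reference to the original bound function, replace the class attribute with a wrapper, and delegate after injecting the extra work. A minimal sketch under that assumption (run_warm_start_restore is a hypothetical placeholder, not the project's real function):

    def run_warm_start_restore():
        pass  # hypothetical placeholder for restoring warm-started sparse tables

    def patch_for_estimator_train_sketch(original_train):
        def wrapper(self, *args, **kwargs):
            run_warm_start_restore()  # injected step before delegating
            return original_train(self, *args, **kwargs)
        return wrapper

    # Applied once, as patch_for_warm_start() does for the native Estimator:
    # estimator_lib.Estimator.train = patch_for_estimator_train_sketch(estimator_lib.Estimator.train)

Patching the native estimator_lib.Estimator.train instead of NPUEstimator.train also lets the TF1/TF2-specific npu imports be dropped, since NPUEstimator derives from the native Estimator.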
# ============================================================================== +import enum import os import tensorflow as tf from tensorflow.core.protobuf.rewriter_config_pb2 import RewriterConfig +SSD_DATA_PATH = ["ssd_data"] + class LearningRateScheduler: """ @@ -86,6 +89,12 @@ class LearningRateScheduler: return lr_dense, lr_sparse +class CacheModeEnum(enum.Enum): + HBM = "HBM" + DDR = "DDR" + SSD = "SSD" + + class Config: def __init__(self, ): self.rank_id = int(os.getenv("OMPI_COMM_WORLD_RANK")) if os.getenv("OMPI_COMM_WORLD_RANK") else None @@ -141,13 +150,13 @@ class Config: if self.cache_mode is None: raise ValueError("please export CACHE_MODE environment variable, support:[HBM, DDR, SSD]") - if self.cache_mode == "HBM": + if self.cache_mode == CacheModeEnum.HBM.value: self.dev_vocab_size = 24_000_000 * self.rank_size self.host_vocab_size = 0 - elif self.cache_mode == "DDR": + elif self.cache_mode == CacheModeEnum.DDR.value: self.dev_vocab_size = 500_000 * self.rank_size self.host_vocab_size = 24_000_000 * self.rank_size - elif self.cache_mode == "SSD": + elif self.cache_mode == CacheModeEnum.SSD.value: self.dev_vocab_size = 100_000 * self.rank_size self.host_vocab_size = 2_000_000 * self.rank_size self.ssd_vocab_size = 24_000_000 * self.rank_size @@ -155,16 +164,16 @@ class Config: raise ValueError(f"get CACHE_MODE:{self.cache_mode}, expect in [HBM, DDR, SSD]") def get_emb_table_cfg(self) -> dict: - if self.cache_mode == "HBM": + if self.cache_mode == CacheModeEnum.HBM.value: return {"device_vocabulary_size": self.dev_vocab_size} - elif self.cache_mode == "DDR": + elif self.cache_mode == CacheModeEnum.DDR.value: return {"device_vocabulary_size": self.dev_vocab_size, "host_vocabulary_size": self.host_vocab_size} - elif self.cache_mode == "SSD": + elif self.cache_mode == CacheModeEnum.SSD.value: return {"device_vocabulary_size": self.dev_vocab_size, "host_vocabulary_size": self.host_vocab_size, "ssd_vocabulary_size": self.ssd_vocab_size, - "ssd_data_path": ["ssd_data"]} + "ssd_data_path": SSD_DATA_PATH} else: raise RuntimeError(f"get CACHE_MODE:{self.cache_mode}, check Config.__set_emb_table_size implementation") diff --git a/examples/DCNv2/main_mxrec.py b/examples/DCNv2/main_mxrec.py index 18ab273e..12cf9428 100644 --- a/examples/DCNv2/main_mxrec.py +++ b/examples/DCNv2/main_mxrec.py @@ -27,7 +27,7 @@ import numpy as np from npu_bridge.npu_init import * from model import MyModel -from config import sess_config, Config +from config import sess_config, Config, SSD_DATA_PATH, CacheModeEnum from optimizer import get_dense_and_sparse_optimizer from mx_rec.core.asc.helper import FeatureSpec, get_asc_insert_func from mx_rec.core.asc.manager import start_asc_pipeline @@ -259,6 +259,14 @@ def _del_related_dir(del_path: str) -> None: def _clear_saved_model() -> None: _del_related_dir("/root/ascend/log/*") + if os.getenv("CACHE_MODE", "") != CacheModeEnum.SSD.value: + return + logger.info("current cache mode is SSD, and file overwrite is not allowed in SSD mode, deleting exist directory" + " then create empty directory for this use case.") + for sub_path in SSD_DATA_PATH: + _del_related_dir(sub_path) + os.makedirs(sub_path, mode=0o550, exist_ok=True) + logger.info(f"mkdir dir:{sub_path}") if __name__ == "__main__": diff --git a/examples/demo/little_demo/main.py b/examples/demo/little_demo/main.py index d8dd851a..15478aa3 100644 --- a/examples/demo/little_demo/main.py +++ b/examples/demo/little_demo/main.py @@ -174,11 +174,8 @@ def _clear_saved_model() -> None: " then create empty directory 
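[Editor's sketch] The CacheModeEnum dispatch introduced above decides how vocabulary capacity is split between device HBM, host DDR and SSD. A condensed, self-contained version of the same logic, using the sizes from this commit, to see the three layouts side by side:

    import enum

    class CacheMode(enum.Enum):
        HBM = "HBM"
        DDR = "DDR"
        SSD = "SSD"

    def emb_table_cfg(mode: CacheMode, rank_size: int = 8) -> dict:
        if mode is CacheMode.HBM:
            return {"device_vocabulary_size": 24_000_000 * rank_size}
        if mode is CacheMode.DDR:
            return {"device_vocabulary_size": 500_000 * rank_size,
                    "host_vocabulary_size": 24_000_000 * rank_size}
        return {"device_vocabulary_size": 100_000 * rank_size,
                "host_vocabulary_size": 2_000_000 * rank_size,
                "ssd_vocabulary_size": 24_000_000 * rank_size,
                "ssd_data_path": ["ssd_data"]}

    print(emb_table_cfg(CacheMode.DDR))

Routing the comparisons through the enum keeps the three mode literals in one place instead of scattering "HBM"/"DDR"/"SSD" strings through the branches.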
for this use case.") for sub_path in _SSD_SAVE_PATH: _del_related_dir(sub_path) - try: - os.mkdir(sub_path) - logger.info(f"mkdir dir:{sub_path}") - except OSError: - logger.warning("ssd path has exist") # 多进程并行,忽略异常 + os.makedirs(sub_path, mode=0o550, exist_ok=True) + logger.info(f"mkdir dir:{sub_path}") if __name__ == "__main__": diff --git a/examples/dlrm/model/config.py b/examples/dlrm/model/config.py index fd38276d..d6259eb0 100644 --- a/examples/dlrm/model/config.py +++ b/examples/dlrm/model/config.py @@ -14,12 +14,15 @@ # limitations under the License. # ============================================================================== +import enum import os import tensorflow as tf from tensorflow.core.protobuf.rewriter_config_pb2 import RewriterConfig from npu_bridge.estimator.npu.npu_config import NPURunConfig +SSD_DATA_PATH = ["ssd_data"] + class LearningRateScheduler: """ @@ -87,6 +90,12 @@ class LearningRateScheduler: return lr_dense, lr_sparse +class CacheModeEnum(enum.Enum): + HBM = "HBM" + DDR = "DDR" + SSD = "SSD" + + class Config: def __init__(self, ): self.rank_id = int(os.getenv("OMPI_COMM_WORLD_RANK")) if os.getenv("OMPI_COMM_WORLD_RANK") else None @@ -142,13 +151,13 @@ class Config: if self.cache_mode is None: raise ValueError("please export CACHE_MODE environment variable, support:[HBM, DDR, SSD]") - if self.cache_mode == "HBM": + if self.cache_mode == CacheModeEnum.HBM.value: self.dev_vocab_size = 24_000_000 * self.rank_size self.host_vocab_size = 0 - elif self.cache_mode == "DDR": + elif self.cache_mode == CacheModeEnum.DDR.value: self.dev_vocab_size = 500_000 * self.rank_size self.host_vocab_size = 24_000_000 * self.rank_size - elif self.cache_mode == "SSD": + elif self.cache_mode == CacheModeEnum.SSD.value: self.dev_vocab_size = 100_000 * self.rank_size self.host_vocab_size = 2_000_000 * self.rank_size self.ssd_vocab_size = 24_000_000 * self.rank_size @@ -156,16 +165,16 @@ class Config: raise ValueError(f"get CACHE_MODE:{self.cache_mode}, expect in [HBM, DDR, SSD]") def get_emb_table_cfg(self) -> dict: - if self.cache_mode == "HBM": + if self.cache_mode == CacheModeEnum.HBM.value: return {"device_vocabulary_size": self.dev_vocab_size} - elif self.cache_mode == "DDR": + elif self.cache_mode == CacheModeEnum.DDR.value: return {"device_vocabulary_size": self.dev_vocab_size, "host_vocabulary_size": self.host_vocab_size} - elif self.cache_mode == "SSD": + elif self.cache_mode == CacheModeEnum.SSD.value: return {"device_vocabulary_size": self.dev_vocab_size, "host_vocabulary_size": self.host_vocab_size, "ssd_vocabulary_size": self.ssd_vocab_size, - "ssd_data_path": ["ssd_data"]} + "ssd_data_path": SSD_DATA_PATH} else: raise RuntimeError(f"get CACHE_MODE:{self.cache_mode}, check Config.__set_emb_table_size implementation") diff --git a/examples/dlrm/model/main_mxrec.py b/examples/dlrm/model/main_mxrec.py index 6fda4f0a..a630813a 100644 --- a/examples/dlrm/model/main_mxrec.py +++ b/examples/dlrm/model/main_mxrec.py @@ -15,6 +15,7 @@ # ============================================================================== import os +import shutil import time import warnings import random @@ -25,7 +26,7 @@ from sklearn.metrics import roc_auc_score import numpy as np from optimizer import get_dense_and_sparse_optimizer -from config import sess_config, Config +from config import sess_config, Config, SSD_DATA_PATH, CacheModeEnum from model import MyModel from mx_rec.constants.constants import ASCEND_SPARSE_LOOKUP_LOCAL_EMB, ASCEND_SPARSE_LOOKUP_ID_OFFSET from mx_rec.core.asc.helper import 
FeatureSpec, get_asc_insert_func @@ -247,9 +248,35 @@ def create_feature_spec_list(use_timestamp=False): return feature_spec_list +def _del_related_dir(del_path: str) -> None: + if not os.path.isabs(del_path): + del_path = os.path.join(os.getcwd(), del_path) + dirs = glob(del_path) + for sub_dir in dirs: + shutil.rmtree(sub_dir, ignore_errors=True) + logger.info(f"delete dir:{sub_dir}") + + +def _clear_saved_model() -> None: + _del_related_dir("/root/ascend/log/*") + _del_related_dir("kernel*") + _del_related_dir("model_dir_rank*") + _del_related_dir("op_cache") + + if os.getenv("CACHE_MODE", "") != CacheModeEnum.SSD.value: + return + logger.info("current cache mode is SSD, and file overwrite is not allowed in SSD mode, deleting exist directory" + " then create empty directory for this use case.") + for sub_path in SSD_DATA_PATH: + _del_related_dir(sub_path) + os.makedirs(sub_path, mode=0o550, exist_ok=True) + logger.info(f"mkdir dir:{sub_path}") + + if __name__ == "__main__": tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) warnings.filterwarnings("ignore") + _clear_saved_model() rank_id = int(os.getenv("RANK_ID")) if os.getenv("RANK_ID") else None rank_size = int(os.getenv("TRAIN_RANK_SIZE")) if os.getenv("TRAIN_RANK_SIZE") else None diff --git a/examples/dlrm/model/run.sh b/examples/dlrm/model/run.sh index be509608..6c142443 100644 --- a/examples/dlrm/model/run.sh +++ b/examples/dlrm/model/run.sh @@ -28,18 +28,6 @@ local_rank_size=8 num_process=$((num_server * local_rank_size)) export TRAIN_RANK_SIZE=$num_process -# 删除数据 -echo "CACHE_MODE:${CACHE_MODE}" -if [ ${CACHE_MODE} = "SSD" ]; then - echo "SSD train mode not allow file exist before training, - deleting dir ${cur_path}/ssd_data then create for SSD use case" - rm -rf ssd_data - mkdir ssd_data -fi -rm -rf kernel* -rm -rf /root/ascend/log/* -rm -rf model_dir_rank* op_cache - ################# 参数配置 ###################### export USE_DYNAMIC=0 # 0:静态shape;1:动态shape export CACHE_MODE="HBM" # HBM;DDR;SSD @@ -48,6 +36,7 @@ export USE_DYNAMIC_EXPANSION=0 # 0:关闭动态扩容;1: 开启动态扩容 export USE_MULTI_LOOKUP=0 # 0:一表一查;1:一表多查 export USE_MODIFY_GRAPH=0 # 0:feature spec模式;1:自动改图模式 ################################################ +echo "CACHE_MODE:${CACHE_MODE}" export HCCL_CONNECT_TIMEOUT=1200 export DLRM_CRITEO_DATA_PATH=${dlrm_criteo_data_path} -- Gitee From 2a13ae80f882183a4a65fd93cd5277d9e02155e8 Mon Sep 17 00:00:00 2001 From: longfeifei <962977793@qq.com> Date: Wed, 22 May 2024 06:44:00 +0000 Subject: [PATCH 159/302] =?UTF-8?q?!153=20cleancode=E6=B8=85=E7=90=86=20*?= =?UTF-8?q?=20clean=20code=E6=B8=85=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mx_rec/saver/saver.py | 46 +++++++++++++++++++------------------- mx_rec/saver/warm_start.py | 2 +- 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/mx_rec/saver/saver.py b/mx_rec/saver/saver.py index 45033b4f..9f34cca3 100644 --- a/mx_rec/saver/saver.py +++ b/mx_rec/saver/saver.py @@ -50,17 +50,6 @@ class SaveModelThread(threading.Thread): class Saver(object): - @staticmethod - def _make_table_name_dir(root_dir, table_instance, table_name): - if not table_instance.is_hbm: - table_dir = os.path.join(root_dir, "HashTable", "DDR", table_name) - else: - table_dir = os.path.join(root_dir, "HashTable", "HBM", table_name) - try: - tf.io.gfile.makedirs(table_dir) - except Exception as err: - raise RuntimeError(f"make dir {table_dir} for saving sparse table failed!") from err - @para_checker_decorator(check_option_list=[ 
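[Editor's sketch] The cleanup moved from run.sh into _clear_saved_model above follows an idempotent-reset pattern: expand a glob, remove whatever matches, then recreate the directories that must exist but be empty. Self-contained sketch (paths are examples only):

    import glob
    import os
    import shutil

    def clear_matching(pattern: str) -> None:
        for path in glob.glob(pattern):
            shutil.rmtree(path, ignore_errors=True)  # tolerate races between parallel ranks

    def recreate_empty(path: str, mode: int = 0o550) -> None:
        clear_matching(path)
        os.makedirs(path, mode=mode, exist_ok=True)  # exist_ok replaces the old try/except OSError

    clear_matching("kernel*")
    recreate_empty("ssd_data")

Doing this in Python rather than in the launch script keeps the SSD no-overwrite rule next to the code that reads CACHE_MODE, so multi-node runs cannot skip it.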
("var_list", ClassValidator, {"classes": (list, type(None))}), ("max_to_keep", IntValidator, {"min_value": 0, "max_value": MAX_INT32}, ["check_value"]), @@ -82,6 +71,17 @@ class Saver(object): self.build() self.warm_start_tables = warm_start_tables + @staticmethod + def _make_table_name_dir(root_dir, table_instance, table_name): + if not table_instance.is_hbm: + table_dir = os.path.join(root_dir, "HashTable", "DDR", table_name) + else: + table_dir = os.path.join(root_dir, "HashTable", "HBM", table_name) + try: + tf.io.gfile.makedirs(table_dir) + except Exception as err: + raise RuntimeError(f"make dir {table_dir} for saving sparse table failed!") from err + def build(self): if self.var_list is None: self.var_list = [] @@ -237,6 +237,18 @@ class Saver(object): attribute = attribute.tostring() file.write(attribute) + def get_warm_start_dict(self, table_list): + placeholder_dict = defaultdict(dict) + restore_fetch_list = [] + for table_name, v in self.placeholder_dict.items(): + if table_name in table_list: + placeholder_dict[table_name] = v + restore_fetch_list.append(self.restore_fetch_dict.get(table_name)) + + if not restore_fetch_list: + logger.warning("no tables can be warm start restored.") + return placeholder_dict, restore_fetch_list + @performance("_save") def _save(self, sess, root_dir): for table_name in self.save_op_dict: @@ -317,18 +329,6 @@ class Saver(object): assign_op = state.assign(sub_optimizer_placeholder_dict.get(key_state)) self.restore_fetch_dict[table_instance.table_name].append(assign_op) - def get_warm_start_dict(self, table_list): - placeholder_dict = defaultdict(dict) - restore_fetch_list = [] - for table_name, v in self.placeholder_dict.items(): - if table_name in table_list: - placeholder_dict[table_name] = v - restore_fetch_list.append(self.restore_fetch_dict.get(table_name)) - - if not restore_fetch_list: - logger.warning("no tables can be warm start restored.") - return placeholder_dict, restore_fetch_list - def _restore(self, sess, reading_path, warm_start_tables=None): # 根据table_list去改造 if warm_start_tables: diff --git a/mx_rec/saver/warm_start.py b/mx_rec/saver/warm_start.py index b3d08ef4..22e2be43 100644 --- a/mx_rec/saver/warm_start.py +++ b/mx_rec/saver/warm_start.py @@ -214,7 +214,7 @@ def get_table_name_set_by_ckpt_path(warm_start_path: str) -> List[str]: ckpt_name = f"sparse-{base_name}" sparse_path = os.path.join(directory, ckpt_name) if not tf.io.gfile.isdir(sparse_path): - logger.info(f"under the warm start path {warm_start_path}, sparse directory {sparse_path} not exists.") + logger.info("under the warm start path %s, sparse directory %s not exists.", warm_start_path, sparse_path) else: for dirname in tf.io.gfile.listdir(sparse_path): table_name_list.append(dirname) -- Gitee From 56951145d397fb8bd6c6638dcb1472c5296bacd2 Mon Sep 17 00:00:00 2001 From: penghuiyang <1060916628@qq.com> Date: Wed, 22 May 2024 16:59:22 +0800 Subject: [PATCH 160/302] =?UTF-8?q?=20run=E8=84=9A=E6=9C=AC=E4=BF=AE?= =?UTF-8?q?=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/demo/little_demo/run.sh | 2 +- examples/demo/little_demo_estimator/run.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/demo/little_demo/run.sh b/examples/demo/little_demo/run.sh index 5b45af84..5c5d9d1d 100644 --- a/examples/demo/little_demo/run.sh +++ b/examples/demo/little_demo/run.sh @@ -160,5 +160,5 @@ fi echo "use horovod to start tasks" DATE=$(date +%Y-%m-%d-%H-%M-%S) horovodrun --network-interface 
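[Editor's sketch] The logging tweak above (f-string to %-style arguments) enables lazy formatting: with an f-string the message is interpolated even when INFO is filtered out, whereas passing arguments lets the logging module skip formatting entirely. Illustration with a throwaway logger:

    import logging

    logger = logging.getLogger("mx_rec_demo")  # demo logger, not the project's
    logger.setLevel(logging.WARNING)

    path = "/tmp/ckpt/sparse-model-100"
    logger.info(f"sparse directory {path} does not exist.")   # string built regardless of level
    logger.info("sparse directory %s does not exist.", path)  # built only if the record is emitted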
${interface} -np ${num_process} --mpi-args "${mpi_args}" --mpi -H localhost:${local_rank_size} \ -python3.7 ${py} 2>&1 | tee "temp_${local_rank_size}p_${KEY_PROCESS_THREAD_NUM}t_${USE_MODE}_${CACHE_MODE}_${DATE}.log" +python3.7 ${py} 2>&1 | tee "temp_${num_process}p_${KEY_PROCESS_THREAD_NUM}t_${USE_MODE}_${CACHE_MODE}_${DATE}.log" diff --git a/examples/demo/little_demo_estimator/run.sh b/examples/demo/little_demo_estimator/run.sh index 8bb43b19..011f0001 100644 --- a/examples/demo/little_demo_estimator/run.sh +++ b/examples/demo/little_demo_estimator/run.sh @@ -157,4 +157,4 @@ DATE=$(date +%Y-%m-%d-%H-%M-%S) horovodrun --network-interface ${interface} -np ${num_process} --mpi-args "${mpi_args}" --mpi -H localhost:${local_rank_size} \ python3.7 ${py} \ --run_mode=$USE_MODE \ -2>&1 | tee "temp_${local_rank_size}p_${KEY_PROCESS_THREAD_NUM}t_${DATE}.log" +2>&1 | tee "temp_${num_process}p_${KEY_PROCESS_THREAD_NUM}t_${DATE}.log" -- Gitee From 256ee5b4da42dae7cf586fa6799fb184d50a4df7 Mon Sep 17 00:00:00 2001 From: penghuiyang <1060916628@qq.com> Date: Thu, 23 May 2024 14:52:45 +0800 Subject: [PATCH 161/302] =?UTF-8?q?=E6=97=A5=E5=BF=97=E6=89=93=E5=8D=B0?= =?UTF-8?q?=E7=BB=9F=E4=B8=80=E5=A4=A7=E5=86=99=E5=BC=80=E5=A4=B4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/DCNv2/config.py | 2 +- examples/DCNv2/main_mxrec.py | 4 ++-- examples/demo/little_demo/main.py | 2 +- examples/dlrm/model/config.py | 2 +- examples/dlrm/model/main_mxrec.py | 4 ++-- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/examples/DCNv2/config.py b/examples/DCNv2/config.py index fab17d32..463f9aa1 100644 --- a/examples/DCNv2/config.py +++ b/examples/DCNv2/config.py @@ -163,7 +163,7 @@ class Config: else: raise ValueError(f"get CACHE_MODE:{self.cache_mode}, expect in [HBM, DDR, SSD]") - def get_emb_table_cfg(self) -> dict: + def get_emb_table_cfg(self): if self.cache_mode == CacheModeEnum.HBM.value: return {"device_vocabulary_size": self.dev_vocab_size} elif self.cache_mode == CacheModeEnum.DDR.value: diff --git a/examples/DCNv2/main_mxrec.py b/examples/DCNv2/main_mxrec.py index 12cf9428..a04e1c47 100644 --- a/examples/DCNv2/main_mxrec.py +++ b/examples/DCNv2/main_mxrec.py @@ -261,12 +261,12 @@ def _clear_saved_model() -> None: _del_related_dir("/root/ascend/log/*") if os.getenv("CACHE_MODE", "") != CacheModeEnum.SSD.value: return - logger.info("current cache mode is SSD, and file overwrite is not allowed in SSD mode, deleting exist directory" + logger.info("Current cache mode is SSD, and file overwrite is not allowed in SSD mode, deleting exist directory" " then create empty directory for this use case.") for sub_path in SSD_DATA_PATH: _del_related_dir(sub_path) os.makedirs(sub_path, mode=0o550, exist_ok=True) - logger.info(f"mkdir dir:{sub_path}") + logger.info(f"Create dir:{sub_path}") if __name__ == "__main__": diff --git a/examples/demo/little_demo/main.py b/examples/demo/little_demo/main.py index 15478aa3..80940e86 100644 --- a/examples/demo/little_demo/main.py +++ b/examples/demo/little_demo/main.py @@ -175,7 +175,7 @@ def _clear_saved_model() -> None: for sub_path in _SSD_SAVE_PATH: _del_related_dir(sub_path) os.makedirs(sub_path, mode=0o550, exist_ok=True) - logger.info(f"mkdir dir:{sub_path}") + logger.info(f"Create dir:{sub_path}") if __name__ == "__main__": diff --git a/examples/dlrm/model/config.py b/examples/dlrm/model/config.py index d6259eb0..45e8af40 100644 --- a/examples/dlrm/model/config.py +++ b/examples/dlrm/model/config.py @@ -164,7 
+164,7 @@ class Config: else: raise ValueError(f"get CACHE_MODE:{self.cache_mode}, expect in [HBM, DDR, SSD]") - def get_emb_table_cfg(self) -> dict: + def get_emb_table_cfg(self): if self.cache_mode == CacheModeEnum.HBM.value: return {"device_vocabulary_size": self.dev_vocab_size} elif self.cache_mode == CacheModeEnum.DDR.value: diff --git a/examples/dlrm/model/main_mxrec.py b/examples/dlrm/model/main_mxrec.py index a630813a..767eeb2f 100644 --- a/examples/dlrm/model/main_mxrec.py +++ b/examples/dlrm/model/main_mxrec.py @@ -265,12 +265,12 @@ def _clear_saved_model() -> None: if os.getenv("CACHE_MODE", "") != CacheModeEnum.SSD.value: return - logger.info("current cache mode is SSD, and file overwrite is not allowed in SSD mode, deleting exist directory" + logger.info("Current cache mode is SSD, and file overwrite is not allowed in SSD mode, deleting exist directory" " then create empty directory for this use case.") for sub_path in SSD_DATA_PATH: _del_related_dir(sub_path) os.makedirs(sub_path, mode=0o550, exist_ok=True) - logger.info(f"mkdir dir:{sub_path}") + logger.info(f"Create dir:{sub_path}") if __name__ == "__main__": -- Gitee From c01f45a1110251356f7c1bdab75d1c1f9d73d1a7 Mon Sep 17 00:00:00 2001 From: penghuiyang <1060916628@qq.com> Date: Thu, 23 May 2024 14:55:23 +0800 Subject: [PATCH 162/302] =?UTF-8?q?=E6=97=A5=E5=BF=97=E6=89=93=E5=8D=B0?= =?UTF-8?q?=E7=BB=9F=E4=B8=80=E5=A4=A7=E5=86=99=E5=BC=80=E5=A4=B4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/dlrm/model/main_mxrec.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/dlrm/model/main_mxrec.py b/examples/dlrm/model/main_mxrec.py index 767eeb2f..51ed7c4a 100644 --- a/examples/dlrm/model/main_mxrec.py +++ b/examples/dlrm/model/main_mxrec.py @@ -254,7 +254,7 @@ def _del_related_dir(del_path: str) -> None: dirs = glob(del_path) for sub_dir in dirs: shutil.rmtree(sub_dir, ignore_errors=True) - logger.info(f"delete dir:{sub_dir}") + logger.info(f"Delete dir:{sub_dir}") def _clear_saved_model() -> None: -- Gitee From 99f126e6b492c2c7e8fb7e4b644dfe08681b7962 Mon Sep 17 00:00:00 2001 From: longfeifei <962977793@qq.com> Date: Thu, 23 May 2024 07:41:42 +0000 Subject: [PATCH 163/302] =?UTF-8?q?!157=20warm=20start=E5=8E=BB=E9=99=A4?= =?UTF-8?q?=E5=86=97=E4=BD=99=E5=88=A4=E6=96=AD=20*=20Merge=20remote-track?= =?UTF-8?q?ing=20branch=20'upstream/develop'=20into=20warm=5Fstart=5Fdev?= =?UTF-8?q?=20*=20warm=20start=E5=8E=BB=E9=99=A4=E5=86=97=E4=BD=99?= =?UTF-8?q?=E5=88=A4=E6=96=AD=E9=80=BB=E8=BE=91=20*=20clean=20code?= =?UTF-8?q?=E6=B8=85=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mx_rec/saver/warm_start.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/mx_rec/saver/warm_start.py b/mx_rec/saver/warm_start.py index 22e2be43..7ceb14c1 100644 --- a/mx_rec/saver/warm_start.py +++ b/mx_rec/saver/warm_start.py @@ -180,8 +180,6 @@ def _warm_settings_filter(warm_start_setting: tf.estimator.WarmStartSettings) -> matching_tables = [table for table in table_name_list if re.match(vars_to_warm_start, table)] if matching_tables: WarmStartController().add_element(warm_start_setting.ckpt_to_initialize_from, matching_tables) - if vars_to_warm_start != ".*": - return warm_start_setting_res warm_start_setting_res = warm_start_setting elif all(isinstance(v, str) for v in vars_to_warm_start): sparse_vars = [] -- Gitee From 332d9ce17fa4cdafb4b8a215056a3359280905be Mon Sep 17 00:00:00 2001 From: yangzhen_BIG Date: Sat, 
25 May 2024 09:35:23 +0000 Subject: [PATCH 164/302] =?UTF-8?q?!151=20=E5=BC=95=E5=85=A5embCache?= =?UTF-8?q?=E7=89=B9=E6=80=A7=20*=20increase=20send=20eos=20wait=20time=20?= =?UTF-8?q?*=20fix=20save=20bug;=20simplify=20ddr=20process=20logic=20*=20?= =?UTF-8?q?add=20init=20specialProcessStatus=20*=20=E4=BF=AE=E5=A4=8Dstep\?= =?UTF-8?q?interval=E5=85=A8=E4=B8=BA1=E4=B8=94=E5=A4=9A=E8=BD=AE=E5=88=87?= =?UTF-8?q?=E6=8D=A2=E5=9C=BA=E6=99=AF=20*=20adapt=20merge=20change=20*=20?= =?UTF-8?q?=E5=90=8C=E6=AD=A5pr143=20*=20!148=20=E5=BC=95=E5=85=A5embCache?= =?UTF-8?q?=E6=95=B0=E6=8D=AE=E5=A4=84=E7=90=86=E9=80=BB=E8=BE=91=20*=20!1?= =?UTF-8?q?45=20=E5=AE=9E=E7=8E=B0embCache=E4=BF=9D=E5=AD=98=E5=8A=A0?= =?UTF-8?q?=E8=BD=BD=E5=8A=9F=E8=83=BD=20*=20!140=20=E5=90=8C=E6=AD=A5AccC?= =?UTF-8?q?TR=E4=BF=9D=E5=AD=98=E5=8A=A0=E8=BD=BD=E4=BB=A3=E7=A0=81=20*=20?= =?UTF-8?q?!141=20SSD=E6=96=B0=E5=A2=9Eswap=E9=80=BB=E8=BE=91=EF=BC=8C?= =?UTF-8?q?=E8=B0=83=E6=95=B4key=E6=95=B0=E6=8D=AE=E7=B1=BB=E5=9E=8B=20*?= =?UTF-8?q?=20!135=20=E5=A2=9E=E5=8A=A0embCache=E5=A4=B4=E6=96=87=E4=BB=B6?= =?UTF-8?q?=EF=BC=9B=E9=80=82=E9=85=8DInitializer=EF=BC=9B=E9=80=82?= =?UTF-8?q?=E9=85=8Dtest=20*=20!139=20little=20demo=E4=BF=AE=E6=AD=A3step?= =?UTF-8?q?=E4=B8=BA-1=E6=88=96=E9=9D=9E=E6=95=B4=E6=95=B0=E6=97=B6?= =?UTF-8?q?=E4=B8=8D=E7=AC=A6=E5=90=88=E9=A2=84=E6=9C=9F=E8=A1=8C=E4=B8=BA?= =?UTF-8?q?=EF=BC=9Bvocab=20size=E9=80=82=E9=85=8D=20*=20!134=20=E6=96=B0?= =?UTF-8?q?=E5=A2=9E=E4=BF=9D=E5=AD=98channel=EF=BC=8C=E5=BC=95=E5=85=A5?= =?UTF-8?q?=E5=A4=9A=E7=BA=BF=E7=A8=8B=EF=BC=9B=E6=96=B0=E5=A2=9Eblock?= =?UTF-8?q?=E5=88=A4=E6=96=AD=E6=8E=A5=E5=8F=A3=20*=20!130=20=E5=90=8C?= =?UTF-8?q?=E6=AD=A5AccCTR=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/demo/little_demo/main.py | 26 +- examples/demo/little_demo/run_mode.py | 21 +- mx_rec/core/asc/build_graph.py | 41 +- mx_rec/core/asc/manager.py | 5 +- mx_rec/core/feature_process.py | 6 +- mx_rec/graph/modifier.py | 97 +- mx_rec/saver/saver.py | 69 +- mx_rec/util/config_utils/hybrid_mgmt_utils.py | 6 + src/AccCTR/3rdparty/CMakeLists.txt | 14 + src/AccCTR/CMakeLists.txt | 16 +- src/AccCTR/README.md | 4 +- src/AccCTR/src/CMakeLists.txt | 9 +- src/AccCTR/src/common/util/error_code.h | 15 +- .../src/common/util/external_threader.h | 70 + src/AccCTR/src/embedding_cache/CMakeLists.txt | 27 + .../cache_manager/cache_manager.cpp | 421 ++++ .../cache_manager/cache_manager.h | 95 + src/AccCTR/src/embedding_cache/common.h | 65 + .../embedding_local_table/emb_local_table.cpp | 475 ++++ .../embedding_local_table/emb_local_table.h | 84 + .../constant_initializer.cpp | 62 + .../initializer/initializer.cpp | 56 + .../random_normal_initializer.cpp | 78 + .../truncated_normal_initializer.cpp | 94 + src/AccCTR/src/embedding_cache/limited_set.h | 118 + .../offset_mapper/address_mapper.h | 308 +++ .../offset_mapper/mapper_base.h | 810 +++++++ .../offset_mapper/offset_mapper.h | 248 ++ src/AccCTR/src/factory_impl.cpp | 11 + src/AccCTR/src/factory_impl.h | 2 + src/AccCTR/src/include/CMakeLists.txt | 2 +- src/AccCTR/src/include/embedding_cache.h | 321 +++ src/AccCTR/src/include/factory.h | 5 +- src/AccCTR/src/include/ock_ctr_common_def.h | 2 +- src/AccCTR/src/include/unique.h | 1 + src/AccCTR/src/unique/unique_func.cpp | 63 +- src/AccCTR/src/unique/unique_func.h | 237 +- src/AccCTR/src/unique/unique_impl.cpp | 8 + src/AccCTR/src/unique/unique_impl.h | 2 +- src/AccCTR/tests/tools/create_fake_id.py | 6 - 
src/AccCTR/tests/ut/conf/toolchain.cmake | 24 + src/AccCTR/tests/ut/src/CMakeLists.txt | 26 +- src/AccCTR/tests/ut/src/common.h | 64 + src/AccCTR/tests/ut/src/emb_cache_test.cpp | 1999 ++++++++++++++++ src/AccCTR/tests/ut/src/emb_cache_test.h | 62 + src/AccCTR/tests/ut/src/unique_test.cpp | 53 +- src/AccCTR/tests/ut/src/unique_test.h | 16 - src/core/CMakeLists.txt | 10 +- src/core/checkpoint/checkpoint.cpp | 15 +- src/core/checkpoint/checkpoint.h | 3 - .../ckpt_data_handler/ckpt_data_handler.cpp | 5 +- .../ckpt_data_handler/ckpt_data_handler.h | 2 - .../feat_admit_n_evict_ckpt.cpp | 2 +- src/core/emb_hashmap/emb_hashmap.cpp | 477 ---- src/core/emb_hashmap/emb_hashmap.h | 81 - src/core/emb_table/emb_table.cpp | 4 +- src/core/emb_table/embedding_ddr.cpp | 689 ++---- src/core/emb_table/embedding_ddr.h | 61 +- src/core/emb_table/embedding_dynamic.cpp | 10 +- src/core/emb_table/embedding_dynamic.h | 3 +- src/core/emb_table/embedding_mgmt.cpp | 95 +- src/core/emb_table/embedding_mgmt.h | 34 +- src/core/emb_table/embedding_static.cpp | 2 +- src/core/emb_table/embedding_static.h | 2 +- src/core/emb_table/embedding_table.cpp | 68 +- src/core/emb_table/embedding_table.h | 34 +- src/core/file_system/file_system.h | 5 +- .../hdfs_file_system/hdfs_file_system.cpp | 16 +- .../hdfs_file_system/hdfs_file_system.h | 2 +- .../local_file_system/local_file_system.cpp | 45 +- .../local_file_system/local_file_system.h | 2 +- src/core/hd_transfer/hd_transfer.cpp | 84 +- src/core/hd_transfer/hd_transfer.h | 11 +- src/core/host_emb/host_emb.cpp | 278 --- src/core/host_emb/host_emb.h | 76 - src/core/hybrid_mgmt/hybrid_mgmt.cpp | 2046 ++++++++++++----- src/core/hybrid_mgmt/hybrid_mgmt.h | 233 +- src/core/hybrid_mgmt/hybrid_mgmt_block.cpp | 40 +- src/core/hybrid_mgmt/hybrid_mgmt_block.h | 15 +- .../key_process/feature_admit_and_evict.cpp | 4 +- .../key_process/feature_admit_and_evict.h | 5 +- src/core/key_process/key_process.cpp | 393 +++- src/core/key_process/key_process.h | 39 +- .../ock_ctr_common/include/embedding_cache.h | 321 +++ src/core/ock_ctr_common/include/factory.h | 23 +- .../include/ock_ctr_common_def.h | 18 +- src/core/ock_ctr_common/include/unique.h | 1 + src/core/ssd_cache/cache_manager.cpp | 578 ++--- src/core/ssd_cache/cache_manager.h | 99 +- src/core/ssd_cache/lfu_cache.cpp | 37 +- src/core/ssd_cache/lfu_cache.h | 24 +- src/core/ssd_cache/preprocess_mapper.h | 108 + src/core/ssd_engine/file.cpp | 59 +- src/core/ssd_engine/file.h | 26 +- src/core/ssd_engine/ssd_engine.cpp | 36 +- src/core/ssd_engine/ssd_engine.h | 14 +- src/core/ssd_engine/table.cpp | 71 +- src/core/ssd_engine/table.h | 28 +- src/core/utils/common.cpp | 2 + src/core/utils/common.h | 97 +- src/core/utils/task_queue.h | 110 + src/pybind/module_main.cpp | 49 +- src/tests/checkpoint/checkpoint_test.cpp | 8 +- src/tests/emb_hashmap/emb_hashmap_test.cpp | 185 -- src/tests/emb_mgmt/emb_mgmt_test.cpp | 82 - src/tests/emb_table/embedding_ddr_test.cpp | 76 +- src/tests/emb_table/embedding_mgmt_test.cpp | 6 +- src/tests/emb_table/embedding_static_test.cpp | 5 +- .../file_system/hdfs_file_system_test.cpp | 1 - .../file_system/local_file_system_test.cpp | 7 +- src/tests/host_emb/host_emb_test.cpp | 107 - .../feature_admit_and_evict_test.cpp | 4 +- src/tests/key_process/key_process_test.cpp | 14 +- src/tests/ssd_cache/cache_manager_test.cpp | 269 +-- src/tests/ssd_cache/lfu_cache_test.cpp | 16 +- src/tests/ssd_engine/engine_test.cpp | 12 +- src/tests/ssd_engine/file_test.cpp | 43 +- src/tests/ssd_engine/table_test.cpp | 12 +- 
src/tests/utils/common_h_test.cpp | 6 - tests/mx_rec/core/test_build_graph.py | 23 +- tests/mx_rec/saver/test_saver.py | 1 + tests/run_python_dt.sh | 2 +- 122 files changed, 9634 insertions(+), 4081 deletions(-) create mode 100644 src/AccCTR/src/embedding_cache/CMakeLists.txt create mode 100644 src/AccCTR/src/embedding_cache/cache_manager/cache_manager.cpp create mode 100644 src/AccCTR/src/embedding_cache/cache_manager/cache_manager.h create mode 100644 src/AccCTR/src/embedding_cache/common.h create mode 100644 src/AccCTR/src/embedding_cache/embedding_local_table/emb_local_table.cpp create mode 100644 src/AccCTR/src/embedding_cache/embedding_local_table/emb_local_table.h create mode 100644 src/AccCTR/src/embedding_cache/initializer/constant_initializer/constant_initializer.cpp create mode 100644 src/AccCTR/src/embedding_cache/initializer/initializer.cpp create mode 100644 src/AccCTR/src/embedding_cache/initializer/random_normal_initializer/random_normal_initializer.cpp create mode 100644 src/AccCTR/src/embedding_cache/initializer/truncated_normal_initializer/truncated_normal_initializer.cpp create mode 100644 src/AccCTR/src/embedding_cache/limited_set.h create mode 100644 src/AccCTR/src/embedding_cache/offset_mapper/address_mapper.h create mode 100644 src/AccCTR/src/embedding_cache/offset_mapper/mapper_base.h create mode 100644 src/AccCTR/src/embedding_cache/offset_mapper/offset_mapper.h create mode 100644 src/AccCTR/src/include/embedding_cache.h create mode 100644 src/AccCTR/tests/ut/conf/toolchain.cmake create mode 100644 src/AccCTR/tests/ut/src/common.h create mode 100644 src/AccCTR/tests/ut/src/emb_cache_test.cpp create mode 100644 src/AccCTR/tests/ut/src/emb_cache_test.h delete mode 100644 src/core/emb_hashmap/emb_hashmap.cpp delete mode 100644 src/core/emb_hashmap/emb_hashmap.h delete mode 100644 src/core/host_emb/host_emb.cpp delete mode 100644 src/core/host_emb/host_emb.h create mode 100644 src/core/ock_ctr_common/include/embedding_cache.h create mode 100644 src/core/ssd_cache/preprocess_mapper.h create mode 100644 src/core/utils/task_queue.h delete mode 100644 src/tests/emb_hashmap/emb_hashmap_test.cpp delete mode 100644 src/tests/host_emb/host_emb_test.cpp mode change 100644 => 100755 tests/run_python_dt.sh diff --git a/examples/demo/little_demo/main.py b/examples/demo/little_demo/main.py index 80940e86..ff09bc50 100644 --- a/examples/demo/little_demo/main.py +++ b/examples/demo/little_demo/main.py @@ -185,7 +185,8 @@ if __name__ == "__main__": use_mode = UseMode.mapping(os.getenv("USE_MODE")) # 最大数据集生成数量 - MAX_DATASET_GENERATE = 200 + MAX_DATASET_GENERATE_TRAIN = 200 + MAX_DATASET_GENERATE_EVAL = 10 # 最大训练的步数 MAX_TRAIN_STEPS = 200 # 训练多少步切换为评估 @@ -232,6 +233,7 @@ if __name__ == "__main__": init(train_steps=TRAIN_STEPS, eval_steps=EVAL_STEPS, save_steps=SAVING_INTERVAL, + max_steps=MAX_TRAIN_STEPS, use_dynamic=use_dynamic, use_dynamic_expansion=use_dynamic_expansion, if_load=if_load) @@ -261,12 +263,12 @@ if __name__ == "__main__": # 验证DDR的配置参考:建议跑dynamic避免调参。数据集key总量大于device表,小于device+host;一个batch的unique key数量小于device表。 # 验证SSD的配置参考:建议跑dynamic避免调参。数据集key总量大于device+host;一个batch的unique key数量小于device表。 hbm_test_cfg = {"device_vocabulary_size": cfg.user_vocab_size, "host_vocabulary_size": 0} - ddr_test_cfg = {"device_vocabulary_size": int(cfg.user_vocab_size * 0.2), - "host_vocabulary_size": int(cfg.user_vocab_size * 0.8)} + ddr_test_cfg = {"device_vocabulary_size": int(cfg.user_vocab_size * 0.4), + "host_vocabulary_size": int(cfg.user_vocab_size * 1.0)} ssd_test_cfg = { - 
"device_vocabulary_size": int(cfg.user_vocab_size * 0.1), - "host_vocabulary_size": int(cfg.user_vocab_size * 0.1), - "ssd_vocabulary_size": int(cfg.user_vocab_size * 0.8), "ssd_data_path": _SSD_SAVE_PATH + "device_vocabulary_size": int(cfg.user_vocab_size * 0.4), + "host_vocabulary_size": int(cfg.user_vocab_size * 0.8), + "ssd_vocabulary_size": int(cfg.user_vocab_size * 1.8), "ssd_data_path": _SSD_SAVE_PATH } cache_mode_dict = {CacheModeEnum.HBM.value: hbm_test_cfg, CacheModeEnum.DDR.value: ddr_test_cfg, CacheModeEnum.SSD.value: ssd_test_cfg} @@ -297,14 +299,16 @@ if __name__ == "__main__": train_batch = None table_list = [user_hashtable, item_hashtable] if use_mode in [UseMode.TRAIN, UseMode.LOAD_AND_TRAIN]: - train_iterator, train_model, train_batch = build_graph(table_list, is_train=True, - feature_spec_list=train_feature_spec_list, - config_dict=ACCESS_AND_EVICT, - batch_number=MAX_DATASET_GENERATE * get_rank_size()) + train_iterator, train_model, train_batch = build_graph( + table_list, is_train=True, + feature_spec_list=train_feature_spec_list, + config_dict=ACCESS_AND_EVICT, + batch_number=MAX_DATASET_GENERATE_TRAIN * get_rank_size() + ) eval_iterator, eval_model, eval_batch = build_graph(table_list, is_train=False, feature_spec_list=eval_feature_spec_list, config_dict=ACCESS_AND_EVICT, - batch_number=MAX_DATASET_GENERATE * get_rank_size()) + batch_number=MAX_DATASET_GENERATE_EVAL * get_rank_size()) dense_variables, sparse_variables = get_dense_and_sparse_variable() params = {"train_batch": train_batch, "eval_batch": eval_batch, "use_one_shot": USE_ONE_SHOT, diff --git a/examples/demo/little_demo/run_mode.py b/examples/demo/little_demo/run_mode.py index f164322a..1a15fcc6 100644 --- a/examples/demo/little_demo/run_mode.py +++ b/examples/demo/little_demo/run_mode.py @@ -16,6 +16,7 @@ # ============================================================================== import os +import sys from typing import List import tensorflow as tf @@ -72,6 +73,8 @@ class RunMode: channel_id = ConfigInitializer.get_instance().train_params_config.get_training_mode_channel_id(False) import_host_pipeline_ops().clear_channel(channel_id) + if self.infer_steps == -1: + self.infer_steps = sys.maxsize # 消耗全部数据 for i in range(1, self.infer_steps + 1): logger.info("############### infer at step %d ################", i) try: @@ -126,17 +129,19 @@ class RunMode: self.session.run(initializer) else: logger.debug(f"use one shot iterator and modify graph is `{self.is_modify_graph}`.") - self.saver = tf.compat.v1.train.Saver() - start_step = 1 + latest_ckpt_step = 0 + start_step = 1 if if_load: - latest_step = get_load_step(model_file) - start_step = latest_step + 1 - self.saver.restore(self.session, f"./saved-model/model-{latest_step}") + latest_ckpt_step = get_load_step(model_file) + start_step = latest_ckpt_step + 1 + self.saver.restore(self.session, f"./saved-model/model-{latest_ckpt_step}") else: self.session.run(tf.compat.v1.global_variables_initializer()) + if self.max_train_steps == -1: + self.max_train_steps = sys.maxsize # 消耗全部数据 for i in range(start_step, start_step + self.max_train_steps): logger.info("################ training at step %d ################", i) try: @@ -151,13 +156,13 @@ class RunMode: logger.info(f"training at step:{i}, table[{t.table_name}], table size:{t.size()}, " f"table capacity:{t.capacity()}") - if i % train_interval == 0: + if train_interval != -1 and (i - latest_ckpt_step) % train_interval == 0: self.evaluate() - if i % saving_interval == 0: + if saving_interval != -1 and (i - 
latest_ckpt_step) % saving_interval == 0: self.saver.save(self.session, f"./saved-model/model", global_step=i) - if self.is_faae and i == train_interval // 2: + if train_interval != -1 and self.is_faae and i == train_interval // 2: logger.info("############### set_threshold at step:%d ################", i) self.change_threshold() diff --git a/mx_rec/core/asc/build_graph.py b/mx_rec/core/asc/build_graph.py index 46dbf193..0ddf313e 100644 --- a/mx_rec/core/asc/build_graph.py +++ b/mx_rec/core/asc/build_graph.py @@ -15,7 +15,8 @@ # limitations under the License. # ============================================================================== -from typing import Optional +from dataclasses import dataclass, field +from typing import Optional, List, Dict, Union, Tuple import tensorflow as tf @@ -26,6 +27,14 @@ from mx_rec.util.log import logger from mx_rec.core.asc.swap_args import SwapArgs, SwapDataType +@dataclass +class SwapInfo: + swap_in_len: int = 0 + swap_in_pos: List[tf.Tensor] = field(default_factory=lambda: []) + swap_out_len: int = 0 + swap_out_pos: List[tf.Tensor] = field(default_factory=lambda: []) + + def get_restore_vector(config): logger.debug('Channel %s_restore_%s was built for getnext', config.get("table_name"), config.get("channel_id")) if config.get("is_hbm"): @@ -58,28 +67,37 @@ def get_restore_vector(config): return restore_vector, hot_pos -def get_id_offsets(max_lookup_vec_size, config): +def get_id_offsets(max_lookup_vec_size: int, config: dict) -> Tuple[int, SwapInfo]: logger.debug('Channel %s_lookup_%s was built for getnext', config.get("table_name"), config.get("channel_id")) # 自动扩容当前只支持HBM模式,默认没有换入换出 + swap_info = SwapInfo() + with tf.compat.v1.variable_scope(config.get("table_name"), reuse=tf.compat.v1.AUTO_REUSE): if config.get("use_dynamic_expansion"): [id_offsets] = npu_ops.gen_npu_ops.get_next( output_types=[tf.int64], output_shapes=[[max_lookup_vec_size]], channel_name=f'{config.get("table_name")}_lookup_{config.get("channel_id")}') - return id_offsets, [], 0 + return id_offsets, swap_info [id_offsets] = npu_ops.gen_npu_ops.get_next( output_types=[tf.int32], output_shapes=[[max_lookup_vec_size]], channel_name=f'{config.get("table_name")}_lookup_{config.get("channel_id")}') if config.get("is_hbm"): - return id_offsets, [], 0 - swap_pos, swap_len = npu_ops.gen_npu_ops.get_next( - output_types=[tf.int32, tf.int32], - output_shapes=[[max_lookup_vec_size], []], - channel_name=f'{config.get("table_name")}_swap_{config.get("channel_id")}') - return id_offsets, swap_pos, swap_len + return id_offsets, swap_info + ( + swap_info.swap_in_pos, + swap_info.swap_out_pos, + swap_info.swap_in_len, + swap_info.swap_out_len, + ) = npu_ops.gen_npu_ops.get_next( + output_types=[tf.int32, tf.int32, tf.int32, tf.int32], + output_shapes=[[max_lookup_vec_size], [max_lookup_vec_size], [], []], + channel_name=f'{config.get("table_name")}_swap_all', + ) + logger.debug('Channel %s_swap_all was built for getnext', config.get("table_name")) + return id_offsets, swap_info def get_all2all_args(use_static: bool, config: dict) -> Optional[list]: @@ -115,13 +133,14 @@ def get_preprocessed_tensor_for_asc(table, config): restore_vector, hot_pos = get_restore_vector(config) with tf.compat.v1.variable_scope("id_offsets"): - id_offsets, swap_pos, swap_len = get_id_offsets(max_lookup_vec_size, config) + id_offsets, swap_info = get_id_offsets(max_lookup_vec_size, config) if not config.get("is_hbm"): # 一表多查时,会多次进入get_preprocessed_tensor_for_asc,最后一次大查询替换map的key-value即可 swap_args = SwapArgs() + 
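[Editor's sketch] Treating -1 as "consume the whole dataset" by substituting sys.maxsize, as run_mode.py now does for both infer_steps and max_train_steps, lets a single range() drive bounded and unbounded runs alike; the loop then ends via the dataset's OutOfRangeError rather than the counter. Minimal sketch of the sentinel handling:

    import sys

    def effective_steps(requested: int) -> int:
        """-1 means 'run until the data is exhausted'."""
        return sys.maxsize if requested == -1 else requested

    assert effective_steps(200) == 200
    assert effective_steps(-1) == sys.maxsize

The companion change of offsetting intervals by latest_ckpt_step makes evaluation and checkpointing fire relative to the restored step instead of the absolute counter, so a resumed run keeps the same cadence.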
swap_args.set_data(SwapDataType.CONFIG.value, var_name=config.get("table_name"), - var_channel=config.get("channel_id"), config=config, swap_pos=swap_pos, swap_len=swap_len) + var_channel=config.get("channel_id"), config=config, swap_info=swap_info) all2all_args = get_all2all_args(use_static, config) diff --git a/mx_rec/core/asc/manager.py b/mx_rec/core/asc/manager.py index 97a71a4d..3a24b3d7 100644 --- a/mx_rec/core/asc/manager.py +++ b/mx_rec/core/asc/manager.py @@ -194,6 +194,7 @@ def initialize_emb_cache(table_info_list, threshold_list): train_steps = ConfigInitializer.get_instance().train_steps eval_steps = ConfigInitializer.get_instance().eval_steps save_steps = ConfigInitializer.get_instance().save_steps + max_train_steps = ConfigInitializer.get_instance().max_steps if_load = ConfigInitializer.get_instance().if_load option = 0 @@ -206,8 +207,8 @@ def initialize_emb_cache(table_info_list, threshold_list): if optimizer and optimizer.derivative == 2: option = option | USE_SUM_SAME_ID_GRADIENTS - # [train_steps, eval_steps, save_steps] pass step information to HybridMgmt for data process loop - rank_info = RankInfo(rank_id, device_id, rank_size, option, [train_steps, eval_steps, save_steps]) + # pass step information to HybridMgmt for data process loop + rank_info = RankInfo(rank_id, device_id, rank_size, option, [train_steps, eval_steps, save_steps, max_train_steps]) emb_cache = HybridMgmt() diff --git a/mx_rec/core/feature_process.py b/mx_rec/core/feature_process.py index 7a90e78b..a2161d02 100644 --- a/mx_rec/core/feature_process.py +++ b/mx_rec/core/feature_process.py @@ -61,6 +61,8 @@ class EvictHook(tf.compat.v1.train.SessionRunHook): raise RuntimeError("Global step should be created to use _EvictHook.") self.check_name_and_get_hashtable() for name, instance in self._hash_table_instance.items(): + if not instance.is_hbm: + continue scope_name = f"{instance.table_name}//evict" with tf.compat.v1.variable_scope(scope_name): logger.debug('Channel %s_evict_%d was built for op getnext', instance.table_name, TRAIN_CHANNEL_ID) @@ -99,7 +101,9 @@ class EvictHook(tf.compat.v1.train.SessionRunHook): if not ConfigInitializer.get_instance().hybrid_manager_config.trigger_evict(): return self._start_time = cur_time - for name in self._hash_table_instance.keys(): + for name, instance in self._hash_table_instance.items(): + if not instance.is_hbm: + continue run_context.session.run(self._evict_op.get(name)) def check_name_and_get_hashtable(self): diff --git a/mx_rec/graph/modifier.py b/mx_rec/graph/modifier.py index 179de09f..01aeda94 100644 --- a/mx_rec/graph/modifier.py +++ b/mx_rec/graph/modifier.py @@ -39,6 +39,7 @@ from mx_rec.core.asc.feature_spec import FeatureSpec from mx_rec.core.asc.helper import get_asc_insert_func from mx_rec.core.asc.manager import start_asc_pipeline from mx_rec.core.asc.swap_args import SwapArgs +from mx_rec.core.asc.build_graph import SwapInfo from mx_rec.core.emb.base_sparse_embedding import BaseSparseEmbedding from mx_rec.graph.merge_lookup import do_merge_lookup from mx_rec.graph.utils import check_and_force_list, export_pb_graph @@ -245,14 +246,13 @@ class _GraphModifier: table_instance = ConfigInitializer.get_instance().sparse_embed_config.get_table_instance(each_var) if table_instance.is_hbm: continue - swap_args_dict = swap_args.swap_config_dict[table_instance.table_name][channel_id] - swap_pos = swap_args_dict["swap_pos"] - swap_len = swap_args_dict["swap_len"] variable_and_slot_list = _get_variable_and_slot_list( each_var, slot_num, 
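[Editor's sketch] The SwapInfo dataclass introduced above declares its list fields with field(default_factory=...): a dataclass rejects a bare mutable default (list/dict/set) outright, and default_factory guarantees each instance gets a fresh list. Quick demonstration with a stand-in mirroring SwapInfo:

    from dataclasses import dataclass, field
    from typing import List

    @dataclass
    class SwapInfoDemo:  # stand-in; the real SwapInfo carries in/out lengths and positions
        swap_in_len: int = 0
        swap_in_pos: List[int] = field(default_factory=list)

    a, b = SwapInfoDemo(), SwapInfoDemo()
    a.swap_in_pos.append(7)
    assert b.swap_in_pos == []  # no sharing between instances

(default_factory=list is equivalent to the lambda used in the patch.)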
table_instance.table_name, channel_id ) - swap_op = _get_swap_info(table_instance, variable_and_slot_list, swap_len, swap_pos, channel_id) + swap_args_dict = swap_args.swap_config_dict[table_instance.table_name][channel_id] + swap_op = _get_swap_info( + table_instance, variable_and_slot_list, swap_args_dict["swap_info"], channel_id) swap_control_dict = swap_args.swap_control_dict[table_instance.table_name][channel_id] if "control_ops" not in swap_control_dict: raise ValueError("Missing Required key in modify_graph_for_asc: control_ops") @@ -518,6 +518,7 @@ class _GraphModifier: @para_checker_decorator( check_option_list=[ + ("full_graph", ClassValidator, {"classes": (Graph, type(None))}), ("dump_graph", ClassValidator, {"classes": (bool,)}), ] ) @@ -718,57 +719,57 @@ def _get_variable_and_slot_list(each_var, slot_num, table_name, channel_id): return variable_and_slot_list -def _get_swap_info( - table_instance: BaseSparseEmbedding, variable_and_slot_list: list, swap_len: int, swap_pos: list, channel_id: int -) -> list: +def _get_swap_info(table_instance: BaseSparseEmbedding, variable_and_slot_list: list, + swap_info: SwapInfo, channel_id: int) -> list: """ - Get swap info if threshold is configured. + Get swap op. :param table_instance: BaseSparseEmbedding :param variable_and_slot_list: [var + slots] - :param swap_len: swap length - :param swap_pos: swap position + :param swap_info: swap in/out length and position :param channel_id: train or predict - :return: swap info + :return: swap op """ + if table_instance.is_hbm: + return [tf.no_op()] + + if len(variable_and_slot_list) == 0: + raise RuntimeError("When enable emb_transfer, optimizer should have slots") + use_static = ConfigInitializer.get_instance().use_static max_lookup_vec_size = None if use_static: max_lookup_vec_size = table_instance.send_count * table_instance.rank_size - if table_instance.is_hbm: - swap_in = [tf.no_op()] - else: - with tf.compat.v1.variable_scope("h2d_emb"): - logger.debug("Channel %s_h2d_%s was built for getnext", table_instance.table_name, channel_id) - h2d_emb = npu_ops.gen_npu_ops.get_next( - output_types=[tf.float32], - output_shapes=[[max_lookup_vec_size, table_instance.ext_emb_size]], - channel_name=f"{table_instance.table_name}_h2d_{channel_id}", - )[0] - logger.debug("h2d_emb shape: %s", h2d_emb) - if not isinstance(variable_and_slot_list, list): - raise RuntimeError("When enable emb_transfer, optimizer should have slots") - if use_static: - swap_pos = swap_pos[0:swap_len] - h2d_emb = h2d_emb[0:swap_len, :] - swap_outs = [tf.gather(one_table, swap_pos) for one_table in variable_and_slot_list] - swap_out = tf.concat(swap_outs, axis=1) - logger.debug("Channel %s_d2h_%s was built for op outfeed.", table_instance.table_name, channel_id) - swap_out_op = npu_ops.outfeed_enqueue_op( - channel_name=f"{table_instance.table_name}_d2h_{channel_id}", inputs=[swap_out] - ) - with tf.control_dependencies([swap_out_op]): - nd_swap_pos = tf.expand_dims(swap_pos, 1) - table_num = len(variable_and_slot_list) - h2d_emb_split = tf.split(h2d_emb, table_num, axis=1) - optimizer = ConfigInitializer.get_instance().optimizer_config.get_optimizer_by_table_name( - table_instance.table_name - ) - if optimizer is None and channel_id == 1: - swap_in = [tf.compat.v1.scatter_nd_update(variable_and_slot_list[0], nd_swap_pos, h2d_emb_split[0])] - else: - swap_in = [ - tf.compat.v1.scatter_nd_update(variable_and_slot_list[i], nd_swap_pos, h2d_emb_split[i]) - for i in range(len(variable_and_slot_list)) - ] - return swap_in + with 
tf.compat.v1.variable_scope("h2d_emb"): + logger.debug('Channel %s_h2d_%s was built for getnext', table_instance.table_name, channel_id) + h2d_emb = npu_ops.gen_npu_ops.get_next( + output_types=[tf.float32], + output_shapes=[[max_lookup_vec_size, table_instance.ext_emb_size]], + channel_name=f'{table_instance.table_name}_h2d_all')[0] + logger.debug("h2d_emb shape: %s", h2d_emb) + + swap_out_pos = swap_info.swap_out_pos + swap_in_pos = swap_info.swap_in_pos + if use_static: + swap_out_pos = swap_out_pos[:swap_info.swap_out_len] + h2d_emb = h2d_emb[:swap_info.swap_in_len, :] + swap_in_pos = swap_in_pos[:swap_info.swap_in_len] + swap_outs = [tf.gather(one_table, swap_out_pos) for one_table in variable_and_slot_list] + swap_out = tf.concat(swap_outs, axis=1) + logger.debug('Channel %s_d2h_all was built for op outfeed.', table_instance.table_name) + + swap_out_op = npu_ops.outfeed_enqueue_op( + channel_name=f'{table_instance.table_name}_d2h_all', inputs=[swap_out]) + with tf.control_dependencies([swap_out_op]): + nd_swap_pos = tf.expand_dims(swap_in_pos, 1) + var_num = len(variable_and_slot_list) + h2d_emb_split = tf.split(h2d_emb, var_num, axis=1) + + optimizer = ConfigInitializer.get_instance().optimizer_config.get_optimizer_by_table_name( + table_instance.table_name) + if optimizer is None and channel_id == 1: + swap_in_op = [tf.compat.v1.scatter_nd_update(variable_and_slot_list[0], nd_swap_pos, h2d_emb_split[0])] + else: + swap_in_op = [tf.compat.v1.scatter_nd_update(variable_and_slot_list[i], nd_swap_pos, h2d_emb_split[i]) + for i in range(var_num)] + return swap_in_op diff --git a/mx_rec/saver/saver.py b/mx_rec/saver/saver.py index 9f34cca3..a91599bc 100644 --- a/mx_rec/saver/saver.py +++ b/mx_rec/saver/saver.py @@ -24,7 +24,7 @@ import tensorflow as tf from tensorflow.python.util import compat from mx_rec.constants.constants import DataName, DataAttr, MIN_SIZE, MAX_FILE_SIZE, Flag, TFDevice, \ - MAX_INT32, HDFS_FILE_PREFIX + MAX_INT32, HDFS_FILE_PREFIX, TRAIN_CHANNEL_ID from mx_rec.util.communication.hccl_ops import get_rank_id, get_rank_size, get_local_rank_size from mx_rec.util.initialize import ConfigInitializer from mx_rec.util.perf import performance @@ -33,6 +33,7 @@ from mx_rec.validator.validator import DirectoryValidator, FileValidator, para_c from mx_rec.util.global_env_conf import global_env from mx_rec.util.log import logger from mx_rec.optimizers.base import CustomizedOptimizer +from mx_rec.util.tf_version_adapter import npu_ops # define save model thread @@ -63,6 +64,7 @@ class Saver(object): self.rank_id = get_rank_id() self.local_rank_size = get_local_rank_size() self.local_rank_id = self.rank_id % self.local_rank_size + self.rank_size = get_rank_size() self.save_op_dict = defaultdict(dict) self.restore_fetch_dict = defaultdict() self.placeholder_dict = defaultdict(dict) @@ -256,25 +258,54 @@ class Saver(object): if optimizer_instance: set_optimizer_info(optimizer_instance, table_name) - if self.config_instance.hybrid_manager_config.asc_manager: + table_instance0 = self.config_instance.sparse_embed_config.get_table_instance(self.var_list[0]) + if table_instance0.is_hbm: self.config_instance.hybrid_manager_config.save_host_data(root_dir) - logger.debug(f"host data was saved.") + if self.config_instance.use_dynamic_expansion: + # Data related to dynamic expansion needs to be saved only on the host side. + return - if self.config_instance.use_dynamic_expansion: - # Data related to dynamic expansion needs to be saved only on the host side. 
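[Editor's sketch] Numeric intuition for the swap wiring above: rows leaving the device cache are gathered out at swap_out_pos and sent host-ward through the outfeed, while refreshed host rows arriving on the h2d channel are scattered back in at swap_in_pos. A TF1-style toy sketch of just the gather/scatter pair, with made-up shapes and no channels:

    import tensorflow as tf

    tf.compat.v1.disable_eager_execution()

    table = tf.compat.v1.get_variable("toy_table", initializer=tf.zeros([8, 4]))
    swap_out_pos = tf.constant([1, 5])
    swap_in_pos = tf.constant([2, 6])
    h2d_emb = tf.ones([2, 4])  # rows arriving from the host side

    swap_out = tf.gather(table, swap_out_pos)  # rows to ship device -> host
    swap_in = tf.compat.v1.scatter_nd_update(
        table, tf.expand_dims(swap_in_pos, 1), h2d_emb)

    with tf.compat.v1.Session() as sess:
        sess.run(tf.compat.v1.global_variables_initializer())
        out_rows, _ = sess.run([swap_out, swap_in])

The control dependency in the real code matters for the same reason it would here: the swap-in must not overwrite rows before the swap-out has read them.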
- return + result = self.save_op_dict + threads = [] + for table_name in result.keys(): + thread = SaveModelThread(self, sess, result, root_dir, table_name) + threads.append(thread) - result = self.save_op_dict - threads = [] - for table_name in result.keys(): - thread = SaveModelThread(self, sess, result, root_dir, table_name) - threads.append(thread) + for thread in threads: + thread.start() - for thread in threads: - thread.start() - - for thread in threads: - thread.join() + for thread in threads: + thread.join() + else: + # 接受host侧传来的需要swap_out的offset用于更新host侧并保存 + self.config_instance.hybrid_manager_config.fetch_device_emb() + for var in self.var_list: + table_instance = self.config_instance.sparse_embed_config.get_table_instance(var) + table_name = table_instance.table_name + + use_static = ConfigInitializer.get_instance().use_static + max_lookup_vec_size = None + if use_static: + max_lookup_vec_size = table_instance.send_count * self.rank_size + swap_out_pos, swap_out_len = npu_ops.gen_npu_ops.get_next( + output_types=[tf.int32, tf.int32], + output_shapes=[[max_lookup_vec_size], []], + channel_name=f'{table_name}_save_h2d_{TRAIN_CHANNEL_ID}') + if use_static: + swap_out_pos = swap_out_pos[:swap_out_len] + + optimizer = ConfigInitializer.get_instance().optimizer_config.get_optimizer_by_table_name(table_name) + table = [var] + [slot_var for slots in optimizer.values() for slot_var in slots.values()] + + swap_outs = [tf.gather(one_table, swap_out_pos) for one_table in table] + swap_out = tf.concat(swap_outs, axis=1) + channel_name = f'{table_name}_save_d2h_{TRAIN_CHANNEL_ID}' + logger.debug('channel %s was built for op swap_out_op.', channel_name) + swap_out_op = npu_ops.outfeed_enqueue_op(channel_name=channel_name, inputs=[swap_out]) + # 发送host需要的embedding + sess.run(swap_out_op) + self.config_instance.hybrid_manager_config.save_host_data(root_dir) + logger.debug(f"host data was saved.") def _get_valid_dict_data(self, dump_data_dict, table_name): host_data = self.config_instance.hybrid_manager_config.get_host_data(table_name) @@ -346,6 +377,10 @@ class Saver(object): self.config_instance.hybrid_manager_config.restore_host_data(reading_path, warm_start_tables) logger.info("host data was restored.") + table_instance0 = self.config_instance.sparse_embed_config.get_table_instance(self.var_list[0]) + if not table_instance0.is_hbm: + return + if self.config_instance.use_dynamic_expansion: # Data related to dynamic expansion needs to be restored only on the host side. 
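[Editor's sketch] In the HBM branch above each table is saved on its own thread. The same fan-out/join shape with the standard library, where failures surface on join (save_table is a placeholder for the per-table save ops that SaveModelThread runs):

    from concurrent.futures import ThreadPoolExecutor

    def save_table(table_name: str) -> None:
        """Placeholder for running the per-table save ops."""

    with ThreadPoolExecutor() as pool:
        futures = {name: pool.submit(save_table, name)
                   for name in ("user_table", "item_table")}  # example table names
        for name, fut in futures.items():
            fut.result()  # unlike a bare Thread.join, result() re-raises per-table errors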
return @@ -355,7 +390,7 @@ class Saver(object): for table_name, sub_placeholder_dict in placeholder_dict.items(): load_offset = self.config_instance.hybrid_manager_config.get_load_offset(table_name) fill_placeholder(reading_path, sub_placeholder_dict, restore_feed_dict, - NameDescriptor(table_name, DataName.EMBEDDING.value), load_offset) + NameDescriptor(table_name, DataName.EMBEDDING.value), load_offset) if "optimizer" in sub_placeholder_dict: optimizer_state_placeholder_dict_group = sub_placeholder_dict.get("optimizer") diff --git a/mx_rec/util/config_utils/hybrid_mgmt_utils.py b/mx_rec/util/config_utils/hybrid_mgmt_utils.py index b2ad0efd..26624461 100644 --- a/mx_rec/util/config_utils/hybrid_mgmt_utils.py +++ b/mx_rec/util/config_utils/hybrid_mgmt_utils.py @@ -92,3 +92,9 @@ class HybridManagerConfig: raise TypeError("Asc load data does not match usr setups, \ please re-consider if you want to restore from this dir") logger.debug("Data from host pipeline has been restored.") + + def fetch_device_emb(self): + if self.asc_manager is None: + raise RuntimeError("ASC manager not exist.") + self.asc_manager.fetch_device_emb() + logger.debug("request of fetching embedding from device to host for saving has been send") diff --git a/src/AccCTR/3rdparty/CMakeLists.txt b/src/AccCTR/3rdparty/CMakeLists.txt index a17e472c..3a05f585 100644 --- a/src/AccCTR/3rdparty/CMakeLists.txt +++ b/src/AccCTR/3rdparty/CMakeLists.txt @@ -1,3 +1,17 @@ +# Copyright (c) Huawei Technologies Co., Ltd. 2022-2024. All rights reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + message("build mode " ${BUILD_MODE}) set(PLATFORM_UTILITIES_3RDPARTY_SOURCE_DIR ${PROJECT_SOURCE_DIR}/../../../opensource) diff --git a/src/AccCTR/CMakeLists.txt b/src/AccCTR/CMakeLists.txt index 0cb63176..60e2d638 100644 --- a/src/AccCTR/CMakeLists.txt +++ b/src/AccCTR/CMakeLists.txt @@ -23,8 +23,6 @@ if (${BUILD_MODE} MATCHES "release") -Wall -fPIC -fms-extensions - -Wno-unused-parameter - -Wno-unused-function -Wunused-variable -Wunused-value -Wcast-align @@ -47,8 +45,6 @@ elseif (${BUILD_MODE} MATCHES "debug") -Wall -fPIC -fms-extensions - -Wno-unused-parameter - -Wno-unused-function -Wunused-variable -Wunused-value -Winvalid-pch @@ -67,8 +63,6 @@ elseif (${BUILD_MODE} MATCHES "ut") -Wall -fPIC -fms-extensions - -Wno-unused-parameter - -Wno-unused-function -Wunused-variable -Wunused-value -Winvalid-pch @@ -79,10 +73,6 @@ elseif (${BUILD_MODE} MATCHES "ut") -Wfloat-equal -Wextra -std=c++17 - #-fsanitize=address - #-fno-omit-frame-pointer - #-fstack-protector-all - #-fstack-protector-strong ) else () message(FATAL_ERROR "======BUILD_MODE not found") @@ -100,7 +90,6 @@ elseif (${CMAKE_HOST_SYSTEM_PROCESSOR} MATCHES "x86_64") ${CXX_FLAGS} -msse2 -mavx - #-w ) else () message(FATAL_ERROR "don't support ${CMAKE_HOST_SYSTEM_PROCESSOR}") @@ -110,6 +99,11 @@ set(OCK_CTR_PLATFORM_UTIL_DIR ${PROJECT_SOURCE_DIR}/../../../opensource) message(===============${OCK_CTR_PLATFORM_UTIL_DIR}) include_directories(${OCK_CTR_PLATFORM_UTIL_DIR}/securec/include) +include_directories( + ${PROJECT_SOURCE_DIR}/src + ${PROJECT_SOURCE_DIR}/src/embedding_cache +) + add_subdirectory(3rdparty) add_subdirectory(src) diff --git a/src/AccCTR/README.md b/src/AccCTR/README.md index 1a394699..1b25534d 100644 --- a/src/AccCTR/README.md +++ b/src/AccCTR/README.md @@ -6,4 +6,6 @@ 2、bash build.sh debug //编译debug -3、bash build.sh ut //编译并运行ut,覆盖率在tests/build/cov/gen目录下 +3、编译和运行UT: + (1)bash build.sh ut //编译ut,覆盖率在tests/build/cov/gen目录下 + (2)cd build && bash build_test.sh ut //进入到build目录下并运行ut \ No newline at end of file diff --git a/src/AccCTR/src/CMakeLists.txt b/src/AccCTR/src/CMakeLists.txt index 09da4670..1f4d9269 100644 --- a/src/AccCTR/src/CMakeLists.txt +++ b/src/AccCTR/src/CMakeLists.txt @@ -23,12 +23,17 @@ set(OUTPUT ${PROJECT_SOURCE_DIR}/output) set(OCK_CTR_PLATFORM_UTIL_DIR ${PROJECT_SOURCE_DIR}/../../../opensource) set(OCK_CTR_UTIL_INSTALL_DIR ${PROJECT_SOURCE_DIR}/install) - if (${BUILD_MODE} MATCHES "ut") add_compile_options(-ftest-coverage -fprofile-arcs) link_libraries(gcov) +else() + add_compile_options(-D_GLIBCXX_USE_CXX11_ABI=0) # must set this option otherwise pybind will not find embCache symbol endif (${BUILD_MODE} MATCHES "ut") +if (${BUILD_MODE} MATCHES "fuzz") + add_compile_options(-ftest-coverage -fprofile-arcs -fdump-rtl-expand) + link_libraries(gcov asan) +endif (${BUILD_MODE} MATCHES "fuzz") message("include : " ${OCK_CTR_SRC_INCLUDE_DIR}) @@ -37,6 +42,7 @@ set(LIB_HW_SECURE ${OCK_CTR_PLATFORM_UTIL_DIR}/securec/lib/libsecurec.so) add_subdirectory(include) add_subdirectory(common) add_subdirectory(unique) +add_subdirectory(embedding_cache) file(GLOB_RECURSE CTR_SRC factory_impl.cpp) @@ -52,6 +58,7 @@ target_include_directories(_ock_ctr_common target_link_libraries(_ock_ctr_common PUBLIC -Wl,--start-group unique + embedding_cache dl utils ${LIB_HW_SECURE} diff --git a/src/AccCTR/src/common/util/error_code.h b/src/AccCTR/src/common/util/error_code.h index 04d26a57..b30bfd83 100644 --- 
+++ b/src/AccCTR/src/common/util/error_code.h
@@ -29,7 +29,20 @@ using CTRCode = enum : int {
     H_OUTPUT_TYPE_ERROR = 8,
     H_SCENE_ERROR = 9,
     H_MEMORY_ALLOC_ERROR = 10,
-    H_UNIQUE_UNINITIALIZED_ERROR = 11
+    H_UNIQUE_UNINITIALIZED_ERROR = 11,
+    H_TABLE_NOT_EXIST = 12,
+    H_LOAD_ERROR = 13,
+    H_INITIALIZER_INVALID = 14,
+    H_EXT_EMBEDDING_SIZE_INVALID = 15,
+    H_MAX_CACHESIZE_TOO_SMALL = 16,
+    H_HOST_VOCAB_SIZE_TOO_SMALL = 17,
+    H_THREAD_NUM_ERROR = 18,
+    H_TABLE_CREATE_DUPLICATE = 19,
+    H_ARG_NOT_EMPTY = 20,
+    H_SIZE_ZERO = 21,
+    H_TABLE_NAME_EMPTY = 22,
+    H_PREFILL_BUFFER_SIZE_INVALID = 23,
+    H_TABLE_NAME_TOO_LONG = 24,
 };
 }
 }
diff --git a/src/AccCTR/src/common/util/external_threader.h b/src/AccCTR/src/common/util/external_threader.h
index 5a1132af..5f7c500f 100644
--- a/src/AccCTR/src/common/util/external_threader.h
+++ b/src/AccCTR/src/common/util/external_threader.h
@@ -20,11 +20,81 @@ limitations under the License.
 #include <functional>
 #include <thread>
 #include <vector>
+#include <atomic>
+#include <condition_variable>
+#include <future>
+#include <queue>
 
 #include "singleton.h"
 
 using ExternalThread = void (*)(const std::vector<std::function<void()>> &tasks);
 
 namespace ock {
+class ThreadPoolAsync {
+public:
+    ThreadPoolAsync() : stop(false) {}
+
+    ~ThreadPoolAsync()
+    {
+        {
+            std::lock_guard<std::mutex> lock(taskMutex);
+            stop = true;
+        }
+        taskCv.notify_all();
+        for (auto &t : workerThreads) {
+            t.join();
+        }
+    }
+
+    void SetNumThreads(int n)
+    {
+        if (n < 1) {
+            return;
+        }
+
+        for (int i = 0; i < n; ++i) {
+            workerThreads.emplace_back(std::bind(&ThreadPoolAsync::WorkerThread, this));
+        }
+    }
+
+    template <typename F> std::future<void> AddTask(F &&f)
+    {
+        std::lock_guard<std::mutex> lock(taskMutex);
+
+        auto pt = std::make_unique<std::packaged_task<void()>>(std::forward<F>(f));
+        auto fut = pt->get_future();
+        tasks.emplace(std::move(pt));
+        taskCv.notify_one();
+        return fut;
+    }
+
+private:
+    std::vector<std::thread> workerThreads;
+    std::queue<std::unique_ptr<std::packaged_task<void()>>> tasks;
+    std::mutex taskMutex;
+    std::condition_variable taskCv;
+    std::atomic<bool> stop = false;
+
+    void WorkerThread()
+    {
+        while (true) {
+            std::unique_ptr<std::packaged_task<void()>> task;
+            {
+                std::unique_lock<std::mutex> lock(taskMutex);
+                while (tasks.empty() && !stop) {
+                    taskCv.wait(lock);
+                }
+                if (stop) {
+                    break;
+                }
+                task = std::move(tasks.front());
+                tasks.pop();
+            }
+            (*task)();
+        }
+    }
+};
+
+
 class SimpleThreadPool {
 public:
     static void SyncRun(const std::vector<std::function<void()>> &tasks)
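The ThreadPoolAsync added above differs from the existing SimpleThreadPool in that AddTask wraps each submitted callable in a std::packaged_task and hands back a std::future, so a caller can block on a single task's completion rather than on a whole batch. A minimal usage sketch (only the pool itself comes from the hunk above; everything else here is illustrative):

    #include <iostream>
    #include "external_threader.h"

    int main()
    {
        ock::ThreadPoolAsync pool;
        pool.SetNumThreads(4);  // spawn four worker threads
        auto fut = pool.AddTask([] { std::cout << "refill done" << std::endl; });
        fut.get();              // block until this one task has executed
        return 0;               // ~ThreadPoolAsync() stops and joins the workers
    }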
diff --git a/src/AccCTR/src/embedding_cache/CMakeLists.txt b/src/AccCTR/src/embedding_cache/CMakeLists.txt
new file mode 100644
index 00000000..e0278a6e
--- /dev/null
+++ b/src/AccCTR/src/embedding_cache/CMakeLists.txt
@@ -0,0 +1,27 @@
+# Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+file(GLOB_RECURSE SRCS *.cpp *.h)
+
+add_library(embedding_cache OBJECT ${SRCS})
+
+target_link_libraries(embedding_cache
+        -Wl,--start-group
+        -Wl,--end-group
+        )
+
+target_include_directories(embedding_cache
+        PUBLIC
+        ${PROJECT_SOURCE_DIR}/src/common/util
+        ${PROJECT_SOURCE_DIR}/src/include)
\ No newline at end of file
diff --git a/src/AccCTR/src/embedding_cache/cache_manager/cache_manager.cpp b/src/AccCTR/src/embedding_cache/cache_manager/cache_manager.cpp
new file mode 100644
index 00000000..3620c5d0
--- /dev/null
+++ b/src/AccCTR/src/embedding_cache/cache_manager/cache_manager.cpp
@@ -0,0 +1,421 @@
+/* Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+ ==============================================================================*/
+
+#include "cache_manager.h"
+
+#include <algorithm>
+
+#include "external_logger.h"
+
+using namespace EmbCache;
+using namespace ock;
+using namespace ock::ctr;
+
+int64_t EmbCache::INVALID_KEY = -1;
+
+int EmbCacheManagerImpl::CreateCacheForTable(const EmbCacheInfo& embCacheInfo,
+    const std::vector<InitializerInfo>& initializerInfos, int64_t invalidKey,
+    uint64_t prefillBufferSize, uint32_t refillThreadNum)
+{
+    int checkTableNameRet = CheckCreateTableName(embCacheInfo.tableName);
+    if (checkTableNameRet != H_OK) {
+        return checkTableNameRet;
+    }
+
+    if (embCacheInfo.extEmbeddingSize == 0 || embCacheInfo.embeddingSize == 0 || embCacheInfo.vocabSize == 0 ||
+        embCacheInfo.maxCacheSize == 0) {
+        ExternalLogger::PrintLog(LogLevel::ERROR, "size must be positive");
+        return H_SIZE_ZERO;
+    }
+
+    if (embCacheInfo.vocabSize < embCacheInfo.maxCacheSize) {
+        ExternalLogger::PrintLog(LogLevel::ERROR, "vocabSize must be greater than or equal to maxCacheSize");
+        return H_HOST_VOCAB_SIZE_TOO_SMALL;
+    }
+
+    auto om = offsetMappers.find(embCacheInfo.tableName);
+    auto embTable = embTables.find(embCacheInfo.tableName);
+    if (om != offsetMappers.end() || embTable != embTables.end()) {
+        ExternalLogger::PrintLog(LogLevel::ERROR, "This table has already been created");
+        return H_TABLE_CREATE_DUPLICATE;
+    }
+
+    if (embCacheInfo.extEmbeddingSize % embCacheInfo.embeddingSize != 0) {
+        ExternalLogger::PrintLog(LogLevel::ERROR, "extEmbeddingSize = embeddingSize + optimizerSize, "
+            "and must be divisible by embeddingSize");
+        return H_EXT_EMBEDDING_SIZE_INVALID;
+    }
+
+    if (!CheckInitializer(embCacheInfo.extEmbeddingSize, initializerInfos)) {
+        return H_INITIALIZER_INVALID;
+    }
+
+    if ((prefillBufferSize < 1) || (prefillBufferSize > embCacheInfo.vocabSize)) {
+        ExternalLogger::PrintLog(LogLevel::ERROR, "prefillBufferSize has to be within [1, hostVocabSize]");
+        return H_PREFILL_BUFFER_SIZE_INVALID;
+    }
+
+    if (!CheckValidThreadNum(refillThreadNum)) {
+        return H_THREAD_NUM_ERROR;
+    }
+
+    uint32_t reserve = embCacheInfo.vocabSize / VOCAB_CACHE_RATIO;
+    if (!offsetMappers[embCacheInfo.tableName].Initialize(reserve, embCacheInfo.maxCacheSize)) {
+        offsetMappers[embCacheInfo.tableName].UnInitialize();
+        offsetMappers.erase(embCacheInfo.tableName);
+        return H_MEMORY_ALLOC_ERROR;
+    }
+
+    EmbPoolParam embPoolParam{prefillBufferSize, refillThreadNum};
+
+    if (!embTables[embCacheInfo.tableName].Initialize(embCacheInfo, reserve, initializerInfos, embPoolParam)) {
+        offsetMappers.erase(embCacheInfo.tableName);
+        embTables.erase(embCacheInfo.tableName);
+        return H_MEMORY_ALLOC_ERROR;
+    }
+
+    embCacheInfos.insert({embCacheInfo.tableName, embCacheInfo});
+    INVALID_KEY = invalidKey;
+    return H_OK;
+}
+
+int EmbCacheManagerImpl::GetSwapPairsAndKey2Offset(const std::string& tableName, std::vector<uint64_t>& keys,
+    KeyOffsetPair& swapInKoPair, KeyOffsetPair& swapOutKoPair)
+{
+    int checkRet = CheckGetSwapPairsAndKey2Offset(tableName, swapInKoPair, swapOutKoPair);
+    if (checkRet != H_OK) {
+        return checkRet;
+    }
+    return offsetMappers[tableName].GetSwapPairsAndKey2Offset(keys, swapInKoPair, swapOutKoPair);
+}
+
+int EmbCacheManagerImpl::EmbeddingLookup(const std::string& tableName, const std::vector<uint64_t>& keys,
+    float* embAddr, uint32_t threadNum)
+{
+    int checkTableNameRet = CheckValidTableName(tableName);
+    if (checkTableNameRet != H_OK) {
+        return checkTableNameRet;
+    }
+
+    if (!CheckValidThreadNum(threadNum)) {
+        return H_THREAD_NUM_ERROR;
+    }
+
+    if (keys.empty()) {
+        return H_OK;
+    }
+
+    if (embAddr == nullptr) {
+        ExternalLogger::PrintLog(LogLevel::ERROR, "embAddr is nullptr");
+        return H_ADDRESS_NULL;
+    }
+
+    return embTables[tableName].Gather(reinterpret_cast<uint64_t>(embAddr), keys, threadNum);
+}
+
+int EmbCacheManagerImpl::EmbeddingLookupAddrs(const std::string& tableName, const std::vector<uint64_t>& keys,
+    std::vector<float *>& addrs, uint32_t threadNum)
+{
+    int checkTableNameRet = CheckValidTableName(tableName);
+    if (checkTableNameRet != H_OK) {
+        return checkTableNameRet;
+    }
+
+    if (!CheckValidThreadNum(threadNum)) {
+        return H_THREAD_NUM_ERROR;
+    }
+
+    if (keys.empty()) {
+        return H_OK;
+    }
+
+    return embTables[tableName].GatherAddrs(keys, addrs, threadNum);
+}
+
+// If called from multiple threads, the caller must strictly guarantee that keys do not repeat
+// across threads (unique keys); otherwise the result is undefined
+int EmbCacheManagerImpl::EmbeddingLookupAndRemove(const std::string& tableName, const std::vector<uint64_t>& keys,
+    float* embAddr, uint32_t threadNum)
+{
+    int checkTableNameRet = CheckValidTableName(tableName);
+    if (checkTableNameRet != H_OK) {
+        return checkTableNameRet;
+    }
+
+    if (!CheckValidThreadNum(threadNum)) {
+        return H_THREAD_NUM_ERROR;
+    }
+
+    if (keys.empty()) {
+        return H_OK;
+    }
+
+    if (embAddr == nullptr) {
+        ExternalLogger::PrintLog(LogLevel::ERROR, "embAddr is nullptr");
+        return H_ADDRESS_NULL;
+    }
+
+    return embTables[tableName].GatherAndRemove(reinterpret_cast<uint64_t>(embAddr), keys, threadNum);
+}
+
+int EmbCacheManagerImpl::EmbeddingUpdate(const std::string& tableName, const std::vector<uint64_t>& keys,
+    float* embAddr, uint32_t threadNum)
+{
+    int checkTableNameRet = CheckValidTableName(tableName);
+    if (checkTableNameRet != H_OK) {
+        return checkTableNameRet;
+    }
+
+    if (!CheckValidThreadNum(threadNum)) { // check that threadNum does not exceed the number of CPU cores
+        return H_THREAD_NUM_ERROR;
+    }
+
+    if (keys.empty()) {
+        return H_OK;
+    }
+
+    if (embAddr == nullptr) { // check that embAddr is not a null pointer
+        ExternalLogger::PrintLog(LogLevel::ERROR, "embAddr is nullptr");
+        return H_ADDRESS_NULL;
+    }
+
+    return embTables[tableName].Scatter(reinterpret_cast<uint64_t>(embAddr), keys, threadNum);
+}
+
+int EmbCacheManagerImpl::EmbeddingRemove(const std::string& tableName, const std::vector<uint64_t>& keys,
+    uint32_t threadNum)
+{
+    int checkTableNameRet = CheckValidTableName(tableName);
+    if (checkTableNameRet != H_OK) {
+        return checkTableNameRet;
+    }
+
+    if (!CheckValidThreadNum(threadNum)) { // check that threadNum does not exceed the number of CPU cores
+        return H_THREAD_NUM_ERROR;
+    }
+
+    if (keys.empty()) {
+        return H_OK;
+    }
+
+    return embTables[tableName].RemoveByKeys(keys, threadNum);
+}
+
+int EmbCacheManagerImpl::RemoveEmbsByKeys(const std::string& tableName, const std::vector<uint64_t>& keys)
+{
+    int checkTableNameRet = CheckValidTableName(tableName);
+    if (checkTableNameRet != H_OK) {
+        return checkTableNameRet;
+    }
+    const auto& om = offsetMappers.find(tableName);
+    const auto& embTable = embTables.find(tableName);
+    for (auto key : keys) {
+        if (key == static_cast<uint64_t>(INVALID_KEY)) {
+            ExternalLogger::PrintLog(LogLevel::WARN, "Try to evict invalid key");
+            continue;
+        }
+        om->second.Remove(key);
+        embTable->second.Remove(key);
+    }
+    return H_OK;
+}
+
+int EmbCacheManagerImpl::GetEmbTableNames(std::vector<std::string>& allTableNames)
+{
+    if (!allTableNames.empty()) {
+        ExternalLogger::PrintLog(LogLevel::ERROR, "allTableNames should be empty");
+        return H_ARG_NOT_EMPTY;
+    }
+    allTableNames.reserve(embTables.size());
+    for (auto& embTable : embTables) {
+        allTableNames.emplace_back(embTable.first);
+    }
+    return H_OK;
+}
+
+int EmbCacheManagerImpl::ExportDeviceKeyOffsetPairs(const std::string& tableName,
+    std::vector<std::pair<uint64_t, uint32_t>>& koVec)
+{
+    int checkTableNameRet = CheckValidTableName(tableName);
+    if (checkTableNameRet != H_OK) {
+        return checkTableNameRet;
+    }
+    OffsetMapper& om = offsetMappers[tableName];
+    koVec = om.ExportSortedKVPairs();
+    return H_OK;
+}
+
+int EmbCacheManagerImpl::Serialize(const std::string& tableName, std::vector<char>& buffer)
+{
+    int checkTableNameRet = CheckValidTableName(tableName);
+    if (checkTableNameRet != H_OK) {
+        return checkTableNameRet;
+    }
+    buffer = embTables[tableName].Serialize();
+    return H_OK;
+}
+
+int EmbCacheManagerImpl::Deserialize(const std::string& tableName, const std::vector<char>& buffer)
+{
+    int checkTableNameRet = CheckValidTableName(tableName);
+    if (checkTableNameRet != H_OK) {
+        return checkTableNameRet;
+    }
+    if (!embTables[tableName].Deserialize(buffer)) {
+        return H_LOAD_ERROR;
+    }
+    return H_OK;
+}
+
+int EmbCacheManagerImpl::GetEmbTableInfos(std::string tableName, std::vector<uint64_t>& keys,
+    std::vector<std::vector<float>>& embeddings,
+    std::vector<std::vector<float>>& optimizerSlots)
+{
+    int checkTableNameRet = CheckValidTableName(tableName);
+    if (checkTableNameRet != H_OK) {
+        return checkTableNameRet;
+    }
+    if (!keys.empty()) {
+        ExternalLogger::PrintLog(LogLevel::ERROR, "keys should be empty");
+        return H_ARG_NOT_EMPTY;
+    }
+    if (!embeddings.empty()) {
+        ExternalLogger::PrintLog(LogLevel::ERROR, "embeddings should be empty");
+        return H_ARG_NOT_EMPTY;
+    }
+    if (!optimizerSlots.empty()) {
+        ExternalLogger::PrintLog(LogLevel::ERROR, "optimizerSlots should be empty");
+        return H_ARG_NOT_EMPTY;
+    }
+    embTables[tableName].GetEmbTableInfos(keys, embeddings, optimizerSlots);
+    return H_OK;
+}
+
+int EmbCacheManagerImpl::LoadEmbTableInfos(std::string tableName, const std::vector<uint64_t>& keys,
+    const std::vector<std::vector<float>>& embeddings,
+    const std::vector<std::vector<float>>& optimizerSlots)
+{
+    int checkTableNameRet = CheckValidTableName(tableName);
+    if (checkTableNameRet != H_OK) {
+        return checkTableNameRet;
+    }
+    if (!embTables[tableName].LoadEmbTableInfos(keys, embeddings, optimizerSlots)) {
+        return H_LOAD_ERROR;
+    }
+    return H_OK;
+}
+
+void EmbCacheManagerImpl::Destroy()
+{
+    for (auto it = offsetMappers.begin(); it != offsetMappers.end(); it++) {
+        it->second.UnInitialize();
+    }
+    for (auto it = embTables.begin(); it != embTables.end(); it++) {
+        it->second.UnInitialize();
+    }
+    embCacheInfos.clear();
+    offsetMappers.clear();
+    embTables.clear();
+}
+
+int EmbCacheManagerImpl::CheckValidTableName(const std::string& tableName)
+{
+    if (tableName.size() > TABLE_NAME_MAX_SIZE) {
+        ExternalLogger::PrintLog(LogLevel::ERROR,
+            "tableName size can not be larger than " + std::to_string(TABLE_NAME_MAX_SIZE));
+        return H_TABLE_NAME_TOO_LONG;
+    }
+    auto om = offsetMappers.find(tableName);
+    auto embTable = embTables.find(tableName);
+    if (om == offsetMappers.end() || embTable == embTables.end()) {
+        ExternalLogger::PrintLog(LogLevel::ERROR, "can not find table");
+        return H_TABLE_NOT_EXIST;
+    }
+    return H_OK;
+}
+
+bool EmbCacheManagerImpl::CheckInitializer(uint32_t extEmbSize, std::vector<InitializerInfo> initializerInfos)
+{
+    std::sort(initializerInfos.begin(), initializerInfos.end(),
+        [](const auto& u, const auto& v) { return u.start < v.start; });
+    uint32_t cur_pos = 0;
+    for (const auto& info : initializerInfos) {
+        if (info.initializer == nullptr) {
+            ExternalLogger::PrintLog(LogLevel::ERROR, "initializer is nullptr");
+            return false;
+        }
+        if (info.start != cur_pos) {
+            ExternalLogger::PrintLog(LogLevel::ERROR, "Initializers have coverage problems");
+            return false;
+        }
+        cur_pos += info.len;
+    }
+    // finally, the segments must cover exactly [0, extEmbSize)
+    if (cur_pos != extEmbSize) {
+        ExternalLogger::PrintLog(LogLevel::ERROR, "Initializers have coverage problems");
+        return false;
+    }
+    return true;
+}
+
+bool EmbCacheManagerImpl::CheckValidThreadNum(uint32_t threadNum)
+{
+    uint32_t processCoreNum = std::thread::hardware_concurrency();
+    if (threadNum > processCoreNum) {
+        ExternalLogger::PrintLog(LogLevel::ERROR, "ThreadNum can not be larger than the cpu core num");
+        return false;
+    }
+
+    if (threadNum == 0) {
+        ExternalLogger::PrintLog(LogLevel::ERROR, "ThreadNum can not be zero");
+        return false;
+    }
+    return true;
+}
+
+int EmbCacheManagerImpl::CheckGetSwapPairsAndKey2Offset(const std::string& tableName, const KeyOffsetPair& swapInKoPair,
+    const KeyOffsetPair& swapOutKoPair)
+{
+    if (!swapInKoPair.first.empty() || !swapInKoPair.second.empty() || !swapOutKoPair.first.empty() ||
+        !swapOutKoPair.second.empty()) {
+        ExternalLogger::PrintLog(LogLevel::ERROR, "koPair should be empty");
+        return H_ARG_NOT_EMPTY;
+    }
+
+    int checkTableNameRet = CheckValidTableName(tableName);
+    if (checkTableNameRet != H_OK) {
+        return checkTableNameRet;
+    }
+
+    return H_OK;
+}
+
+int EmbCacheManagerImpl::CheckCreateTableName(const std::string& tableName)
+{
+    if (tableName.empty()) {
+        ExternalLogger::PrintLog(LogLevel::ERROR, "tableName can not be empty");
+        return H_TABLE_NAME_EMPTY;
+    }
+
+    if (tableName.size() > TABLE_NAME_MAX_SIZE) {
+        ExternalLogger::PrintLog(LogLevel::ERROR,
+            "tableName size can not be larger than " + std::to_string(TABLE_NAME_MAX_SIZE));
+        return H_TABLE_NAME_TOO_LONG;
+    }
+    return H_OK;
+}
+
+uint32_t EmbCacheManagerImpl::GetUsage(const std::string& tableName)
+{
+    return embTables[tableName].GetUsage();
+}
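Taken together, the checks above admit a table only when every size is positive, vocabSize >= maxCacheSize, extEmbeddingSize is a multiple of embeddingSize, the initializers cover [0, extEmbeddingSize) exactly, prefillBufferSize lies in [1, vocabSize], and the thread count fits the machine. A minimal host-side round trip under those rules — the field names follow the code above, but EmbCacheInfo's exact layout, the error-code namespace, and direct construction of EmbCacheManagerImpl are assumptions, since embedding_cache.h is not part of this patch:

    #include <cassert>
    #include <cstdint>
    #include <string>
    #include <vector>
    #include "cache_manager.h"

    int main()
    {
        EmbCache::EmbCacheManagerImpl mgr;

        EmbCache::EmbCacheInfo info;     // assumed aggregate-style struct
        info.tableName = "user_emb";
        info.embeddingSize = 8;          // raw embedding width
        info.extEmbeddingSize = 16;      // embedding + optimizer slots, a multiple of 8
        info.maxCacheSize = 1024;        // device-side cache capacity
        info.vocabSize = 4096;           // host capacity, >= maxCacheSize

        // initializers must cover [0, extEmbeddingSize) with no gap or overlap
        std::string name = "constant_initializer";
        std::vector<EmbCache::InitializerInfo> inits = {
            {name, 0, 16, EmbCache::ConstantInitializerInfo(0.0F, 1.0F)}
        };

        // invalidKey = -1, prefillBufferSize within [1, vocabSize], one refill thread
        assert(mgr.CreateCacheForTable(info, inits, -1, 256, 1) == ock::ctr::H_OK);

        std::vector<uint64_t> keys = {3, 5, 7};
        std::vector<float> out(keys.size() * info.extEmbeddingSize);
        assert(mgr.EmbeddingLookup("user_emb", keys, out.data(), 1) == ock::ctr::H_OK);

        mgr.Destroy();
        return 0;
    }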
diff --git a/src/AccCTR/src/embedding_cache/cache_manager/cache_manager.h b/src/AccCTR/src/embedding_cache/cache_manager/cache_manager.h
new file mode 100644
index 00000000..80fbcd46
--- /dev/null
+++ b/src/AccCTR/src/embedding_cache/cache_manager/cache_manager.h
@@ -0,0 +1,95 @@
+/* Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+ ==============================================================================*/
+
+#ifndef EMBEDDING_CACHE_MANAGER_H
+#define EMBEDDING_CACHE_MANAGER_H
+
+#include <cstdint>
+#include <map>
+#include <string>
+#include <vector>
+
+#include "embedding_cache.h"
+#include "embedding_local_table/emb_local_table.h"
+#include "error_code.h"
+#include "offset_mapper/offset_mapper.h"
+
+namespace EmbCache {
+class EmbCacheManagerImpl : public EmbCacheManager {
+public:
+    EmbCacheManagerImpl() = default;
+
+    ~EmbCacheManagerImpl() override = default;
+
+    int CreateCacheForTable(const EmbCacheInfo& embCacheInfo, const std::vector<InitializerInfo>& initializerInfos,
+        int64_t invalidKey, uint64_t prefillBufferSize, uint32_t refillThreadNum) override;
+
+    int GetSwapPairsAndKey2Offset(const std::string& tableName, std::vector<uint64_t>& keys,
+        KeyOffsetPair& swapInKoPair, KeyOffsetPair& swapOutKoPair) override;
+
+    int EmbeddingLookup(const std::string& tableName, const std::vector<uint64_t>& keys, float* embAddr,
+        uint32_t threadNum) override;
+
+    int EmbeddingLookupAddrs(const std::string& tableName, const std::vector<uint64_t>& keys,
+        std::vector<float *>& addrs, uint32_t threadNum) override;
+
+    int EmbeddingUpdate(const std::string& tableName, const std::vector<uint64_t>& keys, float* embAddr,
+        uint32_t threadNum) override;
+
+    int EmbeddingRemove(const std::string& tableName, const std::vector<uint64_t>& keys, uint32_t threadNum) override;
+
+    int EmbeddingLookupAndRemove(const std::string& tableName, const std::vector<uint64_t>& keys, float* embAddr,
+        uint32_t threadNum) override;
+
+    int RemoveEmbsByKeys(const std::string& tableName, const std::vector<uint64_t>& keys) override;
+
+    int GetEmbTableNames(std::vector<std::string>& allTableNames) override;
+
+    int ExportDeviceKeyOffsetPairs(const std::string& tableName,
+        std::vector<std::pair<uint64_t, uint32_t>>& koVec) override;
+
+    int Serialize(const std::string& tableName, std::vector<char>& buffer) override;
+
+    int Deserialize(const std::string& tableName, const std::vector<char>& buffer) override;
+
+    void Destroy() override;
+
+    int GetEmbTableInfos(std::string tableName, std::vector<uint64_t>& keys,
+        std::vector<std::vector<float>>& embeddings,
+        std::vector<std::vector<float>>& optimizerSlots) override;
+
+    int LoadEmbTableInfos(std::string tableName, const std::vector<uint64_t>& keys,
+        const std::vector<std::vector<float>>& embeddings,
+        const std::vector<std::vector<float>>& optimizerSlots) override;
+
+    uint32_t GetUsage(const std::string& tableName) override;
+
+private:
+    std::map<std::string, EmbCacheInfo> embCacheInfos;
+    std::map<std::string, OffsetMapper> offsetMappers;
+    std::map<std::string, EmbLocalTable> embTables;
+
+    int CheckValidTableName(const std::string& tableName);
+
+    bool CheckInitializer(uint32_t extEmbSize, std::vector<InitializerInfo> initializerInfos);
+
+    bool CheckValidThreadNum(uint32_t threadNum);
+
+    int CheckGetSwapPairsAndKey2Offset(const std::string& tableName, const KeyOffsetPair& swapInKoPair,
+        const KeyOffsetPair& swapOutKoPair);
+
+    int CheckCreateTableName(const std::string& tableName);
+};
+} // namespace EmbCache
+#endif // EMBEDDING_CACHE_MANAGER_H
diff --git a/src/AccCTR/src/embedding_cache/common.h b/src/AccCTR/src/embedding_cache/common.h
new file mode 100644
index 00000000..72433332
--- /dev/null
+++ b/src/AccCTR/src/embedding_cache/common.h
@@ -0,0 +1,65 @@
+/* Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+ ==============================================================================*/
+
+#ifndef MXREC_COMMON_H
+#define MXREC_COMMON_H
+
+#include "limited_set.h"
+
+#ifndef HM_UNLIKELY
+#define HM_UNLIKELY(x) __builtin_expect(!!(x), 0)
+#endif
+
+#ifndef HM_LIKELY
+#define HM_LIKELY(x) __builtin_expect(!!(x), 1)
+#endif
+
+namespace EmbCache {
+
+
+enum class FkvState {
+    FKV_EXIST,
+    FKV_NOT_EXIST,
+    FKV_KEY_CONFLICT,
+    FKV_BEFORE_PUT_FUNC_FAIL,
+    FKV_BEFORE_REMOVE_FUNC_FAIL,
+    FKV_NO_SPACE,
+    FKV_FAIL,
+};
+
+enum class BeforePutFuncState {
+    BEFORE_SUCCESS,
+    BEFORE_NO_SPACE,
+    BEFORE_FAIL,
+};
+
+enum class BeforeRemoveFuncState {
+    BEFORE_SUCCESS,
+    BEFORE_FAIL,
+};
+
+extern int64_t INVALID_KEY;
+constexpr uint64_t TABLE_NAME_MAX_SIZE = 1024;
+const uint32_t VOCAB_CACHE_RATIO = 15;
+constexpr float NORMAL_MEAN_MAX = 1e9;
+constexpr float NORMAL_MEAN_MIN = -1e9;
+constexpr float NORMAL_STDDEV_MAX = 100;
+constexpr float NORMAL_STDDEV_MIN = 0;
+constexpr float CONSTANT_VALUE_MAX = 1e9;
+constexpr float CONSTANT_VALUE_MIN = -1e9;
+constexpr float INIT_K_MAX = 10000;
+constexpr float INIT_K_MIN = -10000;
+const int INVALID_EMB_SIZE = -1;
+}
+#endif // MXREC_COMMON_H
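All multi-threaded paths in emb_local_table.cpp below share one partitioning scheme: thread t handles the keys in [start[t], start[t+1]), where the first n % threadNum threads take ceil(n / threadNum) keys and the rest take floor(n / threadNum). A standalone check of that arithmetic (illustrative only):

    #include <cstdint>
    #include <iostream>
    #include <vector>

    int main()
    {
        uint64_t n = 10;        // number of keys
        uint32_t threadNum = 3;
        auto m = static_cast<uint32_t>(n % threadNum);
        std::vector<uint64_t> start(threadNum + 1);
        for (uint32_t t = 0; t < m; t++) {           // first m threads: ceiling share
            start[t] = ((n + threadNum - 1) / threadNum) * t;
        }
        for (uint32_t t = m; t <= threadNum; t++) {  // remaining threads: floor share
            start[t] = (n / threadNum) * t + m;
        }
        for (uint32_t t = 0; t < threadNum; t++) {
            std::cout << "[" << start[t] << ", " << start[t + 1] << ")\n";  // [0,4) [4,7) [7,10)
        }
        return 0;
    }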
diff --git a/src/AccCTR/src/embedding_cache/embedding_local_table/emb_local_table.cpp b/src/AccCTR/src/embedding_cache/embedding_local_table/emb_local_table.cpp
new file mode 100644
index 00000000..dc59a303
--- /dev/null
+++ b/src/AccCTR/src/embedding_cache/embedding_local_table/emb_local_table.cpp
@@ -0,0 +1,475 @@
+/* Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+ ==============================================================================*/
+
+#include "emb_local_table.h"
+
+#include <future>
+#include <thread>
+
+#include "error_code.h"
+#include "securec.h"
+
+using namespace std;
+using namespace EmbCache;
+using namespace ock;
+using namespace ock::ctr;
+
+bool EmbLocalTable::Initialize(const EmbCacheInfo& embCacheInfo, uint64_t reserve,
+    const std::vector<InitializerInfo>& initializerInfos, const EmbPoolParam& embPoolParam)
+{
+    emExpendMemInfo = make_shared<AutoRefillEmbeddingMemoryPool>(embPoolParam.prefillBufferSize, initializerInfos,
+        embCacheInfo.extEmbeddingSize, embCacheInfo.vocabSize,
+        embPoolParam.refillThreadNum);
+    embeddingSize = embCacheInfo.embeddingSize;
+    extEmbeddingSize = embCacheInfo.extEmbeddingSize;
+    return embMap.Initialize(reserve, embCacheInfo.vocabSize, emExpendMemInfo);
+}
+
+void EmbLocalTable::UnInitialize()
+{
+    embMap.UnInitialize();
+}
+
+int EmbLocalTable::FindAndPutIfNotFound(uint64_t key, uint64_t& value)
+{
+    FkvState ret = embMap.FindAndPutIfNotFound(key, value);
+    if (ret == FkvState::FKV_FAIL) {
+        return H_ERROR;
+    }
+    if (ret == FkvState::FKV_BEFORE_PUT_FUNC_FAIL) {
+        return H_MEMORY_ALLOC_ERROR;
+    }
+    if (ret == FkvState::FKV_NO_SPACE) {
+        return H_HOST_VOCAB_SIZE_TOO_SMALL;
+    }
+    return H_OK;
+}
+
+bool EmbLocalTable::Remove(uint64_t key)
+{
+    return embMap.Remove(key) != FkvState::FKV_BEFORE_REMOVE_FUNC_FAIL;
+}
+
+int EmbLocalTable::RemoveByKeys(const std::vector<uint64_t>& keys, uint32_t threadNum)
+{
+    if (threadNum == 1) {
+        for (uint64_t key : keys) {
+            if (!Remove(key)) {
+                return H_ERROR;
+            }
+        }
+        return H_OK;
+    }
+    // each thread handles the keys in the range [start[threadId], start[threadId + 1])
+    uint32_t m = keys.size() % threadNum;
+    vector<uint64_t> start(threadNum + 1);
+    // the first keys.size() % threadNum threads take the ceiling share
+    for (uint32_t threadId = 0; threadId < m; threadId++) {
+        start[threadId] = ((keys.size() + threadNum - 1) / threadNum) * threadId;
+    }
+    // the remaining threads take the floor share
+    for (uint32_t threadId = m; threadId <= threadNum; threadId++) {
+        start[threadId] = (keys.size() / threadNum) * threadId + m;
+    }
+
+    vector<future<int>> threads(threadNum);
+    for (uint32_t threadId = 0; threadId < threadNum; threadId++) {
+        threads[threadId] = std::async(std::launch::async, [&, threadId]() {
+            for (uint64_t i = start[threadId]; i < start[threadId + 1]; i++) {
+                if (!Remove(keys[i])) {
+                    return H_ERROR;
+                }
+            }
+            return H_OK;
+        });
+    }
+    for (auto& t : threads) {
+        auto res = t.get();
+        if (res != H_OK) {
+            return res;
+        }
+    }
+    return H_OK;
+}
+
+int EmbLocalTable::OneThreadHandle(uint64_t startAddr, const std::vector<uint64_t>& keys, bool isGather)
+{
+    for (uint64_t i = 0; i < keys.size(); i++) {
+        uint64_t embAddr;
+        int ret = FindAndPutIfNotFound(keys[i], embAddr);
+        if (ret != H_OK) {
+            return ret;
+        }
+        uint64_t memSize = emExpendMemInfo->extEmbeddingSize * sizeof(float);
+        auto addr = startAddr + i * memSize;
+        if (isGather) {
+            auto rc = memcpy_s(reinterpret_cast<void *>(addr), memSize, reinterpret_cast<void *>(embAddr), memSize);
+            if (rc != 0) {
+                ExternalLogger::PrintLog(LogLevel::ERROR,
+                    "gather memcpy_s failed... dstSize: " + std::to_string(memSize));
+                return H_COPY_ERROR;
+            }
+        } else {
+            auto rc = memcpy_s(reinterpret_cast<void *>(embAddr), memSize, // copy the new embeddings into their addresses in order
+                reinterpret_cast<void *>(addr), memSize);
+            if (rc != 0) {
+                ExternalLogger::PrintLog(LogLevel::ERROR,
+                    "scatter memcpy_s failed... dstSize: " + std::to_string(memSize));
+                return H_COPY_ERROR;
+            }
+        }
+    }
+
+    return H_OK;
+}
+
+int EmbLocalTable::Gather(uint64_t startAddr, const vector<uint64_t>& keys, uint32_t threadNum)
+{
+    if (threadNum == 1) {
+        return OneThreadHandle(startAddr, keys, true);
+    }
+
+    // each thread handles the keys in the range [start[threadId], start[threadId + 1])
+    uint32_t m = keys.size() % threadNum;
+    vector<uint64_t> start(threadNum + 1);
+    // the first keys.size() % threadNum threads take the ceiling share
+    for (uint32_t threadId = 0; threadId < m; threadId++) {
+        start[threadId] = ((keys.size() + threadNum - 1) / threadNum) * threadId;
+    }
+    // the remaining threads take the floor share
+    for (uint32_t threadId = m; threadId <= threadNum; threadId++) {
+        start[threadId] = (keys.size() / threadNum) * threadId + m;
+    }
+
+    vector<thread> threads(threadNum);
+    int ret = H_OK;
+    for (uint32_t threadId = 0; threadId < threadNum; threadId++) {
+        threads[threadId] = thread([&, threadId] {
+            for (uint64_t i = start[threadId]; i < start[threadId + 1]; i++) {
+                uint64_t embAddr;
+                int temp_ret = FindAndPutIfNotFound(keys[i], embAddr);
+                if (temp_ret != H_OK) {
+                    ret = temp_ret;
+                    return;
+                }
+                uint64_t memSize = emExpendMemInfo->extEmbeddingSize * sizeof(float);
+                auto addr = startAddr + i * memSize;
+                auto rc = memcpy_s(reinterpret_cast<void *>(addr), memSize, reinterpret_cast<void *>(embAddr), memSize);
+                if (rc != 0) {
+                    ExternalLogger::PrintLog(LogLevel::ERROR, "memcpy_s failed... dstSize: " + std::to_string(memSize));
+                    ret = H_COPY_ERROR;
+                    return;
+                }
+            }
+        });
+    }
+    for (auto& t : threads) {
+        t.join();
+    }
+    return ret;
+}
+
+int EmbLocalTable::GatherAddrs(const std::vector<uint64_t>& keys, std::vector<float *>& addrs, uint32_t threadNum)
+{
+    if (threadNum == 1) {
+        addrs.resize(keys.size());
+        for (uint64_t i = 0; i < keys.size(); i++) {
+            int temp_ret = FindAndPutIfNotFound(keys[i], reinterpret_cast<uint64_t&>(addrs[i]));
+            if (temp_ret != H_OK) {
+                return temp_ret;
+            }
+        }
+        return H_OK;
+    }
+    // each thread handles the keys in the range [start[threadId], start[threadId + 1])
+    uint32_t m = keys.size() % threadNum;
+    vector<uint64_t> start(threadNum + 1);
+    // the first keys.size() % threadNum threads take the ceiling share
+    for (uint32_t threadId = 0; threadId < m; threadId++) {
+        start[threadId] = ((keys.size() + threadNum - 1) / threadNum) * threadId;
+    }
+    // the remaining threads take the floor share
+    for (uint32_t threadId = m; threadId <= threadNum; threadId++) {
+        start[threadId] = (keys.size() / threadNum) * threadId + m;
+    }
+    addrs.resize(keys.size());
+
+    vector<thread> threads(threadNum);
+    int ret = H_OK;
+    for (uint32_t threadId = 0; threadId < threadNum; threadId++) {
+        threads[threadId] = thread([&, threadId] {
+            for (uint64_t i = start[threadId]; i < start[threadId + 1]; i++) {
+                int temp_ret = FindAndPutIfNotFound(keys[i], reinterpret_cast<uint64_t&>(addrs[i]));
+                if (temp_ret != H_OK) {
+                    ret = temp_ret;
+                    return;
+                }
+            }
+        });
+    }
+    for (auto& t : threads) {
+        t.join();
+    }
+    return ret;
+}
+
+// If called from multiple threads, the caller must strictly guarantee that keys do not repeat
+// across threads (unique keys); otherwise the result is undefined
+int EmbLocalTable::GatherAndRemove(uint64_t startAddr, const vector<uint64_t>& keys, uint32_t threadNum)
+{
+    if (threadNum == 1) {
+        for (uint64_t i = 0; i < keys.size(); i++) {
+            uint64_t memSize = emExpendMemInfo->extEmbeddingSize * sizeof(float);
+            auto addr = startAddr + i * memSize;
+            auto ret = embMap.FindAndRemoveIfFound(keys[i], addr); // if the key is found, copy the embedding out and remove the key
+            if (ret == FkvState::FKV_NOT_EXIST) { // key not found: produce a freshly initialized value without storing the key
+                auto* embAddr = reinterpret_cast<float *>(addr);
+                for (const auto& initializerInfo : emExpendMemInfo->initializerInfos) {
+                    initializerInfo.initializer->GenerateData(embAddr, INVALID_EMB_SIZE);
+                }
+            } else if (ret == FkvState::FKV_BEFORE_REMOVE_FUNC_FAIL) {
+                ExternalLogger::PrintLog(LogLevel::ERROR, "memcpy_s failed... dstSize: " + std::to_string(memSize));
+                return H_COPY_ERROR;
+            }
+        }
+        return H_OK;
+    }
+
+    // each thread handles the keys in the range [start[threadId], start[threadId + 1])
+    uint32_t m = keys.size() % threadNum;
+    vector<uint64_t> start(threadNum + 1);
+    // the first keys.size() % threadNum threads take the ceiling share
+    for (uint32_t threadId = 0; threadId < m; threadId++) {
+        start[threadId] = ((keys.size() + threadNum - 1) / threadNum) * threadId;
+    }
+    // the remaining threads take the floor share
+    for (uint32_t threadId = m; threadId <= threadNum; threadId++) {
+        start[threadId] = (keys.size() / threadNum) * threadId + m;
+    }
+
+    vector<thread> threads(threadNum);
+    int retVal = H_OK;
+    for (uint32_t threadId = 0; threadId < threadNum; threadId++) {
+        threads[threadId] = thread([&, threadId] {
+            for (uint64_t i = start[threadId]; i < start[threadId + 1]; i++) {
+                uint64_t memSize = emExpendMemInfo->extEmbeddingSize * sizeof(float);
+                auto addr = startAddr + i * memSize;
+                auto ret = embMap.FindAndRemoveIfFound(keys[i], addr); // if the key is found, copy the embedding out and remove the key
+                if (ret == FkvState::FKV_NOT_EXIST) { // key not found: produce a freshly initialized value without storing the key
+                    auto* embAddr = reinterpret_cast<float *>(addr);
+                    for (const auto& initializerInfo : emExpendMemInfo->initializerInfos) {
+                        initializerInfo.initializer->GenerateData(embAddr, INVALID_EMB_SIZE);
+                    }
+                } else if (ret == FkvState::FKV_BEFORE_REMOVE_FUNC_FAIL) {
+                    ExternalLogger::PrintLog(LogLevel::ERROR, "memcpy_s failed... dstSize: " + std::to_string(memSize));
+                    retVal = H_COPY_ERROR;
+                    return;
+                }
+            }
+        });
+    }
+    for (auto& t : threads) {
+        t.join();
+    }
+    return retVal;
+}
+
+int EmbLocalTable::Scatter(const uint64_t startAddr, const vector<uint64_t>& keys, uint32_t threadNum)
+{
+    if (threadNum == 1) { // single-threaded version
+        return OneThreadHandle(startAddr, keys, false);
+    }
+
+    // multi-threaded version
+    // each thread handles the keys in the range [start[threadId], start[threadId + 1])
+    uint32_t m = keys.size() % threadNum;
+    vector<uint64_t> start(threadNum + 1);
+    // the first keys.size() % threadNum threads take the ceiling share
+    for (uint32_t threadId = 0; threadId < m; threadId++) {
+        start[threadId] = ((keys.size() + threadNum - 1) / threadNum) * threadId;
+    }
+    // the remaining threads take the floor share
+    for (uint32_t threadId = m; threadId <= threadNum; threadId++) {
+        start[threadId] = (keys.size() / threadNum) * threadId + m;
+    }
+
+    vector<thread> threads(threadNum);
+    int ret = H_OK;
+    for (uint32_t threadId = 0; threadId < threadNum; threadId++) {
+        threads[threadId] = thread([&, threadId] {
+            for (uint64_t i = start[threadId]; i < start[threadId + 1]; i++) {
+                uint64_t embAddr;
+                int temp_ret = FindAndPutIfNotFound(keys[i], embAddr); // get the start address of each key's embedding
+                if (temp_ret != H_OK) {
+                    ret = temp_ret;
+                    return;
+                }
+                uint64_t memSize = emExpendMemInfo->extEmbeddingSize * sizeof(float);
+                auto addr = startAddr + i * memSize;
+                auto rc = memcpy_s(reinterpret_cast<void *>(embAddr), memSize, // copy the new embeddings into their addresses in order
+                    reinterpret_cast<void *>(addr), memSize);
+                if (rc != 0) {
+                    ExternalLogger::PrintLog(LogLevel::ERROR, "memcpy_s failed... dstSize: " + std::to_string(memSize));
+                    ret = H_COPY_ERROR;
+                    return;
+                }
+            }
+        });
+    }
+    for (auto& t : threads) {
+        t.join();
+    }
+    return ret;
+}
+
+// export all stored kv pairs
+vector<pair<uint64_t, uint64_t>> EmbLocalTable::ExportVec()
+{
+    return embMap.ExportVec();
+}
+
+template <typename T>
+void EmbLocalTable::insertData(vector<char>& buffer, T& data)
+{
+    buffer.insert(buffer.end(), (char*)&data, (char*)&data + sizeof(data));
+}
+
+template <typename T>
+bool EmbLocalTable::getData(const vector<char>& buffer, T& data, uint64_t& i)
+{
+    if (i + sizeof(T) > buffer.size()) {
+        return false;
+    }
+    data = *reinterpret_cast<const T *>(&buffer[i]);
+    i += sizeof(T);
+    return true;
+}
+
+// serialize all stored key-embedding data
+vector<char> EmbLocalTable::Serialize()
+{
+    vector<char> buffer;
+    vector<pair<uint64_t, uint64_t>> kvVec = ExportVec();
+
+    for (auto& p : kvVec) {
+        uint64_t key = p.first;
+        uint64_t value = p.second;
+        insertData(buffer, key);
+        auto* addr = reinterpret_cast<float *>(value);
+        buffer.insert(buffer.end(), reinterpret_cast<char *>(addr),
+            reinterpret_cast<char *>((addr + emExpendMemInfo->extEmbeddingSize)));
+    }
+    return buffer;
+}
+
+// deserialize key-embedding data and store it into the map
+bool EmbLocalTable::Deserialize(const vector<char>& buffer)
+{
+    uint64_t i = 0;
+    while (i < buffer.size()) {
+        uint64_t key;
+        if (!getData(buffer, key, i)) {
+            ExternalLogger::PrintLog(LogLevel::ERROR, "get data failed!");
+            return false;
+        }
+        uint64_t value = 0;
+        if (FindAndPutIfNotFound(key, value) != H_OK) {
+            ExternalLogger::PrintLog(LogLevel::ERROR, "FindAndPutIfNotFound failed!");
+            return false;
+        }
+
+        auto* addr = reinterpret_cast<float *>(value);
+        for (uint32_t j = 0; j < emExpendMemInfo->extEmbeddingSize; j++) {
+            if (!getData(buffer, addr[j], i)) {
+                ExternalLogger::PrintLog(LogLevel::ERROR, "get data failed!");
+                return false;
+            }
+        }
+    }
+    return true;
+}
+
+uint32_t EmbLocalTable::GetUsage()
+{
+    return embMap.current_size;
+}
+
+void EmbLocalTable::GetEmbTableInfos(std::vector<uint64_t>& keys, std::vector<std::vector<float>>& embeddings,
+    std::vector<std::vector<float>>& optimizerSlots)
+{
+    vector<pair<uint64_t, uint64_t>> kvVec = ExportVec();
+
+    for (auto& p : kvVec) {
+        std::vector<float> curEmbedding;
+        keys.emplace_back(p.first);
+        auto* addr = reinterpret_cast<float *>(p.second);
+        curEmbedding.insert(curEmbedding.end(), addr, addr + embeddingSize);
+        embeddings.emplace_back(curEmbedding);
+        if (extEmbeddingSize > embeddingSize) {
+            std::vector<float> curOptimizerSlot;
+            curOptimizerSlot.insert(curOptimizerSlot.end(), addr + embeddingSize, addr + extEmbeddingSize);
+            optimizerSlots.emplace_back(curOptimizerSlot);
+        }
+    }
+}
+
+bool EmbLocalTable::LoadEmbTableInfos(const std::vector<uint64_t>& keys,
+    const std::vector<std::vector<float>>& embeddings,
+    const std::vector<std::vector<float>>& optimizerSlots)
+{
+    if (keys.size() != embeddings.size()) {
+        ExternalLogger::PrintLog(LogLevel::ERROR, "the size of keys and embeddings should be the same!");
+        return false;
+    }
+    uint32_t optimizerSlotSize = extEmbeddingSize - embeddingSize;
+    if (optimizerSlotSize > 0) {
+        if (keys.size() != optimizerSlots.size()) {
+            ExternalLogger::PrintLog(LogLevel::ERROR, "the size of keys and optimizerSlots should be the same!");
+            return false;
+        }
+    }
+    for (uint64_t i = 0; i < keys.size(); i++) {
+        uint64_t value = 0;
+        if (FindAndPutIfNotFound(keys[i], value) != H_OK) {
+            ExternalLogger::PrintLog(LogLevel::ERROR, "FindAndPutIfNotFound failed!");
+            return false;
+        }
+        if (embeddings[i].size() != embeddingSize) {
+            ExternalLogger::PrintLog(LogLevel::ERROR,
+                "The size of the entering embedding does not equal embeddingSize");
+            return false;
+        }
+        auto* addr = reinterpret_cast<float *>(value);
+        auto rc = memcpy_s(addr, embeddingSize * sizeof(float), embeddings[i].data(), embeddingSize * sizeof(float));
+        if (rc != 0) {
+            ExternalLogger::PrintLog(LogLevel::ERROR, "embedding memcpy_s failed... ");
+            return false;
+        }
+        if (optimizerSlotSize > 0) {
+            if (optimizerSlots[i].size() != optimizerSlotSize) {
+                ExternalLogger::PrintLog(
+                    LogLevel::ERROR,
+                    "The size of the entering optimizerSlot does not equal extEmbeddingSize - embeddingSize");
+                return false;
+            }
+            auto rc2 = memcpy_s(reinterpret_cast<void *>(addr + embeddingSize), optimizerSlotSize * sizeof(float),
+                optimizerSlots[i].data(), optimizerSlotSize * sizeof(float));
+            if (rc2 != 0) {
+                ExternalLogger::PrintLog(LogLevel::ERROR, "optimizerSlot memcpy_s failed... ");
+                return false;
+            }
+        }
+    }
+    return true;
+}
\ No newline at end of file
diff --git a/src/AccCTR/src/embedding_cache/embedding_local_table/emb_local_table.h b/src/AccCTR/src/embedding_cache/embedding_local_table/emb_local_table.h
new file mode 100644
index 00000000..ee93bb91
--- /dev/null
+++ b/src/AccCTR/src/embedding_cache/embedding_local_table/emb_local_table.h
@@ -0,0 +1,84 @@
+/* Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+ ==============================================================================*/
+
+#ifndef EMB_LOCAL_TABLE_H
+#define EMB_LOCAL_TABLE_H
+
+#include <cstdint>
+#include <memory>
+#include <vector>
+
+#include "offset_mapper/address_mapper.h"
+
+namespace EmbCache {
+struct EmbPoolParam {
+    uint64_t prefillBufferSize;
+    uint32_t refillThreadNum;
+};
+
+class EmbLocalTable {
+public:
+    EmbLocalTable() = default;
+
+    ~EmbLocalTable() = default;
+
+    bool Initialize(const EmbCacheInfo& embCacheInfo, uint64_t reserve,
+        const std::vector<InitializerInfo>& initializerInfos, const EmbPoolParam& embPoolParam);
+
+    void UnInitialize();
+
+    int FindAndPutIfNotFound(uint64_t key, uint64_t& value);
+
+    bool Remove(uint64_t key);
+
+    int RemoveByKeys(const std::vector<uint64_t>& keys, uint32_t threadNum);
+
+    int Gather(uint64_t startAddr, const std::vector<uint64_t>& keys, uint32_t threadNum);
+
+    int GatherAddrs(const std::vector<uint64_t>& keys, std::vector<float *>& addrs, uint32_t threadNum);
+
+    int Scatter(uint64_t startAddr, const std::vector<uint64_t>& keys, uint32_t threadNum);
+
+    int OneThreadHandle(uint64_t startAddr, const std::vector<uint64_t>& keys, bool isGather);
+
+    int GatherAndRemove(uint64_t startAddr, const std::vector<uint64_t>& keys, uint32_t threadNum);
+
+    std::vector<std::pair<uint64_t, uint64_t>> ExportVec();
+
+    std::vector<char> Serialize();
+
+    bool Deserialize(const std::vector<char>& buffer);
+
+    uint32_t GetUsage();
+
+    void GetEmbTableInfos(std::vector<uint64_t>& keys, std::vector<std::vector<float>>& embeddings,
+        std::vector<std::vector<float>>& optimizerSlots);
+
+    bool LoadEmbTableInfos(const std::vector<uint64_t>& keys, const std::vector<std::vector<float>>& embeddings,
+        const std::vector<std::vector<float>>& optimizerSlots);
+
+private:
+    std::shared_ptr<AutoRefillEmbeddingMemoryPool> emExpendMemInfo;
+    AddressMapper embMap;
+    uint32_t embeddingSize;
+    uint32_t extEmbeddingSize;
+
+    template <typename T>
+    void insertData(std::vector<char>& buffer, T& data);
+
+    template <typename T>
+    bool getData(const std::vector<char>& buffer, T& data, uint64_t& i);
+};
+} // namespace EmbCache
+#endif // EMB_LOCAL_TABLE_H
diff --git 
a/src/AccCTR/src/embedding_cache/initializer/constant_initializer/constant_initializer.cpp b/src/AccCTR/src/embedding_cache/initializer/constant_initializer/constant_initializer.cpp new file mode 100644 index 00000000..0e0ecb0d --- /dev/null +++ b/src/AccCTR/src/embedding_cache/initializer/constant_initializer/constant_initializer.cpp @@ -0,0 +1,62 @@ +/* Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + ==============================================================================*/ + +#include "embedding_cache.h" +#include "embedding_cache/common.h" +#include "external_logger.h" + +using namespace std; +using namespace EmbCache; +using namespace ock; + +ConstantInitializer::ConstantInitializer(uint32_t start, uint32_t len, float value, float initK) + : start(start), len(len) +{ + if (value > CONSTANT_VALUE_MAX) { + ExternalLogger::PrintLog(LogLevel::WARN, "constant value is greater than " + + std::to_string(CONSTANT_VALUE_MAX) + ", and will use " + std::to_string(CONSTANT_VALUE_MAX) + "."); + constantValue = CONSTANT_VALUE_MAX; + } else if (value < CONSTANT_VALUE_MIN) { + ExternalLogger::PrintLog(LogLevel::WARN, "constant value is less than " + std::to_string(CONSTANT_VALUE_MIN) + + ", and will use " + std::to_string(CONSTANT_VALUE_MIN) + "."); + constantValue = CONSTANT_VALUE_MIN; + } else { + constantValue = value; + } + if (initK > INIT_K_MAX) { + ExternalLogger::PrintLog(LogLevel::WARN, "constant initK is greater than " + std::to_string(INIT_K_MAX) + + ", and will use " + std::to_string(INIT_K_MAX) + "."); + initParam = INIT_K_MAX; + } else if (initK < INIT_K_MIN) { + ExternalLogger::PrintLog(LogLevel::WARN, "constant initK is less than " + std::to_string(INIT_K_MIN) + + ", and will use " + std::to_string(INIT_K_MIN) + "."); + initParam = INIT_K_MIN; + } else { + initParam = initK; + } +} + +void ConstantInitializer::GenerateData(float* emb, int embSize) +{ + if (len == 0) { + return; + } + if (embSize != INVALID_EMB_SIZE && embSize < static_cast(start + len)) { + ExternalLogger::PrintLog(LogLevel::WARN, + "InitializeInfo start " + std::to_string(start) + " + len " + std::to_string(len) + + " is larger than embedding size " + std::to_string(embSize)); + return; + } + std::fill_n(emb + start, len, initParam * constantValue); +} diff --git a/src/AccCTR/src/embedding_cache/initializer/initializer.cpp b/src/AccCTR/src/embedding_cache/initializer/initializer.cpp new file mode 100644 index 00000000..887aaee0 --- /dev/null +++ b/src/AccCTR/src/embedding_cache/initializer/initializer.cpp @@ -0,0 +1,56 @@ +/* Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
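The constant initializer above clamps value into [CONSTANT_VALUE_MIN, CONSTANT_VALUE_MAX] and initK into [INIT_K_MIN, INIT_K_MAX], then GenerateData fills its [start, start + len) slice with initK * constantValue; an embSize of INVALID_EMB_SIZE (-1) skips the bounds check, which is how the refill pool invokes it. A minimal sketch of that contract (the class itself is declared in embedding_cache.h, which this patch does not show):

    #include <vector>
    #include "embedding_cache.h"

    int main()
    {
        // fills floats [4, 8) with 0.5F * 2.0F = 1.0F; the rest stays untouched
        EmbCache::ConstantInitializer init(4, 4, 2.0F, 0.5F);
        std::vector<float> emb(8, 0.0F);
        init.GenerateData(emb.data(), static_cast<int>(emb.size()));
        return 0;
    }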
+You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + ==============================================================================*/ + +#include + +#include "external_logger.h" +#include "embedding_cache.h" + +using namespace EmbCache; + +ConstantInitializerInfo::ConstantInitializerInfo(float constantValue, float initK) + : constantValue(constantValue), initK(initK) +{} + +NormalInitializerInfo::NormalInitializerInfo(float mean, float stddev, uint32_t seed, float initK) + : mean(mean), stddev(stddev), seed(seed), initK(initK) +{} + +InitializerInfo::InitializerInfo(std::string &name, uint32_t start, uint32_t len, + ConstantInitializerInfo constantInitializerInfo) + : name(name), start(start), len(len), constantInitializerInfo(constantInitializerInfo) +{ + if (name == "constant_initializer") { + initializerType = InitializerType::CONSTANT; + initializer = std::make_shared(start, len, constantInitializerInfo.constantValue, + constantInitializerInfo.initK); + } else { + ock::ExternalLogger::PrintLog(ock::LogLevel::ERROR, "Invalid Initializer Type."); + } +} + +InitializerInfo::InitializerInfo(std::string &name, uint32_t start, uint32_t len, + NormalInitializerInfo normalInitializerInfo) + : name(name), start(start), len(len), normalInitializerInfo(normalInitializerInfo) +{ + if (name == "truncated_normal_initializer") { + initializerType = InitializerType::TRUNCATED_NORMAL; + initializer = std::make_shared(start, len, normalInitializerInfo); + } else if (name == "random_normal_initializer") { + initializerType = InitializerType::RANDOM_NORMAL; + initializer = std::make_shared(start, len, normalInitializerInfo); + } else { + ock::ExternalLogger::PrintLog(ock::LogLevel::ERROR, "Invalid Initializer Type."); + } +} diff --git a/src/AccCTR/src/embedding_cache/initializer/random_normal_initializer/random_normal_initializer.cpp b/src/AccCTR/src/embedding_cache/initializer/random_normal_initializer/random_normal_initializer.cpp new file mode 100644 index 00000000..c4b01062 --- /dev/null +++ b/src/AccCTR/src/embedding_cache/initializer/random_normal_initializer/random_normal_initializer.cpp @@ -0,0 +1,78 @@ +/* Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+ ==============================================================================*/
+
+#include <random>
+#include <string>
+#include "embedding_cache.h"
+#include "embedding_cache/common.h"
+#include "external_logger.h"
+
+using namespace EmbCache;
+using namespace ock;
+
+RandomNormalInitializer::RandomNormalInitializer(uint32_t start, uint32_t len, NormalInitializerInfo &initInfo)
+    : start(start), len(len), mean(initInfo.mean), stddev(initInfo.stddev), seed(initInfo.seed)
+{
+    // validate the value ranges of stddev, mean and initK
+    if (initInfo.mean > NORMAL_MEAN_MAX) {
+        ExternalLogger::PrintLog(LogLevel::WARN, "random normal mean param is greater than " +
+            std::to_string(NORMAL_MEAN_MAX) + ", and will use " + std::to_string(NORMAL_MEAN_MAX) + ".");
+        mean = NORMAL_MEAN_MAX;
+    } else if (initInfo.mean < NORMAL_MEAN_MIN) {
+        ExternalLogger::PrintLog(LogLevel::WARN, "random normal mean param is less than " +
+            std::to_string(NORMAL_MEAN_MIN) + ", and will use " + std::to_string(NORMAL_MEAN_MIN) + ".");
+        mean = NORMAL_MEAN_MIN;
+    } else {
+        mean = initInfo.mean;
+    }
+    if (initInfo.stddev > NORMAL_STDDEV_MAX) {
+        ExternalLogger::PrintLog(LogLevel::WARN, "random normal stddev param is greater than " +
+            std::to_string(NORMAL_STDDEV_MAX) + ", and will use " + std::to_string(NORMAL_STDDEV_MAX) + ".");
+        stddev = NORMAL_STDDEV_MAX;
+    } else if (initInfo.stddev < NORMAL_STDDEV_MIN) {
+        ExternalLogger::PrintLog(LogLevel::WARN, "random normal stddev param is less than " +
+            std::to_string(NORMAL_STDDEV_MIN) + ", and will use " + std::to_string(NORMAL_STDDEV_MIN) + ".");
+        stddev = NORMAL_STDDEV_MIN;
+    } else {
+        stddev = initInfo.stddev;
+    }
+    if (initInfo.initK > INIT_K_MAX) {
+        ExternalLogger::PrintLog(LogLevel::WARN, "random normal initK is greater than " + std::to_string(INIT_K_MAX) +
+            ", and will use " + std::to_string(INIT_K_MAX) + ".");
+        initParam = INIT_K_MAX;
+    } else if (initInfo.initK < INIT_K_MIN) {
+        ExternalLogger::PrintLog(LogLevel::WARN, "random normal initK is less than " + std::to_string(INIT_K_MIN) +
+            ", and will use " + std::to_string(INIT_K_MIN) + ".");
+        initParam = INIT_K_MIN;
+    } else {
+        initParam = initInfo.initK;
+    }
+
+    generator = std::default_random_engine(seed);
+    distribution = std::normal_distribution<float>(mean, stddev);
+}
+
+void RandomNormalInitializer::GenerateData(float* emb, int embSize)
+{
+    if (len == 0) {
+        return;
+    }
+    if (embSize != INVALID_EMB_SIZE && embSize < static_cast<int>(start + len)) {
+        ExternalLogger::PrintLog(LogLevel::WARN,
+            "InitializeInfo start " + std::to_string(start) + " + len " + std::to_string(len) +
+            " is larger than embedding size " + std::to_string(embSize));
+        return;
+    }
+    std::generate_n(emb + start, len, [this]() { return initParam * distribution(generator); });
+}
\ No newline at end of file
diff --git a/src/AccCTR/src/embedding_cache/initializer/truncated_normal_initializer/truncated_normal_initializer.cpp b/src/AccCTR/src/embedding_cache/initializer/truncated_normal_initializer/truncated_normal_initializer.cpp
new file mode 100644
index 00000000..95e09757
--- /dev/null
+++ b/src/AccCTR/src/embedding_cache/initializer/truncated_normal_initializer/truncated_normal_initializer.cpp
@@ -0,0 +1,94 @@
+/* Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+ ==============================================================================*/
+
+#include <limits>
+#include "embedding_cache.h"
+#include "embedding_cache/common.h"
+#include "external_logger.h"
+
+using namespace EmbCache;
+using namespace ock;
+
+TruncatedNormalInitializer::TruncatedNormalInitializer(uint32_t start, uint32_t len, NormalInitializerInfo &initInfo)
+    : start(start), len(len), mean(initInfo.mean), stddev(initInfo.stddev), seed(initInfo.seed)
+{
+    // validate the value ranges of stddev, mean and initK
+    if (initInfo.mean > NORMAL_MEAN_MAX) {
+        ExternalLogger::PrintLog(LogLevel::WARN, "truncated normal mean param is greater than " +
+            std::to_string(NORMAL_MEAN_MAX) + ", and will use " + std::to_string(NORMAL_MEAN_MAX) + ".");
+        mean = NORMAL_MEAN_MAX;
+    } else if (initInfo.mean < NORMAL_MEAN_MIN) {
+        ExternalLogger::PrintLog(LogLevel::WARN, "truncated normal mean param is less than " +
+            std::to_string(NORMAL_MEAN_MIN) + ", and will use " + std::to_string(NORMAL_MEAN_MIN) + ".");
+        mean = NORMAL_MEAN_MIN;
+    } else {
+        mean = initInfo.mean;
+    }
+
+    if (initInfo.stddev > NORMAL_STDDEV_MAX) {
+        ExternalLogger::PrintLog(LogLevel::WARN, "truncated normal stddev param is greater than " +
+            std::to_string(NORMAL_STDDEV_MAX) + ", and will use " + std::to_string(NORMAL_STDDEV_MAX) + ".");
+        stddev = NORMAL_STDDEV_MAX;
+    } else if (initInfo.stddev < NORMAL_STDDEV_MIN) {
+        ExternalLogger::PrintLog(LogLevel::WARN, "truncated normal stddev param is less than " +
+            std::to_string(NORMAL_STDDEV_MIN) + ", and will use " + std::to_string(NORMAL_STDDEV_MIN) + ".");
+        stddev = NORMAL_STDDEV_MIN;
+    } else {
+        stddev = initInfo.stddev;
+    }
+
+    if (abs(stddev) < std::numeric_limits<float>::epsilon()) {
+        ExternalLogger::PrintLog(
+            LogLevel::WARN,
+            "truncated normal stddev param is zero, initialization can be slow, suggest using constant initializer");
+    }
+
+    if (initInfo.initK > INIT_K_MAX) {
+        ExternalLogger::PrintLog(LogLevel::WARN, "truncated normal initK is greater than " +
+            std::to_string(INIT_K_MAX) + ", and will use " + std::to_string(INIT_K_MAX) + ".");
+        initParam = INIT_K_MAX;
+    } else if (initInfo.initK < INIT_K_MIN) {
+        ExternalLogger::PrintLog(LogLevel::WARN, "truncated normal initK is less than " + std::to_string(INIT_K_MIN) +
+            ", and will use " + std::to_string(INIT_K_MIN) + ".");
+        initParam = INIT_K_MIN;
+    } else {
+        initParam = initInfo.initK;
+    }
+
+    generator = std::default_random_engine(seed);
+    distribution = std::normal_distribution<float>(mean, stddev);
+    minBound = initParam * (mean - static_cast<float>(boundNum) * stddev);
+    maxBound = initParam * (mean + static_cast<float>(boundNum) * stddev);
+}
+
+
+void TruncatedNormalInitializer::GenerateData(float* emb, int embSize)
+{
+    if (len == 0) {
+        return;
+    }
+    if (embSize != INVALID_EMB_SIZE && embSize < static_cast<int>(start + len)) {
+        ExternalLogger::PrintLog(LogLevel::WARN,
+            "InitializeInfo start " + std::to_string(start) + " + len " + std::to_string(len) +
+            " is larger than embedding size " + std::to_string(embSize));
+        return;
+    }
+    std::generate_n(emb + start, len, [this]() {
+        float tmp = 
initParam * distribution(generator); + } + return tmp; + }); +} diff --git a/src/AccCTR/src/embedding_cache/limited_set.h b/src/AccCTR/src/embedding_cache/limited_set.h new file mode 100644 index 00000000..036a6477 --- /dev/null +++ b/src/AccCTR/src/embedding_cache/limited_set.h @@ -0,0 +1,118 @@ +/* Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + ==============================================================================*/ + +#ifndef MXREC_LIMITED_SET_H +#define MXREC_LIMITED_SET_H + +#include +#include + +namespace EmbCache { + +class LimitedSet { +public: + struct Node { + uint64_t value; + Node *prev, *next; + Node(uint64_t val = -1) : value(val), prev(nullptr), next(nullptr) {} + }; + + LimitedSet(uint64_t maxRange) : head(new Node(-1)), tail(new Node(-1)) + { + nodes.resize(maxRange); + for (auto &node : nodes) { + node = new Node(-1); + } + head->next = tail; + tail->prev = head; + } + + ~LimitedSet() + { + for (auto &node : nodes) { + delete node; + } + delete head; + delete tail; + } + + void insert(uint64_t value) + { + if (nodes[value]->value == value) { + return; + } + Node *node = nodes[value]; + node->value = value; + Node *next = head->next; + node->next = next; + node->prev = head; + head->next = node; + next->prev = node; + } + + void remove(uint64_t value) + { + if (nodes[value]->value != value) { + return; + } + Node *node = nodes[value]; + node->prev->next = node->next; + node->next->prev = node->prev; + node->value = -1; + } + + bool find(uint64_t value) + { + return nodes[value]->value == value; + } + + class Iterator { + public: + Iterator(Node *node) : current(node) {} + bool operator != (const Iterator &other) const + { + return current != other.current; + } + const uint64_t &operator*() const + { + return current->value; + } + Iterator &operator ++ () + { + current = current->next; + return *this; + } + + private: + Node *current; + }; + + Iterator begin() + { + return { head->next }; + } + + Iterator end() + { + return { tail }; + } + +private: + Node *head; + Node *tail; + std::vector nodes; +}; + +} +#endif // MXREC_LIMITED_SET_H diff --git a/src/AccCTR/src/embedding_cache/offset_mapper/address_mapper.h b/src/AccCTR/src/embedding_cache/offset_mapper/address_mapper.h new file mode 100644 index 00000000..649b2d8a --- /dev/null +++ b/src/AccCTR/src/embedding_cache/offset_mapper/address_mapper.h @@ -0,0 +1,308 @@ +/* Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
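LimitedSet above is an intrusive doubly linked list over a preallocated node array: insert, remove, and find are all O(1) for ids in [0, maxRange), and iteration visits live ids newest-first. A short usage sketch, assuming limited_set.h is on the include path:

    #include <cstdint>
    #include <iostream>
    #include "limited_set.h"

    int main()
    {
        EmbCache::LimitedSet set(16);  // ids must be < 16
        set.insert(3);
        set.insert(7);
        set.remove(3);
        std::cout << set.find(7) << "\n";  // prints 1
        for (uint64_t v : set) {           // visits live ids, newest first
            std::cout << v << "\n";        // prints 7
        }
        return 0;
    }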
+ ==============================================================================*/ + +#ifndef MXREC_FASTER_QUERY_H +#define MXREC_FASTER_QUERY_H + +#include +#include +#include +#include +#include +#include +#include + +#include "embedding_cache.h" +#include "offset_mapper/mapper_base.h" +#include "securec.h" + +namespace EmbCache { +using EmExpandMemUint = struct em_expand_memory_uint_ { + uint64_t address = 0; + uint64_t capacity = 0; + uint64_t leftCapacity = 0; + + em_expand_memory_uint_() = default; + + em_expand_memory_uint_(uint64_t a, uint64_t c) : address(a), capacity(c), leftCapacity(c) {} +}; + +template +class QWithLock { +public: + bool pop(T& ele) + { + std::lock_guard lk(mut); + if (dataQ.empty()) { + return false; + } + ele = dataQ.front(); + dataQ.pop(); + return true; + } + + void push(const T& ele) + { + std::lock_guard lk(mut); + dataQ.push(ele); + } + + uint64_t GetLength() + { + std::lock_guard lk(mut); + return dataQ.size(); + } + +private: + std::mutex mut; + std::queue dataQ; +}; + +class AutoRefillEmbeddingMemoryPool { +public: + std::vector expandedMemory; + uint32_t extEmbeddingSize; + std::vector initializerInfos; + + AutoRefillEmbeddingMemoryPool(uint64_t bufferSize, std::vector initInfos, uint32_t extEmbSize, + uint64_t hostVocabSize, uint32_t refillThreadNum = 1) + : extEmbeddingSize(extEmbSize), + initializerInfos(std::move(initInfos)), + maxBufferSize(bufferSize), + totalLeftVocabSize(hostVocabSize), + numThreads(refillThreadNum) + { + itemSize = extEmbeddingSize * sizeof(float); + maxExpandSize = maxBufferSize * itemSize; + for (uint32_t i = 0; i < numThreads; i++) { + producerThreads.emplace_back([this] { ProducerWorker(); }); + } + } + + ~AutoRefillEmbeddingMemoryPool() + { + { + std::lock_guard lock(producerMutex); + stop = true; + } + producerCv.notify_all(); + fullCv.notify_all(); + for (auto& t : producerThreads) { + t.join(); + } + } + + void Stop() + { + std::lock_guard lock(producerMutex); + stop = true; + producerCv.notify_all(); + fullCv.notify_all(); + } + + BeforePutFuncState GetNewValueToBeInserted(uint64_t& value, uint32_t maxRetry = 1000) + { + for (uint32_t i = 0; i < maxRetry; i++) { + if (BufferBin.pop(value)) { + producerCv.notify_one(); + return BeforePutFuncState::BEFORE_SUCCESS; + }; + producerCv.notify_one(); + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + } + ock::ExternalLogger::PrintLog( + ock::LogLevel::ERROR, + "Failed to get new address for embedding, it is likely due to refill thread memory allocation failure " + "or max retry has been reached. 
Please check for memory alloc error or increase refill thread num!"); + return BeforePutFuncState::BEFORE_FAIL; + } + + void GetValueToBeRecycled(uint64_t value) + { + std::lock_guard lock(producerMutex); + recycleBin.push(value); + full = false; + fullCv.notify_one(); + } + +private: + uint64_t maxBufferSize; + uint64_t totalLeftVocabSize; + uint32_t numThreads; + std::atomic currBufferSize{0}; + volatile bool stop = false; + volatile std::atomic full = false; + std::mutex producerMutex; + std::mutex getAddrMutex; + std::condition_variable producerCv; + std::condition_variable fullCv; + QWithLock BufferBin; + QWithLock recycleBin; + std::vector producerThreads; + EmExpandMemUint currentMemoryUint{}; + uint64_t dynamicExpandRatio = 2; + uint64_t maxExpandSize; + uint64_t itemSize; + + bool GetNewAddr(uint64_t& newAddr) + { + std::lock_guard lg(getAddrMutex); + if (HM_UNLIKELY(currentMemoryUint.leftCapacity <= 0)) { + /* need to expand memory */ + uint64_t maxSize = std::min(maxExpandSize, totalLeftVocabSize * itemSize); + uint64_t newSize = currentMemoryUint.capacity + ? std::min(currentMemoryUint.capacity * dynamicExpandRatio, maxSize) + : itemSize; + if (newSize == 0) { + if (recycleBin.GetLength() == 0) { + full = true; + } + return false; + } + auto newAddress = (uint64_t)malloc(newSize); + if (newAddress == 0) { + ock::ExternalLogger::PrintLog(ock::LogLevel::WARN, "Refill thread allocate memory failed!"); + return false; + } + expandedMemory.emplace_back(newAddress, newSize); + currentMemoryUint.address = newAddress; + currentMemoryUint.capacity = newSize; + currentMemoryUint.leftCapacity = newSize; + totalLeftVocabSize -= newSize / itemSize; + } + newAddr = currentMemoryUint.address + currentMemoryUint.capacity - currentMemoryUint.leftCapacity; + currentMemoryUint.leftCapacity -= itemSize; + return true; + } + + void Produce() + { + uint64_t newAddr; + if (!recycleBin.pop(newAddr)) { + if (!GetNewAddr(newAddr)) { + return; + } + } + GenerateData(newAddr); + BufferBin.push(newAddr); + } + + void GenerateData(const uint64_t& addr) + { + auto* embAddr = reinterpret_cast(addr); + for (const auto& initializerInfo : initializerInfos) { + initializerInfo.initializer->GenerateData(embAddr, INVALID_EMB_SIZE); + } + } + + void ProducerWorker() + { + std::unique_lock lock(producerMutex); + while (!stop) { + if (full) { + fullCv.wait(lock); + continue; + } + if (BufferBin.GetLength() < maxBufferSize) { + Produce(); + continue; + } + producerCv.wait(lock); + } + } +}; + +class AddressMapper : public MapperBase { +public: + AddressMapper() = default; + + ~AddressMapper() = default; + + bool Initialize(uint32_t reserve, uint32_t vocabSize, std::shared_ptr expendInfoPtr) + { + hostVocabSize = vocabSize; + emExpendMemInfoPtr = expendInfoPtr; + return MapperBase::Initialize(reserve); + } + + void UnInitialize() override + { + emExpendMemInfoPtr->Stop(); + FreeExpandedMemory(); + MapperBase::UnInitialize(); + } + + FkvState Remove(uint64_t key) + { + return MapperBase::Remove(key, [&](uint64_t value) { + emExpendMemInfoPtr->GetValueToBeRecycled(value); + return BeforeRemoveFuncState::BEFORE_SUCCESS; + }); + } + + FkvState FindAndPutIfNotFound(uint64_t key, uint64_t& value) + { + FkvState ret = MapperBase::FindAndPutIfNotFound(key, value, [&]() { + if (HM_UNLIKELY(current_size.load() >= hostVocabSize)) { + ock::ExternalLogger::PrintLog(ock::LogLevel::ERROR, "host does not have enough space"); + return BeforePutFuncState::BEFORE_NO_SPACE; + } + return emExpendMemInfoPtr->GetNewValueToBeInserted(value); + 
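            /*
             * Contract of the hook returned to MapperBase at this point: the key
             * slot is published first and beforePutFunc runs second. On
             * BEFORE_SUCCESS the value written above is stored under the key; on
             * BEFORE_FAIL or BEFORE_NO_SPACE the bucket rolls the key slot back
             * to 0 and FindAndPutIfNotFound surfaces FKV_BEFORE_PUT_FUNC_FAIL or
             * FKV_NO_SPACE, so a pool address is never leaked into the table.
             * The key == 0 fast path in MapperBase is the one exception: it
             * leaves zeroInside set when the hook fails.
             */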
}); + if (ret == FkvState::FKV_FAIL) { + ock::ExternalLogger::PrintLog(ock::LogLevel::ERROR, "FindAndPutIfNotFound failed!"); + return ret; + } + if (ret == FkvState::FKV_BEFORE_PUT_FUNC_FAIL) { + ock::ExternalLogger::PrintLog(ock::LogLevel::ERROR, "malloc failed"); + return ret; + } + return ret; + } + + // 如果多线程使用,严格保证传入的key线程间不会重复(unique key),否则可能出现未定义结果 + FkvState FindAndRemoveIfFound(uint64_t key, const uint64_t startAddr) + { + return MapperBase::Remove(key, [&](uint64_t value) { + uint64_t memSize = emExpendMemInfoPtr->extEmbeddingSize * sizeof(float); + auto rc = memcpy_s(reinterpret_cast(startAddr), memSize, reinterpret_cast(value), memSize); + if (rc != 0) { + ock::ExternalLogger::PrintLog(ock::LogLevel::ERROR, + "memcpy_s failed... dstSize: " + std::to_string(memSize)); + return BeforeRemoveFuncState::BEFORE_FAIL; + } + emExpendMemInfoPtr->GetValueToBeRecycled(value); + return BeforeRemoveFuncState::BEFORE_SUCCESS; + }); + } + + uint32_t GetUsage() + { + return MapperBase::current_size; + } + +private: + void FreeExpandedMemory() + { + for (auto& memUint : emExpendMemInfoPtr->expandedMemory) { + free(reinterpret_cast(memUint.address)); + } + } + +private: + uint32_t hostVocabSize; + std::shared_ptr emExpendMemInfoPtr; +}; +} // namespace EmbCache +#endif // MXREC_FASTER_QUERY_H diff --git a/src/AccCTR/src/embedding_cache/offset_mapper/mapper_base.h b/src/AccCTR/src/embedding_cache/offset_mapper/mapper_base.h new file mode 100644 index 00000000..969845ee --- /dev/null +++ b/src/AccCTR/src/embedding_cache/offset_mapper/mapper_base.h @@ -0,0 +1,810 @@ +/* Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
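AutoRefillEmbeddingMemoryPool above couples one or more producer threads with two queues: BufferBin holds pre-initialized embedding slots ready to hand out, while recycleBin feeds evicted slots back to the producers before any new memory is grown (each expansion doubles the previous one, capped by maxExpandSize and the remaining host vocab). A consumer-side sketch; Demo, the include path and the numeric arguments are illustrative, each InitializerInfo.initializer is assumed to be non-null, and BeforePutFuncState is assumed to come from embedding_cache/common.h:

    #include <cstdint>
    #include <utility>
    #include <vector>
    #include "embedding_cache/offset_mapper/address_mapper.h"

    void Demo(std::vector<EmbCache::InitializerInfo> inits)
    {
        EmbCache::AutoRefillEmbeddingMemoryPool pool(/*bufferSize=*/1024, std::move(inits),
                                                     /*extEmbSize=*/32, /*hostVocabSize=*/1000000);
        uint64_t addr = 0;
        if (pool.GetNewValueToBeInserted(addr) == EmbCache::BeforePutFuncState::BEFORE_SUCCESS) {
            // addr points at extEmbSize pre-initialized floats owned by the pool;
            // AddressMapper stores it as the "value" for a newly admitted key.
            pool.GetValueToBeRecycled(addr);   // on eviction the slot is reused, not freed
        }
    }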
+ ==============================================================================*/ + +#ifndef MXREC_MAPPER_BASE_H +#define MXREC_MAPPER_BASE_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "securec.h" +#include "embedding_cache/common.h" +#include "external_logger.h" + +namespace EmbCache { +/* + * @brief Allocator template, for extend memory allocation for overflowed buckets + */ + +static constexpr size_t K_ALIGNMENT = 64; +static constexpr size_t K_KVNUMINBUCKET = 3; + +enum BucketIdx { + FIRST, + SECOND, + THIRD +}; + +class NetHeapAllocator { +public: + void *Allocate(uint32_t size) + { + return calloc(1, size); + } + + void Free(void *p) + { + if (HM_LIKELY(p != nullptr)) { + free(p); + p = nullptr; + } + } +}; + +/* + * @brief Spin lock entry in bucket + * used for alloc overflowed buckets + */ + +struct NetHashLockEntry { + uint64_t lock = 0; + + /* + * @brief Spin lock + */ + void Lock() + { + while (!__sync_bool_compare_and_swap(&lock, 0, 1)) { + } + } + + /* + * @brief Unlock + */ + void UnLock() + { + __atomic_store_n(&lock, 0, __ATOMIC_SEQ_CST); + } +} __attribute__((packed)); + +/* + * @brief Store the key/value into a linked array with 6 items, + * because 64bytes is one cache line + */ + +struct alignas(K_ALIGNMENT)NetHashBucket { + std::atomic keys[K_KVNUMINBUCKET]{}; + uint64_t values[K_KVNUMINBUCKET]{}; + NetHashBucket *next = nullptr; + NetHashLockEntry spinLock{}; + + FkvState Put(uint64_t key, uint64_t &value, const std::function &beforePutFunc) + { + /* don't put them into loop, flat code is faster than loop */ + uint64_t oldKey = 0; + if (keys[BucketIdx::FIRST].load(std::memory_order_relaxed) == 0 && + keys[BucketIdx::FIRST].compare_exchange_strong(oldKey, key)) { + BeforePutFuncState ret = beforePutFunc(); + if (HM_UNLIKELY(ret == BeforePutFuncState::BEFORE_FAIL)) { + keys[BucketIdx::FIRST] = 0; + return FkvState::FKV_BEFORE_PUT_FUNC_FAIL; + } + if (HM_UNLIKELY(ret == BeforePutFuncState::BEFORE_NO_SPACE)) { + keys[BucketIdx::FIRST] = 0; + return FkvState::FKV_NO_SPACE; + } + values[BucketIdx::FIRST] = value; + return FkvState::FKV_NOT_EXIST; + } + + if (HM_UNLIKELY(oldKey == key)) { + return FkvState::FKV_KEY_CONFLICT; + } + + oldKey = 0; + if (keys[BucketIdx::SECOND].load(std::memory_order_relaxed) == 0 && + keys[BucketIdx::SECOND].compare_exchange_strong(oldKey, key)) { + BeforePutFuncState ret = beforePutFunc(); + if (HM_UNLIKELY(ret == BeforePutFuncState::BEFORE_FAIL)) { + keys[BucketIdx::SECOND] = 0; + return FkvState::FKV_BEFORE_PUT_FUNC_FAIL; + } + if (HM_UNLIKELY(ret == BeforePutFuncState::BEFORE_NO_SPACE)) { + keys[BucketIdx::SECOND] = 0; + return FkvState::FKV_NO_SPACE; + } + values[BucketIdx::SECOND] = value; + return FkvState::FKV_NOT_EXIST; + } + + if (HM_UNLIKELY(oldKey == key)) { + return FkvState::FKV_KEY_CONFLICT; + } + + oldKey = 0; + if (keys[BucketIdx::THIRD].load(std::memory_order_relaxed) == 0 && + keys[BucketIdx::THIRD].compare_exchange_strong(oldKey, key)) { + BeforePutFuncState ret = beforePutFunc(); + if (HM_UNLIKELY(ret == BeforePutFuncState::BEFORE_FAIL)) { + keys[BucketIdx::THIRD] = 0; + return FkvState::FKV_BEFORE_PUT_FUNC_FAIL; + } + if (HM_UNLIKELY(ret == BeforePutFuncState::BEFORE_NO_SPACE)) { + keys[BucketIdx::THIRD] = 0; + return FkvState::FKV_NO_SPACE; + } + values[BucketIdx::THIRD] = value; + return FkvState::FKV_NOT_EXIST; + } + + if (HM_UNLIKELY(oldKey == key)) { + return FkvState::FKV_KEY_CONFLICT; + } + + return FkvState::FKV_FAIL; + } + + /* + * @brief Remove 
the address from the bucket and get size + */ + bool Find(const uint64_t key, uint64_t &value) + { + /* + * expand the loop, instead of put them into a for/while loop for performance + */ + if (key == keys[BucketIdx::FIRST].load(std::memory_order_relaxed)) { + value = values[BucketIdx::FIRST]; + return true; + } + + if (key == keys[BucketIdx::SECOND].load(std::memory_order_relaxed)) { + value = values[BucketIdx::SECOND]; + return true; + } + + if (key == keys[BucketIdx::THIRD].load(std::memory_order_relaxed)) { + value = values[BucketIdx::THIRD]; + return true; + } + + return false; + } + + FkvState Remove(uint64_t key) + { + /* don't put them into loop, flat code is faster than loop */ + uint64_t oldValue = key; + if (keys[BucketIdx::FIRST].load(std::memory_order_relaxed) == key && + keys[BucketIdx::FIRST].compare_exchange_strong(oldValue, 0)) { + values[BucketIdx::FIRST] = 0; + return FkvState::FKV_EXIST; + } + if (HM_UNLIKELY(oldValue == 0)) { + return FkvState::FKV_EXIST; + } + oldValue = key; + + if (keys[BucketIdx::SECOND].load(std::memory_order_relaxed) == key && + keys[BucketIdx::SECOND].compare_exchange_strong(oldValue, 0)) { + values[BucketIdx::SECOND] = 0; + return FkvState::FKV_EXIST; + } + if (HM_UNLIKELY(oldValue == 0)) { + return FkvState::FKV_EXIST; + } + oldValue = key; + + if (keys[BucketIdx::THIRD].load(std::memory_order_relaxed) == key && + keys[BucketIdx::THIRD].compare_exchange_strong(oldValue, 0)) { + values[BucketIdx::THIRD] = 0; + return FkvState::FKV_EXIST; + } + if (HM_UNLIKELY(oldValue == 0)) { + return FkvState::FKV_EXIST; + } + + return FkvState::FKV_NOT_EXIST; + } + + FkvState Remove(uint64_t key, const std::function &beforeRemoveFunc) + { + /* don't put them into loop, flat code is faster than loop */ + uint64_t oldValue = key; + if (keys[BucketIdx::FIRST].load(std::memory_order_relaxed) == key && + keys[BucketIdx::FIRST].compare_exchange_strong(oldValue, 0)) { + if (HM_UNLIKELY(beforeRemoveFunc(values[BucketIdx::FIRST]) == BeforeRemoveFuncState::BEFORE_FAIL)) { + return FkvState::FKV_BEFORE_REMOVE_FUNC_FAIL; + } + + values[BucketIdx::FIRST] = 0; + return FkvState::FKV_EXIST; + } + if (HM_UNLIKELY(oldValue == 0)) { + return FkvState::FKV_EXIST; + } + oldValue = key; + + if (keys[BucketIdx::SECOND].load(std::memory_order_relaxed) == key && + keys[BucketIdx::SECOND].compare_exchange_strong(oldValue, 0)) { + if (HM_UNLIKELY(beforeRemoveFunc(values[BucketIdx::SECOND]) == BeforeRemoveFuncState::BEFORE_FAIL)) { + return FkvState::FKV_BEFORE_REMOVE_FUNC_FAIL; + } + + values[BucketIdx::SECOND] = 0; + return FkvState::FKV_EXIST; + } + if (HM_UNLIKELY(oldValue == 0)) { + return FkvState::FKV_EXIST; + } + oldValue = key; + + if (keys[BucketIdx::THIRD].load(std::memory_order_relaxed) == key && + keys[BucketIdx::THIRD].compare_exchange_strong(oldValue, 0)) { + if (HM_UNLIKELY(beforeRemoveFunc(values[BucketIdx::THIRD]) == BeforeRemoveFuncState::BEFORE_FAIL)) { + return FkvState::FKV_BEFORE_REMOVE_FUNC_FAIL; + } + + values[BucketIdx::THIRD] = 0; + return FkvState::FKV_EXIST; + } + if (HM_UNLIKELY(oldValue == 0)) { + return FkvState::FKV_EXIST; + } + + return FkvState::FKV_NOT_EXIST; + } +}; + + +class MapperBase { +public: + // DEFINE_RDMA_REF_COUNT_FUNCTIONS + std::atomic current_size{ 0 }; + + MapperBase() = default; + + ~MapperBase() = default; + + bool Initialize(uint32_t reserve) + { + /* already initialized */ + if (mOverflowEntryAlloc != nullptr) { + return true; + } + + /* get proper bucket count */ + uint32_t bucketCount = std::max(reserve, uint32_t(128)); + if 
(bucketCount > gPrimes[gPrimesCount - 1]) { + bucketCount = gPrimes[gPrimesCount - 1]; + } else { + uint32_t i = 0; + while (i < gPrimesCount && gPrimes[i] < bucketCount) { + i++; + } + bucketCount = gPrimes[i]; + } + + /* allocate buckets for sub-maps */ + for (auto &mSubMap : mSubMaps) { + auto tmp = new (std::nothrow) NetHashBucket[bucketCount]; + if (HM_UNLIKELY(tmp == nullptr)) { + FreeSubMaps(); + ock::ExternalLogger::PrintLog(ock::LogLevel::ERROR, + "Failed to new hash bucket, probably out of memory"); + return false; + } + + /* make physical page and set to zero */ + auto ret = memset_s(tmp, sizeof(NetHashBucket) * bucketCount, 0, sizeof(NetHashBucket) * bucketCount); + if (ret != 0) { + ock::ExternalLogger::PrintLog(ock::LogLevel::ERROR, + "memset_s failed... size: " + std::to_string(sizeof(NetHashBucket) * bucketCount)); + return false; + } + + mSubMap = tmp; + } + + /* create overflow entry allocator */ + mOverflowEntryAlloc = new (std::nothrow) NetHeapAllocator(); + if (HM_UNLIKELY(mOverflowEntryAlloc == nullptr)) { + FreeSubMaps(); + ock::ExternalLogger::PrintLog(ock::LogLevel::ERROR, + "Failed to new overflow entry allocator, probably out of memory"); + return false; + } + + /* set bucket count */ + mBucketCount = bucketCount; + ock::ExternalLogger::PrintLog(ock::LogLevel::INFO, + "fastKV inited, mBucketCount: " + std::to_string(mBucketCount)); + return true; + } + + virtual void UnInitialize() + { + if (mOverflowEntryAlloc == nullptr) { + return; + } + + /* free overflowed entries firstly */ + FreeOverFlowedEntries(); + + /* free sub map secondly */ + FreeSubMaps(); + + /* free overflow entry at last */ + delete mOverflowEntryAlloc; + mOverflowEntryAlloc = nullptr; + mBucketCount = 0; + } + + FkvState FindAndPutIfNotFound(uint64_t key, uint64_t &value, + const std::function &beforePutFunc) + { + if (HM_UNLIKELY(key == 0)) { + if (zeroInside) { + value = zeroValue; + return FkvState::FKV_EXIST; + } + if (__sync_bool_compare_and_swap(&zeroInside, false, true)) { + BeforePutFuncState ret = beforePutFunc(); + if (HM_UNLIKELY(ret == BeforePutFuncState::BEFORE_FAIL)) { + return FkvState::FKV_BEFORE_PUT_FUNC_FAIL; + } + if (HM_UNLIKELY(ret == BeforePutFuncState::BEFORE_NO_SPACE)) { + return FkvState::FKV_NO_SPACE; + } + zeroValue = value; + current_size++; + return FkvState::FKV_NOT_EXIST; + } + return FkvState::FKV_KEY_CONFLICT; + } + + /* get bucket */ + auto buck = &(mSubMaps[key % gSubMapCount][key % mBucketCount]); + + /* loop all buckets linked */ + while (buck != nullptr) { + buck->spinLock.Lock(); + if (buck->Find(key, value)) { + buck->spinLock.UnLock(); + return FkvState::FKV_EXIST; + } + buck->spinLock.UnLock(); + + if (buck->next != nullptr) { + buck = buck->next; + } else { + break; + } + } + + // did not find, now do put. 
continue from the last bucket in find + return PutKeyValue(key, value, buck, beforePutFunc); + } + + FkvState Remove(uint64_t key) + { + if (HM_UNLIKELY(key == 0)) { + if (zeroInside) { + if (__sync_bool_compare_and_swap(&zeroInside, true, false)) { + zeroValue = 0; + current_size--; + } + return FkvState::FKV_EXIST; + } + return FkvState::FKV_NOT_EXIST; + } + + /* get bucket */ + auto buck = &(mSubMaps[key % gSubMapCount][key % mBucketCount]); + + /* loop all buckets linked */ + uint64_t value; + while (buck != nullptr) { + if (buck->Find(key, value)) { + buck->Remove(key); + current_size--; + return FkvState::FKV_EXIST; + } + + buck = buck->next; + } + + return FkvState::FKV_NOT_EXIST; + } + + FkvState Remove(uint64_t key, const std::function &beforeRemoveFunc) + { + if (HM_UNLIKELY(key == 0)) { + if (!zeroInside) { + return FkvState::FKV_NOT_EXIST; + } + if (__sync_bool_compare_and_swap(&zeroInside, true, false)) { + auto ret = beforeRemoveFunc(zeroValue); + if (HM_UNLIKELY(ret == BeforeRemoveFuncState::BEFORE_FAIL)) { + return FkvState::FKV_BEFORE_REMOVE_FUNC_FAIL; + } + zeroValue = 0; + current_size--; + } + return FkvState::FKV_EXIST; + } + + /* get bucket */ + auto buck = &(mSubMaps[key % gSubMapCount][key % mBucketCount]); + + /* loop all buckets linked */ + uint64_t value; + while (buck != nullptr) { + if (buck->Find(key, value)) { + auto ret = buck->Remove(key, beforeRemoveFunc); + if (HM_UNLIKELY(ret == FkvState::FKV_BEFORE_REMOVE_FUNC_FAIL)) { + return FkvState::FKV_BEFORE_REMOVE_FUNC_FAIL; + } + + current_size--; + return FkvState::FKV_EXIST; + } + + buck = buck->next; + } + + return FkvState::FKV_NOT_EXIST; + } + + FkvState Put(uint64_t key, uint64_t value) + { + if (HM_UNLIKELY(key == 0)) { + if (__sync_bool_compare_and_swap(&zeroInside, false, true)) { + zeroValue = value; + current_size++; + return FkvState::FKV_NOT_EXIST; + } + return FkvState::FKV_KEY_CONFLICT; + } + + /* get bucket */ + auto buck = &(mSubMaps[key % gSubMapCount][key % mBucketCount]); + /* loop all buckets linked */ + while (buck != nullptr) { + if (buck->next != nullptr) { + buck = buck->next; + } else { + break; + } + } + + // did not find, now do put. continue from the last bucket in find + /* try 8192 times */ + for (uint16_t i = 0; i < 8192; i++) { + /* loop all buckets linked */ + while (buck != nullptr) { + /* if there is an entry to put, just break */ + FkvState putRet = buck->Put(key, value, []() -> BeforePutFuncState { return {}; }); + if (putRet == FkvState::FKV_NOT_EXIST) { + current_size++; + return FkvState::FKV_NOT_EXIST; + } + + if (HM_UNLIKELY(putRet == FkvState::FKV_KEY_CONFLICT)) { + return FkvState::FKV_KEY_CONFLICT; + } + /* + * if no next bucket exist, just for break, + * else move to next bucket linked + */ + if (buck->next == nullptr) { + break; + } else { + buck = buck->next; + } + } + + /* + * if not put successfully in existing buckets, allocate a new one + * + * NOTES: just allocate memory, don't access new bucket in the spin lock scope, + * if access new bucket, which could trigger physical memory allocation which + * could trigger page fault, that is quite slow. 
In this case, spin lock + * could occupy too much CPU + */ + auto &lock = buck->spinLock; + lock.Lock(); + /* if other thread allocated new buck already, unlock and continue */ + if (buck->next != nullptr) { + buck = buck->next; + lock.UnLock(); + continue; + } + + /* firstly entered thread allocate new bucket */ + auto newBuck = static_cast(mOverflowEntryAlloc->Allocate(sizeof(NetHashBucket))); + if (HM_UNLIKELY(newBuck == nullptr)) { + lock.UnLock(); + ock::ExternalLogger::PrintLog(ock::LogLevel::ERROR, "Failed to allocate new bucket"); + return FkvState::FKV_FAIL; + } + /* link to current buck, set buck to new buck */ + buck->next = newBuck; + buck = newBuck; + + /* unlock */ + lock.UnLock(); + } + return FkvState::FKV_FAIL; + } + + bool Find(const uint64_t key, uint64_t &value) + { + if (HM_UNLIKELY(key == 0)) { + if (zeroInside) { + value = zeroValue; + return true; + } + return false; + } + /* get bucket */ + auto buck = &(mSubMaps[key % gSubMapCount][key % mBucketCount]); + + /* loop all buckets linked */ + while (buck != nullptr) { + if (buck->Find(key, value)) { + return true; + } + + buck = buck->next; + } + + return false; + } + + /* When used in muti thread, this function can only be used when keys are uniqued */ + FkvState FindAndDeleteIfFound(const uint64_t key, uint64_t &value, + const std::function &beforeRemoveFunc) + { + if (HM_UNLIKELY(key == 0)) { + if (!zeroInside) { + return FkvState::FKV_NOT_EXIST; + } + value = zeroValue; + if (__sync_bool_compare_and_swap(&zeroInside, true, false)) { + auto ret = beforeRemoveFunc(zeroValue); + if (HM_UNLIKELY(ret == BeforeRemoveFuncState::BEFORE_FAIL)) { + return FkvState::FKV_BEFORE_REMOVE_FUNC_FAIL; + } + zeroValue = 0; + current_size--; + } + + return FkvState::FKV_EXIST; + } + /* get bucket */ + auto buck = &(mSubMaps[key % gSubMapCount][key % mBucketCount]); + + while (buck != nullptr) { + if (buck->Find(key, value)) { + auto ret = buck->Remove(key, beforeRemoveFunc); + if (HM_UNLIKELY(ret == FkvState::FKV_BEFORE_REMOVE_FUNC_FAIL)) { + return FkvState::FKV_BEFORE_REMOVE_FUNC_FAIL; + } + current_size--; + return FkvState::FKV_EXIST; + } + + buck = buck->next; + } + + return FkvState::FKV_NOT_EXIST; + } + + std::vector> ExportVec() + { + std::vector> kvVec; + if (zeroInside) { + kvVec.emplace_back(0, zeroValue); + } + for (auto &mSubMap : mSubMaps) { + for (uint32_t j = 0; j < mBucketCount; j++) { + auto buck = &mSubMap[j]; + ExtractKeyValInBuck(buck, kvVec); + } + } + return kvVec; + } + +protected: + static constexpr uint16_t gSubMapCount = 5; /* count of sub map */ + static constexpr uint32_t gPrimesCount = 256; + + /* make sure the size of this class is 64 bytes, fit into one cache line */ + NetHeapAllocator *mOverflowEntryAlloc = nullptr; /* allocate overflowed entry in one bucket */ + NetHashBucket *mSubMaps[gSubMapCount]{}; /* sub map */ + uint32_t mBucketCount = 0; /* bucket count of each sub map */ + uint32_t mBaseSize = 4096; /* base size */ + bool zeroInside = false; + uint64_t zeroValue = 0; + + const uint32_t gPrimes[gPrimesCount] = {2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, + 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, + 97, 103, 109, 113, 127, 137, 139, 149, 157, 167, + 179, 193, 199, 211, 227, 241, 257, 277, 293, 313, + 337, 359, 383, 409, 439, 467, 503, 541, 577, 619, + 661, 709, 761, 823, 887, 953, 1031, 1109, 1193, 1289, + 1381, 1493, 1613, 1741, 1879, 2029, 2179, 2357, 2549, + 2753, 2971, 3209, 3469, 3739, 4027, 4349, 4703, 5087, + 5503, 5953, 6427, 6949, 7517, 8123, 8783, 9497, 10273, + 11113, 12011, 
12983, 14033, 15173, 16411, 17749, 19183, + 20753, 22447, 24281, 26267, 28411, 30727, 33223, 35933, + 38873, 42043, 45481, 49201, 53201, 57557, 62233, 67307, + 72817, 78779, 85229, 92203, 99733, 107897, 116731, 126271, + 136607, 147793, 159871, 172933, 187091, 202409, 218971, 236897, + 256279, 277261, 299951, 324503, 351061, 379787, 410857, 444487, + 480881, 520241, 562841, 608903, 658753, 712697, 771049, 834181, + 902483, 976369, 1056323, 1142821, 1236397, 1337629, 1447153, + 1565659, 1693859, 1832561, 1982627, 2144977, 2320627, 2510653, + 2716249, 2938679, 3179303, 3439651, 3721303, 4026031, 4355707, + 4712381, 5098259, 5515729, 5967347, 6456007, 6984629, 7556579, + 8175383, 8844859, 9569143, 10352717, 11200489, 12117689, + 13109983, 14183539, 15345007, 16601593, 17961079, 19431899, + 21023161, 22744717, 24607243, 26622317, 28802401, 31160981, + 33712729, 36473443, 39460231, 42691603, 46187573, 49969847, + 54061849, 58488943, 63278561, 68460391, 74066549, 80131819, + 86693767, 93793069, 101473717, 109783337, 118773397, 128499677, + 139022417, 150406843, 162723577, 176048909, 190465427, + 206062531, 222936881, 241193053, 260944219, 282312799, + 305431229, 330442829, 357502601, 386778277, 418451333, + 452718089, 489790921, 529899637, 573292817, 620239453, + 671030513, 725980837, 785430967, 849749479, 919334987, + 994618837, 1076067617, 1164186217, 1259520799, 1362662261, + 1474249943, 1594975441, 1725587117, 1866894511, 2019773507, + 2185171673, 2364114217, 2557710269, 2767159799, 2993761039, + 3238918481, 3504151727, 3791104843, 4101556399, 4294967291}; + +private: + void FreeSubMaps() + { + /* free all sub maps */ + for (auto &mSubMap : mSubMaps) { + if (mSubMap != nullptr) { + delete[] mSubMap; + mSubMap = nullptr; + } + } + } + + void FreeOverFlowedEntries() + { + for (auto &mSubMap : mSubMaps) { + if (mSubMap == nullptr) { + continue; + } + + /* free overflow entries in one sub map */ + for (uint32_t buckIndex = 0; buckIndex < mBucketCount; ++buckIndex) { + auto curBuck = mSubMap[buckIndex].next; + NetHashBucket *nextOverflowEntryBuck = nullptr; + + /* exit loop when curBuck is null */ + while (curBuck != nullptr) { + /* assign next overflow buck to tmp variable */ + nextOverflowEntryBuck = curBuck->next; + + /* free this overflow bucket */ + mOverflowEntryAlloc->Free(curBuck); + + /* assign next to current */ + curBuck = nextOverflowEntryBuck; + } + } + } + } + + FkvState PutKeyValue(uint64_t key, uint64_t& value, EmbCache::NetHashBucket *buck, + const std::function& beforePutFunc) + { + /* try 8192 times */ + for (uint16_t i = 0; i < 8192; i++) { + /* loop all buckets linked */ + while (buck != nullptr) { + /* if there is an entry to put, just break */ + buck->spinLock.Lock(); + FkvState putRet = buck->Put(key, value, beforePutFunc); + buck->spinLock.UnLock(); + if (putRet == FkvState::FKV_NOT_EXIST) { + current_size++; + return FkvState::FKV_NOT_EXIST; + } + + if (HM_UNLIKELY(putRet == FkvState::FKV_KEY_CONFLICT)) { + return FkvState::FKV_KEY_CONFLICT; + } + + if (HM_UNLIKELY(putRet == FkvState::FKV_BEFORE_PUT_FUNC_FAIL)) { + return FkvState::FKV_BEFORE_PUT_FUNC_FAIL; + } + + if (HM_UNLIKELY(putRet == FkvState::FKV_NO_SPACE)) { + return FkvState::FKV_NO_SPACE; + } + + /* + * if no next bucket exist, just for break, + * else move to next bucket linked + */ + if (buck->next == nullptr) { + break; + } else { + buck = buck->next; + } + } + + /* + * if not put successfully in existing buckets, allocate a new one + * + * NOTES: just allocate memory, don't access new bucket in the spin 
lock scope, + * if access new bucket, which could trigger physical memory allocation which + * could trigger page fault, that is quite slow. In this case, spin lock + * could occupy too much CPU + */ + auto &lock = buck->spinLock; + lock.Lock(); + /* if other thread allocated new buck already, unlock and continue */ + if (buck->next != nullptr) { + buck = buck->next; + lock.UnLock(); + continue; + } + + /* firstly entered thread allocate new bucket */ + auto newBuck = static_cast(mOverflowEntryAlloc->Allocate(sizeof(NetHashBucket))); + if (HM_UNLIKELY(newBuck == nullptr)) { + lock.UnLock(); + ock::ExternalLogger::PrintLog(ock::LogLevel::ERROR, "Failed to allocate new bucket"); + return FkvState::FKV_FAIL; + } + /* link to current buck, set buck to new buck */ + buck->next = newBuck; + buck = newBuck; + + /* unlock */ + lock.UnLock(); + } + return FkvState::FKV_FAIL; + } + + void ExtractKeyValInBuck(EmbCache::NetHashBucket *buck, std::vector>& kvVec) + { + while (buck) { + for (size_t k = 0; k < K_KVNUMINBUCKET; k++) { + if (buck->keys[k] == 0) { + continue; + } + kvVec.emplace_back(buck->keys[k].load(), buck->values[k]); + } + buck = buck->next; + } + } +}; +} +#endif // MXREC_MAPPER_BASE_H diff --git a/src/AccCTR/src/embedding_cache/offset_mapper/offset_mapper.h b/src/AccCTR/src/embedding_cache/offset_mapper/offset_mapper.h new file mode 100644 index 00000000..80170989 --- /dev/null +++ b/src/AccCTR/src/embedding_cache/offset_mapper/offset_mapper.h @@ -0,0 +1,248 @@ +/* Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
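mapper_base.h above is the workhorse: five sub-maps of 64-byte buckets, three CAS-published key/value slots per bucket, spin-locked overflow chaining, a prime bucket count picked from gPrimes, and a dedicated side slot for key 0 (which the buckets use as their empty marker). A single-threaded sketch of the find-or-insert path; main, the include path and the toy allocator variable next are illustrative, while FkvState and BeforePutFuncState come from embedding_cache/common.h as included above:

    #include <cstdint>
    #include "embedding_cache/offset_mapper/mapper_base.h"

    int main()
    {
        EmbCache::MapperBase map;
        if (!map.Initialize(/*reserve=*/1024)) {        // rounded up to a prime bucket count
            return 1;
        }
        uint64_t next = 0;                              // toy offset allocator (illustrative)
        uint64_t off = 0;
        auto st = map.FindAndPutIfNotFound(42, off, [&off, &next]() {
            off = next++;                               // runs only when the key is new
            return EmbCache::BeforePutFuncState::BEFORE_SUCCESS;
        });
        // First call: st == FKV_NOT_EXIST and off was freshly assigned; later calls
        // return FKV_EXIST with off loaded from the map.
        map.UnInitialize();                             // the destructor does not clean up
        return (st == EmbCache::FkvState::FKV_NOT_EXIST) ? 0 : 1;
    }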
+ ==============================================================================*/ + +#ifndef MXREC_OFFSET_MAPPER_H +#define MXREC_OFFSET_MAPPER_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "mapper_base.h" + +namespace EmbCache { +class OffsetMapper : public MapperBase { +public: + OffsetMapper() = default; + + ~OffsetMapper() = default; + + bool Initialize(uint32_t reserve, uint32_t maxSize = 0) + { + maxCacheSize = maxSize; + useLength = 0; + pos2Key.resize(maxSize); + std::fill(pos2Key.begin(), pos2Key.end(), INVALID_KEY); + try { + validPos = new LimitedSet(maxSize); + evictPos = new LimitedSet(maxSize); + } catch (const std::bad_alloc &e) { + return false; + } + return MapperBase::Initialize(reserve); + } + + void UnInitialize() override + { + delete validPos; + delete evictPos; + validPos = nullptr; + evictPos = nullptr; + MapperBase::UnInitialize(); + } + + FkvState Remove(uint64_t key) + { + return MapperBase::Remove(key, [&](uint64_t value) { + validPos->remove(value); + auto pos = std::find(lastBatchPos.begin(), lastBatchPos.end(), value); + if (pos != lastBatchPos.end()) { + lastBatchPos.erase(pos); + } + evictPos->insert(value); + evictSize++; + return BeforeRemoveFuncState::BEFORE_SUCCESS; + }); + } + + std::vector> ExportSortedKVPairs() + { + auto koVec = ExportVec(); + std::sort(koVec.begin(), koVec.end(), [](const auto &u, const auto &v) { return u.second < v.second; }); + return koVec; + } + + uint64_t GetFreeLength() + { + return maxCacheSize - useLength + evictSize; + } + + int GetSwapPairsAndKey2Offset(std::vector& keys, KeyOffsetPair& swapInKoPair, + KeyOffsetPair& swapOutKoPair) + { + std::vector swapInKeysID = FilterKeys(keys, swapInKoPair); + + uint64_t swapInCnt = 0; + int ret = FindInUsedPos(keys, swapInCnt, swapInKeysID, swapInKoPair, swapOutKoPair); + if (ret != ock::ctr::H_OK) { + return ret; + } + + // 剩下的Key从om中分配位置 + ret = FindInOffsetMapper(keys, swapInKoPair, swapInCnt, swapInKeysID); + if (ret != ock::ctr::H_OK) { + return ret; + } + + // 上个batch中的pos可被换出,加入validPos中 + for (uint64_t pos : lastBatchPos) { + if (HM_UNLIKELY(pos == static_cast(INVALID_KEY))) { + continue; + } + validPos->insert(pos); + } + + // 这里keys都已被替换成offset,这个batch使用的pos在下个batch不能被换出,移出validPos + for (uint64_t pos : keys) { + if (HM_UNLIKELY(pos == static_cast(INVALID_KEY))) { + continue; + } + validPos->remove(pos); + evictPos->remove(pos); + } + + lastBatchPos = keys; + return ock::ctr::H_OK; + } + + uint32_t GetUsage() + { + return useLength - evictSize; + } + + uint64_t FindInUsedPos(std::vector& keys, uint64_t& swapInCnt, std::vector& swapInKeysID, + KeyOffsetPair& swapInKoPair, KeyOffsetPair& swapOutKoPair) + { + std::vector &swapInKeys = swapInKoPair.first; + std::vector &swapInPos = swapInKoPair.second; + std::vector &swapOutKeys = swapOutKoPair.first; + std::vector &swapOutPos = swapOutKoPair.second; + + // 换出量 = 换入量 - 剩余空间 + uint64_t swapOutNum = swapInKeys.size() <= GetFreeLength() ? 
0 : swapInKeys.size() - GetFreeLength(); + swapOutKeys.resize(swapOutNum); + swapOutPos.resize(swapOutNum); + + // 空间不足,前swapOutNum个Key从evictPos中拿可换出位置 + for (uint64_t pos : *evictPos) { + if (swapInCnt == swapInKeys.size()) { + break; + } + // 记录swapInPos + swapInPos[swapInCnt] = pos; + // key->offset + keys[swapInKeysID[swapInCnt]] = pos; + // 放入新key-pos + Put(swapInKeys[swapInCnt], pos); + // 更新pos2Key + pos2Key[pos] = swapInKeys[swapInCnt]; + swapInCnt++; + evictSize--; + } + + uint64_t swapOutCnt = 0; + // 空间不足,前swapOutNum个Key从validPos中拿可换出位置 + for (uint64_t pos : *validPos) { + if (swapOutCnt == swapOutNum) { + break; + } + // 记录swapInPos + swapInPos[swapInCnt] = pos; + // key->offset + keys[swapInKeysID[swapInCnt]] = pos; + // 删除原key-pos,放入新key-pos + uint64_t key = pos2Key[pos]; + MapperBase::Remove(key); + Put(swapInKeys[swapInCnt], pos); + // 记录swapOutKoPair + swapOutKeys[swapOutCnt] = key; + swapOutPos[swapOutCnt] = pos; + // 更新pos2Key + pos2Key[pos] = swapInKeys[swapInCnt]; + swapInCnt++; + swapOutCnt++; + } + + if (swapOutCnt < swapOutNum) { + ock::ExternalLogger::PrintLog(ock::LogLevel::ERROR, "max cache size is too small"); + return ock::ctr::H_MAX_CACHESIZE_TOO_SMALL; + } + + return ock::ctr::H_OK; + } + + int FindInOffsetMapper(std::vector& keys, KeyOffsetPair& swapInKoPair, uint64_t swapInCnt, + std::vector& swapInKeysID) + { + std::vector &swapInKeys = swapInKoPair.first; + std::vector &swapInPos = swapInKoPair.second; + + for (uint64_t i = swapInCnt; i < swapInKeys.size(); i++) { + swapInPos[i] = useLength++; + if (HM_UNLIKELY(swapInPos[i] >= maxCacheSize)) { + ock::ExternalLogger::PrintLog(ock::LogLevel::ERROR, "max cache size is too small"); + return ock::ctr::H_MAX_CACHESIZE_TOO_SMALL; + } + // 放入新key-pos + Put(swapInKeys[i], swapInPos[i]); + // 更新pos2Key + pos2Key[swapInPos[i]] = swapInKeys[i]; + // key->offset + keys[swapInKeysID[i]] = swapInPos[i]; + } + return ock::ctr::H_OK; + } + + std::vector FilterKeys(std::vector& keys, KeyOffsetPair &swapInKoPair) + { + std::vector &swapInKeys = swapInKoPair.first; + std::vector &swapInPos = swapInKoPair.second; + + std::vector swapInKeysID; + for (uint64_t i = 0; i < keys.size(); i++) { + // Invalid key 不考虑 + if (HM_UNLIKELY(keys[i] == static_cast(INVALID_KEY))) { + continue; + } + // 在HBM中的key, 原地替换为pos后从validPos中移除 + // 不在HBM中的key,加入swapInKeys,并记录在keys中的下标,用于后续key->offset + if (Find(keys[i], keys[i])) { + validPos->remove(keys[i]); + } else { + swapInKeys.push_back(keys[i]); + swapInKeysID.push_back(i); + } + } + swapInPos.resize(swapInKeys.size()); + return swapInKeysID; + } + +private: + uint32_t maxCacheSize{}; // HBM可容纳embedding条数 + uint32_t useLength{}; // HBM存储的embedding条数 + LimitedSet *validPos{}; // HBM中可被换出的位置 + LimitedSet *evictPos{}; // 淘汰出的位置 + std::vector pos2Key; // HBM中每个位置对应的key + std::vector lastBatchPos; // 上个batch的keys在HBM中占用的pos + uint64_t evictSize; // evictPos的长度 +}; +} +#endif // MXREC_OFFSET_MAPPER_H diff --git a/src/AccCTR/src/factory_impl.cpp b/src/AccCTR/src/factory_impl.cpp index f0f5cdac..654e1d76 100644 --- a/src/AccCTR/src/factory_impl.cpp +++ b/src/AccCTR/src/factory_impl.cpp @@ -54,6 +54,17 @@ int FactoryImpl::CreateUnique(std::shared_ptr &out) return H_OK; } +int FactoryImpl::CreateEmbCacheManager(std::shared_ptr &out) +{ + auto tmp = new (std::nothrow) EmbCache::EmbCacheManagerImpl(); + if (tmp == nullptr) { + return H_NEW_OBJECT_FAILED; + } + + out.reset(dynamic_cast(tmp)); + return H_OK; +} + int FactoryImpl::SetExternalLogFuncInner(ExternalLog logFunc) { auto logger = 
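To make the swap protocol above concrete: GetSwapPairsAndKey2Offset rewrites keys in place into device offsets, and positions used by the immediately preceding batch stay pinned (they only enter validPos one call later), so eviction candidates always come from at least two batches back. A sketch with a 4-slot device cache; main and the include path are illustrative, and a value-initialized evictSize member (uint64_t evictSize{}) is assumed, since the header as posted never initializes it before GetFreeLength reads it:

    #include <cstdint>
    #include <vector>
    #include "embedding_cache/offset_mapper/offset_mapper.h"

    int main()
    {
        EmbCache::OffsetMapper om;
        if (!om.Initialize(/*reserve=*/128, /*maxSize=*/4)) {
            return 1;
        }
        EmbCache::KeyOffsetPair in, out;

        std::vector<uint64_t> b1 = {10, 11, 12, 13};     // cold start: fills all 4 slots;
        om.GetSwapPairsAndKey2Offset(b1, in, out);       // b1 now holds offsets, out is empty

        std::vector<uint64_t> b2 = {10, 11};             // pure hits: no swap traffic
        in = {}; out = {};
        om.GetSwapPairsAndKey2Offset(b2, in, out);

        std::vector<uint64_t> b3 = {14, 15};             // misses: 12 and 13 are swapped out,
        in = {}; out = {};                               // since their slots left b2's pin set
        om.GetSwapPairsAndKey2Offset(b3, in, out);
        // in  = (keys, offsets) to copy host -> device; out = (keys, offsets) device -> host
        om.UnInitialize();
        return 0;
    }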
ExternalLogger::Instance();
diff --git a/src/AccCTR/src/factory_impl.h b/src/AccCTR/src/factory_impl.h
index cc1c025a..aa5cd211 100644
--- a/src/AccCTR/src/factory_impl.h
+++ b/src/AccCTR/src/factory_impl.h
@@ -17,6 +17,7 @@ limitations under the License.
 #include "include/factory.h"
 #include "unique/unique_impl.h"
+#include "embedding_cache/cache_manager/cache_manager.h"
 namespace ock {
 namespace ctr {
@@ -27,6 +28,7 @@ public:
 public:
     int CreateUnique(std::shared_ptr<Unique> &out) override;
+    int CreateEmbCacheManager(std::shared_ptr<EmbCache::EmbCacheManager> &out) override;
     int SetExternalLogFuncInner(ExternalLog logFunc) override;
 public:
diff --git a/src/AccCTR/src/include/CMakeLists.txt b/src/AccCTR/src/include/CMakeLists.txt
index c9d2b215..7f8b2b6d 100644
--- a/src/AccCTR/src/include/CMakeLists.txt
+++ b/src/AccCTR/src/include/CMakeLists.txt
@@ -12,7 +12,7 @@
 # limitations under the License.
 # ==============================================================================
-set(INCLUDE_HEADERS factory.h ock_ctr_common_def.h unique.h)
+set(INCLUDE_HEADERS factory.h ock_ctr_common_def.h unique.h embedding_cache.h)
 set(TARGET_INSTALL_INCLUDE ${OUTPUT}/ock_ctr_common/include)
diff --git a/src/AccCTR/src/include/embedding_cache.h b/src/AccCTR/src/include/embedding_cache.h
new file mode 100644
index 00000000..4adf1fbf
--- /dev/null
+++ b/src/AccCTR/src/include/embedding_cache.h
@@ -0,0 +1,321 @@
+/* Copyright (c) Huawei Technologies Co., Ltd. 2022-2024. All rights reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
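Consumers reach the new cache manager through the factory: the hunks further down rename CreatFactory to CreateFactory and add CreateEmbCacheManager alongside CreateUnique. A bootstrap sketch; BuildCache is an illustrative name, and the static Factory::Create wrapper is inferred from the out.reset(...) body shown in include/factory.h below, not confirmed by this patch:

    #include "factory.h"

    int BuildCache(ock::ctr::EmbCacheManagerPtr &cacheMgr)
    {
        ock::ctr::FactoryPtr factory;
        int ret = ock::ctr::Factory::Create(factory);     // dlopen-backed CreateFactory underneath
        if (ret != 0) {
            return ret;
        }
        return factory->CreateEmbCacheManager(cacheMgr);  // H_OK on success
    }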
+ ==============================================================================*/ + +#ifndef EMBEDDING_CACHE_H +#define EMBEDDING_CACHE_H + +#include +#include +#include +#include + +namespace EmbCache { +using KeyOffsetPair = std::pair, std::vector>; + +class Initializer { +public: + Initializer() = default; + virtual ~Initializer() = default; + + /* * + * 生成随机数 + * @Param emb embedding的首地址 + */ + virtual void GenerateData(float* emb, int embSize) = 0; + uint32_t start{}; // 起始位置 + uint32_t len{}; // 初始化的长度 + float initParam = 1.0; // 初始化器生成的初始值均需要乘以initParam +}; + +enum class InitializerType { + INVALID, + CONSTANT, + TRUNCATED_NORMAL, + RANDOM_NORMAL +}; + +struct ConstantInitializerInfo { + ConstantInitializerInfo() = default; + + ConstantInitializerInfo(float constantValue, float initK); + + float constantValue = 0; // 常量值 + float initK = 1.0; // 初始化出来的值需乘以initK +}; + +struct NormalInitializerInfo { + NormalInitializerInfo() = default; + + NormalInitializerInfo(float mean, float stddev, uint32_t seed, float initK); + + float mean = 0; // 平均值 + float stddev = 0; // 标准差 + uint32_t seed = 0; // 随机数种子 + float initK = 1.0; // 初始化出来的值需乘以initK +}; + +class ConstantInitializer : public Initializer { +public: + ConstantInitializer() = default; + + ConstantInitializer(uint32_t start, uint32_t len, float value, float initK); + + ~ConstantInitializer() override = default; + + void GenerateData(float* emb, int embSize) override; + + uint32_t start = 0; // 起始位置 + uint32_t len = 0; // 初始化的长度 + float constantValue = 0; // 常量值 +}; + +class RandomNormalInitializer : public Initializer { +public: + RandomNormalInitializer() = default; + RandomNormalInitializer(uint32_t start, uint32_t len, NormalInitializerInfo& initInfo); + + ~RandomNormalInitializer() override = default; + + void GenerateData(float* emb, int embSize) override; + + uint32_t start = 0; // 起始位置 + uint32_t len = 0; // 初始化的长度 + float mean = 0; // 平均值 + float stddev = 0; // 标准差 + uint32_t seed = 0; // 随机数种子 + + std::default_random_engine generator; // 随机数生成器 + std::normal_distribution distribution; // 正态分布 +}; + +class TruncatedNormalInitializer : public Initializer { +public: + TruncatedNormalInitializer() = default; + + TruncatedNormalInitializer(uint32_t start, uint32_t len, NormalInitializerInfo& initInfo); + + ~TruncatedNormalInitializer() override = default; + + void GenerateData(float* emb, int embSize) override; + + int boundNum = 2; + + uint32_t start = 0; // 起始位置 + uint32_t len = 0; // 初始化的长度 + float mean = 0; // 平均值 + float stddev = 0; // 标准差 + uint32_t seed = 0; // 随机数种子 + + std::default_random_engine generator; // 随机数生成器 + std::normal_distribution distribution; + float minBound = 0; // 下界 + float maxBound = 0; // 上界 +}; + +struct InitializerInfo { + InitializerInfo() = default; + + InitializerInfo(std::string& name, uint32_t start, uint32_t len, ConstantInitializerInfo constantInitializerInfo); + + InitializerInfo(std::string& name, uint32_t start, uint32_t len, NormalInitializerInfo normalInitializerInfo); + + std::string name = ""; // 初始化器的名称 + uint32_t start = 0; // 初始化开始的位置 + uint32_t len = 0; // 待初始化的长度 + InitializerType initializerType = InitializerType::INVALID; + + ConstantInitializerInfo constantInitializerInfo; + NormalInitializerInfo normalInitializerInfo; + + std::shared_ptr initializer; +}; + +struct EmbCacheInfo { + EmbCacheInfo(std::string tableName, uint32_t vocabSize, uint32_t embeddingSize, uint32_t extEmbeddingSize, + uint32_t maxCacheSize) + : tableName(tableName), + vocabSize(vocabSize), + 
embeddingSize(embeddingSize), + extEmbeddingSize(extEmbeddingSize), + maxCacheSize(maxCacheSize) + { + } + std::string tableName = ""; + uint32_t vocabSize = 0; // host侧的容量(能存多少条embedding) + uint32_t embeddingSize = 0; + uint32_t extEmbeddingSize = 0; // 包含embedding和优化器信息的embedding长度 + uint32_t maxCacheSize = 0; // device侧的容量(能存多少条embedding) +}; + +class EmbCacheManager { +public: + virtual ~EmbCacheManager() = default; + + /* * + * 对当前embInfo对应的table在cache_manager中进行table初始化 + * @Param EmbCacheInfo: embedding cache的初始化信息 + * @Param std::vector 初始化器的信息 + * @Param uint64_t prefillBufferSize emb内存池恒定可用大小 + * @Param uint32_t refillThreadNum emb内存池自动填充线程数 + * @Return errorCode + */ + virtual int CreateCacheForTable(const EmbCacheInfo& embCacheInfo, + const std::vector& initializerInfos, int64_t invalidKey = -1, + uint64_t prefillBufferSize = 500000, uint32_t refillThreadNum = 1) = 0; + + /* * + * 查找当前keys对应的offsets并将本不存在与offsetMapper中的keys插入到offsetMapper中并得到其偏移值offsets, + * 并且当offsetMapper可存放空间不足时,释放可swapOut的keys,获取当前需要被换入换出的keys和offsets的pair + * @Param tableName: 表名 + * @Param keys: 当前batch所有unique的keys + * @Param swapInKoPair: 输出参数,需要换入的Key-offset pair + * @Param swapOutKoPair: 输出参数,需要换出的Key-offset pair + * @Return errorCode + */ + virtual int GetSwapPairsAndKey2Offset(const std::string& tableName, std::vector& keys, + KeyOffsetPair& swapInKoPair, KeyOffsetPair& swapOutKoPair) = 0; + + /* * + * 查询Embedding + * @Param tableName: 表名 + * @Param keys: 待查询的keys + * @Param embAddr: 申请出来存放embedding的空间首地址 + * @Param threadNum: 线程数 + * @Return errorCode + */ + virtual int EmbeddingLookup(const std::string& tableName, const std::vector& keys, float* embAddr, + uint32_t threadNum = 4) = 0; + + /* * + * 查询Embedding的地址 + * @Param tableName: 表名 + * @Param keys: 待查询的keys + * @Param addrs: keys对应的申请出来存放embedding的空间首地址 + * @Param threadNum: 线程数 + * @Return errorCode + */ + virtual int EmbeddingLookupAddrs(const std::string& tableName, const std::vector& keys, + std::vector& addrs, uint32_t threadNum = 4) = 0; + + /* * + * 查询Embedding并且在查询完成之后删除embedding对应的key。如果多线程使用,严格保证传入的key线程间不会重复(unique + * key),否则可能出现未定义结果 + * @Param tableName: 表名 + * @Param keys: 待查询的keys + * @Param embAddr: 申请出来存放embedding的空间首地址 + * @Param threadNum: 线程数 + * @Return errorCode + */ + virtual int EmbeddingLookupAndRemove(const std::string& tableName, const std::vector& keys, + float* embAddr, uint32_t threadNum = 4) = 0; + + /* * + * 更新Embedding + * @Param tableName: 表名 + * @Param keys: 待更新的keys,用于查询出每个key在DDR上存放的地址 + * @Param embAddr: 待更新到DDR上的embedding的首地址 + * @Param threadNum: 线程数 + * @Return errorCode + */ + virtual int EmbeddingUpdate(const std::string& tableName, const std::vector& keys, float* embAddr, + uint32_t threadNum = 4) = 0; + + /* * + * 在EmbLocalTable中移除keys,并将存储其embedding的内存位置记为可复用 + * @Param tableName: 表名 + * @Param keys: 待移除的keys + * @Return errorCode + */ + virtual int EmbeddingRemove(const std::string& tableName, const std::vector& keys, + uint32_t threadNum = 4) = 0; + + /* * + * 将需要被淘汰的keys从offsetMapper的记录中移除,同时也在EmbLocalTable中移除,并将存储其embedding的内存位置记为可复用 + * @Param tableName: 表名 + * @Param keys: 待淘汰的keys + * @Return errorCode + */ + virtual int RemoveEmbsByKeys(const std::string& tableName, const std::vector& keys) = 0; + + /* * + * 获取所有table names + * @Param allTableNames: 输出参数,用于存放所有的table names + * @Return errorCode + */ + virtual int GetEmbTableNames(std::vector& allTableNames) = 0; + + /* * + * 获取以values为增序排列的当前记录在offsetMapper中所有的keys和values的pairs + * @Param tableName: 表名 + * koVec: 输出参数 + * @Return 
errorCode + */ + virtual int ExportDeviceKeyOffsetPairs(const std::string& tableName, + std::vector>& koVec) = 0; + + /* * + * 获取当前table的序列化信息 + * @Param tableName: 要序列化的表 + * @Param buffer: 输出参数,存储序列化之后的信息 + * @Return errorCode + */ + virtual int Serialize(const std::string& tableName, std::vector& buffer) = 0; + + /* * + * 将当前table的序列化信息进行反序列化 + * @Param tableName: 要反序列化的表 + * @Param buffer: 输入参数,将buffer中的内容进行反序列化 + * @Return errorCode + */ + virtual int Deserialize(const std::string& tableName, const std::vector& buffer) = 0; + + /* * + * 析构所有embCache,释放内存 + */ + virtual void Destroy() = 0; + + /* * + * 查询表的使用量 + * @Param tableName: 要查询的表 + * @Return 当前表的使用量 + */ + virtual uint32_t GetUsage(const std::string& tableName) = 0; + + /* * + * 获取当前host侧所存储的所有keys及其对应的embeddings和优化器参数 + * @Param tableName: 需要获取信息的table名字 + * @Param keys: 输入参数,输入空vector,获取的存储的所有keys会赋到该vector中 + * @Param embeddings: 输入参数,输入空vector,获取的存储的所有embeddings会赋到该vector中 + * @Param optimizerSlots: 输入参数,输入空vector,获取的存储的所有optimizerSlots会赋到该vector中 + * @Return errorCode + */ + virtual int GetEmbTableInfos(std::string tableName, std::vector& keys, + std::vector>& embeddings, + std::vector>& optimizerSlots) = 0; + + /* * + * 将所需存储的keys及其对应的embeddings和优化器参数传入,来装载LocalEmbeddingTable + * @Param tableName: 需要加载信息的table名字 + * @Param keys: 输入参数,需要加载的所有keys + * @Param embeddings: 输入参数,需要加载的所有embeddings + * @Param optimizerSlots: 输入参数,需要加载的所有optimizerSlots + * @Return errorCode + */ + virtual int LoadEmbTableInfos(std::string tableName, const std::vector& keys, + const std::vector>& embeddings, + const std::vector>& optimizerSlots) = 0; +}; +} // namespace EmbCache + +#endif // EMBEDDING_CACHE_H diff --git a/src/AccCTR/src/include/factory.h b/src/AccCTR/src/include/factory.h index 14732cf9..69e8217a 100644 --- a/src/AccCTR/src/include/factory.h +++ b/src/AccCTR/src/include/factory.h @@ -19,6 +19,7 @@ limitations under the License. 
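Pulling the interface together, an end-to-end sketch of one training step against EmbCacheManager. RunOneBatch and the numeric arguments are illustrative, the per-key lookup width is assumed to be extEmbeddingSize floats, the manager instance is assumed to come from CreateEmbCacheManager, and which InitializerType a given initializer name selects is an implementation detail not visible in this header:

    #include <cstdint>
    #include <string>
    #include <vector>
    #include "embedding_cache.h"

    int RunOneBatch(EmbCache::EmbCacheManager &cacheMgr)
    {
        std::string table = "user_emb";
        std::string initName = "trunc_normal";
        std::vector<EmbCache::InitializerInfo> inits;
        inits.emplace_back(initName, /*start=*/0, /*len=*/16,
                           EmbCache::NormalInitializerInfo(/*mean=*/0.0f, /*stddev=*/0.05f,
                                                           /*seed=*/2024, /*initK=*/1.0f));
        EmbCache::EmbCacheInfo info(table, /*vocabSize=*/1000000, /*embeddingSize=*/16,
                                    /*extEmbeddingSize=*/32, /*maxCacheSize=*/100000);
        int ret = cacheMgr.CreateCacheForTable(info, inits);
        if (ret != 0) {
            return ret;
        }

        std::vector<uint64_t> keys = {7, 42, 99};                // already unique, as required
        EmbCache::KeyOffsetPair swapIn, swapOut;
        ret = cacheMgr.GetSwapPairsAndKey2Offset(table, keys, swapIn, swapOut);
        if (ret != 0) {
            return ret;
        }

        std::vector<float> hostBuf(swapIn.first.size() * 32);    // 32 == extEmbeddingSize
        ret = cacheMgr.EmbeddingLookup(table, swapIn.first, hostBuf.data());
        // ... copy hostBuf to the device at offsets swapIn.second, move swapOut rows the
        // other way, run the step, then persist updated rows via EmbeddingUpdate(...) ...
        return ret;
    }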
#include #include #include "unique.h" +#include "embedding_cache.h" #ifdef __cplusplus @@ -39,11 +40,13 @@ class Factory; using FactoryPtr = std::shared_ptr; using UniquePtr = std::shared_ptr; +using EmbCacheManagerPtr = std::shared_ptr; class Factory { public: virtual ~Factory() = default; virtual int CreateUnique(UniquePtr &out) = 0; + virtual int CreateEmbCacheManager(EmbCacheManagerPtr &out) = 0; virtual int SetExternalLogFuncInner(ExternalLog logFunc) = 0; public: @@ -52,7 +55,7 @@ public: int result = 0; uintptr_t factory = 0; /* dynamic load function */ - if ((result = OckCtrCommonDef::CreatFactory(&factory)) == 0) { + if ((result = OckCtrCommonDef::CreateFactory(&factory)) == 0) { out.reset(reinterpret_cast(factory)); } return result; diff --git a/src/AccCTR/src/include/ock_ctr_common_def.h b/src/AccCTR/src/include/ock_ctr_common_def.h index ed955996..75e7e9cb 100644 --- a/src/AccCTR/src/include/ock_ctr_common_def.h +++ b/src/AccCTR/src/include/ock_ctr_common_def.h @@ -25,7 +25,7 @@ namespace ock { namespace ctr { class OckCtrCommonDef { public: - static int CreatFactory(uintptr_t *factory) + static int CreateFactory(uintptr_t *factory) { static void *handle = nullptr; static std::mutex m; diff --git a/src/AccCTR/src/include/unique.h b/src/AccCTR/src/include/unique.h index 3154a784..1f58f8a4 100644 --- a/src/AccCTR/src/include/unique.h +++ b/src/AccCTR/src/include/unique.h @@ -58,6 +58,7 @@ using UniqueConf = struct UniqueConfCTR { uint32_t maxThreadNum = 8; // 最大工作线程数 int64_t maxIdVal = 0; // 最大id值 bool trace = false; // 是否开启性能检测,需要配合外部日志输出 + bool performance = false; // 是否开启增强接口,增强接口shardingNum必须是2的幂次方,默认用取模分桶 } __attribute__((packed)); using UniqueIn = struct UniqueInCTR { diff --git a/src/AccCTR/src/unique/unique_func.cpp b/src/AccCTR/src/unique/unique_func.cpp index d208eac9..45ac768a 100644 --- a/src/AccCTR/src/unique/unique_func.cpp +++ b/src/AccCTR/src/unique/unique_func.cpp @@ -27,7 +27,6 @@ void Dedup::Insert(uint64_t val) for (int8_t i = 0; i < count; ++i) { if (bucket->data[totalCount] == val) { - TryIncreaseIdCount(bucket->idCount[totalCount]); // found one return; } @@ -38,7 +37,6 @@ void Dedup::Insert(uint64_t val) std::lock_guard lg(bucket->lock); for (int8_t j = totalCount; j < bucket->count; ++j) { if (bucket->data[totalCount] == val) { - TryIncreaseIdCount(bucket->idCount[totalCount]); // found one return; } @@ -47,7 +45,6 @@ void Dedup::Insert(uint64_t val) if (totalCount < n) { bucket->data[totalCount] = val; bucket->count++; - TryIncreaseIdCount(bucket->idCount[totalCount]); return; } } @@ -55,13 +52,6 @@ void Dedup::Insert(uint64_t val) InsertOverflow(val); } -inline void Dedup::TryIncreaseIdCount(std::atomic &val) -{ - if (idCountEnable_) { - val++; - } -} - int32_t Dedup::GetReplaceOffsetUnsafe(uint64_t val) { auto h = static_cast(Hash(val) & bucketCountMask_); @@ -108,7 +98,6 @@ void Dedup::Clear(uint64_t newBucketCountPowerOf2) } bzero(table_, sizeof(Meta) * bucketCount_); overflow_.clear(); - idCountOverflow_.clear(); } void Dedup::NewParameter() @@ -168,6 +157,58 @@ int32_t ShardedDedup::GetFillOffset(const std::vector &totalUniqueSize, } } +void ShardedDedup::GetIndexAndStart(const int32_t *uniqueSizeInBucket, bool usePadding, int shardingNumber, int &start, + int &index) +{ + if (shardingNumber > 0) { + index += uniqueSizeInBucket[shardingNumber - 1]; + } + + if (usePadding) { + start = shardingNumber * conf.paddingSize; + } else { + start = index; + } +} + +int ShardedDedup::PrintMemCpyLog(int rc, const uint32_t dstSize, const std::string &logMsg) +{ 
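One behavioural detail from the UniqueConf change above deserves a worked example: with performance set, ComputeInsert (further down) shards by value & (conf.shardingNum - 1) instead of the GroupMethod modulo, and the two agree only when shardingNum is a power of two, hence the constraint stated in the new config comment. A self-contained sketch; ShardFast is an illustrative name, not a function in this patch:

    #include <cassert>
    #include <cstdint>

    // Fast sharding path used when UniqueConf.performance is set (sketch).
    inline uint32_t ShardFast(uint64_t value, uint32_t shardingNum)
    {
        assert(shardingNum != 0 && (shardingNum & (shardingNum - 1)) == 0);  // 2^k only
        return static_cast<uint32_t>(value & (shardingNum - 1));
    }

    int main()
    {
        for (uint64_t v : {0ULL, 7ULL, 64ULL, 1234567ULL}) {
            assert(ShardFast(v, 8) == v % 8);   // identical to modulo for 2^k shard counts
        }
        return 0;
    }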
+ if (rc != 0) { + std::stringstream ssm; + ssm << "[" << logMsg << "] memcpy_s failed... dstSize: " << dstSize; + ExternalLogger::PrintLog(LogLevel::ERROR, ssm.str()); + return H_COPY_ERROR; + } else { + return H_OK; + } +} + +int ShardedDedup::HandleIdCountFill(std::vector> &idCount, UniqueOutSelf &uniqueOut) +{ + if (conf.usePadding) { + uint32_t memSize = idCount.size() * sizeof(int32_t); + auto rc = memcpy_s(uniqueOut.idCntFill, memSize, (int32_t *)(idCount.data()), memSize); + if (rc != 0) { + return rc; + } + int ret = PrintMemCpyLog(rc, memSize, "[TileAndFill/idCntFill]"); + if (ret != 0) { + return ret; + } + } else { + uint32_t memSize = idCount.size() * sizeof(int32_t); + auto rc = memcpy_s(uniqueOut.idCnt, memSize, (int32_t *)(idCount.data()), memSize); + if (rc != 0) { + return rc; + } + + int ret = PrintMemCpyLog(rc, memSize, "[TileAndFill/idCnt]"); + if (ret != 0) { + return ret; + } + } + return H_OK; +} size_t ShardedDedup::CalThreadNum() const { diff --git a/src/AccCTR/src/unique/unique_func.h b/src/AccCTR/src/unique/unique_func.h index 07c8ebb7..4812f74c 100644 --- a/src/AccCTR/src/unique/unique_func.h +++ b/src/AccCTR/src/unique/unique_func.h @@ -30,6 +30,7 @@ limitations under the License. #include #include #include +#include #include "securec.h" #include "common_includes.h" @@ -37,6 +38,14 @@ limitations under the License. namespace ock { namespace ctr { +#ifndef LIKELY +#define LIKELY(x) __builtin_expect(!!(x), 1) +#endif + +#ifndef UNLIKELY +#define UNLIKELY(x) __builtin_expect(!!(x), 0) +#endif + using UniqueOutSelf = struct UniqueSelf { void *uniqueId = nullptr; // 去重分桶填充之后最终的的ids(需要用户申请)必选 uint32_t *index = nullptr; // 去重后id的索引位置(需要用户申请)必选 @@ -47,7 +56,7 @@ using UniqueOutSelf = struct UniqueSelf { int uniqueIdCnt = 0; // 每个桶去重后的id个数(需要用户申请) }; -constexpr int UNIQUE_MAX_BUCKET_WIDTH = 5; +constexpr int UNIQUE_MAX_BUCKET_WIDTH = 6; template struct Map {}; template <> struct Map { @@ -111,7 +120,7 @@ class Dedup { static constexpr uint32_t K_MINIMAL_WORKLOAD_PER_WORKER = 1 << 12; static constexpr size_t K_ALIGNMENT = 64; static const int kDefaultBucketCount = 1 << 24; - static const int8_t n = 4; + static const int8_t n = UNIQUE_MAX_BUCKET_WIDTH; template struct Meta { static_assert(M <= UNIQUE_MAX_BUCKET_WIDTH, "should be no larger than max bucket width"); @@ -119,7 +128,6 @@ class Dedup { volatile int8_t count {}; uint32_t replaceBase {}; volatile uint64_t data[M] {}; - std::atomic idCount[M] {}; } __attribute__((__aligned__(64))); struct Statistics { @@ -152,11 +160,10 @@ public: void Insert(uint64_t val); int32_t GetReplaceOffsetUnsafe(uint64_t val); void InitTable(); - void TryIncreaseIdCount(std::atomic &val); void Clear(uint64_t newBucketCountPowerOf2); void NewParameter(); - template uint32_t UniqueRaw(void *output, uint32_t priorTotal, int32_t *idCount) + template uint32_t UniqueRaw(void *output, uint32_t priorTotal) { uint32_t total = priorTotal; uint32_t replaceOffset = priorTotal; @@ -168,19 +175,13 @@ public: } bucket->replaceBase = replaceOffset; for (int j = 0; j < bucket->count; ++j) { - if (idCountEnable_) { - idCount[total] = bucket->idCount[j]; - } - out[total++] = static_cast::type>(bucket->data[j]); + out[total++] = bucket->data[j]; } replaceOffset += bucket->count; } auto it = overflow_.begin(); int32_t totalOverflow = 0; while (it != overflow_.end()) { - if (idCountEnable_) { - idCount[total] = static_cast(idCountOverflow_[it->first]); - } out[total++] = it->first; it->second = replaceOffset++; ++it; @@ -189,7 +190,7 @@ public: // set total 
overflow count stats_.totalUniques = static_cast(total - priorTotal); - stats_.totalOverflowUniques = static_cast(totalOverflow); + stats_.totalOverflowUniques = totalOverflow; return total - priorTotal; } @@ -200,14 +201,13 @@ private: int largeCount_ { 0 }; Meta *table_ {}; std::unordered_map overflow_; - std::unordered_map idCountOverflow_; SpinLockG overflowMutex_; Statistics stats_; bool idCountEnable_ { false }; static inline uint64_t Hash(uint64_t val) { - return val ^ (val >> HASH_L_L) ^ (val >> HASH_L_L) ^ (val >> HASH_H); + return val ^ (val >> HASH_L_L) ^ (val >> HASH_L) ^ (val >> HASH_H); } void InsertOverflow(uint64_t val) @@ -217,10 +217,6 @@ private: if (it == overflow_.end()) { overflow_[val] = 0; } - - if (idCountEnable_) { - idCountOverflow_[val]++; - } } int32_t GetReplaceOffsetFromOverflowUnsafe(uint64_t val) @@ -234,6 +230,7 @@ class ShardedDedup { static constexpr uint32_t K_MINIMAL_WORKLOAD_PER_WORKER = 1 << 13; static constexpr int K_DEFAULT_DUPLICATE_RATIO = 4; static constexpr int K_BUCKET_WIDTH = 4; + static constexpr int CLEAR_WAIT_TIME = 10; public: using DedupT = Dedup; @@ -244,44 +241,45 @@ public: { const int numOfGroupsInShard = groupMethod_.GroupCount(); uint32_t totalSize = conf.desiredSize + (conf.desiredSize >> 1); - while (bucketCountPower2_ * static_cast(K_BUCKET_WIDTH) * - static_cast(numOfGroupsInShard) * static_cast(estimatedDuplicateRatio) < totalSize) { + while (bucketCountPower2_ * K_BUCKET_WIDTH * numOfGroupsInShard * estimatedDuplicateRatio < totalSize) { bucketCountPower2_ <<= 1; } idCountEnable_ = (conf.outputType == OutputType::ENHANCED) && conf.useIdCount; - try { - for (int32_t i = 0; i < numOfGroupsInShard; ++i) { - auto obj = new DedupT(bucketCountPower2_, numOfGroupsInShard, idCountEnable_); - dedupShards_.emplace_back(obj); + for (int32_t i = 0; i < numOfGroupsInShard; ++i) { + auto obj = new DedupT(bucketCountPower2_, numOfGroupsInShard, idCountEnable_); + if (obj == nullptr) { + ExternalLogger::PrintLog(LogLevel::ERROR, "creat object error"); + throw NullptrError(); } - } catch (const std::bad_alloc& e) { - ExternalLogger::PrintLog(LogLevel::ERROR, "Memory allocation failed during loop: " + std::string(e.what())); - throw; + dedupShards_.emplace_back(obj); } } ~ShardedDedup() = default; - void StartNewRound() + int StartNewRound() { for (auto &s : dedupShards_) { s->NewParameter(); } + clearFinish_ = true; + return 0; } public: template int Compute(UniqueIn &uniqueIn, UniqueOutSelf &uniqueOut) { - try { - if (!firstEnterFlag_) { - StartNewRound(); - } - } catch (AllocError &) { - ExternalLogger::PrintLog(LogLevel::ERROR, "memory alloc error"); - return H_MEMORY_ALLOC_ERROR; + if (firstEnter_) { + pool_.SetNumThreads(1); + firstEnter_ = false; } - firstEnterFlag_ = false; + + while (!clearFinish_) { + usleep(CLEAR_WAIT_TIME); + } + + clearFinish_ = false; size_t threadNum = CalThreadNum(); partSize = (uniqueIn.inputIdCnt + threadNum - 1) / threadNum; @@ -304,23 +302,29 @@ public: if (conf.outputType == OutputType::ENHANCED) { int totalNumber = 0; for (int i = 0; i < conf.shardingNum; i++) { - totalUniqueSize[i] = static_cast(totalNumber); + totalUniqueSize[i] = totalNumber; if (conf.useSharding) { totalNumber += uniqueOut.uniqueIdCntInBucket[i]; } } } - ret = CalUniqueOut(uniqueIn, uniqueOut, totalUniqueSize); + int size = 1; + if (conf.useIdCount) { + size = conf.usePadding ? 
 
public:
     template <typename T> int Compute(UniqueIn &uniqueIn, UniqueOutSelf &uniqueOut)
     {
-        try {
-            if (!firstEnterFlag_) {
-                StartNewRound();
-            }
-        } catch (AllocError &) {
-            ExternalLogger::PrintLog(LogLevel::ERROR, "memory alloc error");
-            return H_MEMORY_ALLOC_ERROR;
-        }
-        firstEnterFlag_ = false;
+        if (firstEnter_) {
+            pool_.SetNumThreads(1); // one worker is enough: it only ever runs StartNewRound()
+            firstEnter_ = false;
+        }
+
+        // Wait until the deferred clear of the previous round has finished before reusing tables.
+        while (!clearFinish_) {
+            usleep(CLEAR_WAIT_TIME);
+        }
+
+        clearFinish_ = false;
 
         size_t threadNum = CalThreadNum();
         partSize = (uniqueIn.inputIdCnt + threadNum - 1) / threadNum;
@@ -304,23 +302,29 @@ public:
         if (conf.outputType == OutputType::ENHANCED) {
             int totalNumber = 0;
             for (int i = 0; i < conf.shardingNum; i++) {
-                totalUniqueSize[i] = static_cast<int32_t>(totalNumber);
+                totalUniqueSize[i] = totalNumber;
                 if (conf.useSharding) {
                     totalNumber += uniqueOut.uniqueIdCntInBucket[i];
                 }
             }
         }
 
-        ret = CalUniqueOut(uniqueIn, uniqueOut, totalUniqueSize);
+        // Stage per-id counters: padded layout (paddingSize per shard) or compact (uniqueIdCnt).
+        int size = 1;
+        if (conf.useIdCount) {
+            size = conf.usePadding ? conf.paddingSize * conf.shardingNum : uniqueOut.uniqueIdCnt;
+        }
+        std::vector<std::atomic<int32_t>> idCount(size);
+        ret = CalUniqueOut<T>(uniqueIn, uniqueOut, totalUniqueSize, idCount);
         if (ret != H_OK) {
             ExternalLogger::PrintLog(LogLevel::ERROR, "CalUniqueOut ERROR");
             return ret;
         }
 
         if (conf.outputType == OutputType::ENHANCED) {
-            HandleTileAndFill(uniqueIn, uniqueOut);
+            ret = HandleTileAndFill<T>(uniqueOut, idCount); // propagate failures instead of dropping them
+            if (ret != H_OK) {
+                return ret;
+            }
         }
+        pool_.AddTask([this]() { return StartNewRound(); });
         return H_OK;
     }
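+    // Sizing example for the idCount staging buffer in Compute(): with conf.shardingNum = 8 and
+    // conf.paddingSize = 1024 (illustrative values only), the padded layout holds 8 * 1024 = 8192
+    // atomic counters; without padding it holds exactly uniqueOut.uniqueIdCnt entries.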
@@ -336,17 +340,22 @@ private:
     int32_t GetFillOffset(const std::vector<int32_t> &totalUniqueSize, int64_t val, int32_t group);
 
-    template <typename T> int HandleTileAndFill(UniqueIn &uniqueIn, UniqueOutSelf &uniqueOut)
+    void GetIndexAndStart(const int32_t *uniqueSizeInBucket, bool usePadding, int shardingNumber, int &start,
+        int &index);
+
+    int PrintMemCpyLog(int rc, const uint32_t dstSize, const std::string &logMsg);
+
+    int HandleIdCountFill(std::vector<std::atomic<int32_t>> &idCount, UniqueOutSelf &uniqueOut);
+
+    template <typename T> int HandleTileAndFill(UniqueOutSelf &uniqueOut, std::vector<std::atomic<int32_t>> &idCount)
     {
         int ret = H_OK;
         if (conf.useSharding) { // sharding enabled
-            ret = TileAndFill(uniqueOut.uniqueIdInBucket, uniqueOut.uniqueIdCntInBucket, uniqueOut.uniqueId,
-                uniqueOut.idCnt, uniqueOut.idCntFill);
+            ret = TileAndFill<T>(uniqueOut, uniqueOut.uniqueIdCntInBucket, idCount);
         } else if (!conf.useSharding && conf.useIdCount) { // sharding disabled, id count enabled
             std::vector<int32_t> count;
             count.emplace_back(uniqueOut.uniqueIdCnt); // record the number of deduped ids
-            ret = TileAndFill(uniqueOut.uniqueId, count.data(), uniqueOut.uniqueId, uniqueOut.idCnt,
-                uniqueOut.idCntFill);
+            ret = TileAndFill<T>(uniqueOut, count.data(), idCount);
         }
 
         if (ret != H_OK) {
@@ -365,37 +374,37 @@ private:
             uint64_t inGroupTotal;
             if (conf.outputType == OutputType::ENHANCED) {
                 if (conf.useSharding && conf.useIdCount) {
-                    inGroupTotal = dedupShards_[j]->UniqueRaw<T>(uniqueOut.uniqueIdInBucket, total,
-                        uniqueOut.idCnt); // id count and sharding both enabled
-                    uniqueOut.uniqueIdCntInBucket[j] = static_cast<int32_t>(inGroupTotal);
+                    inGroupTotal =
+                        dedupShards_[j]->UniqueRaw<T>(uniqueOut.uniqueIdInBucket, total); // id count and sharding both enabled
+                    uniqueOut.uniqueIdCntInBucket[j] = inGroupTotal;
                 } else if (!conf.useSharding && conf.useIdCount) {
-                    inGroupTotal = dedupShards_[j]->UniqueRaw<T>(uniqueOut.uniqueId, total,
-                        uniqueOut.idCnt); // id count enabled, sharding disabled
+                    inGroupTotal =
+                        dedupShards_[j]->UniqueRaw<T>(uniqueOut.uniqueId, total); // id count enabled, sharding disabled
                 } else if (conf.useSharding && !conf.useIdCount) {
-                    inGroupTotal = dedupShards_[j]->UniqueRaw<T>(uniqueOut.uniqueIdInBucket, total,
-                        nullptr); // id count enabled, sharding disabled
-                    uniqueOut.uniqueIdCntInBucket[j] = static_cast<int32_t>(inGroupTotal);
+                    inGroupTotal =
+                        dedupShards_[j]->UniqueRaw<T>(uniqueOut.uniqueIdInBucket, total); // sharding enabled, id count disabled
+                    uniqueOut.uniqueIdCntInBucket[j] = inGroupTotal;
                 } else {
-                    inGroupTotal = dedupShards_[j]->UniqueRaw<T>(uniqueOut.uniqueId, total,
-                        nullptr); // both disabled; equivalent to a plain unique
+                    inGroupTotal = dedupShards_[j]->UniqueRaw<T>(uniqueOut.uniqueId,
+                        total); // both disabled; equivalent to a plain unique
                 }
             } else {
-                inGroupTotal = dedupShards_[j]->UniqueRaw<T>(uniqueOut.uniqueId, total, nullptr);
+                inGroupTotal = dedupShards_[j]->UniqueRaw<T>(uniqueOut.uniqueId, total);
             }
-            total += static_cast<uint32_t>(inGroupTotal);
+            total += inGroupTotal;
         }
         uniqueOut.uniqueIdCnt = total;
     }
 
     template <typename T>
-    int TileAndFill(void *uniqueIdInBucket, const int32_t *uniqueSizeInBucket, void *uniqueIds, const int32_t *idCnt,
-        int32_t *idCntFill)
+    int TileAndFill(UniqueOutSelf &uniqueOut, const int32_t *uniqueSizeInBucket,
+        std::vector<std::atomic<int32_t>> &idCount)
     {
         int start = 0;
         int index = 0;
-        auto uIdInBucket = TypeTrans<T>(uniqueIdInBucket);
-        auto uIds = TypeTrans<T>(uniqueIds);
+        auto uIdInBucket = TypeTrans<T>(conf.useSharding ? uniqueOut.uniqueIdInBucket : uniqueOut.uniqueId);
+        auto uIds = TypeTrans<T>(uniqueOut.uniqueId);
 
         for (int i = 0; i < conf.shardingNum; i++) {
             GetIndexAndStart(uniqueSizeInBucket, conf.usePadding, i, start, index);
@@ -419,35 +428,31 @@ private:
             if (conf.useIdCount && conf.usePadding) {
                 memSize = uniqueSizeInBucket[i] * sizeof(int32_t);
-                rc = memcpy_s(idCntFill + start, memSize, idCnt + index, memSize);
-                ret = PrintMemCpyLog(rc, memSize, "[TileAndFill/idCntFill]");
+                rc = memcpy_s(uniqueOut.idCnt + index, memSize, (int32_t *)(idCount.data()) + start,
+                    memSize); // compact the padded per-id counts into idCnt
+                ret = PrintMemCpyLog(rc, memSize, "[TileAndFill/idCnt]");
+            }
+
+            if (ret != 0) {
+                return ret;
+            }
+        }
+
+        if (conf.useIdCount) {
+            int ret = HandleIdCountFill(idCount, uniqueOut);
             if (ret != 0) {
                 return ret;
             }
         }
 
         if (conf.usePadding) {
-            HandleFill(uIds, uniqueSizeInBucket, idCntFill);
+            HandleFill<T>(uIds, uniqueSizeInBucket);
         }
 
         return H_OK;
     }
 
-    int PrintMemCpyLog(int rc, const uint32_t dstSize, const std::string &logMsg)
-    {
-        if (rc != 0) {
-            std::stringstream ssm;
-            ssm << "[" << logMsg << "] memcpy_s failed... dstSize: " << dstSize;
-            ExternalLogger::PrintLog(LogLevel::ERROR, ssm.str());
-            return H_COPY_ERROR;
-        } else {
-            return H_OK;
-        }
-    }
-
-    template <typename T>
-    void HandleFill(typename Map<T>::type *uIds, const int32_t *uniqueSizeInBucket, int32_t *idCntFill)
+    template <typename T> void HandleFill(typename Map<T>::type *uIds, const int32_t *uniqueSizeInBucket)
     {
         int start = 0;
         int index = 0;
@@ -459,26 +464,6 @@ private:
             for (int j = 0; j < fillLen; j++) {
                 uIds[start + uniqueSizeInBucket[i] + j] = conf.paddingVal; // padding fill
             }
-
-            if (idCntFill != nullptr) {
-                for (int y = 0; y < fillLen; y++) {
-                    idCntFill[start + uniqueSizeInBucket[i] + y] = 0; // id count fill
-                }
-            }
-        }
-    }
-
-    void GetIndexAndStart(const int32_t *uniqueSizeInBucket, bool usePadding, int shardingNumber, int &start,
-        int &index)
-    {
-        if (shardingNumber > 0) {
-            index += uniqueSizeInBucket[shardingNumber - 1];
-        }
-
-        if (usePadding) {
-            start = shardingNumber * conf.paddingSize;
-        } else {
-            start = index;
         }
     }
 
@@ -493,13 +478,18 @@ private:
             tasks.push_back([this, val, start, end, &ret]() {
                 for (uint64_t j = start; j < end; ++j) {
                     auto value = val[j];
-                    if (value > conf.maxIdVal) {
+                    if (UNLIKELY(value > conf.maxIdVal)) {
                         ExternalLogger::PrintLog(LogLevel::ERROR, "id val is larger than maxIdVal");
                         ret = H_ID_LARGE;
                         break;
                     }
-                    auto group = groupMethod_.GroupId(value);
-                    dedupShards_[group]->Insert(value);
+
+                    if (conf.performance) {
+                        // Fast path: shardingNum is validated to be a power of two, so the shard
+                        // can be picked with a mask instead of the generic GroupId().
+                        dedupShards_[value & (conf.shardingNum - 1)]->Insert(value);
+                    } else {
+                        auto group = groupMethod_.GroupId(value);
+                        dedupShards_[group]->Insert(value);
+                    }
                 }
             });
         }
@@ -520,31 +510,46 @@ private:
     template <typename T>
-    int CalUniqueOut(UniqueIn &uniqueIn, UniqueOutSelf &uniqueOut, std::vector<int32_t> &totalUniqueSize)
+    int CalUniqueOut(UniqueIn &uniqueIn, UniqueOutSelf &uniqueOut, std::vector<int32_t> &totalUniqueSize,
+        std::vector<std::atomic<int32_t>> &idCount)
     {
         uint32_t *beginPtr = uniqueOut.index;
         uint32_t *finishPtr = beginPtr + uniqueIn.inputIdCnt;
         uint32_t *partBeginPtr = beginPtr;
-        auto alignedAddress = CacheLineAlign(reinterpret_cast<uintptr_t>(partBeginPtr + partSize));
-        auto *partEndPtr = reinterpret_cast<uint32_t *>(static_cast<uintptr_t>(alignedAddress));
+        auto *partEndPtr =
+            reinterpret_cast<uint32_t *>(CacheLineAlign(reinterpret_cast<uintptr_t>(partBeginPtr + partSize)));
         std::vector<std::function<void()>> tasks;
         auto val = TypeTrans<T>(uniqueIn.inputId);
         while (partBeginPtr < finishPtr) {
             if (partEndPtr > finishPtr) {
                 partEndPtr = finishPtr;
             }
-            if (partBeginPtr < partEndPtr) {
-                // Due to cacheline alignment computation, the actual number of
-                // threads created here may not match threadNum exactly but
-                // should be +/-1 off.
-                tasks.push_back([this, val, beginPtr, partBeginPtr, partEndPtr, totalUniqueSize]() {
-                    for (uint32_t *ptr = partBeginPtr; ptr < partEndPtr; ++ptr) {
+
+            if (partBeginPtr >= partEndPtr) {
+                partBeginPtr = partEndPtr;
+                partEndPtr += partSize;
+                continue;
+            }
+
+            // Due to cacheline alignment computation, the actual number of
+            // threads created here may not match threadNum exactly but
+            // should be +/-1 off.
+            tasks.push_back([this, val, beginPtr, partBeginPtr, partEndPtr, totalUniqueSize, &idCount]() {
+                for (uint32_t *ptr = partBeginPtr; ptr < partEndPtr; ++ptr) {
+                    int32_t fillOffset;
+                    if (conf.performance) {
+                        fillOffset = GetFillOffset(totalUniqueSize, val[ptr - beginPtr],
+                            val[ptr - beginPtr] & (conf.shardingNum - 1));
+                    } else {
                         auto group = groupMethod_.GroupId(val[ptr - beginPtr]);
-                        int32_t fillOffset = GetFillOffset(totalUniqueSize, val[ptr - beginPtr], group);
-                        *ptr = fillOffset;
+                        fillOffset = GetFillOffset(totalUniqueSize, val[ptr - beginPtr], group);
                     }
-                });
-            }
+                    *ptr = fillOffset;
+                    if (LIKELY(conf.useIdCount)) {
+                        idCount[fillOffset]++;
+                    }
+                }
+            });
             partBeginPtr = partEndPtr;
             partEndPtr += partSize;
         }
@@ -569,8 +574,10 @@ private:
     UniqueConf conf;
     std::vector<std::unique_ptr<DedupT>> dedupShards_ {};
     uint32_t partSize;
-    bool firstEnterFlag_ = false;
+    // Written by the pool_ thread in StartNewRound() and polled in Compute(), so it must be atomic.
+    std::atomic<bool> clearFinish_ { true };
     bool idCountEnable_ { false };
+    ThreadPoolAsync pool_;
+    bool firstEnter_ = true;
 };
 }
 }
diff --git a/src/AccCTR/src/unique/unique_impl.cpp b/src/AccCTR/src/unique/unique_impl.cpp
index 77113214..800f21de 100644
--- a/src/AccCTR/src/unique/unique_impl.cpp
+++ b/src/AccCTR/src/unique/unique_impl.cpp
@@ -228,6 +228,14 @@ int UniqueImpl::CheckEnhancedUniqueConf(const UniqueConf &conf)
         if (CheckInputZero(conf.shardingNum, "shardingNum")) {
             return H_NUM_SMALL;
         }
+        if (conf.performance) {
+            bool isExponentOfTwo =
+                (conf.shardingNum > 0) && ((conf.shardingNum & (conf.shardingNum - 1)) == 0); // power-of-two check
+            if (!isExponentOfTwo) {
+                ExternalLogger::PrintLog(LogLevel::ERROR, "if performance is true, shardingNum must be 2^N");
+                return H_ERROR;
+            }
+        }
     }
 
     return H_OK;
diff --git a/src/AccCTR/src/unique/unique_impl.h b/src/AccCTR/src/unique/unique_impl.h
index f4c45fde..e37a58db 100644
--- a/src/AccCTR/src/unique/unique_impl.h
+++ b/src/AccCTR/src/unique/unique_impl.h
@@ -43,7 +43,7 @@ private:
 
 private:
     ShardedDedup *unique = nullptr;
-    UniqueConf uniqueConf {};
+    UniqueConf uniqueConf{};
 };
 }
 }
diff --git a/src/AccCTR/tests/tools/create_fake_id.py b/src/AccCTR/tests/tools/create_fake_id.py
index fc0f1f8e..aa42f071 100644
--- a/src/AccCTR/tests/tools/create_fake_id.py
+++ b/src/AccCTR/tests/tools/create_fake_id.py
@@ -68,12 +68,6 @@ def write_data(file_name, x, y, dup):
 
 
 def main():
-    # 3,000,000 ids with a 20% unique ratio:
-    # 6x + y = 300
-    # x + y = 60
-    # => x = 48, y = 12 (in units of 10,000)
-    write_data('data20.txt', 48*10000, 12*10000, 6)
-
     # 3,000,000 ids with a 30% unique ratio:
     # 6x + y = 300
     # x + y = 90
diff --git a/src/AccCTR/tests/ut/conf/toolchain.cmake b/src/AccCTR/tests/ut/conf/toolchain.cmake
new file mode 100644
index 00000000..bd6617e4
--- /dev/null
+++ b/src/AccCTR/tests/ut/conf/toolchain.cmake
@@ -0,0 +1,24 @@
+# Copyright (c) Huawei Technologies Co., Ltd. 2024-2024. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+# Compile options
+option(USE32BIT "Use 32-Bit" OFF)
+if(USE32BIT)
+    add_compile_options(-m32)
+    add_link_options(-m32)
+endif()
+
+add_compile_options(-Wall)
+set(CMAKE_C_STANDARD 11)
+set(CMAKE_CXX_STANDARD 11)
\ No newline at end of file
diff --git a/src/AccCTR/tests/ut/src/CMakeLists.txt b/src/AccCTR/tests/ut/src/CMakeLists.txt
index a4c631e8..3da58244 100644
--- a/src/AccCTR/tests/ut/src/CMakeLists.txt
+++ b/src/AccCTR/tests/ut/src/CMakeLists.txt
@@ -19,6 +19,11 @@ set(OCK_CTR_UTIL_INSTALL_DIR ${PROJECT_SOURCE_DIR}/install)
 set(OCK_CTR_SRC_DIR ${PROJECT_SOURCE_DIR}/src)
 message("src" ${OCK_CTR_SRC_DIR})
 
+# Pull in the shared toolchain settings for all components
+include("${CMAKE_CURRENT_SOURCE_DIR}/../conf/toolchain.cmake")
+set(SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../../../src)
+set(TOP_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../../../)
+
 file(GLOB_RECURSE TEST_UNIQUE_FILES *.cpp *.h)
 add_executable(test_unique_files ${TEST_UNIQUE_FILES})
 include_directories(${OCK_CTR_UTIL_INSTALL_DIR}/googletest-release-1.8.1/include)
@@ -29,17 +34,36 @@
 SET(LIB_3RD_GTEST ${OCK_CTR_UTIL_INSTALL_DIR}/googletest-release-1.8.1/lib64/lib
 
 message(${OCK_CTR_SRC_DIR}/include)
 
+# Library search paths
+target_link_directories(test_unique_files
+        PUBLIC
+        ${PROJECT_SOURCE_DIR}/output/ock_ctr_common/lib
+        )
+# Header search paths
 target_include_directories(test_unique_files
     PUBLIC
-    ${OCK_CTR_SRC_DIR}/include)
+    ${OCK_CTR_SRC_DIR}/include
+    ${PROJECT_SOURCE_DIR}
+    ${OCK_CTR_SRC_DIR}/common/util
+    )
 
+# Libraries to link
 target_link_libraries(test_unique_files PUBLIC -Wl,--start-group
+        _ock_ctr_common
         pthread
         dl
         ${LIB_3RD_GTEST}
         ${LIB_3RD_GMOCK}
         -Wl,--end-group)
+
+# Print the resolved build options
+get_target_property(COMPILE_FLAGS test_unique_files COMPILE_OPTIONS)
+get_target_property(LINK_FLAGS test_unique_files LINK_OPTIONS)
+message(STATUS "Compiler id: ${CMAKE_CXX_COMPILER_ID}")
+message(STATUS "Compile flags: ${COMPILE_FLAGS}")
+message(STATUS "Link flags: ${LINK_FLAGS}")
+message(STATUS "Build Type: ${CMAKE_BUILD_TYPE}")
+
diff --git a/src/AccCTR/tests/ut/src/common.h b/src/AccCTR/tests/ut/src/common.h
new file mode 100644
index 00000000..7302d10c
--- /dev/null
+++ b/src/AccCTR/tests/ut/src/common.h
@@ -0,0 +1,64 @@
+/* Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+ ==============================================================================*/
+
+#ifndef CTR_COMMON_H
+#define CTR_COMMON_H
+#include 
+
+#include "factory.h"
+
+extern ock::ctr::FactoryPtr factory;
+
+enum CTRLogLevel {
+    DEBUG = 0,
+    INFO,
+    WARN,
+    ERROR,
+};
+
+class SimpleThreadPool {
+public:
+    static void SyncRun(const std::vector<std::function<void()>> &tasks)
+    {
+        std::vector<std::future<void>> futs;
+        for (auto &task : tasks) {
+            futs.push_back(std::async(task));
+        }
+        for (auto &fut : futs) {
+            fut.wait();
+        }
+    }
+};
+
+static void CTRLog(int level, const char *msg)
+{
+    switch (level) {
+        case CTRLogLevel::DEBUG:
+            std::cout << "DEBUG:" << msg << std::endl;
+            break;
+        case CTRLogLevel::INFO:
+            std::cout << "INFO:" << msg << std::endl;
+            break;
+        case CTRLogLevel::WARN:
+            std::cout << "WARN:" << msg << std::endl;
+            break;
+        case CTRLogLevel::ERROR:
+            std::cout << "ERROR:" << msg << std::endl;
+            break;
+        default:
+            break;
+    }
+}
+
+#endif // CTR_COMMON_H
diff --git a/src/AccCTR/tests/ut/src/emb_cache_test.cpp b/src/AccCTR/tests/ut/src/emb_cache_test.cpp
new file mode 100644
index 00000000..dda5423c
--- /dev/null
+++ b/src/AccCTR/tests/ut/src/emb_cache_test.cpp
@@ -0,0 +1,1999 @@
+/* Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+ ==============================================================================*/ + +#include +#include + +#include "common/util/error_code.h" +#include "emb_cache_test.h" +#include "common.h" + +using namespace std; +using namespace ock::ctr; + +FactoryPtr factory; +EmbCacheManagerPtr embCache = nullptr; + +std::vector GenKeys(uint64_t n, uint32_t seed = 0, uint64_t min = 0, uint64_t max = UINT64_MAX) +{ + std::mt19937 generator(seed); + std::uniform_int_distribution distribution(min, max); + std::vector data(n); + for (uint64_t &x : data) { + x = distribution(generator); + } + sort(data.begin(), data.end()); + data.erase(unique(data.begin(), data.end()), data.end()); + return data; +} + +std::vector GenUniqueKeys(uint64_t n) +{ + std::vector data(n); + for (uint64_t i = 0; i < n; i++) { + data[i] = i; + } + return data; +} + +EmbCacheManagerPtr EmbCacheTest::SimpleCreateTable(std::string tableName, uint32_t hostVocabSize, + uint32_t embeddingSize, uint32_t extEmbeddingSize, uint32_t devVocabSize, pair normalPara, + float constPara) +{ + factory->CreateEmbCacheManager(embCache); + EmbCache::EmbCacheInfo embCacheInfo(tableName, hostVocabSize, embeddingSize, extEmbeddingSize, devVocabSize); + + EmbCache::NormalInitializerInfo normalInitializerInfo(normalPara.first, normalPara.second, 0, 1.0); + std::string normalInitializeName = "random_normal_initializer"; + EmbCache::InitializerInfo normalInitializeInfo(normalInitializeName, 0, embeddingSize, normalInitializerInfo); + + EmbCache::ConstantInitializerInfo constantInitializerInfo(constPara, 1.0); + std::string constantInitializeName = "constant_initializer"; + + std::vector initializeInfos(extEmbeddingSize / embeddingSize); + initializeInfos[0] = normalInitializeInfo; + for (uint64_t i = 1; i < initializeInfos.size(); i++) { + initializeInfos[i] = EmbCache::InitializerInfo(constantInitializeName, embeddingSize * i, embeddingSize, + constantInitializerInfo); + } + int ret = embCache->CreateCacheForTable(embCacheInfo, initializeInfos, -1, hostVocabSize, 1); + if (ret != H_OK) { + string msg = "CreateCacheForTable Failed. ret: " + std::to_string(ret); + CTRLog(CTRLogLevel::ERROR, msg.c_str()); + return nullptr; + } + return embCache; +} + +EmbCacheManagerPtr EmbCacheTest::ConstZeroCreateTable(std::string tableName, uint32_t hostVocabSize, + uint32_t embeddingSize, uint32_t extEmbeddingSize, uint32_t devVocabSize, uint64_t prefillBufferSize, + uint8_t prefillThreadNum) +{ + factory->CreateEmbCacheManager(embCache); + EmbCache::EmbCacheInfo embCacheInfo(tableName, hostVocabSize, embeddingSize, extEmbeddingSize, devVocabSize); + + EmbCache::ConstantInitializerInfo constantInitializerInfo(0.0, 1.0); + std::string constantInitializeName = "constant_initializer"; + + std::vector initializeInfos = { EmbCache::InitializerInfo(constantInitializeName, 0, + extEmbeddingSize, constantInitializerInfo) }; + int ret = embCache->CreateCacheForTable(embCacheInfo, initializeInfos, -1, prefillBufferSize, prefillThreadNum); + if (ret != H_OK) { + string msg = "CreateCacheForTable Failed. 
ret: " + std::to_string(ret); + CTRLog(CTRLogLevel::ERROR, msg.c_str()); + return nullptr; + } + return embCache; +} + +void EmbCacheTest::SetUpTestCase() +{ + Factory::Create(factory); + factory->SetExternalLogFuncInner(CTRLog); +} + +void EmbCacheTest::TearDownTestCase() {} + +void EmbCacheTest::SetUp() {} + +void EmbCacheTest::TearDown() +{ + if (embCache != nullptr) { + embCache->Destroy(); + embCache = nullptr; + } +} + +TEST_F(EmbCacheTest, ConstantInitializerInfo) +{ + CTRLog(CTRLogLevel::INFO, "===========ConstantInitializerInfo start============="); + + // 正确初始化ConstantInitializerInfo结构体,无日志信息反馈 + EmbCache::ConstantInitializerInfo constantInitializerInfo(0.233, 1.0); + CTRLog(CTRLogLevel::INFO, "===========ConstantInitializerInfo end============="); +} + +TEST_F(EmbCacheTest, NormalInitializerInfo) +{ + CTRLog(CTRLogLevel::INFO, "===========NormalInitializerInfo start============="); + // 正确初始化NormalInitializerInfo结构体,无日志信息反馈 + EmbCache::NormalInitializerInfo normalInitializerInfo(0, 0.05, 0, 1.0); + // 标准差负值数学意义不明,传入负值问题用户自己承担 + EmbCache::NormalInitializerInfo normalInitializerInfo_ne_dev(0, -0.05, 0, 1.0); + CTRLog(CTRLogLevel::INFO, "===========NormalInitializerInfo end============="); +} + +TEST_F(EmbCacheTest, InitializerInfo) +{ + CTRLog(CTRLogLevel::INFO, "===========InitializerInfo start============="); + uint32_t embeddingSize = 13; + + EmbCache::NormalInitializerInfo normalInitializerInfo(0, 0.05, 0, 1.0); + EmbCache::ConstantInitializerInfo constantInitializerInfo(0.233, 1.0); + + // 传入的std::string不为"constant_initializer" 日志打印"Invalid Initializer Type." + std::string not_a_initializer_name = "not_a_initializer_name"; + EmbCache::InitializerInfo constantInitializeInfo = + EmbCache::InitializerInfo(not_a_initializer_name, embeddingSize, embeddingSize, constantInitializerInfo); + + // 传入的std::string不为"constant_initializer" 日志打印"Invalid Initializer Type." + not_a_initializer_name = ""; + constantInitializeInfo = + EmbCache::InitializerInfo(not_a_initializer_name, embeddingSize, embeddingSize, constantInitializerInfo); + + // 正确初始化InitializeInfo结构体,无日志信息反馈 + std::string constantInitializeName = "constant_initializer"; + constantInitializeInfo = + EmbCache::InitializerInfo(constantInitializeName, embeddingSize, embeddingSize + 1, constantInitializerInfo); + + // 传入的std::string不为"random_normal_initializer"或truncated_normal_initializer 日志打印"Invalid Initializer + // Type." + not_a_initializer_name = "not_a_initializer_name"; + EmbCache::InitializerInfo normalInitializeInfo = + EmbCache::InitializerInfo(not_a_initializer_name, embeddingSize, embeddingSize, normalInitializerInfo); + + // 传入的std::string不为"random_normal_initializer"或truncated_normal_initializer 日志打印"Invalid Initializer + // Type." 
+ not_a_initializer_name = ""; + normalInitializeInfo = + EmbCache::InitializerInfo(not_a_initializer_name, embeddingSize, embeddingSize, normalInitializerInfo); + + // 正确初始化InitializeInfo结构体,无日志信息反馈 + std::string normalInitializeName = "random_normal_initializer"; + normalInitializeInfo = EmbCache::InitializerInfo(normalInitializeName, 0, embeddingSize, normalInitializerInfo); + + // 正确初始化InitializeInfo结构体,无日志信息反馈 + std::string truncatedNormalInitializeName = "truncated_normal_initializer"; + EmbCache::InitializerInfo truncatedNormalInitializeInfo = + EmbCache::InitializerInfo(truncatedNormalInitializeName, 0, embeddingSize, normalInitializerInfo); + + CTRLog(CTRLogLevel::INFO, "===========InitializerInfo end============="); +} + +TEST_F(EmbCacheTest, EmbCacheInfo) +{ + CTRLog(CTRLogLevel::INFO, "===========EmbCacheInfo start============="); + std::string tableName = "test_table"; + uint32_t hostVocabSize = 5; + uint32_t embeddingSize = 13; + uint32_t extEmbeddingSize = 26; + uint32_t devVocabSize = 2; + // 正确初始化EmbCacheInfo结构体,无日志信息反馈 + EmbCache::EmbCacheInfo embCacheInfo(tableName, hostVocabSize, embeddingSize, extEmbeddingSize, devVocabSize); + CTRLog(CTRLogLevel::INFO, "===========EmbCacheInfo end============="); +} + +TEST_F(EmbCacheTest, CreateCacheForTable) +{ + factory->CreateEmbCacheManager(embCache); + CTRLog(CTRLogLevel::INFO, "===========CreateCacheForTable start============="); + std::string tableName = "test_table"; + uint32_t hostVocabSize = 5; + uint32_t embeddingSize = 13; + uint32_t extEmbeddingSize = 26; + uint32_t devVocabSize = 2; + EmbCache::EmbCacheInfo embCacheInfo(tableName, hostVocabSize, embeddingSize, extEmbeddingSize, devVocabSize); + ASSERT_EQ(embCache->CreateCacheForTable(embCacheInfo, {}, -1, hostVocabSize), H_INITIALIZER_INVALID); + + EmbCache::NormalInitializerInfo normalInitializerInfo(0, 0.05, 0, 1.0); + std::string normalInitializeName = "random_normal_initializer"; + EmbCache::InitializerInfo normalInitializeInfo(normalInitializeName, 0, embeddingSize, normalInitializerInfo); + + // 空initializer 日志打印出"Initializer is nullptr" + ASSERT_EQ(embCache->CreateCacheForTable(embCacheInfo, { {}, {} }, -1, hostVocabSize), H_INITIALIZER_INVALID); + + normalInitializeInfo.initializer = nullptr; + // 空initializer 日志打印出"Initializer is nullptr" + ASSERT_EQ(embCache->CreateCacheForTable(embCacheInfo, { normalInitializeInfo }, -1, hostVocabSize), + H_INITIALIZER_INVALID); + + normalInitializeInfo = EmbCache::InitializerInfo(normalInitializeName, 0, embeddingSize, normalInitializerInfo); + EmbCache::ConstantInitializerInfo constantInitializerInfo(0.233, 1.0); + std::string constantInitializeName = "constant_initializer"; + EmbCache::InitializerInfo constantInitializeInfo(constantInitializeName, embeddingSize, embeddingSize + 1, + constantInitializerInfo); + std::vector initializeInfos = { normalInitializeInfo, constantInitializeInfo }; + + // initializerInfos的区间之间有重叠或者遗漏 日志打印出"Initializers got coverage problems" + ASSERT_EQ(embCache->CreateCacheForTable(embCacheInfo, initializeInfos, -1, hostVocabSize), H_INITIALIZER_INVALID); + + constantInitializeInfo = + EmbCache::InitializerInfo(constantInitializeName, embeddingSize + 1, embeddingSize, constantInitializerInfo); + initializeInfos = { normalInitializeInfo, constantInitializeInfo }; + // initializerInfos的区间之间有重叠或者遗漏 日志打印出"Initializers got coverage problems" + ASSERT_EQ(embCache->CreateCacheForTable(embCacheInfo, initializeInfos, -1, hostVocabSize), H_INITIALIZER_INVALID); + + + embCacheInfo.extEmbeddingSize = 
extEmbeddingSize; + std::string not_a_initializer_name = "not_a_initializer_name"; + constantInitializeInfo = + EmbCache::InitializerInfo(not_a_initializer_name, embeddingSize, embeddingSize, constantInitializerInfo); + initializeInfos = { normalInitializeInfo, constantInitializeInfo }; + + // 传入的Initializer的name不符要求 日志打印出"Invalid Initializer Type.\nInitializer is nullptr" + ASSERT_EQ(embCache->CreateCacheForTable(embCacheInfo, initializeInfos, -1, hostVocabSize), H_INITIALIZER_INVALID); + + constantInitializeInfo = + EmbCache::InitializerInfo(constantInitializeName, embeddingSize, embeddingSize, constantInitializerInfo); + initializeInfos = { normalInitializeInfo, constantInitializeInfo }; + + embCacheInfo.extEmbeddingSize++; + + // 传入的embInfo中的传入的extEmbeddingSize并非embeddingSize的整数倍 日志打印出"extEmbeddingSize = embeddingSize + + // optimizerSize, which is divisible by embeddingSize" + ASSERT_EQ(embCache->CreateCacheForTable(embCacheInfo, initializeInfos, -1, hostVocabSize), + H_EXT_EMBEDDING_SIZE_INVALID); + + embCacheInfo.maxCacheSize = 100; + // maxCacheSize>vocabSize 日志打印出"vocabSize must be greater than or equal to maxCacheSize" + ASSERT_EQ(embCache->CreateCacheForTable(embCacheInfo, initializeInfos, -1, hostVocabSize), + H_HOST_VOCAB_SIZE_TOO_SMALL); + embCacheInfo.maxCacheSize = devVocabSize; + + embCacheInfo.extEmbeddingSize = 0; + // extEmbeddingSize为0 日志打印出"size must be positive" + ASSERT_EQ(embCache->CreateCacheForTable(embCacheInfo, initializeInfos, -1, hostVocabSize), H_SIZE_ZERO); + embCacheInfo.extEmbeddingSize = extEmbeddingSize; + + embCacheInfo.embeddingSize = 0; + // embeddingSize为0 日志打印出"size must be positive" + ASSERT_EQ(embCache->CreateCacheForTable(embCacheInfo, initializeInfos, -1, hostVocabSize), H_SIZE_ZERO); + embCacheInfo.embeddingSize = embeddingSize; + + embCacheInfo.vocabSize = 0; + // vocabSize为0 日志打印出"size must be positive" + ASSERT_EQ(embCache->CreateCacheForTable(embCacheInfo, initializeInfos, -1, hostVocabSize), H_SIZE_ZERO); + embCacheInfo.vocabSize = hostVocabSize; + + embCacheInfo.maxCacheSize = 0; + // maxCacheSize为0 日志打印出"size must be positive" + ASSERT_EQ(embCache->CreateCacheForTable(embCacheInfo, initializeInfos, -1, hostVocabSize), H_SIZE_ZERO); + embCacheInfo.maxCacheSize = devVocabSize; + + embCacheInfo.tableName = ""; + // 传入的tableName空 日志打印出"tableName can not be empty" + ASSERT_EQ(embCache->CreateCacheForTable(embCacheInfo, initializeInfos, -1, hostVocabSize), H_TABLE_NAME_EMPTY); + + embCacheInfo.tableName = + "00000000010000000001000000000100000000010000000001000000000100000000010000000001000000000100000000010000000001" + "00000000010000000001000000000100000000010000000001000000000100000000010000000001000000000100000000010000000001" + "00000000010000000001000000000100000000010000000001000000000100000000010000000001000000000100000000010000000001" + "00000000010000000001000000000100000000010000000001000000000100000000010000000001000000000100000000010000000001" + "00000000010000000001000000000100000000010000000001000000000100000000010000000001000000000100000000010000000001" + "00000000010000000001000000000100000000010000000001000000000100000000010000000001000000000100000000010000000001" + "00000000010000000001000000000100000000010000000001000000000100000000010000000001000000000100000000010000000001" + "00000000010000000001000000000100000000010000000001000000000100000000010000000001000000000100000000010000000001" + "00000000010000000001000000000100000000010000000001000000000100000000010000000001000000000100000000010000000001" + 
"0000000001000000000100000000010001"; + // 传入的tableName长度正好为长度上限1024 + ASSERT_EQ(embCache->CreateCacheForTable(embCacheInfo, initializeInfos, -1, hostVocabSize), H_OK); + + embCacheInfo.tableName = + "00000000010000000001000000000100000000010000000001000000000100000000010000000001000000000100000000010000000001" + "00000000010000000001000000000100000000010000000001000000000100000000010000000001000000000100000000010000000001" + "00000000010000000001000000000100000000010000000001000000000100000000010000000001000000000100000000010000000001" + "00000000010000000001000000000100000000010000000001000000000100000000010000000001000000000100000000010000000001" + "00000000010000000001000000000100000000010000000001000000000100000000010000000001000000000100000000010000000001" + "00000000010000000001000000000100000000010000000001000000000100000000010000000001000000000100000000010000000001" + "00000000010000000001000000000100000000010000000001000000000100000000010000000001000000000100000000010000000001" + "00000000010000000001000000000100000000010000000001000000000100000000010000000001000000000100000000010000000001" + "00000000010000000001000000000100000000010000000001000000000100000000010000000001000000000100000000010000000001" + "00000000010000000001000000000100012"; + // 传入的tableName长度为1025超过了长度上限 + ASSERT_EQ(embCache->CreateCacheForTable(embCacheInfo, initializeInfos, -1, hostVocabSize), H_TABLE_NAME_TOO_LONG); + embCacheInfo.tableName = tableName; + + // 正常创建 日志中不会打印异常信息 + ASSERT_EQ(embCache->CreateCacheForTable(embCacheInfo, initializeInfos, -1, hostVocabSize), H_OK); + + // 重复创建同名Table 日志打印出"This table has already been created" + ASSERT_EQ(embCache->CreateCacheForTable(embCacheInfo, initializeInfos, -1, hostVocabSize), + H_TABLE_CREATE_DUPLICATE); + embCache->Destroy(); + + // Destroy后仍能正常创建 日志中不会打印异常信息 + ASSERT_EQ(embCache->CreateCacheForTable(embCacheInfo, initializeInfos, -1, hostVocabSize), H_OK); + embCache->Destroy(); + + // prefill单线程 + ASSERT_EQ(embCache->CreateCacheForTable(embCacheInfo, initializeInfos, -1, 3, 1), H_OK); + embCache->Destroy(); + + // prefill多线程 + ASSERT_EQ(embCache->CreateCacheForTable(embCacheInfo, initializeInfos, -1, 3, 3), H_OK); + embCache->Destroy(); + + // prefill多线程 + ASSERT_EQ(embCache->CreateCacheForTable(embCacheInfo, initializeInfos, -1, 3, 0), H_THREAD_NUM_ERROR); + embCache->Destroy(); + + // prefill过多线程 + ASSERT_EQ(embCache->CreateCacheForTable(embCacheInfo, initializeInfos, -1, 3, 10000), H_THREAD_NUM_ERROR); + embCache->Destroy(); + + // prefill 正常buffersize + ASSERT_EQ(embCache->CreateCacheForTable(embCacheInfo, initializeInfos, -1, 3, 1), H_OK); + embCache->Destroy(); + + // prefill 超大buffersize + ASSERT_EQ(embCache->CreateCacheForTable(embCacheInfo, initializeInfos, -1, 10, 1), H_PREFILL_BUFFER_SIZE_INVALID); + embCache->Destroy(); + + // prefill 0buffersize + ASSERT_EQ(embCache->CreateCacheForTable(embCacheInfo, initializeInfos, -1, 0, 1), H_PREFILL_BUFFER_SIZE_INVALID); + CTRLog(CTRLogLevel::INFO, "===========CreateCacheForTable end============="); +} + +TEST_F(EmbCacheTest, EMBEDDING_LOOKUP_ADDRS) +{ + CTRLog(CTRLogLevel::INFO, "===========EMBEDDING_LOOKUP_ADDRS start============="); + std::string tableName = "test_table"; + uint32_t hostVocabSize = 5; + uint32_t embeddingSize = 13; + uint32_t extEmbeddingSize = 26; + uint32_t devVocabSize = 2; + embCache = SimpleCreateTable(tableName, hostVocabSize, embeddingSize, extEmbeddingSize, devVocabSize); + std::vector lookupKeys; + std::vector addrs; + + lookupKeys = { 0, 1, 2, 3, 4 }; + 
ASSERT_EQ(embCache->EmbeddingLookupAddrs(tableName, lookupKeys, addrs), H_OK); + + // lookupkeys 为空 + lookupKeys = {}; + ASSERT_EQ(embCache->EmbeddingLookupAddrs(tableName, lookupKeys, addrs), H_OK); + + lookupKeys = { 0 }; + ASSERT_EQ(embCache->EmbeddingLookupAddrs("not_a_table", lookupKeys, addrs), H_TABLE_NOT_EXIST); + + ASSERT_EQ(embCache->EmbeddingLookupAddrs(tooLongTableName, lookupKeys, addrs), H_TABLE_NAME_TOO_LONG); + + lookupKeys = { 5 }; + ASSERT_EQ(embCache->EmbeddingLookupAddrs(tableName, lookupKeys, addrs), H_HOST_VOCAB_SIZE_TOO_SMALL); + + lookupKeys = { 5 }; + ASSERT_EQ(embCache->EmbeddingLookupAddrs(tableName, lookupKeys, addrs, 1), H_HOST_VOCAB_SIZE_TOO_SMALL); + + lookupKeys = { 0, 1, 4 }; + ASSERT_EQ(embCache->EmbeddingLookupAddrs(tableName, lookupKeys, addrs), H_OK); + + lookupKeys = { 0, 1, 4 }; + uint32_t threadNum = std::thread::hardware_concurrency(); + ASSERT_EQ(embCache->EmbeddingLookupAddrs(tableName, lookupKeys, addrs, threadNum + 1), H_THREAD_NUM_ERROR); + ASSERT_EQ(embCache->EmbeddingLookupAddrs(tableName, lookupKeys, addrs, threadNum), H_OK); + // 单线程lookup + ASSERT_EQ(embCache->EmbeddingLookupAddrs(tableName, lookupKeys, addrs, 1), H_OK); + ASSERT_EQ(embCache->EmbeddingLookupAddrs(tableName, lookupKeys, addrs, 0), H_THREAD_NUM_ERROR); + embCache->Destroy(); +} + +TEST_F(EmbCacheTest, EMBEDDING_LOOKUP_ADDRS_DATA) +{ + CTRLog(CTRLogLevel::INFO, "===========EMBEDDING_LOOKUP_ADDRS_DATA start============="); + factory->CreateEmbCacheManager(embCache); + std::string tableName = "test_table"; + uint32_t hostVocabSize = 3000000; + uint32_t embeddingSize = 13; + uint32_t extEmbeddingSize = 39; + uint32_t devVocabSize = 100000; + EmbCache::EmbCacheInfo embCacheInfo(tableName, hostVocabSize, embeddingSize, extEmbeddingSize, devVocabSize); + std::string normalInitializeName = "random_normal_initializer"; + std::string constantInitializeName = "constant_initializer"; + EmbCache::NormalInitializerInfo normalInitializerInfo(0, 0.05, 0, 1.0); + EmbCache::ConstantInitializerInfo constantInitializerInfo(0.233, 1.0); + + std::string truncatedNormalInitializeName = "truncated_normal_initializer"; + // 加入所有初始化器的所有分支 + std::vector initializeInfos = { + EmbCache::InitializerInfo(normalInitializeName, 0, embeddingSize, normalInitializerInfo), + EmbCache::InitializerInfo(normalInitializeName, embeddingSize, 0, normalInitializerInfo), + EmbCache::InitializerInfo(constantInitializeName, embeddingSize, embeddingSize, constantInitializerInfo), + EmbCache::InitializerInfo(constantInitializeName, 2 * embeddingSize, 0, constantInitializerInfo), + EmbCache::InitializerInfo(truncatedNormalInitializeName, 2 * embeddingSize, embeddingSize, + normalInitializerInfo), + EmbCache::InitializerInfo(truncatedNormalInitializeName, 3 * embeddingSize, 0, normalInitializerInfo), + }; + // 正确创建 + ASSERT_EQ(embCache->CreateCacheForTable(embCacheInfo, initializeInfos), H_OK); + std::vector lookupKeys; + std::vector addrs; + lookupKeys = GenKeys(hostVocabSize, 123321); + ASSERT_EQ(embCache->EmbeddingLookupAddrs(tableName, lookupKeys, addrs), H_OK); + + long double sum = 0.0; + long double cnt = 0.0; + long double accum = 0.0; + for (uint32_t i = 0; i < lookupKeys.size(); i++) { + // normalInitializer 生成数据 + for (uint32_t j = 0; j < embeddingSize; j++) { + sum += addrs[i][j]; + cnt++; + } + + // constantInitializer 生成数据 + for (uint32_t j = embeddingSize; j < 2 * embeddingSize; j++) { + ASSERT_LE(std::abs(addrs[i][j] - 0.233), 1e-6f); + } + // truncatedNormalInitializer 生成数据 + for (uint32_t j = 2 * 
embeddingSize; j < 3 * embeddingSize; j++) { + // 在[-2*stddev, 2*stddev]范围中 + ASSERT_LE(std::abs(addrs[i][j]), 0.1f + 1e-6f); + } + } + + long double mean = sum / cnt; + for (uint32_t i = 0; i < lookupKeys.size(); ++i) { + for (uint32_t j = 0; j < embeddingSize; j++) { + accum += (addrs[i][j] - mean) * (addrs[i][j] - mean); + } + } + long double stdev = sqrt(accum / cnt); + ASSERT_LE(std::abs(mean), 5e-6f); + ASSERT_LE(std::abs(stdev - 0.05), 5e-6f); + CTRLog(CTRLogLevel::INFO, "===========EMBEDDING_LOOKUP_ADDRS_DATA end============="); +} + +TEST_F(EmbCacheTest, EMBEDDING_LOOKUP_300W) +{ + CTRLog(CTRLogLevel::INFO, "===========EMBEDDING_LOOKUP_300W start============="); + factory->CreateEmbCacheManager(embCache); + std::string tableName = "test_table"; + uint32_t hostVocabSize = 3000000; + uint32_t embeddingSize = 13; + uint32_t extEmbeddingSize = 39; + uint32_t devVocabSize = 100000; + EmbCache::EmbCacheInfo embCacheInfo(tableName, hostVocabSize, embeddingSize, extEmbeddingSize, devVocabSize); + std::string normalInitializeName = "random_normal_initializer"; + std::string constantInitializeName = "constant_initializer"; + EmbCache::NormalInitializerInfo normalInitializerInfo(0, 0.05, 0, 1.0); + EmbCache::ConstantInitializerInfo constantInitializerInfo(0.233, 1.0); + + std::string truncatedNormalInitializeName = "truncated_normal_initializer"; + // 加入所有初始化器的所有分支 + std::vector initializeInfos = { + EmbCache::InitializerInfo(normalInitializeName, 0, embeddingSize, normalInitializerInfo), + EmbCache::InitializerInfo(normalInitializeName, embeddingSize, 0, normalInitializerInfo), + EmbCache::InitializerInfo(constantInitializeName, embeddingSize, embeddingSize, constantInitializerInfo), + EmbCache::InitializerInfo(constantInitializeName, 2 * embeddingSize, 0, constantInitializerInfo), + EmbCache::InitializerInfo(truncatedNormalInitializeName, 2 * embeddingSize, embeddingSize, + normalInitializerInfo), + EmbCache::InitializerInfo(truncatedNormalInitializeName, 3 * embeddingSize, 0, normalInitializerInfo), + }; + // 正确创建 + ASSERT_EQ(embCache->CreateCacheForTable(embCacheInfo, initializeInfos), H_OK); + std::vector lookupKeys; + float *addr; + lookupKeys = GenKeys(hostVocabSize, 123321); + addr = (float *)malloc(lookupKeys.size() * extEmbeddingSize * sizeof(float)); + ASSERT_EQ(embCache->EmbeddingLookup(tableName, lookupKeys, addr), H_OK); + + long double sum = 0.0; + long double cnt = 0.0; + long double accum = 0.0; + for (uint32_t i = 0; i < lookupKeys.size(); i++) { + // normalInitializer 生成数据 + for (uint32_t j = 0; j < embeddingSize; j++) { + sum += addr[i * extEmbeddingSize + j]; + cnt++; + } + + // constantInitializer 生成数据 + for (uint32_t j = embeddingSize; j < 2 * embeddingSize; j++) { + ASSERT_LE(std::abs(addr[i * extEmbeddingSize + j] - 0.233), 1e-6f); + } + // truncatedNormalInitializer 生成数据 + for (uint32_t j = 2 * embeddingSize; j < 3 * embeddingSize; j++) { + // 在[-2*stddev, 2*stddev]范围中 + ASSERT_LE(std::abs(addr[i * extEmbeddingSize + j]), 0.1f + 1e-6f); + } + } + + long double mean = sum / cnt; + for (uint32_t i = 0; i < lookupKeys.size(); ++i) { + for (uint32_t j = 0; j < embeddingSize; j++) { + accum += (addr[i * extEmbeddingSize + j] - mean) * (addr[i * extEmbeddingSize + j] - mean); + } + } + long double stdev = sqrt(accum / cnt); + ASSERT_LE(std::abs(mean), 5e-6f); + ASSERT_LE(std::abs(stdev - 0.05), 5e-6f); + free(addr); + CTRLog(CTRLogLevel::INFO, "===========GenerateData end============="); +} + +TEST_F(EmbCacheTest, EMBEDDING_LOOKUP_AND_REMOVE) +{ + 
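+    // Semantics under test: EmbeddingLookupAndRemove returns the current embeddings and then
+    // evicts the keys from the host table, so a later lookup re-initializes them (see the
+    // const-zero verification in EMBEDDING_LOOKUP_AND_REMOVE_300W below).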
CTRLog(CTRLogLevel::INFO, "===========EMBEDDING_LOOKUP_AND_REMOVE start============="); + std::string tableName = "test_table"; + uint32_t hostVocabSize = 5; + uint32_t embeddingSize = 13; + uint32_t extEmbeddingSize = 26; + uint32_t devVocabSize = 2; + embCache = SimpleCreateTable(tableName, hostVocabSize, embeddingSize, extEmbeddingSize, devVocabSize); + std::vector lookupKeys; + float *addr; + + lookupKeys = { 0, 1, 2, 3, 4 }; + addr = (float *)malloc(lookupKeys.size() * extEmbeddingSize * sizeof(float)); + ASSERT_EQ(embCache->EmbeddingLookupAndRemove(tableName, lookupKeys, addr), H_OK); + free(addr); + + // lookupkeys 为空 + lookupKeys = {}; + addr = (float *)malloc(lookupKeys.size() * extEmbeddingSize * sizeof(float)); + ASSERT_EQ(embCache->EmbeddingLookupAndRemove(tableName, lookupKeys, addr), H_OK); + free(addr); + + lookupKeys = { 0 }; + addr = nullptr; + ASSERT_EQ(embCache->EmbeddingLookupAndRemove("not_a_table", lookupKeys, addr), H_TABLE_NOT_EXIST); + + ASSERT_EQ(embCache->EmbeddingLookupAndRemove(tooLongTableName, lookupKeys, addr), H_TABLE_NAME_TOO_LONG); + + lookupKeys = { 0 }; + addr = nullptr; + ASSERT_EQ(embCache->EmbeddingLookupAndRemove(tableName, lookupKeys, addr), H_ADDRESS_NULL); + + lookupKeys = { 0, 1, 4 }; + addr = (float *)malloc(lookupKeys.size() * extEmbeddingSize * sizeof(float)); + uint32_t threadNum = std::thread::hardware_concurrency(); + ASSERT_EQ(embCache->EmbeddingLookupAndRemove(tableName, lookupKeys, addr, threadNum + 1), H_THREAD_NUM_ERROR); + ASSERT_EQ(embCache->EmbeddingLookupAndRemove(tableName, lookupKeys, addr, threadNum), H_OK); + // 单线程lookup + ASSERT_EQ(embCache->EmbeddingLookupAndRemove(tableName, lookupKeys, addr, 1), H_OK); + ASSERT_EQ(embCache->EmbeddingLookupAndRemove(tableName, lookupKeys, addr, 0), H_THREAD_NUM_ERROR); + free(addr); + embCache->Destroy(); + + CTRLog(CTRLogLevel::INFO, "===========EMBEDDING_LOOKUP_AND_REMOVE end============="); +} + +TEST_F(EmbCacheTest, EMBEDDING_LOOKUP_AND_REMOVE_2) +{ + CTRLog(CTRLogLevel::INFO, "===========EMBEDDING_LOOKUP_AND_REMOVE_2 start============="); + std::string tableName = "test_table"; + uint32_t hostVocabSize = 200; + uint32_t embeddingSize = 13; + uint32_t extEmbeddingSize = 26; + uint32_t devVocabSize = 2; + embCache = SimpleCreateTable(tableName, hostVocabSize, embeddingSize, extEmbeddingSize, devVocabSize); + std::vector lookupKeys; + float *addr; + + for (int i = 0; i < 100; i++) { + for (int j = 0; j < 2; j++) { + lookupKeys.emplace_back(i); + } + } + + addr = (float *)malloc(lookupKeys.size() * extEmbeddingSize * sizeof(float)); + ASSERT_EQ(embCache->EmbeddingLookup(tableName, lookupKeys, addr), H_OK); + ASSERT_EQ(embCache->EmbeddingLookupAndRemove(tableName, lookupKeys, addr, 1), H_OK); + ASSERT_EQ(embCache->EmbeddingLookup(tableName, lookupKeys, addr), H_OK); + ASSERT_EQ(embCache->EmbeddingLookupAndRemove(tableName, lookupKeys, addr), H_OK); + free(addr); + embCache->Destroy(); + + CTRLog(CTRLogLevel::INFO, "===========EMBEDDING_LOOKUP_AND_REMOVE_2 end============="); +} + +TEST_F(EmbCacheTest, EMBEDDING_LOOKUP) +{ + CTRLog(CTRLogLevel::INFO, "===========EMBEDDING_LOOKUP start============="); + std::string tableName = "test_table"; + uint32_t hostVocabSize = 5; + uint32_t embeddingSize = 13; + uint32_t extEmbeddingSize = 26; + uint32_t devVocabSize = 2; + embCache = SimpleCreateTable(tableName, hostVocabSize, embeddingSize, extEmbeddingSize, devVocabSize); + std::vector lookupKeys; + float *addr; + + lookupKeys = { 0, 1, 2, 3, 4 }; + addr = (float *)malloc(lookupKeys.size() * 
extEmbeddingSize * sizeof(float)); + ASSERT_EQ(embCache->EmbeddingLookup(tableName, lookupKeys, addr), H_OK); + free(addr); + + // lookupkeys 为空 + lookupKeys = {}; + addr = (float *)malloc(lookupKeys.size() * extEmbeddingSize * sizeof(float)); + ASSERT_EQ(embCache->EmbeddingLookup(tableName, lookupKeys, addr), H_OK); + free(addr); + + lookupKeys = { 0 }; + addr = nullptr; + ASSERT_EQ(embCache->EmbeddingLookup("not_a_table", lookupKeys, addr), H_TABLE_NOT_EXIST); + + ASSERT_EQ(embCache->EmbeddingLookup(tooLongTableName, lookupKeys, addr), H_TABLE_NAME_TOO_LONG); + + lookupKeys = { 5 }; + addr = (float *)malloc(lookupKeys.size() * extEmbeddingSize * sizeof(float)); + ASSERT_EQ(embCache->EmbeddingLookup(tableName, lookupKeys, addr), H_HOST_VOCAB_SIZE_TOO_SMALL); + free(addr); + + lookupKeys = { 0 }; + addr = nullptr; + ASSERT_EQ(embCache->EmbeddingLookup(tableName, lookupKeys, addr), H_ADDRESS_NULL); + + lookupKeys = { 0, 1, 4 }; + addr = (float *)malloc(lookupKeys.size() * extEmbeddingSize * sizeof(float)); + ASSERT_EQ(embCache->EmbeddingLookup(tableName, lookupKeys, addr), H_OK); + free(addr); + + lookupKeys = { 0, 1, 4 }; + addr = (float *)malloc(lookupKeys.size() * extEmbeddingSize * sizeof(float)); + uint32_t threadNum = std::thread::hardware_concurrency(); + ASSERT_EQ(embCache->EmbeddingLookup(tableName, lookupKeys, addr, threadNum + 1), H_THREAD_NUM_ERROR); + ASSERT_EQ(embCache->EmbeddingLookup(tableName, lookupKeys, addr, threadNum), H_OK); + // 单线程lookup + ASSERT_EQ(embCache->EmbeddingLookup(tableName, lookupKeys, addr, 1), H_OK); + ASSERT_EQ(embCache->EmbeddingLookup(tableName, lookupKeys, addr, 0), H_THREAD_NUM_ERROR); + free(addr); + embCache->Destroy(); + + CTRLog(CTRLogLevel::INFO, "===========EMBEDDING_LOOKUP end============="); +} + +TEST_F(EmbCacheTest, EMBEDDING_LOOKUP_AND_REMOVE_300W) +{ + CTRLog(CTRLogLevel::INFO, "===========EMBEDDING_LOOKUP_AND_REMOVE_300W start============="); + std::string tableName = "test_table"; + std::vector lookupKeys; + float *newEmb; + + // 300w个key + uint32_t hostVocabSize = 3000000; + uint32_t embeddingSize = 13; + uint32_t extEmbeddingSize = 26; + uint32_t devVocabSize = 100000; + embCache = ConstZeroCreateTable(tableName, hostVocabSize, embeddingSize, extEmbeddingSize, devVocabSize); + lookupKeys = GenUniqueKeys(hostVocabSize); + newEmb = (float *)malloc(lookupKeys.size() * extEmbeddingSize * sizeof(float)); + for (uint32_t i = 0; i < lookupKeys.size(); i++) { + for (uint32_t j = 0; j < extEmbeddingSize; j++) { + newEmb[i * extEmbeddingSize + j] = i + 0.01f * j; // 生成特殊数据 + } + } + CTRLog(CTRLogLevel::INFO, "gen done"); + // 把特殊数据放到表中 + ASSERT_EQ(embCache->EmbeddingUpdate(tableName, lookupKeys, newEmb), H_OK); + free(newEmb); + CTRLog(CTRLogLevel::INFO, "EmbeddingUpdate done"); + + float *addr; + addr = (float *)malloc(lookupKeys.size() * extEmbeddingSize * sizeof(float)); + // 查询特殊数据 + ASSERT_EQ(embCache->EmbeddingLookup(tableName, lookupKeys, addr), H_OK); + CTRLog(CTRLogLevel::INFO, "EmbeddingLookup done"); + for (uint32_t i = 0; i < lookupKeys.size(); i++) { + for (uint32_t j = 0; j < extEmbeddingSize; j++) { + // 验证表中数据正确性 + ASSERT_LE(std::abs(addr[i * extEmbeddingSize + j] - (i + 0.01f * j)), 1e-6f); + } + } + free(addr); + addr = nullptr; + + // Remove之后再Lookup,观察这些embedding是不是被正确remove + // 首先确认EmbeddingLookupAndRemove不会报错 + addr = (float *)malloc(lookupKeys.size() * extEmbeddingSize * sizeof(float)); + ASSERT_EQ(embCache->EmbeddingLookupAndRemove(tableName, lookupKeys, addr, 4), H_OK); + for (uint32_t i = 0; i < lookupKeys.size(); 
i++) { + for (uint32_t j = 0; j < extEmbeddingSize; j++) { + // 验证表中数据正确性 + ASSERT_LE(std::abs(addr[i * extEmbeddingSize + j] - (i + 0.01f * j)), 1e-6f); + } + } + free(addr); + addr = nullptr; + addr = (float *)malloc(lookupKeys.size() * extEmbeddingSize * sizeof(float)); + // 然后再lookup,并确保lookup不会报错 + ASSERT_EQ(embCache->EmbeddingLookup(tableName, lookupKeys, addr), H_OK); + // 因为用const zero初始化, EmbeddingLookupAndRemove之后再lookup,结果应该全是0 + for (uint32_t i = 0; i < lookupKeys.size(); i++) { + for (uint32_t j = 0; j < extEmbeddingSize; j++) { + // 验证表中数据正确性 + ASSERT_LE(std::abs(addr[i * extEmbeddingSize + j] - 0), 1e-6f); + } + } + free(addr); + + CTRLog(CTRLogLevel::INFO, "===========EMBEDDING_LOOKUP_AND_REMOVE_300W end============="); +} + +TEST_F(EmbCacheTest, EMBEDDING_UPDATE_300W) +{ + CTRLog(CTRLogLevel::INFO, "===========EMBEDDING_UPDATE_300W start============="); + std::string tableName = "test_table"; + std::vector lookupKeys; + float *newEmb; + + // 300w个key + uint32_t hostVocabSize = 3000000; + uint32_t embeddingSize = 13; + uint32_t extEmbeddingSize = 26; + uint32_t devVocabSize = 100000; + embCache = ConstZeroCreateTable(tableName, hostVocabSize, embeddingSize, extEmbeddingSize, devVocabSize, 50000, 6); + lookupKeys = GenKeys(hostVocabSize, 123321); + newEmb = (float *)malloc(lookupKeys.size() * extEmbeddingSize * sizeof(float)); + for (uint32_t i = 0; i < lookupKeys.size(); i++) { + for (uint32_t j = 0; j < extEmbeddingSize; j++) { + newEmb[i * extEmbeddingSize + j] = i + 0.01f * j; // 生成特殊数据 + } + } + CTRLog(CTRLogLevel::INFO, "gen done"); + // 把特殊数据放到表中 + ASSERT_EQ(embCache->EmbeddingUpdate(tableName, lookupKeys, newEmb), H_OK); + free(newEmb); + CTRLog(CTRLogLevel::INFO, "EmbeddingUpdate done"); + + float *addr; + addr = (float *)malloc(lookupKeys.size() * extEmbeddingSize * sizeof(float)); + // 查询特殊数据 + ASSERT_EQ(embCache->EmbeddingLookup(tableName, lookupKeys, addr), H_OK); + CTRLog(CTRLogLevel::INFO, "EmbeddingLookup done"); + for (uint32_t i = 0; i < lookupKeys.size(); i++) { + for (uint32_t j = 0; j < extEmbeddingSize; j++) { + // 验证表中数据正确性 + ASSERT_LE(std::abs(addr[i * extEmbeddingSize + j] - (i + 0.01f * j)), 1e-6f); + } + } + // Remove之后再Lookup,观察这些embedding是不是被正确remove + // 首先确认remove不会报错 + ASSERT_EQ(embCache->RemoveEmbsByKeys(tableName, lookupKeys), H_OK); + // 然后再lookup,并确保lookup不会报错 + ASSERT_EQ(embCache->EmbeddingLookup(tableName, lookupKeys, addr), H_OK); + // 因为用const zero初始化, 删除之后再lookup,结果应该全是0 + for (uint32_t i = 0; i < lookupKeys.size(); i++) { + for (uint32_t j = 0; j < extEmbeddingSize; j++) { + // 验证表中数据正确性 + ASSERT_LE(std::abs(addr[i * extEmbeddingSize + j] - 0), 1e-6f); + } + } + free(addr); + + CTRLog(CTRLogLevel::INFO, "===========EMBEDDING_UPDATE_300W end============="); +} + +TEST_F(EmbCacheTest, EMBEDDING_UPDATE) +{ + CTRLog(CTRLogLevel::INFO, "===========EMBEDDING_UPDATE start============="); + std::string tableName = "test_table"; + uint32_t hostVocabSize = 5; + uint32_t embeddingSize = 13; + uint32_t extEmbeddingSize = 26; + uint32_t devVocabSize = 2; + embCache = SimpleCreateTable(tableName, hostVocabSize, embeddingSize, extEmbeddingSize, devVocabSize); + std::vector lookupKeys; + float *newEmb; + + lookupKeys = { 0, 1, 2, 3, 4 }; + newEmb = (float *)malloc(lookupKeys.size() * extEmbeddingSize * sizeof(float)); + for (uint32_t i = 0; i < lookupKeys.size() * extEmbeddingSize; i++) { + newEmb[i] = 0.01f * i; + } + + // 更新存在的table,应当正常更新 + ASSERT_EQ(embCache->EmbeddingUpdate(tableName, lookupKeys, newEmb), H_OK); + free(newEmb); + + lookupKeys = 
{ 0 }; + newEmb = nullptr; + // 更新不存在的table + ASSERT_EQ(embCache->EmbeddingUpdate("not_a_table", lookupKeys, newEmb), H_TABLE_NOT_EXIST); + + // 表名超过上限 + ASSERT_EQ(embCache->EmbeddingUpdate(tooLongTableName, lookupKeys, newEmb), H_TABLE_NAME_TOO_LONG); + + lookupKeys = { 5 }; + newEmb = (float *)malloc(lookupKeys.size() * extEmbeddingSize * sizeof(float)); + for (uint32_t i = 0; i < lookupKeys.size() * extEmbeddingSize; i++) { + newEmb[i] = 0.01f * i; + } + + // 当前embLocalTable中存储的key已达到hostVocabSize上限,并继续添加新key + ASSERT_EQ(embCache->EmbeddingUpdate(tableName, lookupKeys, newEmb), H_HOST_VOCAB_SIZE_TOO_SMALL); + free(newEmb); + + lookupKeys = { 0 }; + newEmb = nullptr; + // 传入embAddr为空指针 + ASSERT_EQ(embCache->EmbeddingUpdate(tableName, lookupKeys, newEmb), H_ADDRESS_NULL); + + // 更新存在于table的keys, 传入embAddr不为空指针 + lookupKeys = { 0, 1, 4 }; + newEmb = (float *)malloc(lookupKeys.size() * extEmbeddingSize * sizeof(float)); + for (uint32_t i = 0; i < lookupKeys.size() * extEmbeddingSize; i++) { + newEmb[i] = 0.01f * i; + } + ASSERT_EQ(embCache->EmbeddingUpdate(tableName, lookupKeys, newEmb), H_OK); + free(newEmb); + + // 线程数未超过核数 + lookupKeys = { 0, 1, 4 }; + newEmb = (float *)malloc(lookupKeys.size() * extEmbeddingSize * sizeof(float)); + for (uint32_t i = 0; i < lookupKeys.size() * extEmbeddingSize; i++) { + newEmb[i] = 0.01f * i; + } + ASSERT_EQ(embCache->EmbeddingUpdate(tableName, lookupKeys, newEmb, 4), H_OK); + free(newEmb); + + // 线程数等于核数 + uint32_t processCoreNum = std::thread::hardware_concurrency(); + lookupKeys = { 0, 1, 4 }; + newEmb = (float *)malloc(lookupKeys.size() * extEmbeddingSize * sizeof(float)); + for (uint32_t i = 0; i < lookupKeys.size() * extEmbeddingSize; i++) { + newEmb[i] = 0.01f * i; + } + ASSERT_EQ(embCache->EmbeddingUpdate(tableName, lookupKeys, newEmb, processCoreNum), H_OK); + free(newEmb); + + // 线程数大于核数 + processCoreNum = std::thread::hardware_concurrency(); + lookupKeys = { 0, 1, 4 }; + newEmb = (float *)malloc(lookupKeys.size() * extEmbeddingSize * sizeof(float)); + for (uint32_t i = 0; i < lookupKeys.size() * extEmbeddingSize; i++) { + newEmb[i] = 0.01f * i; + } + ASSERT_EQ(embCache->EmbeddingUpdate(tableName, lookupKeys, newEmb, processCoreNum + 1), H_THREAD_NUM_ERROR); + free(newEmb); + + // 线程数为0 + processCoreNum = std::thread::hardware_concurrency(); + lookupKeys = { 0, 1, 4 }; + newEmb = (float *)malloc(lookupKeys.size() * extEmbeddingSize * sizeof(float)); + for (uint32_t i = 0; i < lookupKeys.size() * extEmbeddingSize; i++) { + newEmb[i] = 0.01f * i; + } + ASSERT_EQ(embCache->EmbeddingUpdate(tableName, lookupKeys, newEmb, 0), H_THREAD_NUM_ERROR); + free(newEmb); + + // 线程数为1 + lookupKeys = { 0, 1, 4 }; + newEmb = (float *)malloc(lookupKeys.size() * extEmbeddingSize * sizeof(float)); + for (uint32_t i = 0; i < lookupKeys.size() * extEmbeddingSize; i++) { + newEmb[i] = 0.01f * i; + } + ASSERT_EQ(embCache->EmbeddingUpdate(tableName, lookupKeys, newEmb, 1), H_OK); + free(newEmb); + + // lookupkeys为空 + lookupKeys = {}; + newEmb = (float *)malloc(lookupKeys.size() * extEmbeddingSize * sizeof(float)); + for (uint32_t i = 0; i < lookupKeys.size() * extEmbeddingSize; i++) { + newEmb[i] = 0.01f * i; + } + ASSERT_EQ(embCache->EmbeddingUpdate(tableName, lookupKeys, newEmb, 1), H_OK); + free(newEmb); + + TearDown(); + + // 更新不存在于table的key,且当前embLocalTable中存储的key未达到hostVocabSize上限,继续添加新key + tableName = "test_table_one"; + embCache = SimpleCreateTable(tableName, hostVocabSize, embeddingSize, extEmbeddingSize, devVocabSize); + lookupKeys = { 0, 1 }; + newEmb = 
(float *)malloc(lookupKeys.size() * extEmbeddingSize * sizeof(float));
+    for (uint32_t i = 0; i < lookupKeys.size() * extEmbeddingSize; i++) {
+        newEmb[i] = 0.01f * i;
+    }
+    embCache->EmbeddingUpdate(tableName, lookupKeys, newEmb);
+    free(newEmb);
+    lookupKeys = { 2, 3 };
+    newEmb = (float *)malloc(lookupKeys.size() * extEmbeddingSize * sizeof(float));
+    for (uint32_t i = 0; i < lookupKeys.size() * extEmbeddingSize; i++) {
+        newEmb[i] = 0.01f * i;
+    }
+    ASSERT_EQ(embCache->EmbeddingUpdate(tableName, lookupKeys, newEmb), H_OK);
+    free(newEmb);
+
+    CTRLog(CTRLogLevel::INFO, "===========EMBEDDING_UPDATE end=============");
+}
+
+TEST_F(EmbCacheTest, GetSwapPairsAndKey2Offset)
+{
+    CTRLog(CTRLogLevel::INFO, "===========GetSwapPairsAndKey2Offset start=============");
+    std::string tableName = "test_table";
+    uint32_t hostVocabSize = 100;
+    uint32_t embeddingSize = 13;
+    uint32_t extEmbeddingSize = 26;
+    uint32_t devVocabSize = 10;
+    embCache = SimpleCreateTable(tableName, hostVocabSize, embeddingSize, extEmbeddingSize, devVocabSize);
+    std::vector<uint64_t> insertKeys;
+    std::pair<std::vector<uint64_t>, std::vector<uint32_t>> swapInKoPair, swapOutKoPair;
+
+    // use a table that does not exist
+    insertKeys = { 0, 1, 2, 3, 4 };
+    ASSERT_EQ(embCache->GetSwapPairsAndKey2Offset("not_a_table", insertKeys, swapInKoPair, swapOutKoPair),
+        H_TABLE_NOT_EXIST);
+
+    ASSERT_EQ(embCache->GetSwapPairsAndKey2Offset(tooLongTableName, insertKeys, swapInKoPair, swapOutKoPair),
+        H_TABLE_NAME_TOO_LONG);
+
+    // normal lookup of keys that do not exist yet
+    insertKeys = { 0, 1, 2, 3, 4 };
+    ASSERT_EQ(embCache->GetSwapPairsAndKey2Offset(tableName, insertKeys, swapInKoPair, swapOutKoPair), H_OK);
+    bool ret1 = true;
+    for (uint64_t i = 0; i < swapInKoPair.first.size(); i++) {
+        if (swapInKoPair.first[i] != i) {
+            string msg = "the " + std::to_string(i) + "th has key " + std::to_string(swapInKoPair.first[i]) +
+                ", but expect " + std::to_string(i);
+            CTRLog(CTRLogLevel::INFO, msg.c_str());
+            ret1 = false;
+        }
+    }
+    ASSERT_EQ(ret1, true);
+
+    // normal lookup of keys that already exist
+    std::pair<std::vector<uint64_t>, std::vector<uint32_t>> swapInKoPair2, swapOutKoPair2;
+    insertKeys = { 1, 2, 3 };
+    ASSERT_EQ(embCache->GetSwapPairsAndKey2Offset(tableName, insertKeys, swapInKoPair2, swapOutKoPair2), H_OK);
+    uint64_t uint_zero = 0;
+    ASSERT_EQ(swapInKoPair2.first.size(), uint_zero);
+
+    std::pair<std::vector<uint64_t>, std::vector<uint32_t>> swapInKoPair3, swapOutKoPair3;
+    insertKeys = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
+    // pass koPair arguments that are not empty
+    ASSERT_EQ(embCache->GetSwapPairsAndKey2Offset(tableName, insertKeys, swapInKoPair, swapOutKoPair3),
+        H_ARG_NOT_EMPTY);
+    ASSERT_EQ(embCache->GetSwapPairsAndKey2Offset(tableName, insertKeys, swapInKoPair3, swapInKoPair), H_ARG_NOT_EMPTY);
+    // inserted keys exactly reach the maxCacheSize limit
+    ASSERT_EQ(embCache->GetSwapPairsAndKey2Offset(tableName, insertKeys, swapInKoPair3, swapOutKoPair3), H_OK);
+
+    // inserted keys just exceed the maxCacheSize limit
+    std::pair<std::vector<uint64_t>, std::vector<uint32_t>> swapInKoPair4, swapOutKoPair4;
+    insertKeys = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
+    ASSERT_EQ(embCache->GetSwapPairsAndKey2Offset(tableName, insertKeys, swapInKoPair4, swapOutKoPair4),
+        H_MAX_CACHESIZE_TOO_SMALL);
+
+    embCache->Destroy();
+    // a single insert of keys exceeds the maxCacheSize limit
+    embCache = SimpleCreateTable(tableName, hostVocabSize, embeddingSize, extEmbeddingSize, devVocabSize);
+    std::pair<std::vector<uint64_t>, std::vector<uint32_t>> swapInKoPair5, swapOutKoPair5;
+    insertKeys = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14 };
+    ASSERT_EQ(embCache->GetSwapPairsAndKey2Offset(tableName, insertKeys, swapInKoPair5, swapOutKoPair5),
+        H_MAX_CACHESIZE_TOO_SMALL);
+
+    embCache->Destroy();
+    // after a single insert exactly reaches the limit, look up the existing keys again
+    embCache = SimpleCreateTable(tableName, hostVocabSize, embeddingSize, extEmbeddingSize, devVocabSize);
+    std::pair<std::vector<uint64_t>, std::vector<uint32_t>> swapInKoPair6, swapOutKoPair6;
+    insertKeys = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
+    ASSERT_EQ(embCache->GetSwapPairsAndKey2Offset(tableName, insertKeys, swapInKoPair6, swapOutKoPair6), H_OK);
+
+    embCache->Destroy();
+    // two consecutive inserts stay below the limit; the third insert reaches it
+    embCache = SimpleCreateTable(tableName, hostVocabSize, embeddingSize, extEmbeddingSize, devVocabSize);
+    std::pair<std::vector<uint64_t>, std::vector<uint32_t>> swapInKoPair7, swapOutKoPair7;
+    insertKeys = { 0, 1, 2, 3, 4 };
+    ASSERT_EQ(embCache->GetSwapPairsAndKey2Offset(tableName, insertKeys, swapInKoPair7, swapOutKoPair7), H_OK);
+
+    std::pair<std::vector<uint64_t>, std::vector<uint32_t>> swapInKoPair8, swapOutKoPair8;
+    insertKeys = { 5, 6, 7, 8, 9 };
+    ASSERT_EQ(embCache->GetSwapPairsAndKey2Offset(tableName, insertKeys, swapInKoPair8, swapOutKoPair8), H_OK);
+
+    std::pair<std::vector<uint64_t>, std::vector<uint32_t>> swapInKoPair9, swapOutKoPair9;
+    insertKeys = { 10, 11, 12, 13, 14 };
+    ASSERT_EQ(embCache->GetSwapPairsAndKey2Offset(tableName, insertKeys, swapInKoPair9, swapOutKoPair9), H_OK);
+
+    embCache->Destroy();
+    // look up INVALID_KEY
+    embCache = SimpleCreateTable(tableName, hostVocabSize, embeddingSize, extEmbeddingSize, devVocabSize);
+    std::pair<std::vector<uint64_t>, std::vector<uint32_t>> swapInKoPair10, swapOutKoPair10;
+    uint64_t neg_one = static_cast<uint64_t>(-1);
+    insertKeys = { neg_one, neg_one, neg_one, neg_one, neg_one };
+    ASSERT_EQ(embCache->GetSwapPairsAndKey2Offset(tableName, insertKeys, swapInKoPair10, swapOutKoPair10), H_OK);
+    ASSERT_EQ(swapInKoPair10.first.empty(), true);
+    ASSERT_EQ(swapInKoPair10.second.empty(), true);
+    ASSERT_EQ(swapOutKoPair10.first.empty(), true);
+    ASSERT_EQ(swapOutKoPair10.second.empty(), true);
+
+    // look up an empty key list
+    std::pair<std::vector<uint64_t>, std::vector<uint32_t>> swapInKoPair11, swapOutKoPair11;
+    insertKeys = {};
+    ASSERT_EQ(embCache->GetSwapPairsAndKey2Offset(tableName, insertKeys, swapInKoPair11, swapOutKoPair11), H_OK);
+    ASSERT_EQ(swapInKoPair11.first.empty(), true);
+    ASSERT_EQ(swapInKoPair11.second.empty(), true);
+    ASSERT_EQ(swapOutKoPair11.first.empty(), true);
+    ASSERT_EQ(swapOutKoPair11.second.empty(), true);
+    CTRLog(CTRLogLevel::INFO, "===========GetSwapPairsAndKey2Offset end=============");
+}
+
+bool checkKeys(std::set<uint64_t> &keySet, std::vector<std::set<uint64_t>> &historyKeyVec,
+    const std::vector<uint64_t> &keys, const std::vector<uint64_t> &swapInKeys,
+    const std::vector<uint64_t> &swapOutKeys, uint32_t maxCacheSize)
+{
+    std::set<uint64_t> newKeys;
+    for (auto key : keys) {
+        if (keySet.find(key) == keySet.end()) {
+            newKeys.insert(key);
+        }
+        keySet.insert(key);
+    }
+    for (auto key : swapInKeys) {
+        if (newKeys.find(key) == newKeys.end()) {
+            CTRLog(CTRLogLevel::ERROR, "swapIn key error1");
+            return false;
+        }
+    }
+    if (swapInKeys.size() != newKeys.size()) {
+        CTRLog(CTRLogLevel::ERROR, "swapIn key error2");
+        return false;
+    }
+    historyKeyVec.insert(historyKeyVec.begin(), { keys.begin(), keys.end() });
+    if (historyKeyVec.size() > 2) {
+        historyKeyVec.pop_back();
+    }
+    for (auto key : swapOutKeys) {
+        // guard the second history slot: on the first call only one batch has been recorded
+        if (historyKeyVec[0].find(key) != historyKeyVec[0].end() ||
+            (historyKeyVec.size() > 1 && historyKeyVec[1].find(key) != historyKeyVec[1].end())) {
+            CTRLog(CTRLogLevel::ERROR, "swapOut key error1");
+            return false;
+        }
+    }
+    for (auto key : swapOutKeys) {
+        if (keySet.find(key) == keySet.end()) {
+            CTRLog(CTRLogLevel::ERROR, "swapOut key error2");
+            return false;
+        }
+    }
+    for (auto key : swapOutKeys) {
+        keySet.erase(key);
+    }
+    if (keySet.size() > maxCacheSize) {
+        CTRLog(CTRLogLevel::ERROR, "total key size error");
+        return false;
+    }
+    return true;
+}
+
+bool checkOffsets(std::set<uint32_t> &offsetSet, const std::vector<uint32_t> &swapInOffsets,
+    const std::vector<uint32_t> &swapOutOffset)
+{
+    for (auto offset : swapOutOffset) {
+        if (offsetSet.find(offset) == offsetSet.end()) {
+            CTRLog(CTRLogLevel::ERROR, "swapOut offset error1");
+            return false;
+        }
+    }
+
+    for (auto offset : swapOutOffset) {
+        offsetSet.erase(offset);
+    }
+
+    for (auto offset : swapInOffsets) {
+        if (offsetSet.find(offset) != offsetSet.end()) {
+            CTRLog(CTRLogLevel::ERROR, "swapIn offset error");
+            return false;
+        }
+        offsetSet.insert(offset);
+    }
+
+    return true;
+}
+
+TEST_F(EmbCacheTest, DEVICE_COMBINE_TEST)
+{
+    CTRLog(CTRLogLevel::INFO, "===========DEVICE_COMBINE_TEST start=============");
+    std::string tableName = "test_table";
+    uint32_t hostVocabSize = 4000000;
+    uint32_t embeddingSize = 13;
+    uint32_t extEmbeddingSize = 26;
+    uint32_t devVocabSize = 30000;
+    embCache = SimpleCreateTable(tableName, hostVocabSize, embeddingSize, extEmbeddingSize, devVocabSize);
+    std::set<uint64_t> keySet;
+    std::set<uint32_t> offsetSet;
+    std::vector<std::set<uint64_t>> historyKeyVec;
+    std::vector<std::set<uint32_t>> historyOffsetVec;
+    std::vector<uint64_t> lookupKeys;
+    std::vector<uint64_t> check_keys;
+    for (uint32_t i = 0; i < 50; i++) {
+        lookupKeys = GenKeys(10000, 123 + i, 0, 100000);
+        check_keys = lookupKeys;
+        std::pair<std::vector<uint64_t>, std::vector<uint32_t>> koPair1;
+        std::pair<std::vector<uint64_t>, std::vector<uint32_t>> koPair2;
+        ASSERT_EQ(embCache->GetSwapPairsAndKey2Offset(tableName, lookupKeys, koPair1, koPair2), H_OK);
+        bool retKey1 = checkKeys(keySet, historyKeyVec, check_keys, koPair1.first, koPair2.first, devVocabSize);
+        bool retOffset1 = checkOffsets(offsetSet, koPair1.second, koPair2.second);
+        ASSERT_EQ(retKey1, true);
+        ASSERT_EQ(retOffset1, true);
+    }
+
+    CTRLog(CTRLogLevel::INFO, "===========DEVICE_COMBINE_TEST end=============");
+}
+
+TEST_F(EmbCacheTest, REMOVE_KEYS)
+{
+    CTRLog(CTRLogLevel::INFO, "===========REMOVE_KEYS start=============");
+    std::string tableName = "test_table";
+    uint32_t hostVocabSize = 100;
+    uint32_t embeddingSize = 13;
+    uint32_t extEmbeddingSize = 26;
+    uint32_t devVocabSize = 10;
+    embCache = SimpleCreateTable(tableName, hostVocabSize, embeddingSize, extEmbeddingSize, devVocabSize);
+    std::vector<uint64_t> lookupKeys;
+    std::vector<uint64_t> removeKeys;
+    float *addr;
+    float *newEmb;
+
+    for (uint32_t i = 0; i < hostVocabSize - 1; i++) {
+        lookupKeys.emplace_back(i);
+        for (uint32_t j = 0; j < hostVocabSize - 1; j++) {
+            removeKeys.emplace_back(i + j);
+        }
+    }
+    addr = (float *)malloc(lookupKeys.size() * extEmbeddingSize * sizeof(float));
+    ASSERT_EQ(embCache->EmbeddingLookup(tableName, lookupKeys, addr), H_OK);
+    free(addr);
+
+    // table exists
+    ASSERT_EQ(embCache->RemoveEmbsByKeys(tableName, lookupKeys), H_OK);
+
+    // table does not exist
+    ASSERT_EQ(embCache->RemoveEmbsByKeys("not_a_table", lookupKeys), H_TABLE_NOT_EXIST);
+
+    // table name exceeds the length limit
+    ASSERT_EQ(embCache->RemoveEmbsByKeys(tooLongTableName, lookupKeys), H_TABLE_NAME_TOO_LONG);
+
+    // remove INVALID_KEY
+    uint64_t neg_one = static_cast<uint64_t>(-1);
+    lookupKeys = { neg_one, neg_one, neg_one, neg_one, neg_one };
+    ASSERT_EQ(embCache->RemoveEmbsByKeys(tableName, lookupKeys), H_OK);
+
+    // verify that embLocalTable removed the stored records
+    lookupKeys = { 0, 1, 4 };
+    addr = (float *)malloc(lookupKeys.size() * extEmbeddingSize * sizeof(float));
+    ASSERT_EQ(embCache->EmbeddingLookup(tableName, lookupKeys, addr), H_OK);
+    free(addr);
+
+    newEmb = (float *)malloc(lookupKeys.size() * extEmbeddingSize * sizeof(float));
+    for (uint32_t i = 0; i < lookupKeys.size() * extEmbeddingSize; i++) {
+        newEmb[i] = 999.99f;
+    }
+    ASSERT_EQ(embCache->EmbeddingUpdate(tableName, lookupKeys, newEmb), H_OK);
+    free(newEmb);
+
+    addr = (float *)malloc(lookupKeys.size() * extEmbeddingSize * sizeof(float));
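+    // write-remove-recheck pattern: the keys were set to the 999.99f sentinel above; after
+    // RemoveEmbsByKeys, a fresh lookup must return re-initialized values, not the sentinel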
+    ASSERT_EQ(embCache->EmbeddingLookup(tableName, lookupKeys, addr), H_OK);
+    bool ret1 = true;
+    for (uint32_t i = 0; i < lookupKeys.size() * extEmbeddingSize; i++) {
+        if (fabs(addr[i] - 999.99f) > 0.0000001) {
+            ret1 = false;
+        }
+    }
+    free(addr);
+    ASSERT_EQ(ret1, true);
+
+    ASSERT_EQ(embCache->RemoveEmbsByKeys(tableName, lookupKeys), H_OK);
+
+    addr = (float *)malloc(lookupKeys.size() * extEmbeddingSize * sizeof(float));
+    ASSERT_EQ(embCache->EmbeddingLookup(tableName, lookupKeys, addr), H_OK);
+    bool ret2 = true;
+    for (uint32_t i = 0; i < lookupKeys.size() * extEmbeddingSize; i++) {
+        if (fabs(addr[i] - 999.99f) <= 0.0000001) {
+            ret2 = false;
+        }
+    }
+    free(addr);
+    ASSERT_EQ(ret2, true);
+
+    // verify that offsetMapper removed the mapping records
+    lookupKeys = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
+    std::pair<std::vector<uint64_t>, std::vector<uint32_t>> swapInKoPair, swapOutKoPair;
+    ASSERT_EQ(embCache->GetSwapPairsAndKey2Offset(tableName, lookupKeys, swapInKoPair, swapOutKoPair), H_OK);
+    removeKeys = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
+    ASSERT_EQ(embCache->RemoveEmbsByKeys(tableName, removeKeys), H_OK);
+    std::vector<std::pair<uint64_t, uint32_t>> koVec;
+    ASSERT_EQ(embCache->ExportDeviceKeyOffsetPairs(tableName, koVec), H_OK);
+    bool ret3 = true;
+    for (uint32_t i = 0; i < koVec.size(); i++) {
+        if (std::find(removeKeys.begin(), removeKeys.end(), koVec[i].first) != removeKeys.end()) {
+            ret3 = false;
+        }
+    }
+    ASSERT_EQ(ret3, true);
+    // verify that keys can be added again after removal
+    lookupKeys = { 9, 10, 11, 12, 13 };
+    std::vector<uint64_t> oldKeys = lookupKeys;
+    std::pair<std::vector<uint64_t>, std::vector<uint32_t>> swapInKoPair2, swapOutKoPair2;
+    ASSERT_EQ(embCache->GetSwapPairsAndKey2Offset(tableName, lookupKeys, swapInKoPair2, swapOutKoPair2), H_OK);
+    bool ret4 = true;
+    for (uint32_t i = 0; i < 5; i++) {
+        if (oldKeys[i] != swapInKoPair2.first[i]) {
+            ret4 = false;
+        }
+    }
+    bool ret5 = true;
+    for (uint32_t i = 0; i < 5; i++) {
+        if (lookupKeys[i] != swapInKoPair2.second[i]) {
+            ret5 = false;
+        }
+    }
+    ASSERT_EQ(ret4, true);
+    ASSERT_EQ(ret5, true);
+    ASSERT_EQ(swapInKoPair2.first.size(), 5ull);
+    ASSERT_EQ(swapInKoPair2.second.size(), 5ull);
+    ASSERT_EQ(swapOutKoPair2.first.empty(), true);
+    ASSERT_EQ(swapOutKoPair2.second.empty(), true);
+
+    removeKeys = { 9, 10, 11, 3 };
+    ASSERT_EQ(embCache->RemoveEmbsByKeys(tableName, removeKeys), H_OK);
+    std::vector<std::pair<uint64_t, uint32_t>> koVec2;
+    ASSERT_EQ(embCache->ExportDeviceKeyOffsetPairs(tableName, koVec2), H_OK);
+    bool ret6 = true;
+    for (uint32_t i = 0; i < koVec2.size(); i++) {
+        if (std::find(removeKeys.begin(), removeKeys.end(), koVec2[i].first) != removeKeys.end()) {
+            ret6 = false;
+        }
+    }
+    ASSERT_EQ(ret6, true);
+
+    // verify that keys can be added again after removal
+    lookupKeys = { 0, 1, 2, 3, 4, 5, 6, 7 };
+    std::vector<uint64_t> oldKeys2 = lookupKeys;
+    std::pair<std::vector<uint64_t>, std::vector<uint32_t>> swapInKoPair3, swapOutKoPair3;
+    ASSERT_EQ(embCache->GetSwapPairsAndKey2Offset(tableName, lookupKeys, swapInKoPair3, swapOutKoPair3), H_OK);
+    bool ret7 = true;
+    for (uint32_t i = 0; i < 8; i++) {
+        if (oldKeys2[i] != swapInKoPair3.first[i]) {
+            ret7 = false;
+        }
+    }
+    bool ret8 = true;
+    for (uint32_t i = 0; i < 8; i++) {
+        if (lookupKeys[i] != swapInKoPair3.second[i]) {
+            ret8 = false;
+        }
+    }
+    ASSERT_EQ(ret7, true);
+    ASSERT_EQ(ret8, true);
+    ASSERT_EQ(swapInKoPair3.first.size(), 8ull);
+    ASSERT_EQ(swapInKoPair3.second.size(), 8ull);
+    ASSERT_EQ(swapOutKoPair3.first.empty(), true);
+    ASSERT_EQ(swapOutKoPair3.second.empty(), true);
+
+    lookupKeys = { 15 };
+    std::pair<std::vector<uint64_t>, std::vector<uint32_t>> swapInKoPair4, swapOutKoPair4;
+    ASSERT_EQ(embCache->GetSwapPairsAndKey2Offset(tableName, lookupKeys, swapInKoPair4, swapOutKoPair4), H_OK);
+
+    CTRLog(CTRLogLevel::INFO, "===========REMOVE_KEYS end=============");
+}
+
+TEST_F(EmbCacheTest, ExportDeviceKeyOffsetPairs)
+{
+    CTRLog(CTRLogLevel::INFO, "===========ExportDeviceKeyOffsetPairs start=============");
+    std::string tableName = "test_table";
+    uint32_t hostVocabSize = 10;
+    uint32_t embeddingSize = 13;
+    uint32_t extEmbeddingSize = 26;
+    uint32_t devVocabSize = 8;
+    embCache = SimpleCreateTable(tableName, hostVocabSize, embeddingSize, extEmbeddingSize, devVocabSize);
+
+    // use a table name that does not exist
+    std::vector<std::pair<uint64_t, uint32_t>> koVec;
+    ASSERT_EQ(embCache->ExportDeviceKeyOffsetPairs("not_a_table", koVec), H_TABLE_NOT_EXIST);
+
+    ASSERT_EQ(embCache->ExportDeviceKeyOffsetPairs(tooLongTableName, koVec), H_TABLE_NAME_TOO_LONG);
+
+    // normal export of the key-offset pairs
+    std::vector<uint64_t> lookupKeys;
+    std::vector<uint64_t> checkKeys;
+    lookupKeys = { 6, 0, 8, 1, 3, 4 };
+    checkKeys = lookupKeys;
+    std::pair<std::vector<uint64_t>, std::vector<uint32_t>> swapInKoPair, swapOutKoPair;
+    ASSERT_EQ(embCache->GetSwapPairsAndKey2Offset(tableName, lookupKeys, swapInKoPair, swapOutKoPair), H_OK);
+    std::vector<std::pair<uint64_t, uint32_t>> koVec2;
+    ASSERT_EQ(embCache->ExportDeviceKeyOffsetPairs(tableName, koVec2), H_OK);
+    ASSERT_EQ(koVec2.size(), lookupKeys.size());
+    bool ret1 = true;
+    for (uint32_t i = 0; i < koVec2.size(); i++) {
+        if (koVec2[i].first != checkKeys[i] || koVec2[i].second != lookupKeys[i]) {
+            ret1 = false;
+        }
+    }
+    ASSERT_EQ(ret1, true);
+
+    CTRLog(CTRLogLevel::INFO, "===========ExportDeviceKeyOffsetPairs end=============");
+}
+
+TEST_F(EmbCacheTest, GetEmbTableNames)
+{
+    CTRLog(CTRLogLevel::INFO, "===========GetEmbTableNames start=============");
+    factory->CreateEmbCacheManager(embCache);
+    uint32_t hostVocabSize = 10;
+    uint32_t embeddingSize = 13;
+    uint32_t extEmbeddingSize = 26;
+    uint32_t devVocabSize = 8;
+    std::vector<std::string> tableNameVec;
+    tableNameVec.emplace_back("table1");
+    tableNameVec.emplace_back("table2");
+    tableNameVec.emplace_back("table3");
+    for (const std::string &tableName : tableNameVec) {
+        EmbCache::EmbCacheInfo embCacheInfo(tableName, hostVocabSize, embeddingSize, extEmbeddingSize, devVocabSize);
+
+        EmbCache::NormalInitializerInfo normalInitializerInfo(0, 0.5, 0, 1.0);
+        std::string normalInitializeName = "random_normal_initializer";
+        EmbCache::InitializerInfo normalInitializeInfo(normalInitializeName, 0, embeddingSize, normalInitializerInfo);
+
+        EmbCache::ConstantInitializerInfo constantInitializerInfo(0.233, 1.0);
+        std::string constantInitializeName = "constant_initializer";
+        EmbCache::InitializerInfo constantInitializeInfo(constantInitializeName, embeddingSize, embeddingSize,
+            constantInitializerInfo);
+
+        std::vector<EmbCache::InitializerInfo> initializeInfos(extEmbeddingSize / embeddingSize);
+        initializeInfos[0] = normalInitializeInfo;
+        for (uint64_t i = 1; i < initializeInfos.size(); i++) {
+            initializeInfos[i] = constantInitializeInfo;
+        }
+        embCache->CreateCacheForTable(embCacheInfo, initializeInfos, -1, hostVocabSize);
+    }
+    std::vector<std::string> allTableNames;
+    std::vector<std::string> notEmptyVector = { "123" };
+    ASSERT_EQ(embCache->GetEmbTableNames(notEmptyVector), H_ARG_NOT_EMPTY);
+
+    ASSERT_EQ(embCache->GetEmbTableNames(allTableNames), H_OK);
+    bool ret1 = true;
+    for (auto tableName : allTableNames) {
+        if (std::find(tableNameVec.begin(), tableNameVec.end(), tableName) == tableNameVec.end()) {
+            ret1 = false;
+        }
+    }
+    for (auto tableName : tableNameVec) {
+        if (std::find(allTableNames.begin(), allTableNames.end(), tableName) == allTableNames.end()) {
+            ret1 = false;
+        }
+    }
+    ASSERT_EQ(ret1, true);
+
+    CTRLog(CTRLogLevel::INFO, "===========GetEmbTableNames end=============");
+}
+
+TEST_F(EmbCacheTest, 
SERIALIZE) +{ + CTRLog(CTRLogLevel::INFO, "===========SERIALIZE start============="); + std::string tableName = "test_table"; + uint32_t hostVocabSize = 5; + uint32_t embeddingSize = 13; + uint32_t extEmbeddingSize = 26; + uint32_t devVocabSize = 2; + embCache = SimpleCreateTable(tableName, hostVocabSize, embeddingSize, extEmbeddingSize, devVocabSize); + + std::vector lookupKeys; + + lookupKeys = { 0 }; + std::vector buffer; + ASSERT_EQ(embCache->Serialize("not_a_table", buffer), H_TABLE_NOT_EXIST); + // 表名超过上限 + ASSERT_EQ(embCache->Serialize(tooLongTableName, buffer), H_TABLE_NAME_TOO_LONG); + CTRLog(CTRLogLevel::INFO, "===========SERIALIZE end============="); +} + +TEST_F(EmbCacheTest, DESERIALIZE) +{ + CTRLog(CTRLogLevel::INFO, "===========DESERIALIZE start============="); + std::string tableName = "test_table"; + uint32_t hostVocabSize = 5; + uint32_t embeddingSize = 13; + uint32_t extEmbeddingSize = 26; + uint32_t devVocabSize = 2; + embCache = SimpleCreateTable(tableName, hostVocabSize, embeddingSize, extEmbeddingSize, devVocabSize); + + std::vector lookupKeys; + + lookupKeys = { 0 }; + std::vector buffer = { 'A', 'B', '1', '2' }; + ASSERT_EQ(embCache->Deserialize("not_a_table", buffer), H_TABLE_NOT_EXIST); + + ASSERT_EQ(embCache->Deserialize(tooLongTableName, buffer), H_TABLE_NAME_TOO_LONG); + + ASSERT_EQ(embCache->Deserialize(tableName, buffer), H_LOAD_ERROR); + + lookupKeys = { 0, 1, 2, 3, 4 }; + float *newEmb; + newEmb = (float *)malloc(lookupKeys.size() * extEmbeddingSize * sizeof(float)); + for (uint32_t i = 0; i < lookupKeys.size() * extEmbeddingSize; i++) { + newEmb[i] = 0.01f * i; + } + ASSERT_EQ(embCache->EmbeddingUpdate(tableName, lookupKeys, newEmb), H_OK); + free(newEmb); + std::vector buffer1; + ASSERT_EQ(embCache->Serialize(tableName, buffer1), H_OK); + buffer1.erase(buffer1.begin() + buffer1.size() / 2, buffer1.end()); + ASSERT_EQ(embCache->Deserialize(tableName, buffer1), H_LOAD_ERROR); + + CTRLog(CTRLogLevel::INFO, "===========DESERIALIZE end============="); +} + +TEST_F(EmbCacheTest, SERIALIZE_DESERIALIZE) +{ + CTRLog(CTRLogLevel::INFO, "===========SERIALIZE_DESERIALIZE start============="); + std::string tableName = "test_table"; + uint32_t hostVocabSize = 5; + uint32_t embeddingSize = 13; + uint32_t extEmbeddingSize = 26; + uint32_t devVocabSize = 2; + embCache = SimpleCreateTable(tableName, hostVocabSize, embeddingSize, extEmbeddingSize, devVocabSize); + + std::vector lookupKeys; + lookupKeys = { 0, 1, 2, 3, 4 }; + float *newEmb; + newEmb = (float *)malloc(lookupKeys.size() * extEmbeddingSize * sizeof(float)); + for (uint32_t i = 0; i < lookupKeys.size() * extEmbeddingSize; i++) { + newEmb[i] = 0.01f * i; + } + ASSERT_EQ(embCache->EmbeddingUpdate(tableName, lookupKeys, newEmb), H_OK); + free(newEmb); + + std::vector buffer1; + std::vector buffer2; + + ASSERT_EQ(embCache->Serialize(tableName, buffer1), H_OK); + ASSERT_EQ(embCache->Deserialize(tableName, buffer1), H_OK); + ASSERT_EQ(embCache->Serialize(tableName, buffer2), H_OK); + ASSERT_EQ(buffer1.size(), buffer2.size()); + for (uint64_t i = 0; i < buffer1.size(); i++) { + ASSERT_EQ(buffer1[i], buffer2[i]); + } + ASSERT_EQ(buffer1, buffer2); + CTRLog(CTRLogLevel::INFO, "===========SERIALIZE_DESERIALIZE end============="); +} + +TEST_F(EmbCacheTest, ERROR_INITIALIZER) +{ + CTRLog(CTRLogLevel::INFO, "===========ERROR_INITIALIZER start============="); + uint32_t embeddingSize = 13; + /* 对ConstantInitializerInfo的constValue和initK的校验 */ + std::string constantInitializeName = "constant_initializer"; + // 日志打印"constant 
value is less than -1000000000, and will use -1000000000.",并正常初始化InitializerInfo + EmbCache::ConstantInitializerInfo constantInitializerInfo1(-1e9 - 1e8, 1.0); + EmbCache::InitializerInfo constantInitializeInfo = + EmbCache::InitializerInfo(constantInitializeName, embeddingSize, embeddingSize + 1, constantInitializerInfo1); + + // 日志打印"constant value is greater than 1000000000, and will use 1000000000.",并正常初始化InitializerInfo + EmbCache::ConstantInitializerInfo constantInitializerInfo2(1e9 + 1e8, 1.0); + constantInitializeInfo = + EmbCache::InitializerInfo(constantInitializeName, embeddingSize, embeddingSize + 1, constantInitializerInfo2); + + // 日志打印"constant initK is greater than 10000, and will use 10000.",并正常初始化InitializerInfo + EmbCache::ConstantInitializerInfo constantInitializerInfo3(0.233, 10001); + constantInitializeInfo = + EmbCache::InitializerInfo(constantInitializeName, embeddingSize, embeddingSize + 1, constantInitializerInfo3); + + // 日志打印"constant initK is less than -10000, and will use -10000.",并正常初始化InitializerInfo + EmbCache::ConstantInitializerInfo constantInitializerInfo4(0.233, -10001); + constantInitializeInfo = + EmbCache::InitializerInfo(constantInitializeName, embeddingSize, embeddingSize + 1, constantInitializerInfo4); + + /* 对NormalIntializerInfo的mean、stdev和initK的校验 */ + std::string normalInitializeName = "random_normal_initializer"; + // 日志打印"random normal mean param is greater than 1000000000, and will use + // 1000000000.",并正常初始化InitializerInfo + EmbCache::NormalInitializerInfo normalInitializerInfo1(1e9 + 1e8, 0.05, 0, 1.0); + EmbCache::InitializerInfo normalInitializeInfo = + EmbCache::InitializerInfo(normalInitializeName, embeddingSize, embeddingSize, normalInitializerInfo1); + + // 日志打印"random normal mean param is less than -1000000000, and will use + // -1000000000.",并正常初始化InitializerInfo + EmbCache::NormalInitializerInfo normalInitializerInfo2(-1e9 - 1e8, 0.05, 0, 1.0); + normalInitializeInfo = + EmbCache::InitializerInfo(normalInitializeName, embeddingSize, embeddingSize, normalInitializerInfo2); + + // 日志打印"random normal stddev param is greater than 100, and will use 100.",并正常初始化InitializerInfo + EmbCache::NormalInitializerInfo normalInitializerInfo3(0, 101, 0, 1.0); + normalInitializeInfo = + EmbCache::InitializerInfo(normalInitializeName, embeddingSize, embeddingSize, normalInitializerInfo3); + + // 日志打印"random normal stddev param is less than 0, and will use 0.",并正常初始化InitializerInfo + EmbCache::NormalInitializerInfo normalInitializerInfo4(0, -1, 0, 1.0); + normalInitializeInfo = + EmbCache::InitializerInfo(normalInitializeName, embeddingSize, embeddingSize, normalInitializerInfo4); + // 日志打印"random normal initK is greater than 10000, and will use 10000.",并正常初始化InitializerInfo + EmbCache::NormalInitializerInfo normalInitializerInfo5(0, 0.05, 0, 10001); + normalInitializeInfo = + EmbCache::InitializerInfo(normalInitializeName, embeddingSize, embeddingSize, normalInitializerInfo5); + // 日志打印"random normal initK is less than -10000, and will use -10000.",并正常初始化InitializerInfo + EmbCache::NormalInitializerInfo normalInitializerInfo6(0, 0.05, 0, -10001); + normalInitializeInfo = + EmbCache::InitializerInfo(normalInitializeName, embeddingSize, embeddingSize, normalInitializerInfo6); + + /* 对TruncatedNormalInitializer的mean、stdev以及initK的校验 */ + std::string truncatedNormalInitializeName = "truncated_normal_initializer"; + // 日志打印"truncated normal mean param is greater than 1000000000, and will use + // 1000000000.",并正常初始化InitializerInfo + 
EmbCache::NormalInitializerInfo normalInitializerInfo7(1e9 + 1e8, 0.05, 0, 1.0); + EmbCache::InitializerInfo truncatedNormalInitializeInfo = + EmbCache::InitializerInfo(truncatedNormalInitializeName, embeddingSize, embeddingSize, normalInitializerInfo7); + + // 日志打印"truncated normal mean param is less than -1000000000, and will use + // -1000000000.",并正常初始化InitializerInfo + EmbCache::NormalInitializerInfo normalInitializerInfo8(-1e9 - 1e8, 0.05, 0, 1.0); + truncatedNormalInitializeInfo = + EmbCache::InitializerInfo(truncatedNormalInitializeName, embeddingSize, embeddingSize, normalInitializerInfo8); + + // 日志打印"truncated normal stddev param is greater than 100, and will use 100.",并正常初始化InitializerInfo + EmbCache::NormalInitializerInfo normalInitializerInfo9(0, 101, 0, 1.0); + truncatedNormalInitializeInfo = + EmbCache::InitializerInfo(truncatedNormalInitializeName, embeddingSize, embeddingSize, normalInitializerInfo9); + + // 日志打印"truncated normal stddev param is less than 0.000000, and will use 0.000000."并正常初始化InitializerInfo + EmbCache::NormalInitializerInfo normalInitializerInfo10(0, -1, 0, 1.0); + truncatedNormalInitializeInfo = + EmbCache::InitializerInfo(truncatedNormalInitializeName, embeddingSize, embeddingSize, normalInitializerInfo10); + // 日志打印"truncated normal initK is greater than 10000, and will use 10000.",并正常初始化InitializerInfo + EmbCache::NormalInitializerInfo normalInitializerInfo11(0, 0.05, 0, 10001); + truncatedNormalInitializeInfo = + EmbCache::InitializerInfo(truncatedNormalInitializeName, embeddingSize, embeddingSize, normalInitializerInfo11); + // 日志打印"truncated normal initK is less than -10000, and will use -10000." + EmbCache::NormalInitializerInfo normalInitializerInfo12(0, 0.05, 0, -10001); + truncatedNormalInitializeInfo = + EmbCache::InitializerInfo(truncatedNormalInitializeName, embeddingSize, embeddingSize, normalInitializerInfo12); + CTRLog(CTRLogLevel::INFO, "===========ERROR_INITIALIZER end============="); +} + + +TEST_F(EmbCacheTest, EmbeddingRemove) +{ + CTRLog(CTRLogLevel::INFO, "===========EmbeddingRemove start============="); + std::string tableName = "test_table"; + uint32_t hostVocabSize = 100; + uint32_t embeddingSize = 13; + uint32_t extEmbeddingSize = 26; + uint32_t devVocabSize = 100; + embCache = SimpleCreateTable(tableName, hostVocabSize, embeddingSize, extEmbeddingSize, devVocabSize); + std::vector lookupKeys; + std::vector removeKeys; + float *addr; + float *newEmb; + + for (uint32_t i = 0; i < hostVocabSize - 1; i++) { + lookupKeys.emplace_back(i); + for (uint32_t j = 0; j < hostVocabSize - 1; j++) { + removeKeys.emplace_back(i + j); + } + } + addr = (float *)malloc(lookupKeys.size() * extEmbeddingSize * sizeof(float)); + ASSERT_EQ(embCache->EmbeddingLookup(tableName, lookupKeys, addr), H_OK); + // 表存在 + ASSERT_EQ(embCache->EmbeddingRemove(tableName, lookupKeys), H_OK); + ASSERT_EQ(embCache->EmbeddingLookup(tableName, lookupKeys, addr), H_OK); + // 单线程 + ASSERT_EQ(embCache->EmbeddingRemove(tableName, lookupKeys, 1), H_OK); + + free(addr); + // REMOVE空keys + std::vector emptyRemoveKeys; + ASSERT_EQ(embCache->EmbeddingRemove(tableName, emptyRemoveKeys), H_OK); + + // 表不存在 + ASSERT_EQ(embCache->EmbeddingRemove("not_a_table", lookupKeys), H_TABLE_NOT_EXIST); + // 表名超过上限 + ASSERT_EQ(embCache->EmbeddingRemove(tooLongTableName, lookupKeys), H_TABLE_NAME_TOO_LONG); + + // remove INVALID_KEY + uint64_t neg_one = -1; + lookupKeys = { neg_one, neg_one, neg_one, neg_one, neg_one }; + ASSERT_EQ(embCache->EmbeddingRemove(tableName, lookupKeys), H_OK); + 
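+    // removing entries keyed by INVALID_KEY (the all-ones value (uint64_t)-1) is expected
+    // to be a no-op that still returns H_OK, mirroring the lookup behavior for such keys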
+ // 判断embLocalTable是否remove掉记录信息 + lookupKeys = { 0, 1, 4 }; + addr = (float *)malloc(lookupKeys.size() * extEmbeddingSize * sizeof(float)); + ASSERT_EQ(embCache->EmbeddingLookup(tableName, lookupKeys, addr), H_OK); + free(addr); + + newEmb = (float *)malloc(lookupKeys.size() * extEmbeddingSize * sizeof(float)); + for (uint32_t i = 0; i < lookupKeys.size() * extEmbeddingSize; i++) { + newEmb[i] = 999.99f; + } + ASSERT_EQ(embCache->EmbeddingUpdate(tableName, lookupKeys, newEmb), H_OK); + free(newEmb); + + addr = (float *)malloc(lookupKeys.size() * extEmbeddingSize * sizeof(float)); + ASSERT_EQ(embCache->EmbeddingLookup(tableName, lookupKeys, addr), H_OK); + bool ret1 = true; + for (uint32_t i = 0; i < lookupKeys.size() * extEmbeddingSize; i++) { + if (fabs(addr[i] - 999.99f) > 0.0000001) { + ret1 = false; + } + } + free(addr); + ASSERT_EQ(ret1, true); + + ASSERT_EQ(embCache->EmbeddingRemove(tableName, lookupKeys), H_OK); + + addr = (float *)malloc(lookupKeys.size() * extEmbeddingSize * sizeof(float)); + ASSERT_EQ(embCache->EmbeddingLookup(tableName, lookupKeys, addr), H_OK); + bool ret2 = true; + for (uint32_t i = 0; i < lookupKeys.size() * extEmbeddingSize; i++) { + if (fabs(addr[i] - 999.99f) <= 0.0000001) { + ret2 = false; + } + } + free(addr); + ASSERT_EQ(ret2, true); + + // 判断offsetMapper是否remove掉记录信息 + lookupKeys = { 6, 0, 8, 1, 3, 4 }; + std::pair, std::vector> swapInKoPair, swapOutKoPair; + ASSERT_EQ(embCache->GetSwapPairsAndKey2Offset(tableName, lookupKeys, swapInKoPair, swapOutKoPair), H_OK); + removeKeys = { 0, 1, 4 }; + ASSERT_EQ(embCache->EmbeddingRemove(tableName, removeKeys), H_OK); + + CTRLog(CTRLogLevel::INFO, "===========EmbeddingRemove end============="); +} + +TEST_F(EmbCacheTest, GET_EMB_TABLE_INFO) +{ + CTRLog(CTRLogLevel::INFO, "===========GET_EMB_TABLE_INFO start============="); + std::string tableName = "test_table"; + uint64_t hostVocabSize = 5; + uint32_t embeddingSize = 13; + uint32_t extEmbeddingSize = 26; + uint64_t devVocabSize = 2; + embCache = SimpleCreateTable(tableName, hostVocabSize, embeddingSize, extEmbeddingSize, devVocabSize); + + std::vector lookupKeys; + lookupKeys = { 0, 1, 2, 3, 4 }; + float *newEmb; + newEmb = (float *)malloc(lookupKeys.size() * extEmbeddingSize * sizeof(float)); + for (uint32_t i = 0; i < lookupKeys.size() * extEmbeddingSize; i++) { + newEmb[i] = 0.01f * i; + } + ASSERT_EQ(embCache->EmbeddingUpdate(tableName, lookupKeys, newEmb), H_OK); + free(newEmb); + + std::vector keys; + std::vector> embeddings; + std::vector> optimizerSlots; + + ASSERT_EQ(embCache->GetEmbTableInfos("Invalid_table_name", keys, embeddings, optimizerSlots), H_TABLE_NOT_EXIST); + ASSERT_EQ(embCache->GetEmbTableInfos(tooLongTableName, keys, embeddings, optimizerSlots), H_TABLE_NAME_TOO_LONG); + ASSERT_EQ(embCache->GetEmbTableInfos(tableName, keys, embeddings, optimizerSlots), H_OK); + bool ret = true; + if (keys.size() != 5) { + ret = false; + } + uint32_t optimizerSlotSize = extEmbeddingSize - embeddingSize; + for (auto key : keys) { + auto it = std::find(lookupKeys.begin(), lookupKeys.end(), key); + if (it == lookupKeys.end()) { + ret = false; + break; + } + uint32_t index = it - lookupKeys.begin(); + for (uint32_t i = 0; i < embeddingSize; i++) { + if (fabs(embeddings[index][i] - 0.01f * (i + index * extEmbeddingSize)) > 0.0000001) { + ret = false; + } + } + for (uint32_t i = 0; i < optimizerSlotSize; i++) { + if (fabs(optimizerSlots[index][i] - 0.01f * (i + index * extEmbeddingSize + embeddingSize)) > 0.0000001) { + ret = false; + } + } + } + 
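+    // each exported row is laid out as
+    // [ embeddingSize embedding values | (extEmbeddingSize - embeddingSize) optimizer-slot values ],
+    // which is exactly what the index arithmetic in the loops above verifies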
ASSERT_EQ(ret, true); + + std::vector keys2 = { 1, 2, 3 }; + std::vector> embeddings2; + std::vector> optimizerSlots2; + ASSERT_EQ(embCache->GetEmbTableInfos(tableName, keys2, embeddings2, optimizerSlots2), H_ARG_NOT_EMPTY); + + std::vector keys3; + std::vector> embeddings3; + std::vector> optimizerSlots3; + embeddings3.emplace_back(std::vector({ 0.1f, 0.2f })); + ASSERT_EQ(embCache->GetEmbTableInfos(tableName, keys3, embeddings3, optimizerSlots3), H_ARG_NOT_EMPTY); + + std::vector keys4; + std::vector> embeddings4; + std::vector> optimizerSlots4; + optimizerSlots4.emplace_back(std::vector({ 0.1f, 0.2f })); + ASSERT_EQ(embCache->GetEmbTableInfos(tableName, keys4, embeddings4, optimizerSlots4), H_ARG_NOT_EMPTY); + embCache->Destroy(); + + hostVocabSize = 5; + embeddingSize = 13; + extEmbeddingSize = 13; + devVocabSize = 2; + + embCache = SimpleCreateTable(tableName, hostVocabSize, embeddingSize, extEmbeddingSize, devVocabSize); + std::vector lookupKeys2; + lookupKeys2 = { 0, 1, 2, 3, 4 }; + float *newEmb2; + newEmb2 = (float *)malloc(lookupKeys2.size() * extEmbeddingSize * sizeof(float)); + for (uint32_t i = 0; i < lookupKeys2.size() * extEmbeddingSize; i++) { + newEmb2[i] = 0.01f * i; + } + ASSERT_EQ(embCache->EmbeddingUpdate(tableName, lookupKeys2, newEmb2), H_OK); + free(newEmb2); + + std::vector keys5; + std::vector> embeddings5; + std::vector> optimizerSlots5; + + ASSERT_EQ(embCache->GetEmbTableInfos(tableName, keys5, embeddings5, optimizerSlots5), H_OK); + bool ret2 = true; + if (keys.size() != 5) { + ret2 = false; + } + for (auto key : keys) { + auto it = std::find(lookupKeys2.begin(), lookupKeys2.end(), key); + if (it == lookupKeys2.end()) { + ret2 = false; + break; + } + uint32_t index = it - lookupKeys2.begin(); + for (uint32_t i = 0; i < embeddingSize; i++) { + if (fabs(embeddings5[index][i] - 0.01f * (i + index * extEmbeddingSize)) > 0.0000001) { + ret2 = false; + } + } + } + if (!optimizerSlots5.empty()) { + ret2 = false; + } + ASSERT_EQ(ret2, true); + + CTRLog(CTRLogLevel::INFO, "===========GET_EMB_TABLE_INFO end============="); +} + +TEST_F(EmbCacheTest, LOAD_EMB_TABLE_INFO) +{ + CTRLog(CTRLogLevel::INFO, "===========LOAD_EMB_TABLE_INFO start============="); + std::string tableName = "test_table"; + uint64_t hostVocabSize = 5; + uint32_t embeddingSize = 13; + uint32_t extEmbeddingSize = 26; + uint64_t devVocabSize = 2; + embCache = SimpleCreateTable(tableName, hostVocabSize, embeddingSize, extEmbeddingSize, devVocabSize); + + std::vector keys; + std::vector> embeddings; + std::vector> optimizerSlots; + + keys = { 0, 1, 2, 3, 4 }; + for (uint64_t i = 0; i < keys.size(); i++) { + std::vector curEmbedding; + for (uint64_t j = 0; j < embeddingSize; j++) { + curEmbedding.emplace_back(0.01f * (i * extEmbeddingSize + j)); + } + embeddings.emplace_back(curEmbedding); + } + uint32_t optimizerSlotSize = extEmbeddingSize - embeddingSize; + for (uint64_t i = 0; i < keys.size(); i++) { + std::vector curOptimizerSlot; + for (uint64_t j = 0; j < optimizerSlotSize; j++) { + curOptimizerSlot.emplace_back(0.01f * (i * extEmbeddingSize + embeddingSize + j)); + } + optimizerSlots.emplace_back(curOptimizerSlot); + } + ASSERT_EQ(embCache->LoadEmbTableInfos("Invalid_table_name", keys, embeddings, optimizerSlots), H_TABLE_NOT_EXIST); + ASSERT_EQ(embCache->LoadEmbTableInfos(tooLongTableName, keys, embeddings, optimizerSlots), H_TABLE_NAME_TOO_LONG); + ASSERT_EQ(embCache->LoadEmbTableInfos(tableName, keys, embeddings, optimizerSlots), H_OK); + + std::vector keys2; + std::vector> embeddings2; + 
std::vector> optimizerSlots2; + ASSERT_EQ(embCache->GetEmbTableInfos(tableName, keys2, embeddings2, optimizerSlots2), H_OK); + + bool ret = true; + if (keys2.size() != 5) { + ret = false; + } + for (auto key : keys2) { + auto it = std::find(keys.begin(), keys.end(), key); + if (it == keys.end()) { + ret = false; + break; + } + uint32_t index = it - keys.begin(); + for (uint32_t i = 0; i < embeddingSize; i++) { + if (fabs(embeddings2[index][i] - 0.01f * (i + index * extEmbeddingSize)) > 0.0000001) { + ret = false; + } + } + for (uint32_t i = 0; i < optimizerSlotSize; i++) { + if (fabs(optimizerSlots2[index][i] - 0.01f * (i + index * extEmbeddingSize + embeddingSize)) > 0.0000001) { + ret = false; + } + } + } + ASSERT_EQ(ret, true); + + std::vector keys3; + std::vector> embeddings3; + std::vector> optimizerSlots3; + + keys3 = { 0, 1, 2, 3, 4 }; + for (uint64_t i = 0; i < keys3.size() - 1; i++) { + std::vector curEmbedding; + for (uint64_t j = 0; j < embeddingSize; j++) { + curEmbedding.emplace_back(0.01f * (i * extEmbeddingSize + j)); + } + embeddings3.emplace_back(curEmbedding); + } + for (uint64_t i = 0; i < keys3.size(); i++) { + std::vector curOptimizerSlot; + for (uint64_t j = 0; j < optimizerSlotSize; j++) { + curOptimizerSlot.emplace_back(0.01f * (i * extEmbeddingSize + embeddingSize + j)); + } + optimizerSlots3.emplace_back(curOptimizerSlot); + } + // keys num != embeddings num + ASSERT_EQ(embCache->LoadEmbTableInfos(tableName, keys3, embeddings3, optimizerSlots3), H_LOAD_ERROR); + + std::vector keys4; + std::vector> embeddings4; + std::vector> optimizerSlots4; + + keys4 = { 0, 1, 2, 3, 4 }; + for (uint64_t i = 0; i < keys4.size(); i++) { + std::vector curEmbedding; + for (uint64_t j = 0; j < embeddingSize; j++) { + curEmbedding.emplace_back(0.01f * (i * extEmbeddingSize + j)); + } + embeddings4.emplace_back(curEmbedding); + } + for (uint64_t i = 0; i < keys4.size() - 1; i++) { + std::vector curOptimizerSlot; + for (uint64_t j = 0; j < optimizerSlotSize; j++) { + curOptimizerSlot.emplace_back(0.01f * (i * extEmbeddingSize + embeddingSize + j)); + } + optimizerSlots4.emplace_back(curOptimizerSlot); + } + // keys num != optimizerSlots num + ASSERT_EQ(embCache->LoadEmbTableInfos(tableName, keys4, embeddings4, optimizerSlots4), H_LOAD_ERROR); + + std::vector keys5; + std::vector> embeddings5; + std::vector> optimizerSlots5; + + keys5 = { 0, 1, 2, 3, 4, 5 }; + for (uint64_t i = 0; i < keys5.size(); i++) { + std::vector curEmbedding; + for (uint64_t j = 0; j < embeddingSize; j++) { + curEmbedding.emplace_back(0.01f * (i * extEmbeddingSize + j)); + } + embeddings5.emplace_back(curEmbedding); + } + for (uint64_t i = 0; i < keys5.size(); i++) { + std::vector curOptimizerSlot; + for (uint64_t j = 0; j < optimizerSlotSize; j++) { + curOptimizerSlot.emplace_back(0.01f * (i * extEmbeddingSize + embeddingSize + j)); + } + optimizerSlots5.emplace_back(curOptimizerSlot); + } + // loadKeys num > hostVocabSize + ASSERT_EQ(embCache->LoadEmbTableInfos(tableName, keys5, embeddings5, optimizerSlots5), H_LOAD_ERROR); + + std::vector keys6; + std::vector> embeddings6; + std::vector> optimizerSlots6; + + keys6 = { 0, 1, 2, 3, 4 }; + for (uint64_t i = 0; i < keys6.size(); i++) { + std::vector curEmbedding; + for (uint64_t j = 0; j < embeddingSize - 1; j++) { + curEmbedding.emplace_back(0.01f * (i * extEmbeddingSize + j)); + } + embeddings6.emplace_back(curEmbedding); + } + for (uint64_t i = 0; i < keys6.size(); i++) { + std::vector curOptimizerSlot; + for (uint64_t j = 0; j < optimizerSlotSize; j++) { + 
curOptimizerSlot.emplace_back(0.01f * (i * extEmbeddingSize + embeddingSize + j)); + } + optimizerSlots6.emplace_back(curOptimizerSlot); + } + // entering embeddingSize != table embeddingSize + ASSERT_EQ(embCache->LoadEmbTableInfos(tableName, keys6, embeddings6, optimizerSlots6), H_LOAD_ERROR); + + std::vector keys7; + std::vector> embeddings7; + std::vector> optimizerSlots7; + + keys7 = { 0, 1, 2, 3, 4 }; + for (uint64_t i = 0; i < keys7.size(); i++) { + std::vector curEmbedding; + for (uint64_t j = 0; j < embeddingSize; j++) { + curEmbedding.emplace_back(0.01f * (i * extEmbeddingSize + j)); + } + embeddings7.emplace_back(curEmbedding); + } + for (uint64_t i = 0; i < keys7.size(); i++) { + std::vector curOptimizerSlot; + for (uint64_t j = 0; j < optimizerSlotSize - 1; j++) { + curOptimizerSlot.emplace_back(0.01f * (i * extEmbeddingSize + embeddingSize + j)); + } + optimizerSlots7.emplace_back(curOptimizerSlot); + } + // entering optimizerSlotSize != table optimizerSlotSize + ASSERT_EQ(embCache->LoadEmbTableInfos(tableName, keys7, embeddings7, optimizerSlots7), H_LOAD_ERROR); + embCache->Destroy(); + + hostVocabSize = 5; + embeddingSize = 13; + extEmbeddingSize = 13; + devVocabSize = 2; + + embCache = SimpleCreateTable(tableName, hostVocabSize, embeddingSize, extEmbeddingSize, devVocabSize); + + std::vector keys8; + std::vector> embeddings8; + std::vector> optimizerSlots8; + + keys8 = { 0, 1, 2, 3, 4 }; + for (uint64_t i = 0; i < keys8.size(); i++) { + std::vector curEmbedding; + for (uint64_t j = 0; j < embeddingSize; j++) { + curEmbedding.emplace_back(0.01f * (i * extEmbeddingSize + j)); + } + embeddings8.emplace_back(curEmbedding); + } + + ASSERT_EQ(embCache->LoadEmbTableInfos(tableName, keys8, embeddings8, optimizerSlots8), H_OK); + + std::vector keys9; + std::vector> embeddings9; + std::vector> optimizerSlots9; + ASSERT_EQ(embCache->GetEmbTableInfos(tableName, keys9, embeddings9, optimizerSlots9), H_OK); + + double eps = 0.0000001; + bool ret2 = true; + if (keys9.size() != 5) { + ret2 = false; + } + for (auto key : keys9) { + auto it = std::find(keys9.begin(), keys9.end(), key); + if (it == keys9.end()) { + ret2 = false; + break; + } + uint32_t index = it - keys9.begin(); + for (uint32_t i = 0; i < embeddingSize; i++) { + if (fabs(embeddings9[index][i] - 0.01f * (i + index * extEmbeddingSize)) > eps) { + ret2 = false; + } + } + } + if (!optimizerSlots9.empty()) { + ret2 = false; + } + ASSERT_EQ(ret2, true); + + CTRLog(CTRLogLevel::INFO, "===========LOAD_EMB_TABLE_INFO end============="); +} diff --git a/src/AccCTR/tests/ut/src/emb_cache_test.h b/src/AccCTR/tests/ut/src/emb_cache_test.h new file mode 100644 index 00000000..5c87237b --- /dev/null +++ b/src/AccCTR/tests/ut/src/emb_cache_test.h @@ -0,0 +1,62 @@ +/* Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+ ==============================================================================*/
+
+#ifndef CTR_EMB_CACHE_TEST_H
+#define CTR_EMB_CACHE_TEST_H
+
+#include <cstdint>
+#include <string>
+#include <utility>
+
+#include "gtest/gtest.h"
+#include "gmock/gmock.h"
+
+#include "factory.h"
+#include "embedding_cache.h"
+
+class EmbCacheTest : public testing::Test {
+protected:
+    EmbCacheTest() {}
+    ~EmbCacheTest() {}
+    static void SetUpTestCase();
+    static void TearDownTestCase();
+
+    void SetUp() override;
+
+    void TearDown() override;
+
+    static ock::ctr::EmbCacheManagerPtr SimpleCreateTable(std::string tableName, uint32_t hostVocabSize,
+        uint32_t embeddingSize, uint32_t extEmbeddingSize, uint32_t devVocabSize,
+        std::pair<float, float> normalPara = { 0, 0.05 }, float constPara = 0.233);
+
+    static ock::ctr::EmbCacheManagerPtr ConstZeroCreateTable(std::string tableName, uint32_t hostVocabSize,
+        uint32_t embeddingSize, uint32_t extEmbeddingSize, uint32_t devVocabSize, uint64_t prefillBufferSize = 50000,
+        uint8_t prefillThreadNum = 1);
+
+    std::string tooLongTableName =
+        "00000000010000000001000000000100000000010000000001000000000100000000010000000001000000000100000000010000000001"
+        "00000000010000000001000000000100000000010000000001000000000100000000010000000001000000000100000000010000000001"
+        "00000000010000000001000000000100000000010000000001000000000100000000010000000001000000000100000000010000000001"
+        "00000000010000000001000000000100000000010000000001000000000100000000010000000001000000000100000000010000000001"
+        "00000000010000000001000000000100000000010000000001000000000100000000010000000001000000000100000000010000000001"
+        "00000000010000000001000000000100000000010000000001000000000100000000010000000001000000000100000000010000000001"
+        "00000000010000000001000000000100000000010000000001000000000100000000010000000001000000000100000000010000000001"
+        "00000000010000000001000000000100000000010000000001000000000100000000010000000001000000000100000000010000000001"
+        "00000000010000000001000000000100000000010000000001000000000100000000010000000001000000000100000000010000000001"
+        "00000000010000000001000000000100012";
+};
+
+#endif // CTR_EMB_CACHE_TEST_H
diff --git a/src/AccCTR/tests/ut/src/unique_test.cpp b/src/AccCTR/tests/ut/src/unique_test.cpp
index f971bb91..a94ebaf7 100644
--- a/src/AccCTR/tests/ut/src/unique_test.cpp
+++ b/src/AccCTR/tests/ut/src/unique_test.cpp
@@ -15,8 +15,7 @@ limitations under the License.
#include #include #include "unique_test.h" - -FactoryPtr factory; +#include "common.h" void UniqueTest::SetUpTestCase() { @@ -144,7 +143,10 @@ TEST_F(UniqueTest, DoUniqueNormal) std::string input_path(path); std::cout << "input_path:" + input_path + "/data30.txt" << std::endl; std::ifstream input(input_path + "/data30.txt"); - + if(!input.good()) { + std::cout << "Failed to open file:" + input_path + "/data30.txt" << std::endl; + return; + } std::vector numbers; std::string line; while (std::getline(input, line, ',')) { @@ -156,6 +158,8 @@ TEST_F(UniqueTest, DoUniqueNormal) UniquePtr unique; ASSERT_EQ(factory->CreateUnique(unique), 0); + factory->SetExternalLogFuncInner(CTRLog); + UniqueConf conf; conf.trace = true; conf.desiredSize = numbers.size(); @@ -213,6 +217,8 @@ TEST_F(UniqueTest, UseErrOutputTypeEnhanced) UniquePtr unique; ASSERT_EQ(factory->CreateUnique(unique), 0); + factory->SetExternalLogFuncInner(CTRLog); + UniqueConf conf; conf.desiredSize = 6; conf.dataType = DataType::INT64; @@ -253,6 +259,8 @@ TEST_F(UniqueTest, UseErrOutputTypeNormal) UniquePtr unique; ASSERT_EQ(factory->CreateUnique(unique), 0); + factory->SetExternalLogFuncInner(CTRLog); + UniqueConf conf; conf.desiredSize = 6; conf.dataType = DataType::INT64; @@ -292,6 +300,8 @@ TEST_F(UniqueTest, DoEnhancedUnique) UniquePtr unique; ASSERT_EQ(factory->CreateUnique(unique), 0); + factory->SetExternalLogFuncInner(CTRLog); + UniqueConf conf; conf.desiredSize = 6; conf.dataType = DataType::INT64; @@ -340,6 +350,8 @@ TEST_F(UniqueTest, DoEnhancedUniqueErr) UniquePtr unique; ASSERT_EQ(factory->CreateUnique(unique), 0); + factory->SetExternalLogFuncInner(CTRLog); + UniqueConf conf; conf.desiredSize = 6; conf.dataType = DataType::INT64; @@ -402,6 +414,8 @@ TEST_F(UniqueTest, DoEnhancedUnique_UniqueIdSize) UniquePtr unique; ASSERT_EQ(factory->CreateUnique(unique), 0); + factory->SetExternalLogFuncInner(CTRLog); + UniqueConf conf; conf.desiredSize = 6; conf.dataType = DataType::INT64; @@ -449,6 +463,8 @@ TEST_F(UniqueTest, idCntIsNull) UniquePtr unique; ASSERT_EQ(factory->CreateUnique(unique), 0); + factory->SetExternalLogFuncInner(CTRLog); + UniqueConf conf; conf.desiredSize = 6; conf.dataType = DataType::INT64; @@ -488,6 +504,8 @@ TEST_F(UniqueTest, idCntIsNullSharding) UniquePtr unique; ASSERT_EQ(factory->CreateUnique(unique), 0); + factory->SetExternalLogFuncInner(CTRLog); + UniqueConf conf; conf.desiredSize = 6; conf.dataType = DataType::INT64; @@ -537,6 +555,8 @@ TEST_F(UniqueTest, DoUniqueShard) UniquePtr unique; ASSERT_EQ(factory->CreateUnique(unique), 0); + factory->SetExternalLogFuncInner(CTRLog); + UniqueConf conf; conf.useSharding = true; conf.useIdCount = true; @@ -612,6 +632,8 @@ TEST_F(UniqueTest, DoUniqueOnlyShard) UniquePtr unique; ASSERT_EQ(factory->CreateUnique(unique), 0); + factory->SetExternalLogFuncInner(CTRLog); + UniqueConf conf; conf.useSharding = true; conf.desiredSize = 6; @@ -675,6 +697,8 @@ TEST_F(UniqueTest, DoUniquePadding) UniquePtr unique; ASSERT_EQ(factory->CreateUnique(unique), 0); + factory->SetExternalLogFuncInner(CTRLog); + UniqueConf conf; conf.usePadding = true; conf.useSharding = true; @@ -755,6 +779,8 @@ TEST_F(UniqueTest, DoUniqueNoThreadPool) UniquePtr unique; ASSERT_EQ(factory->CreateUnique(unique), 0); + factory->SetExternalLogFuncInner(CTRLog); + UniqueConf conf; conf.desiredSize = 20; // 配置空间大于实际输入数组长度,验证正常运行 conf.dataType = DataType::INT64; @@ -817,6 +843,8 @@ TEST_F(UniqueTest, DoUniqueShardNumberOversize) UniquePtr unique; ASSERT_EQ(factory->CreateUnique(unique), 0); + 
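+    // as in the other test cases touched by this patch, register the test logger so
+    // AccCTR's internal log output is routed through CTRLog: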
factory->SetExternalLogFuncInner(CTRLog); + UniqueConf conf; conf.useSharding = true; conf.desiredSize = 6; @@ -895,6 +923,7 @@ TEST_F(UniqueTest, DoUniqueSpecial) UniquePtr unique; ASSERT_EQ(factory->CreateUnique(unique), 0); + factory->SetExternalLogFuncInner(CTRLog); int count = 1000000; UniqueConf conf; @@ -963,6 +992,8 @@ TEST_F(UniqueTest, IdLarge) UniquePtr unique; ASSERT_EQ(factory->CreateUnique(unique), 0); + factory->SetExternalLogFuncInner(CTRLog); + UniqueConf conf; conf.desiredSize = 6; conf.dataType = DataType::INT64; @@ -999,6 +1030,8 @@ TEST_F(UniqueTest, DoUniqueNormalInt32) UniquePtr unique; ASSERT_EQ(factory->CreateUnique(unique), 0); + factory->SetExternalLogFuncInner(CTRLog); + UniqueConf conf; conf.useSharding = true; conf.desiredSize = 6; @@ -1122,6 +1155,8 @@ TEST_F(UniqueTest, DoUniqueShardMultipleTimes) UniquePtr unique; ASSERT_EQ(factory->CreateUnique(unique), 0); + factory->SetExternalLogFuncInner(CTRLog); + UniqueConf conf; conf.useSharding = true; conf.desiredSize = 6; @@ -1286,6 +1321,8 @@ TEST_F(UniqueTest, IdCntSmall) UniquePtr unique; ASSERT_EQ(factory->CreateUnique(unique), 0); + factory->SetExternalLogFuncInner(CTRLog); + UniqueConf conf; conf.desiredSize = 6; conf.dataType = DataType::INT64; @@ -1321,7 +1358,10 @@ TEST_F(UniqueTest, DoUniqueLotsDataFunction) std::string input_path(path); std::cout << "input_path:" + input_path + "/data40.txt" << std::endl; std::ifstream input(input_path + "/data40.txt"); - + if(!input.good()) { + std::cout << "Failed to open file:" + input_path + "/data40.txt" << std::endl; + return; + } std::vector numbers; std::string line; while (std::getline(input, line, ',')) { @@ -1423,7 +1463,10 @@ TEST_F(UniqueTest, DoUniqueLotsDataPaddingFunction) std::string input_path(path); std::cout << "input_path:" + input_path + "/data30.txt" << std::endl; std::ifstream input(input_path + "/data30.txt"); - + if(!input.good()) { + std::cout << "Failed to open file:" + input_path + "/data30.txt" << std::endl; + return; + } std::vector numbers; std::string line; while (std::getline(input, line, ',')) { diff --git a/src/AccCTR/tests/ut/src/unique_test.h b/src/AccCTR/tests/ut/src/unique_test.h index 0243f262..c3bc64f3 100644 --- a/src/AccCTR/tests/ut/src/unique_test.h +++ b/src/AccCTR/tests/ut/src/unique_test.h @@ -19,7 +19,6 @@ limitations under the License. 
#include #include #include -#include "factory.h" #include "gtest/gtest.h" #include "gmock/gmock.h" #include "unique.h" @@ -28,21 +27,6 @@ using namespace std; using namespace ock::ctr; -class SimpleThreadPool { -public: - static void SyncRun(const std::vector> &tasks) - { - std::vector> futs; - for (auto &task : tasks) { - futs.push_back(std::async(task)); - } - for (auto &fut : futs) { - fut.wait(); - } - } -}; - - class UniqueTest : public testing::Test { protected: UniqueTest() {}; diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index dd1052f2..64a076b9 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -27,6 +27,11 @@ if(NOT SECUREC_PATH) endif() message("SECUREC_PATH: " ${SECUREC_PATH}) +if(NOT ACCCTR_PATH) + set(ACCCTR_PATH ${PROJECT_SOURCE_DIR}/AccCTR) +endif() +message("ACCCTR_PATH: " ${ACCCTR_PATH}) + include_directories(${ABSEIL_PATH}/include) link_directories(${ABSEIL_PATH}/lib) @@ -38,7 +43,7 @@ endif() link_libraries(stdc++fs) -file(GLOB_RECURSE MXREC_SRC ./*.cpp) +file(GLOB_RECURSE MXREC_SRC ./*.cpp ./*.h) add_library(ASC SHARED ${MXREC_SRC}) target_include_directories(ASC @@ -55,10 +60,11 @@ target_link_directories(ASC ${HDF5_PATH}/lib ${SECUREC_PATH}/lib ${ASCEND_DRIVER_PATH}/lib64/driver + ${ACCCTR_PATH}/output/ock_ctr_common/lib ) target_link_libraries(ASC PUBLIC ascendcl msprofiler ge_executor gert runtime ge_common register graph ascend_protobuf - profapi opt_feature error_manager exe_graph acl_tdt_channel acl_tdt_queue securec drvdsmi_host) + profapi opt_feature error_manager exe_graph acl_tdt_channel acl_tdt_queue securec drvdsmi_host _ock_ctr_common) target_link_libraries(ASC PUBLIC -l:_tf_adapter.so OpenMP::OpenMP_CXX ${MPI_CXX_LIBRARIES} diff --git a/src/core/checkpoint/checkpoint.cpp b/src/core/checkpoint/checkpoint.cpp index 0fc03feb..8a6750d5 100644 --- a/src/core/checkpoint/checkpoint.cpp +++ b/src/core/checkpoint/checkpoint.cpp @@ -196,9 +196,7 @@ void Checkpoint::WriteStream(CkptTransData& transData, const string& dataDir, si } ssize_t writeBytesNum; - if (floatTransSet.find(dataType) != floatTransSet.end()) { - writeBytesNum = fileSystemPtr->Write(dataDir, transData.floatArr, dataSize); - } else if (int32TransSet.find(dataType) != int32TransSet.end()) { + if (int32TransSet.find(dataType) != int32TransSet.end()) { writeBytesNum = fileSystemPtr->Write(dataDir, reinterpret_cast(transData.int32Arr.data()), dataSize); } else if (int64TransSet.find(dataType) != int64TransSet.end()) { @@ -207,6 +205,8 @@ void Checkpoint::WriteStream(CkptTransData& transData, const string& dataDir, si } else if (dataType == CkptDataType::ATTRIBUTE) { writeBytesNum = fileSystemPtr->Write(dataDir, reinterpret_cast(transData.attribute.data()), dataSize); + } else { + throw runtime_error("unknown CkptDataType"); } if (writeBytesNum == -1) { @@ -276,7 +276,6 @@ void Checkpoint::LoadDataset(const vector& embNames, auto attributeDir { datasetPath + dirSeparator + "slice" + attribFileType }; CkptTransData transData; - LOG_DEBUG("====Start reading data from: {}", attributeDir); auto dataElmtBytes { dataHandler->GetDataElmtBytes(CkptDataType::ATTRIBUTE) }; ReadStream(transData, attributeDir, CkptDataType::ATTRIBUTE, dataElmtBytes); @@ -328,10 +327,10 @@ void Checkpoint::ReadStream(CkptTransData& transData, readBytesNum = fileSystemPtr->Read(dataDir, reinterpret_cast(transData.int32Arr.data()), datasetSize); } else if (int64TransSet.find(dataType) != int64TransSet.end()) { readBytesNum = fileSystemPtr->Read(dataDir, reinterpret_cast(transData.int64Arr.data()), 
datasetSize); - } else if (floatTransSet.find(dataType) != floatTransSet.end()) { - readBytesNum = fileSystemPtr->Read(dataDir, reinterpret_cast(transData.floatArr.data()), datasetSize); } else if (dataType == CkptDataType::ATTRIBUTE) { readBytesNum = fileSystemPtr->Read(dataDir, reinterpret_cast(transData.attribute.data()), datasetSize); + } else { + throw runtime_error("unknown CkptDataType"); } if (readBytesNum == -1) { @@ -383,9 +382,9 @@ void Checkpoint::SetTransDataSize(CkptTransData& transData, size_t datasetSize, transData.int32Arr.resize(datasetSize); } else if (int64TransSet.find(dataType) != int64TransSet.end()) { transData.int64Arr.resize(datasetSize); - } else if (floatTransSet.find(dataType) != floatTransSet.end()) { - transData.floatArr.resize(datasetSize); } else if (dataType == CkptDataType::ATTRIBUTE) { transData.attribute.resize(datasetSize); + } else { + throw runtime_error("unknown CkptDataType"); } } diff --git a/src/core/checkpoint/checkpoint.h b/src/core/checkpoint/checkpoint.h index 362881b2..625660ff 100644 --- a/src/core/checkpoint/checkpoint.h +++ b/src/core/checkpoint/checkpoint.h @@ -63,9 +63,6 @@ namespace MxRec { CkptDataType::KEY_COUNT_MAP, CkptDataType::EVICT_POS }; - const set floatTransSet{ - CkptDataType::EMB_DATA - }; vector> dataHandlers; string processPath; diff --git a/src/core/ckpt_data_handler/ckpt_data_handler.cpp b/src/core/ckpt_data_handler/ckpt_data_handler.cpp index 18f1a090..04feb4b3 100644 --- a/src/core/ckpt_data_handler/ckpt_data_handler.cpp +++ b/src/core/ckpt_data_handler/ckpt_data_handler.cpp @@ -33,7 +33,6 @@ void CkptDataHandler::CleanTransfer() { transferData.int64Arr.clear(); transferData.int32Arr.clear(); - transferData.floatArr.clear(); transferData.attribute.clear(); transferData.datasetSize = 0; transferData.attributeSize = 0; @@ -42,7 +41,7 @@ void CkptDataHandler::CleanTransfer() void CkptDataHandler::SetDatasetForLoadEmb(CkptDataType dataType, string embName, CkptTransData& loadedData, CkptData& ckptData) { - LOG_ERROR("Load host emb failed. dataType:{}, embName:{}, loadedData:{}, ckptData:{}", - dataType, embName, loadedData.datasetSize, ckptData.embHashMaps.empty()); + LOG_ERROR("Load host emb failed. 
dataType:{}, embName:{}, loadedData:{}", + dataType, embName, loadedData.datasetSize); throw runtime_error("only EMB_INFO and EMB_DATA supported for load host emb"); } \ No newline at end of file diff --git a/src/core/ckpt_data_handler/ckpt_data_handler.h b/src/core/ckpt_data_handler/ckpt_data_handler.h index 383317d9..0ca33294 100644 --- a/src/core/ckpt_data_handler/ckpt_data_handler.h +++ b/src/core/ckpt_data_handler/ckpt_data_handler.h @@ -18,8 +18,6 @@ See the License for the specific language governing permissions and #include -#include "emb_hashmap/emb_hashmap.h" -#include "host_emb/host_emb.h" #include "utils/common.h" namespace MxRec { diff --git a/src/core/ckpt_data_handler/feat_admit_n_evict_ckpt/feat_admit_n_evict_ckpt.cpp b/src/core/ckpt_data_handler/feat_admit_n_evict_ckpt/feat_admit_n_evict_ckpt.cpp index be35044b..140b9c77 100644 --- a/src/core/ckpt_data_handler/feat_admit_n_evict_ckpt/feat_admit_n_evict_ckpt.cpp +++ b/src/core/ckpt_data_handler/feat_admit_n_evict_ckpt/feat_admit_n_evict_ckpt.cpp @@ -157,7 +157,7 @@ void FeatAdmitNEvictCkpt::SetHistRec(string embName) for (size_t i = featItemInfoOffset; i < featItemInfoTotalSize + featItemInfoOffset; i += featItemInfoSaveNum) { process = i % printPerStep; if (process == 1) { - LOG_DEBUG("====in SetHistRec, process : %f", i / featItemInfoTotalSize); + LOG_TRACE("====in SetHistRec, process : {}", i / featItemInfoTotalSize); } auto featureId = transArr[i + featureIdIdxOffset]; auto count = transArr[i + countIdxOffset]; diff --git a/src/core/emb_hashmap/emb_hashmap.cpp b/src/core/emb_hashmap/emb_hashmap.cpp deleted file mode 100644 index 977b2c0b..00000000 --- a/src/core/emb_hashmap/emb_hashmap.cpp +++ /dev/null @@ -1,477 +0,0 @@ -/* Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and - limitations under the License. 
-==============================================================================*/ - -#include "emb_hashmap.h" -#include -#include - -#include "hybrid_mgmt/hybrid_mgmt_block.h" -#include "utils/common.h" -#include "emb_table/embedding_mgmt.h" - -using namespace MxRec; - -void EmbHashMap::Init(const RankInfo& ri, const vector& embInfos, bool ifLoad) -{ - this->rankInfo = ri; - if (!ifLoad) { - EmbHashMapInfo embHashMapInfo; - LOG_INFO("init emb hash map from scratch"); - for (const auto& embInfo: embInfos) { - embHashMapInfo.devOffset2Batch.resize(embInfo.devVocabSize); - embHashMapInfo.devOffset2Key.resize(embInfo.devVocabSize); - embHashMapInfo.hostVocabSize = embInfo.hostVocabSize; - embHashMapInfo.devVocabSize = embInfo.devVocabSize; - embHashMapInfo.currentUpdatePos = 0; - fill(embHashMapInfo.devOffset2Batch.begin(), embHashMapInfo.devOffset2Batch.end(), -1); - fill(embHashMapInfo.devOffset2Key.begin(), embHashMapInfo.devOffset2Key.end(), -1); - embHashMaps[embInfo.name] = embHashMapInfo; - - LOG_TRACE("devOffset2Key, {}", VectorToString(embHashMaps.at(embInfo.name).devOffset2Key)); - LOG_TRACE("devOffset2Batch, {}", VectorToString(embHashMaps.at(embInfo.name).devOffset2Batch)); - } - } -} - -void EmbHashMap::ClearLookupAndSwapOffset(EmbHashMapInfo& embHashMap) const -{ - embHashMap.swapPos.clear(); - embHashMap.lookUpVec.clear(); - embHashMap.ddr2HbmKeys.clear(); -} - -/// DDR模型下处理特征的offset、swap信息等 -/// \param embName 表名 -/// \param keys 查询向量 -/// \param DDRParam 临时向量 -/// \param channelId 通道索引(训练/推理) -void EmbHashMap::Process(const string& embName, vector& keys, DDRParam& ddrParam, int channelId) -{ -#ifndef GTEST - EASY_FUNCTION(profiler::colors::Pink) - TimeCost swapTimeCost; - std::shared_ptr table = EmbeddingMgmt::Instance()->GetTable(embName); - - int32_t keepBatch = swapId; // 处理batch的次数,多个预取一起处理算一次 - vector swapPos; - vector lookUpVec = table->FindOffset(keys, swapId, channelId, swapPos); - - table->RefreshFreqInfoWithSwap(); - - EASY_BLOCK("hostHashMaps->tdt") - - std::copy(lookUpVec.begin(), lookUpVec.end(), std::back_inserter(ddrParam.offsetsOut)); - - // 构造查询向量tensor - int lookUpVecSize = static_cast(lookUpVec.size()); - ddrParam.tmpDataOut.emplace_back(Tensor(tensorflow::DT_INT32, { lookUpVecSize })); - - auto lookupTensorData = ddrParam.tmpDataOut.back().flat(); - for (int i = 0; i < lookUpVecSize; i++) { - lookupTensorData(i) = static_cast(lookUpVec[i]); - } - LOG_TRACE("lookupTensor, {}", VectorToString(lookUpVec)); - - // 构造交换向量tensor - int swapSize = static_cast(swapPos.size()); - ddrParam.tmpDataOut.emplace_back(Tensor(tensorflow::DT_INT32, { swapSize })); - - auto swapTensorData = ddrParam.tmpDataOut.back().flat(); - for (int i = 0; i < swapSize; i++) { - swapTensorData(i) = static_cast(swapPos[i]); - } - if (swapSize > 0) { - LOG_DEBUG("swap num: {}", swapSize); - } - - LOG_TRACE("swapTensor, {}", VectorToString(swapPos)); - // 清空本次记录的查询偏移和交换偏移 - table->ClearLookupAndSwapOffset(); - - LOG_INFO("current ddr emb:{}, usage:{}/[{}+{}]", embName, table->GetMaxOffset(), - table->GetDevVocabSize(), table->GetHostVocabSize()); - - ddrParam.tmpDataOut.emplace_back(Tensor(tensorflow::DT_INT32, { 1 })); - auto swapLen = ddrParam.tmpDataOut.back().flat(); - swapLen(0) = swapSize; - - if (GlogConfig::gStatOn) { - LOG_INFO(STAT_INFO "channel_id {} batch_id {} rank_id {} swap_key_size {} swap_time_cost {}", - channelId, swapId, rankInfo.rankId, swapSize, swapTimeCost.ElapsedMS()); - } - - swapId++; - EASY_END_BLOCK -#endif -} - - -auto EmbHashMap::GetHashMaps() -> 
absl::flat_hash_map -{ - LOG_DEBUG(HYBRID_BLOCKING + " start GetHashMaps"); - HybridMgmtBlock* hybridMgmtBlock = Singleton::GetInstance(); - auto embHashMapsOld = embHashMaps; - int checkResult = hybridMgmtBlock->CheckSaveEmbMapValid(); - if (checkResult == 0) { - // 检查是否需要回退 - return embHashMapsOld; - } - if (checkResult == 1) { - // 回退一步 - for (auto& temp: embHashMapsOld) { - auto &embHashMap = temp.second; - for (auto &swapKeys: embHashMap.oldSwap) { - emb_key_t oldKey = swapKeys.first; - emb_key_t key = swapKeys.second; - int tempOffset = static_cast(embHashMap.hostHashMap[key]); - embHashMap.hostHashMap[key] = embHashMap.hostHashMap[oldKey]; - embHashMap.hostHashMap[oldKey] = static_cast(tempOffset); - } - embHashMap.maxOffset = embHashMap.maxOffsetOld; - for (auto &offset2Key: embHashMap.devOffset2KeyOld) { - embHashMap.devOffset2Key[offset2Key.first] = offset2Key.second; - } - } - return embHashMapsOld; - } - // 此时需要回退2步,无法满足此条件,保存的东西错误,直接回退 - if (rankInfo.isDDR) { - throw HybridMgmtBlockingException("EmbHashMap::GetHashMaps() "); - } - return embHashMapsOld; -} - -void EmbHashMap::LoadHashMap(EmbHashMemT& loadData) -{ - embHashMaps = std::move(loadData); -} - -/// 对HBM剩余空间和更新位置进行初始化 -void EmbHashMapInfo::SetStartCount() -{ - currentUpdatePosStart = currentUpdatePos; - freeSize = devVocabSize; -} - -/// 判断HBM是否有剩余空间 -/// \param i 查询向量的大小 -/// \return -bool EmbHashMapInfo::HasFree(size_t i) const -{ - return freeSize < i; -} - -/* -* 删除淘汰key的映射关系,并将其offset更新到evictPos,待后续复用 -*/ -void EmbHashMap::EvictDeleteEmb(const string& embName, const vector& keys) -{ - EASY_FUNCTION() - size_t keySize = keys.size(); - auto& embHashMap = embHashMaps.at(embName); - vector evictHBMKeys; - vector evictDDRKeys; - for (size_t i = 0; i < keySize; i++) { - size_t offset; - auto key = keys[i]; - if (key == -1) { - LOG_WARN("evict key equal -1!"); - continue; - } - const auto& iter = embHashMap.hostHashMap.find(key); - if (iter != embHashMap.hostHashMap.end()) { - offset = iter->second; - embHashMap.hostHashMap.erase(iter); - LOG_TRACE("evict embName {}, offset {}", embName, offset); - } else { - // 淘汰依据keyProcess中的history,hashmap映射关系创建于ParseKey;两者异步,造成淘汰的值在hashmap里可能未创建 - continue; - } - - if (offset < embHashMap.devVocabSize) { - embHashMap.devOffset2Batch[offset] = -1; - embHashMap.devOffset2KeyOld.emplace_back(offset, embHashMap.devOffset2Key[offset]); - embHashMap.devOffset2Key[offset] = -1; - embHashMap.evictDevPos.emplace_back(offset); - evictHBMKeys.emplace_back(key); - } else { - embHashMap.evictPos.emplace_back(offset); - evictDDRKeys.emplace_back(key); - } - } - if (isSSDEnabled) { - cacheManager->RefreshFreqInfoCommon(embName, evictHBMKeys, TransferType::HBM_2_EVICT); - cacheManager->RefreshFreqInfoCommon(embName, evictDDRKeys, TransferType::DDR_2_EVICT); - } - - LOG_INFO("ddr EvictDeleteEmb, emb: [{}], hostEvictSize: {}, devEvictSize: {}", - embName, embHashMap.evictPos.size(), embHashMap.evictDevPos.size()); - LOG_TRACE("hostHashMap, {}", MapToString(embHashMaps[embName].hostHashMap)); -} - - -/// 从embHashMaps获取key对应的位置,构造查询向量;更新devOffset2Batch;记录dev与host需要交换的偏移 -/// \param embName 表名 -/// \param keys 查询向量 -/// \param currentBatchId 已处理的batch数 -/// \param keepBatchId 处理batch的次数,多个预取一起处理算一次 -/// \param channelId 通道索引(训练/推理) -void EmbHashMap::FindOffset(const string& embName, const vector& keys, - size_t currentBatchId, size_t keepBatchId, int channelId) -{ - EASY_FUNCTION() - size_t keySize = keys.size(); - auto it = embHashMaps.find(embName); - if (it == embHashMaps.end()) { - throw 
runtime_error("table not exist in embHashMaps"); - } - auto &embHashMap = it->second; - UpdateBatchId(keys, currentBatchId, keySize, embHashMap); - for (size_t i = 0; i < keySize; i++) { - auto key = keys[i]; - if (key == -1) { - embHashMap.lookUpVec.emplace_back(INVALID_KEY_VALUE); - continue; - } - size_t offset; - auto isOffsetValid = FindOffsetHelper(key, embHashMap, channelId, offset); - if (!isOffsetValid) { - embHashMap.lookUpVec.emplace_back(INVALID_KEY_VALUE); - continue; - } - AddKeyFreqInfo(embName, key, RecordType::NOT_DDR); - if (offset < embHashMap.devVocabSize) { - // 偏移小于等于HBM容量:直接放入查询向量;更新偏移之前关联的key和当前关联的key - embHashMap.lookUpVec.emplace_back(offset); - embHashMap.devOffset2KeyOld.emplace_back(offset, static_cast(embHashMap.devOffset2Key[offset])); - embHashMap.devOffset2Key[offset] = key; - } else { - // 偏移大于HBM容量:记录在host emb上的偏移;找到需要交换的HBM偏移 - embHashMap.missingKeysHostPos.emplace_back(offset - embHashMap.devVocabSize); - FindSwapPosOld(embName, key, offset, currentBatchId, keepBatchId); - } - } - if (currentBatchId == 0) { - LOG_INFO("max offset {}", embHashMap.maxOffset); - } - LOG_TRACE("hostHashMap, {}", MapToString(embHashMaps[embName].hostHashMap)); -} - - -/// 查找key对应的偏移;1. 已在hash map中,直接返回对应的offset;2. 开启淘汰的情况下,复用淘汰的位置;3. 没有则新分配 -/// \param key 输入特征 -/// \param embHashMap hash map实例 -/// \param channelId 通道索引(训练/推理) -/// \param offset 未初始化变量,用于记录 -/// \return -bool EmbHashMap::FindOffsetHelper(const emb_key_t& key, EmbHashMapInfo& embHashMap, int channelId, size_t& offset) const - -{ - const auto& iter = embHashMap.hostHashMap.find(key); - if (iter != embHashMap.hostHashMap.end()) { - offset = iter->second; - LOG_TRACE("devVocabSize, {} , offset , {}", embHashMap.devVocabSize, offset); - if (isSSDEnabled && offset >= embHashMap.devVocabSize) { - embHashMap.ddr2HbmKeys.emplace_back(key); - } - } else if (embHashMap.evictDevPos.size() != 0 && channelId == TRAIN_CHANNEL_ID) { // 优先复用hbm表 - offset = embHashMap.evictDevPos.back(); - embHashMap.hostHashMap[key] = offset; - LOG_TRACE("ddr mode, dev evictPos is not null, key [{}] reuse offset [{}], evictSize [{}]", - key, offset, embHashMap.evictDevPos.size()); - embHashMap.evictDevPos.pop_back(); - } else if (embHashMap.evictPos.size() != 0 && channelId == TRAIN_CHANNEL_ID) { // hbm不足,再复用ddr表 - offset = embHashMap.evictPos.back(); - embHashMap.hostHashMap[key] = offset; - LOG_TRACE("ddr mode, host evictPos is not null, key [{}] reuse offset [{}], evictSize [{}]", - key, offset, embHashMap.evictPos.size()); - embHashMap.evictPos.pop_back(); - } else { - if (channelId == TRAIN_CHANNEL_ID) { - embHashMap.hostHashMap[key] = embHashMap.maxOffset; - offset = embHashMap.maxOffset; - embHashMap.maxOffset++; - if (embHashMap.maxOffset == embHashMap.devVocabSize) { - LOG_INFO("start using host vocab!"); - } - if (embHashMap.maxOffset > embHashMap.hostVocabSize + embHashMap.devVocabSize) { - LOG_ERROR("hostVocabSize too small! 
dev:{} host:{}", embHashMap.devVocabSize, embHashMap.hostVocabSize); - throw runtime_error("hostVocabSize too small"); - } - } else { - return false; - } - } - return true; -} - -/// 更新HBM中的key相应offset最近出现的batch步数,用于跟踪哪些offset是最近在使用的 -/// \param keys 查询向量 -/// \param currentBatchId 已处理的batch数 -/// \param keySize 查询向量长度 -/// \param embHashMap hash map实例 -void EmbHashMap::UpdateBatchId(const vector& keys, size_t currentBatchId, size_t keySize, - EmbHashMapInfo& embHashMap) const -{ - for (size_t i = 0; i < keySize; i++) { - size_t offset; - auto key = keys[i]; - if (key == -1) { - continue; - } - const auto& iter = embHashMap.hostHashMap.find(key); - if (iter != embHashMap.hostHashMap.end()) { - offset = iter->second; - - LOG_TRACE("key will be used, {} , offset , {}", key, offset); - if (offset < embHashMap.devVocabSize) { - // devOffset2Batch size equal to devVocabSize, unnecessary to check index boundary - embHashMap.devOffset2Batch[offset] = static_cast(currentBatchId); - } - } - } -} - -/// 利用devOffset2Batch上key最近使用的batchId,来选择需要淘汰的key,记录淘汰位置和device侧所需的keys -/// \param embName 表名 -/// \param key 输入特征 -/// \param hostOffset 全局偏移 -/// \param currentBatchId 已处理的batch数 -/// \param keepBatchId 处理batch的次数,多个预取一起处理算一次 -/// \return 是否找到需要交换的位置 -bool EmbHashMap::FindSwapPosOld(const string& embName, emb_key_t key, size_t hostOffset, size_t currentBatchId, - size_t keepBatchId) -{ - bool notFind = true; - auto it = embHashMaps.find(embName); - if (it == embHashMaps.end()) { - throw runtime_error("table not exist in embHashMaps"); - } - auto &embHashMap = it->second; - while (notFind) { - // 找到本次预取之前的偏移(保证所有预取batch的key都在HBM中) - if (embHashMap.currentUpdatePos >= embHashMap.devOffset2Batch.size()) { - throw runtime_error("currentUpdatePos out of range"); - } - - if (embHashMap.devOffset2Batch[embHashMap.currentUpdatePos] < static_cast(keepBatchId)) { - embHashMap.devOffset2Batch[embHashMap.currentUpdatePos] = static_cast(currentBatchId); - embHashMap.swapPos.emplace_back(embHashMap.currentUpdatePos); // 记录需要被换出的HBM偏移 - embHashMap.lookUpVec.emplace_back(embHashMap.currentUpdatePos); // 交换的位置就是该key查询的偏移 - embHashMap.hostHashMap[key] = embHashMap.currentUpdatePos; // 更新key对应的HBM偏移 - // 记录HBM偏移之前的key - embHashMap.devOffset2KeyOld.emplace_back(embHashMap.currentUpdatePos, - embHashMap.devOffset2Key[embHashMap.currentUpdatePos]); - auto& oldKey = embHashMap.devOffset2Key[embHashMap.currentUpdatePos]; - embHashMap.oldSwap.emplace_back(oldKey, key); // 记录交换的两个key oldKey:HBM->DDR key:DDR->HBM - embHashMap.hostHashMap[oldKey] = hostOffset; // 更新被替换的key的偏移 - oldKey = key; - notFind = false; - } - embHashMap.currentUpdatePos++; // 查找位置+1 - embHashMap.freeSize--; // HBM可用空间-1 - - // 遍历完一遍整个HBM表后,从头开始遍历 - if (embHashMap.currentUpdatePos == embHashMap.devVocabSize) { - embHashMap.currentUpdatePos = 0; - } - - // 已经找完整个HBM空间,且没找到可用位置,表示HBM空间不足以放下整个batch(预取batch数)的key,无法正常执行训练,故运行时错误退出 - if (embHashMap.currentUpdatePos == embHashMap.currentUpdatePosStart && notFind) { - LOG_ERROR("devVocabSize is too small"); - throw runtime_error("devVocabSize is too small"); - } - } - return true; -} - -/// HBM-DDR换入换出时刷新频次信息 -/// \param embName emb表名 -/// \param embHashMap emb hash map -void EmbHashMap::RefreshFreqInfoWithSwap(const string& embName, EmbHashMapInfo& embHashMap) const -{ - if (!isSSDEnabled) { - return; - } - // 换入换出key列表,元素为pair: pair oldKey为从HBM移出的key, key为从DDR移出的key - auto& oldSwap = embHashMap.oldSwap; - LOG_DEBUG("RefreshFreqInfoWithSwap:oldSwap Size:{}", oldSwap.size()); - vector enterDDRKeys; - for (auto 
keyPair : oldSwap) { - enterDDRKeys.emplace_back(keyPair.first); - } - cacheManager->RefreshFreqInfoCommon(embName, enterDDRKeys, TransferType::HBM_2_DDR); - cacheManager->RefreshFreqInfoCommon(embName, embHashMap.ddr2HbmKeys, TransferType::DDR_2_HBM); - - AddCacheManagerTraceLog(embName, embHashMap); -} - -/// 记录日志:HBM和DDR换入换出后,比较hostHashMap中DDR内key和表对应的lfuCache对象中的key内容 -void EmbHashMap::AddCacheManagerTraceLog(const string& embTableName, const EmbHashMapInfo& embHashMap) const -{ - if (Logger::GetLevel() != Logger::TRACE) { - return; - } - auto& hostMap = embHashMap.hostHashMap; - auto& devSize = embHashMap.devVocabSize; - auto iter = cacheManager->ddrKeyFreqMap.find(embTableName); - if (iter == cacheManager->ddrKeyFreqMap.end()) { - throw runtime_error("table not in ddrKeyFreqMap"); - } - auto &lfu = iter->second; - const auto& lfuTab = lfu.GetFreqTable(); - if (lfuTab.empty()) { - return; - } - size_t tableKeyInDdr = 0; - vector ddrKeys; // 获取hostHashMap中保存在DDR的key - for (const auto& item : hostMap) { - if (item.second < devSize) { - continue; - } - ddrKeys.emplace_back(item.first); - ++tableKeyInDdr; - } - vector lfuKeys; - for (const auto& it : lfuTab) { - lfuKeys.emplace_back(it.first); - } - std::sort(ddrKeys.begin(), ddrKeys.end()); - std::sort(lfuKeys.begin(), lfuKeys.end()); - std::string ddrKeysString = VectorToString(ddrKeys); - std::string lfuKeysString = VectorToString(lfuKeys); - if (ddrKeysString != lfuKeysString) { - LOG_ERROR("swap HBM with DDR step error, key string not equal, ddrKeysString:{}, lfuKeysString:{}", - ddrKeysString, lfuKeysString); - } else { - LOG_INFO("swap HBM with DDR step OK, table:{}, ddrKeysString == lfuKeysString, string length:{}", - embTableName, lfuKeysString.length()); - } - - LOG_INFO("swap HBM with DDR step end, table:{}, tableKeyInDdr:{}, tableKeyInLfu:{}", - embTableName, tableKeyInDdr, lfu.keyTable.size()); -} - -/// 记录key频次数据 -/// \param embTableName emb表名 -/// \param key key -/// \param type 记录类型枚举 -void EmbHashMap::AddKeyFreqInfo(const string& embTableName, const emb_key_t& key, RecordType type) const -{ - if (!isSSDEnabled) { - return; - } - cacheManager->PutKey(embTableName, key, type); -} diff --git a/src/core/emb_hashmap/emb_hashmap.h b/src/core/emb_hashmap/emb_hashmap.h deleted file mode 100644 index 96a75e54..00000000 --- a/src/core/emb_hashmap/emb_hashmap.h +++ /dev/null @@ -1,81 +0,0 @@ -/* Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and - limitations under the License. 
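For context on the frequency bookkeeping deleted above (AddKeyFreqInfo, RefreshFreqInfoCommon, ddrKeyFreqMap): in SSD mode the cache manager tracks per-table key hit counts so the coldest keys can be demoted when DDR fills. A minimal, self-contained sketch of that idea — ToyFreqTracker and its members are illustrative simplifications, not the real CacheManager API:

#include <cstdint>
#include <limits>
#include <string>
#include <unordered_map>

using EmbKey = int64_t;  // assumption: emb_key_t is a 64-bit integer key

struct ToyFreqTracker {
    // table name -> (key -> hit count), the shape of a per-table LFU record
    std::unordered_map<std::string, std::unordered_map<EmbKey, uint64_t>> freq;

    void PutKey(const std::string& table, EmbKey key)
    {
        ++freq[table][key];  // every lookup bumps the key's frequency
    }

    // The least-frequently-used key is the demotion candidate (DDR -> SSD).
    EmbKey ColdestKey(const std::string& table) const
    {
        EmbKey coldest = -1;
        uint64_t best = std::numeric_limits<uint64_t>::max();
        for (const auto& kv : freq.at(table)) {
            if (kv.second < best) {
                best = kv.second;
                coldest = kv.first;
            }
        }
        return coldest;
    }
};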
-==============================================================================*/ - -#ifndef MX_REC_EMB_HASHMAP_H -#define MX_REC_EMB_HASHMAP_H - -#include -#include -#include -#include "absl/container/flat_hash_map.h" -#include "host_emb/host_emb.h" -#include "ssd_cache/cache_manager.h" -#include "utils/common.h" -#include "utils/time_cost.h" - -namespace MxRec { - using namespace std; - - class EmbHashMap { - public: - EmbHashMap() = default; - - void Init(const RankInfo& ri, const vector& embInfos, bool ifLoad = false); - - void Process(const string& embName, std::vector& keys, DDRParam& ddrParam, int channelId); - - auto GetHashMaps() -> absl::flat_hash_map; - - void LoadHashMap(absl::flat_hash_map& loadData); - - void EvictDeleteEmb(const string& embName, const vector& keys); - - absl::flat_hash_map embHashMaps; - - bool FindOffsetHelper(const emb_key_t& key, EmbHashMapInfo& embHashMap, int channelId, size_t& offset) const; - - void UpdateBatchId(const vector& keys, size_t currentBatchId, size_t keySize, - EmbHashMapInfo& embHashMap) const; - - bool FindSwapPosOld(const string& embName, emb_key_t key, size_t hostOffset, size_t currentBatchId, - size_t keepBatchId); - - std::vector& GetEvictPos(const string& embName) - { - return embHashMaps.at(embName).evictPos; - } - - bool isSSDEnabled { false }; - CacheManager* cacheManager; - - GTEST_PRIVATE: - - void FindOffset(const string& embName, const vector& keys, - size_t currentBatchId, size_t keepBatchId, int channelId); - - void AddCacheManagerTraceLog(const string& embTableName, const EmbHashMapInfo& embHashMap) const; - - void AddKeyFreqInfo(const string& embTableName, const emb_key_t& key, RecordType type) const; - - void ClearLookupAndSwapOffset(EmbHashMapInfo& embHashMap) const; - - void RefreshFreqInfoWithSwap(const string& embName, EmbHashMapInfo& embHashMap) const; - - RankInfo rankInfo; - int swapId { 0 }; - }; -} - -#endif // MX_REC_EMB_HASHMAP_H diff --git a/src/core/emb_table/emb_table.cpp b/src/core/emb_table/emb_table.cpp index 1c24eb2b..914cf535 100644 --- a/src/core/emb_table/emb_table.cpp +++ b/src/core/emb_table/emb_table.cpp @@ -78,6 +78,7 @@ EmbTable::~EmbTable() // 从embeddingList获取一个可用的emb地址 int64_t EmbTable::GetEmbAddress() { + int64_t ret = -1; #ifndef GTEST if (embeddingList.empty()) { PrintStatus(); @@ -97,8 +98,9 @@ int64_t EmbTable::GetEmbAddress() float *embAddr = embeddingList.front(); embeddingList.pop_front(); usedCapacity++; - return reinterpret_cast(embAddr); + ret = reinterpret_cast(embAddr); #endif + return ret; } void EmbTable::RandomInit(void* newBlock) diff --git a/src/core/emb_table/embedding_ddr.cpp b/src/core/emb_table/embedding_ddr.cpp index 3d2b77e7..caec0229 100644 --- a/src/core/emb_table/embedding_ddr.cpp +++ b/src/core/emb_table/embedding_ddr.cpp @@ -12,15 +12,15 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
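The EmbTable::GetEmbAddress hunk above gives the GTEST build a defined return value by initializing ret to -1 and falling through to a single return. The allocation pattern underneath is a plain free-list pop; a sketch under simplified assumptions — ToyEmbPool is hypothetical and omits the random init, logging, and GTEST guard of the real class:

#include <cstddef>
#include <cstdint>
#include <deque>
#include <stdexcept>
#include <utility>

class ToyEmbPool {
public:
    explicit ToyEmbPool(std::deque<float*> freeRows) : freeRows_(std::move(freeRows)) {}

    // Pop one pre-allocated embedding row; addresses travel through this
    // code base as int64_t, hence the reinterpret_cast.
    int64_t GetEmbAddress()
    {
        if (freeRows_.empty()) {
            throw std::runtime_error("embedding pool exhausted");
        }
        float* row = freeRows_.front();
        freeRows_.pop_front();
        ++usedCapacity_;
        return reinterpret_cast<int64_t>(row);
    }

private:
    std::deque<float*> freeRows_;
    std::size_t usedCapacity_ = 0;
};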
==============================================================================*/ - #include "emb_table/embedding_ddr.h" + #include + #include "utils/logger.h" #include "utils/singleton.h" -#include "host_emb/host_emb.h" #include "file_system/file_system_handler.h" #include "ssd_cache/cache_manager.h" -#include "emb_table/embedding_mgmt.h" +#include "ock_ctr_common/include/error_code.h" using namespace MxRec; @@ -29,18 +29,15 @@ EmbeddingDDR::EmbeddingDDR() } EmbeddingDDR::EmbeddingDDR(const EmbInfo& info, const RankInfo& rankInfo, int inSeed) - : EmbeddingTable(info, rankInfo, inSeed) + : EmbeddingTable(info, rankInfo, inSeed), deviceId(rankInfo.deviceId) { - LOG_INFO("Init DDR table [{}] devVocabSize = {} hostVocabSize = {}", name, devVocabSize, hostVocabSize); - currentUpdatePos = 0; - devOffset2Key.resize(devVocabSize); - devOffset2Batch.resize(devVocabSize); - std::fill(devOffset2Batch.begin(), devOffset2Batch.end(), -1); - std::fill(devOffset2Key.begin(), devOffset2Key.end(), -1); + LOG_INFO("Init DDR table:{}, devVocabSize:{}, hostVocabSize:{}", name, devVocabSize, hostVocabSize); } EmbeddingDDR::~EmbeddingDDR() { + hdTransfer = nullptr; + embCache = nullptr; } void EmbeddingDDR::Key2Offset(std::vector& splitKey, int channel) @@ -52,214 +49,11 @@ int64_t EmbeddingDDR::capacity() const return capacity_; } -std::vector EmbeddingDDR::FindOffset(const vector& keys, - size_t batchId, int channelId, - std::vector& swapPos) -{ - devOffset2KeyOld.clear(); - oldSwap.clear(); - maxOffsetOld = maxOffset; - - UpdateBatchId(keys, batchId); - std::vector lookUpVec; - for (size_t i = 0; i < keys.size(); i++) { - emb_key_t key = keys[i]; - if (key == INVALID_KEY_VALUE) { - lookUpVec.emplace_back(INVALID_KEY_VALUE); - continue; - } - emb_key_t offset = FindOffsetHelper(key, channelId); - if (offset == INVALID_KEY_VALUE) { - lookUpVec.emplace_back(INVALID_KEY_VALUE); - continue; - } - AddKeyFreqInfo(key, RecordType::NOT_DDR); - if (offset < devVocabSize) { - // 偏移小于等于HBM容量:直接放入查询向量;更新偏移之前关联的key和当前关联的key - lookUpVec.push_back(offset); - devOffset2KeyOld.emplace_back(offset, static_cast(devOffset2Key[offset])); - devOffset2Key[offset] = key; - } else { - // 偏移大于HBM容量:记录在host emb上的偏移;找到需要交换的HBM偏移 - missingKeysHostPos_.emplace_back(offset - devVocabSize); - offset = FindSwapPosOld(key, offset, batchId, swapPos); - lookUpVec.emplace_back(offset); - } - } - if (batchId == 0) { - LOG_INFO("max offset {}", maxOffset); - } - LOG_TRACE("keyOffsetMap, {}", MapToString(keyOffsetMap)); - return lookUpVec; -} - -emb_key_t EmbeddingDDR::FindOffsetHelper(const emb_key_t& key, int channelId) -{ - const auto& iter = keyOffsetMap.find(key); - emb_key_t offset = INVALID_KEY_VALUE; - if (iter != keyOffsetMap.end()) { - offset = iter->second; - LOG_TRACE("devVocabSize, {} , offset , {}", devVocabSize, offset); - if (isSSDEnabled_ && offset >= devVocabSize) { - ddr2HbmKeys.emplace_back(key); - } - return offset; - } - if (channelId != TRAIN_CHANNEL_ID) { - return offset; - } - if (evictDevPos.size() != 0) { // 优先复用hbm表 - offset = evictDevPos.back(); - keyOffsetMap[key] = offset; - LOG_TRACE("ddr mode, dev evictDevPos is not null, key [{}] reuse offset [{}], evictSize [{}]", - key, offset, evictDevPos.size()); - evictDevPos.pop_back(); - LOG_ERROR("dev evicted offset = {}", offset); - return offset; - } - - if (evictHostPos.size() != 0) { // hbm不足,再复用host/ddr表 - offset = evictHostPos.back(); - keyOffsetMap[key] = offset; - LOG_TRACE("ddr mode, host evictPos is not null, key [{}] reuse offset [{}], evictSize [{}]", - key, 
offset, evictHostPos.size()); - evictHostPos.pop_back(); - LOG_TRACE("host evicted offset = {}", offset); - return offset; - } - keyOffsetMap[key] = maxOffset; - offset = maxOffset; - maxOffset++; - if (maxOffset == devVocabSize) { - LOG_INFO("start using host vocab!"); - } - if (maxOffset > (hostVocabSize + devVocabSize)) { - LOG_ERROR("hostVocabSize too small! dev:{} host:{}", devVocabSize, hostVocabSize); - throw runtime_error("hostVocabSize too small"); - } - return offset; -} - -void EmbeddingDDR::UpdateBatchId(const vector& keys, size_t currentBatchId) -{ - for (size_t i = 0; i < keys.size(); i++) { - size_t offset; - emb_key_t key = keys[i]; - if (key == -1) { - continue; - } - const auto& iter = keyOffsetMap.find(key); - if (iter != keyOffsetMap.end()) { - offset = iter->second; - - LOG_TRACE("key will be used, {} , offset , {}", key, offset); - if (offset < devVocabSize) { - // devOffset2Batch size equal to devVocabSize, unnecessary to check index boundary - devOffset2Batch[offset] = static_cast(currentBatchId); - } - } - } -} - -/// 利用devOffset2Batch上key最近使用的batchId,来选择需要淘汰的key,记录淘汰位置和device侧所需的keys -/// \param embName 表名 -/// \param key 输入特征 -/// \param hostOffset 全局偏移 -/// \param currentBatchId 已处理的batch数 -/// \param keepBatchId 处理batch的次数,多个预取一起处理算一次 -/// \return 是否找到需要交换的位置 -emb_key_t EmbeddingDDR::FindSwapPosOld(emb_key_t key, size_t hostOffset, size_t batchId, - std::vector& swapPos) -{ - bool notFind = true; - emb_key_t offset = INVALID_KEY_VALUE; - while (notFind) { - // 找到本次预取之前的偏移(保证所有预取batch的key都在HBM中) - if (currentUpdatePos >= devOffset2Batch.size()) { - LOG_ERROR("outofrange {} >= {}", currentUpdatePos, devOffset2Batch.size()); - throw runtime_error("currentUpdatePos out of range"); - } - - if (devOffset2Batch[currentUpdatePos] < static_cast(batchId)) { - devOffset2Batch[currentUpdatePos] = static_cast(batchId); - swapPos.emplace_back(currentUpdatePos); // 记录需要被换出的HBM偏移 - offset = currentUpdatePos; - keyOffsetMap[key] = currentUpdatePos; // 更新key对应的HBM偏移 - // 记录HBM偏移之前的key - devOffset2KeyOld.emplace_back(currentUpdatePos, devOffset2Key[currentUpdatePos]); - auto& oldKey = devOffset2Key[currentUpdatePos]; - oldSwap.emplace_back(oldKey, key); // 记录交换的两个key oldKey:HBM->DDR key:DDR->HBM - keyOffsetMap[oldKey] = hostOffset; // 更新被替换的key的偏移 - oldKey = key; - notFind = false; - } - currentUpdatePos++; // 查找位置+1 - freeSize_--; // HBM可用空间-1 - - // 遍历完一遍整个HBM表后,从头开始遍历 - if (currentUpdatePos == devVocabSize) { - currentUpdatePos = 0; - } - - /** - * currentUpdatePos已经绕了HBM一圈 - * 已经找完整个HBM空间,且没找到可用位置,表示HBM空间不足以放下整个batch(预取batch数)的key, - * 无法正常执行训练,故运行时错误退出 - */ - if (currentUpdatePos == currentUpdatePosStart && notFind) { - LOG_ERROR("devVocabSize is too small"); - throw runtime_error("devVocabSize is too small"); - } - } - return offset; -} - /* * 删除淘汰key的映射关系,并将其offset更新到evictPos,待后续复用 */ void EmbeddingDDR::EvictDeleteEmb(const vector& keys) { - EASY_FUNCTION() - size_t keySize = keys.size(); - vector evictHBMKeys; - vector evictDDRKeys; - for (size_t i = 0; i < keySize; ++i) { - size_t offset; - emb_key_t key = keys[i]; - if (key == INVALID_KEY_VALUE) { - LOG_WARN("evict key equal -1!"); - continue; - } - const auto& iter = keyOffsetMap.find(key); - if (iter == keyOffsetMap.end()) { - // 淘汰依据keyProcess中的history,hashmap映射关系创建于ParseKey;两者异步,造成淘汰的值在hashmap里可能未创建 - continue; - } - offset = iter->second; - keyOffsetMap.erase(iter); - LOG_TRACE("evict embName {}, offset {}", name, offset); - - if (offset < devVocabSize) { - // offset 在device中 - devOffset2Batch[offset] = -1; - 
devOffset2KeyOld.emplace_back(offset, devOffset2Key[offset]); - devOffset2Key[offset] = -1; - evictDevPos.emplace_back(offset); - evictHBMKeys.emplace_back(key); - } else { - // offset 在Host - evictHostPos.emplace_back(offset); - evictDDRKeys.emplace_back(key); // 删除映射表、初始化host表、发送dev淘汰位置 - } - } - if (isSSDEnabled_) { - cacheManager_->RefreshFreqInfoCommon(name, evictHBMKeys, TransferType::HBM_2_EVICT); - cacheManager_->RefreshFreqInfoCommon(name, evictDDRKeys, TransferType::DDR_2_EVICT); - } - - LOG_INFO("ddr EvictDeleteEmb, emb: [{}], hostEvictSize: {}, devEvictSize: {}", - name, evictHostPos.size(), evictDevPos.size()); - LOG_TRACE("keyOffsetMap, {}", MapToString(keyOffsetMap)); } /// DDR模式下的淘汰:删除映射表、初始化host表、发送dev淘汰位置 @@ -267,57 +61,27 @@ void EmbeddingDDR::EvictDeleteEmb(const vector& keys) /// \param keys void EmbeddingDDR::EvictKeys(const vector& keys) { - EASY_FUNCTION() - for (const emb_key_t& key : keys) { - size_t offset; - if (key == INVALID_KEY_VALUE) { - LOG_WARN("evict key equal -1!"); - continue; - } - const auto& iter = keyOffsetMap.find(key); - if (iter == keyOffsetMap.end()) { - continue; - } - // 淘汰依据keyProcess中的history,hashmap映射关系创建于ParseKey;两者异步,造成淘汰的值在hashmap里可能未创建 - offset = iter->second; - keyOffsetMap.erase(iter); - LOG_TRACE("evict embName {}, offset {}", name, offset); - - if (offset < devVocabSize) { - devOffset2Batch[offset] = INVALID_KEY_VALUE; - devOffset2KeyOld.emplace_back(offset, devOffset2Key[offset]); - devOffset2Key[offset] = INVALID_KEY_VALUE; - evictDevPos.emplace_back(offset); - } else { - evictHostPos.emplace_back(offset); - } - } } -void EmbeddingDDR::ClearLookupAndSwapOffset() +void EmbeddingDDR::Load(const string& savePath, map>& trainKeySet) { - ddr2HbmKeys.clear(); -} + vector keys; + vector> embeddings; + vector> optimizerSlots; -void EmbeddingDDR::SetStartCount() -{ - currentUpdatePosStart = currentUpdatePos; - freeSize_ = devVocabSize; -} + LoadKey(savePath, keys); + LoadEmbedding(savePath, embeddings); + LoadOptimizerSlot(savePath, optimizerSlots); -void EmbeddingDDR::Load(const string& savePath) -{ - LoadKey(savePath); - LoadEmbAndOptim(savePath); -} + auto rc = embCache->LoadEmbTableInfos(name, keys, embeddings, optimizerSlots); + if (rc != 0) { + throw runtime_error("embCache->LoadEmbTableInfos failed, err code:" + to_string(rc)); + } -void EmbeddingDDR::Save(const string& savePath) -{ - SaveKey(savePath); - SaveEmbAndOptim(savePath); + trainKeySet[name].insert(keys.cbegin(), keys.cend()); } -void EmbeddingDDR::LoadKey(const string& savePath) +void EmbeddingDDR::LoadKey(const string &savePath, vector &keys) { stringstream ss; ss << savePath << "/" << name << "/key/slice.data"; @@ -325,108 +89,170 @@ void EmbeddingDDR::LoadKey(const string& savePath) unique_ptr fileSystemHandler = make_unique(); unique_ptr fileSystemPtr = fileSystemHandler->Create(ss.str()); - size_t fileSize = fileSystemPtr->GetFileSize(ss.str()); + size_t fileSize = 0; + try { + fileSize = fileSystemPtr->GetFileSize(ss.str()); + } catch (exception& e) { + string errMsg = StringFormat("open file failed:%s, error code:%d", ss.str().c_str(), strerror(errno)); + throw runtime_error(errMsg); + } if (fileSize >= FILE_MAX_SIZE) { - throw runtime_error(StringFormat("Error: Load keys failed. 
file {} size {} is too big.", ss.str(), fileSize)); + string errMsg = StringFormat("file:%s, size:%d is too big", ss.str().c_str(), fileSize); + throw runtime_error(errMsg); } - int64_t* buf = static_cast(malloc(fileSize)); + // 暂时向HBM兼容,转成int64_t,后续再归一key类型为uint64_t + auto buf = static_cast(malloc(fileSize)); if (buf == nullptr) { - throw runtime_error(StringFormat("Error: Load keys failed. " - "failed to allocate {} bytes using malloc.", fileSize)); + string errMsg = StringFormat("malloc buffer failed, error code:%d", strerror(errno)); + throw runtime_error(errMsg); } - - ssize_t res = fileSystemPtr->Read(ss.str(), reinterpret_cast(buf), fileSize); - if (res == -1) { + ssize_t result = fileSystemPtr->Read(ss.str(), reinterpret_cast(buf), fileSize); + if (result == -1) { free(static_cast(buf)); - throw runtime_error(StringFormat("Error: Load keys failed. " - "An error occurred while reading file: {}.", ss.str())); + string errMsg = StringFormat("read buffer failed, error code:%d", strerror(errno)); + throw runtime_error(errMsg); } - if (res != fileSize) { + if (result != fileSize) { free(static_cast(buf)); throw runtime_error(StringFormat("Error: Load keys failed. Expected to read {} bytes, " - "but actually read {} bytes to file {}.", fileSize, res, ss.str())); + "but actually read {} bytes to file {}.", fileSize, result, ss.str())); } - size_t loadKeySize = fileSize / sizeof(int64_t); - - // key优先加载至device - loadOffset.clear(); hostLoadOffset.clear(); - int keyCount = 0; - for (int i = 0; i < loadKeySize; i = i + 1) { + size_t loadKeySize = fileSize / sizeof(int64_t); + for (size_t i = 0; i < loadKeySize; i++) { + // 分配到不同的卡 if (buf[i] % rankSize_ != rankId_) { continue; } - if (keyCount > devVocabSize + hostVocabSize) { - free(static_cast(buf)); - throw runtime_error(StringFormat("Error: Load keys failed. Load key size :{} , " - "exceeds the sum of device vocab size and host vocab size: {}.", - keyCount, devVocabSize + hostVocabSize)); - } else if (keyCount < devVocabSize) { - loadOffset.push_back(i); - devOffset2Key[keyCount] = buf[i]; - } else { - hostLoadOffset.push_back(i); - } - keyOffsetMap[buf[i]] = keyCount; - keyCount++; + hostLoadOffset.emplace_back(i); + keys.emplace_back(static_cast(buf[i])); } - maxOffset = keyOffsetMap.size(); + free(static_cast(buf)); + LOG_DEBUG("load key done, table:{}", name); } -void EmbeddingDDR::LoadEmbAndOptim(const string& savePath) +void EmbeddingDDR::LoadEmbedding(const string &savePath, vector> &embeddings) { + // must init first + for (size_t i = 0; i < hostLoadOffset.size(); i++) { + vector tmp(embSize_); + embeddings.emplace_back(tmp); + } + stringstream ss; ss << savePath << "/" << name; unique_ptr fileSystemHandler = make_unique(); unique_ptr fileSystemPtr = fileSystemHandler->Create(ss.str()); - HostEmb *hostEmbs = Singleton::GetInstance(); - HostEmbTable &table = hostEmbs->GetEmb(name); - if (table.embData.empty()) { - LOG_ERROR("hostEmb data is empty"); - return; - } - - // 读embedding stringstream embedStream; embedStream << ss.str() << "/" << "embedding/slice.data"; + ssize_t res = fileSystemPtr->Read(embedStream.str(), embeddings, 0, hostLoadOffset, embSize_); + LOG_DEBUG("load embedding done, table:{}, read bytes:{}", name, res); +} - size_t readSize = hostLoadOffset.size() * embSize_ * sizeof(float); - ssize_t res = fileSystemPtr->Read(embedStream.str(), table.embData, 0, hostLoadOffset, embSize_); - if (res == -1) { - throw runtime_error(StringFormat("Error: Load embeddings failed. 
An error occurred while reading file: {}.", - embedStream.str())); - } - if (res != readSize) { - throw runtime_error(StringFormat("Error: Load embeddings failed. Expected to read {} bytes, " - "but actually read {} bytes to file {}.", readSize, res, embedStream.str())); +void EmbeddingDDR::LoadOptimizerSlot(const string &savePath, vector> &optimizerSlots) +{ + // must init first + for (size_t i = 0; i < hostLoadOffset.size(); i++) { + vector tmp(extEmbSize_ - embSize_); + optimizerSlots.emplace_back(tmp); } - // 读optim - int64_t optimIndex = 1; + stringstream ss; + ss << savePath << "/" << name; + + unique_ptr fileSystemHandler = make_unique(); + unique_ptr fileSystemPtr = fileSystemHandler->Create(ss.str()); + + int64_t slotIdx = 0; for (const auto ¶m: optimParams) { stringstream paramStream; paramStream << ss.str() << "/" << optimName + "_" + param << "/slice.data"; + ssize_t res = fileSystemPtr->Read(paramStream.str(), optimizerSlots, slotIdx, hostLoadOffset, embSize_); + slotIdx++; + LOG_DEBUG("load optimizer slot, table:{}, slot:{}, read bytes:{}", name, param, res); + } + + LOG_DEBUG("load optimizer slot done, table:{}", name); +} + +void EmbeddingDDR::Save(const string& savePath) +{ + SyncLatestEmbedding(); + + vector keys; + vector> embeddings; + vector> optimizerSlots; + embCache->GetEmbTableInfos(name, keys, embeddings, optimizerSlots); + + SaveKey(savePath, keys); + SaveEmbedding(savePath, embeddings); + SaveOptimizerSlot(savePath, optimizerSlots, keys.size()); +} - ssize_t res = fileSystemPtr->Read(paramStream.str(), table.embData, optimIndex, hostLoadOffset, embSize_); - if (res == -1) { - throw runtime_error(StringFormat("Error: Load optimizers failed. An error occurred while reading file: {}.", - paramStream.str())); +void EmbeddingDDR::SyncLatestEmbedding() +{ + // 导出host记录的存在于npu的embedding + std::vector> koVec; + int rc = embCache->ExportDeviceKeyOffsetPairs(name, koVec); + if (rc != ock::ctr::H_OK) { + string errMsg = StringFormat("ExportDeviceKeyOffsetPairs failed, table:%s, error code:%d", name.c_str(), rc); + throw std::invalid_argument(errMsg); + } + std::vector swapOutKeys; + for (const auto& p : koVec) { + swapOutKeys.push_back(p.first); + } + LOG_DEBUG("save swapOutKeys.size:{}, table:{}", swapOutKeys.size(), name); + + // 接收python save接口发送的卡内embedding + auto size = hdTransfer->RecvAcl(TransferChannel::SAVE_D2H, TRAIN_CHANNEL_ID, name, 0, -1); + LOG_DEBUG("save acltdtGetDatasetSize, size: {}, table:{}", size, name); + auto aclData = acltdtGetDataItem(hdTransfer->aclDatasets[name][0], 0); + if (aclData == nullptr) { + throw runtime_error("Acl get tensor data from dataset failed."); + } + auto* ptr = reinterpret_cast(acltdtGetDataAddrFromItem(aclData)); + + if (ssdVocabSize == 0) { + // 在保存之前先更新host的embedding + rc = embCache->EmbeddingUpdate(name, swapOutKeys, ptr); + if (rc != ock::ctr::H_OK) { + string errMsg = StringFormat("EmbeddingUpdate failed, table:%s, error code:%d", name.c_str(), rc); + throw std::invalid_argument(errMsg); } - if (res != readSize) { - throw runtime_error(StringFormat("Error: Load embeddings failed. 
Expected to read {} bytes, " - "but actually read {} bytes to file {}.", - readSize, res, paramStream.str())); + } else { + // 在保存之前先更新ddr和ssd的embedding + SwapOutInfo info; + cacheManager_->ProcessSwapOutKeys(name, swapOutKeys, info); + vector swapOutAddrs; + rc = embCache->EmbeddingLookupAddrs(name, info.swapOutDDRKeys, swapOutAddrs); + if (rc != ock::ctr::H_OK) { + string errMsg = StringFormat("EmbeddingLookupAddrs failed, table:%s, error code:%d", name.c_str(), rc); + throw std::invalid_argument(errMsg); + } + uint32_t extEmbeddingSize = embInfo_.extEmbeddingSize; + uint32_t memSize = extEmbeddingSize * sizeof(float); + // DDR更新 +#pragma omp parallel for num_threads(MGMT_CPY_THREADS) default(none) \ + shared(swapOutAddrs, info, ptr, extEmbeddingSize, memSize) + for (uint64_t i = 0; i < swapOutAddrs.size(); i++) { + int errCode = memcpy_s( + swapOutAddrs[i], memSize, ptr + info.swapOutDDRAddrOffs[i] * extEmbeddingSize, memSize); + if (errCode != 0) { + string errMsg = StringFormat("memcpy_s failed, table:%s, error code:%d", name.c_str(), errCode); + throw std::invalid_argument(errMsg); + } } - optimIndex++; + cacheManager_->UpdateSSDEmb(name, ptr, embInfo_.extEmbeddingSize, info.swapOutSSDKeys, info.swapOutSSDAddrOffs); } } -void EmbeddingDDR::SaveKey(const string& savePath) +void EmbeddingDDR::SaveKey(const string& savePath, vector& keys) { stringstream ss; ss << savePath << "/" << name << "/key/"; @@ -436,45 +262,17 @@ void EmbeddingDDR::SaveKey(const string& savePath) unique_ptr fileSystemHandler = make_unique(); unique_ptr fileSystemPtr = fileSystemHandler->Create(ss.str()); - hostKey.clear(); - hostOffset.clear(); - deviceKey.clear(); - deviceOffset.clear(); - - for (const auto& it: keyOffsetMap) { - if (it.second >= devVocabSize) { - hostKey.push_back(it.first); - hostOffset.push_back(it.second); - } else { - deviceKey.push_back(it.first); - deviceOffset.push_back(it.second); - } - } + // 暂时向HBM兼容,转成int64_t,后续再归一key类型为uint64_t + vector keysCompat(keys.cbegin(), keys.cend()); - size_t writeSize = static_cast(hostKey.size() * sizeof(int64_t)); - ssize_t res = fileSystemPtr->Write(ss.str(), reinterpret_cast(hostKey.data()), writeSize); + ssize_t res = fileSystemPtr->Write(ss.str(), reinterpret_cast(keysCompat.data()), + static_cast(keys.size() * sizeof(int64_t))); if (res == -1) { - throw runtime_error(StringFormat("Error: Save keys failed. " - "An error occurred while writing file: {}.", ss.str())); - } - if (res != writeSize) { - throw runtime_error(StringFormat("Error: Save keys failed. Expected to write {} bytes, " - "but actually write {} bytes to file {}.", writeSize, res, ss.str())); - } - - writeSize = static_cast(deviceKey.size() * sizeof(int64_t)); - res = fileSystemPtr->Write(ss.str(), reinterpret_cast(deviceKey.data()), writeSize); - if (res == -1) { - throw runtime_error(StringFormat("Error: Save keys failed. " - "An error occurred while writing file: {}.", ss.str())); - } - if (res != writeSize) { - throw runtime_error(StringFormat("Error: Save keys failed. 
Expected to write {} bytes, " - "but actually write {} bytes to file {}.", writeSize, res, ss.str())); + throw runtime_error("save key failed!"); } } -void EmbeddingDDR::SaveEmbData(const string& savePath) +void EmbeddingDDR::SaveEmbedding(const string& savePath, vector>& embeddings) { stringstream ss; ss << savePath << "/" << name << "/embedding/"; @@ -484,157 +282,68 @@ void EmbeddingDDR::SaveEmbData(const string& savePath) unique_ptr fileSystemHandler = make_unique(); unique_ptr fileSystemPtr = fileSystemHandler->Create(ss.str()); - size_t writeSize = embSize_ * sizeof(float) * embContent.size(); - ssize_t res = fileSystemPtr->Write(ss.str(), embContent, embSize_ * sizeof(float)); - if (res == -1) { - throw runtime_error(StringFormat("Error: Save embeddings failed. " - "An error occurred while writing file: {}.", ss.str())); - } - if (res != writeSize) { - throw runtime_error(StringFormat("Error: Save embeddings failed. Expected to write {} bytes, " - "but actually write {} bytes to file {}.", writeSize, res, ss.str())); + ssize_t writeBytesNum = fileSystemPtr->Write(ss.str(), embeddings, embSize_); + ssize_t expectWriteBytes = embeddings.size() * embSize_ * sizeof(float); + if (writeBytesNum != expectWriteBytes) { + string errMsg = StringFormat("save embedding failed, write expect:%d, actual:%d, path:%s", + expectWriteBytes, writeBytesNum, savePath.c_str()); + throw runtime_error(errMsg); } } -void EmbeddingDDR::SaveOptimData(const string& savePath) +void EmbeddingDDR::SaveOptimizerSlot(const string& savePath, vector>& optimizerSlots, size_t keySize) { - for (const auto &content: optimContentMap) { + if (optimizerSlots.size() != keySize) { + string errMsg = StringFormat("optimizer slot data size not equal to key size, " + "optimizerSlots.size:%d, keySize:%d", + optimizerSlots.size(), keySize); + throw runtime_error(errMsg); + } + + size_t slotIdx = 0; + for (const auto &slotName: optimParams) { stringstream ss; - ss << savePath << "/" << name << "/" << optimName + "_" + content.first << "/"; + ss << savePath << "/" << name << "/" << optimName + "_" + slotName << "/"; MakeDir(ss.str()); ss << "slice_" << rankId_ << ".data"; unique_ptr fileSystemHandler = make_unique(); unique_ptr fileSystemPtr = fileSystemHandler->Create(ss.str()); - size_t writeSize = embSize_ * sizeof(float) * content.second.size(); - ssize_t res = fileSystemPtr->Write(ss.str(), content.second, embSize_ * sizeof(float)); - if (res == -1) { - throw runtime_error(StringFormat("Error: Save optimizers failed. " - "An error occurred while writing file: {}.", ss.str())); + vector> slotData; + for (const auto &data: optimizerSlots) { + vector tmp(data.cbegin() + slotIdx * embSize_, data.cbegin() + (slotIdx+1) * embSize_); + slotData.emplace_back(tmp); } - if (res != writeSize) { - throw runtime_error(StringFormat("Error: Save optimizers failed. 
Expected to write {} bytes, " - "but actually write {} bytes to file {}.", writeSize, res, ss.str())); + ssize_t writeBytesNum = fileSystemPtr->Write(ss.str(), slotData, embSize_); + ssize_t expectWriteBytes = slotData.size() * embSize_ * sizeof(float); + if (writeBytesNum != expectWriteBytes) { + string errMsg = StringFormat("save optimizer slot failed, write expect:%d, actual:%d, path:%s", + expectWriteBytes, writeBytesNum, savePath.c_str()); + throw runtime_error(errMsg); } - } -} -void EmbeddingDDR::SaveEmbAndOptim(const string& savePath) -{ - HostEmb *hostEmbs = Singleton::GetInstance(); - HostEmbTable &table = hostEmbs->GetEmb(name); - if (table.embData.empty()) { - LOG_ERROR("host embedding data is empty"); + slotIdx++; } - embContent.clear(); - for (const string ¶m: optimParams) { - optimContentMap[param].clear(); - } - for (int64_t &offset: hostOffset) { - embContent.push_back(table.embData[offset - devVocabSize].data()); - int optim_param_count = 1; - for (const string ¶m: optimParams) { - optimContentMap[param].push_back(table.embData[offset - devVocabSize].data() + - sizeof(float) * embSize_ * optim_param_count); - optim_param_count++; - } - } - SaveEmbData(savePath); - SaveOptimData(savePath); } - vector EmbeddingDDR::GetDeviceOffset() { - return deviceOffset; + throw runtime_error("GetDeviceOffset deprecated in ddr/ssd mode"); } void EmbeddingDDR::SetOptimizerInfo(OptimizerInfo& optimizerInfo) { optimName = optimizerInfo.optimName; optimParams = optimizerInfo.optimParams; - for (const string ¶m: optimParams) { - optimContentMap[param] = vector{}; - } } void EmbeddingDDR::SetCacheManager(CacheManager *cm) { + LOG_DEBUG("set CacheManager"); cacheManager_ = cm; } -void EmbeddingDDR::AddKeyFreqInfo(const emb_key_t& key, RecordType type) -{ - if (!isSSDEnabled_) { - return; - } - cacheManager_->PutKey(name, key, type); -} - -void EmbeddingDDR::RefreshFreqInfoWithSwap() -{ - if (!isSSDEnabled_) { - return; - } - // 换入换出key列表,元素为pair: pair oldKey为从HBM移出的key, key为从DDR移出的key - LOG_DEBUG("RefreshFreqInfoWithSwap, table:{}, oldSwap Size:{}", name, oldSwap.size()); - vector enterDDRKeys; - for (auto keyPair : oldSwap) { - enterDDRKeys.emplace_back(keyPair.first); - } - cacheManager_->RefreshFreqInfoCommon(name, enterDDRKeys, TransferType::HBM_2_DDR); - cacheManager_->RefreshFreqInfoCommon(name, ddr2HbmKeys, TransferType::DDR_2_HBM); - - AddCacheManagerTraceLog(); -} - -/// 记录日志:HBM和DDR换入换出后,比较hostHashMap中DDR内key和表对应的lfuCache对象中的key内容 -void EmbeddingDDR::AddCacheManagerTraceLog() const -{ - if (Logger::GetLevel() != Logger::TRACE) { - return; - } - auto& hostMap = keyOffsetMap; - auto& devSize = devVocabSize; - auto iter = cacheManager_->ddrKeyFreqMap.find(name); - if (iter == cacheManager_->ddrKeyFreqMap.end()) { - throw runtime_error("table not in ddrKeyFreqMap"); - } - auto &lfu = iter->second; - const auto& lfuTab = lfu.GetFreqTable(); - if (lfuTab.empty()) { - return; - } - size_t tableKeyInDdr = 0; - vector ddrKeys; // 获取hostHashMap中保存在DDR的key - for (const auto& item : hostMap) { - if (item.second < devSize) { - continue; - } - ddrKeys.emplace_back(item.first); - ++tableKeyInDdr; - } - vector lfuKeys; - for (const auto& it : lfuTab) { - lfuKeys.emplace_back(it.first); - } - std::sort(ddrKeys.begin(), ddrKeys.end()); - std::sort(lfuKeys.begin(), lfuKeys.end()); - std::string ddrKeysString = VectorToString(ddrKeys); - std::string lfuKeysString = VectorToString(lfuKeys); - if (ddrKeysString != lfuKeysString) { - LOG_ERROR("swap HBM with DDR step error, key string not equal, table:{}, 
ddrKeysString:{}, lfuKeysString:{}", - name, ddrKeysString, lfuKeysString); - } else { - LOG_INFO("swap HBM with DDR step OK, table:{}, ddrKeysString == lfuKeysString, string length:{}", - name, lfuKeysString.length()); - } - - LOG_INFO("swap HBM with DDR step end, table:{}, tableKeyInDdr:{}, tableKeyInLfu:{}", - name, tableKeyInDdr, lfu.keyTable.size()); -} - TableInfo EmbeddingDDR::GetTableInfo() { TableInfo ti = { @@ -643,42 +352,16 @@ TableInfo EmbeddingDDR::GetTableInfo() .devVocabSize=devVocabSize, .maxOffset=maxOffset, .keyOffsetMap=keyOffsetMap, - .evictDevPos=evictDevPos, - .evictHostPos=evictHostPos, }; return ti; } -void EmbeddingDDR::RefreshFreqInfoAfterLoad() +void EmbeddingDDR::SetHDTransfer(HDTransfer *hdTransfer) { - vector h2d; - vector d2h; - - for (const auto& it: cacheManager_->ddrKeyFreqMap[name].keyTable) { - auto key = it.first; - auto iter = keyOffsetMap.find(key); - if (iter == keyOffsetMap.end()) { - throw runtime_error("ddrKeyFreqMap key not in keyOffsetMap"); - } - auto offset = iter->second; - if (offset < devVocabSize) { - d2h.emplace_back(key); - } - } - for (const auto& it: cacheManager_->excludeDDRKeyCountMap[name]) { - auto key = it.first; - auto iter = keyOffsetMap.find(key); - if (iter == keyOffsetMap.end()) { - continue; - } - auto offset = iter->second; - if (offset >= devVocabSize) { - h2d.emplace_back(key); - } - } - - cacheManager_->RefreshFreqInfoCommon(name, h2d, TransferType::HBM_2_DDR); - cacheManager_->RefreshFreqInfoCommon(name, d2h, TransferType::DDR_2_HBM); + this->hdTransfer = hdTransfer; +} - LOG_DEBUG("RefreshFreqInfoAfterLoad done"); +void EmbeddingDDR::SetEmbCache(ock::ctr::EmbCacheManagerPtr embCache) +{ + this->embCache = embCache; } diff --git a/src/core/emb_table/embedding_ddr.h b/src/core/emb_table/embedding_ddr.h index ab7cc3fb..ac5c5878 100644 --- a/src/core/emb_table/embedding_ddr.h +++ b/src/core/emb_table/embedding_ddr.h @@ -34,45 +34,35 @@ public: virtual int64_t capacity() const; - virtual std::vector FindOffset(const vector& keys, - size_t batchId, int channelId, - std::vector& swapPos); + virtual void EvictKeys(const vector& keys); - emb_key_t FindOffsetHelper(const emb_key_t& key, int channelId); + void Load(const string& savePath, map>& trainKeySet); - void UpdateBatchId(const vector& keys, size_t currentBatchId); + void LoadKey(const string& savePath, vector& keys); - emb_key_t FindSwapPosOld(emb_key_t key, size_t hostOffset, size_t batchId, std::vector& swapPos); + void LoadEmbedding(const string& savePath, vector>& embeddings); - virtual void EvictKeys(const vector& keys); + void LoadOptimizerSlot(const string& savePath, vector>& optimizerSlots); -// std::vector lookUpVec; // 查询结果 + void Save(const string& savePath); - virtual void ClearLookupAndSwapOffset(); + void SyncLatestEmbedding(); - void SetStartCount(); + void SaveKey(const string& savePath, vector& keys); - void Load(const string& savePath); + void SaveEmbedding(const string& savePath, vector>& embeddings); - void Save(const string& savePath); + void SaveOptimizerSlot(const string& savePath, vector>& optimizerSlots, size_t keySize); vector GetDeviceOffset(); void SetOptimizerInfo(OptimizerInfo& optimizerInfo); - void RefreshFreqInfoWithSwap(); - - void AddKeyFreqInfo(const emb_key_t& key, RecordType type); - void SetCacheManager(CacheManager *cm); - void AddCacheManagerTraceLog() const; - TableInfo GetTableInfo(); - void RefreshFreqInfoAfterLoad(); - -GTEST_PRIVATE: + void SetHDTransfer(HDTransfer* hdTransfer); void LoadKey(const string& savePath); void 
LoadEmbAndOptim(const string& savePath); @@ -81,10 +71,11 @@ GTEST_PRIVATE: void SaveEmbData(const string &savePath); void SaveOptimData(const string& savePath); void SaveEmbAndOptim(const string& savePath); + void SetEmbCache(ock::ctr::EmbCacheManagerPtr embCache); - void EvictDeleteEmb(const vector& keys); +GTEST_PRIVATE: - std::vector devOffset2Key; + void EvictDeleteEmb(const vector& keys); size_t maxOffsetOld { 0 }; std::vector evictPosChange; @@ -92,32 +83,16 @@ GTEST_PRIVATE: std::vector> devOffset2KeyOld; std::vector> oldSwap; // (old on dev, old on host) - /* - * HBM与DDR换入换出时,已存在于DDR且要转移到HBM的key(不包含新key); 用于SSD模式 - * (区别于oldSwap: pair.second为已存在于DDR key + 换入换出前映射到DDR的新key) - */ - std::vector ddr2HbmKeys; - std::vector devOffset2Batch; // has -1 - - /** - * 记录HBM上查找空位的当前位置 - * 值域为[0, devVocabSize] - **/ - size_t currentUpdatePos; - size_t currentUpdatePosStart; // 记录HBM上查找空位的起始位置 - - vector hostKey; - vector hostOffset; - vector deviceKey; - vector deviceOffset; - vector embContent; std::string optimName; std::vector optimParams; - std::map> optimContentMap; vector hostLoadOffset; + + HDTransfer *hdTransfer = nullptr; + ock::ctr::EmbCacheManagerPtr embCache = nullptr; + int deviceId = -1; }; } diff --git a/src/core/emb_table/embedding_dynamic.cpp b/src/core/emb_table/embedding_dynamic.cpp index bca77178..78c94862 100644 --- a/src/core/emb_table/embedding_dynamic.cpp +++ b/src/core/emb_table/embedding_dynamic.cpp @@ -27,7 +27,7 @@ EmbeddingDynamic::EmbeddingDynamic() } EmbeddingDynamic::EmbeddingDynamic(const EmbInfo& info, const RankInfo& rankInfo, int inSeed) - : EmbeddingTable(info, rankInfo, inSeed) + : EmbeddingTable(info, rankInfo, inSeed), deviceId(rankInfo.deviceId) { if (isDynamic_) { auto ret = aclrtSetDevice(static_cast(rankInfo.deviceId)); @@ -197,7 +197,7 @@ void EmbeddingDynamic::SaveEmbData(const string& savePath) unique_ptr fileSystemHandler = make_unique(); unique_ptr fileSystemPtr = fileSystemHandler->Create(ss.str()); - fileSystemPtr->WriteEmbedding(ss.str(), embSize_, embAddress, rankId_); + fileSystemPtr->WriteEmbedding(ss.str(), embSize_, embAddress, deviceId); } void EmbeddingDynamic::SaveOptimData(const string &savePath) @@ -210,11 +210,11 @@ void EmbeddingDynamic::SaveOptimData(const string &savePath) unique_ptr fileSystemHandler = make_unique(); unique_ptr fileSystemPtr = fileSystemHandler->Create(ss.str()); - fileSystemPtr->WriteEmbedding(ss.str(), embSize_, content.second, rankId_); + fileSystemPtr->WriteEmbedding(ss.str(), embSize_, content.second, deviceId); } } -void EmbeddingDynamic::Load(const string& savePath) +void EmbeddingDynamic::Load(const string& savePath, map>& trainKeySet) { LoadKey(savePath); LoadEmbAndOptim(savePath); @@ -240,7 +240,7 @@ void EmbeddingDynamic::LoadEmbAndOptim(const string& savePath) stringstream paramStream; paramStream << ss.str() << "/" << optimName + "_" + param << "/slice.data"; fileSystemPtr->ReadEmbedding(paramStream.str(), embeddingSizeInfo, - firstAddress + optimIndex * embSize_ * sizeof(float), rankId_, loadOffset); + firstAddress + optimIndex * embSize_ * sizeof(float), deviceId, loadOffset); optimIndex++; } } diff --git a/src/core/emb_table/embedding_dynamic.h b/src/core/emb_table/embedding_dynamic.h index 59418229..5cf49718 100644 --- a/src/core/emb_table/embedding_dynamic.h +++ b/src/core/emb_table/embedding_dynamic.h @@ -35,7 +35,7 @@ public: virtual int64_t capacity() const; - void Load(const string& savePath); + void Load(const string& savePath, map>& trainKeySet); void Save(const string& savePath); @@ 
-74,6 +74,7 @@ private: std::string optimName; std::vector optimParams; std::map> optimAddressMap; + int deviceId = -1; int64_t firstAddress; }; diff --git a/src/core/emb_table/embedding_mgmt.cpp b/src/core/emb_table/embedding_mgmt.cpp index f850e254..33e1c671 100644 --- a/src/core/emb_table/embedding_mgmt.cpp +++ b/src/core/emb_table/embedding_mgmt.cpp @@ -12,8 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ - #include "emb_table/embedding_mgmt.h" + +#include + #include "emb_table/embedding_static.h" #include "emb_table/embedding_dynamic.h" #include "emb_table/embedding_ddr.h" @@ -25,8 +27,7 @@ EmbeddingMgmt::EmbeddingMgmt() { } -void EmbeddingMgmt::Init(const RankInfo& rInfo, const vector& eInfos, - const vector& thresholdValues, int seed) +void EmbeddingMgmt::Init(const RankInfo& rInfo, const vector& eInfos, int seed) { for (size_t i = 0; i < eInfos.size(); ++i) { if (rInfo.isDDR) { @@ -54,17 +55,7 @@ void EmbeddingMgmt::Key2Offset(const std::string& name, std::vector& size_t EmbeddingMgmt::GetMaxOffset(const std::string& name) { - embeddings[name]->GetMaxOffset(); -} - -void EmbeddingMgmt::LoadMaxOffset(OffsetMemT& loadData) -{ - LOG_ERROR("load max offset"); -} - -void EmbeddingMgmt::LoadKeyOffsetMap(KeyOffsetMemT& loadData) -{ - LOG_ERROR("load key offset"); + return embeddings[name]->GetMaxOffset(); } std::map EmbeddingMgmt::GetMaxOffset() @@ -85,7 +76,7 @@ KeyOffsetMemT EmbeddingMgmt::GetKeyOffsetMap() return keyOffsetMap; } -void EmbeddingMgmt::EvictKeys(const string& name, const vector& keys) +void EmbeddingMgmt::EvictKeys(const string& name, const vector& keys) { LOG_ERROR("evict keys for {}", name); if (keys.size() != 0) { @@ -94,7 +85,7 @@ void EmbeddingMgmt::EvictKeys(const string& name, const vector& keys) embeddings[name]->EvictInitDeviceEmb(); } -void EmbeddingMgmt::EvictKeysCombine(const vector& keys) +void EmbeddingMgmt::EvictKeysCombine(const vector& keys) { if (keys.size() != 0) { for (auto& table: embeddings) { @@ -117,41 +108,16 @@ int64_t EmbeddingMgmt::GetCapacity(const std::string &name) return embeddings[name]->capacity(); } -void EmbeddingMgmt::FindOffset(const std::string& name, const vector& keys, - size_t currentBatchId, size_t keepBatchId, int channel) +void EmbeddingMgmt::Load(const string& name, const string& filePath, + map>& trainKeySet) { - return embeddings[name]->FindOffset(keys, currentBatchId, keepBatchId, channel); + return embeddings[name]->Load(filePath, trainKeySet); } -const std::vector& EmbeddingMgmt::GetMissingKeys(const std::string& name) -{ - return embeddings[name]->GetMissingKeys(); -} - -void EmbeddingMgmt::ClearMissingKeys(const std::string& name) -{ - return embeddings[name]->ClearMissingKeys(); -} - -std::shared_ptr EmbeddingMgmt::GetTable(const string& name) -{ - auto it = embeddings.find(name); - if (it == embeddings.end()) { - LOG_ERROR("table not found"); - } - return std::dynamic_pointer_cast(it->second); -} - -void EmbeddingMgmt::Load(const string& name, const string& filePath) -{ - return embeddings[name]->Load(filePath); -} - - -void EmbeddingMgmt::Load(const string& filePath) +void EmbeddingMgmt::Load(const string& filePath, map>& trainKeySet) { for (auto& tablePair: embeddings) { - tablePair.second->Load(filePath); + tablePair.second->Load(filePath, trainKeySet); } } @@ -162,8 +128,14 @@ void 
EmbeddingMgmt::Save(const string& name, const string& filePath) void EmbeddingMgmt::Save(const string& filePath) { + // use multi-thread to prevent receiving save_d2h blocked when table order different between cpp and python + vector> futures; for (auto& tablePair: embeddings) { - tablePair.second->Save(filePath); + futures.emplace_back( + std::async(std::launch::async, [table = tablePair.second, filePath] { table->Save(filePath); })); + } + for (auto& f: futures) { + f.get(); // get() will repost exception if happened } } @@ -181,18 +153,6 @@ void EmbeddingMgmt::SetOptimizerInfo(const string& name, OptimizerInfo& optimize embeddings[name]->SetOptimizerInfo(optimizerInfo); } -EmbHashMemT EmbeddingMgmt::GetEmbHashMaps() -{ - EmbHashMemT EmbHashMaps; - for (auto& tablePair: embeddings) { - EmbHashMaps[tablePair.first].hostHashMap = tablePair.second ->GetKeyOffsetMap(); - EmbHashMaps[tablePair.first].devVocabSize = tablePair.second ->GetDevVocabSize(); - EmbHashMaps[tablePair.first].hostVocabSize = tablePair.second ->GetHostVocabSize(); - EmbHashMaps[tablePair.first].maxOffset = tablePair.second ->GetMaxOffset(); - } - return EmbHashMaps; -} - OffsetMapT EmbeddingMgmt::GetLoadOffsets() { OffsetMapT AllLoadOffsets; @@ -209,25 +169,16 @@ void EmbeddingMgmt::SetCacheManagerForEmbTable(CacheManager* cacheManager) } } -void EmbeddingMgmt::EnableSSD() +void EmbeddingMgmt::SetHDTransferForEmbTable(HDTransfer* hdTransfer) { for (auto& table: embeddings) { - table.second->EnableSSD(); + table.second->SetHDTransfer(hdTransfer); } } -void EmbeddingMgmt::LockSave() +void EmbeddingMgmt::SetEmbCacheForEmbTable(const ock::ctr::EmbCacheManagerPtr& embCache) { for (auto& table: embeddings) { - table.second->mutSave_.lock(); + table.second->SetEmbCache(embCache); } - LOG_DEBUG("LockSave"); } - -void EmbeddingMgmt::UnLockSave() -{ - for (auto& table: embeddings) { - table.second->mutSave_.unlock(); - } - LOG_DEBUG("UnLockSave"); -} \ No newline at end of file diff --git a/src/core/emb_table/embedding_mgmt.h b/src/core/emb_table/embedding_mgmt.h index d091bdef..ef106786 100644 --- a/src/core/emb_table/embedding_mgmt.h +++ b/src/core/emb_table/embedding_mgmt.h @@ -34,8 +34,7 @@ public: * @param[in] rInfo 从python侧传过了的rank信息 * @param[in] eInfos 从python侧传过了的embedding表信息 */ - void Init(const RankInfo& rInfo, const vector& eInfos, - const vector& thresholdValues = {}, int seed = 0); + void Init(const RankInfo& rInfo, const vector& eInfos, int seed = 0); /** * 从embedding表中查批量查找key @@ -45,29 +44,18 @@ public: */ void Key2Offset(const std::string& name, std::vector& keys, int channel); - void FindOffset(const std::string& name, const vector& keys, - size_t currentBatchId, size_t keepBatchId, int channel); - /** * 在指定的embedding表中淘汰key * @param[in] name embedding表名 * @param[in] keys 待淘汰的key */ - void EvictKeys(const std::string& name, const vector& keys); + void EvictKeys(const std::string& name, const vector& keys); /** * 在全部的embedding表中淘汰key * @param[in] keys 待淘汰的key */ - void EvictKeysCombine(const vector& keys); - - const std::vector& GetMissingKeys(const std::string& name); - - void ClearMissingKeys(const std::string& name); - - void LoadMaxOffset(OffsetMemT& loadData); - - void LoadKeyOffsetMap(KeyOffsetMemT& loadData); + void EvictKeysCombine(const vector& keys); size_t GetMaxOffset(const std::string& name); @@ -81,17 +69,15 @@ public: static EmbeddingMgmt* Instance(); - std::shared_ptr GetTable(const string& name); - - /** + /** * 加载单个表 */ - void Load(const string& name, const string& filePath); + void Load(const 
string& name, const string& filePath, map>& trainKeySet); /** * 加载所有表 */ - void Load(const string& filePath); + void Load(const string& filePath, map>& trainKeySet); /** * 保存单个表 @@ -113,8 +99,6 @@ public: */ OffsetMapT GetLoadOffsets(); - EmbHashMemT GetEmbHashMaps(); - /** * 设置某张表的优化器信息 */ @@ -122,11 +106,9 @@ public: void SetCacheManagerForEmbTable(CacheManager* cacheManager); - void EnableSSD(); - - void LockSave(); + void SetHDTransferForEmbTable(HDTransfer* hdTransfer); - void UnLockSave(); + void SetEmbCacheForEmbTable(const ock::ctr::EmbCacheManagerPtr& embCache); private: EmbeddingMgmt(); diff --git a/src/core/emb_table/embedding_static.cpp b/src/core/emb_table/embedding_static.cpp index 312b8a77..fdda5ede 100644 --- a/src/core/emb_table/embedding_static.cpp +++ b/src/core/emb_table/embedding_static.cpp @@ -106,7 +106,7 @@ void EmbeddingStatic::SaveKey(const string& savePath) } } -void EmbeddingStatic::Load(const string& savePath) +void EmbeddingStatic::Load(const string& savePath, map>& trainKeySet) { LoadKey(savePath); } diff --git a/src/core/emb_table/embedding_static.h b/src/core/emb_table/embedding_static.h index 965bce0e..6515f586 100644 --- a/src/core/emb_table/embedding_static.h +++ b/src/core/emb_table/embedding_static.h @@ -35,7 +35,7 @@ public: virtual int64_t capacity() const; - void Load(const string& savePath); + void Load(const string& savePath, map>& trainKeySet); void Save(const string& savePath); diff --git a/src/core/emb_table/embedding_table.cpp b/src/core/emb_table/embedding_table.cpp index 7cfc125e..1579282f 100644 --- a/src/core/emb_table/embedding_table.cpp +++ b/src/core/emb_table/embedding_table.cpp @@ -27,7 +27,7 @@ EmbeddingTable::EmbeddingTable() EmbeddingTable::EmbeddingTable(const EmbInfo& info, const RankInfo& rankInfo, int inSeed) : name(info.name), hostVocabSize(info.hostVocabSize), devVocabSize(info.devVocabSize), - freeSize_(0), maxOffset(0), isDynamic_(rankInfo.useDynamicExpansion), + ssdVocabSize(info.ssdVocabSize), freeSize_(0), maxOffset(0), isDynamic_(rankInfo.useDynamicExpansion), embSize_(info.embeddingSize), extEmbSize_(info.extEmbeddingSize), embInfo_(info), seed_(inSeed), rankId_(rankInfo.rankId), rankSize_(rankInfo.rankSize) { @@ -43,19 +43,6 @@ void EmbeddingTable::Key2Offset(std::vector& keys, int channel) return; } -void EmbeddingTable::FindOffset(const vector& keys, - size_t currentBatchId, size_t keepBatchId, int channelId) -{ - return; -} - -std::vector EmbeddingTable::FindOffset(const vector& keys, - size_t batchId, int channelId, - std::vector& swapPos) -{ - return {}; -} - size_t EmbeddingTable::GetMaxOffset() { return maxOffset; @@ -71,7 +58,7 @@ size_t EmbeddingTable::size() const return maxOffset; } -void EmbeddingTable::EvictKeys(const std::vector& keys) +void EmbeddingTable::EvictKeys(const std::vector& keys) { std::lock_guard lk(mut_); // lock for PROCESS_THREAD size_t keySize = keys.size(); @@ -132,40 +119,12 @@ absl::flat_hash_map EmbeddingTable::GetKeyOffsetMap() return keyOffsetMap; } -void EmbeddingTable::ClearMissingKeys() -{ - missingKeysHostPos_.clear(); -} - -const std::vector& EmbeddingTable::GetMissingKeys() -{ - return missingKeysHostPos_; -} - -void EmbeddingTable::SetStartCount() -{ -} - -void EmbeddingTable::ClearLookupAndSwapOffset() -{ -} - -size_t EmbeddingTable::GetDevVocabSize() -{ - return devVocabSize; -} - -size_t EmbeddingTable::GetHostVocabSize() -{ - return hostVocabSize; -} - vector EmbeddingTable::GetLoadOffset() { return loadOffset; } -void EmbeddingTable::Load(const string& filePath) +void 
EmbeddingTable::Load(const string& filePath, map>& trainKeySet) { } @@ -184,15 +143,6 @@ void EmbeddingTable::SetCacheManager(CacheManager *cm) { } -void EmbeddingTable::EnableSSD() -{ - isSSDEnabled_ = true; -} - -void EmbeddingTable::RefreshFreqInfoWithSwap() -{ -} - TableInfo EmbeddingTable::GetTableInfo() { TableInfo ti = { @@ -201,8 +151,6 @@ TableInfo EmbeddingTable::GetTableInfo() .devVocabSize=devVocabSize, .maxOffset=maxOffset, .keyOffsetMap=keyOffsetMap, - .evictDevPos=evictDevPos, - .evictHostPos=evictHostPos, }; return ti; } @@ -214,4 +162,12 @@ vector EmbeddingTable::GetDeviceOffset() void EmbeddingTable::SetOptimizerInfo(OptimizerInfo& optimizerInfo) { -} \ No newline at end of file +} + +void EmbeddingTable::SetHDTransfer(HDTransfer *hdTransfer) +{ +} + +void EmbeddingTable::SetEmbCache(ock::ctr::EmbCacheManagerPtr embCache) +{ +} diff --git a/src/core/emb_table/embedding_table.h b/src/core/emb_table/embedding_table.h index 0c05a0a0..1fa9008b 100644 --- a/src/core/emb_table/embedding_table.h +++ b/src/core/emb_table/embedding_table.h @@ -37,21 +37,11 @@ public: */ virtual void Key2Offset(std::vector& keys, int channel); - /** - * DDR模式使用 - */ - virtual void FindOffset(const vector& keys, - size_t currentBatchId, size_t keepBatchId, int channelId); - - virtual std::vector FindOffset(const vector& keys, - size_t batchId, int channelId, - std::vector& swapPos); - /** * 淘汰key, 配合GetEvictedKeys一起使用GetEvictedKeys * EvictKeys执行,通过GetEvictedKeys, GetEvictedKeys拿结果 */ - virtual void EvictKeys(const std::vector& keys); + virtual void EvictKeys(const std::vector& keys); /** * 获取设备侧淘汰的key的偏移或者地址 @@ -73,24 +63,12 @@ public: virtual size_t size() const; - void ClearMissingKeys(); - - virtual const std::vector& GetMissingKeys(); - absl::flat_hash_map GetKeyOffsetMap(); - virtual void SetStartCount(); - - virtual void ClearLookupAndSwapOffset(); - - virtual void Load(const string& savePath); + virtual void Load(const string& savePath, map>& trainKeySet); virtual void Save(const string& savePath); - size_t GetDevVocabSize(); - - size_t GetHostVocabSize(); - static void MakeDir(const string& dirName); virtual vector GetDeviceOffset(); @@ -101,20 +79,20 @@ public: virtual void SetCacheManager(CacheManager* cacheManager); - void EnableSSD(); + virtual TableInfo GetTableInfo(); - virtual void RefreshFreqInfoWithSwap(); + virtual void SetHDTransfer(HDTransfer *hdTransfer); - virtual TableInfo GetTableInfo(); + virtual void SetEmbCache(ock::ctr::EmbCacheManagerPtr embCache); std::string name; size_t hostVocabSize; size_t devVocabSize; + size_t ssdVocabSize; size_t maxOffset; absl::flat_hash_map keyOffsetMap; std::vector evictDevPos; // 记录HBM内被淘汰的key std::vector evictHostPos; // 记录Host内淘汰列表 - std::mutex mutSave_; // 用于保存时锁住KeyOffsetMap #ifdef NDEBUG protected: diff --git a/src/core/file_system/file_system.h b/src/core/file_system/file_system.h index 66c142db..5546c691 100644 --- a/src/core/file_system/file_system.h +++ b/src/core/file_system/file_system.h @@ -31,10 +31,7 @@ namespace MxRec { virtual size_t GetFileSize(const string& filePath) = 0; virtual ssize_t Write(const string& filePath, const char* fileContent, size_t dataSize) = 0; - virtual ssize_t Write(const string& filePath, vector fileContent, size_t dataSize) = 0; - - // In the dynamic expansion mode, embedding is transported to the host side from the device side - // and written into a file. 
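// The Write overload introduced in this hunk takes the row buffer by
// reference (the template arguments are garbled in this dump; from the
// LocalFileSystem change below they are presumably vector<vector<float>>),
// so Save no longer copies every embedding row into the writer. A minimal
// sketch of that row-wise contract under those assumptions, with dataSize
// taken as the byte count of one row; names here are illustrative only.
#include <sys/types.h>
#include <cstdio>
#include <stdexcept>
#include <string>
#include <vector>

ssize_t WriteRows(const std::string& filePath,
                  const std::vector<std::vector<float>>& fileContent,
                  size_t dataSize)
{
    FILE* fp = std::fopen(filePath.c_str(), "wb");
    if (fp == nullptr) {
        throw std::runtime_error("open file to write failed: " + filePath);
    }
    size_t written = 0;
    for (const auto& row : fileContent) {
        written += std::fwrite(row.data(), 1, dataSize, fp);  // one row per call
    }
    std::fclose(fp);
    return static_cast<ssize_t>(written);
}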
+ virtual ssize_t Write(const string& filePath, vector>& fileContent, size_t dataSize) = 0; virtual void WriteEmbedding(const string& filePath, const int& embeddingSize, const vector& addressArr, int deviceId) = 0; diff --git a/src/core/file_system/hdfs_file_system/hdfs_file_system.cpp b/src/core/file_system/hdfs_file_system/hdfs_file_system.cpp index 2c463115..68fc47a8 100644 --- a/src/core/file_system/hdfs_file_system/hdfs_file_system.cpp +++ b/src/core/file_system/hdfs_file_system/hdfs_file_system.cpp @@ -94,7 +94,7 @@ ssize_t HdfsFileSystem::Write(const string& filePath, const char* fileContent, s return static_cast(writeBytesNum); } -ssize_t HdfsFileSystem::Write(const string& filePath, vector fileContent, size_t dataSize) +ssize_t HdfsFileSystem::Write(const string& filePath, vector>& fileContent, size_t dataSize) { hdfsFS fs = ConnectHdfs(); @@ -107,7 +107,7 @@ ssize_t HdfsFileSystem::Write(const string& filePath, vector fileContent tSize writeBytesNum = 0; size_t loops = fileContent.size(); for (size_t i = 0; i < loops; i++) { - tSize res = hdfs->Write(fs, file, fileContent[i], dataSize); + tSize res = hdfs->Write(fs, file, reinterpret_cast(&fileContent[i]), dataSize); if (res == -1) { hdfs->CloseFile(fs, file); hdfs->Disconnect(fs); @@ -138,6 +138,13 @@ void HdfsFileSystem::WriteEmbedding(const string& filePath, const int& embedding } #ifndef GTEST + auto res = aclrtSetDevice(static_cast(deviceId)); + if (res != ACL_ERROR_NONE) { + hdfs->CloseFile(fs, file); + hdfs->Disconnect(fs); + throw runtime_error(StringFormat("Set device failed, device_id:%d", deviceId).c_str()); + } + for (size_t i = 0; i < addressArr.size(); i += embHashNum) { vector row(embeddingSize); int64_t address = addressArr.at(i); @@ -246,6 +253,11 @@ void HdfsFileSystem::ReadEmbedding(const string& filePath, EmbeddingSizeInfo& em throw runtime_error(StringFormat("Error: Unable to open hdfs file : {}.", filePath.c_str())); } + auto res = aclrtSetDevice(static_cast(deviceId)); + if (res != ACL_ERROR_NONE) { + throw runtime_error(StringFormat("Set device failed, device_id:%d", deviceId).c_str()); + } + float* floatPtr = reinterpret_cast(firstAddress); auto i = 0; for (const auto& offset: offsetArr) { diff --git a/src/core/file_system/hdfs_file_system/hdfs_file_system.h b/src/core/file_system/hdfs_file_system/hdfs_file_system.h index 8d436d3d..f6c6a489 100644 --- a/src/core/file_system/hdfs_file_system/hdfs_file_system.h +++ b/src/core/file_system/hdfs_file_system/hdfs_file_system.h @@ -35,7 +35,7 @@ namespace MxRec { size_t GetFileSize(const string& filePath) override; ssize_t Write(const string& filePath, const char* fileContent, size_t dataSize) override; - ssize_t Write(const string& filePath, vector fileContent, size_t dataSize) override; + ssize_t Write(const string& filePath, vector>& fileContent, size_t dataSize) override; void WriteEmbedding(const string& filePath, const int& embeddingSize, const vector& addressArr, int deviceId) override; diff --git a/src/core/file_system/local_file_system/local_file_system.cpp b/src/core/file_system/local_file_system/local_file_system.cpp index 43cd0033..6215d2ac 100644 --- a/src/core/file_system/local_file_system/local_file_system.cpp +++ b/src/core/file_system/local_file_system/local_file_system.cpp @@ -112,44 +112,23 @@ ssize_t LocalFileSystem::Write(const string& filePath, const char* fileContent, return writeBytesNum; } -ssize_t LocalFileSystem::Write(const string& filePath, vector fileContent, size_t dataSize) +ssize_t LocalFileSystem::Write(const string& filePath, 
vector>& fileContent, size_t dataSize) { int fd = open(filePath.c_str(), O_RDWR | O_CREAT | O_TRUNC, fileMode); if (fd == -1) { throw runtime_error(StringFormat("open file %s to write failed.", filePath.c_str())); } - buffer.reserve(BUFFER_SIZE); - BufferQueue queue; - ssize_t writeBytesNum = 0; - std::thread writer(&LocalFileSystem::WriterFn, this, std::ref(queue), fd, std::ref(writeBytesNum)); - - size_t loops = fileContent.size(); - for (size_t i = 0; i < loops; i++) { - size_t idx = 0; - size_t writeSize = 0; - size_t dataCol = dataSize; - while (dataCol != 0) { - if (dataCol > oneTimeReadWriteLen) { - writeSize = oneTimeReadWriteLen; - } else { - writeSize = dataCol; - } - FillToBuffer(queue, reinterpret_cast(fileContent[i]) + idx, writeSize); - dataCol -= writeSize; - idx += writeSize; - } + vector flattenContent; + for (auto& vec : fileContent) { + flattenContent.insert(flattenContent.cend(), vec.cbegin(), vec.cend()); } - // After all data has been processed, check if there is any data left in the buffer - if (!buffer.empty()) { - queue.Push(std::move(buffer)); - buffer.clear(); - } + ssize_t writeBytesNum = + write(fd, reinterpret_cast(flattenContent.data()), flattenContent.size() * sizeof(float)); - queue.Push(std::vector()); - writer.join(); close(fd); + return writeBytesNum; } @@ -168,6 +147,12 @@ void LocalFileSystem::WriteEmbedding(const string& filePath, const int& embeddin } #ifndef GTEST + auto res = aclrtSetDevice(static_cast(deviceId)); + if (res != ACL_ERROR_NONE) { + close(fd); + throw runtime_error(StringFormat("Set device failed, device_id:%d", deviceId).c_str()); + } + for (size_t i = 0; i < addressArr.size(); i += keyAddrElem) { vector row(embeddingSize); int64_t address = addressArr.at(i); @@ -271,6 +256,10 @@ void LocalFileSystem::ReadEmbedding(const string& filePath, EmbeddingSizeInfo& e if (fp == nullptr) { throw runtime_error(StringFormat("Failed to open read file: %s", filePath.c_str())); } + auto res = aclrtSetDevice(static_cast(deviceId)); + if (res != ACL_ERROR_NONE) { + throw runtime_error(StringFormat("Set device failed, device_id:%d", deviceId).c_str()); + } float* floatPtr = reinterpret_cast(firstAddress); auto i = 0; diff --git a/src/core/file_system/local_file_system/local_file_system.h b/src/core/file_system/local_file_system/local_file_system.h index f8eefd5b..9b09f34d 100644 --- a/src/core/file_system/local_file_system/local_file_system.h +++ b/src/core/file_system/local_file_system/local_file_system.h @@ -33,7 +33,7 @@ namespace MxRec { size_t GetFileSize(const string& filePath) override; ssize_t Write(const string& filePath, const char* fileContent, size_t dataSize) override; - ssize_t Write(const string& filePath, vector fileContent, size_t dataSize) override; + ssize_t Write(const string& filePath, vector>& fileContent, size_t dataSize) override; void WriteEmbedding(const string& filePath, const int& embeddingSize, const vector& addressArr, int deviceId) override; diff --git a/src/core/hd_transfer/hd_transfer.cpp b/src/core/hd_transfer/hd_transfer.cpp index a32ddf28..8fc2a282 100644 --- a/src/core/hd_transfer/hd_transfer.cpp +++ b/src/core/hd_transfer/hd_transfer.cpp @@ -50,7 +50,14 @@ int HDTransfer::Init(const vector& embInfos, uint32_t localRankId) CreateChannel(localRankId, embInfo.name, i); } // 创建acltdtDataset类型的数据,对等一个Vector。同步接口。 - aclDatasets[embInfo.name] = acltdtCreateDataset(); + for (int j = 0; j < EMBEDDING_THREAD_NUM; j++) { + acltdtDataset* dataset = acltdtCreateDataset(); + if (dataset == nullptr) { + LOG_ERROR("create 
acltdtDataset failed, table:{}, threadId:{}", embName, j); + throw runtime_error("create acltdtDataset failed"); + } + aclDatasets[embInfo.name][j] = dataset; + } } running = true; LOG(INFO) << "hd_transfer init"; @@ -71,9 +78,11 @@ void HDTransfer::Destroy() } LOG_INFO(HD + "destroy channel:{}", c.first); } - for (auto& d: aclDatasets) { - if (acltdtDestroyDataset(d.second) != ACL_ERROR_NONE) { - throw runtime_error("Acl destroy tensor dataset failed."); + for (auto& datasetMap: aclDatasets) { + for (auto &d: datasetMap.second) { + if (acltdtDestroyDataset(d.second) != ACL_ERROR_NONE) { + throw runtime_error("Acl destroy tensor dataset failed."); + } } } aclFinalize(); @@ -90,16 +99,26 @@ void HDTransfer::CreateChannel(const uint32_t localRankId, const string& embName int channelSize = GlobalEnv::hdChannelSize; LOG_INFO("user config all2all restore lookup channel size:{}", channelSize); for (int c = static_cast(TransferChannel::D2H); c != static_cast(TransferChannel::INVALID); c++) { + if ((c == static_cast(TransferChannel::SWAP) || c == static_cast(TransferChannel::D2H) || + c == static_cast(TransferChannel::H2D)) && channelNum == EVAL_CHANNEL_ID) { + continue; + } + auto channel = static_cast(c); - string sendName = StringFormat( - "%s_%s_%d", embName.c_str(), TransferChannel2Str(channel).c_str(), channelNum - ); + std::string sendName; + if (c == static_cast(TransferChannel::SWAP) || c == static_cast(TransferChannel::D2H) || + c == static_cast(TransferChannel::H2D)) { + sendName = StringFormat("%s_%s_all", embName.c_str(), TransferChannel2Str(channel).c_str()); + } else { + sendName = StringFormat("%s_%s_%d", embName.c_str(), TransferChannel2Str(channel).c_str(), channelNum); + } if (TransferChannel2Str(channel) == "all2all" || TransferChannel2Str(channel) == "restore" || TransferChannel2Str(channel) == "lookup" || TransferChannel2Str(channel) == "restore_second" || TransferChannel2Str(channel) == "uniquekeys" || - TransferChannel2Str(channel) == "evict" /* for noDDR */ + TransferChannel2Str(channel) == "evict" || + TransferChannel2Str(channel) == "swap" ) { transferChannels[sendName] = TDT_CREATE_CHANNEL(localRankId, sendName.c_str(), channelSize); } else { @@ -128,10 +147,16 @@ void HDTransfer::Send(TransferChannel channel, const vector &tensors, in for (auto& t: tensors) { sizes.push_back(t.NumElements()); } - string sendName = StringFormat("%s_%s_%d", embName.c_str(), TransferChannel2Str(channel).c_str(), channelId); - LOG_INFO(HD + "hd transfer send {}, send count is {}, size list:{}", - sendName, sizes.size(), VectorToString(sizes)); + string sendName; + if (channel == TransferChannel::SWAP || channel == TransferChannel::D2H || channel == TransferChannel::H2D) { + sendName = StringFormat("%s_%s_all", embName.c_str(), TransferChannel2Str(channel).c_str()); + } else { + sendName = StringFormat("%s_%s_%d", embName.c_str(), TransferChannel2Str(channel).c_str(), channelId); + } + + LOG_INFO(HD + "hd transfer send:{}, batchId:{}, send count:{}, size list:{}", + sendName, batchId, sizes.size(), VectorToString(sizes)); if (sizes.size() == 0) { LOG_WARN("tensors num can not be zero"); @@ -171,9 +196,15 @@ void HDTransfer::Send(TransferChannel channel, const vector &tensors, in vector HDTransfer::Recv(TransferChannel channel, int channelId, const string& embName) { EASY_FUNCTION() + vector tensors; #ifndef GTEST - std::vector tensors; - string recvName = StringFormat("%s_%s_%d", embName.c_str(), TransferChannel2Str(channel).c_str(), channelId); + string recvName; + if (channel == 
TransferChannel::SWAP || channel == TransferChannel::D2H || channel == TransferChannel::H2D) { + recvName = StringFormat("%s_%s_all", embName.c_str(), TransferChannel2Str(channel).c_str()); + } else { + recvName = StringFormat("%s_%s_%d", embName.c_str(), TransferChannel2Str(channel).c_str(), channelId); + } + LOG_DEBUG("hd transfer try recv:{}", recvName); TimeCost tc = TimeCost(); tensorflow::Status status = tensorflow::RecvTensorByAcl(transferChannels[recvName], tensors); @@ -190,8 +221,8 @@ vector HDTransfer::Recv(TransferChannel channel, int channel sizes.push_back(t.NumElements()); } LOG_INFO("hd transfer recv:{}, size:{} cost:{}ms", recvName, VectorToString(sizes), tc.ElapsedMS()); - return tensors; #endif + return tensors; } /// 接收从device发送过来的数据(D2H), updateEmbV2函数使用;使用原生的aclTDT接口 @@ -199,27 +230,36 @@ vector HDTransfer::Recv(TransferChannel channel, int channel /// \param channelId 通道索引(训练/推理) /// \param embName 表名 /// \return -size_t HDTransfer::RecvAcl(TransferChannel channel, int channelId, const string& embName) +size_t HDTransfer::RecvAcl(TransferChannel channel, int channelId, const string& embName, + int embeddingThreadId, int batchId) { EASY_FUNCTION() + size_t ret = 0; #ifndef GTEST - std::vector tensors; - string recvName = StringFormat("%s_%s_%d", embName.c_str(), TransferChannel2Str(channel).c_str(), channelId); - LOG_DEBUG("hd transfer try recv:{}", recvName); + string recvName; + if (channel == TransferChannel::SWAP || channel == TransferChannel::D2H || channel == TransferChannel::H2D) { + recvName = StringFormat("%s_%s_all", embName.c_str(), TransferChannel2Str(channel).c_str()); + } else { + recvName = StringFormat("%s_%s_%d", embName.c_str(), TransferChannel2Str(channel).c_str(), channelId); + } + + LOG_DEBUG("hd transfer try recv:{}, batchId:{}", recvName, batchId); TimeCost tc = TimeCost(); - if (aclDatasets[embName] == nullptr) { + if (aclDatasets[embName][embeddingThreadId] == nullptr) { throw runtime_error(StringFormat("Failed recv:%s.", recvName.c_str()).c_str()); } - auto aclStatus = acltdtReceiveTensor(transferChannels[recvName], aclDatasets[embName], GlobalEnv::aclTimeout); + auto aclStatus = acltdtReceiveTensor( + transferChannels[recvName], aclDatasets[embName][embeddingThreadId], GlobalEnv::aclTimeout); if (!running) { return 0; } if (aclStatus != ACL_ERROR_NONE && aclStatus != ACL_ERROR_RT_QUEUE_EMPTY) { throw runtime_error(StringFormat("Failed receive data from acl channel, acl status:%d", aclStatus).c_str()); } - LOG_INFO("hd transfer recv:{} cost:{}ms", recvName, tc.ElapsedMS()); - return acltdtGetDatasetSize(aclDatasets[embName]); + LOG_INFO("hd transfer recv:{}, batchId:{}, cost:{}ms", recvName, batchId, tc.ElapsedMS()); + ret = acltdtGetDatasetSize(aclDatasets[embName][embeddingThreadId]); #endif + return ret; } std::unordered_map HDTransfer::GetTransChannel() diff --git a/src/core/hd_transfer/hd_transfer.h b/src/core/hd_transfer/hd_transfer.h index f9528578..58c48067 100644 --- a/src/core/hd_transfer/hd_transfer.h +++ b/src/core/hd_transfer/hd_transfer.h @@ -45,6 +45,8 @@ namespace MxRec { EVICT, H2D, SWAP, + SAVE_D2H, + SAVE_H2D, INVALID }; @@ -69,6 +71,10 @@ namespace MxRec { return "h2d"; case TransferChannel::SWAP: return "swap"; + case TransferChannel::SAVE_D2H: + return "save_d2h"; + case TransferChannel::SAVE_H2D: + return "save_h2d"; default: throw std::invalid_argument("Invalid TransferChannel"); } @@ -76,7 +82,7 @@ namespace MxRec { class HDTransfer { public: - std::unordered_map aclDatasets; + std::unordered_map> aclDatasets; 
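// The aclDatasets member just above became a two-level map (reconstructed
// here as unordered_map<string, unordered_map<int, acltdtDataset*>> -- the
// real template arguments were lost in extraction), giving every
// (table, embedding thread) pair a private dataset so concurrent RecvAcl
// calls never share one. Minimal sketch with a stand-in dataset type:
#include <string>
#include <unordered_map>

struct DatasetStub {};  // stand-in for the real acltdtDataset

using DatasetMap =
    std::unordered_map<std::string, std::unordered_map<int, DatasetStub*>>;

DatasetStub* DatasetFor(DatasetMap& aclDatasets, const std::string& embName, int threadId)
{
    // After Init fills the map, each embedding thread only reads its own
    // slot, so no extra locking is needed on this path.
    return aclDatasets[embName][threadId];
}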
HDTransfer() = default; @@ -87,7 +93,8 @@ namespace MxRec { vector Recv(TransferChannel channel, int channelId, const string& embName); - size_t RecvAcl(TransferChannel channel, int channelId, const string& embName); + size_t RecvAcl(TransferChannel channel, int channelId, const string& embName, + int embeddingThreadId, int batchId); void Destroy(); diff --git a/src/core/host_emb/host_emb.cpp b/src/core/host_emb/host_emb.cpp deleted file mode 100644 index ce0e0a78..00000000 --- a/src/core/host_emb/host_emb.cpp +++ /dev/null @@ -1,278 +0,0 @@ -/* Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and - limitations under the License. -==============================================================================*/ - -#include "host_emb.h" -#include -#include "hd_transfer/hd_transfer.h" -#include "checkpoint/checkpoint.h" -#include "initializer/initializer.h" -#include "utils/time_cost.h" - -using namespace MxRec; -using namespace std; -using namespace chrono; - -/// 初始化host emb -/// \param embInfos 表信息列表 -/// \param seed 随机种子 -/// \return -void HostEmb::Initialize(const vector& embInfos, int seed) -{ - for (const auto& embInfo: embInfos) { - HostEmbTable hostEmb; - hostEmb.hostEmbInfo = embInfo; - EmbDataGenerator(embInfo.initializeInfos, seed, static_cast(embInfo.hostVocabSize), - embInfo.extEmbeddingSize, hostEmb.embData); - hostEmbs[embInfo.name] = move(hostEmb); - LOG_INFO(HOSTEMB + "HostEmb Initialize End"); - } -} - -/// 根据指定的初始化器对emb进行初始化 -/// \param initializeInfos emb初始化信息列表 -/// \param seed 随机种子 -/// \param vocabSize host表大小 -/// \param embeddingSize emb维度 -/// \param embData emb数据 -void HostEmb::EmbDataGenerator(const vector &initializeInfos, int seed, int vocabSize, - int embeddingSize, vector> &embData) const -{ -#ifndef GTEST - LOG_INFO(HOSTEMB + "GenerateEmbData Start, seed:{}, initializer num: {}", seed, initializeInfos.size()); - embData.clear(); - embData.resize(vocabSize, vector(embeddingSize)); - - for (auto initializeInfo: initializeInfos) { - LOG_INFO("Device GenerateEmbData ing. 
name {}", initializeInfo.name); - for (int i = 0; i < vocabSize; i++) { - initializeInfo.initializer->GenerateData(embData.at(i).data(), embeddingSize); - } - } - LOG_INFO(HOSTEMB + "GenerateEmbData End, seed:{}", seed); -#endif -} - -/// 停止用于异步更新D2H emb的线程 -/// \param channelId 通道索引(训练/推理) -void HostEmb::Join(int channelId) -{ - TimeCost tc = TimeCost(); - switch (channelId) { - case TRAIN_CHANNEL_ID: - LOG_DEBUG(HOSTEMB + "start join, channelId:{}, procThreadsForTrain num:{}", - channelId, procThreadsForTrain.size()); - for (auto& t: procThreadsForTrain) { - t->join(); - } - procThreadsForTrain.clear(); - LOG_DEBUG(HOSTEMB + "end join, channelId:{}, cost:{}ms", channelId, tc.ElapsedMS()); - break; - case EVAL_CHANNEL_ID: - LOG_DEBUG(HOSTEMB + "start join, channelId:{}, procThreadsForEval num:{}", - channelId, procThreadsForEval.size()); - for (auto& t: procThreadsForEval) { - t->join(); - } - procThreadsForEval.clear(); - LOG_DEBUG(HOSTEMB + "end join, channelId:{}, cost:{}ms", channelId, tc.ElapsedMS()); - break; - default: - throw invalid_argument("channelId not in [TRAIN_CHANNEL_ID, EVAL_CHANNEL_ID]"); - } -} - -#ifndef GTEST -/// 从hdTransfer获取device侧返回的emb信息,并在host侧表的对应位置插入。 -/// missingKeysHostPos为host侧需要发送的emb的位置,也就是淘汰的emb的插入位置 -/// \param missingKeysHostPos 当前batch在host上需要换出的偏移 -/// \param channelId 通道索引(训练/推理) -/// \param embName 表名 -void HostEmb::UpdateEmb(const vector& missingKeysHostPos, int channelId, const string& embName) -{ - LOG_INFO(HOSTEMB + "UpdateEmb, channelId:{}, embName:{}", channelId, embName); - EASY_FUNCTION(profiler::colors::Purple); - TimeCost tc = TimeCost(); - auto hdTransfer = Singleton::GetInstance(); - TransferChannel transferName = TransferChannel::D2H; - LOG_INFO(HOSTEMB + "wait D2H embs, channelId:{}", channelId); - const auto tensors = hdTransfer->Recv(transferName, channelId, embName); - if (tensors.empty()) { - LOG_WARN(HOSTEMB + "recv empty data"); - return; - } - const Tensor& d2hEmb = tensors[0]; - EASY_BLOCK("Update") - const float* tensorPtr = d2hEmb.flat().data(); - auto embeddingSize = hostEmbs[embName].hostEmbInfo.extEmbeddingSize; - auto& embData = hostEmbs[embName].embData; - - LOG_DEBUG(HOSTEMB + "embName:{}, UpdateEmb missingKeys len = {}, embeddingSize = {}, " - "embData.size = {} {}", embName, missingKeysHostPos.size(), embeddingSize, embData.size(), tensorPtr); - -#pragma omp parallel for num_threads(MGMT_CPY_THREADS) default(none) \ - shared(missingKeysHostPos, tensorPtr, embData, embeddingSize) - for (size_t i = 0; i < missingKeysHostPos.size(); i++) { - auto& dst = embData[missingKeysHostPos[i]]; -#pragma omp simd - for (int j = 0; j < embeddingSize; j++) { - dst[j] = tensorPtr[j + embeddingSize * i]; - } - } - LOG_INFO(HOSTEMB + "update emb end cost: {}ms", tc.ElapsedMS()); - EASY_END_BLOCK -} - -/// 用从device获取的数据更新host的emb(使用aclTDT原生接口) -/// \param missingKeysHostPos 当前batch在host上需要换出的偏移 -/// \param channelId 通道索引(训练/推理) -/// \param embName 表名 -void HostEmb::UpdateEmbV2(const vector& missingKeysHostPos, int channelId, const string& embName) -{ - LOG_INFO(HOSTEMB + "UpdateEmbV2, channelId:{}, embName:{}", channelId, embName); - EASY_FUNCTION(profiler::colors::Purple) - auto updateThread = - [this, missingKeysHostPos, channelId, embName] { - auto hdTransfer = Singleton::GetInstance(); - TransferChannel transferName = TransferChannel::D2H; - LOG_INFO(HOSTEMB + "wait D2H embs, channelId:{}", channelId); - auto size = hdTransfer->RecvAcl(transferName, channelId, embName); - if (size == 0) { - LOG_WARN(HOSTEMB + "recv empty data"); - 
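// The HostEmb::UpdateEmbV2 body being deleted here boils down to this
// scatter-copy: the device returns the swapped-out rows as one flat float
// buffer, and each row is copied back into the host table at its recorded
// position. Illustrative sketch only (names assumed); the real code obtained
// the buffer and its sizes through the acltdt accessors shown above.
#include <vector>

void ScatterRows(const float* src, const std::vector<size_t>& hostPos,
                 std::vector<std::vector<float>>& embData, int embeddingSize)
{
#pragma omp parallel for  // mirrors the deleted omp pragma; optional here
    for (long i = 0; i < static_cast<long>(hostPos.size()); ++i) {
        std::vector<float>& dst = embData[hostPos[i]];
        for (int j = 0; j < embeddingSize; ++j) {
            dst[j] = src[static_cast<size_t>(i) * embeddingSize + j];
        }
    }
}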
return; - } - TimeCost tc = TimeCost(); - - EASY_BLOCK("Update") - auto& embData = hostEmbs[embName].embData; - auto embeddingSize = hostEmbs[embName].hostEmbInfo.extEmbeddingSize; - auto aclData = acltdtGetDataItem(hdTransfer->aclDatasets[embName], 0); - if (aclData == nullptr) { - throw runtime_error("Acl get tensor data from dataset failed."); - } - float* ptr = static_cast(acltdtGetDataAddrFromItem(aclData)); - if (ptr == nullptr || missingKeysHostPos.size() == 0) { - return; - } - size_t elementSize = acltdtGetDataSizeFromItem(aclData); - size_t dimNum = acltdtGetDimNumFromItem(aclData); - LOG_DEBUG(HOSTEMB + "embName:{}, UpdateEmb missingKeys len = {}, embeddingSize = {}," - " embData.size = {}, RecvAcl = {}, elementSize = {}, dimNum = {}", - embName, missingKeysHostPos.size(), embeddingSize, embData.size(), size, elementSize, dimNum); -#pragma omp parallel for num_threads(MGMT_CPY_THREADS) default(none) shared(ptr, embData, embeddingSize) - for (size_t j = 0; j < missingKeysHostPos.size(); j++) { - auto& dst = embData[missingKeysHostPos[j]]; -#pragma omp simd - for (int k = 0; k < embeddingSize; k++) { - dst[k] = ptr[k + embeddingSize * j]; - } - } - LOG_INFO(HOSTEMB + "update emb end cost: {}ms", tc.ElapsedMS()); - }; - - switch (channelId) { - case TRAIN_CHANNEL_ID: - procThreadsForTrain.emplace_back(make_unique(updateThread)); - break; - case EVAL_CHANNEL_ID: - procThreadsForEval.emplace_back(make_unique(updateThread)); - break; - default: - throw invalid_argument("channelId not in [TRAIN_CHANNEL_ID, EVAL_CHANNEL_ID]"); - } -} - -/// 查找host侧需要发送给device的emb数据。 -/// \param missingKeysHostPos 当前batch在host上需要换出的偏移 -/// \param embName -/// \param h2dEmbOut -void HostEmb::GetH2DEmb(const vector& missingKeysHostPos, const string& embName, - vector& h2dEmbOut) -{ - EASY_FUNCTION() - TimeCost tc = TimeCost(); - const auto& emb = hostEmbs[embName]; - const int embeddingSize = emb.hostEmbInfo.extEmbeddingSize; - h2dEmbOut.emplace_back(Tensor(tensorflow::DT_FLOAT, { - int(missingKeysHostPos.size()), embeddingSize - })); - auto& tmpTensor = h2dEmbOut.back(); - auto tmpData = tmpTensor.flat(); -#pragma omp parallel for num_threads(MGMT_CPY_THREADS) default(none) shared(missingKeysHostPos, emb, tmpData) - for (size_t i = 0; i < missingKeysHostPos.size(); ++i) { - const auto& src = emb.embData[missingKeysHostPos[i]]; -#pragma omp simd - for (int j = 0; j < embeddingSize; j++) { - tmpData(j + i * embeddingSize) = src[j]; - } - } - LOG_INFO("GetH2DEmb end, missingKeys count:{} cost:{}ms", missingKeysHostPos.size(), tc.ElapsedMS()); -} - -/// 获取hostEmbs的指针 -/// \return -auto HostEmb::GetHostEmbs() -> absl::flat_hash_map* -{ - return &hostEmbs; -} - -/// 对指定offset的emb进行初始化 -/// \param initializeInfos emb初始化信息列表 -/// \param embData emb数据 -/// \param offset 偏移列表 -void HostEmb::EmbPartGenerator(const vector &initializeInfos, vector> &embData, - const vector& offset) const -{ - for (auto initializeInfo: initializeInfos) { - LOG_INFO("Device GenerateEmbData ing. name {}", initializeInfo.name); - for (size_t i = 0; i < offset.size(); ++i) { - initializeInfo.initializer->GenerateData(embData.at(offset.at(i)).data(), - static_cast(embData[0].size())); - } - } -} - -void HostEmb::EmbPartGenerator(const vector &initializeInfos, vector> &embData, - const vector& offset) const -{ - for (auto initializeInfo: initializeInfos) { - LOG_INFO("Device GenerateEmbData ing. 
name {}", initializeInfo.name); - for (size_t i = 0; i < offset.size(); ++i) { - initializeInfo.initializer->GenerateData(embData.at(offset.at(i)).data(), - static_cast(embData[0].size())); - } - } -} -#endif - -/// 利用initializer初始化emb淘汰的位置 -/// \param embName 表名 -/// \param offset 淘汰的偏移列表 -void HostEmb::EvictInitEmb(const string& embName, const vector& offset) -{ -#ifndef GTEST - auto& hostEmb = GetEmb(embName); - EmbPartGenerator(hostEmb.hostEmbInfo.initializeInfos, hostEmb.embData, offset); - LOG_INFO(HOSTEMB + "ddr EvictInitEmb!host embName {}, init offsets size: {}", embName, offset.size()); -#endif -} - -void HostEmb::EvictInitEmb(const string& embName, const vector& offset) -{ -#ifndef GTEST - auto& hostEmb = GetEmb(embName); - EmbPartGenerator(hostEmb.hostEmbInfo.initializeInfos, hostEmb.embData, offset); - LOG_INFO(HOSTEMB + "ddr EvictInitEmb!host embName {}, init offsets size: {}", embName, offset.size()); -#endif -} \ No newline at end of file diff --git a/src/core/host_emb/host_emb.h b/src/core/host_emb/host_emb.h deleted file mode 100644 index a9ff3786..00000000 --- a/src/core/host_emb/host_emb.h +++ /dev/null @@ -1,76 +0,0 @@ -/* Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and - limitations under the License. -==============================================================================*/ - -#ifndef MX_REC_HOSTEMB_H -#define MX_REC_HOSTEMB_H - -#include -#include -#include -#include -#include "absl/container/flat_hash_map.h" -#include "utils/common.h" -#include "utils/singleton.h" -#include "tensorflow/core/framework/tensor.h" - -namespace MxRec { - using namespace std; - using namespace tensorflow; - - class HostEmb { - public: - HostEmb() = default; - - ~HostEmb() - {}; - - void Initialize(const vector& embInfos, int seed); - - void Join(int channelId); - - void UpdateEmb(const vector& missingKeysHostPos, int channelId, const string& embName); - - void UpdateEmbV2(const vector& missingKeysHostPos, int channelId, const string& embName); - - void GetH2DEmb(const vector& missingKeysHostPos, const string& embName, - vector& h2dEmbOut); - auto GetHostEmbs() -> absl::flat_hash_map*; - - void EvictInitEmb(const string& embName, const vector& offset); - - void EvictInitEmb(const string& embName, const vector& offset); - - HostEmbTable& GetEmb(const string& embName) - { - return hostEmbs.at(embName); - } - - GTEST_PRIVATE: - absl::flat_hash_map hostEmbs; - - std::vector> procThreadsForTrain; - std::vector> procThreadsForEval; - - void EmbDataGenerator(const vector& initializeInfos, int seed, int vocabSize, int embeddingSize, - vector>& embData) const; - void EmbPartGenerator(const vector &initializeInfos, vector> &embData, - const vector& offset) const; - - void EmbPartGenerator(const vector &initializeInfos, vector> &embData, - const vector& offset) const; - }; -} - -#endif // MX_REC_HOSTEMB_H diff --git a/src/core/hybrid_mgmt/hybrid_mgmt.cpp b/src/core/hybrid_mgmt/hybrid_mgmt.cpp index 78621829..123b2c79 100644 --- a/src/core/hybrid_mgmt/hybrid_mgmt.cpp +++ 
b/src/core/hybrid_mgmt/hybrid_mgmt.cpp @@ -20,6 +20,7 @@ See the License for the specific language governing permissions and #include #include #include +#include #include "hd_transfer/hd_transfer.h" #include "hybrid_mgmt/hybrid_mgmt_block.h" @@ -30,12 +31,11 @@ See the License for the specific language governing permissions and #include "key_process/key_process.h" #include "key_process/feature_admit_and_evict.h" #include "emb_table/embedding_mgmt.h" -#include "emb_table/embedding_ddr.h" using namespace MxRec; using namespace std; - +using namespace ock::ctr; /// Openmpi通信域进程数设置、计算所有表host特征数量总数、设置训练模式(HBM/DDR) /// \param rankInfo @@ -89,8 +89,13 @@ bool HybridMgmt::Initialize(RankInfo rankInfo, const vector& embInfos, return true; } + // create factory for fastUnique and embeddingCache + int result = ock::ctr::Factory::Create(factory); + if (result != 0) { + throw runtime_error(Logger::Format("create fast factory failed, error code:{}", result)); + } + InitRankInfo(rankInfo, embInfos); - EmbeddingMgmt::Instance()->Init(rankInfo, embInfos, thresholdValues, seed); GlogConfig::gStatOn = GlobalEnv::statOn; LOG_INFO(MGMT + "begin initialize, localRankSize:{}, localRankId:{}, rank:{}", @@ -110,25 +115,17 @@ bool HybridMgmt::Initialize(RankInfo rankInfo, const vector& embInfos, KEY_PROCESS_INSTANCE->Initialize(rankInfo, embInfos, thresholdValues, seed); isRunning = true; + isSSDEnabled = rankInfo.isSSDEnabled; + EmbeddingMgmt::Instance()->Init(rankInfo, embInfos, seed); - // DDR模式,初始化hashmap和host emb if (rankInfo.isDDR) { - hostEmbs = Singleton::GetInstance(); - hostHashMaps = make_unique(); - hostEmbs->Initialize(embInfos, seed); - hostHashMaps->Init(rankInfo, embInfos, ifLoad); + InitEmbeddingCache(embInfos); } - // 非断点续训模式,启动数据传输 - isSSDEnabled = rankInfo.isSSDEnabled; if (isSSDEnabled) { cacheManager = Singleton::GetInstance(); - cacheManager->Init(hostEmbs, mgmtEmbInfo); - hostHashMaps->isSSDEnabled = this->isSSDEnabled; - hostHashMaps->cacheManager = this->cacheManager; - // 启用SSD时,EmbeddingDDR依赖cacheManager - EmbeddingMgmt::Instance()->EnableSSD(); - EmbeddingMgmt::Instance()->SetCacheManagerForEmbTable(this->cacheManager); + cacheManager->Init(embCache, mgmtEmbInfo); + EmbeddingMgmt::Instance()->SetCacheManagerForEmbTable(cacheManager); } isLoad = ifLoad; if (!isLoad) { @@ -136,99 +133,24 @@ bool HybridMgmt::Initialize(RankInfo rankInfo, const vector& embInfos, } for (const auto& info: embInfos) { - LOG_INFO(MGMT + "emb[{}] vocab size {}+{} sc:{}", + LOG_INFO(MGMT + "table:{}, vocab size dev+host:{}+{}, send count:{}", info.name, info.devVocabSize, info.hostVocabSize, info.sendCount); } - LOG_INFO(MGMT + "end initialize, isDDR:{}, maxStep:[{}, {}], rank:{}", rankInfo.isDDR, - rankInfo.ctrlSteps.at(TRAIN_CHANNEL_ID), rankInfo.ctrlSteps.at(EVAL_CHANNEL_ID), rankInfo.rankId); + LOG_INFO(MGMT + "end initialize, rankId:{}, isDDR:{}, " + "step[train_interval, eval_interval, save_interval, max_train_step]:[{}, {}, {}, {}]", + rankInfo.rankId, rankInfo.isDDR, + rankInfo.ctrlSteps.at(TRAIN_CHANNEL_ID), rankInfo.ctrlSteps.at(EVAL_CHANNEL_ID), + rankInfo.ctrlSteps.at(SAVE_STEP_INDEX), rankInfo.ctrlSteps.at(MAX_TRAIN_STEP_INDEX)); #endif isInitialized = true; return true; } -// 比较hostHashMap和cacheManager的数据是否一致 -void HybridMgmt::AddCacheManagerTraceLog(CkptData& saveData) -{ - if (Logger::GetLevel() != Logger::TRACE) { - return; - } - auto& embHashMaps = saveData.embHashMaps; - auto& ddrKeyFreqMap = saveData.ddrKeyFreqMaps; - for (auto& it : embHashMaps) { - string embTableName = it.first; - auto& 
hostMap = EmbeddingMgmt::Instance()->GetTable(embTableName)->keyOffsetMap; - auto& devSize = it.second.devVocabSize; - auto& lfu = ddrKeyFreqMap[embTableName]; - size_t tableKeyInDdr = 0; - for (const auto& item : hostMap) { - if (item.second < devSize) { - continue; - } - ++tableKeyInDdr; - auto cuKey = item.first; - if (lfu.find(cuKey) == lfu.end()) { - LOG_ERROR("save step error, ddr key:{}, not exist in lfu, hostHashMap offset:", - cuKey, item.second); - } - } - LOG_INFO("save step end, table:{}, tableKeyInDdr:{}, tableKeyInLfu:{}", - embTableName, tableKeyInDdr, lfu.size()); - } -} - -/// 保存CacheManager时恢复数据(与恢复hostHashMap类似,仅恢复保存数据,不修改源数据) -/// \param saveData 保存数据 -void HybridMgmt::RestoreFreq4Save(CkptData& saveData) const -{ - // 仅在差异1步时执行恢复操作 - int checkResult = hybridMgmtBlock->CheckSaveEmbMapValid(); - if (checkResult != 1) { - return; - } - auto& ddrKeyFreqMaps = saveData.ddrKeyFreqMaps; - auto& excludeDDRKeyFreqMaps = saveData.excludeDDRKeyFreqMaps; - - for (const auto& it : saveData.embHashMaps) { - auto& embTableName = it.first; - auto& embHashMap = it.second; - vector hbm2DdrKeys; - vector ddr2HbmKeys; - LOG_INFO("restore freq info for save step, table:{}, embHashMap.oldSwap size:{}", - embTableName, embHashMap.oldSwap.size()); - LOG_INFO("before, ddr key table size:{}, exclude ddr key table size:{}", - ddrKeyFreqMaps[embTableName].size(), excludeDDRKeyFreqMaps[embTableName].size()); - for (const auto& swapKeys : embHashMap.oldSwap) { - hbm2DdrKeys.emplace_back(swapKeys.second); - ddr2HbmKeys.emplace_back(swapKeys.first); - } - int hbm2DdrKeysNotInExcludeMapCount = 0; - int ddr2HbmKeysNotInDDRMapCount = 0; - for (auto& key : hbm2DdrKeys) { - if (excludeDDRKeyFreqMaps[embTableName].find(key) == excludeDDRKeyFreqMaps[embTableName].end()) { - ++hbm2DdrKeysNotInExcludeMapCount; - } - ddrKeyFreqMaps[embTableName][key] = excludeDDRKeyFreqMaps[embTableName][key]; - excludeDDRKeyFreqMaps[embTableName].erase(key); - } - for (auto& key : ddr2HbmKeys) { - if (ddrKeyFreqMaps[embTableName].find(key) == ddrKeyFreqMaps[embTableName].end()) { - ++ddr2HbmKeysNotInDDRMapCount; - } - excludeDDRKeyFreqMaps[embTableName][key] = ddrKeyFreqMaps[embTableName][key]; - ddrKeyFreqMaps[embTableName].erase(key); - } - LOG_INFO("hbm2DdrKeysNotInExcludeMapCount:{}, ddr2HbmKeysNotInDDRMapCount:{}", - hbm2DdrKeysNotInExcludeMapCount, ddr2HbmKeysNotInDDRMapCount); - LOG_INFO("after, ddr key table size:{}, exclude ddr key table size:{}", - ddrKeyFreqMaps[embTableName].size(), excludeDDRKeyFreqMaps[embTableName].size()); - } -} - /// 保存模型 /// \param savePath 保存路径 /// \return -bool HybridMgmt::Save(const string savePath) +void HybridMgmt::Save(const string& savePath) { #ifndef GTEST if (!isInitialized) { @@ -242,22 +164,17 @@ bool HybridMgmt::Save(const string savePath) Checkpoint saveCkpt; saveData.keyCountMap = KEY_PROCESS_INSTANCE->GetKeyCountMap(); - EmbeddingMgmt::Instance()->LockSave(); // acquire lock here to prevent HybridMgmt modify keyOffsetMap EmbeddingMgmt::Instance()->Save(savePath); - offsetMapToSend = EmbeddingMgmt::Instance()->GetDeviceOffsets(); + if (!mgmtRankInfo.isDDR) { + // hbm模式只保存必要的offset对应的内容 + offsetMapToSend = EmbeddingMgmt::Instance()->GetDeviceOffsets(); + } if (isSSDEnabled) { - LOG_DEBUG(MGMT + "Start host side save: ssd mode hashmap"); - for (auto& it : cacheManager->ddrKeyFreqMap) { - saveData.ddrKeyFreqMaps[it.first] = it.second.GetFreqTable(); - } - saveData.excludeDDRKeyFreqMaps = cacheManager->excludeDDRKeyCountMap; - RestoreFreq4Save(saveData); - 
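// Conceptual sketch of what the deleted RestoreFreq4Save did: for the save
// snapshot only, undo the most recent HBM<->DDR swap by moving each swapped
// key's count back to the map it came from (pair = {ddr2HbmKey, hbm2DdrKey},
// matching how embHashMap.oldSwap is unpacked above). Types and names are
// illustrative, not the project's own.
#include <cstdint>
#include <unordered_map>
#include <utility>
#include <vector>

using FreqMap = std::unordered_map<int64_t, uint64_t>;

void UndoLastSwapForSave(const std::vector<std::pair<int64_t, int64_t>>& oldSwap,
                         FreqMap& ddrKeyFreq, FreqMap& excludeDdrKeyFreq)
{
    for (const auto& [ddr2HbmKey, hbm2DdrKey] : oldSwap) {
        // key that just left HBM for DDR: its count belongs in the DDR map again
        ddrKeyFreq[hbm2DdrKey] = excludeDdrKeyFreq[hbm2DdrKey];
        excludeDdrKeyFreq.erase(hbm2DdrKey);
        // key that just left DDR for HBM: opposite move
        excludeDdrKeyFreq[ddr2HbmKey] = ddrKeyFreq[ddr2HbmKey];
        ddrKeyFreq.erase(ddr2HbmKey);
    }
}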
AddCacheManagerTraceLog(saveData); + LOG_DEBUG(MGMT + "start save SSD data"); auto step = GetStepFromPath(savePath); cacheManager->SaveSSDEngine(step); } - EmbeddingMgmt::Instance()->UnLockSave(); // 保存特征准入淘汰相关的数据 FeatureAdmitAndEvict& featAdmitNEvict = KEY_PROCESS_INSTANCE->GetFeatAdmitAndEvict(); @@ -272,8 +189,9 @@ bool HybridMgmt::Save(const string savePath) saveCkpt.SaveModel(savePath, saveData, mgmtRankInfo, mgmtEmbInfo); // 数据处理线程释放锁 KEY_PROCESS_INSTANCE->LoadSaveUnlock(); + hybridMgmtBlock->FinishSave(); + cvCheckSave.notify_all(); #endif - return true; } /// 加载模型 @@ -297,26 +215,23 @@ bool HybridMgmt::Load(const string& loadPath, vector warmStartTables) SetFeatureTypeForLoad(loadFeatures); if (warmStartTables.size() == 0) { - EmbeddingMgmt::Instance()->Load(loadPath); + EmbeddingMgmt::Instance()->Load(loadPath, trainKeysSet); } else { for (auto& tableName: warmStartTables) { - EmbeddingMgmt::Instance()->Load(tableName, loadPath); + EmbeddingMgmt::Instance()->Load(tableName, loadPath, trainKeysSet); } } - loadOffsetToSend = EmbeddingMgmt::Instance()->GetLoadOffsets(); + if (!mgmtRankInfo.isDDR) { + // hbm模式只保存必要的offset对应的内容 + loadOffsetToSend = EmbeddingMgmt::Instance()->GetLoadOffsets(); + } // 执行加载操作 loadCkpt.LoadModel(loadPath, loadData, mgmtRankInfo, mgmtEmbInfo, loadFeatures); KEY_PROCESS_INSTANCE->LoadKeyCountMap(loadData.keyCountMap); - if (mgmtRankInfo.isDDR) { - // DDR模式 将加载的hash map进行赋值 - LOG_DEBUG(MGMT + "Start host side load: ddr mode hashmap"); - auto GetEmbHashMaps = EmbeddingMgmt::Instance()->GetEmbHashMaps(); - LOG_DEBUG(MGMT + "over over Start host side load: ddr mode hashmap"); - hostHashMaps->LoadHashMap(GetEmbHashMaps); - } else { + if (!mgmtRankInfo.isDDR) { // HBM模式 将加载的最大偏移(真正使用了多少vocab容量)、特征到偏移的映射,进行赋值 LOG_DEBUG(MGMT + "Start host side load: no ddr mode hashmap"); auto keyOffsetMap = EmbeddingMgmt::Instance()->GetKeyOffsetMap(); @@ -336,13 +251,7 @@ bool HybridMgmt::Load(const string& loadPath, vector warmStartTables) if (isSSDEnabled) { LOG_DEBUG(MGMT + "Start host side load: ssd key freq map"); auto step = GetStepFromPath(loadPath); - cacheManager->Load(loadData.ddrKeyFreqMaps, loadData.excludeDDRKeyFreqMaps, - step, mgmtRankInfo.rankSize, mgmtRankInfo.rankId); - for (auto info: mgmtEmbInfo) { - auto tb = EmbeddingMgmt::Instance()->GetTable(info.name); - auto tbCast = reinterpret_pointer_cast(tb); - tbCast->RefreshFreqInfoAfterLoad(); - } + cacheManager->Load(mgmtEmbInfo, step, trainKeysSet); } LOG_DEBUG(MGMT + "Finish host side load process"); @@ -368,10 +277,6 @@ void HybridMgmt::SetFeatureTypeForLoad(vector& loadFeatures) if (featAdmitNEvict.GetFunctionSwitch()) { loadFeatures.push_back(CkptFeatureType::FEAT_ADMIT_N_EVICT); } - - if (isSSDEnabled) { - loadFeatures.push_back(CkptFeatureType::DDR_KEY_FREQ_MAP); - } } /// 获取key对应的offset,python侧调用 @@ -444,76 +349,6 @@ void HybridMgmt::ReceiveHostMap(AllKeyOffsetMapT receiveKeyOffsetMap) #endif } -/// 对加载的数据和训练配置进行一致性校验 -/// \param loadHostEmbs -/// \param setupHostEmbs -/// \param embTableCount -/// \return -bool HybridMgmt::IsLoadDataMatches(const EmbMemT& loadHostEmbs, - const EmbInfo& setupHostEmbs, - size_t& embTableCount) const -{ - bool loadDataMatches = { true }; - const auto& loadEmbTable { loadHostEmbs.find(setupHostEmbs.name) }; - if (loadEmbTable != loadHostEmbs.end()) { - embTableCount++; - - const auto& loadEmbInfo { loadEmbTable->second.hostEmbInfo }; - if (setupHostEmbs.sendCount != loadEmbInfo.sendCount) { - LOG_ERROR(MGMT + "Load data sendCount {} for table {} does not match setup sendCount 
{}", - setupHostEmbs.sendCount, setupHostEmbs.name, loadEmbInfo.sendCount); - loadDataMatches = false; - } - if (setupHostEmbs.extEmbeddingSize != loadEmbInfo.extEmbeddingSize) { - LOG_ERROR(MGMT + "Load data extEmbeddingSize {} for table {} does not match setup extEmbeddingSize {}", - setupHostEmbs.extEmbeddingSize, setupHostEmbs.name, loadEmbInfo.extEmbeddingSize); - loadDataMatches = false; - } - if (setupHostEmbs.devVocabSize != loadEmbInfo.devVocabSize) { - LOG_ERROR(MGMT + "Load data devVocabSize {} for table {} does not match setup devVocabSize {}", - setupHostEmbs.devVocabSize, setupHostEmbs.name, loadEmbInfo.devVocabSize); - loadDataMatches = false; - } - if (setupHostEmbs.hostVocabSize != loadEmbInfo.hostVocabSize) { - LOG_ERROR(MGMT + "Load data hostVocabSize {} for table {} does not match setup hostVocabSize {}", - setupHostEmbs.hostVocabSize, setupHostEmbs.name, loadEmbInfo.hostVocabSize); - loadDataMatches = false; - } - if (!loadDataMatches) { - return false; - } - } else { - LOG_ERROR(MGMT + "Load data does not contain table with table name: {}", setupHostEmbs.name); - return false; - } - return true; -} - -/// 对DDR模式保存的模型和训练配置进行一致性校验 -/// \param loadData -/// \return 是否一致 -bool HybridMgmt::LoadMatchesDDRSetup(const CkptData& loadData) -{ - size_t embTableCount { 0 }; - auto loadHostEmbs { loadData.hostEmbs }; - if (loadHostEmbs == nullptr) { - LOG_ERROR(MGMT + "Host Embedding of load checkpoint data is nullptr!"); - return false; - } - for (EmbInfo setupHostEmbs : mgmtEmbInfo) { - if (!IsLoadDataMatches(*loadHostEmbs, setupHostEmbs, embTableCount)) { - return false; - } - } - - if (embTableCount < loadHostEmbs->size()) { - LOG_ERROR(MGMT + "Load data has {} tables more than setup table num {}", - loadHostEmbs->size(), embTableCount); - return false; - } - return true; -} - /// 根据HBM/DDR模式,启动数据处理线程 void HybridMgmt::Start() { @@ -558,6 +393,12 @@ void HybridMgmt::StartThreadForDDR() LOG_INFO("parseKeysTaskForEval done"); }; procThreads.emplace_back(std::make_unique(parseKeysTaskForEval)); + + auto embeddingProcessTask = [this]() { + EmbeddingTask(); + LOG_INFO("embeddingProcessTask done"); + }; + procThreads.emplace_back(std::make_unique(embeddingProcessTask)); #endif } @@ -574,6 +415,16 @@ void HybridMgmt::Destroy() // 先发送停止信号mgmt,先停止新lookup查询, 解除queue的限制防止卡住 isRunning = false; + mutexDestroy = true; + for (const auto& embInfo: mgmtEmbInfo) { + for (int index = 0; index < EMBEDDING_THREAD_NUM; index++) { + cvLastUpdateFinishMap[embInfo.name][index].notify_all(); + cvLastLookUpFinishMap[embInfo.name][index].notify_all(); + cvLastSendFinishMap[embInfo.name][index].notify_all(); + cvLastRecvFinishMap[embInfo.name][index].notify_all(); + } + } + { // 获取锁 避免KeyProcess中手动发送结束信息时通道关闭 std::unique_lock lockGuard(KEY_PROCESS_INSTANCE->destroyMutex); @@ -591,22 +442,18 @@ void HybridMgmt::Destroy() if (cacheManager != nullptr) { cacheManager = nullptr; } - if (hostEmbs != nullptr) { - hostEmbs->Join(TRAIN_CHANNEL_ID); - hostEmbs->Join(EVAL_CHANNEL_ID); - hostEmbs = nullptr; - } + JoinEmbeddingCacheThread(); procThreads.clear(); // 停止预处理 KEY_PROCESS_INSTANCE->Destroy(); LOG_DEBUG(MGMT + "Destroy hybrid_mgmt module end."); -}; +} -#ifndef GTEST /// 启动hybrid处理任务 /// \param type void HybridMgmt::TrainTask(TaskType type) { +#ifndef GTEST int channelId = TRAIN_CHANNEL_ID; int& theTrainBatchId = hybridMgmtBlock->hybridBatchId[channelId]; do { @@ -619,19 +466,9 @@ void HybridMgmt::TrainTask(TaskType type) } LOG_INFO(HYBRID_BLOCKING + "hybrid start task channel {} batch {}", channelId, 
theTrainBatchId); - switch (type) { - case TaskType::HBM: - ParseKeysHBM(TRAIN_CHANNEL_ID, theTrainBatchId); - LOG_INFO(MGMT + "ParseKeysHBMBatchId = {}", theTrainBatchId); - break; - case TaskType::DDR: - ParseKeys(TRAIN_CHANNEL_ID, theTrainBatchId); - LOG_INFO(MGMT + "parseKeysBatchId = {}", theTrainBatchId); - break; - default: - throw std::invalid_argument("Invalid TaskType Type."); - } + ParseKeys(TRAIN_CHANNEL_ID, theTrainBatchId, type); } while (true); +#endif } /// 推理数据处理:数据处理状态正常,处理的batch数小于用户预设值或者设为-1时,会循环处理; @@ -639,11 +476,20 @@ void HybridMgmt::TrainTask(TaskType type) /// \return void HybridMgmt::EvalTask(TaskType type) { +#ifndef GTEST int channelId = EVAL_CHANNEL_ID; int& evalBatchId = hybridMgmtBlock->hybridBatchId[channelId]; do { hybridMgmtBlock->CheckAndSetBlock(channelId); if (hybridMgmtBlock->GetBlockStatus(channelId)) { + LOG_DEBUG("eval channel block at batchId:{}, needWaitSave:{}", + evalBatchId, hybridMgmtBlock->IsNeedWaitSave()); + std::unique_lock checkSaveLocker(saveMutex); + cvCheckSave.wait(checkSaveLocker, [this] { + return !hybridMgmtBlock->IsNeedWaitSave() || mutexDestroy; + }); + hybridMgmtBlock->Wake(TRAIN_CHANNEL_ID); + LOG_DEBUG("wake TrainTask"); hybridMgmtBlock->DoBlock(channelId); } if (!isRunning) { @@ -651,326 +497,250 @@ void HybridMgmt::EvalTask(TaskType type) } LOG_INFO(HYBRID_BLOCKING + "hybrid start task channel {} batch {}", channelId, evalBatchId); - switch (type) { - case TaskType::HBM: - ParseKeysHBM(EVAL_CHANNEL_ID, evalBatchId); - LOG_INFO(MGMT + "HBM evalBatchId = {}", evalBatchId); - break; - case TaskType::DDR: - ParseKeys(EVAL_CHANNEL_ID, evalBatchId); - LOG_INFO(MGMT + "DDR evalBatchId = {}", evalBatchId); - break; - default: - throw std::invalid_argument("Invalid TaskType Type."); - } + ParseKeys(EVAL_CHANNEL_ID, evalBatchId, type); } while (true); +#endif } -/// HBM模式下,发送key process线程已处理好的各类型向量到指定通道中 -/// \param channelId 通道索引(训练/推理) -/// \param batchId 已处理的batch数 -/// \return -bool HybridMgmt::ParseKeysHBM(int channelId, int& batchId) -{ - LOG_INFO(MGMT + "nBatch:{} channelId:{} batchId:{}, ParseKeys with HBM mode start.", - mgmtRankInfo.nBatch, channelId, batchId); - - // 循环处理每个表的数据 - for (const auto& embInfo: mgmtEmbInfo) { - TimeCost parseKeysTc; - // 获取各类向量,如果为空指针,退出当前函数 - auto infoVecs = KEY_PROCESS_INSTANCE->GetInfoVec(batchId, embInfo.name, channelId, ProcessedInfo::RESTORE); - if (infoVecs == nullptr) { - LOG_INFO(MGMT + "channelId:{} batchId:{}, ParseKeys infoVecs empty !", channelId, batchId); - return false; - } - LOG_DEBUG("channelId:{} batchId:{}, ParseKeysHBM GetInfoVec end.", channelId, batchId); - // 动态shape场景下,获取all2all向量(通信量矩阵) - TimeCost sendTensorsSyncTC; - unique_ptr> all2all = nullptr; - if (!mgmtRankInfo.useStatic) { - TimeCost getTensorsSyncTC; - all2all = KEY_PROCESS_INSTANCE->GetInfoVec(batchId, embInfo.name, channelId, ProcessedInfo::ALL2ALL); - LOG_DEBUG("channelId:{} batchId:{}, getTensorsSyncTC(ms):{}", - channelId, batchId, getTensorsSyncTC.ElapsedMS()); - if (all2all == nullptr) { - LOG_ERROR("Information vector is nullptr!"); - return false; - } - sendTensorsSyncTC = TimeCost(); // 重新初始化,不计算getTensors耗时 - TimeCost sendAll2AllScSyncTC; - hdTransfer->Send(TransferChannel::ALL2ALL, *all2all, channelId, embInfo.name); - LOG_DEBUG("channelId:{} batchId:{}, sendAll2AllScSyncTC(ms):{}", - channelId, batchId, sendAll2AllScSyncTC.ElapsedMS()); - } - - // 发送查询向量 - TimeCost sendLookupSyncTC; - hdTransfer->Send(TransferChannel::LOOKUP, { infoVecs->back() }, channelId, embInfo.name); - infoVecs->pop_back(); - 
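// Sketch of the back()/pop_back() convention these senders rely on:
// KeyProcess packs the tensor that must be sent first at the *end* of
// infoVecs, so each send peels it off the back before the remainder goes out
// as the restore vectors. Stand-in Tensor type; illustrative only.
#include <utility>
#include <vector>

using Tensor = std::vector<int>;  // stand-in for tensorflow::Tensor

Tensor TakeBack(std::vector<Tensor>& infoVecs)
{
    Tensor t = std::move(infoVecs.back());  // last-packed tensor, sent first
    infoVecs.pop_back();
    return t;
}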
LOG_DEBUG("channelId:{} batchId:{}, sendLookupSyncTC(ms):{}", channelId, batchId, sendLookupSyncTC.ElapsedMS()); - - // 训练时,使用全局去重聚合梯度,发送全局去重的key和对应的恢复向量 - if (mgmtRankInfo.useSumSameIdGradients && channelId == TRAIN_CHANNEL_ID) { - SendUniqKeysAndRestoreVecHBM(channelId, batchId, embInfo, infoVecs); - } - - // 发送恢复向量 - TimeCost sendRestoreSyncTC; - hdTransfer->Send(TransferChannel::RESTORE, *infoVecs, channelId, embInfo.name); - LOG_DEBUG("sendRestoreSyncTC(ms):{}, sendTensorsSyncTC(ms):{}, parseKeysTc HBM mode (ms):{}", - sendRestoreSyncTC.ElapsedMS(), sendTensorsSyncTC.ElapsedMS(), parseKeysTc.ElapsedMS()); - LOG_INFO(MGMT + "channelId:{} batchId:{}, embName:{}, ParseKeys with HBM mode end.", - channelId, batchId, embInfo.name); - } - batchId++; - return true; -} - -void HybridMgmt::SendUniqKeysAndRestoreVecHBM(int channelId, int &batchId, const EmbInfo &embInfo, - const unique_ptr> &infoVecs) const +void HybridMgmt::SendUniqKeysAndRestoreVecHBM(const EmbBaseInfo &info, + const unique_ptr> &infoVecs, bool isGrad) const { TimeCost sendUniqueKeysSyncTC; LOG_DEBUG("channelId:{} batchId:{}, global unique, table name: {}, is grad: {}", - channelId, batchId, embInfo.name, embInfo.isGrad); - if (embInfo.isGrad) { - hdTransfer->Send(TransferChannel::UNIQKEYS, {infoVecs->back()}, channelId, embInfo.name); + info.channelId, info.batchId, info.name, isGrad); + if (isGrad) { + hdTransfer->Send(TransferChannel::UNIQKEYS, {infoVecs->back()}, info.channelId, info.name); } infoVecs->pop_back(); LOG_DEBUG("channelId:{} batchId:{}, sendUniqueKeysSyncTC(ms):{}", - channelId, batchId, sendUniqueKeysSyncTC.ElapsedMS()); + info.channelId, info.batchId, sendUniqueKeysSyncTC.ElapsedMS()); TimeCost sendUniqueRestoreVecSyncTC; - if (embInfo.isGrad) { - hdTransfer->Send(TransferChannel::RESTORE_SECOND, {infoVecs->back()}, channelId, embInfo.name); + if (isGrad) { + hdTransfer->Send(TransferChannel::RESTORE_SECOND, {infoVecs->back()}, info.channelId, info.name); } infoVecs->pop_back(); LOG_DEBUG("channelId:{} batchId:{}, sendUniqueRestoreVecSyncTC(ms):{}", - channelId, batchId, sendUniqueRestoreVecSyncTC.ElapsedMS()); + info.channelId, info.batchId, sendUniqueRestoreVecSyncTC.ElapsedMS()); } -#endif -/// 当前处理的batch是否是最后一个batch +/// 当前处理的batch是否是最后一个batch,涵盖train切换eval、save场景 /// \param batchId 已处理的batch数 -/// \param channelId 通道索引(训练/推理) /// \return -bool HybridMgmt::EndBatch(int batchId, int channelId) const +bool HybridMgmt::IsTrainEndBatch(int batchId) const +{ + // case 1:需要切eval + // case 2:需要save时,补发pos后被阻塞,等待save完成,避免embCache状态发送变化 + // batchId是从0开始的,所以要+1对上step + bool isNeedSwitchToEval = mgmtRankInfo.ctrlSteps[TRAIN_CHANNEL_ID] != -1 && + (batchId + 1) % mgmtRankInfo.ctrlSteps[TRAIN_CHANNEL_ID] == 0; + bool isNeedSave = mgmtRankInfo.ctrlSteps[SAVE_STEP_INDEX] != -1 && + mgmtRankInfo.ctrlSteps[SAVE_STEP_INDEX] != 0 && + (batchId + 1) % mgmtRankInfo.ctrlSteps[SAVE_STEP_INDEX] == 0; + LOG_DEBUG("mgmtRankInfo.ctrlSteps[TRAIN_CHANNEL_ID]:{}, batchId:{}", + mgmtRankInfo.ctrlSteps[TRAIN_CHANNEL_ID], batchId); + LOG_DEBUG("isNeedSwitchToEval:{}, isNeedSave:{}", isNeedSwitchToEval, isNeedSave); + return isNeedSwitchToEval || isNeedSave; +} + +bool HybridMgmt::IsEvalEndBatch(int batchId) const { - return (batchId % mgmtRankInfo.ctrlSteps[channelId] == 0 && mgmtRankInfo.ctrlSteps[channelId] != -1); + // batchId是从0开始的,所以要+1对上step,表示当前step之后要结束eval了 + return (batchId + 1) == hybridMgmtBlock->stepsInterval[EVAL_CHANNEL_ID]; } /// DDR模式下,发送key process线程已处理好的各类型向量到指定通道中 /// \param channelId 通道索引(训练/推理) /// \param batchId 
已处理的batch数 /// \return -bool HybridMgmt::ParseKeys(int channelId, int& batchId) +bool HybridMgmt::ParseKeys(int channelId, int& batchId, TaskType type) { #ifndef GTEST LOG_INFO(MGMT + "channelId:{} batchId:{}, DDR mode, ParseKeys start.", channelId, batchId); TimeCost parseKeyTC; - int start = batchId; bool remainBatch = true; // 是否从通道获取了数据 + vector parseKeyThreadPool; for (const auto& embInfo : mgmtEmbInfo) { - ProcessEmbInfo(embInfo.name, batchId, channelId, remainBatch); - // 通道数据已空 - if (!remainBatch) { - LOG_DEBUG("last batch ending"); - return false; + EmbBaseInfo info = {.batchId=batchId, .channelId=channelId, .name=embInfo.name}; + switch (type) { + case TaskType::HBM: + parseKeyThreadPool.emplace_back([this, info, &remainBatch, embInfo]() { + ProcessEmbInfoHBM(info, remainBatch, embInfo.isGrad); + }); + break; + case TaskType::DDR: + if (!isSSDEnabled) { + parseKeyThreadPool.emplace_back([this, info, &remainBatch, embInfo]() { + ProcessEmbInfoDDR(info, remainBatch); + }); + } else { + parseKeyThreadPool.emplace_back([this, info, &remainBatch, embInfo]() { + ProcessEmbInfoSSD(info, remainBatch); + }); + } + break; + default: + throw std::invalid_argument("Invalid TaskType Type."); } } - batchId++; + for (auto& t : parseKeyThreadPool) { + t.join(); + } + // 通道数据已空 + if (!remainBatch) { + LOG_DEBUG("last batch ending"); + return false; + } if (!isRunning) { return false; } - EmbHDTransWrap(channelId, batchId - 1, start); LOG_DEBUG(MGMT + "channelId:{} batchId:{}, ParseKeys end, parseKeyTC(ms):{}", channelId, batchId, parseKeyTC.ElapsedMS()); + batchId++; #endif return true; } -void HybridMgmt::HandlePrepareDDRDataRet(TransferRet prepareSSDRet) const +void HybridMgmt::ProcessEmbInfoHBM(const EmbBaseInfo &info, bool& remainBatchOut, bool isGrad) { - LOG_ERROR("Transfer embedding with DDR and SSD error."); - if (prepareSSDRet == TransferRet::SSD_SPACE_NOT_ENOUGH) { - LOG_ERROR("PrepareDDRData: SSD available space is not enough."); - throw runtime_error("ssdVocabSize too small"); + TimeCost parseKeysTc; + LOG_DEBUG("ProcessEmbInfoHBM table:{}, batchId:{}, channel:{}", info.name, info.batchId, info.channelId); + + // 获取各类向量,如果为空指针,退出当前函数 + bool isEos = false; + auto infoVecs = KEY_PROCESS_INSTANCE->GetInfoVec(info, ProcessedInfo::RESTORE, isEos); + if (isEos) { + HandleEosCaseHBM(info.name, info.batchId, info.channelId, remainBatchOut); + return; + } + if (infoVecs == nullptr) { + LOG_INFO(MGMT + "table:{}, channelId:{} batchId:{}, ParseKeys infoVecs empty !", + info.name, info.channelId, info.batchId); + remainBatchOut = false; + return; + } + LOG_DEBUG("table:{}, channelId:{} batchId:{}, ParseKeysHBM GetInfoVec end", + info.name, info.channelId, info.batchId); + + // 动态shape场景下,获取all2all向量(通信量矩阵) + SendAll2AllVec(info, remainBatchOut); + if (!remainBatchOut) { + return; + } + + // 发送查询向量 + TimeCost sendLookupSyncTC; + hdTransfer->Send(TransferChannel::LOOKUP, { infoVecs->back() }, info.channelId, info.name); + infoVecs->pop_back(); + LOG_DEBUG("table:{}, channelId:{} batchId:{}, sendLookupSyncTC(ms):{}", + info.name, info.channelId, info.batchId, sendLookupSyncTC.ElapsedMS()); + + // 训练时,使用全局去重聚合梯度,发送全局去重的key和对应的恢复向量 + if (mgmtRankInfo.useSumSameIdGradients && info.channelId == TRAIN_CHANNEL_ID) { + SendUniqKeysAndRestoreVecHBM(info, infoVecs, isGrad); } - if (prepareSSDRet == TransferRet::DDR_SPACE_NOT_ENOUGH) { - LOG_ERROR("PrepareDDRData: DDR available space is not enough."); - throw runtime_error("ddrVocabSize too small"); + + // 发送恢复向量 + TimeCost sendRestoreSyncTC; + 
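// The reworked ParseKeys above fans out one worker per embedding table
// (parseKeyThreadPool) and joins them all before batchId advances. A
// distilled sketch of that fan-out/join pattern, with illustrative names:
#include <functional>
#include <string>
#include <thread>
#include <vector>

void ForEachTableParallel(const std::vector<std::string>& tables,
                          const std::function<void(const std::string&)>& work)
{
    std::vector<std::thread> pool;
    pool.reserve(tables.size());
    for (const auto& name : tables) {
        pool.emplace_back([&work, name] { work(name); });  // one worker per table
    }
    for (auto& t : pool) {
        t.join();  // only move to the next batch once every table is done
    }
}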
hdTransfer->Send(TransferChannel::RESTORE, *infoVecs, info.channelId, info.name); + LOG_DEBUG("table:{}, sendRestoreSyncTC(ms):{}, parseKeysTc HBM mode (ms):{}", + info.name, sendRestoreSyncTC.ElapsedMS(), parseKeysTc.ElapsedMS()); + + LOG_INFO(MGMT + "table:{}, channelId:{} batchId:{}, embName:{}, ParseKeys with HBM mode end.", + info.name, info.channelId, info.batchId, info.name); + + if (info.channelId == TRAIN_CHANNEL_ID) { + alreadyTrainOnce = true; } - throw runtime_error("Transfer embedding with DDR and SSD error."); } -#ifndef GTEST /// 构造训练所需的各种向量数据 /// \param embName 表名 /// \param batchId 已处理的batch数 /// \param channelId 通道索引(训练/推理) /// \param remainBatchOut 是否从通道获取了数据 -/// \return HBM是否还有剩余空间 -bool HybridMgmt::ProcessEmbInfo(const std::string& embName, int batchId, int channelId, bool& remainBatchOut) +void HybridMgmt::ProcessEmbInfoDDR(const EmbBaseInfo& info, bool& remainBatchOut) { +#ifndef GTEST TimeCost getAndSendTensorsTC; - TimeCost getTensorsTC; + LOG_DEBUG("ProcessEmbInfoDDR start, table:{}, channel:{}, batchId:{}", info.name, info.channelId, info.batchId); - if (hostHashMaps->embHashMaps.find(embName) == hostHashMaps->embHashMaps.end()) { - LOG_ERROR("Failed to get embedding hash map with given name: {}", embName); - return false; + if (info.channelId == TRAIN_CHANNEL_ID && info.batchId == hybridMgmtBlock->maxTrainStep) { + HandleReachMaxStepCase(info, remainBatchOut); + return; } - auto& embHashMap = hostHashMaps->embHashMaps.at(embName); - // 计数初始化 - std::shared_ptr table = EmbeddingMgmt::Instance()->GetTable(embName); - table->SetStartCount(); - - // 获取查询向量 - auto lookupKeys = KEY_PROCESS_INSTANCE->GetLookupKeys(batchId, embName, channelId); - if (lookupKeys.empty()) { - remainBatchOut = false; - LOG_WARN("channelId:{} batchId:{}, embName:{}, GetLookupKeys result is empty.", channelId, batchId, embName); - return false; - } - LOG_DEBUG("channelId:{} batchId:{}, embName:{}, GetLookupKeys end.", channelId, batchId, embName); - // 获取各类向量,如果为空指针,退出当前函数 - unique_ptr> infoVecs = KEY_PROCESS_INSTANCE->GetInfoVec(batchId, embName, channelId, - ProcessedInfo::RESTORE); - if (infoVecs == nullptr) { - LOG_ERROR("Information vector is nullptr!"); - return false; + // 只有在每次GetUniqueKeys的时候才知道上游是否已经EOS + // 注意GetUniqueKeys与EOS关联,需要在ProcessEmbInfoDDR最先调用,如需调整位置,请参考并适配其他函数 + // 获取GlobalUnique向量 + auto uniqueKeys = GetUniqueKeys(info, remainBatchOut); + if (uniqueKeys.empty()) { + return; } - LOG_DEBUG("channelId:{} batchId:{}, GetInfoVec end, getTensorsTC(ms):{}", - channelId, batchId, getTensorsTC.ElapsedMS()); - - TimeCost sendRestoreSyncTC; - hdTransfer->Send(TransferChannel::RESTORE, *infoVecs, channelId, embName); - LOG_DEBUG("channelId:{} batchId:{}, send restore end, sendRestoreSyncTC(ms):{}", - channelId, batchId, sendRestoreSyncTC.ElapsedMS()); - - // 调用SSD cache缓存处理流程,获取锁避免保存时修改keyOffsetMap - table->mutSave_.lock(); - LOG_DEBUG("acquire save lock, table:{}", table->name); - PrepareDDRData(table, lookupKeys, channelId, batchId); - // 计算查询向量;记录需要被换出的HBM偏移 - vector tmpData; - vector offsetsOut; - DDRParam ddrParam(tmpData, offsetsOut); - TimeCost hostHashMapProcessTC; - - hostHashMaps->Process(embName, lookupKeys, ddrParam, channelId); - table->mutSave_.unlock(); - LOG_DEBUG("release save lock, table:{}", table->name); - - LOG_DEBUG("channelId:{} batchId:{}, hostHashMapProcessTC(ms):{}", - channelId, batchId, hostHashMapProcessTC.ElapsedMS()); - - if (mgmtRankInfo.useSumSameIdGradients && channelId == TRAIN_CHANNEL_ID && remainBatchOut) { - 
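// GlobalUnique-style dedup, sketched: produce the unique key list plus a
// restore vector mapping every original position to its slot in the unique
// list, so gradients for the same id can be summed once on device (the pair
// later shipped over UNIQKEYS and RESTORE_SECOND). Stand-alone illustrative
// version; the real GlobalUnique lives in KeyProcess.
#include <cstdint>
#include <unordered_map>
#include <vector>

void GlobalUniqueSketch(const std::vector<int64_t>& keys,
                        std::vector<int64_t>& uniqueKeys,
                        std::vector<int32_t>& restoreVec)
{
    std::unordered_map<int64_t, int32_t> firstSlot;
    firstSlot.reserve(keys.size());
    uniqueKeys.clear();
    restoreVec.resize(keys.size());
    for (size_t i = 0; i < keys.size(); ++i) {
        auto [it, inserted] =
            firstSlot.try_emplace(keys[i], static_cast<int32_t>(uniqueKeys.size()));
        if (inserted) {
            uniqueKeys.push_back(keys[i]);  // first occurrence of this id
        }
        restoreVec[i] = it->second;         // slot of the id in uniqueKeys
    }
}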
SendUniqKeysAndRestoreVecDDR(embName, batchId, channelId, ddrParam); + // 获取GlobalUnique对应的restoreVectorSec + auto restoreVecSec = GetRestoreVecSec(info, remainBatchOut); + if (restoreVecSec.empty()) { + return; } - TimeCost sendTensorsTC; - hdTransfer->Send(TransferChannel::LOOKUP, { ddrParam.tmpDataOut.front() }, channelId, embName); - ddrParam.tmpDataOut.erase(ddrParam.tmpDataOut.cbegin()); - hdTransfer->Send(TransferChannel::SWAP, ddrParam.tmpDataOut, channelId, embName); - if (!mgmtRankInfo.useStatic) { - unique_ptr> all2all = KEY_PROCESS_INSTANCE->GetInfoVec(batchId, embName, - channelId, ProcessedInfo::ALL2ALL); - if (all2all == nullptr) { - LOG_ERROR("Information vector is nullptr!"); - return false; - } - hdTransfer->Send(TransferChannel::ALL2ALL, *all2all, channelId, embName); + SendAll2AllVec(info, remainBatchOut); + if (!remainBatchOut) { + return; } - LOG_DEBUG("channelId:{} batchId:{}, ProcessEmbInfo end, sendTensorsTC(ms):{}, getAndSendTensorsTC(ms):{}", - channelId, batchId, sendTensorsTC.ElapsedMS(), getAndSendTensorsTC.ElapsedMS()); - if (!isSSDEnabled && embHashMap.HasFree(lookupKeys.size())) { // check free > next one batch - LOG_WARN(MGMT + "channelId:{} batchId:{}, embName:{}, freeSize not enough:{}", - channelId, batchId, embName, lookupKeys.size()); - return false; + SendRestoreVec(info, remainBatchOut); + if (!remainBatchOut) { + return; } - return true; -} -void HybridMgmt::SendUniqKeysAndRestoreVecDDR(const string &embName, int &batchId, int &channelId, DDRParam &ddrParam) -{ - LOG_DEBUG("channelId:{} batchId:{}, embName:{}, SendUniqKeysAndRestoreVecDDR start.", channelId, batchId, embName); - vector uniqueKeys; - vector restoreVecSec; - KEY_PROCESS_INSTANCE->GlobalUnique(ddrParam.offsetsOut, uniqueKeys, restoreVecSec); + std::pair, vector> swapInKoPair; + std::pair, vector> swapOutKoPair; + GetSwapPairsAndKey2Offset(info, uniqueKeys, swapInKoPair, swapOutKoPair); - TimeCost sendUniqueKeysSyncTC; - hdTransfer->Send(TransferChannel::UNIQKEYS, {mgmtRankInfo.useDynamicExpansion ? 
Vec2TensorI64(uniqueKeys) : - Vec2TensorI32(uniqueKeys) }, channelId, embName); - LOG_DEBUG("channelId:{} batchId:{}, sendUniqueKeysSyncTC(ms):{}", - channelId, batchId, sendUniqueKeysSyncTC.ElapsedMS()); + SendLookupOffsets(info, uniqueKeys, restoreVecSec); - TimeCost sendRestoreVecSecSyncTC; - hdTransfer->Send(TransferChannel::RESTORE_SECOND, {Vec2TensorI32(restoreVecSec) }, channelId, embName); - LOG_DEBUG("channelId:{} batchId:{}, sendRestoreVecSecSyncTC(ms):{}", - channelId, batchId, sendRestoreVecSecSyncTC.ElapsedMS()); -} + SendGlobalUniqueVec(info, uniqueKeys, restoreVecSec); -/// 发送H2D和接收D2H向量 -/// \param channelId 通道索引(训练/推理) -/// \param batchId 已处理的batch数 -/// \param start -void HybridMgmt::EmbHDTransWrap(int channelId, const int& batchId, int start) -{ - LOG_INFO(MGMT + "start:{} channelId:{} batchId:{}, EmbHDTransWrap start.", start, channelId, batchId); - TimeCost embHDTransWrapTC; - TimeCost hostEmbsTC; - hostEmbs->Join(channelId); - LOG_DEBUG("channelId:{} batchId:{}, hostEmbs Join end, hostEmbsTC(ms):{}", - channelId, batchId, hostEmbsTC.ElapsedMS()); - if (!isRunning) { + auto isNeedReturn = HandleSpecialProcessStatusDDR(info, getAndSendTensorsTC, swapInKoPair, swapOutKoPair); + if (isNeedReturn) { return; } - EmbHDTrans(channelId, batchId); - LOG_DEBUG("channelId:{} batchId:{}, EmbHDTransWrap end, embHDTransWrapTC(ms):{}", - channelId, batchId, embHDTransWrapTC.ElapsedMS()); -} -/// 发送H2D和接收D2H向量,并更新host emb -/// \param channelId 通道索引(训练/推理) -/// \param batchId 已处理的batch数 -void HybridMgmt::EmbHDTrans(const int channelId, const int batchId) -{ - EASY_FUNCTION(profiler::colors::Blue) - EASY_VALUE("mgmtProcess", batchId) - LOG_DEBUG(MGMT + "channelId:{} batchId:{}, EmbHDTrans start.", channelId, batchId); - TimeCost h2dTC; - // 发送host需要换出的emb - for (const auto& embInfo: mgmtEmbInfo) { - const auto& missingKeys = EmbeddingMgmt::Instance()->GetMissingKeys(embInfo.name); - vector h2dEmb; - hostEmbs->GetH2DEmb(missingKeys, embInfo.name, h2dEmb); // order! - hdTransfer->Send(TransferChannel::H2D, h2dEmb, channelId, embInfo.name, batchId); + TimeCost swapProcessTC; + EnqueueSwapInfo(info, swapInKoPair, swapOutKoPair); + + auto &swapInPos = swapInKoPair.second; + auto &swapOutPos = swapOutKoPair.second; + auto lastSwapInPos = lastSwapInPosMap[info.name]; + lastSwapInPosMap[info.name] = swapInPos; // 暂存待下一步发送 + + // 下发swaptensor + if (info.batchId != 0) { + SendTensorForSwap(info, lastSwapInPos, swapOutPos); } - LOG_DEBUG("channelId:{} batchId:{}, EmbHDTrans h2d end, h2dTC(ms):{}", channelId, batchId, h2dTC.ElapsedMS()); - TimeCost d2hTC; - // 接收device换出的emb,并更新到host上 - for (const auto& embInfo: mgmtEmbInfo) { - const auto& missingKeys = EmbeddingMgmt::Instance()->GetMissingKeys(embInfo.name); - hostEmbs->UpdateEmbV2(missingKeys, channelId, embInfo.name); // order! - EmbeddingMgmt::Instance()->ClearMissingKeys(embInfo.name); + HandleEndBatchCase(info, swapInPos); + + if (info.channelId == TRAIN_CHANNEL_ID) { + alreadyTrainOnce = true; } - LOG_DEBUG("channelId:{} batchId:{}, EmbHDTrans d2h end, d2hTC(ms):{}", channelId, batchId, d2hTC.ElapsedMS()); -} + + LOG_DEBUG("ProcessEmbInfoDDR end, table:{}, channel:{}, batchId:{} swapProcessTC(ms):{} getAndSendTensorsTC(ms):{}", + info.name, info.channelId, info.batchId, swapProcessTC.ElapsedMS(), getAndSendTensorsTC.ElapsedMS()); #endif +} /// hook通过时间或者step数触发淘汰 /// \return bool HybridMgmt::Evict() { #ifndef GTEST + std::lock_guard lk(evictMut); if (!isInitialized) { throw runtime_error("HybridMgmt not initialized. 
Call Initialize first."); } @@ -1001,8 +771,15 @@ bool HybridMgmt::Evict() } } else { if (GlobalEnv::useCombineFaae) { - for (auto& map : hostHashMaps->embHashMaps) { - EmbeddingMgmt::Instance()->EvictKeys(map.first, evictKeyMap[COMBINE_HISTORY_NAME]); + vector allTableNames; + int retCode = embCache->GetEmbTableNames(allTableNames); + if (retCode != H_OK) { + LOG_ERROR("GetEmbTableNames failed!"); + return false; + } + for (const string& embName : allTableNames) { + EvictKeys(embName, evictKeyMap[COMBINE_HISTORY_NAME]); + EvictSSDKeys(embName, evictKeyMap[COMBINE_HISTORY_NAME]); } } else { for (const auto& evict : as_const(evictKeyMap)) { @@ -1019,68 +796,24 @@ bool HybridMgmt::Evict() /// DDR模式下的淘汰:删除映射表、初始化host表、发送dev淘汰位置 /// \param embName /// \param keys -void HybridMgmt::EvictKeys(const string& embName, const vector& keys) -{ - std::shared_ptr table = EmbeddingMgmt::Instance()->GetTable(embName); - - table->EvictKeys(keys); - - const vector& evictOffsetDev = table->GetEvictedKeys(); - const vector& evictOffsetHost = table->GetHostEvictedKeys(); - - vector evictOffsetHostx(evictOffsetHost); - - size_t devVocabSize = table->GetDevVocabSize(); - for (int64_t& key: evictOffsetHostx) { - key -= static_cast(devVocabSize); - }; - - /* 淘汰Host侧 */ - if (!evictOffsetHost.empty()) { - hostEmbs->EvictInitEmb(embName, evictOffsetHost); - } - - vector tmpDataOut; - Tensor tmpData = Vec2TensorI32(evictOffsetDev); - tmpDataOut.emplace_back(tmpData); - tmpDataOut.emplace_back(Tensor(tensorflow::DT_INT32, { 1 })); - - auto evictLen = tmpDataOut.back().flat(); - auto evictSize = static_cast(evictOffsetDev.size()); - evictLen(0) = evictSize; - - hdTransfer->Send(TransferChannel::EVICT, tmpDataOut, TRAIN_CHANNEL_ID, embName); -} - -inline void HybridMgmt::PrepareDDRData(std::shared_ptr table, - const vector& keys, int channelId, int batchId) const +void HybridMgmt::EvictKeys(const string& embName, const vector& keys) { - if (!isSSDEnabled) { + if (keys.empty()) { return; } - LOG_DEBUG("channelId:{} batchId:{}, embTableName:{}, PrepareDDRData start.", channelId, batchId, table->name); - TimeCost prepareDDRDataTc; - TableInfo ti = table->GetTableInfo(); - TransferRet ret = cacheManager->TransferDDREmbWithSSD(ti, keys, channelId); - if (ret != TransferRet::TRANSFER_OK) { - HandlePrepareDDRDataRet(ret); + int retCode = embCache->RemoveEmbsByKeys(embName, keys); + if (retCode != H_OK) { + LOG_ERROR("RemoveEmbsByKeys failed!"); + return; } - LOG_DEBUG("channelId:{} batchId:{}, embTableName:{}, PrepareDDRData end, prepareDDRDataTc(ms):{}", - channelId, batchId, table->name, prepareDDRDataTc.ElapsedMS()); } -void HybridMgmt::EvictSSDKeys(const string& embName, const vector& keys) const +void HybridMgmt::EvictSSDKeys(const string& embName, const vector& keys) const { if (!isSSDEnabled) { return; } - vector ssdKeys; - for (auto& key : keys) { - if (cacheManager->IsKeyInSSD(embName, key)) { - ssdKeys.emplace_back(key); - } - } - cacheManager->EvictSSDEmbedding(embName, ssdKeys); + cacheManager->EvictSSDEmbedding(embName, keys); } int HybridMgmt::GetStepFromPath(const string& loadPath) const @@ -1134,19 +867,20 @@ void HybridMgmt::CountStepBySessionRun(int channelID, int steps) const /// \return 表使用大小 int64_t HybridMgmt::GetTableSize(const string& embName) const { + int64_t size = -1; #ifndef GTEST if (!isInitialized) { throw runtime_error("HybridMgmt not initialized. 
Call Initialize first."); } if (mgmtRankInfo.useDynamicExpansion) { - int64_t size = EmbeddingMgmt::Instance()->GetSize(embName); + size = EmbeddingMgmt::Instance()->GetSize(embName); LOG_INFO(MGMT + "dynamic expansion mode, get emb:[{}] size:{}", embName, size); return size; } if (!mgmtRankInfo.isDDR) { size_t maxOffset = EmbeddingMgmt::Instance()->GetMaxOffset(embName); - int64_t size = static_cast(maxOffset); + size = static_cast(maxOffset); LOG_INFO(MGMT + "HBM mode, get emb:[{}] size:{}", embName, size); return size; } @@ -1155,17 +889,11 @@ int64_t HybridMgmt::GetTableSize(const string& embName) const ssdSize = cacheManager->GetTableEmbeddingSize(embName); } - const auto& iter = hostHashMaps->embHashMaps.find(embName); - if (iter == hostHashMaps->embHashMaps.end()) { - LOG_ERROR(MGMT + "get maxOffset, wrong embName:{} ", embName); - return -1; - } - auto maxOffset = hostHashMaps->embHashMaps.at(embName).maxOffset; - int64_t size = static_cast(maxOffset) + ssdSize; - + uint32_t ddrSize = embCache->GetUsage(embName); + size = static_cast(ddrSize) + ssdSize; LOG_INFO(MGMT + "DDR/SSD mode, get emb:[{}] size:{}", embName, size); - return size; #endif + return size; } /// 获取table表容量大小 @@ -1184,8 +912,8 @@ int64_t HybridMgmt::GetTableCapacity(const string& embName) const return capacity; } LOG_WARN(MGMT + "no dynamic expansion mode, get emb:[{}] capacity failed", embName); - return -1; #endif + return -1; } /// 设置表的优化器信息 @@ -1199,3 +927,1281 @@ void HybridMgmt::SetOptimizerInfo(const string& embName, OptimizerInfo optimInfo } EmbeddingMgmt::Instance()->SetOptimizerInfo(embName, optimInfo); } + +void HybridMgmt::LookUpAddrs(const string &embName, int extEmbeddingSize) +{ + int id = 0; + uint64_t memSize = extEmbeddingSize * sizeof(float); + const std::string hbmSwapKeyQueName = "HBMSwapKeyQue"; + const std::string ddrSwapKeyQueName = "DDRSwapKeyQue"; + auto lookUpFunc = [this, memSize, embName, id]( + std::map>> &fromQue, + std::map>> &toQue, + const string &swapStr, const string &fromQueName + ) { + std::vector keys = fromQue[embName + swapStr].WaitAndPop(); + if (!isRunning) { + return; + } + std::vector addrs; + TimeCost lookupAddrsTC; + int rc = embCache->EmbeddingLookupAddrs(embName, keys, addrs); + if (rc != H_OK) { + lookupAddrSuccess = false; + LOG_ERROR("lookUpAddrs, table:{}, fromQue: {}, swapStr:{}, keys.size:{}, addrs.size:{}, pushId:{}", + embName, fromQueName, swapStr, keys.size(), addrs.size(), id); + throw runtime_error("EmbeddingLookupAddrs failed! error code:" + std::to_string(rc)); + } + if (&fromQue == &DDRSwapKeyQue && swapStr == SWAP_OUT_STR) { + for (auto &addr : addrs) { + auto *newAddr = (float*)malloc(memSize); + rc = memcpy_s(newAddr, memSize, addr, memSize); + if (rc != 0) { + lookupAddrSuccess = false; + throw runtime_error("memcpy_s failed! error code:" + std::to_string(rc)); + } + addr = newAddr; + } + rc = embCache->EmbeddingRemove(embName, keys); + if (rc != H_OK) { + lookupAddrSuccess = false; + throw runtime_error("EmbeddingRemove failed! 
error code:" + std::to_string(rc)); + } + } + LOG_DEBUG("table:{}, fromQue:{}, swapStr:{}, keys.size:{}, addrs.size:{}, pushId:{}, lookupAddrsTC(ms):{}", + embName, fromQueName, swapStr, keys.size(), addrs.size(), id, lookupAddrsTC.ElapsedMS()); + toQue[embName + swapStr].Pushv(addrs); + }; + while (isRunning && lookupAddrSuccess) { + lookUpFunc(DDRSwapKeyQue, DDRSwapAddrsQue, SWAP_OUT_STR, ddrSwapKeyQueName); + lookUpFunc(DDRSwapKeyQue, DDRSwapAddrsQue, SWAP_IN_STR, ddrSwapKeyQueName); + lookUpFunc(HBMSwapKeyQue, tableToQueueLookup, SWAP_IN_STR, hbmSwapKeyQueName); + lookUpFunc(HBMSwapKeyQue, tableToQueueLookup, SWAP_OUT_STR, hbmSwapKeyQueName); + id++; + lookUpSwapInAddrsPushId[embName]++; + } +} + +void HybridMgmt::LookUpSwapAddrs(const string &embName, const string &swapStr) +{ + int id = 0; + std::string swapName = embName + swapStr; + while (isRunning && lookupAddrSuccess) { + std::vector keys = HBMSwapKeyQue[swapName].WaitAndPop(); + if (!isRunning) { + return; + } + vector addrs; + TimeCost lookupAddrsTC; + int rc = embCache->EmbeddingLookupAddrs(embName, keys, addrs); + if (rc != H_OK) { + lookupAddrSuccess = false; + throw runtime_error("EmbeddingLookupAddrs failed! error code: " + std::to_string(rc)); + } + LOG_DEBUG( + "table:{}, swapStr:{}, keys.size:{}, addrs.size:{}, pushId:{}, lookupAddrsTC(ms):{}", + embName, swapStr, keys.size(), addrs.size(), id, lookupAddrsTC.ElapsedMS()); + tableToQueueLookup[swapName].Pushv(addrs); + if (swapStr==SWAP_IN_STR) { + lookUpSwapInAddrsPushId[embName]++; + LOG_DEBUG("LookUpSwapAddrs, table:{}, pushId:{}, lookUpSwapInAddrsPushId:{}", + embName, id, lookUpSwapInAddrsPushId[embName]); + } + id++; + } +} + +/// 导出npu的embedding +void HybridMgmt::FetchDeviceEmb() +{ + // 数据处理线程上锁 + KEY_PROCESS_INSTANCE->LoadSaveLock(); + + if (mgmtRankInfo.isDDR) { + // DDR模式保存host的emb表以及hashmap + LOG_DEBUG(MGMT + "start host side save: ddr mode"); + for (const auto &embInfo: mgmtEmbInfo) { + std::vector> koVec; + embCache->ExportDeviceKeyOffsetPairs(embInfo.name, koVec); + std::vector swapOutPos; + for (const auto &p : koVec) { + swapOutPos.push_back(p.second); + } + + vector swapTensor; + swapTensor.emplace_back(Vec2TensorI32(swapOutPos)); + swapTensor.emplace_back(Tensor(tensorflow::DT_INT32, {1})); + auto swapOutLen = swapTensor.back().flat(); + swapOutLen(0) = swapOutPos.size(); + LOG_DEBUG(MGMT + "save swapOutPos size:{}", swapOutPos.size()); + // 发送SwapOutPos信息 + hdTransfer->Send(TransferChannel::SAVE_H2D, swapTensor, TRAIN_CHANNEL_ID, embInfo.name); + } + } + KEY_PROCESS_INSTANCE->LoadSaveUnlock(); +} + +// 这里就是新增的embedding处理线程 +void HybridMgmt::EmbeddingTask() +{ + for (const auto& embInfo: mgmtEmbInfo) { + lastUpdateFinishStepMap[embInfo.name] = 0; + lastLookUpFinishStepMap[embInfo.name] = 0; + lastSendFinishStepMap[embInfo.name] = 0; + lastRecvFinishStepMap[embInfo.name] = 0; + } + + TimeCost embHDTransTC; + MultiThreadEmbHDTransWrap(); + LOG_DEBUG("embHDTransTC(ms):{}", embHDTransTC.ElapsedMS()); +} + +void HybridMgmt::MultiThreadEmbHDTransWrap() +{ + for (int index = 0; index < EMBEDDING_THREAD_NUM; index++) { + for (const auto& embInfo: mgmtEmbInfo) { + CreateEmbeddingLookUpAndSendThread(index, embInfo); + CreateEmbeddingReceiveAndUpdateThread(index, embInfo); + } + } +} + +void HybridMgmt::EmbeddingLookUpAndSendDDR(int batchId, int index, const EmbInfo& embInfo) +{ + int cvNotifyIndex = 0; + if (index + 1 != EMBEDDING_THREAD_NUM) { + cvNotifyIndex = index + 1; + } + + EmbTaskInfo info = { + .batchId=batchId, + .threadIdx=index, + 
.cvNotifyIndex=cvNotifyIndex, + .extEmbeddingSize=embInfo.extEmbeddingSize, + .name=embInfo.name + }; + vector h2dEmb; + + auto isSuccess = EmbeddingLookUpDDR(info, h2dEmb); + if (!isSuccess) { + LOG_INFO("HybridMgmt is not running"); + return; + } + + EmbeddingSendDDR(info, h2dEmb); +} + +void HybridMgmt::EmbeddingReceiveAndUpdateDDR(int batchId, int index, const EmbInfo& embInfo) +{ + int cvNotifyIndex = 0; + if (index + 1 != EMBEDDING_THREAD_NUM) { + cvNotifyIndex = index + 1; + } + + EmbTaskInfo info = { + .batchId=batchId, + .threadIdx=index, + .cvNotifyIndex=cvNotifyIndex, + .extEmbeddingSize=embInfo.extEmbeddingSize, + .name=embInfo.name + }; + + float* ptr = nullptr; + vector swapOutAddrs; + auto isSuccess = EmbeddingReceiveDDR(info, ptr, swapOutAddrs); + if (!isSuccess) { + LOG_INFO("HybridMgmt is not running"); + return; + } + + EmbeddingUpdateDDR(info, ptr, swapOutAddrs); +} + +void HybridMgmt::EmbeddingLookUpAndSendSSD(int batchId, int index, const EmbInfo& embInfo) +{ + int cvNotifyIndex = 0; + if (index + 1 != EMBEDDING_THREAD_NUM) { + cvNotifyIndex = index + 1; + } + + EmbTaskInfo info = { + .batchId=batchId, + .threadIdx=index, + .cvNotifyIndex=cvNotifyIndex, + .extEmbeddingSize=embInfo.extEmbeddingSize, + .name=embInfo.name + }; + vector h2dEmb; + + auto isSuccess = EmbeddingLookUpSSD(info, h2dEmb); + if (!isSuccess) { + LOG_INFO("HybridMgmt is not running"); + return; + } + + EmbeddingSendSSD(info, h2dEmb); +} + +void HybridMgmt::EmbeddingReceiveAndUpdateSSD(int batchId, int index, const EmbInfo& embInfo) +{ + int cvNotifyIndex = 0; + if (index + 1 != EMBEDDING_THREAD_NUM) { + cvNotifyIndex = index + 1; + } + + EmbTaskInfo info = { + .batchId=batchId, + .threadIdx=index, + .cvNotifyIndex=cvNotifyIndex, + .extEmbeddingSize=embInfo.extEmbeddingSize, + .name=embInfo.name + }; + float* ptr = nullptr; + vector swapOutAddrs; + int64_t dims0 = 0; + EmbeddingReceiveSSD(info, ptr, swapOutAddrs, dims0); + + EmbeddingUpdateSSD(info, ptr, swapOutAddrs, dims0); +} + + +/// 构造训练所需的各种向量数据 +/// \param embName 表名 +/// \param batchId 已处理的batch数 +/// \param channelId 通道索引(训练/推理) +/// \param remainBatchOut 是否从通道获取了数据 +/// \return 是否处理成功 +void HybridMgmt::ProcessEmbInfoSSD(const EmbBaseInfo& info, bool& remainBatchOut) +{ +#ifndef GTEST + TimeCost getAndSendTensorsTC; + LOG_DEBUG("ProcessEmbInfoSSD table:{}, channel:{}, batchId:{}", info.name, info.channelId, info.batchId); + + if (info.channelId == TRAIN_CHANNEL_ID && info.batchId == hybridMgmtBlock->maxTrainStep) { + HandleReachMaxStepCase(info, remainBatchOut); + return; + } + + // 只有在每次GetUniqueKeys的时候才知道上游是否已经EOS + // 注意GetUniqueKeys与EOS关联,需要在ProcessEmbInfoSSD最先调用,如需调整位置,请参考并适配其他函数 + // 获取GlobalUnique向量 + auto uniqueKeys = GetUniqueKeys(info, remainBatchOut); + if (uniqueKeys.empty()) { + return; + } + + // 获取GlobalUnique对应的restoreVectorSec + auto restoreVecSec = GetRestoreVecSec(info, remainBatchOut); + if (restoreVecSec.empty()) { + return; + } + + SendAll2AllVec(info, remainBatchOut); + if (!remainBatchOut) { + return; + } + + SendRestoreVec(info, remainBatchOut); + if (!remainBatchOut) { + return; + } + + std::pair, vector> swapInKoPair; + std::pair, vector> swapOutKoPair; + GetSwapPairsAndKey2Offset(info, uniqueKeys, swapInKoPair, swapOutKoPair); + + SendLookupOffsets(info, uniqueKeys, restoreVecSec); + + SendGlobalUniqueVec(info, uniqueKeys, restoreVecSec); + + auto isNeedReturn = HandleSpecialProcessStatusSSD(info, getAndSendTensorsTC, swapInKoPair, swapOutKoPair); + if (isNeedReturn) { + return; + } + + TimeCost swapProcessTC; + 
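
The receive/update workers defined above (EmbeddingReceiveAndUpdateDDR and its SSD twin) keep per-table batches in order through a ring of condition variables: the worker that owns batch b blocks until lastRecvFinishStepMap[name] equals b, does its work, increments the counter, and notifies cvNotifyIndex, i.e. the worker that owns batch b+1 (the real predicates also wake on mutexDestroy for shutdown). A minimal self-contained sketch of that hand-off, using one shared mutex for brevity where the patch shards mutexes per table and per thread; StepRing and every name in it are illustrative, not part of the patch:

#include <condition_variable>
#include <cstdio>
#include <mutex>
#include <thread>
#include <vector>

// One table's step ring: batch b belongs to worker b % threadNum; each worker
// finishes its batch, bumps the shared step counter, then wakes the worker
// that owns the next batch (the role cvNotifyIndex plays above).
class StepRing {
public:
    explicit StepRing(int threadNum) : cv_(threadNum), threadNum_(threadNum) {}

    void RunStep(int batchId, int threadIdx)
    {
        const int notifyIdx = (threadIdx + 1) % threadNum_;   // cvNotifyIndex analogue
        std::unique_lock<std::mutex> lk(mut_);
        cv_[threadIdx].wait(lk, [&] { return lastFinishedStep_ == batchId; });
        std::printf("batch %d handled by worker %d\n", batchId, threadIdx);
        ++lastFinishedStep_;            // analogous to lastRecvFinishStepMap[name]++
        cv_[notifyIdx].notify_all();    // analogous to cvLastRecvFinishMap[name][cvNotifyIndex]
    }

private:
    std::mutex mut_;
    std::vector<std::condition_variable> cv_;
    int threadNum_;
    int lastFinishedStep_ = 0;
};

int main()
{
    constexpr int kThreads = 3;
    constexpr int kBatches = 9;
    StepRing ring(kThreads);
    std::vector<std::thread> pool;
    for (int t = 0; t < kThreads; ++t) {
        pool.emplace_back([&ring, t] {
            for (int b = t; b < kBatches; b += kThreads) {
                ring.RunStep(b, t);     // round-robin ownership, as in the thread pools above
            }
        });
    }
    for (auto& th : pool) {
        th.join();
    }
    return 0;
}

With three workers the batches print in strictly ascending order even though ownership is round-robin, which is exactly the guarantee the per-step maps provide.
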
auto &swapInKeys = swapInKoPair.first; + auto &swapInPos = swapInKoPair.second; + auto &swapOutKeys = swapOutKoPair.first; + auto &swapOutPos = swapOutKoPair.second; + + HandleDataSwapForSSD(info, swapInKeys, swapOutKeys); + + auto lastSwapInPos = lastSwapInPosMap[info.name]; + lastSwapInPosMap[info.name] = swapInPos; // 暂存待下一步发送 + + // 下发swaptensor + if (info.batchId != 0) { + SendTensorForSwap(info, lastSwapInPos, swapOutPos); + } + + HandleEndBatchCase(info, swapInPos); + + CheckLookupAddrSuccessSSD(); + + if (info.channelId == TRAIN_CHANNEL_ID) { + alreadyTrainOnce = true; + } + + LOG_DEBUG("ProcessEmbInfoSSD end, table:{}, batchId:{}, swapProcessTC(ms):{}, getAndSendTensorsTC(ms):{}", + info.name, info.batchId, swapProcessTC.ElapsedMS(), getAndSendTensorsTC.ElapsedMS()); +#endif +} + +void HybridMgmt::SendTensorForSwap(const EmbBaseInfo& info, + const vector &swapInPosUint, + const vector &swapOutPosUint) +{ +#ifndef GTEST + vector swapTensor; + swapTensor.emplace_back(Vec2TensorI32(swapInPosUint)); + swapTensor.emplace_back(Vec2TensorI32(swapOutPosUint)); + swapTensor.emplace_back(Tensor(tensorflow::DT_INT32, { 1 })); + auto swapInLen = swapTensor.back().flat(); + swapInLen(0) = swapInPosUint.size(); + swapTensor.emplace_back(Tensor(tensorflow::DT_INT32, { 1 })); + auto swapOutLen = swapTensor.back().flat(); + swapOutLen(0) = swapOutPosUint.size(); + + hdTransfer->Send(TransferChannel::SWAP, swapTensor, info.channelId, info.name, info.batchId); +#endif +} + +void HybridMgmt::InitDataPipelineForDDR(const string &embName) +{ + // 初始化公共队列 + HBMSwapKeyQue[embName+SWAP_IN_STR]; + HBMSwapKeyQue[embName+SWAP_OUT_STR]; + tableToQueueLookup[embName+SWAP_IN_STR]; + tableToQueueLookup[embName+SWAP_OUT_STR]; + + // 初始化lookup线程 + lookUpSwapInAddrsPushId[embName]; // 此处初始化,避免多线程竞争导致计数错误 + lookUpSwapInAddrsThreads.emplace_back( + std::async(std::launch::async, [=] { LookUpSwapAddrs(embName, SWAP_IN_STR); })); + lookUpSwapOutAddrsThreads.emplace_back( + std::async(std::launch::async, [=] { LookUpSwapAddrs(embName, SWAP_OUT_STR); })); + + LOG_DEBUG("data pipeline for ddr init"); +} + +void HybridMgmt::InitDataPipelineForSSD(const string &embName, int extEmbeddingSize) +{ + // 初始化公共队列 + HBMSwapKeyQue[embName+SWAP_IN_STR]; + HBMSwapKeyQue[embName+SWAP_OUT_STR]; + tableToQueueLookup[embName+SWAP_IN_STR]; + tableToQueueLookup[embName+SWAP_OUT_STR]; + + HBMSwapKeyQue[embName + ADDR_STR]; + SwapOut2SSDKeyQue[embName + SWAP_IN_STR]; + SwapOut2SSDKeyQue[embName + ADDR_STR]; + SwapOut2SSDKeyQue[embName + SWAP_OUT_STR]; + + DDRSwapKeyQue[embName + SWAP_OUT_STR]; + DDRSwapKeyQue[embName + SWAP_IN_STR]; + DDRSwapKeyForSSDQue[embName + SWAP_OUT_STR]; + DDRSwapKeyForSSDQue[embName + SWAP_IN_STR]; + DDRSwapAddrsQue[embName + SWAP_OUT_STR]; + DDRSwapAddrsQue[embName + SWAP_IN_STR]; + + // 初始化lookup线程 + lookUpThreads.emplace_back( + std::async(std::launch::async, [=] { LookUpAddrs(embName, extEmbeddingSize); })); + LOG_DEBUG("data pipeline for ssd init"); +} + +void HybridMgmt::InitEmbeddingCache(const vector& embInfos) +{ + factory->SetExternalLogFuncInner(CTRLog); + factory->CreateEmbCacheManager(embCache); + EmbeddingMgmt::Instance()->SetEmbCacheForEmbTable(embCache); + EmbeddingMgmt::Instance()->SetHDTransferForEmbTable(hdTransfer); + + for (auto embInfo: embInfos) { + if (isSSDEnabled) { + InitDataPipelineForSSD(embInfo.name, embInfo.extEmbeddingSize); + } else { + InitDataPipelineForDDR(embInfo.name); + } + + specialProcessStatus[embInfo.name] = ProcessStatus::NORMAL; + + // 初始化embedding cache + 
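
InitDataPipelineForDDR and InitDataPipelineForSSD above pre-create blocking queues keyed by table name plus a direction suffix (default-constructing them through operator[], as the bare statements show) and start async threads that block in WaitAndPop. JoinEmbeddingCacheThread later calls DestroyQueue on every queue, after which a blocked WaitAndPop returns an empty vector; that is why the workers re-check isRunning after each pop. A rough stand-in for that queue contract, written under those assumptions rather than against the real TaskQueue:

#include <condition_variable>
#include <cstdio>
#include <map>
#include <mutex>
#include <queue>
#include <string>
#include <thread>
#include <vector>

// Sketch: WaitAndPop blocks until Pushv delivers a whole vector or the queue
// is destroyed, in which case it hands back an empty vector.
template <typename T>
class BlockingVecQueue {
public:
    std::vector<T> WaitAndPop()
    {
        std::unique_lock<std::mutex> lk(mut_);
        cv_.wait(lk, [&] { return !items_.empty() || destroyed_; });
        if (items_.empty()) {
            return {};  // destroyed: wake the consumer with nothing, caller checks isRunning
        }
        std::vector<T> out = std::move(items_.front());
        items_.pop();
        return out;
    }

    void Pushv(std::vector<T> v)
    {
        {
            std::lock_guard<std::mutex> lk(mut_);
            items_.push(std::move(v));
        }
        cv_.notify_one();
    }

    void DestroyQueue()
    {
        {
            std::lock_guard<std::mutex> lk(mut_);
            destroyed_ = true;
        }
        cv_.notify_all();
    }

private:
    std::mutex mut_;
    std::condition_variable cv_;
    std::queue<std::vector<T>> items_;
    bool destroyed_ = false;
};

int main()
{
    // queues keyed by table name + direction suffix, mirroring
    // HBMSwapKeyQue[embName + SWAP_IN_STR]; in InitDataPipelineForDDR
    std::map<std::string, BlockingVecQueue<long long>> swapKeyQue;
    auto& inQue = swapKeyQue["emb1_swap_in"];
    std::thread consumer([&] {
        auto keys = inQue.WaitAndPop();
        std::printf("popped %zu keys\n", keys.size());
    });
    inQue.Pushv({ 1, 2, 3 });
    consumer.join();
    inQue.DestroyQueue();
    return 0;
}
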
LOG_INFO("create cache for table:{}, hostVocabSize:{}, embSize:{}, maxCacheSize:{}", + embInfo.name, embInfo.hostVocabSize, embInfo.extEmbeddingSize, embInfo.devVocabSize); + EmbCache::EmbCacheInfo embCacheInfo(embInfo.name, embInfo.hostVocabSize, embInfo.embeddingSize, + embInfo.extEmbeddingSize, embInfo.devVocabSize); + int ret = embCache->CreateCacheForTable( + embCacheInfo, embInfo.initializeInfos, INVALID_KEY_VALUE, embInfo.hostVocabSize, EMBEDDING_THREAD_NUM); + if (ret != H_OK) { + throw runtime_error(embInfo.name + "create cache for table failed, error code: " + std::to_string(ret)); + } + } +} + +void HybridMgmt::JoinEmbeddingCacheThread() +{ + for (auto &p : tableToQueueLookup) { + p.second.DestroyQueue(); + } + for (auto &p : HBMSwapKeyQue) { + p.second.DestroyQueue(); + } + for (auto &p : SwapOut2SSDKeyQue) { + p.second.DestroyQueue(); + } + for (auto &p : DDRSwapKeyQue) { + p.second.DestroyQueue(); + } + for (auto &p : DDRSwapKeyForSSDQue) { + p.second.DestroyQueue(); + } + for (auto &p : DDRSwapAddrsQue) { + p.second.DestroyQueue(); + } + for (auto& t : EmbeddingLookUpAndSendThreadPool) { + t.join(); + } + for (auto& t : EmbeddingReceiveAndUpdateThreadPool) { + t.join(); + } + for (auto& t : lookUpThreads) { + t.wait(); + } + for (auto& t : lookUpSwapInAddrsThreads) { + t.wait(); + } + for (auto& t : lookUpSwapOutAddrsThreads) { + t.wait(); + } +} + +void HybridMgmt::HandleReachMaxStepCase(const EmbBaseInfo& info, bool& remainBatchOut) +{ + // 1. 如果没有切换过,即状态normal,就该send以结束step n-1 + // 2. 如果切换过: + // a. eval场景跑完,不用send,外面自然退出 + // b. save场景,能触发,说明期望的train step已经跑完(由IsTrainEndBatch判定send),当前step也不用send + LOG_DEBUG("table:{}, batchId:{}, ProcessStatus:{}, reach maxTrainStep", + info.name, info.batchId, ProcessStatus2Str(ProcessStatus::NORMAL)); + if (specialProcessStatus[info.name] == ProcessStatus::NORMAL) { + LOG_DEBUG("table:{}, batchId:{}, need send swap tensor" + " for last step to finish train", info.name, info.batchId); + std::vector emptySwapOutPos; + SendTensorForSwap(info, lastSwapInPosMap[info.name], emptySwapOutPos); + } else { + LOG_DEBUG("table:{}, batchId:{}, switch from eval or save, unnecessary to send emptySwapOutPos", + info.name, info.batchId); + } + remainBatchOut = false; + hybridMgmtBlock->SetBlockStatus(TRAIN_CHANNEL_ID, true); +} + +void HybridMgmt::HandleEosCase(const EmbBaseInfo& info, bool &remainBatchOut) +{ + LOG_INFO("GetUniqueKeys get eos, handle final batch for current epoch, table:{}, channel:{}, batchId:{}", + info.name, info.channelId, info.batchId); + bool sendAllChannel = false; + if (info.channelId == TRAIN_CHANNEL_ID) { + vector emptySwapOutPos; + SendTensorForSwap(info, lastSwapInPosMap[info.name], emptySwapOutPos); + LOG_INFO("GetUniqueKeys get eos, send pos for train channel, table:{}, batchId:{}", info.name, info.batchId); + KEY_PROCESS_INSTANCE->SendEos(info.name, info.batchId, info.channelId, sendAllChannel); + remainBatchOut = false; + return; + } + + if (!alreadyTrainOnce) { + // predict场景 + LOG_INFO("ProcessEmbInfoDDR first run in eval channel, assume as predict mode, start handle eos"); + std::vector emptySwapOutPos; + SendTensorForSwap(info, lastSwapInPosMap[info.name], emptySwapOutPos); + sendAllChannel = true; + } else { + hybridMgmtBlock->SetBlockStatus(EVAL_CHANNEL_ID, true); + LOG_INFO("GetUniqueKeys get eos from eval channel, SetBlockStatus=true"); + if (hybridMgmtBlock->IsNeedWaitSave()) { + // train+eval+save场景 + // 当前step n之后需要save,涉及save到train的状态切换。需要: + // 1. 补发pos以启动eval step n-1并完成。 + // 2. 
eval step n遇到eos结束 + // 3. 开始save,完成后唤醒train的ProcessEmbInfoDDR,所以需要在此之前改变specialProcessStatus + LOG_DEBUG("eval encounter eos and need save after this step" + "send pos change specialProcessStatus, current status:{}, modify to status:{}", + ProcessStatus2Str(specialProcessStatus[info.name]), + ProcessStatus2Str(ProcessStatus::AFTER_SWITCH_FIRST_BATCH)); + vector emptySwapOutPos; + SendTensorForSwap(info, lastSwapInPosMap[info.name], emptySwapOutPos); + specialProcessStatus[info.name] = ProcessStatus::AFTER_SWITCH_FIRST_BATCH; + } else { + // train+eval+train场景 + // 交给train的ProcessEmbInfoDDR启动最后n-1步eval + // train发送pos让eval step n-1跑完,到eval step n时各channel遇到eos后结束(train、eval共享的channel除外) + LOG_INFO("GetUniqueKeys get eos, skip send pos for eval channel, table:{}, batchId:{}", + info.name, info.batchId); + } + } + KEY_PROCESS_INSTANCE->SendEos(info.name, info.batchId, info.channelId, sendAllChannel); + remainBatchOut = false; +} + +bool HybridMgmt::EmbeddingReceiveDDR(const EmbTaskInfo& info, float*& ptr, vector& swapOutAddrs) +{ + std::unique_lock lastRecvFinishLocker(lastRecvFinishMutexMap[info.name][info.threadIdx]); + cvLastRecvFinishMap[info.name][info.threadIdx].wait(lastRecvFinishLocker, [info, this] { + return (lastRecvFinishStepMap[info.name] == info.batchId) || mutexDestroy; + }); + if (!isRunning) { + return false; + } + TimeCost EmbeddingRecvTC = TimeCost(); + + swapOutAddrs = tableToQueueLookup[info.name+SWAP_OUT_STR].WaitAndPop(); + if (!isRunning) { + return false; + } + // 等待图执行发送d2h embedding过来 + if (info.batchId != 0) { + TransferChannel transferName = TransferChannel::D2H; + auto size = hdTransfer->RecvAcl(transferName, TRAIN_CHANNEL_ID, info.name, info.threadIdx, info.batchId); + if (size == 0) { + LOG_WARN(HOSTEMB + "recv empty data"); + return false; + } + + auto aclData = acltdtGetDataItem(hdTransfer->aclDatasets[info.name][info.threadIdx], 0); + if (aclData == nullptr) { + throw runtime_error("Acl get tensor data from dataset failed."); + } + ptr = reinterpret_cast(acltdtGetDataAddrFromItem(aclData)); + + // 判断拿到的embedding个数是否与swapOutKeys个数相等 + size_t dimNum = acltdtGetDimNumFromItem(aclData); + int64_t dims[dimNum]; + acltdtGetDimsFromItem(aclData, dims, dimNum); + + LOG_DEBUG("table:{}, batchId:{}, dims[0]:{}, swapOutAddrs size:{}", + info.name, info.batchId, dims[0], swapOutAddrs.size()); + + if (dims[0] != static_cast(swapOutAddrs.size())) { + throw runtime_error("data dims[0] != swapOutKeys.size()"); + } + } + LOG_DEBUG("table:{}, batchId:{}, thread:{}, EmbeddingRecvTC(ms):{}", + info.name, info.batchId, info.threadIdx, EmbeddingRecvTC.ElapsedMS()); + lastRecvFinishStepMap[info.name]++; + cvLastRecvFinishMap[info.name][info.cvNotifyIndex].notify_all(); + + return true; +} + +void HybridMgmt::EmbeddingUpdateDDR(const EmbTaskInfo& info, const float* embPtr, vector& swapOutAddrs) +{ + std::unique_lock lastUpdateFinishLocker(lastUpdateFinishMutexMap[info.name][info.threadIdx]); + cvLastUpdateFinishMap[info.name][info.threadIdx].wait(lastUpdateFinishLocker, [info, this] { + return (lastUpdateFinishStepMap[info.name] == info.batchId) || mutexDestroy; + }); + TimeCost EmbeddingUpdateTC = TimeCost(); + + uint64_t memSize = info.extEmbeddingSize * sizeof(float); + uint64_t extEmbeddingSize = info.extEmbeddingSize; +# pragma omp parallel for num_threads(MGMT_CPY_THREADS) default(none) \ + shared(swapOutAddrs, embPtr, extEmbeddingSize, memSize) + for (uint64_t i = 0; i < swapOutAddrs.size(); i++) { + auto rc = memcpy_s(swapOutAddrs[i], memSize, embPtr + i * 
extEmbeddingSize, memSize); + if (rc != 0) { + throw runtime_error("memcpy_s failed, error code:" + to_string(rc)); + } + } + LOG_DEBUG("table:{}, batchId:{}, thread:{}, EmbeddingUpdateTC(ms):{}", + info.name, info.batchId, info.threadIdx, EmbeddingUpdateTC.ElapsedMS()); + + lastUpdateFinishStepMap[info.name]++; + cvLastUpdateFinishMap[info.name][info.cvNotifyIndex].notify_all(); +} + +bool HybridMgmt::EmbeddingLookUpDDR(const EmbTaskInfo &info, vector& h2dEmb) +{ + std::unique_lock lastUpdateFinishLocker(lastUpdateFinishMutexMap[info.name][info.threadIdx]); + cvLastUpdateFinishMap[info.name][info.threadIdx].wait(lastUpdateFinishLocker, [info, this] { + return (lastUpdateFinishStepMap[info.name] >= info.batchId) || mutexDestroy; + }); + if (!isRunning) { + return false; + } + + std::unique_lock lastLookUpFinishLocker(lastLookUpFinishMutexMap[info.name][info.threadIdx]); + cvLastLookUpFinishMap[info.name][info.threadIdx].wait(lastLookUpFinishLocker, [info, this] { + return (lastLookUpFinishStepMap[info.name] == info.batchId) || mutexDestroy; + }); + if (!isRunning) { + return false; + } + + bool isSuccess = BuildH2DEmbedding(info, h2dEmb); + if (!isSuccess) { + return false; + } + + lastLookUpFinishStepMap[info.name]++; + cvLastLookUpFinishMap[info.name][info.cvNotifyIndex].notify_all(); + + return true; +} + +void HybridMgmt::EmbeddingSendDDR(const EmbTaskInfo &info, vector& h2dEmb) +{ + std::unique_lock lastSendFinishLocker(lastSendFinishMutexMap[info.name][info.threadIdx]); + cvLastSendFinishMap[info.name][info.threadIdx].wait(lastSendFinishLocker, [info, this] { + return (lastSendFinishStepMap[info.name] == info.batchId) || mutexDestroy; + }); + TimeCost SendTC = TimeCost(); + hdTransfer->Send(TransferChannel::H2D, h2dEmb, TRAIN_CHANNEL_ID, info.name, info.batchId); + lastSendFinishStepMap[info.name]++; + cvLastSendFinishMap[info.name][info.cvNotifyIndex].notify_all(); + LOG_DEBUG("table:{}, batchId:{}, thread:{}, SendH2DEmbTC(ms):{}", + info.name, info.batchId, info.threadIdx, SendTC.ElapsedMS()); + + // 对于end of sequence场景,key process需要基于h2dNextBatchId等待每个table都完成了最后1个step发送,才能发EOS至各channel + hybridMgmtBlock->h2dNextBatchId[info.name]++; + LOG_DEBUG("h2dNextBatchId, table:{}, next batchId:{}", info.name, hybridMgmtBlock->h2dNextBatchId[info.name]); +} + +void HybridMgmt::CreateEmbeddingLookUpAndSendThread(int index, const EmbInfo& embInfo) +{ + EmbeddingLookUpAndSendThreadPool.emplace_back([index, embInfo, this]() { + while (true) { + lookUpAndSendBatchIdMtx.lock(); + if (lookUpAndSendTableBatchMap[embInfo.name] % EMBEDDING_THREAD_NUM == index) { + int cur_batch_id = lookUpAndSendTableBatchMap[embInfo.name]; + lookUpAndSendTableBatchMap[embInfo.name]++; + lookUpAndSendBatchIdMtx.unlock(); + if (!isSSDEnabled) { + EmbeddingLookUpAndSendDDR(cur_batch_id, index, embInfo); + } else { + EmbeddingLookUpAndSendSSD(cur_batch_id, index, embInfo); + } + } else { + lookUpAndSendBatchIdMtx.unlock(); + } + if (!isRunning) { + return; + } + } + }); +} + +void HybridMgmt::CreateEmbeddingReceiveAndUpdateThread(int index, const EmbInfo& embInfo) +{ + EmbeddingReceiveAndUpdateThreadPool.emplace_back([index, embInfo, this]() { + while (true) { + receiveAndUpdateBatchIdMtx.lock(); + if (receiveAndUpdateTableBatchMap[embInfo.name] % EMBEDDING_THREAD_NUM == index) { + int cur_batch_id = receiveAndUpdateTableBatchMap[embInfo.name]; + receiveAndUpdateTableBatchMap[embInfo.name]++; + receiveAndUpdateBatchIdMtx.unlock(); + if (!isSSDEnabled) { + EmbeddingReceiveAndUpdateDDR(cur_batch_id, index, embInfo); + } 
else { + EmbeddingReceiveAndUpdateSSD(cur_batch_id, index, embInfo); + } + } else { + receiveAndUpdateBatchIdMtx.unlock(); + } + if (!isRunning) { + return; + } + } + }); +} + +bool HybridMgmt::EmbeddingReceiveSSD(const EmbTaskInfo &info, float *&ptr, + vector &swapOutAddrs, int64_t& dims0) +{ + std::unique_lock lastRecvFinishLocker(lastRecvFinishMutexMap[info.name][info.threadIdx]); + cvLastRecvFinishMap[info.name][info.threadIdx].wait(lastRecvFinishLocker, [info, this] { + return (lastRecvFinishStepMap[info.name] == info.batchId) || mutexDestroy; + }); + if (!isRunning) { + return false; + } + TimeCost EmbeddingRecvTC = TimeCost(); + // finish时会pop空vector,因此需要额外判定isRunning + swapOutAddrs = tableToQueueLookup[info.name+SWAP_OUT_STR].WaitAndPop(); + if (!isRunning) { + return false; + } + // 等待图执行发送d2h embedding过来 + if (info.batchId != 0) { + TransferChannel transferName = TransferChannel::D2H; + auto size = hdTransfer->RecvAcl(transferName, TRAIN_CHANNEL_ID, info.name, info.threadIdx, info.batchId); + if (size == 0) { + LOG_WARN(HOSTEMB + "recv empty data"); + return false; + } + + auto aclData = acltdtGetDataItem(hdTransfer->aclDatasets[info.name][info.threadIdx], 0); + if (aclData == nullptr) { + throw runtime_error("Acl get tensor data from dataset failed."); + } + ptr = reinterpret_cast(acltdtGetDataAddrFromItem(aclData)); + + // 判断拿到的embedding个数是否与swapOutKeys个数相等 + size_t dimNum = acltdtGetDimNumFromItem(aclData); + int64_t dims[dimNum]; + acltdtGetDimsFromItem(aclData, dims, dimNum); + + LOG_DEBUG("table:{}, batchId:{}, recv d2h, dims[0]:{}, swapOutAddrs.size:{}", + info.name, info.batchId, dims[0], swapOutAddrs.size()); + dims0 = dims[0]; + } + LOG_DEBUG("table:{}, batchId:{}, thread:{}, EmbeddingRecvTC(ms):{}", + info.name.c_str(), info.batchId, info.threadIdx, EmbeddingRecvTC.ElapsedMS()); + lastRecvFinishStepMap[info.name]++; + cvLastRecvFinishMap[info.name][info.cvNotifyIndex].notify_all(); + return true; +} + +void HybridMgmt::EmbeddingUpdateSSD(const EmbTaskInfo& info, float *embPtr, + vector& swapOutAddrs, int64_t& dims0) +{ + std::unique_lock lastUpdateFinishLocker(lastUpdateFinishMutexMap[info.name][info.threadIdx]); + cvLastUpdateFinishMap[info.name][info.threadIdx].wait(lastUpdateFinishLocker, [info, this] { + return (lastUpdateFinishStepMap[info.name] == info.batchId) || mutexDestroy; + }); + + TimeCost EmbeddingUpdateTC = TimeCost(); + std::vector swapOutDDRAddrOffs = HBMSwapKeyQue[info.name + ADDR_STR].WaitAndPop(); + if (!isRunning) { + return; + } + uint64_t memSize = info.extEmbeddingSize * sizeof(float); + uint64_t extEmbeddingSize = info.extEmbeddingSize; + // DDR更新 +# pragma omp parallel for num_threads(MGMT_CPY_THREADS) default(none) \ + shared(swapOutAddrs, swapOutDDRAddrOffs, embPtr, extEmbeddingSize, memSize) + for (uint64_t i = 0; i < swapOutAddrs.size(); i++) { + auto rc = memcpy_s(swapOutAddrs[i], memSize, embPtr + swapOutDDRAddrOffs[i] * extEmbeddingSize, memSize); + if (rc != 0) { + throw runtime_error("memcpy_s failed, error code:" + to_string(rc)); + } + } + LOG_DEBUG("table:{}, batchId:{}, thread:{}, EmbeddingUpdateTC(ms):{}", + info.name.c_str(), info.batchId, info.threadIdx, EmbeddingUpdateTC.ElapsedMS()); + + // SSD更新 + TimeCost SSDUpdateTC = TimeCost(); + std::vector swapOutSSDAddrOffs = SwapOut2SSDKeyQue[info.name + ADDR_STR].WaitAndPop(); + std::vector swapOutSSDKeys = SwapOut2SSDKeyQue[info.name + SWAP_OUT_STR].WaitAndPop(); + if (!isRunning) { + return; + } + + if (dims0 != static_cast(swapOutAddrs.size() + swapOutSSDKeys.size())) { + throw 
runtime_error("data dims[0] != swapOutKeys.size"); + } + cacheManager->UpdateSSDEmb(info.name, embPtr, extEmbeddingSize, swapOutSSDKeys, swapOutSSDAddrOffs); + LOG_DEBUG("table:{}, batchId:{}, thread{}, SSDUpdateTC(ms):{}", + info.name.c_str(), info.batchId, info.threadIdx, SSDUpdateTC.ElapsedMS()); + + lastUpdateFinishStepMap[info.name]++; + cvLastUpdateFinishMap[info.name][info.cvNotifyIndex].notify_all(); +} + +bool HybridMgmt::EmbeddingLookUpSSD(const EmbTaskInfo& info, vector& h2dEmb) +{ + std::unique_lock lastUpdateFinishLocker(lastUpdateFinishMutexMap[info.name][info.threadIdx]); + cvLastUpdateFinishMap[info.name][info.threadIdx].wait(lastUpdateFinishLocker, [info, this] { + return (lastUpdateFinishStepMap[info.name] >= info.batchId) || mutexDestroy; + }); + if (!isRunning) { + return false; + } + + std::unique_lock lastLookUpFinishLocker(lastLookUpFinishMutexMap[info.name][info.threadIdx]); + cvLastLookUpFinishMap[info.name][info.threadIdx].wait(lastLookUpFinishLocker, [info, this] { + return (lastLookUpFinishStepMap[info.name] == info.batchId) || mutexDestroy; + }); + if (!isRunning) { + return false; + } + + TimeCost transferDDR2SSDTC = TimeCost(); + // DDR腾空间 + std::vector DDR2SSDKeys = DDRSwapKeyForSSDQue[info.name + SWAP_OUT_STR].WaitAndPop(); + std::vector DDR2SSDAddrs = DDRSwapAddrsQue[info.name + SWAP_OUT_STR].WaitAndPop(); + if (!isRunning) { + return false; + } + cacheManager->TransferDDR2SSD(info.name, info.extEmbeddingSize, DDR2SSDKeys, DDR2SSDAddrs); + LOG_DEBUG("table:{}, thread:{}, transferDDR2SSDTC(ms):{}", + info.name.c_str(), info.threadIdx, transferDDR2SSDTC.ElapsedMS()); + + TimeCost fetchSSDEmb2DDRTC = TimeCost(); + // swapInKeys中在SSD的到DDR + std::vector SSD2DDRKeys = DDRSwapKeyForSSDQue[info.name + SWAP_IN_STR].WaitAndPop(); + std::vector SSD2DDRAddrs = DDRSwapAddrsQue[info.name + SWAP_IN_STR].WaitAndPop(); + if (!isRunning) { + return false; + } + cacheManager->FetchSSDEmb2DDR(info.name, info.extEmbeddingSize, SSD2DDRKeys, SSD2DDRAddrs); + LOG_DEBUG("table:{}, thread:{}, fetchSSDEmb2DDRTC(ms):{}", + info.name.c_str(), info.threadIdx, fetchSSDEmb2DDRTC.ElapsedMS()); + + bool isSuccess = BuildH2DEmbedding(info, h2dEmb); + if (!isSuccess) { + return false; + } + + lastLookUpFinishStepMap[info.name]++; + cvLastLookUpFinishMap[info.name][info.cvNotifyIndex].notify_all(); + + return true; +} + +void HybridMgmt::EmbeddingSendSSD(const EmbTaskInfo& info, vector& h2dEmb) +{ + std::unique_lock lastSendFinishLocker(lastSendFinishMutexMap[info.name][info.threadIdx]); + cvLastSendFinishMap[info.name][info.threadIdx].wait(lastSendFinishLocker, [info, this] { + return (lastSendFinishStepMap[info.name] == info.batchId) || mutexDestroy; + }); + TimeCost SendTC = TimeCost(); + hdTransfer->Send(TransferChannel::H2D, h2dEmb, TRAIN_CHANNEL_ID, info.name, info.batchId); + lastSendFinishStepMap[info.name]++; + cvLastSendFinishMap[info.name][info.cvNotifyIndex].notify_all(); + LOG_DEBUG("table:{}, thread:{}, SendH2DEmbTC(ms):{}", info.name.c_str(), info.threadIdx, SendTC.ElapsedMS()); + + // 对于end of sequence场景,key process需要基于h2dNextBatchId等待每个table都完成了最后1个step发送,才能发EOS至各channel + hybridMgmtBlock->h2dNextBatchId[info.name]++; + LOG_DEBUG("h2dNextBatchId, table:{}, next batchId:{}", info.name, hybridMgmtBlock->h2dNextBatchId[info.name]); +} + +void HybridMgmt::HandleEosCaseHBM(const string &embName, int batchId, int channelId, bool &remainBatchOut) +{ + bool sendAllChannel = false; + if (channelId == EVAL_CHANNEL_ID) { + if (!alreadyTrainOnce) { + // predict场景 + sendAllChannel = true; 
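+            // With no completed training step there is no train channel to drain,
+            // so the EOS must reach every channel: sendAllChannel=true makes the
+            // SendEos call below broadcast instead of targeting one channel.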
+ } else { + // train+eval场景 + hybridMgmtBlock->SetBlockStatus(EVAL_CHANNEL_ID, true); + LOG_INFO("GetUniqueKeys get eos from eval channel, SetBlockStatus=true"); + } + } + KEY_PROCESS_INSTANCE->SendEos(embName, batchId, channelId, sendAllChannel); + remainBatchOut = false; +} + +void HybridMgmt::HandleEndBatchCase(const EmbBaseInfo& info, vector& swapInPos) +{ + if ((info.channelId == TRAIN_CHANNEL_ID) && IsTrainEndBatch(info.batchId)) { + // 如果是train epoch最后一个batch,补发emptySwapOutPos以启动当前step + std::vector emptySwapOutPos; + SendTensorForSwap(info, swapInPos, emptySwapOutPos); + specialProcessStatus[info.name] = ProcessStatus::AFTER_SWITCH_FIRST_BATCH; + LOG_DEBUG("handle last end batch for current epoch, table:{}, batchId:{}", info.name, info.batchId); + return; + } + + if (info.channelId == EVAL_CHANNEL_ID && IsEvalEndBatch(info.batchId)) { + // 当前step之后eval结束,需要设置处理状态 + // 因为eval、predict最后1个batch之后不会像train那样再往后跑,所以必须放这里补发 + LOG_DEBUG("reach max eval step, send emptySwapOutPos tensor for last step to finish eval, " + "change ProcessStatus to {}, table:{}, batchId:{}", + ProcessStatus2Str(ProcessStatus::AFTER_SWITCH_FIRST_BATCH), info.name, info.batchId); + std::vector emptySwapOutPos; + SendTensorForSwap(info, lastSwapInPosMap[info.name], emptySwapOutPos); + specialProcessStatus[info.name] = ProcessStatus::AFTER_SWITCH_FIRST_BATCH; + } +} + +void HybridMgmt::HandleFirstBatchCaseDDR(const EmbBaseInfo& info, + pair, vector>& swapInKoPair, + pair, vector>& swapOutKoPair) +{ + TimeCost swapProcessTC; + auto &swapInKeys = swapInKoPair.first; + auto &swapInPos = swapInKoPair.second; + auto &swapOutKeys = swapOutKoPair.first; + auto &swapOutPos = swapOutKoPair.second; + + vector emptySwapOutKeys; + LOG_DEBUG("table:{}, batchId:{}, channelId:{}, swapInSize:{}, swapOutSize:{}", + info.name, info.batchId, info.channelId, swapInKoPair.first.size(), emptySwapOutKeys.size()); + trainTestSwitchInfoStore[info.name] = {swapOutKeys, swapOutPos}; + + LOG_DEBUG("handle first batch case, delay sending swapInPos, table:{}", info.name); + LOG_DEBUG("enqueue HBMSwapKeyQue table:{}, batchId:{}, channelId:{}, swapInSize:{}, swapOutSize:{}", + info.name, info.batchId, info.channelId, swapInKeys.size(), emptySwapOutKeys.size()); + HBMSwapKeyQue[info.name + SWAP_OUT_STR].Pushv(emptySwapOutKeys); + HBMSwapKeyQue[info.name + SWAP_IN_STR].Pushv(swapInKeys); +} + +void HybridMgmt::HandleFirstBatchCaseSSD(const EmbBaseInfo& info, + std::pair, vector>& swapInKoPair, + std::pair, vector>& swapOutKoPair) +{ + // 发现train、save、eval切换,先保存状态,发emptySwapOutKeys以对应上一步的emptySwapOutPos + vector emptySwapOutKeys; + LOG_DEBUG("table:{}, batchId:{}, channelId:{}, swapInSize:{}, swapOutSize:{}", + info.name, info.batchId, info.channelId, swapInKoPair.first.size(), emptySwapOutKeys.size()); + trainTestSwitchInfoStore[info.name] = {swapOutKoPair.first, swapOutKoPair.second}; + + TimeCost ProcessSwapInKeysTC = TimeCost(); + vector SSDToDDRKeys; + vector DDRToSSDKeys; + cacheManager->ProcessSwapInKeys(info.name, swapInKoPair.first, DDRToSSDKeys, SSDToDDRKeys); + LOG_DEBUG("ProcessSwapInKeysTC(ms):{} ", ProcessSwapInKeysTC.ElapsedMS()); + + vector emptySwapOutDDRKeys; + vector emptySwapOutDDRAddrOffs; + vector emptySwapOutSSDKeys; + vector emptySwapOutSSDAddrOff; + + LOG_DEBUG("table:{}, batchId:{}, channelId:{}, swapInSize:{}, swapOutSize:{}", + info.name, info.batchId, info.channelId, swapInKoPair.first.size(), swapOutKoPair.first.size()); + LOG_DEBUG("table:{}, batchId:{}, channelId:{}, swapOutDDRKeys.size:{}, 
swapOutDDRAddrOffs.size:{}, " + "swapOutSSDKeys.size:{}, swapOutSSDAddrOff.size:{}", + info.name, info.batchId, info.channelId, emptySwapOutDDRKeys.size(), emptySwapOutDDRAddrOffs.size(), + emptySwapOutSSDKeys.size(), emptySwapOutSSDAddrOff.size()); + LOG_DEBUG("table:{}, batchId:{}, channelId:{}, DDRToSSDKeys.size:{}, SSDToDDRKeys.size:{}", + info.name, info.batchId, info.channelId, DDRToSSDKeys.size(), SSDToDDRKeys.size()); + + auto DDRToSSDKeysForSSD = DDRToSSDKeys; + auto SSDToDDRKeysForSSD = SSDToDDRKeys; + // DDR<->SSD + DDRSwapKeyQue[info.name + SWAP_OUT_STR].Pushv(DDRToSSDKeys); + DDRSwapKeyQue[info.name + SWAP_IN_STR].Pushv(SSDToDDRKeys); + + DDRSwapKeyForSSDQue[info.name + SWAP_OUT_STR].Pushv(DDRToSSDKeysForSSD); + DDRSwapKeyForSSDQue[info.name + SWAP_IN_STR].Pushv(SSDToDDRKeysForSSD); + + // HBM<->DDR + HBMSwapKeyQue[info.name + SWAP_OUT_STR].Pushv(emptySwapOutDDRKeys); + HBMSwapKeyQue[info.name + ADDR_STR].Pushv(emptySwapOutDDRAddrOffs); + HBMSwapKeyQue[info.name + SWAP_IN_STR].Pushv(swapInKoPair.first); + + // HBM->SSD + SwapOut2SSDKeyQue[info.name + SWAP_OUT_STR].Pushv(emptySwapOutSSDKeys); + SwapOut2SSDKeyQue[info.name + ADDR_STR].Pushv(emptySwapOutSSDAddrOff); +} + +void HybridMgmt::HandleDataSwapForSSD(const EmbBaseInfo& info, + vector &swapInKeys, vector &swapOutKeys) +{ + TimeCost ProcessSwapInKeysTC; + vector SSDToDDRKeys; + vector DDRToSSDKeys; + cacheManager->ProcessSwapInKeys(info.name, swapInKeys, DDRToSSDKeys, SSDToDDRKeys); + LOG_DEBUG("ProcessSwapInKeysTC(ms):{} ", ProcessSwapInKeysTC.ElapsedMS()); + + TimeCost ProcessSwapOutKeysTC; + SwapOutInfo swapInfo; + cacheManager->ProcessSwapOutKeys(info.name, swapOutKeys, swapInfo); + LOG_DEBUG("ProcessSwapOutKeysTC(ms):{} ", ProcessSwapOutKeysTC.ElapsedMS()); + + LOG_DEBUG("table:{}, batchId:{}, channelId:{}, swapInSize:{}, swapOutSize:{}", + info.name, info.batchId, info.channelId, swapInKeys.size(), swapOutKeys.size()); + LOG_DEBUG("table:{}, batchId:{}, channelId:{}, swapOutDDRKeys:{}, swapOutDDRAddrOffs:{}, " + "swapOutSSDKeys:{}, swapOutSSDAddrOff:{}", + info.name, info.batchId, info.channelId, swapInfo.swapOutDDRKeys.size(), + swapInfo.swapOutDDRAddrOffs.size(), swapInfo.swapOutSSDKeys.size(), swapInfo.swapOutSSDAddrOffs.size()); + LOG_DEBUG("table:{}, batchId:{}, channelId:{}, DDRToSSDKeys:{}, SSDToDDRKeys:{}", + info.name, info.batchId, info.channelId, DDRToSSDKeys.size(), SSDToDDRKeys.size()); + + auto DDRToSSDKeysForSSD = DDRToSSDKeys; + auto SSDToDDRKeysForSSD = SSDToDDRKeys; + // DDR<->SSD + DDRSwapKeyQue[info.name + SWAP_OUT_STR].Pushv(DDRToSSDKeys); + DDRSwapKeyQue[info.name + SWAP_IN_STR].Pushv(SSDToDDRKeys); + + DDRSwapKeyForSSDQue[info.name + SWAP_OUT_STR].Pushv(DDRToSSDKeysForSSD); + DDRSwapKeyForSSDQue[info.name + SWAP_IN_STR].Pushv(SSDToDDRKeysForSSD); + + // HBM<->DDR + HBMSwapKeyQue[info.name + SWAP_OUT_STR].Pushv(swapInfo.swapOutDDRKeys); + HBMSwapKeyQue[info.name + ADDR_STR].Pushv(swapInfo.swapOutDDRAddrOffs); + HBMSwapKeyQue[info.name + SWAP_IN_STR].Pushv(swapInKeys); + + // HBM->SSD + SwapOut2SSDKeyQue[info.name + SWAP_OUT_STR].Pushv(swapInfo.swapOutSSDKeys); + SwapOut2SSDKeyQue[info.name + ADDR_STR].Pushv(swapInfo.swapOutSSDAddrOffs); +} + +bool HybridMgmt::BuildH2DEmbedding(const EmbTaskInfo &info, vector &h2dEmb) +{ + std::vector swapInAddrs = tableToQueueLookup[info.name+SWAP_IN_STR].WaitAndPop(); + if (!isRunning) { + return false; + } + h2dEmb.emplace_back(Tensor(tensorflow::DT_FLOAT, { + int(swapInAddrs.size()), static_cast(info.extEmbeddingSize) + })); + auto &tmpTensor = h2dEmb.back(); 
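+    // Gather step: row i of the tensor just allocated receives the extEmbeddingSize
+    // floats stored at host address swapInAddrs[i]; the loop below fans the per-row
+    // memcpy_s out across MGMT_CPY_THREADS OpenMP threads.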
+ float *h2dEmbAddr = tmpTensor.flat().data(); + TimeCost embeddingLookupTC = TimeCost(); + + uint64_t memSize = info.extEmbeddingSize * sizeof(float); +# pragma omp parallel for num_threads(MGMT_CPY_THREADS) default(none) \ + shared(swapInAddrs, h2dEmbAddr, info, memSize) + for (uint64_t i = 0; i < swapInAddrs.size(); i++) { + auto rc = memcpy_s(h2dEmbAddr + i * info.extEmbeddingSize, memSize, swapInAddrs[i], memSize); + if (rc != 0) { + throw runtime_error("memcpy_s failed, error code:" + to_string(rc)); + } + } + LOG_DEBUG("table:{}, thread:{}, embeddingLookupTC(ms):{}", + info.name.c_str(), info.threadIdx, embeddingLookupTC.ElapsedMS()); + return true; +} + +vector HybridMgmt::GetUniqueKeys(const EmbBaseInfo &info, bool &remainBatchOut) +{ + bool isEos = false; + auto uniqueKeys = KEY_PROCESS_INSTANCE->GetUniqueKeys(info, isEos, lookUpSwapInAddrsPushId); + if (isEos) { + HandleEosCase(info, remainBatchOut); + return uniqueKeys; + } + if (uniqueKeys.empty()) { + remainBatchOut = false; + LOG_WARN("table:{}, channelId:{} batchId:{}, UniqueKeys result is empty", + info.name, info.channelId, info.batchId); + return uniqueKeys; + } + + if (info.channelId == TRAIN_CHANNEL_ID) { + TimeCost KeyMaintainTC; + trainKeysSet[info.name].insert(uniqueKeys.begin(), uniqueKeys.end()); + LOG_DEBUG("table:{}, batchId:{}, KeyMaintainTC(ms):{}", info.name, info.batchId, KeyMaintainTC.ElapsedMS()); + } else { + for (auto &key : uniqueKeys) { + if (trainKeysSet[info.name].find(key) == trainKeysSet[info.name].end()) { + key = INVALID_KEY_VALUE; + LOG_TRACE("find key not train before, set as invalid key"); + } + } + } + + LOG_DEBUG("table:{}, channelId:{} batchId:{}, GetUniqueKeys end", info.name, info.channelId, info.batchId); + return uniqueKeys; +} + +vector HybridMgmt::GetRestoreVecSec(const EmbBaseInfo &info, bool &remainBatchOut) +{ + auto restoreVecSec = KEY_PROCESS_INSTANCE->GetRestoreVecSec(info); + if (restoreVecSec.empty()) { + remainBatchOut = false; + LOG_WARN("table:{}, channelId:{} batchId:{}, restoreVecSec result is empty", + info.name, info.channelId, info.batchId); + return restoreVecSec; + } + LOG_DEBUG("table:{}, channelId:{} batchId:{}, GetRestoreVecSec end", info.name, info.channelId, info.batchId); + return restoreVecSec; +} + +void HybridMgmt::SendAll2AllVec(const EmbBaseInfo &info, bool &remainBatchOut) +{ + if (!mgmtRankInfo.useStatic) { + bool isEos = false; // useless, adapt to HBM mode + TimeCost getAll2AllTC; + unique_ptr> all2all = KEY_PROCESS_INSTANCE->GetInfoVec( + info, ProcessedInfo::ALL2ALL, isEos); + LOG_DEBUG("table:{}, channelId:{}, batchId:{}, GetInfoVec all2all end, GetAll2AllTC(ms):{}", + info.name, info.channelId, info.batchId, getAll2AllTC.ElapsedMS()); + if (all2all == nullptr) { + remainBatchOut = false; + LOG_WARN("Information vector is nullptr!"); + return; + } + TimeCost sendAll2AllTC; + hdTransfer->Send(TransferChannel::ALL2ALL, *all2all, info.channelId, info.name); + LOG_DEBUG("table:{}, channelId:{}, batchId:{}, send all2all end, sendAll2AllTC(ms):{}", + info.name, info.channelId, info.batchId, sendAll2AllTC.ElapsedMS()); + } +} + +void HybridMgmt::SendRestoreVec(const EmbBaseInfo &info, bool &remainBatchOut) +{ + bool isEos = false; // useless, adapt to HBM mode + TimeCost getRestoreTC; + unique_ptr> infoVecs = KEY_PROCESS_INSTANCE->GetInfoVec( + info, ProcessedInfo::RESTORE, isEos); + if (infoVecs == nullptr) { + remainBatchOut = false; + LOG_ERROR("Information vector is nullptr!"); + return; + } + LOG_DEBUG("table:{}, channelId:{}, batchId:{}, get restore 
end, getRestoreTC(ms):{}", + info.name, info.channelId, info.batchId, getRestoreTC.ElapsedMS()); + + TimeCost sendRestoreSyncTC; + hdTransfer->Send(TransferChannel::RESTORE, *infoVecs, info.channelId, info.name); + LOG_DEBUG("table:{}, channelId:{}, batchId:{}, send restore end, sendRestoreSyncTC(ms):{}", + info.name, info.channelId, info.batchId, sendRestoreSyncTC.ElapsedMS()); +} + +void HybridMgmt::SendLookupOffsets(const EmbBaseInfo &info, + vector &uniqueKeys, vector &restoreVecSec) +{ + TimeCost sendLookupOffsetsTC; + std::vector lookupOffsets; + for (const auto &index : restoreVecSec) { + lookupOffsets.emplace_back(uniqueKeys[index]); + } + hdTransfer->Send(TransferChannel::LOOKUP, { Vec2TensorI32(lookupOffsets) }, info.channelId, info.name); + LOG_DEBUG("table:{}, channelId:{}, batchId:{}, send lookupOffset, sendLookupOffsetsTC(ms):{}", + info.name, info.channelId, info.batchId, sendLookupOffsetsTC.ElapsedMS()); +} + +void HybridMgmt::SendGlobalUniqueVec(const EmbBaseInfo &info, + vector &uniqueKeys, vector &restoreVecSec) +{ + if (!(info.channelId == TRAIN_CHANNEL_ID && mgmtRankInfo.useSumSameIdGradients)) { + return; + } + TimeCost sendUniqueKeysSyncTC; + hdTransfer->Send(TransferChannel::UNIQKEYS, {mgmtRankInfo.useDynamicExpansion ? Vec2TensorI64(uniqueKeys) : + Vec2TensorI32(uniqueKeys) }, info.channelId, info.name); + LOG_DEBUG("table:{}, channelId:{}, batchId:{}, sendUniqueKeysSyncTC(ms):{}", + info.name, info.channelId, info.batchId, sendUniqueKeysSyncTC.ElapsedMS()); + + TimeCost sendRestoreVecSecSyncTC; + hdTransfer->Send(TransferChannel::RESTORE_SECOND, {Vec2TensorI32(restoreVecSec) }, info.channelId, info.name); + LOG_DEBUG("table:{}, channelId:{}, batchId:{}, sendRestoreVecSecSyncTC(ms):{}", + info.name, info.channelId, info.batchId, sendRestoreVecSecSyncTC.ElapsedMS()); +} + +bool HybridMgmt::HandleSpecialProcessStatusDDR(const EmbBaseInfo &info, TimeCost& getAndSendTensorsTC, + pair, vector> &swapInKoPair, + pair, vector> &swapOutKoPair) +{ + TimeCost swapProcessTC; + auto &swapInPos = swapInKoPair.second; + auto &swapOutKeys = swapOutKoPair.first; + auto &swapOutPos = swapOutKoPair.second; + + if (specialProcessStatus[info.name] == ProcessStatus::AFTER_SWITCH_FIRST_BATCH) { + // 发现train、save、eval切换,先保存状态,发emptySwapOutKeys以对应上一步的emptySwapOutPos + HandleFirstBatchCaseDDR(info, swapInKoPair, swapOutKoPair); + LOG_DEBUG("handle channel switch case:afterSwitchFirstBatch, table:{}, channelId:{}, batchId:{}", + info.name, info.channelId, info.batchId); + + if (mgmtRankInfo.ctrlSteps[info.channelId] == 1) { + vector emptySwapOutPos; + SendTensorForSwap(info, swapInPos, emptySwapOutPos); + LOG_DEBUG("ProcessEmbInfoDDR special case, user only run one step, table:{}, channelId:{}, batchId:{}", + info.name, info.channelId, info.batchId); + return true; + } + + specialProcessStatus[info.name] = ProcessStatus::AFTER_SWITCH_SECOND_BATCH; + LOG_DEBUG("ProcessEmbInfoDDR end, table:{}, batchId:{}, swapProcessTC(ms):{}, getAndSendTensorsTC(ms):{}", + info.name, info.batchId, swapProcessTC.ElapsedMS(), getAndSendTensorsTC.ElapsedMS()); + return true; + } + if (specialProcessStatus[info.name] == ProcessStatus::AFTER_SWITCH_SECOND_BATCH) { + // 将上一步暂存的状态合并至当前step一起处理 + auto tempStore = trainTestSwitchInfoStore[info.name]; + swapOutKeys.insert(swapOutKeys.end(), tempStore[0].begin(), tempStore[0].end()); + swapOutPos.insert(swapOutPos.end(), tempStore[1].begin(), tempStore[1].end()); + specialProcessStatus[info.name] = ProcessStatus::NORMAL; + LOG_DEBUG("handle channel switch 
case:afterSwitchSecondBatch, table:{}, channelId:{}, batchId:{}", + info.name, info.channelId, info.batchId); + } + return false; +} + +bool HybridMgmt::HandleSpecialProcessStatusSSD(const EmbBaseInfo &info, TimeCost &getAndSendTensorsTC, + pair, vector> &swapInKoPair, + pair, vector> &swapOutKoPair) +{ + TimeCost swapProcessTC; + auto &swapInPos = swapInKoPair.second; + auto &swapOutKeys = swapOutKoPair.first; + auto &swapOutPos = swapOutKoPair.second; + + if (specialProcessStatus[info.name] == ProcessStatus::AFTER_SWITCH_FIRST_BATCH) { + // 发现train、save、eval切换,先保存状态,发emptySwapOutKeys以对应上一步的emptySwapOutPos + HandleFirstBatchCaseSSD(info, swapInKoPair, swapOutKoPair); + LOG_DEBUG("handle channel switch case:afterSwitchFirstBatch, table:{}, channelId:{}, batchId:{}", + info.name, info.channelId, info.batchId); + + if (mgmtRankInfo.ctrlSteps[info.channelId] == 1) { + vector emptySwapOutPos; + SendTensorForSwap(info, swapInPos, emptySwapOutPos); + LOG_DEBUG("ProcessEmbInfoSSD special case, user only run one step, table:{}, channelId:{}, batchId:{}", + info.name, info.channelId, info.batchId); + } + + specialProcessStatus[info.name] = ProcessStatus::AFTER_SWITCH_SECOND_BATCH; + LOG_DEBUG("ProcessEmbInfoSSD end, table:{}, batchId:{}, swapProcessTC(ms):{}, getAndSendTensorsTC(ms):{}", + info.name, info.batchId, swapProcessTC.ElapsedMS(), getAndSendTensorsTC.ElapsedMS()); + return true; + } + if (specialProcessStatus[info.name] == ProcessStatus::AFTER_SWITCH_SECOND_BATCH) { + // 将上一步暂存的状态合并至当前step一起处理 + auto tempStore = trainTestSwitchInfoStore[info.name]; + swapOutKeys.insert(swapOutKeys.end(), tempStore[0].begin(), tempStore[0].end()); + swapOutPos.insert(swapOutPos.end(), tempStore[1].begin(), tempStore[1].end()); + specialProcessStatus[info.name] = ProcessStatus::NORMAL; + LOG_DEBUG("handle channel switch case:afterSwitchSecondBatch, table:{}, channelId:{}, batchId:{}", + info.name, info.channelId, info.batchId); + } + return false; +} + + +void HybridMgmt::CheckLookupAddrSuccessDDR() +{ + if (!lookupAddrSuccess) { + // lookup失败,从future捞出异常 + for (auto& t : lookUpSwapInAddrsThreads) { + t.get(); + } + for (auto& t : lookUpSwapOutAddrsThreads) { + t.get(); + } + } +} + +void HybridMgmt::CheckLookupAddrSuccessSSD() +{ + if (!lookupAddrSuccess) { + for (auto& t : lookUpThreads) { + t.get(); + } + } +} + +void HybridMgmt::GetSwapPairsAndKey2Offset(const EmbBaseInfo &info, vector &uniqueKeys, + pair, vector> &swapInKoPair, + pair, vector> &swapOutKoPair) +{ + TimeCost GetSwapPairsAndKey2OffsetTC; + int swapInCode = embCache->GetSwapPairsAndKey2Offset(info.name, uniqueKeys, swapInKoPair, swapOutKoPair); + if (swapInCode != H_OK) { + string errMsg = StringFormat("table:%s, GetSwapPairsAndKey2Offset failed! 
error code:%d", + info.name.c_str(), swapInCode); + throw runtime_error(errMsg); + } + LOG_DEBUG("table:{}, channel:{}, batchId:{}, GetSwapPairsAndKey2OffsetTC(ms):{}", + info.name, info.channelId, info.batchId, GetSwapPairsAndKey2OffsetTC.ElapsedMS()); +} + +void HybridMgmt::EnqueueSwapInfo(const EmbBaseInfo &info, + pair, vector>& swapInKoPair, + pair, vector>& swapOutKoPair) +{ + auto &swapInKeys = swapInKoPair.first; + auto &swapOutKeys = swapOutKoPair.first; + + LOG_DEBUG("enqueue HBMSwapKeyQue table:{}, batchId:{}, channelId:{}, swapInSize:{}, swapOutSize:{}", + info.name, info.batchId, info.channelId, swapInKeys.size(), swapOutKeys.size()); + HBMSwapKeyQue[info.name + SWAP_OUT_STR].Pushv(swapOutKeys); + HBMSwapKeyQue[info.name + SWAP_IN_STR].Pushv(swapInKeys); + + CheckLookupAddrSuccessDDR(); +} diff --git a/src/core/hybrid_mgmt/hybrid_mgmt.h b/src/core/hybrid_mgmt/hybrid_mgmt.h index a7bdcee6..2b4b2fc8 100644 --- a/src/core/hybrid_mgmt/hybrid_mgmt.h +++ b/src/core/hybrid_mgmt/hybrid_mgmt.h @@ -19,14 +19,19 @@ See the License for the specific language governing permissions and #include #include #include +#include #include "absl/container/flat_hash_map.h" #include "utils/common.h" #include "utils/config.h" +#include "utils/singleton.h" +#include "utils/task_queue.h" +#include "utils/time_cost.h" +#include "ock_ctr_common/include/factory.h" +#include "ock_ctr_common/include/embedding_cache.h" +#include "ock_ctr_common/include/error_code.h" -#include "host_emb/host_emb.h" -#include "emb_hashmap/emb_hashmap.h" #include "hd_transfer/hd_transfer.h" #include "ssd_cache/cache_manager.h" #include "hybrid_mgmt_block.h" @@ -35,12 +40,41 @@ See the License for the specific language governing permissions and namespace MxRec { using namespace std; using namespace tensorflow; + using namespace Common; enum class TaskType { HBM, DDR }; + enum class ProcessStatus { + NORMAL, + AFTER_SWITCH_FIRST_BATCH, + AFTER_SWITCH_SECOND_BATCH + }; + + inline string ProcessStatus2Str(ProcessStatus s) + { + switch (s) { + case ProcessStatus::NORMAL: + return "normal"; + case ProcessStatus::AFTER_SWITCH_FIRST_BATCH: + return "afterSwitchFirstBatch"; + case ProcessStatus::AFTER_SWITCH_SECOND_BATCH: + return "afterSwitchSecondBatch"; + default: + throw std::invalid_argument("Invalid ProcessStatus"); + } + }; + + struct EmbTaskInfo { + int batchId; + int threadIdx; + int cvNotifyIndex; + int extEmbeddingSize; + string name; + }; + class HybridMgmt { public: HybridMgmt() = default; @@ -59,7 +93,7 @@ namespace MxRec { bool Initialize(RankInfo rankInfo, const vector& embInfos, int seed, const vector& thresholdValues, bool ifLoad); - bool Save(const string savePath); + void Save(const string& savePath); bool Load(const string& loadPath, vector warmStartTables); @@ -77,13 +111,7 @@ namespace MxRec { void Destroy(); - bool ParseKeys(int channelId, int& batchId); - - bool ParseKeysHBM(int channelId, int& batchId); - - bool ProcessEmbInfo(const std::string& embName, int batchId, int channelId, bool& remainBatchOut); - - void EmbHDTrans(const int channelId, const int batchId); + bool ParseKeys(int channelId, int& batchId, TaskType type); bool Evict(); @@ -97,39 +125,100 @@ namespace MxRec { void SetOptimizerInfo(const string& embName, OptimizerInfo optimInfo) const; + void FetchDeviceEmb(); + + void ProcessEmbInfoHBM(const EmbBaseInfo& info, bool& remainBatchOut, bool isGrad); + + void ProcessEmbInfoDDR(const EmbBaseInfo& info, bool& remainBatchOut); + + void ProcessEmbInfoSSD(const EmbBaseInfo& info, bool& remainBatchOut); + 
GTEST_PRIVATE:
+    bool mutexDestroy { false };
+    std::mutex lookUpAndSendBatchIdMtx;
+    std::mutex receiveAndUpdateBatchIdMtx;
+    std::map lookUpAndSendTableBatchMap;
+    std::map receiveAndUpdateTableBatchMap;
+
+    std::map> lastUpdateFinishMutexMap;
+    std::map> cvLastUpdateFinishMap;
+    std::map lastUpdateFinishStepMap;
+    std::map> lastLookUpFinishMutexMap;
+    std::map> cvLastLookUpFinishMap;
+    std::map lastLookUpFinishStepMap;
+    std::map> lastSendFinishMutexMap;
+    std::map> cvLastSendFinishMap;
+    std::map lastSendFinishStepMap;
+    std::map> lastRecvFinishMutexMap;
+    std::map> cvLastRecvFinishMap;
+    std::map lastRecvFinishStepMap;
+
+    std::vector EmbeddingLookUpAndSendThreadPool;
+    std::vector EmbeddingReceiveAndUpdateThreadPool;
+    std::vector> lookUpSwapOutAddrsThreads;
+    std::vector> lookUpSwapInAddrsThreads;
+    std::vector> lookUpThreads;
+
+    std::map>> HBMSwapKeyQue;
+    std::map>> SwapOut2SSDKeyQue;
+    std::map>> DDRSwapKeyQue;
+    std::map>> DDRSwapKeyForSSDQue;
+    std::map>> DDRSwapAddrsQue;
+
+    std::mutex evictMut;
+
+    std::map> trainKeysSet;
+
+    const string SWAP_IN_STR = "SwapIn";
+    const string SWAP_OUT_STR = "SwapOut";
+    const string ADDR_STR = "Addr";
+
+    ock::ctr::EmbCacheManagerPtr embCache = nullptr;
+    std::map>> tableToQueueLookup;
+    std::map> lastSwapInPosMap {};
+    std::map>> trainTestSwitchInfoStore {};
+    std::atomic lookupAddrSuccess {true};
+
+    std::mutex saveMutex;
+    std::condition_variable cvCheckSave;

     void SetFeatureTypeForLoad(vector& loadFeatures);

-    bool IsLoadDataMatches(const EmbMemT& loadHostEmbs, const EmbInfo& setupHostEmbs, size_t& embTableCount) const;
-
-    void EvictKeys(const string& embName, const vector& keys);
+    void EvictKeys(const string& embName, const vector& keys);

     void InitRankInfo(RankInfo& rankInfo, const vector& embInfos) const;

-    void EvictSSDKeys(const string& embName, const vector& keys) const;
-
-    void PrepareDDRData(std::shared_ptr table,
-        const vector &keys, int channelId, int batchId) const;
+    void EvictSSDKeys(const string& embName, const vector& keys) const;

     int GetStepFromPath(const string& loadPath) const;

-    static void AddCacheManagerTraceLog(CkptData& saveData);
+    void LookUpAddrs(const string &embName, int extEmbeddingSize);
+
+    void LookUpSwapAddrs(const std::string &embName, const std::string &swapStr);
+
+    void EmbeddingTask();
+
+    void MultiThreadEmbHDTransWrap();
+
+    void EmbeddingLookUpAndSendDDR(int batchId, int index, const EmbInfo& embInfo);
+
+    void EmbeddingReceiveAndUpdateDDR(int batchId, int index, const EmbInfo& embInfo);
+
+    void EmbeddingLookUpAndSendSSD(int batchId, int index, const EmbInfo& embInfo);
+
+    void EmbeddingReceiveAndUpdateSSD(int batchId, int index, const EmbInfo& embInfo);
+
+    void SendTensorForSwap(const EmbBaseInfo& info,
+        const vector &swapInPosUint,
+        const vector &swapOutPosUint);

-    void RestoreFreq4Save(CkptData& saveData) const;
 private:
-    int currentBatchId;
-    int trainBatchId = 0;  // 0-199, 200-
-    int getInfoBatchId;  // 0-199, 200-
-    int sendBatchId;
     HybridMgmtBlock* hybridMgmtBlock;
     vector mgmtEmbInfo;
     RankInfo mgmtRankInfo;
     CacheManager* cacheManager;
-    HostEmb* hostEmbs {};
-    unique_ptr hostHashMaps {};
     vector> procThreads {};
-    map> evictKeyMap {};
+    map> evictKeyMap {};
     HDTransfer *hdTransfer;
     OffsetMapT offsetMapToSend;
     OffsetMapT loadOffsetToSend;
@@ -137,23 +226,101 @@
     bool isRunning;
     bool isLoad { false };
     bool isInitialized { false };
+    bool alreadyTrainOnce = false;  // used to tell whether this is predict-only mode
+    map lookUpSwapInAddrsPushId;  // eos handling: when the consumer catches up with the producer and no upstream data arrives for a long time, eos is triggered
+    map
specialProcessStatus; void TrainTask(TaskType type); void EvalTask(TaskType type); - bool EndBatch(int batchId, int channelId) const; + void SendUniqKeysAndRestoreVecHBM(const EmbBaseInfo &info, + const unique_ptr> &infoVecs, bool isGrad) const; + + void HandleEndBatchCase(const EmbBaseInfo& info, vector& swapInPos); + + bool IsTrainEndBatch(int batchId) const; + + bool IsEvalEndBatch(int batchId) const; + + void InitEmbeddingCache(const vector& embInfos); + + void InitDataPipelineForDDR(const string &embName); + + void InitDataPipelineForSSD(const string &embName, int extEmbeddingSize); + + void JoinEmbeddingCacheThread(); + + void HandleReachMaxStepCase(const EmbBaseInfo& info, bool& remainBatchOut); + + void HandleEosCase(const EmbBaseInfo& info, bool& remainBatchOut); + + void HandleEosCaseHBM(const string& embName, int batchId, int channelId, bool& remainBatchOut); + + bool EmbeddingReceiveDDR(const EmbTaskInfo& info, float*& ptr, vector& swapOutAddrs); + + void EmbeddingUpdateDDR(const EmbTaskInfo& info, const float* embPtr, vector& swapOutAddrs); + + bool EmbeddingLookUpDDR(const EmbTaskInfo& info, vector& h2dEmb); + + void EmbeddingSendDDR(const EmbTaskInfo& info, vector& h2dEmb); + + bool EmbeddingReceiveSSD(const EmbTaskInfo& info, float*& ptr, vector& swapOutAddrs, int64_t& dims0); + + void EmbeddingUpdateSSD(const EmbTaskInfo& info, float* embPtr, vector& swapOutAddrs, int64_t& dims0); + + bool EmbeddingLookUpSSD(const EmbTaskInfo& info, vector& h2dEmb); + + void EmbeddingSendSSD(const EmbTaskInfo& info, vector& h2dEmb); + + void CreateEmbeddingLookUpAndSendThread(int index, const EmbInfo& embInfo); + + void CreateEmbeddingReceiveAndUpdateThread(int index, const EmbInfo& embInfo); + + void HandleFirstBatchCaseDDR(const EmbBaseInfo& info, + std::pair, vector>& swapInKoPair, + std::pair, vector>& swapOutKoPair); + + void HandleFirstBatchCaseSSD(const EmbBaseInfo& info, + std::pair, vector>& swapInKoPair, + std::pair, vector>& swapOutKoPair); + + void HandleDataSwapForSSD(const EmbBaseInfo& info, + vector &swapInKeys, vector &swapOutKeys); + + bool BuildH2DEmbedding(const EmbTaskInfo& info, vector& h2dEmb); + + vector GetUniqueKeys(const EmbBaseInfo& info, bool& remainBatchOut); + + vector GetRestoreVecSec(const EmbBaseInfo& info, bool& remainBatchOut); + + void SendAll2AllVec(const EmbBaseInfo& info, bool& remainBatchOut); + + void SendRestoreVec(const EmbBaseInfo& info, bool& remainBatchOut); + + void SendLookupOffsets(const EmbBaseInfo& info, vector& uniqueKeys, vector& restoreVecSec); + + void SendGlobalUniqueVec(const EmbBaseInfo& info, vector& uniqueKeys, vector& restoreVecSec); + + bool HandleSpecialProcessStatusDDR(const EmbBaseInfo& info, TimeCost& getAndSendTensorsTC, + std::pair, vector>& swapInKoPair, + std::pair, vector>& swapOutKoPair); - void EmbHDTransWrap(int channelId, const int& batchId, int start); + bool HandleSpecialProcessStatusSSD(const EmbBaseInfo& info, TimeCost& getAndSendTensorsTC, + std::pair, vector>& swapInKoPair, + std::pair, vector>& swapOutKoPair); - bool LoadMatchesDDRSetup(const CkptData& loadData); + void CheckLookupAddrSuccessDDR(); - void HandlePrepareDDRDataRet(TransferRet prepareSSDRet) const; + void CheckLookupAddrSuccessSSD(); - void SendUniqKeysAndRestoreVecHBM(int channelId, int& batchId, const EmbInfo &embInfo, - const unique_ptr> &infoVecs) const; + void GetSwapPairsAndKey2Offset(const EmbBaseInfo& info, vector &uniqueKeys, + std::pair, vector>& swapInKoPair, + std::pair, vector>& swapOutKoPair); - void 
SendUniqKeysAndRestoreVecDDR(const string &embName, int &batchId, int &channelId, DDRParam &ddrParam);
+    void EnqueueSwapInfo(const EmbBaseInfo& info,
+        std::pair, vector>& swapInKoPair,
+        std::pair, vector>& swapOutKoPair);
     };
}
#endif  // MX_REC_EMB_MGMT_H
diff --git a/src/core/hybrid_mgmt/hybrid_mgmt_block.cpp b/src/core/hybrid_mgmt/hybrid_mgmt_block.cpp
index ad10bac4..65235389 100644
--- a/src/core/hybrid_mgmt/hybrid_mgmt_block.cpp
+++ b/src/core/hybrid_mgmt/hybrid_mgmt_block.cpp
@@ -40,6 +40,7 @@ void HybridMgmtBlock::CheckAndSetBlock(int channelId)
         LOG_DEBUG(HYBRID_BLOCKING + "blocking by save saveInterval {} pythonBatchId {} hybridBatchId {}",
             saveInterval, pythonBatchId[channelId], hybridBatchId[channelId]);
         isBlock[TRAIN_CHANNEL_ID] = true;
+        finishSave = false;
     }
     if (stepsInterval[channelId] == -1) {
         return;
@@ -74,7 +75,8 @@ bool HybridMgmtBlock::WaitValid(int channelId)
 {
     // wait for hybrid to finish processing
     int reTryNumber = 100;
-    LOG_INFO(HYBRID_BLOCKING + "check step invalid, wait {} {}", channelId, hybridBatchId[channelId]);
+    LOG_INFO(HYBRID_BLOCKING + "validate step and wait, channel:{}, pythonBatchId:{}, hybridBatchId:{}",
+        channelId, pythonBatchId[channelId], hybridBatchId[channelId]);
     // wake up again once hybrid has finished processing
     while (pythonBatchId[lastRunChannelId] != hybridBatchId[lastRunChannelId] and isRunning) {
         std::this_thread::sleep_for(std::chrono::milliseconds(10ms));
@@ -85,6 +87,8 @@
     }
     if (pythonBatchId[channelId] == hybridBatchId[channelId]) {
+        LOG_ERROR(HYBRID_BLOCKING + "step not equal, channel:{}, pythonBatchId:{}, hybridBatchId:{}",
+            channelId, pythonBatchId[channelId], hybridBatchId[channelId]);
         return true;
     } else {
         // if hybrid still cannot catch up with the python side after waiting for a long time, raise an error
@@ -159,14 +163,19 @@ void HybridMgmtBlock::DoBlock(int channelId)
 /// \param channelId channel id: train 0, eval 1
 void HybridMgmtBlock::ResetAll(int channelId)
 {
-    LOG_DEBUG(HYBRID_BLOCKING + "Hybridmgmt is resetting data channelId {} hybridBatchId {}",
-        channelId, hybridBatchId[channelId]);
+    LOG_DEBUG(HYBRID_BLOCKING + "start reset block status,"
+        " channelId:{}, pythonBatchId:{}, readEmbedBatchId:{}, hybridBatchId:{}",
+        channelId, pythonBatchId[channelId], readEmbedBatchId[channelId], hybridBatchId[channelId]);
     readEmbedBatchId[channelId] = 0;
     pythonBatchId[channelId] = 0;
     hybridBatchId[channelId] = 0;
     isBlock[channelId] = false;
+    LOG_DEBUG(HYBRID_BLOCKING + "after reset block status,"
+        " channelId:{}, pythonBatchId:{}, readEmbedBatchId:{}, hybridBatchId:{}",
+        channelId, pythonBatchId[channelId], readEmbedBatchId[channelId], hybridBatchId[channelId]);
+
     LOG_DEBUG("Start to reset isNeedSendEos");
     Singleton::GetInstance()->SetEos(0, channelId);
 }
@@ -224,16 +233,37 @@ void HybridMgmtBlock::SetRankInfo(RankInfo ri)
     this->stepsInterval[TRAIN_CHANNEL_ID] = ri.ctrlSteps[TRAIN_CHANNEL_ID];
     this->stepsInterval[EVAL_CHANNEL_ID] = ri.ctrlSteps[EVAL_CHANNEL_ID];
     this->saveInterval = ri.ctrlSteps[SAVE_STEP_INDEX];
+    this->maxTrainStep = ri.ctrlSteps[MAX_TRAIN_STEP_INDEX];
     this->rankInfo = ri;
-};
+}

 void HybridMgmtBlock::SetStepInterval(int trainStep, int evalStep)
 {
     this->stepsInterval[0] = trainStep;
     this->stepsInterval[1] = evalStep;
-};
+}

 HybridMgmtBlock::~HybridMgmtBlock()
 {
     Destroy();
 }
+
+void HybridMgmtBlock::Wake(int channelId)
+{
+    isBlock[channelId] = false;
+}
+
+bool HybridMgmtBlock::IsNeedWaitSave()
+{
+    if (saveInterval != 0 && saveInterval != -1 &&
+        hybridBatchId[TRAIN_CHANNEL_ID] % saveInterval == 0 &&
+        !finishSave) {
+        return true;
+    }
+    return false;
+}
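
IsNeedWaitSave and FinishSave (defined just below) form a small flag handshake with CheckAndSetBlock, which clears finishSave when a save step is reached. A minimal sketch of that handshake, assuming a single train channel and collapsing the surrounding class to the three fields involved:

    #include <atomic>
    #include <cassert>

    // Simplified handshake: the blocking logic clears finishSave when a save
    // step is due; the save path calls FinishSave() once the checkpoint is
    // written, which releases the wait.
    struct SaveGate {
        int saveInterval = 0;
        int hybridBatchId = 0;
        std::atomic<bool> finishSave { true };

        bool IsNeedWaitSave() const
        {
            return saveInterval != 0 && saveInterval != -1 &&
                   hybridBatchId % saveInterval == 0 && !finishSave;
        }
        void FinishSave() { finishSave = true; }
    };

    int main()
    {
        SaveGate gate;
        gate.saveInterval = 100;
        gate.hybridBatchId = 200;
        gate.finishSave = false;        // a save was scheduled at step 200
        assert(gate.IsNeedWaitSave());  // training must wait here
        gate.FinishSave();              // checkpoint written
        assert(!gate.IsNeedWaitSave());
        return 0;
    }
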
+
+void HybridMgmtBlock::FinishSave()
+{
+    finishSave = true;
+}
diff --git a/src/core/hybrid_mgmt/hybrid_mgmt_block.h b/src/core/hybrid_mgmt/hybrid_mgmt_block.h
index 00cdc73e..a969d7a9 100644
--- a/src/core/hybrid_mgmt/hybrid_mgmt_block.h
+++ b/src/core/hybrid_mgmt/hybrid_mgmt_block.h
@@ -26,6 +26,7 @@ See the License for the specific language governing permissions and
 namespace MxRec {
     const std::string HYBRID_BLOCKING = "[HYBRID_BLOCKING] ";
     const int SAVE_STEP_INDEX = 2;
+    const int MAX_TRAIN_STEP_INDEX = 3;
     const std::chrono::milliseconds SLEEP_MS = 20ms;

     class HybridMgmtBlock {
@@ -39,6 +40,11 @@
         int pythonBatchId[2] = {0, 0};
         // batch id that the readEmbed operator side will process next
         int readEmbedBatchId[2] = {0, 0};
+        int maxTrainStep = 0;
+        int stepsInterval[2] = {0, 0};  // how many steps channel i runs before switching to channel j
+
+        // step up to which hybrid has finished H2D
+        map h2dNextBatchId;

         int loop[2] = {1, 1};
@@ -76,14 +82,19 @@
         void Destroy();

+        void Wake(int channelId);
+
+        bool IsNeedWaitSave();
+
+        void FinishSave();
+
     private:
-        // how many steps channel i runs before switching to channel j
-        int stepsInterval[2] = {0, 0};
         // flags controlling channel blocking
         bool isBlock[2] = {true, true};
         // number of training steps between checkpoint saves
         int saveInterval = 0;
         RankInfo rankInfo;
+        bool finishSave = true;
     };

     class HybridMgmtBlockingException : public std::exception {
diff --git a/src/core/key_process/feature_admit_and_evict.cpp b/src/core/key_process/feature_admit_and_evict.cpp
index fe7295b2..0305665a 100644
--- a/src/core/key_process/feature_admit_and_evict.cpp
+++ b/src/core/key_process/feature_admit_and_evict.cpp
@@ -144,7 +144,7 @@ FeatureAdmitType FeatureAdmitAndEvict::FeatureAdmitHelper(const int channel, con
 }

 // feature eviction interface
-void FeatureAdmitAndEvict::FeatureEvict(map>& evictKeyMap)
+void FeatureAdmitAndEvict::FeatureEvict(map>& evictKeyMap)
 {
     std::vector tableNames = GetAllNeedEvictTableNames();
     if (tableNames.empty()) {
@@ -163,7 +163,7 @@
     }
 }

-void FeatureAdmitAndEvict::FeatureEvictHelper(const std::string& embName, std::vector& evictKey)
+void FeatureAdmitAndEvict::FeatureEvictHelper(const std::string& embName, std::vector& evictKey)
 {
     // evict and delete from m_historyRecords
     time_t currTime = m_recordsData.timestamps[embName];
diff --git a/src/core/key_process/feature_admit_and_evict.h b/src/core/key_process/feature_admit_and_evict.h
index 0b31b080..6c82c846 100644
--- a/src/core/key_process/feature_admit_and_evict.h
+++ b/src/core/key_process/feature_admit_and_evict.h
@@ -25,7 +25,6 @@ See the License for the specific language governing permissions and
 #include
 #include
 #include "absl/container/flat_hash_map.h"
-#include "host_emb/host_emb.h"
 #include "utils/common.h"
 #include "utils/safe_queue.h"
 #include "utils/singleton.h"
@@ -69,7 +68,7 @@
             KeysT& splitKey, std::vector& keyCount);

         // feature eviction interface
-        void FeatureEvict(map>& evictKeyMap);
+        void FeatureEvict(map>& evictKeyMap);

         void ExecuteFeatureAdmit(
             const string& tableName, int channel, KeysT& splitKey, absl::flat_hash_map& mergeKeys);
@@ -105,7 +104,7 @@
         std::vector GetAllNeedEvictTableNames();
         FeatureAdmitType FeatureAdmitHelper(const int channel, const std::string& tableNameOrigin,
             const int64_t featureId, const uint32_t featureCnt);
-        void FeatureEvictHelper(const std::string& embName, std::vector& evictKey);
+        void FeatureEvictHelper(const std::string& embName, std::vector& evictKey);
         void ResetAllRecords();

         bool m_isEnableFunction { true };  // enable switch for the feature-eviction function
diff --git a/src/core/key_process/key_process.cpp b/src/core/key_process/key_process.cpp
index 22148581..63163453 100644
--- 
a/src/core/key_process/key_process.cpp +++ b/src/core/key_process/key_process.cpp @@ -24,7 +24,6 @@ See the License for the specific language governing permissions and #include "utils/singleton.h" #include "utils/time_cost.h" #include "utils/config.h" -#include "host_emb/host_emb.h" #include "emb_table/embedding_mgmt.h" #include "hd_transfer/hd_transfer.h" #include "ock_ctr_common/include/error_code.h" @@ -44,10 +43,15 @@ bool KeyProcess::Initialize(const RankInfo& rInfo, const vector& eInfos const vector& thresholdValues, int seed) { + readySendEosCnt[TRAIN_CHANNEL_ID].store(0); + readySendEosCnt[EVAL_CHANNEL_ID].store(0); + finishSendEosCnt[TRAIN_CHANNEL_ID].store(0); + finishSendEosCnt[EVAL_CHANNEL_ID].store(0); + this->rankInfo = rInfo; - + SetupHotEmbUpdateStep(); - + map scInfo; for (const auto& info: eInfos) { embInfos[info.name] = info; @@ -79,13 +83,6 @@ bool KeyProcess::Initialize(const RankInfo& rInfo, const vector& eInfos LOG_WARN(KEY_PROCESS "Feature admit-and-evict function is unavailable ..."); } - if (GlobalEnv::fastUnique) { - int result = ock::ctr::Factory::Create(factory); - if (result != 0) { - throw runtime_error(Logger::Format("create fast factory failed, error code:{}", result)); - } - } - LOG_INFO(KEY_PROCESS "scInfo:{}, localRankSize:{}, rankSize:{}, useStatic:{}", MapToString(scInfo), rInfo.localRankSize, rInfo.rankSize, rInfo.useStatic); #ifndef GTEST @@ -374,25 +371,32 @@ bool KeyProcess::KeyProcessTaskHelperWithFastUnique(unique_ptr& batch // Static all2all,need send count if (!rankInfo.useStatic) { SendA2A(uniqueInfo.all2AllInfo.scAll, batch->name, batch->channel, batch->batchId); } + TimeCost pushResultTC; auto tensors = make_unique>(); tensors->push_back(Vec2TensorI32(uniqueInfo.restore)); uniqueInfo.hotPos.resize(hotEmbTotCount[batch->name], -1); tensors->push_back(Vec2TensorI32(uniqueInfo.hotPos)); - + if (!rankInfo.isDDR) { PushGlobalUniqueTensors(move(tensors), uniqueInfo.all2AllInfo.keyRecv, channel); tensors->push_back(rankInfo.useDynamicExpansion ? Vec2TensorI64(uniqueInfo.all2AllInfo.keyRecv) : - Vec2TensorI32(uniqueInfo.all2AllInfo.keyRecv)); + Vec2TensorI32(uniqueInfo.all2AllInfo.keyRecv)); + PushResultHBM(batch, move(tensors)); + } else { + std::vector lookupKeysUint(uniqueInfo.all2AllInfo.keyRecv.begin(), + uniqueInfo.all2AllInfo.keyRecv.end()); + vector uniqueKeys; + vector restoreVecSec; + GlobalUnique(lookupKeysUint, uniqueKeys, restoreVecSec); + PushResultDDR(batch, move(tensors), uniqueKeys, restoreVecSec); } - TimeCost pushResultTC; - PushResult(batch, move(tensors), uniqueInfo.all2AllInfo.keyRecv); + LOG_DEBUG("pushResultTC(ms):{}", pushResultTC.ElapsedMS()); if (GlogConfig::gStatOn) { LOG_INFO(STAT_INFO "channel_id {} batch_id {} rank_id {} key_process_time_cost_with_fast_unique {}", channel, batch->batchId, rankInfo.rankId, totalTimeCost.ElapsedMS()); } - LOG_DEBUG("pushResultTC(ms):{}", pushResultTC.ElapsedMS()); return true; } @@ -440,13 +444,19 @@ bool KeyProcess::KeyProcessTaskHelper(unique_ptr& batch, int channel, hotPos.resize(hotEmbTotCount[batch->name], 0); tensors->push_back(Vec2TensorI32(hotPos)); - + if (!rankInfo.isDDR) { PushGlobalUniqueTensors(tensors, lookupKeys, channel); tensors->push_back(rankInfo.useDynamicExpansion ? 
Vec2TensorI64(lookupKeys) : Vec2TensorI32(lookupKeys));
+        PushResultHBM(batch, move(tensors));
+    } else {
+        std::vector lookupKeysUint(lookupKeys.begin(), lookupKeys.end());
+        vector uniqueKeys;
+        vector restoreVecSec;
+        GlobalUnique(lookupKeysUint, uniqueKeys, restoreVecSec);
+        PushResultDDR(batch, move(tensors), uniqueKeys, restoreVecSec);
     }
-    PushResult(batch, move(tensors), lookupKeys);
     LOG_DEBUG("pushResultTC(ms):{}", pushResultTC.ElapsedMS());
     if (GlogConfig::gStatOn) {
         LOG_INFO(STAT_INFO "channel_id {} batch_id {} rank_id {} key_process_time_cost {}",
@@ -504,15 +514,22 @@ vector KeyProcess::GetCountRecv(const unique_ptr& batch, in
     return countRecv;
 }

-void KeyProcess::PushResult(unique_ptr& batch, unique_ptr> tensors,
-    KeysT& lookupKeys)
+void KeyProcess::PushResultHBM(unique_ptr& batch, unique_ptr> tensors)
 {
     std::unique_lock lockGuard(mut);
     storage.push_front(move(tensors));
     infoList[batch->name][batch->channel].push(make_tuple(batch->batchId, batch->name, storage.begin()));
-    if (rankInfo.isDDR) {
-        lookupKeysList[batch->name][batch->channel].push(make_tuple(batch->batchId, batch->name, move(lookupKeys)));
-    }
+    lockGuard.unlock();
+}
+
+void KeyProcess::PushResultDDR(unique_ptr& batch, unique_ptr> tensors,
+    std::vector& uniqueKeys, std::vector& restoreVecSec)
+{
+    std::unique_lock lockGuard(mut);
+    storage.push_front(move(tensors));
+    infoList[batch->name][batch->channel].push(make_tuple(batch->batchId, batch->name, storage.begin()));
+    uniqueKeysList[batch->name][batch->channel].push(make_tuple(batch->batchId, batch->name, move(uniqueKeys)));
+    restoreVecSecList[batch->name][batch->channel].push(make_tuple(batch->batchId, batch->name, move(restoreVecSec)));
     lockGuard.unlock();
 }
@@ -1158,33 +1175,113 @@ void KeyProcess::BuildRestoreVec(const unique_ptr& batch, const vecto
 }

 template
-T KeyProcess::GetInfo(info_list_t& list, int batch, const string& embName, int channel)
+T KeyProcess::GetInfo(info_list_t& list, const EmbBaseInfo &info)
 {
     std::lock_guard lockGuard(mut);
-    if (list[embName][channel].empty()) {
+    if (list[info.name][info.channelId].empty()) {
         LOG_TRACE("get info list is empty.");
         throw EmptyList();
     }
-    auto topBatch = get(list[embName][channel].top());
-    if (topBatch < batch) {
-        LOG_ERROR("wrong batch id, top:{} getting:{}, channel:{}, may not clear channel", topBatch, batch, channel);
+    auto topBatch = get(list[info.name][info.channelId].top());
+    if (topBatch < info.batchId) {
+        LOG_ERROR("wrong batch id, top:{} getting:{}, channel:{}, may not clear channel",
+            topBatch, info.batchId, info.channelId);
         this_thread::sleep_for(1s);
     }
-    if (topBatch != batch) {
-        LOG_TRACE("topBatch({}) is not equal batch({}).", topBatch, batch);
+    if (topBatch != info.batchId) {
+        LOG_TRACE("topBatch({}) is not equal batch({}).", topBatch, info.batchId);
         throw WrongListTop();
     }
-    auto t = list[embName][channel].top();
-    list[embName][channel].pop();
+    auto t = list[info.name][info.channelId].top();
+    list[info.name][info.channelId].pop();
     return move(t);
 }
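
GetInfo hands out results strictly in batch order: the consumer names the batch it expects, and the two exception types distinguish "nothing queued yet" from "the queued head is a different batch". A self-contained sketch of that contract, assuming a simplified payload type and a plain priority queue in place of info_list_t:

    #include <cassert>
    #include <exception>
    #include <queue>
    #include <utility>
    #include <vector>

    struct EmptyList : std::exception {};
    struct WrongListTop : std::exception {};

    // Min-heap ordered by batch id; only the entry whose batch id matches the
    // caller's expectation is handed out.
    using Entry = std::pair<int, std::vector<int>>;
    struct ByBatch {
        bool operator()(const Entry& a, const Entry& b) const { return a.first > b.first; }
    };
    using BatchQueue = std::priority_queue<Entry, std::vector<Entry>, ByBatch>;

    std::vector<int> GetInfoSketch(BatchQueue& q, int wantedBatch)
    {
        if (q.empty()) {
            throw EmptyList();       // caller sleeps briefly and retries
        }
        if (q.top().first != wantedBatch) {
            throw WrongListTop();    // producer has not pushed this batch yet
        }
        auto payload = q.top().second;
        q.pop();
        return payload;
    }

    int main()
    {
        BatchQueue q;
        q.push({ 1, { 42 } });
        try { GetInfoSketch(q, 0); assert(false); } catch (WrongListTop&) {}
        q.push({ 0, { 7 } });
        assert(GetInfoSketch(q, 0).front() == 7);
        return 0;
    }
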

 /// In DDR mode, fetch the lookup tensor vector from the list
 /// \param batch number of processed batches
 /// \param embName table name
 /// \param channel channel index (train/eval)
 /// \return
-KeysT KeyProcess::GetLookupKeys(int batch, const string& embName, int channel)
+vector KeyProcess::GetUniqueKeys(const EmbBaseInfo& info, bool& isEos,
+    map &lookUpSwapInAddrsPushId)
+{
+    TimeCost tc = TimeCost();
+
+    HybridMgmtBlock* hybridMgmtBlock = Singleton::GetInstance();
+    bool cancelMonitor = false;
+    thread timeoutMonitor;
+    if (info.batchId != 0) {
+        timeoutMonitor = StartEosMonitorThread(info, cancelMonitor);
+    }
+
+    // keep trying to fetch data from the list; return an empty vector if the key process thread exits or processing times out
+    vector ret;
+    auto startTime = std::chrono::system_clock::now();
+    while (true) {
+        if (!isRunning) {
+            break;
+        }
+        auto endTime = std::chrono::system_clock::now();
+        // check whether info.batchId has already expired, i.e. the channel has been reset
+        if (info.batchId != hybridMgmtBlock->hybridBatchId[info.channelId]) {
+            LOG_DEBUG(KEY_PROCESS "Detected that the batch has expired at this time, exiting the loop! {}[{}]:{}",
+                info.name, info.channelId, info.batchId);
+            break;
+        }
+        if (info.batchId != 0 && info.channelId != 0 && tc.ElapsedSec() > KEY_PROCESS_TIMEOUT) {
+            LOG_WARN(KEY_PROCESS "getting lookup keys timeout! {}[{}]:{}",
+                info.name, info.channelId, info.batchId);
+            break;
+        }
+        try {
+            auto infoVec = GetInfo(uniqueKeysList, info);
+            ret = get>(infoVec);
+            break;
+        } catch (EmptyList&) {
+            unique_lock lockEosGuard(eosMutex);
+            isEos = IsGetUniqueKeysEos(info, startTime, lookUpSwapInAddrsPushId);
+            if (isEos) {
+                break;
+            }
+            this_thread::sleep_for(1ms);
+        } catch (WrongListTop&) {
+            LOG_TRACE("getting info failed table:{}, channel:{}, mgmt batchId:{}, wrong top",
+                info.name, info.channelId, info.batchId);
+            this_thread::sleep_for(1ms);
+        }
+    }
+    cancelMonitor = true;
+    if (timeoutMonitor.joinable()) {
+        timeoutMonitor.join();
+    }
+    return ret;
+}
+
+bool KeyProcess::IsGetUniqueKeysEos(const EmbBaseInfo& info, std::chrono::system_clock::time_point& startTime,
+    map& lookUpSwapInAddrsPushId)
+{
+    HybridMgmtBlock* hybridMgmtBlock = Singleton::GetInstance();
+    auto endTime = std::chrono::system_clock::now();
+
+    // the real number of readEmbKey calls is readEmbedBatchId minus 1
+    int readEmbKeyBatchId = hybridMgmtBlock->readEmbedBatchId[info.channelId] - 1;
+    // prevent eos from cutting in ahead of data that keyProcess has not finished processing
+    std::chrono::duration elapsedTime = endTime - startTime;
+    if (info.batchId != 0 && elapsedTime.count() >= timeoutGetUniqueKeysEmpty) {
+        LOG_DEBUG("table:{}, channelId:{}, isNeedSendEos:{}, readEmbKeyBatchId:{}, batch:{}, h2dNextBatchId:{},"
+            " lookUpSwapInAddrsPushId:{}", info.name, info.channelId, isNeedSendEos[info.channelId],
+            readEmbKeyBatchId, info.batchId, hybridMgmtBlock->h2dNextBatchId[info.name],
+            lookUpSwapInAddrsPushId[info.name]);
+        startTime = std::chrono::system_clock::now();
+    }
+    if (isNeedSendEos[info.channelId] && readEmbKeyBatchId < info.batchId &&
+        hybridMgmtBlock->h2dNextBatchId[info.name] == lookUpSwapInAddrsPushId[info.name]) {
+        LOG_INFO("table:{}, channelId:{} batchId:{}, GetUniqueKeys eos",
+            info.name, info.channelId, info.batchId);
+        return true;
+    }
+    LOG_TRACE("getting uniqueKeys failed, table:{}, channel:{}, mgmt batchId:{}, readEmbKey batchId:{}, list is empty",
+        info.name, info.channelId, info.batchId, readEmbKeyBatchId);
+    return false;
+}
{}[{}]:{}", - embName, channel, batch); + info.name, info.channelId, info.batchId); return {}; } - if (batch != 0 && channel != 0 && tc.ElapsedSec() > KEY_PROCESS_TIMEOUT) { - LOG_WARN(KEY_PROCESS "getting lookup keys timeout! {}[{}]:{}", embName, channel, batch); + if (info.batchId != 0 && info.channelId != 0 && tc.ElapsedSec() > KEY_PROCESS_TIMEOUT) { + LOG_WARN(KEY_PROCESS "getting lookup keys timeout! {}[{}]:{}", info.name, info.channelId, info.batchId); return {}; } try { - auto ret = GetInfo(lookupKeysList, batch, embName, channel); - return get(ret); + auto ret = GetInfo(restoreVecSecList, info); + return get>(ret); } catch (EmptyList&) { unique_lock lockEosGuard(eosMutex); // readEmbKey真实的次数是readEmbedBatchId减1 - int readEmbKeyBatchId = hybridMgmtBlock->readEmbedBatchId[channel] - 1; + int readEmbKeyBatchId = hybridMgmtBlock->readEmbedBatchId[info.channelId] - 1; // 避免eos在keyProcess还未处理完数据时插队到通道前面 - if (isNeedSendEos[channel] && readEmbKeyBatchId < batch) { - LOG_INFO("channelId:{} batchId:{}, GetLookupKeys eos.", channel, batch); - unique_lock lockDestroyGuard(destroyMutex); - SendEos(batch, channel); - return {}; + if (isNeedSendEos[info.channelId] && readEmbKeyBatchId < info.batchId && + hybridMgmtBlock->h2dNextBatchId[info.name] == info.batchId) { + LOG_ERROR("channelId:{} batchId:{}, GetRestoreVecSec eos, code should not reach here", + info.channelId, info.batchId); + throw runtime_error("GetRestoreVecSec eos, code should not reach here"); } LOG_TRACE("getting info failed {}[{}], list is empty, and mgmt batchId: {}, readEmbKey batchId: {}.", - embName, channel, batch, readEmbKeyBatchId); + info.name, info.channelId, info.batchId, readEmbKeyBatchId); this_thread::sleep_for(1ms); } catch (WrongListTop&) { - LOG_TRACE("getting info failed {}[{}]:{} wrong top", embName, channel, batch); + LOG_TRACE("getting info failed {}[{}]:{} wrong top", info.name, info.channelId, info.batchId); this_thread::sleep_for(1ms); } } } /// 当数据列表为空,且eos标志位为true时,主动发送eos +/// \param embName 表名 /// \param batchId 已处理的batch数 /// \param channel 通道索引(训练/推理) -void KeyProcess::SendEos(int batchId, int channel) +/// \param sendAllChannel 是否强制发送所有channel +void KeyProcess::SendEos(const std::string& embName, int batchId, int channel, bool sendAllChannel) { #ifndef GTEST - LOG_INFO("channelId:{} batchId:{}, SendEos start.", channel, batchId); - - auto trans = Singleton::GetInstance(); - unordered_map transChannels = trans->GetTransChannel(); - std::set usedChannelNames = trans->GetUsedTransChannel()[channel]; - - vector tensors; - bool isNeedResend = true; - - for (const auto& emb: as_const(embInfos)) { // 一个表触发以后,其余表都发送eos,最后外层接收null退出此次循环 - LOG_INFO("channelId:{} batchId:{}, the embName:{} related channel SendEos start.", channel, batchId, emb.first); - if (!isRunning) { - throw EndRunExit("SendEos end run, isRunning is false after lock destroyMutex."); - } - for (const string& transName : usedChannelNames) { - string sendName = StringFormat("%s_%s_%d", emb.first.c_str(), transName.c_str(), channel); - size_t channelSize = 0; - - acltdtQueryChannelSize(transChannels[sendName], &channelSize); - LOG_INFO("[EOS] Before send eos, {} contains {}.", sendName, channelSize); - SendTensorsByAcl(transChannels[sendName], ACL_TENSOR_DATA_END_OF_SEQUENCE, tensors, isNeedResend); - acltdtQueryChannelSize(transChannels[sendName], &channelSize); - LOG_INFO("[EOS] After send eos, {} contains {}.", sendName, channelSize); - } - LOG_INFO("channelId:{} batchId:{}, the embName:{} related channel SendEos end.", channel, batchId, 

 /// when the data list is empty and the eos flag is true, actively send eos
+/// \param embName table name
 /// \param batchId number of processed batches
 /// \param channel channel index (train/eval)
+/// \param sendAllChannel whether to force sending on all channels
-void KeyProcess::SendEos(int batchId, int channel)
+void KeyProcess::SendEos(const std::string& embName, int batchId, int channel, bool sendAllChannel)
 {
 #ifndef GTEST
-    LOG_INFO("channelId:{} batchId:{}, SendEos start.", channel, batchId);
-
-    auto trans = Singleton::GetInstance();
-    unordered_map transChannels = trans->GetTransChannel();
-    std::set usedChannelNames = trans->GetUsedTransChannel()[channel];
-
-    vector tensors;
-    bool isNeedResend = true;
-
-    for (const auto& emb: as_const(embInfos)) {  // once one table triggers, all other tables send eos too; the outer layer then receives null and exits this loop
-        LOG_INFO("channelId:{} batchId:{}, the embName:{} related channel SendEos start.", channel, batchId, emb.first);
-        if (!isRunning) {
-            throw EndRunExit("SendEos end run, isRunning is false after lock destroyMutex.");
-        }
-        for (const string& transName : usedChannelNames) {
-            string sendName = StringFormat("%s_%s_%d", emb.first.c_str(), transName.c_str(), channel);
-            size_t channelSize = 0;
-
-            acltdtQueryChannelSize(transChannels[sendName], &channelSize);
-            LOG_INFO("[EOS] Before send eos, {} contains {}.", sendName, channelSize);
-            SendTensorsByAcl(transChannels[sendName], ACL_TENSOR_DATA_END_OF_SEQUENCE, tensors, isNeedResend);
-            acltdtQueryChannelSize(transChannels[sendName], &channelSize);
-            LOG_INFO("[EOS] After send eos, {} contains {}.", sendName, channelSize);
-        }
-        LOG_INFO("channelId:{} batchId:{}, the embName:{} related channel SendEos end.", channel, batchId, emb.first);
+    finishSendEosCnt[channel].store(0);
+    ++readySendEosCnt[channel];
+    LOG_INFO("table:{}, channelId:{} batchId:{}, readySendEosCnt:{}, ready to SendEos",
+        embName, channel, batchId, readySendEosCnt[channel]);
+    while (readySendEosCnt[channel] != static_cast(embInfos.size())) {
+        LOG_DEBUG("table:{}, readySendEosCnt:{}, waiting for the other tables to enter SendEos",
+            embName, readySendEosCnt[channel]);
+        this_thread::sleep_for(1000ms);
+    }
+    LOG_INFO("table:{}, channelId:{} batchId:{}, SendEos start, acquiring destroyMutex", embName, channel, batchId);
+    destroyMutex.lock();
+
+    LOG_INFO("table:{}, channelId:{} batchId:{}, SendEos start", embName, channel, batchId);
+    if (!isRunning) {
+        LOG_INFO("another table triggered eos ahead, keyProcess already destroyed, skip sending eos for table:{}",
+            embName);
+        ++finishSendEosCnt[channel];
+        destroyMutex.unlock();
+        return;
     }
+    SendEosTensor(embName, channel, sendAllChannel);
+    destroyMutex.unlock();
+    LOG_INFO("channelId:{} batchId:{}, the embName:{} SendEos end, release destroyMutex", channel, batchId, embName);

-    LOG_INFO("channelId:{} batchId:{}, SendEos end.", channel, batchId);
+    ++finishSendEosCnt[channel];
+    LOG_INFO("table:{}, channelId:{} batchId:{}, finishSendEosCnt:{}, finished SendEos",
+        embName, channel, batchId, finishSendEosCnt[channel]);
+    while (finishSendEosCnt[channel] != static_cast(embInfos.size())) {
+        LOG_DEBUG("table:{}, channelId:{} batchId:{}, finishSendEosCnt:{}, waiting for the other tables to finish SendEos",
+            embName, channel, batchId, finishSendEosCnt[channel]);
+        this_thread::sleep_for(1000ms);
+    }
+    readySendEosCnt[channel].store(0);
     isNeedSendEos[channel] = false;
+    LOG_DEBUG("isNeedSendEos set to false, table:{}, channelId:{} batchId:{}", embName, channel, batchId);
 #endif
 }
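
The readySendEosCnt/finishSendEosCnt pair above is a two-phase rendezvous: no table sends EOS until every table has entered SendEos, and none resets state until all have finished sending. A minimal single-use sketch of that pattern with std::atomic counters (the table count and the send step are stand-ins for embInfos.size() and SendEosTensor):

    #include <atomic>
    #include <cassert>
    #include <thread>
    #include <vector>

    std::atomic<int> readyCnt { 0 };
    std::atomic<int> finishCnt { 0 };

    // One thread per table: phase 1 waits for all tables to arrive, phase 2
    // waits for all tables to have sent.
    void SendEosSketch(int tableCount)
    {
        ++readyCnt;
        while (readyCnt.load() != tableCount) {
            std::this_thread::yield();   // wait for the other tables to enter
        }
        // ... send the EOS tensor for this table's channels here ...
        ++finishCnt;
        while (finishCnt.load() != tableCount) {
            std::this_thread::yield();   // wait until everyone has sent
        }
    }

    int main()
    {
        const int tables = 4;
        std::vector<std::thread> ts;
        for (int i = 0; i < tables; ++i) {
            ts.emplace_back(SendEosSketch, tables);
        }
        for (auto& t : ts) { t.join(); }
        assert(finishCnt.load() == tables);
        return 0;
    }
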
{}[{}]:{}", info.name, info.channelId, info.batchId); + break; } try { - auto ret = GetInfo(*list, batch, embName, channel); - auto it = get>>::iterator>(ret); - auto uTensor = move(*it); + auto infoVec = GetInfo(*list, info); + auto it = get>>::iterator>(infoVec); + ret = std::move(*it); std::unique_lock lockGuard(mut); storage.erase(it); - return uTensor; + break; } catch (EmptyList&) { unique_lock lockEosGuard(eosMutex); - // 避免eos在keyProcess还未处理完数据时插队到通道前面, readEmbKey真实的次数是readEmbedBatchId减1 - if (isNeedSendEos[channel] && (hybridMgmtBlock->readEmbedBatchId[channel] - 1) < batch) { - LOG_INFO("channelId:{} batchId:{}, GetInfoVec eos.", channel, batch); - unique_lock lockDestroyGuard(destroyMutex); - SendEos(batch, channel); - return nullptr; + isEos = IsGetInfoVecEos(info.batchId, info.name, info.channelId); + if (isEos) { + break; } LOG_TRACE("getting info failed {}[{}], list is empty, and mgmt batchId: {}, readEmbKey batchId: {}.", - embName, channel, batch, (hybridMgmtBlock->readEmbedBatchId[channel] - 1)); + info.name, info.channelId, info.batchId, (hybridMgmtBlock->readEmbedBatchId[info.channelId] - 1)); this_thread::sleep_for(1ms); } catch (WrongListTop&) { - LOG_TRACE("getting info failed {}[{}]:{} wrong top", embName, channel, batch); + LOG_TRACE("getting info failed {}[{}]:{} wrong top", info.name, info.channelId, info.batchId); this_thread::sleep_for(1ms); } } + return ret; } void KeyProcess::SendA2A(const vector& a2aInfo, const string& embName, int channel, int batch) @@ -1355,13 +1457,13 @@ int KeyProcess::GetMaxStep(int channelId) const return rankInfo.ctrlSteps.at(channelId); } -void KeyProcess::EvictKeys(const string& embName, const vector& keys) // hbm +void KeyProcess::EvictKeys(const string& embName, const vector& keys) // hbm { LOG_INFO(KEY_PROCESS "hbm funEvictCall: [{}]! 

-void KeyProcess::EvictKeys(const string& embName, const vector& keys)  // hbm
+void KeyProcess::EvictKeys(const string& embName, const vector& keys)  // hbm
 {
     LOG_INFO(KEY_PROCESS "hbm funEvictCall: [{}]! keySize:{}", embName, keys.size());
     EmbeddingMgmt::Instance()->EvictKeys(embName, keys);
 }

-void KeyProcess::EvictKeysCombine(const vector& keys)  // hbm
+void KeyProcess::EvictKeysCombine(const vector& keys)  // hbm
 {
     LOG_INFO(KEY_PROCESS "hbm combine funEvictCall, keySize:{}", keys.size());
     EmbeddingMgmt::Instance()->EvictKeysCombine(keys);
 }
@@ -1466,7 +1568,94 @@ void KeyProcess::RecordKeyCountMap(const unique_ptr& batch)
 void KeyProcess::SetEos(int status, int channelId)
 {
     unique_lock lockGuard(eosMutex);
-    LOG_INFO("isNeedSendEos status is changed, before status:[{}], input status:{}, channel:[{}], ",
-        isNeedSendEos[channelId], status, channelId);
+    LOG_INFO("isNeedSendEos status is changed, channel:{}, before status:{}, input status:{}",
+        channelId, isNeedSendEos[channelId], status);
     isNeedSendEos[channelId] = (status == 1);
 }
+
+bool KeyProcess::IsGetInfoVecEos(int batch, const string& embName, int channel)
+{
+    HybridMgmtBlock* hybridMgmtBlock = Singleton::GetInstance();
+
+    // prevent eos from cutting in ahead of data that keyProcess has not finished; the real readEmbKey count is readEmbedBatchId minus 1
+    int readEmbKeyBatchId = hybridMgmtBlock->readEmbedBatchId[channel] - 1;
+    if (rankInfo.isDDR) {
+        if (isNeedSendEos[channel] && readEmbKeyBatchId < batch &&
+            hybridMgmtBlock->h2dNextBatchId[embName] == batch) {
+            LOG_ERROR("channelId:{} batchId:{}, GetInfoVec eos, code should not reach here", channel, batch);
+            throw runtime_error("GetInfoVec eos, code should not reach here");
+        }
+    } else {
+        LOG_TRACE("table:{}, channelId:{}, readEmbKeyBatchId:{}, batchId:{}, isNeedSendEos:{}",
+            embName, channel, readEmbKeyBatchId, batch, isNeedSendEos[channel]);
+        if (isNeedSendEos[channel] && readEmbKeyBatchId < batch) {
+            LOG_INFO("table:{}, channelId:{} batchId:{}, GetInfoVec eos", embName, channel, batch);
+            return true;
+        }
+    }
+    return false;
+}
+
+std::thread KeyProcess::StartEosMonitorThread(const EmbBaseInfo &info, bool &cancelMonitor)
+{
+    // embCache sends swapPos one step late, so step n needs data from step n+1 to start; when step n+1
+    // cannot be fetched, eos must be triggered and the swapPos needed by step n re-sent
+    LOG_DEBUG("table:{}, channel:{}, batchId:{}, start a monitor thread to check eos",
+        info.name, info.channelId, info.batchId);
+    return thread([&]() {
+        chrono::high_resolution_clock::time_point start = chrono::high_resolution_clock::now();
+        chrono::high_resolution_clock::time_point end = chrono::high_resolution_clock::now();
+        chrono::duration duration = chrono::duration_cast>(end - start);
+        while (!cancelMonitor && duration.count() < timeoutGetUniqueKeys) {
+            this_thread::sleep_for(1ms);
+            end = chrono::high_resolution_clock::now();
+            duration = chrono::duration_cast>(end - start);
+        }
+        if (!cancelMonitor) {
+            this->SetEos(1, info.channelId);
+            LOG_INFO("table:{}, channel:{}, batchId:{}, timeout:{}(s) monitor empty data, set eos",
+                info.name, info.channelId, info.batchId, timeoutGetUniqueKeys);
+        } else {
+            LOG_DEBUG("table:{}, channel:{}, batchId:{}, timeout monitor canceled",
+                info.name, info.channelId, info.batchId);
+        }
+    });
+}
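
StartEosMonitorThread is a cancellable watchdog: it polls a flag every millisecond and fires the EOS action only if the timeout elapses first. A self-contained sketch of the same structure, with the SetEos call replaced by a print and std::chrono::steady_clock used for the measurement (both are my substitutions, not the patch's choices):

    #include <atomic>
    #include <chrono>
    #include <cstdio>
    #include <thread>

    // Poll a cancellation flag; fire only if timeoutSec elapses first.
    std::thread StartWatchdog(std::atomic<bool>& cancel, double timeoutSec)
    {
        return std::thread([&cancel, timeoutSec]() {
            auto start = std::chrono::steady_clock::now();
            while (!cancel.load()) {
                std::this_thread::sleep_for(std::chrono::milliseconds(1));
                std::chrono::duration<double> elapsed = std::chrono::steady_clock::now() - start;
                if (elapsed.count() >= timeoutSec) {
                    std::puts("timeout: would set eos here");
                    return;
                }
            }
        });
    }

    int main()
    {
        std::atomic<bool> cancel { false };
        std::thread watchdog = StartWatchdog(cancel, 10.0);
        // ... data arrived in time, so cancel the watchdog ...
        cancel = true;
        watchdog.join();   // the caller always joins before returning
        return 0;
    }
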
+
+void KeyProcess::SendEosTensor(const std::string& embName, int channel, bool sendAllChannel)
+{
+#ifndef GTEST
+    auto trans = Singleton::GetInstance();
+    unordered_map transChannels = trans->GetTransChannel();
+    std::set usedChannelNames = trans->GetUsedTransChannel()[channel];
+
+    vector tensors;
+    bool isNeedResend = true;
+    string sendName;
+    for (const string& transName : usedChannelNames) {
+        if (transName == TransferChannel2Str(TransferChannel::SAVE_D2H) ||
+            transName == TransferChannel2Str(TransferChannel::SAVE_H2D)) {
+            // do nothing on the save channel; it is independent of the train, eval and predict channels
+            continue;
+        }
+
+        if (transName == TransferChannel2Str(TransferChannel::SWAP) ||
+            transName == TransferChannel2Str(TransferChannel::H2D)) {
+            sendName = StringFormat("%s_%s_all", embName.c_str(), transName.c_str());
+            if (channel == EVAL_CHANNEL_ID && !sendAllChannel) {
+                LOG_INFO("skip send eos for shared channel:{}, channel id:{}", sendName, channel);
+                LOG_INFO("check if train ProcessEmbInfo runs and let it decide eos or not");
+                continue;
+            }
+        } else {
+            sendName = StringFormat("%s_%s_%d", embName.c_str(), transName.c_str(), channel);
+        }
+
+        size_t channelSize = 0;
+        acltdtQueryChannelSize(transChannels[sendName], &channelSize);
+        LOG_INFO("[EOS] Before send eos, channel:{}, size:{}.", sendName, channelSize);
+        SendTensorsByAcl(transChannels[sendName], ACL_TENSOR_DATA_END_OF_SEQUENCE, tensors, isNeedResend);
+        acltdtQueryChannelSize(transChannels[sendName], &channelSize);
+        LOG_INFO("[EOS] After send eos, channel:{}, size:{}.", sendName, channelSize);
+    }
+#endif
+}
diff --git a/src/core/key_process/key_process.h b/src/core/key_process/key_process.h
index 8bd7b8d0..589fc2a5 100644
--- a/src/core/key_process/key_process.h
+++ b/src/core/key_process/key_process.h
@@ -83,9 +83,11 @@ namespace MxRec {
         bool Initialize(const RankInfo& rInfo, const vector& eInfos,
             const vector& thresholdValues = {}, int seed = 0);

-        unique_ptr> GetInfoVec(int batch, const string& embName, int channel, ProcessedInfo type);
+        unique_ptr> GetInfoVec(const EmbBaseInfo& info, ProcessedInfo type, bool &isEos);

-        KeysT GetLookupKeys(int batch, const string& embName, int channel);
+        vector GetUniqueKeys(const EmbBaseInfo &info, bool &isEos, map &lookUpSwapInAddrsPushId);
+
+        vector GetRestoreVecSec(const EmbBaseInfo& info);

         int GetMaxStep(int channelId) const;
@@ -109,9 +111,9 @@
         void LoadSaveUnlock();

-        void EvictKeys(const string& embName, const vector& keys);
+        void EvictKeys(const string& embName, const vector& keys);

-        void EvictKeysCombine(const vector& keys);
+        void EvictKeysCombine(const vector& keys);

         void SetupHotEmbUpdateStep();
@@ -157,7 +159,7 @@
         void SetEos(int status, int channelId);

-        void SendEos(int batchId, int channel);
+        void SendEos(const string& embName, int batchId, int channel, bool sendAllChannel);

         bool isRunning { false };
@@ -167,12 +169,13 @@
         {
             return embInfos.find(embName) != embInfos.end();
         };
+
     GTEST_PRIVATE:
         int Start();

         template
-        T GetInfo(info_list_t& list, int batch, const string& embName, int channel);
+        T GetInfo(info_list_t& list, const EmbBaseInfo &info);

         RankInfo rankInfo;
         map embInfos;
@@ -181,6 +184,8 @@
         vector> procThreads {};
         std::mutex loadSaveMut[MAX_CHANNEL_NUM][MAX_KEY_PROCESS_THREAD] {};
         info_list_t lookupKeysList;
+        info_list_t uniqueKeysList;
+        info_list_t restoreVecSecList;
         list>> storage;
         info_list_t infoList;
         info_list_t all2AllList;
@@ -195,7 +200,13 @@
         ock::ctr::FactoryPtr factory {};
         int hotEmbUpdateStep = HOT_EMB_UPDATE_STEP_DEFAULT;
         bool isWithFAAE;
-        bool isNeedSendEos[2] = { 0, 0 };  // eos status of channels 0 and 1 respectively
+
+        // for end-of-sequence case
+        bool isNeedSendEos[2] = {false, false};  // eos status of channels 0 and 1, shared by all tables
+        atomic readySendEosCnt[2];
+        atomic finishSendEosCnt[2];
+        const double timeoutGetUniqueKeys = 10.0;  // trigger EOS if no data arrives before this timeout
+        const double timeoutGetUniqueKeysEmpty = 1.0;  // log a message if no data arrives before this timeout

         void InitHotEmbTotCount(const EmbInfo& info, const RankInfo& rInfo);
@@ -262,7 +273,10 @@
         void HandleHotAndSendCount(const unique_ptr &batch, UniqueInfo& uniqueInfoOut,
             KeySendInfo& keySendInfo, vector& sc, vector& splitSize);

-        void PushResult(unique_ptr& batch, unique_ptr> tensors, KeysT& lookupKeys);
+        void PushResultHBM(unique_ptr& batch, unique_ptr> tensors);
+
+        void PushResultDDR(unique_ptr& batch, unique_ptr> tensors,
+            std::vector& uniqueKeys, std::vector& restoreVecSec);

         void PushGlobalUniqueTensors(const unique_ptr>& tensors, KeysT& lookupKeys, int channel);
@@ -290,6 +304,15 @@
         }

         string DumpSplitKeys(vector>& splitKeys) const;
+
+        bool IsGetInfoVecEos(int batch, const string& embName, int channel);
+
+        bool IsGetUniqueKeysEos(const EmbBaseInfo& info, std::chrono::system_clock::time_point& startTime,
+            map& lookUpSwapInAddrsPushId);
+
+        void SendEosTensor(const std::string& embName, int channel, bool sendAllChannel);
+
+        std::thread StartEosMonitorThread(const EmbBaseInfo& info, bool& cancelMonitor);
     };

 #define KEY_PROCESS_INSTANCE Singleton::GetInstance()
diff --git a/src/core/ock_ctr_common/include/embedding_cache.h b/src/core/ock_ctr_common/include/embedding_cache.h
new file mode 100644
index 00000000..f3bc9e23
--- /dev/null
+++ b/src/core/ock_ctr_common/include/embedding_cache.h
@@ -0,0 +1,321 @@
+/* Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+ limitations under the License.
+==============================================================================*/
+
+#ifndef EMBEDDING_CACHE_H
+#define EMBEDDING_CACHE_H
+
+#include
+#include
+#include
+#include
+
+namespace EmbCache {
+using KeyOffsetPair = std::pair, std::vector>;
+
+class Initializer {
+public:
+    Initializer() = default;
+    virtual ~Initializer() = default;
+
+    /* *
+     * Generate random data.
+     * @Param emb start address of the embedding
+     * @Param embSize length of the embedding
+     */
+    virtual void GenerateData(float* emb, int embSize) = 0;
+    uint32_t start = 0;     // start position
+    uint32_t len = 0;       // length to initialize
+    float initParam = 1.0;  // every value produced by the initializer is multiplied by initParam
+};
+
+enum class InitializerType {
+    INVALID,
+    CONSTANT,
+    TRUNCATED_NORMAL,
+    RANDOM_NORMAL
+};
+
+struct ConstantInitializerInfo {
+    ConstantInitializerInfo() = default;
+
+    ConstantInitializerInfo(float constantValue, float initK);
+
+    float constantValue = 0;  // constant value
+    float initK = 1.0;        // initialized values are multiplied by initK
+};
+
+struct NormalInitializerInfo {
+    NormalInitializerInfo() = default;
+
+    NormalInitializerInfo(float mean, float stddev, uint32_t seed, float initK);
+
+    float mean = 0;      // mean
+    float stddev = 0;    // standard deviation
+    uint32_t seed = 0;   // random seed
+    float initK = 1.0;   // initialized values are multiplied by initK
+};
+
+class ConstantInitializer : public Initializer {
+public:
+    ConstantInitializer() = default;
+
+    ConstantInitializer(uint32_t start, uint32_t len, float value, float initK);
+
+    ~ConstantInitializer() override = default;
+
+    void GenerateData(float* emb, int embSize) override;
+
+    uint32_t start = 0;       // start position
+    uint32_t len = 0;         // length to initialize
+    float constantValue = 0;  // constant value
+};
+
+class RandomNormalInitializer : public Initializer {
+public:
+    RandomNormalInitializer() = default;
+    RandomNormalInitializer(uint32_t start, uint32_t len, NormalInitializerInfo& initInfo);
+
+    ~RandomNormalInitializer() override = default;
+
+    void GenerateData(float* emb, int embSize) override;
+
+    uint32_t start = 0;   // start position
+    uint32_t len = 0;     // length to initialize
+    float mean = 0;       // mean
+    float stddev = 0;     // standard deviation
+    uint32_t seed = 0;    // random seed
+
+    std::default_random_engine generator;   // random number engine
+    std::normal_distribution distribution;  // normal distribution
+};
+
+class TruncatedNormalInitializer : public Initializer {
+public:
+    TruncatedNormalInitializer() = default;
+
+    TruncatedNormalInitializer(uint32_t start, uint32_t len, NormalInitializerInfo& initInfo);
+
+    ~TruncatedNormalInitializer() override = default;
+
+    void GenerateData(float* emb, int embSize) override;
+
+    int boundNum = 2;
+
+    uint32_t start = 0;   // start position
+    uint32_t len = 0;     // length to initialize
+    float mean = 0;       // mean
+    float stddev = 0;     // standard deviation
+    uint32_t seed = 0;    // random seed
+
+    std::default_random_engine generator;   // random number engine
+    std::normal_distribution distribution;
+    float minBound = 0;   // lower bound
+    float maxBound = 0;   // upper bound
+};
+
+struct InitializerInfo {
+    InitializerInfo() = default;
+
+    InitializerInfo(std::string& name, uint32_t start, uint32_t len, ConstantInitializerInfo constantInitializerInfo);
+
+    InitializerInfo(std::string& name, uint32_t start, uint32_t len, NormalInitializerInfo normalInitializerInfo);
+
+    std::string name = "";  // initializer name
+    uint32_t start = 0;     // start position of initialization
+    uint32_t len = 0;       // length to initialize
+    InitializerType initializerType = InitializerType::INVALID;
+
+    ConstantInitializerInfo constantInitializerInfo;
+    NormalInitializerInfo normalInitializerInfo;
+
+    std::shared_ptr initializer;
+};
+
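
The header above only declares the initializers, so the sampling strategy is not fixed by this patch. One plausible reading of TruncatedNormalInitializer with boundNum = 2 is rejection sampling inside [mean - 2*stddev, mean + 2*stddev], scaled by initParam; the sketch below shows that assumption as a free function and should not be taken as the library's actual implementation:

    #include <cstdio>
    #include <random>

    // Redraw until the sample falls inside the two-sigma band, then scale.
    void GenerateTruncatedNormal(float* emb, int embSize, float mean, float stddev,
                                 unsigned seed, float initParam)
    {
        std::default_random_engine generator(seed);
        std::normal_distribution<float> distribution(mean, stddev);
        const float minBound = mean - 2 * stddev;
        const float maxBound = mean + 2 * stddev;
        for (int i = 0; i < embSize; ++i) {
            float v = distribution(generator);
            while (v < minBound || v > maxBound) {
                v = distribution(generator);   // reject out-of-bound samples
            }
            emb[i] = v * initParam;
        }
    }

    int main()
    {
        float emb[8];
        GenerateTruncatedNormal(emb, 8, 0.0f, 1.0f, 42u, 1.0f);
        for (float v : emb) { std::printf("%.3f ", v); }
        std::printf("\n");
        return 0;
    }
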
+struct EmbCacheInfo {
+    EmbCacheInfo(std::string tableName, uint32_t vocabSize, uint32_t embeddingSize, uint32_t extEmbeddingSize,
+        uint32_t maxCacheSize)
+        : tableName(tableName),
+          vocabSize(vocabSize),
+          embeddingSize(embeddingSize),
+          extEmbeddingSize(extEmbeddingSize),
+          maxCacheSize(maxCacheSize)
+    {
+    }
+    std::string tableName = "";
+    uint32_t vocabSize = 0;         // host-side capacity (how many embeddings it can hold)
+    uint32_t embeddingSize = 0;
+    uint32_t extEmbeddingSize = 0;  // embedding length including the embedding and the optimizer state
+    uint32_t maxCacheSize = 0;      // device-side capacity (how many embeddings it can hold)
+};
+
+class EmbCacheManager {
+public:
+    virtual ~EmbCacheManager() = default;
+
+    /* *
+     * Initialize the table matching the current embInfo in the cache manager.
+     * @Param EmbCacheInfo: initialization info of the embedding cache
+     * @Param std::vector: initializer info
+     * @Param uint64_t prefillBufferSize: guaranteed available size of the emb memory pool
+     * @Param uint32_t refillThreadNum: number of threads that automatically refill the emb memory pool
+     * @Return errorCode
+     */
+    virtual int CreateCacheForTable(const EmbCacheInfo& embCacheInfo,
+        const std::vector& initializerInfos, int64_t invalidKey = -1,
+        uint64_t prefillBufferSize = 500000, uint32_t refillThreadNum = 1) = 0;
+
+    /* *
+     * Look up the offsets of the given keys, inserting keys not yet present in the offsetMapper and
+     * assigning them offsets; when the offsetMapper runs out of space, release keys that can be swapped
+     * out, and return the key/offset pairs that must be swapped in and out for this batch.
+     * @Param tableName: table name
+     * @Param keys: all unique keys of the current batch
+     * @Param swapInKoPair: output, key-offset pairs to swap in
+     * @Param swapOutKoPair: output, key-offset pairs to swap out
+     * @Return errorCode
+     */
+    virtual int GetSwapPairsAndKey2Offset(std::string tableName, std::vector& keys,
+        KeyOffsetPair& swapInKoPair, KeyOffsetPair& swapOutKoPair) = 0;
+
+    /* *
+     * Look up embeddings.
+     * @Param tableName: table name
+     * @Param keys: keys to look up
+     * @Param embAddr: start address of the buffer allocated for the embeddings
+     * @Param threadNum: number of threads
+     * @Return errorCode
+     */
+    virtual int EmbeddingLookup(std::string tableName, const std::vector& keys, float* embAddr,
+        uint32_t threadNum = 4) = 0;
+
+    /* *
+     * Look up the addresses of embeddings.
+     * @Param tableName: table name
+     * @Param keys: keys to look up
+     * @Param addrs: per-key start addresses of the buffers holding the embeddings
+     * @Param threadNum: number of threads
+     * @Return errorCode
+     */
+    virtual int EmbeddingLookupAddrs(std::string tableName, const std::vector& keys,
+        std::vector& addrs, uint32_t threadNum = 4) = 0;
+
+    /* *
+     * Look up embeddings and delete the corresponding keys afterwards. With multiple threads, the caller
+     * must strictly guarantee that keys do not repeat across threads (unique keys); otherwise the result
+     * is undefined.
+     * @Param tableName: table name
+     * @Param keys: keys to look up
+     * @Param embAddr: start address of the buffer allocated for the embeddings
+     * @Param threadNum: number of threads
+     * @Return errorCode
+     */
+    virtual int EmbeddingLookupAndRemove(std::string tableName, const std::vector& keys, float* embAddr,
+        uint32_t threadNum = 4) = 0;
+
+    /* *
+     * Update embeddings.
+     * @Param tableName: table name
+     * @Param keys: keys to update, used to resolve each key's storage address in DDR
+     * @Param embAddr: start address of the embeddings to write back to DDR
+     * @Param threadNum: number of threads
+     * @Return errorCode
+     */
+    virtual int EmbeddingUpdate(std::string tableName, const std::vector& keys, float* embAddr,
+        uint32_t threadNum = 4) = 0;
+
+    /* *
+     * Remove keys from the EmbLocalTable and mark the memory holding their embeddings as reusable.
+     * @Param tableName: table name
+     * @Param keys: keys to remove
+     * @Return errorCode
+     */
+    virtual int EmbeddingRemove(std::string tableName, const std::vector& keys, uint32_t threadNum = 4) = 0;
+
+    /* *
+     * Remove keys to be evicted from the offsetMapper records and from the EmbLocalTable, and mark the
+     * memory holding their embeddings as reusable.
+     * @Param tableName: table name
+     * @Param keys: keys to evict
+     * @Return errorCode
+     */
+    virtual int RemoveEmbsByKeys(std::string tableName, const std::vector& keys) = 0;
+
+    /* *
+     * Get all table names.
+     * @Param allTableNames: output, receives all table names
+     * @Return errorCode
+     */
+    virtual int GetEmbTableNames(std::vector& allTableNames) = 0;
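
The lookup/update methods only make sense in combination with GetSwapPairsAndKey2Offset, which decides residency. The toy model below is not the real implementation (which also tracks frequencies, optimizer state and a memory pool); it only illustrates the intended contract: resident keys keep their offsets, new keys get free or evicted offsets, and the swap-in/swap-out pairs report exactly what moved:

    #include <cassert>
    #include <cstdint>
    #include <unordered_map>
    #include <vector>

    struct ToyCache {
        uint32_t capacity;
        std::unordered_map<int64_t, uint32_t> key2Offset;

        // Assumes at least one resident key is outside the current batch when full.
        void GetSwapPairs(const std::vector<int64_t>& batch,
                          std::vector<int64_t>& swapInKeys, std::vector<uint32_t>& swapInPos,
                          std::vector<int64_t>& swapOutKeys, std::vector<uint32_t>& swapOutPos)
        {
            std::unordered_map<int64_t, bool> inBatch;
            for (int64_t k : batch) { inBatch[k] = true; }
            for (int64_t k : batch) {
                if (key2Offset.count(k) != 0) { continue; }        // already resident
                uint32_t offset;
                if (key2Offset.size() < capacity) {
                    offset = static_cast<uint32_t>(key2Offset.size());
                } else {
                    auto victim = key2Offset.begin();              // arbitrary victim
                    while (inBatch.count(victim->first) != 0) { ++victim; }
                    offset = victim->second;
                    swapOutKeys.push_back(victim->first);
                    swapOutPos.push_back(offset);
                    key2Offset.erase(victim);
                }
                key2Offset[k] = offset;
                swapInKeys.push_back(k);
                swapInPos.push_back(offset);
            }
        }
    };

    int main()
    {
        ToyCache cache { 2, {} };
        std::vector<int64_t> si, so;
        std::vector<uint32_t> sip, sop;
        cache.GetSwapPairs({ 1, 2 }, si, sip, so, sop);
        assert(si.size() == 2 && so.empty());
        si.clear(); sip.clear();
        cache.GetSwapPairs({ 3 }, si, sip, so, sop);               // evicts 1 or 2
        assert(si.size() == 1 && so.size() == 1 && sip[0] == sop[0]);
        return 0;
    }
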
+
+    /* *
+     * Get all key/offset pairs currently recorded in the offsetMapper, ordered by ascending offset value.
+     * @Param tableName: table name
+     * @Param koVec: output parameter
+     * @Return errorCode
+     */
+    virtual int ExportDeviceKeyOffsetPairs(std::string tableName,
+        std::vector>& koVec) = 0;
+
+    /* *
+     * Get the serialized state of the given table.
+     * @Param tableName: table to serialize
+     * @Param buffer: output, receives the serialized state
+     * @Return errorCode
+     */
+    virtual int Serialize(std::string tableName, std::vector& buffer) = 0;
+
+    /* *
+     * Deserialize the given table from its serialized state.
+     * @Param tableName: table to deserialize
+     * @Param buffer: input, the buffer content to deserialize
+     * @Return errorCode
+     */
+    virtual int Deserialize(std::string tableName, const std::vector& buffer) = 0;
+
+    /* *
+     * Destroy all embCaches and release their memory.
+     */
+    virtual void Destroy() = 0;
+
+    /* *
+     * Query the usage of a table.
+     * @Param tableName: table to query
+     * @Return current usage of the table
+     */
+    virtual uint32_t GetUsage(const std::string& tableName) = 0;
+
+    /* *
+     * Get all keys currently stored on the host side together with their embeddings and optimizer state.
+     * @Param tableName: table to query
+     * @Param keys: in/out, pass an empty vector; receives all stored keys
+     * @Param embeddings: in/out, pass an empty vector; receives all stored embeddings
+     * @Param optimizerSlots: in/out, pass an empty vector; receives all stored optimizerSlots
+     * @Return errorCode
+     */
+    virtual int GetEmbTableInfos(std::string tableName, std::vector& keys,
+        std::vector>& embeddings,
+        std::vector>& optimizerSlots) = 0;
+
+    /* *
+     * Load the LocalEmbeddingTable from the given keys and their embeddings and optimizer state.
+     * @Param tableName: table to load
+     * @Param keys: input, all keys to load
+     * @Param embeddings: input, all embeddings to load
+     * @Param optimizerSlots: input, all optimizerSlots to load
+     * @Return errorCode
+     */
+    virtual int LoadEmbTableInfos(std::string tableName, const std::vector& keys,
+        const std::vector>& embeddings,
+        const std::vector>& optimizerSlots) = 0;
+};
+}  // namespace EmbCache
+
+#endif  // EMBEDDING_CACHE_H
diff --git a/src/core/ock_ctr_common/include/factory.h b/src/core/ock_ctr_common/include/factory.h
index 44a2fce0..ce701abe 100644
--- a/src/core/ock_ctr_common/include/factory.h
+++ b/src/core/ock_ctr_common/include/factory.h
@@ -17,16 +17,17 @@ See the License for the specific language governing permissions and
 #define UNIQUE_OCK_CTR_COMMON_H

 #include
-#include
 #include
-#include "unique.h"
+#include
+#include "embedding_cache.h"
+#include "unique.h"

 #ifdef __cplusplus
 extern "C" {
 #endif

-using ExternalLog = void (*)(int level, const char *msg);
+using ExternalLog = void (*)(int level, const char* msg);

 #ifdef __cplusplus
 }
@@ -40,26 +41,28 @@
 class Factory;
 using FactoryPtr = std::shared_ptr;
 using UniquePtr = std::shared_ptr;
+using EmbCacheManagerPtr = std::shared_ptr;

 class Factory {
 public:
     virtual ~Factory() = default;
-    virtual int CreateUnique(UniquePtr &out) = 0;
+    virtual int CreateUnique(UniquePtr& out) = 0;
+    virtual int CreateEmbCacheManager(EmbCacheManagerPtr& out) = 0;
     virtual int SetExternalLogFuncInner(ExternalLog logFunc) = 0;

 public:
-    static int Create(FactoryPtr &out)
+    static int Create(FactoryPtr& out)
     {
         int result = 0;
         uintptr_t factory = 0;

         /* dynamic load function */
-        if ((result = OckCtrCommonDef::CreatFactory(&factory)) == 0) {
-            out.reset(reinterpret_cast(factory));
+        if ((result = OckCtrCommonDef::CreateFactory(&factory)) == 0) {
+            out.reset(reinterpret_cast(factory));
         }
         return result;
     }
 };
-}
-}
+}  // namespace ctr
+}  // namespace ock

-#endif  // UNIQUE_OCK_CTR_COMMON_H
+#endif  // UNIQUE_OCK_CTR_COMMON_H
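
For reference, this is how the refreshed factory is meant to be consumed. The error handling and messages are illustrative, and lib_ock_ctr_common.so must be loadable at runtime for Create to succeed; only names that appear in the diff above are used:

    #include <iostream>
    #include "factory.h"

    int main()
    {
        ock::ctr::FactoryPtr factory;
        if (ock::ctr::Factory::Create(factory) != 0) {     // dlopen-loads the library on first use
            std::cerr << "lib_ock_ctr_common.so could not be loaded\n";
            return 1;
        }
        ock::ctr::EmbCacheManagerPtr cache;
        if (factory->CreateEmbCacheManager(cache) != 0) {  // new entry point added by this patch
            std::cerr << "CreateEmbCacheManager failed\n";
            return 1;
        }
        // cache->CreateCacheForTable(...) and the per-batch calls would follow here.
        return 0;
    }
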
diff --git a/src/core/ock_ctr_common/include/ock_ctr_common_def.h b/src/core/ock_ctr_common/include/ock_ctr_common_def.h
index e8b3f0b5..537d7a39 100644
--- a/src/core/ock_ctr_common/include/ock_ctr_common_def.h
+++ b/src/core/ock_ctr_common/include/ock_ctr_common_def.h
@@ -20,15 +20,15 @@ See the License for the specific language governing permissions and
 #include
 #include

-using CTR_CREATE_FACTORY_FUNCTION = int (*)(uintptr_t *);
+using CTR_CREATE_FACTORY_FUNCTION = int (*)(uintptr_t*);

 namespace ock {
 namespace ctr {
 class OckCtrCommonDef {
 public:
-    static int CreatFactory(uintptr_t *factory)
+    static int CreateFactory(uintptr_t* factory)
     {
-        static void *handle = nullptr;
+        static void* handle = nullptr;
         static std::mutex m;
         std::unique_lock lock(m);
         if (handle != nullptr) {
@@ -38,8 +38,8 @@
         handle = dlopen(LIBRARY_NAME, RTLD_NOW);

         if (handle == nullptr) {
-            std::cout << "Failed to call dlopen to load library '" << LIBRARY_NAME << "', error " << dlerror() <<
-                std::endl;
+            std::cout << "Failed to call dlopen to load library '" << LIBRARY_NAME << "', error " << dlerror()
+                << std::endl;
             return -1;
         }
@@ -55,9 +55,9 @@
     }

 private:
-    constexpr static const char *LIBRARY_NAME = "lib_ock_ctr_common.so";
+    constexpr static const char* LIBRARY_NAME = "lib_ock_ctr_common.so";
 };
-}
-}
+}  // namespace ctr
+}  // namespace ock

-#endif  // OCK_OCK_CTR_COMMON_DEF_H
+#endif  // OCK_OCK_CTR_COMMON_DEF_H
diff --git a/src/core/ock_ctr_common/include/unique.h b/src/core/ock_ctr_common/include/unique.h
index cb8960e7..5d11fe66 100644
--- a/src/core/ock_ctr_common/include/unique.h
+++ b/src/core/ock_ctr_common/include/unique.h
@@ -59,6 +59,7 @@
 using UniqueConf = struct UniqueConfCTR {
     uint32_t maxThreadNum = 8;  // maximum number of worker threads
     int64_t maxIdVal = 0;       // maximum id value
     bool trace = false;         // whether to enable performance tracing; needs an external log output
+    bool performance = false;   // whether to enable the enhanced interface; it requires shardingNum to be a power of two, the default buckets by modulo
 } __attribute__((packed));

 using UniqueIn = struct UniqueInCTR {
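
The power-of-two restriction on shardingNum exists because modulo bucketing can then be replaced by a bitwise AND, which is cheaper in a hot unique/dispatch loop. A small sketch of the equivalence; the library's actual bucketing code is not part of this patch:

    #include <cassert>
    #include <cstdint>

    inline uint32_t BucketModulo(uint64_t key, uint32_t shardingNum)
    {
        return static_cast<uint32_t>(key % shardingNum);
    }

    inline uint32_t BucketPow2(uint64_t key, uint32_t shardingNum)
    {
        return static_cast<uint32_t>(key & (shardingNum - 1));   // valid only when shardingNum is 2^k
    }

    int main()
    {
        for (uint64_t key = 0; key < 1000; ++key) {
            assert(BucketModulo(key, 8) == BucketPow2(key, 8));  // identical results for 2^k buckets
        }
        return 0;
    }
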
int channelId) -{ - vector keys; // 去重和删除无效key - HandleRepeatAndInvalidKey(originalKeys, keys); - // 区分HBM+DDR内key,和HBM+DDR外的key(新key或保存在SSD中的key) - vector externalKeys; - vector internalKeys; - GetExternalKeys(table.keyOffsetMap, externalKeys, internalKeys, keys); - if (externalKeys.empty()) { return TransferRet::TRANSFER_OK; } - - // 判断剩余内存空间是否足够; 可用内存空间计算:HBM+DDR-已占用; 若是训练,再加DDR已淘汰; - // SSD仅与DDR交互,不考虑HBM淘汰位置;由于maxOffset比实际使用大1,所以虽然从0开始也不用再减1 - size_t ddrAvailableSize = table.devVocabSize + table.hostVocabSize - table.maxOffset; - if (channelId == TRAIN_CHANNEL_ID) { - ddrAvailableSize += table.evictHostPos.size(); - } - LOG_DEBUG("TransferDDREmbWithSSD, table:{}, maxOffset:{}, evictHostPos size:{}, ddrAvailableSize:{}", - table.name, table.maxOffset, table.evictHostPos.size(), ddrAvailableSize); - CreateSSDTableIfNotExist(table.name); - - // 调用ssdEngine查询当前批次key中保存在SSD中的key - vector externalSSDKeys; - GetSSDKeys(table.name, externalKeys, externalSSDKeys); - // 后续判断maxOffset是否超出范围时,maxOffset=devVocabSize+hostVocabSize时可用,此处包含等于 - bool isDDRSpaceEnough = ddrAvailableSize >= externalKeys.size(); - bool ddrSpaceEnoughOrEval = channelId != TRAIN_CHANNEL_ID || isDDRSpaceEnough; - if (ddrSpaceEnoughOrEval && externalSSDKeys.empty()) { - // 部分场景后续不用处理,在此处返回 - return TransferRet::TRANSFER_OK; - } - - AddDebugAndTraceLog(keys.size(), externalKeys, externalSSDKeys); - /* - * 前面 externalSSDKeys = 0 ,评估场景的 ddr空间可用、不可用已返回; 训练的可用已返回; - * 剩下的情况如下: - * 评估: - * externalSSDKeys > 0, 可用 & 不可用操作一样; - * 可选:Ddr->ssd, 腾出 externalSSDKeys 大小空间; - * Ssd->ddr, 需要移动 externalSSDKeys ; - * externalSSDKeys = 0 --已返回 - * 训练: - * externalSSDKeys > 0 - * 可用: - * 可选:Ddr->ssd, 腾出 externalSSDKeys 大小空间; - * Ssd->ddr, 需要移动 externalSSDKeys ; - * 不可用: - * 必选:Ddr->ssd, 腾出 externalKeys 大小空间; - * 需要计算ssd剩余空间:externalKeys - externalSSDKeys - * (注: 当前策略均转移externalKeys) - * Ssd->ddr, 需要移动 externalSSDKeys ; - * externalSSDKeys = 0 - * 可用: --已返回 - * 不可用: - * Ddr->ssd, 腾出 externalKeys 大小的空间; - * 需要计算ssd剩余空间: externalKeys - * 因cache每次只转移DDR最小空间,上述可选动作也需执行,避免SSD移入DDR时空间不足 - */ - // 训练场景检查SSD剩余空间 评估不考虑新key - if (channelId == TRAIN_CHANNEL_ID) { - size_t needSSDSize = externalKeys.size() - externalSSDKeys.size() - ddrAvailableSize; - const int64_t ssdAvailableSize = ssdEngine->GetTableAvailableSpace(table.name); - if (int64_t(needSSDSize) > ssdAvailableSize) { - LOG_ERROR("TransferDDREmbWithSSD: ssd available space is not enough to transfer DDR emb data. " - "needSSDSize:{}, ssdAvailableSize:{}", needSSDSize, ssdAvailableSize); - return TransferRet::SSD_SPACE_NOT_ENOUGH; - } - } - - // 从SSD获取emb数据并从SSD删除; 避免DDR->SSD时空间不够 - vector> ssdEmbData; - if (!externalSSDKeys.empty()) { - ssdEmbData = ssdEngine->FetchEmbeddings(table.name, externalSSDKeys); - ssdEngine->DeleteEmbeddings(table.name, externalSSDKeys); - } - - // 从ddr转移到ssd的key个数 - size_t ddrSwapOutSizeTmp = ddrSpaceEnoughOrEval ? 
externalSSDKeys.size() : externalKeys.size(); - auto ddrSwapOutSize = static_cast(ddrSwapOutSizeTmp - ddrAvailableSize); - LOG_DEBUG("TransferDDREmbWithSSD: ddrSwapOutSize:{}", ddrSwapOutSize); - - /* - * 转移DDR中数据到SSD - */ - // 记录要从DDR转移到SSD的key对应的offset(相对值,需减去devVocabSize) - vector ddrTransferPos; - TransferRet ddr2SsdRet = TransferDDREmb2SSD(table, ddrSwapOutSize, internalKeys, ddrTransferPos); - if (ddr2SsdRet == TransferRet::DDR_SPACE_NOT_ENOUGH) { - ssdEngine->InsertEmbeddings(table.name, externalSSDKeys, ssdEmbData); - return ddr2SsdRet; - } - - HandleDDRTransferPos(ddrTransferPos, externalSSDKeys, table); - - /* - * 转移SSD中保存的当前批次key的emb数据到DDR - */ - return TransferSSDEmb2DDR(table, externalSSDKeys, ddrTransferPos, ssdEmbData); -} - -/// SSD数据转移到DDR中后刷新映射和频次信息 -/// \param embTableName emb表名 -/// \param embHashMap emb hash表 -/// \param externalSSDKeys 存储在SSD中的key列表 -/// \param ddrTransferPos -void CacheManager::RefreshRelateInfoWithSSD2DDR(TableInfo& table, - vector& externalSSDKeys, vector& ddrTransferPos) -{ - for (size_t i = 0; i < externalSSDKeys.size(); ++i) { - // 映射关系 ddrTransferPos是在ddrEmbHash中的位置,记录映射时需加上devVocabSize - auto& key = externalSSDKeys[i]; - table.keyOffsetMap[key] = ddrTransferPos[i] + table.devVocabSize; - // 频次 - ddrKeyFreqMap[table.name].PutWithInit(key, excludeDDRKeyCountMap[table.name][key]); - excludeDDRKeyCountMap[table.name].erase(key); - } -} - -void CacheManager::GetDDREmbInfo(vector& keys, TableInfo& table, - vector& ddrTransferPos, vector>& ddrEmbData) const -{ - // 根据offset 获取对应Emb数据 - for (auto& key : keys) { - auto koCast = static_cast(table.keyOffsetMap[key]); - ddrTransferPos.emplace_back(koCast - table.devVocabSize); - } - - LOG_TRACE("DDR keys:{}", VectorToString(keys)); - LOG_TRACE("DDR key positions:{}", VectorToString(ddrTransferPos)); - - ddrEmbData.resize(keys.size()); - const auto& emb = hostEmbs->GetEmb(table.name); -#pragma omp parallel for num_threads(MGMT_CPY_THREADS) default(none) shared(ddrTransferPos, emb, ddrEmbData) - for (size_t i = 0; i < ddrTransferPos.size(); ++i) { - auto& missingKeyPo = ddrTransferPos[i]; - const auto& src = emb.embData[missingKeyPo]; - ddrEmbData[i] = src; - } -} - -/// 使用ssdEmbData更新DDR内emb数据 -/// \param embTableName emb表名 -/// \param ddrTransferPos 需要更新的DDR内的offset -/// \param ssdEmbData SSD对应的emb数据 -void CacheManager::UpdateDDREmbInfo(const std::string& embTableName, - vector& ddrTransferPos, - vector>& ssdEmbData) const -{ - auto& emb = hostEmbs->GetEmb(embTableName); - auto& embData = emb.embData; -#pragma omp parallel for num_threads(MGMT_CPY_THREADS) default(none) shared(ddrTransferPos, embData, ssdEmbData) - for (size_t i = 0; i < ddrTransferPos.size(); ++i) { - embData[ddrTransferPos[i]] = ssdEmbData[i]; - } -} - -/// DDR_2_SSD场景数据刷新: 仅刷新映射和频次,ddr转移出去的offset信息后续统一处理 -/// \param embTableName emb表名 -/// \param embHashMap emb map -/// \param ddrSwapOutKeys 从DDR中转移到SSD中key列表 -/// \param ddrSwapOutCounts 从DDR中转移到SSD中key频次数据 -void CacheManager::RefreshRelateInfoWithDDR2SSD(TableInfo& table, - vector& ddrSwapOutKeys, - vector& ddrSwapOutCounts) -{ - auto& excludeFreqMap = excludeDDRKeyCountMap[table.name]; - for (size_t i = 0; i < ddrSwapOutKeys.size(); ++i) { - auto& key = ddrSwapOutKeys[i]; - table.keyOffsetMap.erase(key); - excludeFreqMap[key] = ddrSwapOutCounts[i]; - } -} - -/// key从DDR移入、移出、HBM淘汰时刷新频次信息;仅刷新频次信息 -/// \param embTableName emb表名 -/// \param keys 操作的key集合 -/// \param type TransferType -void CacheManager::RefreshFreqInfoCommon(const string& embTableName, vector& keys, TransferType type) 
-{ - if (type == TransferType::DDR_2_HBM) { - for (auto& key : keys) { - // 频次数据记录到 excludeDDRKeyCountMap,并删除ddrKeyFreqMap中频次数据 - // 进入findOffset时记录的key次数 + ddr内key次数 - auto tmpCount = excludeDDRKeyCountMap[embTableName][key]; - excludeDDRKeyCountMap[embTableName][key] = ddrKeyFreqMap[embTableName].Get(key) + tmpCount; - ddrKeyFreqMap[embTableName].Pop(key); - } - } else if (type == TransferType::HBM_2_DDR) { - for (auto& key : keys) { - // excludeDDRKeyCountMap 中次数转移到 ddrKeyFreqMap, 并删除原记录 - ddrKeyFreqMap[embTableName].PutWithInit(key, excludeDDRKeyCountMap[embTableName][key]); - excludeDDRKeyCountMap[embTableName].erase(key); - } - } else if (type == TransferType::DDR_2_EVICT) { - for (auto& key : keys) { - ddrKeyFreqMap[embTableName].Pop(key); - } - } else { - // TransferType::HBM_2_EVICT - for (auto& key : keys) { - excludeDDRKeyCountMap[embTableName].erase(key); - } - } -} - -void CacheManager::Init(HostEmb* hostEmbPtr, vector& mgmtEmbInfo) -{ - this->hostEmbs = hostEmbPtr; + LOG_INFO("CacheManager Init method begin"); + this->embCache = std::move(embCachePtr); for (auto& emb : mgmtEmbInfo) { EmbBaseInfo baseInfo {emb.ssdVocabSize, emb.ssdDataPath, false}; embBaseInfos.emplace(emb.name, baseInfo); - ddrKeyFreqMap[emb.name]; - excludeDDRKeyCountMap[emb.name]; + preProcessMapper[emb.name].Initialize(emb.name, emb.hostVocabSize, emb.ssdVocabSize); } ssdEngine->Start(); - LOG_INFO("CacheManager Init method end."); + LOG_INFO("CacheManager Init method end"); } -bool CacheManager::IsKeyInSSD(const string& embTableName, emb_key_t key) +bool CacheManager::IsKeyInSSD(const string& embTableName, emb_cache_key_t key) { return ssdEngine->IsKeyExist(embTableName, key); } @@ -296,16 +46,35 @@ bool CacheManager::IsKeyInSSD(const string& embTableName, emb_key_t key) /// 淘汰SSD中Emb信息 /// \param embTableName emb表名 /// \param keys 淘汰key列表 -void CacheManager::EvictSSDEmbedding(const string& embTableName, vector& keys) +void CacheManager::EvictSSDEmbedding(const string& embTableName, const vector& keys) { if (keys.empty()) { return; } - // 1 删除缓存中记录的key的次数 2 删除SSD中保存的Emb数据 - for (auto& key : keys) { - excludeDDRKeyCountMap[embTableName].erase(key); + + int keyStep = preProcessStep; + unordered_map& ssdMap = preProcessMapper[embTableName].excludeDDRKeyCountMap; + LFUCache& ddrLfu = preProcessMapper[embTableName].lfuCache; + std::vector ssdKeysToBeDeleted; + // 1 删除缓存中记录的key的次数 + for (auto &key: keys) { + auto it = ssdMap.find(key); + if (it != ssdMap.end()) { + ssdMap.erase(it); + ssdKeysToBeDeleted.emplace_back(key); + } else { + ddrLfu.Pop(key); + } } - ssdEngine->DeleteEmbeddings(embTableName, keys); + + ssdEvictThreads.emplace_back([=]() mutable { + // 2 删除SSD中保存的Emb数据 + std::unique_lock lk(evictWaitMut); + evictWaitCond.wait(lk, [keyStep, this] { + return embeddingTaskStep == keyStep; + }); + ssdEngine->DeleteEmbeddings(embTableName, ssdKeysToBeDeleted); + }); } /// 放入key,新增/更新(次数+1)次数 @@ -324,116 +93,6 @@ void CacheManager::PutKey(const string& embTableName, const emb_key_t& key, Reco hashMap[key] = count; } -/// DDR->SSD与SSD->DDR的key个数可能不一致,手动补齐/截取 -/// \param ddrTransferPos DDR->SSD的offset列表(hostEmb表内的偏移值) -/// \param externalSSDKeys SSD->DDR的key列表 -/// \param embHashMap emb hash表 -void CacheManager::HandleDDRTransferPos(vector& ddrTransferPos, vector& externalSSDKeys, - TableInfo& table) -{ - if (ddrTransferPos.size() == externalSSDKeys.size()) { - return; - } - LOG_DEBUG("TransferDDREmbWithSSD: operate length is not equal, will padding or clipping, " - "ddrTransferPos size:{}, externalSSDKeys 
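EvictSSDEmbedding no longer deletes from SSD inline: the physical delete is handed to a worker thread that blocks until embeddingTaskStep has caught up with the step at which the evict was requested (FetchSSDEmb2DDR advances the counter and notifies). A reduced model of that handshake, with the members renamed for clarity; the patch compares steps with ==, and >= is used here only to keep the sketch robust:

    #include <condition_variable>
    #include <functional>
    #include <mutex>
    #include <thread>
    #include <vector>

    // Reduced model of the ordering contract: the SSD delete for step N must
    // not run before the embedding task for step N has finished.
    struct StepGate {
        std::mutex mut;
        std::condition_variable cond;
        int doneStep = 0;                  // mirrors embeddingTaskStep
        std::vector<std::thread> workers;  // joined on shutdown, as in ~CacheManager

        void DeferUntil(int step, std::function<void()> work)
        {
            workers.emplace_back([this, step, work] {
                std::unique_lock<std::mutex> lk(mut);
                cond.wait(lk, [&] { return doneStep >= step; });
                work();  // e.g. the deferred ssdEngine->DeleteEmbeddings(...)
            });
        }

        void Advance()  // mirrors embeddingTaskStep++ plus notify_all
        {
            {
                std::lock_guard<std::mutex> lk(mut);
                ++doneStep;
            }
            cond.notify_all();
        }

        ~StepGate()
        {
            for (auto& t : workers) {
                t.join();
            }
        }
    };
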
size:{}", - ddrTransferPos.size(), externalSSDKeys.size()); - // ddrTransferPos中是DDR内偏移位置,存入evictPos时,需加上devVocabSize;取出时需减去 - if (ddrTransferPos.size() > externalSSDKeys.size()) { - while (ddrTransferPos.size() > externalSSDKeys.size()) { - auto evictHostPos = ddrTransferPos.back() + table.devVocabSize; - table.evictHostPos.emplace_back(static_cast(evictHostPos)); - ddrTransferPos.pop_back(); - } - return; - } - // 补齐offset - while (ddrTransferPos.size() < externalSSDKeys.size() && !table.evictHostPos.empty()) { - ddrTransferPos.emplace_back(table.evictHostPos.back() - table.devVocabSize); - table.evictHostPos.pop_back(); - } - auto allSize = table.devVocabSize + table.hostVocabSize; - // 还不够继续使用maxOffset - while (ddrTransferPos.size() < externalSSDKeys.size() && table.maxOffset < allSize) { - auto nextPos = table.maxOffset++; - ddrTransferPos.emplace_back(nextPos - table.devVocabSize); - } - LOG_DEBUG("HandleDDRTransferPos: handle end, pos len:{}, keys len:{}", - ddrTransferPos.size(), externalSSDKeys.size()); -} - -void CacheManager::GetSSDKeys(const std::string& embTableName, vector& externalKeys, - vector& externalSSDKeys) -{ - for (auto& key : externalKeys) { - if (ssdEngine->IsKeyExist(embTableName, key)) { - externalSSDKeys.emplace_back(key); - } - } -} - -TransferRet CacheManager::TransferDDREmb2SSD(TableInfo& table, - int64_t ddrSwapOutSize, - const vector& keys, vector& ddrTransferPos) -{ - if (ddrSwapOutSize <= 0) { - // 此时不需要转移数据 - return TransferRet::TRANSFER_OK; - } - - TimeCost ddr2SsdTc; - LOG_DEBUG("TransferDDREmbWithSSD: get ddr least freq keys, table:{}, ddrSwapOutSize:{}", - table.name, ddrSwapOutSize); - // 获取DDR中指定数量的最低频次key,并获取相应emb数据,执行DDR换出到SSD - vector ddrSwapOutKeys; - vector ddrSwapOutCounts; - ddrKeyFreqMap[table.name].GetAndDeleteLeastFreqKeyInfo(ddrSwapOutSize, keys, ddrSwapOutKeys, ddrSwapOutCounts); - if (static_cast(ddrSwapOutKeys.size()) != ddrSwapOutSize) { - auto keyTableSize = ddrKeyFreqMap[table.name].keyTable.size(); - // 获取的最低频次key数量和预期不一致,DDR空间不足,不能放置当前批次数据 - LOG_ERROR("TransferDDREmbWithSSD, table:{}, vector length is not equal, ddrSwapOutKeys size:{}, " - "ddrSwapOutSize:{}, ddr lfu keyTable size:{}", - table.name, ddrSwapOutKeys.size(), ddrSwapOutSize, keyTableSize); - RestoreLeastFreqInfo(table.name, ddrSwapOutKeys, ddrSwapOutCounts); - return TransferRet::DDR_SPACE_NOT_ENOUGH; - } - LOG_DEBUG("TransferDDREmbWithSSD: get DDR embeddings and save to SSD, table:{}, size:{}", - table.name, ddrSwapOutKeys.size()); - // 获取DDR中emb数据 - vector> ddrEmbData; - GetDDREmbInfo(ddrSwapOutKeys, table, ddrTransferPos, ddrEmbData); - // 调用SSDEngine接口,将DDR Emb数据保存到SSD - ssdEngine->InsertEmbeddings(table.name, ddrSwapOutKeys, ddrEmbData); - - // 初始化DDR内被转移出去的位置 - hostEmbs->EvictInitEmb(table.name, ddrTransferPos); - - // 更新记录的DDR中key频次信息 - RefreshRelateInfoWithDDR2SSD(table, ddrSwapOutKeys, ddrSwapOutCounts); - LOG_DEBUG("TransferDDREmbWithSSD: table:{}, ddr2SsdTc TimeCost(ms):{}", table.name, ddr2SsdTc.ElapsedMS()); - return TransferRet::TRANSFER_OK; -} - -TransferRet CacheManager::TransferSSDEmb2DDR(TableInfo& table, - vector& externalSSDKeys, vector& ddrTransferPos, - vector>& ssdEmbData) -{ - if (externalSSDKeys.empty()) { - return TransferRet::TRANSFER_OK; - } - TimeCost ssd2DdrTc; - LOG_DEBUG("TransferDDREmbWithSSD: get SSD embeddings and save to DDR, size:{}", externalSSDKeys.size()); - if (ddrTransferPos.size() != externalSSDKeys.size() || externalSSDKeys.size() != ssdEmbData.size()) { - LOG_ERROR("TransferDDREmbWithSSD, vector length is not equal, 
ddrTransferPos len:{}, externalSSDKeys len:{}, " - "ssdEmbData len:{}", ddrTransferPos.size(), externalSSDKeys.size(), ssdEmbData.size()); - return TransferRet::TRANSFER_ERROR; - } - // 将SSD emb存储到DDR中 刷新频次信息 - UpdateDDREmbInfo(table.name, ddrTransferPos, ssdEmbData); - RefreshRelateInfoWithSSD2DDR(table, externalSSDKeys, ddrTransferPos); - LOG_DEBUG("TransferDDREmbWithSSD: ssd2DdrTc TimeCost(ms):{}", ssd2DdrTc.ElapsedMS()); - return TransferRet::TRANSFER_OK; -} - void CacheManager::CreateSSDTableIfNotExist(const std::string& embTableName) { if (embBaseInfos[embTableName].isExist) { @@ -451,18 +110,11 @@ void CacheManager::CreateSSDTableIfNotExist(const std::string& embTableName) LOG_INFO("ssd table is exist, embTableName:" + embTableName); } -void CacheManager::RestoreLeastFreqInfo(const std::string& embTableName, vector& ddrSwapOutKeys, - vector& ddrSwapOutCounts) -{ - auto& lfuCache = ddrKeyFreqMap[embTableName]; - for (size_t i = 0; i < ddrSwapOutKeys.size(); ++i) { - lfuCache.PutWithInit(ddrSwapOutKeys[i], ddrSwapOutCounts[i]); - } -} - CacheManager::~CacheManager() { - hostEmbs = nullptr; + for (auto &t : ssdEvictThreads) { + t.join(); + } ssdEngine->Stop(); ddrKeyFreqMap.clear(); excludeDDRKeyCountMap.clear(); @@ -472,34 +124,9 @@ CacheManager::~CacheManager() /// \param ddrFreqInitMap ddr内key频次数据 /// \param excludeDdrFreqInitMap 非DDR key频次数据 /// \param step 加载SSDEngine传入步数 -void CacheManager::Load(unordered_map>& ddrFreqInitMap, - unordered_map>& excludeDdrFreqInitMap, - int step, int rankSize, int rankId) +void CacheManager::Load(const std::vector &mgmtEmbInfo, int step, + map>& trainKeySet) { - if (rankSize <= 0) { - throw runtime_error("rank size must > 0"); - } - // 加载CacheManager数据 - for (auto& it : ddrFreqInitMap) { - auto& embTableName = it.first; - auto& freqMap = it.second; - for (auto& freqIt : freqMap) { - if (freqIt.first % rankSize != rankId) { - continue; - } - ddrKeyFreqMap[embTableName].PutWithInit(freqIt.first, freqIt.second); - } - } - for (auto& it : excludeDdrFreqInitMap) { - auto& embTableName = it.first; - auto& freqMap = it.second; - for (auto& freqIt : freqMap) { - if (freqIt.first % rankSize != rankId) { - continue; - } - excludeDDRKeyCountMap[embTableName].emplace(freqIt.first, freqIt.second); - } - } // 加载SSDEngine数据 #ifndef GTEST for (auto& it : embBaseInfos) { @@ -507,6 +134,28 @@ void CacheManager::Load(unordered_mapLoad(embTableName, embBase.savePath, embBase.maxTableSize, step); } + auto tableKeysVec = ssdEngine->ExportTableKey(); + for (auto &it: tableKeysVec) { + auto &embTableName = it.first; + auto &keys = it.second; + for (auto key: keys) { + preProcessMapper[embTableName].excludeDDRKeyCountMap[key] = 1; + trainKeySet[embTableName].insert(key); + } + } + for (const auto &embInfo: mgmtEmbInfo) { + const std::string &tableName = embInfo.name; + std::vector buffer; + int rc = embCache->Serialize(tableName, buffer); + if (rc != 0) { + throw std::runtime_error("Serialize failed!"); + } + uint64_t memSize = sizeof(uint64_t) + embInfo.extEmbeddingSize * sizeof(float); + for (uint64_t i = 0; i < buffer.size(); i += memSize) { + uint64_t key = *reinterpret_cast(&buffer[i]); + preProcessMapper[tableName].lfuCache.Put(key); + } + } #endif } @@ -525,3 +174,114 @@ int64_t CacheManager::GetTableEmbeddingSize(const string& tableName) return ssdEngine->GetTableEmbeddingSize(tableName); } +void CacheManager::ProcessSwapOutKeys(const string& tableName, const vector& swapOutKeys, + SwapOutInfo& info) +{ + auto& swapOutDDRKeys = info.swapOutDDRKeys; + auto& 
swapOutDDRAddrOffs = info.swapOutDDRAddrOffs; + auto& swapOutSSDKeys = info.swapOutSSDKeys; + auto& swapOutSSDAddrOffs = info.swapOutSSDAddrOffs; + + // 处理一下没见过的key,看是更新到DDR还是SSD中 + auto& keyMapper = preProcessMapper[tableName]; + size_t availableDDRSize = keyMapper.DDRAvailableSize(); + for (size_t i = 0; i < swapOutKeys.size(); ++i) { + emb_cache_key_t key = swapOutKeys[i]; + if (keyMapper.IsDDRKeyExist(key)) { + keyMapper.lfuCache.Put(key); + swapOutDDRKeys.push_back(key); + swapOutDDRAddrOffs.push_back(i); + } else if (keyMapper.IsSSDKeyExist(key)) { + keyMapper.excludeDDRKeyCountMap[key]++; + swapOutSSDKeys.push_back(key); + swapOutSSDAddrOffs.push_back(i); + } else if (availableDDRSize > 0) { + keyMapper.InsertDDRKey(key); + swapOutDDRKeys.push_back(key); + swapOutDDRAddrOffs.push_back(i); + availableDDRSize--; + } else { + keyMapper.InsertSSDKey(key); + swapOutSSDKeys.push_back(key); + swapOutSSDAddrOffs.push_back(i); + } + } +} + +void CacheManager::ProcessSwapInKeys(const string& tableName, const vector& swapInKeys, + vector& DDRToSSDKeys, vector& SSDToDDRKeys) +{ + auto& keyMapper = preProcessMapper[tableName]; + size_t externalDDRSize = 0; + std::vector firstSeenKeys; + for (emb_cache_key_t key : swapInKeys) { + if (keyMapper.IsDDRKeyExist(key)) { + continue; + } + externalDDRSize++; + if (keyMapper.IsSSDKeyExist(key)) { + SSDToDDRKeys.push_back(key); + } else { + firstSeenKeys.push_back(key); + } + } + + auto ddrAvailableSize = keyMapper.DDRAvailableSize(); + if (externalDDRSize > ddrAvailableSize) { // 需要DDR--->SSD + size_t transNum = externalDDRSize - ddrAvailableSize; + + if (transNum > keyMapper.SSDAvailableSize()) { + throw invalid_argument("SSD table size too small, key quantity exceed while transferring DDR data to SSD"); + } + // DDR--->SSD + keyMapper.GetAndDeleteLeastFreqDDRKey2SSD(transNum, swapInKeys, DDRToSSDKeys); + } + + // SSD--->DDR + for (uint64_t key : SSDToDDRKeys) { + keyMapper.InsertDDRKey(key); + keyMapper.RemoveSSDKey(key); + } + for (uint64_t key : firstSeenKeys) { + keyMapper.InsertDDRKey(key); + } + preProcessStep++; +} + +void CacheManager::UpdateSSDEmb(string tableName, float* embPtr, uint32_t extEmbeddingSize, + vector& keys, const vector& swapOutSSDddrOffs) +{ + vector embeddingsAddr(keys.size()); + for (uint64_t i = 0; i < swapOutSSDddrOffs.size(); i++) { + embeddingsAddr[i] = embPtr + swapOutSSDddrOffs[i] * extEmbeddingSize; + } + ssdEngine->InsertEmbeddingsByAddr(tableName, keys, embeddingsAddr, extEmbeddingSize); +} + +void CacheManager::TransferDDR2SSD(string tableName, uint32_t extEmbeddingSize, vector& keys, + vector& addrs) +{ + CreateSSDTableIfNotExist(tableName); + ssdEngine->InsertEmbeddingsByAddr(tableName, keys, addrs, extEmbeddingSize); + for (auto addr : addrs) { + free(addr); + addr = nullptr; + } +} + +void CacheManager::FetchSSDEmb2DDR(string tableName, uint32_t extEmbeddingSize, vector& keys, + const vector& addrs) +{ + auto embeddings = ssdEngine->FetchEmbeddings(tableName, keys); + for (uint64_t i = 0; i < embeddings.size(); i++) { + int rc = memcpy_s(addrs[i], extEmbeddingSize * sizeof(float), embeddings[i].data(), + extEmbeddingSize * sizeof(float)); + if (rc != 0) { + throw runtime_error("memcpy_s failed, rc: " + to_string(rc)); + } + } + ssdEngine->DeleteEmbeddings(tableName, keys); + + embeddingTaskStep++; + evictWaitCond.notify_all(); +} diff --git a/src/core/ssd_cache/cache_manager.h b/src/core/ssd_cache/cache_manager.h index e750626d..89ed61d7 100644 --- a/src/core/ssd_cache/cache_manager.h +++ 
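Taken together, ProcessSwapInKeys plans the moves on the key maps and the Transfer/Fetch calls then execute them on the embedding bytes. A hypothetical driver showing the intended call order for one batch, assuming the key element type is emb_cache_key_t, consistent with the function bodies:

    #include <string>
    #include <vector>
    #include "ssd_cache/cache_manager.h"  // include path assumed

    // Hypothetical driver for one batch: plan the moves on the key maps first,
    // then move the embedding bytes once host-side addresses are known.
    void PreprocessBatch(MxRec::CacheManager& mgr, const std::string& table,
                         const std::vector<MxRec::emb_cache_key_t>& batchKeys)
    {
        std::vector<MxRec::emb_cache_key_t> ddrToSsd;
        std::vector<MxRec::emb_cache_key_t> ssdToDdr;
        // Classify: which resident keys must leave DDR, which SSD keys come in.
        // Throws if the SSD table cannot absorb the displaced keys.
        mgr.ProcessSwapInKeys(table, batchKeys, ddrToSsd, ssdToDdr);
        // Byte movement would then follow, e.g.:
        //   mgr.TransferDDR2SSD(table, extEmbeddingSize, ddrToSsd, addrs);
        //   mgr.FetchSSDEmb2DDR(table, extEmbeddingSize, ssdToDdr, addrs);
    }
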
b/src/core/ssd_cache/cache_manager.h @@ -23,10 +23,11 @@ See the License for the specific language governing permissions and #include #include "hd_transfer/hd_transfer.h" -#include "host_emb/host_emb.h" #include "lfu_cache.h" #include "ssd_engine/ssd_engine.h" #include "utils/common.h" +#include "preprocess_mapper.h" +#include "ock_ctr_common/include/factory.h" namespace MxRec { @@ -36,8 +37,13 @@ namespace MxRec { size_t devVocabSize; size_t& maxOffset; absl::flat_hash_map& keyOffsetMap; - std::vector& evictDevPos; // 记录HBM内被淘汰的key - std::vector& evictHostPos; // 记录Host内淘汰列表 + }; + + struct SwapOutInfo { + vector swapOutDDRKeys; + vector swapOutDDRAddrOffs; + vector swapOutSSDKeys; + vector swapOutSSDAddrOffs; }; enum class TransferRet { @@ -67,34 +73,48 @@ namespace MxRec { ~CacheManager(); - void Init(HostEmb* hostEmbPtr, vector& mgmtEmbInfo); + void Init(ock::ctr::EmbCacheManagerPtr embCachePtr, vector& mgmtEmbInfo); - void Load(unordered_map>& ddrFreqInitMap, - unordered_map>& excludeDdrFreqInitMap, - int step, int rankSize, int rankId); + void Load(const std::vector& mgmtEmbInfo, int step, + map>& trainKeySet); void SaveSSDEngine(int step); - // 转换DDR和SSD数据 - TransferRet TransferDDREmbWithSSD(TableInfo& table, - const vector& originalKeys, int channelId); + bool IsKeyInSSD(const string& embTableName, emb_cache_key_t key); + + void EvictSSDEmbedding(const string& embTableName, const vector& keys); - /* HBM与DDR换入换出时刷新频次信息 */ - void RefreshFreqInfoCommon(const string& embTableName, vector& keys, - TransferType type); + void PutKey(const string& embTableName, const emb_key_t& key, RecordType type); - bool IsKeyInSSD(const string& embTableName, emb_key_t key); + void ProcessSwapOutKeys(const string& tableName, const vector& swapOutKeys, + SwapOutInfo& info); - void EvictSSDEmbedding(const string& embTableName, vector& keys); + void ProcessSwapInKeys(const string& tableName, const vector& swapInKeys, + vector& DDRToSSDKeys, vector& SSDToDDRKeys); - void PutKey(const string& embTableName, const emb_key_t& key, RecordType type); + void UpdateSSDEmb(string tableName, float* embPtr, uint32_t extEmbeddingSize, vector& keys, + const vector& swapOutSSDAddrOffs); + + void TransferDDR2SSD(string tableName, uint32_t extEmbeddingSize, vector& keys, + vector& addrs); + + void FetchSSDEmb2DDR(string tableName, uint32_t extEmbeddingSize, vector& keys, + const vector& addrs); + + int64_t GetTableEmbeddingSize(const string& tableName); // DDR内每个表中emb数据频次缓存;map unordered_map ddrKeyFreqMap; // 每张表中非DDR内key的出现次数 - unordered_map> excludeDDRKeyCountMap; + unordered_map> excludeDDRKeyCountMap; - int64_t GetTableEmbeddingSize(const string& tableName); + // 每一个table对应一个PreProcessMapper,预先推演HBM->DDR的情况 + std::unordered_map preProcessMapper; + + int preProcessStep = 0; + int embeddingTaskStep = 0; + std::mutex evictWaitMut; + std::condition_variable evictWaitCond; private: struct EmbBaseInfo { @@ -103,53 +123,14 @@ namespace MxRec { bool isExist; }; - void GetDDREmbInfo(vector& keys, - TableInfo& table, - vector& ddrTransferPos, vector>& ddrEmbData) const; - - void UpdateDDREmbInfo(const std::string& embTableName, - vector& ddrTransferPos, - vector>& ssdEmbData) const; - - void RefreshRelateInfoWithDDR2SSD(TableInfo& table, - vector& ddrSwapOutKeys, vector& ddrSwapOutCounts); - - void RefreshRelateInfoWithSSD2DDR(TableInfo& table, - vector& externalSSDKeys, vector& ddrTransferPos); - - void GetSSDKeys(const std::string& embTableName, vector& externalKeys, - vector& externalSSDKeys); - - TransferRet 
TransferDDREmb2SSD(TableInfo& table, - int64_t ddrSwapOutSize, const vector& keys, - vector& ddrTransferPos); - - TransferRet TransferSSDEmb2DDR(TableInfo& table, - vector& externalSSDKeys, vector& ddrTransferPos, - vector>& ssdEmbData); - void CreateSSDTableIfNotExist(const std::string& embTableName); - void RestoreLeastFreqInfo(const std::string& embTableName, vector& ddrSwapOutKeys, - vector& ddrSwapOutCounts); - - static void HandleDDRTransferPos(vector& ddrTransferPos, vector& externalSSDKeys, - TableInfo& table); - - inline void GetExternalKeys(const absl::flat_hash_map &keyOffsetMap, - vector& externalKeys, - vector& internalKeys, const vector& keys) const; - - void AddDebugAndTraceLog(size_t batchKeySize, vector& externalKeys, - vector& externalSSDKeys) const; - - void HandleRepeatAndInvalidKey(const vector& originalKeys, vector& keys) const; - unordered_map embBaseInfos; GTEST_PRIVATE: shared_ptr ssdEngine = std::make_shared(); - HostEmb* hostEmbs {}; + vector ssdEvictThreads; + ock::ctr::EmbCacheManagerPtr embCache {}; }; } diff --git a/src/core/ssd_cache/lfu_cache.cpp b/src/core/ssd_cache/lfu_cache.cpp index c204e336..c2d38bd2 100644 --- a/src/core/ssd_cache/lfu_cache.cpp +++ b/src/core/ssd_cache/lfu_cache.cpp @@ -25,7 +25,7 @@ using namespace MxRec; /// 仅获取当前key的频次,不增加频次;key不存在时返回-1 /// \param key key /// \return key的频次 -freq_num_t LFUCache::Get(emb_key_t key) +freq_num_t LFUCache::Get(emb_cache_key_t key) { auto it = keyTable.find(key); if (it == keyTable.end()) { return -1; } @@ -37,13 +37,16 @@ freq_num_t LFUCache::Get(emb_key_t key) /// \param keys 要返回的最低频次key不能在该列表内 /// \param ddrSwapOutKeys 记录最低频次key /// \param ddrSwapOutCounts 记录最低频次key对应次数 -void LFUCache::GetAndDeleteLeastFreqKeyInfo(int64_t num, const vector& keys, - vector& ddrSwapOutKeys, vector& ddrSwapOutCounts) +void LFUCache::GetAndDeleteLeastFreqKeyInfo(uint64_t num, const vector& keys, + vector& ddrSwapOutKeys, + vector& ddrSwapOutCounts) { freq_num_t tempMinFreq = minFreq; - unordered_set retainedKeySet(keys.begin(), keys.end()); - int64_t counter = 0; + unordered_set retainedKeySet(keys.begin(), keys.end()); + uint64_t counter = 0; const size_t freqSize = freqTable.size(); + LOG_DEBUG("table:{}, num:{}, freqTable.size:{}, keys.size:{}, ddrSwapOutKeys.size:{}, ddrSwapOutCounts.size:{}", + name, num, freqTable.size(), keys.size(), ddrSwapOutKeys.size(), ddrSwapOutCounts.size()); // 遍历freqTable<次数,keyList>时,次数可能不连续,要实际使用了1个keyList后才自增,手动增加计数器 for (size_t i = 0; i < freqSize;) { auto nodesIter = freqTable.find(tempMinFreq); @@ -53,7 +56,7 @@ void LFUCache::GetAndDeleteLeastFreqKeyInfo(int64_t num, const vector } auto nodeIt = freqTable[tempMinFreq].begin(); while (nodeIt != freqTable[tempMinFreq].end() && !freqTable[tempMinFreq].empty() && counter < num) { - emb_key_t currentKey = nodeIt->key; + emb_cache_key_t currentKey = nodeIt->key; if (retainedKeySet.find(currentKey) != retainedKeySet.end()) { // 当前key在指定的集合中,不满足 nodeIt++; @@ -80,7 +83,7 @@ void LFUCache::GetAndDeleteLeastFreqKeyInfo(int64_t num, const vector /// 放入key,新增/更新(次数+1)次数 /// \param key key -void LFUCache::Put(emb_key_t key) +void LFUCache::Put(emb_cache_key_t key) { auto it = keyTable.find(key); if (it == keyTable.end()) { @@ -94,8 +97,10 @@ void LFUCache::Put(emb_key_t key) freqTable[freq].erase(node); if (freqTable[freq].empty()) { freqTable.erase(freq); + if (minFreq == freq) { + minFreq += 1; + } } - if (minFreq == freq) { minFreq += 1; } freqTable[freq + 1].emplace_front(key, freq + 1); keyTable[key] = freqTable[freq + 1].begin(); } @@ 
-103,7 +108,7 @@ void LFUCache::Put(emb_key_t key) /// 直接放入指定次数;用于初始化场景 /// \param key key /// \param freq 频次 -void LFUCache::PutWithInit(emb_key_t key, freq_num_t freq) +void LFUCache::PutWithInit(emb_cache_key_t key, freq_num_t freq) { if (keyTable.find(key) != keyTable.end()) { // 一般初始化时,key应该不存在已经被插入的情况;此处替换就的key频次信息 @@ -120,7 +125,7 @@ void LFUCache::PutWithInit(emb_key_t key, freq_num_t freq) } /// 删除指定key -bool LFUCache::Pop(emb_key_t key) +bool LFUCache::Pop(emb_cache_key_t key) { auto it = keyTable.find(key); if (it == keyTable.end()) { @@ -139,15 +144,23 @@ bool LFUCache::Pop(emb_key_t key) /// 获取所有的key和次数信息 /// \return 频次数据map -std::unordered_map LFUCache::GetFreqTable() +std::unordered_map LFUCache::GetFreqTable() { - unordered_map freqMap(keyTable.size()); + unordered_map freqMap(keyTable.size()); for (const auto& it :keyTable) { freqMap[it.first] = it.second->freq; } return freqMap; } +LFUCache::LFUCache(const string& cacheName) +{ + name = cacheName; + minFreq = 0; + keyTable.clear(); + freqTable.clear(); +} + LFUCache::LFUCache() { minFreq = 0; diff --git a/src/core/ssd_cache/lfu_cache.h b/src/core/ssd_cache/lfu_cache.h index 247e490e..94fde539 100644 --- a/src/core/ssd_cache/lfu_cache.h +++ b/src/core/ssd_cache/lfu_cache.h @@ -31,10 +31,10 @@ namespace MxRec { // 记录key和次数信息 struct LFUCacheNode { - emb_key_t key; + emb_cache_key_t key; freq_num_t freq; - LFUCacheNode(emb_key_t key, freq_num_t freq) : key(key), freq(freq) + LFUCacheNode(emb_cache_key_t key, freq_num_t freq) : key(key), freq(freq) {} }; @@ -42,25 +42,29 @@ namespace MxRec { public: LFUCache(); - freq_num_t Get(emb_key_t key); + explicit LFUCache(const string& cacheName); - void GetAndDeleteLeastFreqKeyInfo(int64_t num, const vector& keys, - vector& ddrSwapOutKeys, + freq_num_t Get(emb_cache_key_t key); + + void GetAndDeleteLeastFreqKeyInfo(uint64_t num, const vector& keys, + vector& ddrSwapOutKeys, vector& ddrSwapOutCounts); - void Put(emb_key_t key); + void Put(emb_cache_key_t key); - bool Pop(emb_key_t key); + bool Pop(emb_cache_key_t key); - void PutWithInit(emb_key_t key, freq_num_t freq); + void PutWithInit(emb_cache_key_t key, freq_num_t freq); - std::unordered_map GetFreqTable(); + std::unordered_map GetFreqTable(); // 最小频次 freq_num_t minFreq = 0; // 次数, 该次数对应的key列表(key, freq) std::unordered_map> freqTable; // key, key所属node在freqTable的节点列表中的存储位置地址 - std::unordered_map::iterator> keyTable; + std::unordered_map::iterator> keyTable; + private: + string name; }; } diff --git a/src/core/ssd_cache/preprocess_mapper.h b/src/core/ssd_cache/preprocess_mapper.h new file mode 100644 index 00000000..03860181 --- /dev/null +++ b/src/core/ssd_cache/preprocess_mapper.h @@ -0,0 +1,108 @@ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved. 
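The change to LFUCache::Put above is a behavior fix rather than a tidy-up: previously minFreq advanced whenever the promoted key happened to sit at minFreq, even while other keys remained in that frequency bucket, so later least-frequency scans could start above the true minimum. A self-contained model of the corrected promotion step, simplified to a plain frequency map instead of stored list iterators:

    #include <cstdint>
    #include <list>
    #include <unordered_map>

    // Minimal model of the promotion step for a key that is already present.
    // minFreq may only advance when the bucket it points at becomes empty.
    struct MiniLfu {
        using Key = uint64_t;
        std::unordered_map<int64_t, std::list<Key>> freqTable;
        std::unordered_map<Key, int64_t> keyFreq;
        int64_t minFreq = 0;

        void Promote(Key key)  // precondition: key exists in keyFreq
        {
            int64_t freq = keyFreq[key];
            auto& bucket = freqTable[freq];
            bucket.remove(key);
            if (bucket.empty()) {
                freqTable.erase(freq);
                if (minFreq == freq) {  // fixed: only advance on an empty bucket
                    minFreq = freq + 1;
                }
            }
            freqTable[freq + 1].push_front(key);
            keyFreq[key] = freq + 1;
        }
    };
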
+ * Description: ssd cache module + * Author: MindX SDK + * Date: 2024/2/18 + */ + +#ifndef MXREC_DDR_PREPROCESS_MAPPER_H +#define MXREC_DDR_PREPROCESS_MAPPER_H + +#include +#include "lfu_cache.h" + +namespace MxRec { + /* + * 专供keys处理的线程使用,每一个emb_local_table就有一个DDRPreProcessMapper + * MapperBase中的桶存储k-v对,在这里value统一赋值为0 + */ + class PreProcessMapper { + public: + void Initialize(const string& embName, uint32_t vocabSize, uint32_t ssdVocabSize) + { + tableName = embName; + lfuCache = LFUCache(embName); + ddrAvailableSize = vocabSize; + ssdAvailableSize = ssdVocabSize; + } + + bool IsDDRKeyExist(uint64_t key) + { + return lfuCache.keyTable.find(key) != lfuCache.keyTable.end(); + } + + bool IsSSDKeyExist(uint64_t key) + { + return excludeDDRKeyCountMap.find(key) != excludeDDRKeyCountMap.end(); + } + + bool InsertDDRKey(uint64_t key) + { + if (IsDDRKeyExist(key)) { + throw std::invalid_argument("InsertDDRKey failed! key already exist"); + } + + freq_num_t freq = excludeDDRKeyCountMap[key] + 1; + lfuCache.PutWithInit(key, freq); + return true; + } + + bool InsertSSDKey(uint64_t key) + { + if (IsSSDKeyExist(key)) { + throw std::invalid_argument("InsertSSDKey failed! key already exist"); + } + + excludeDDRKeyCountMap[key] = 1; + return true; + } + + bool RemoveSSDKey(uint64_t key) + { + if (!IsSSDKeyExist(key)) { + throw std::invalid_argument("RemoveKey failed! key not exist"); + } + excludeDDRKeyCountMap.erase(key); + return true; + } + + size_t DDRAvailableSize() + { + if (ddrAvailableSize < lfuCache.keyTable.size()) { + throw std::invalid_argument("ddrAvailableSize < existKeys.size()"); + } + return ddrAvailableSize - lfuCache.keyTable.size(); + } + + size_t SSDAvailableSize() + { + if (ssdAvailableSize < excludeDDRKeyCountMap.size()) { + throw std::invalid_argument("ssdAvailableSize < existKeys.size()"); + } + return ssdAvailableSize - excludeDDRKeyCountMap.size(); + } + + void GetAndDeleteLeastFreqDDRKey2SSD(uint64_t transNum, const std::vector& keys, + std::vector& DDRSwapOutKeys) + { + LOG_DEBUG("start GetAndDeleteLeastFreqDDRKey2SSD, table:{}", tableName); + std::vector DDRSwapOutCounts; + lfuCache.GetAndDeleteLeastFreqKeyInfo(transNum, keys, DDRSwapOutKeys, DDRSwapOutCounts); + for (uint64_t i = 0; i < DDRSwapOutKeys.size(); i++) { + excludeDDRKeyCountMap[DDRSwapOutKeys[i]] = DDRSwapOutCounts[i]; + } + if (DDRSwapOutCounts.size() != transNum) { + throw std::invalid_argument( + "GetAndDeleteLeastFreqDDRKey2SSD failed! 
DDRSwapOutCounts.size()!=transNum"); + } + } + + string tableName; + uint64_t ddrAvailableSize = 0; + uint64_t ssdAvailableSize = 0; + LFUCache lfuCache; + std::unordered_map excludeDDRKeyCountMap; + }; +} + +#endif // MXREC_DDR_PREPROCESS_MAPPER_H diff --git a/src/core/ssd_engine/file.cpp b/src/core/ssd_engine/file.cpp index 83395f36..cc9ec206 100644 --- a/src/core/ssd_engine/file.cpp +++ b/src/core/ssd_engine/file.cpp @@ -24,7 +24,7 @@ using namespace MxRec; /// 创建新文件实例,包含元数据文件、数据文件 /// \param fileID 文件ID /// \param fileDir 当前文件目录 -File::File(uint64_t fileID, string &fileDir) : fileID(fileID), fileDir(fileDir) +File::File(uint64_t fileID, string& fileDir) : fileID(fileID), fileDir(fileDir) { LOG_DEBUG("start init file, fileID:{}", fileID); @@ -75,7 +75,7 @@ File::File(uint64_t fileID, string &fileDir) : fileID(fileID), fileDir(fileDir) /// \param loadDir 加载文件的目录 /// \param fileDir 当前文件目录 /// \param step 加载的步数 -File::File(uint64_t fileID, string &fileDir, string &loadDir, int step) : fileID(fileID), fileDir(fileDir) +File::File(uint64_t fileID, string& fileDir, string& loadDir, int step) : fileID(fileID), fileDir(fileDir) { LOG_DEBUG("start init file with load, fileID:{}", fileID); @@ -141,13 +141,13 @@ File::~File() fs::remove(dataFilePath); } -bool File::IsKeyExist(emb_key_t key) +bool File::IsKeyExist(emb_cache_key_t key) const { auto it = keyToOffset.find(key); return !(it == keyToOffset.end()); } -void File::InsertEmbeddings(vector &keys, vector> &embeddings) +void File::InsertEmbeddings(vector& keys, vector>& embeddings) { if (keys.size() != embeddings.size()) { throw invalid_argument("keys' length not equal to embeddings' length"); @@ -178,10 +178,10 @@ void File::InsertEmbeddings(vector &keys, vector> &embe dataCnt += dLen; } -vector> File::FetchEmbeddings(vector &keys) +vector> File::FetchEmbeddings(vector& keys) { vector> ret; - for (emb_key_t k: keys) { + for (emb_cache_key_t k: keys) { auto it = keyToOffset.find(k); if (it == keyToOffset.end()) { throw invalid_argument("key not exist"); @@ -208,7 +208,7 @@ vector> File::FetchEmbeddings(vector &keys) return ret; } -void File::DeleteEmbedding(emb_key_t key) +void File::DeleteEmbedding(emb_cache_key_t key) { if (!IsKeyExist(key)) { return; @@ -217,7 +217,7 @@ void File::DeleteEmbedding(emb_key_t key) staleDataCnt += 1; } -void File::Save(const string &saveDir, int step) +void File::Save(const string& saveDir, int step) { LOG_DEBUG("start save file at step:{}, fileID:{}", step, fileID); @@ -278,7 +278,7 @@ void File::Load() { // file already validate and open in instantiation LOG_DEBUG("start reading meta file, fileID:{}", fileID); - emb_key_t key; + emb_cache_key_t key; offset_t offset; do { localFileMeta.read(reinterpret_cast(&key), keyDataLen); @@ -311,9 +311,9 @@ void File::Load() LOG_DEBUG("end reading meta file, fileID:{}", fileID); } -vector File::GetKeys() +vector File::GetKeys() { - vector ret; + vector ret; for (auto item: keyToOffset) { ret.push_back(item.first); } @@ -334,3 +334,40 @@ uint64_t File::GetStaleDataCnt() const { return staleDataCnt; } + +void File::InsertEmbeddingsByAddr(vector& keys, vector& embeddingsAddr, + uint64_t extEmbeddingSize) +{ + if (keys.size() != embeddingsAddr.size()) { + throw invalid_argument("keys' length not equal to embeddings' length"); + } + + size_t dLen = keys.size(); + for (size_t i = 0; i < dLen; ++i) { + if (embeddingsAddr[i] == nullptr) { + throw invalid_argument("Null pointer found in embeddingsAddr"); + } + } + + localFileData.seekp(lastWriteOffset); // always set pointer to 
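With PreProcessMapper in place, the swap-out classification in cache_manager.cpp reduces to a per-key decision tree. A condensed restatement of that logic; the include path is assumed:

    #include <cstdint>
    #include "ssd_cache/preprocess_mapper.h"  // include path assumed

    // Condensed restatement of the per-key classification that
    // ProcessSwapOutKeys performs against PreProcessMapper.
    void TrackKey(MxRec::PreProcessMapper& mapper, uint64_t key)
    {
        if (mapper.IsDDRKeyExist(key)) {
            mapper.lfuCache.Put(key);             // DDR-resident: bump frequency
        } else if (mapper.IsSSDKeyExist(key)) {
            mapper.excludeDDRKeyCountMap[key]++;  // SSD-resident: count outside LFU
        } else if (mapper.DDRAvailableSize() > 0) {
            mapper.InsertDDRKey(key);             // new key and DDR has room
        } else {
            mapper.InsertSSDKey(key);             // new key, spill to SSD
        }
    }
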
buffer end in case reading happened before + + for (size_t i = 0; i < dLen; ++i) { + if (IsKeyExist(keys[i])) { + staleDataCnt++; + } + keyToOffset[keys[i]] = lastWriteOffset; + + if (extEmbeddingSize > maxEmbSize) { + throw invalid_argument("embedding size too large"); + } + localFileData.write(reinterpret_cast(&extEmbeddingSize), sizeof(extEmbeddingSize)); + localFileData.write(reinterpret_cast(embeddingsAddr[i]), extEmbeddingSize * sizeof(float)); + + auto pos = localFileData.tellp(); + if (pos == -1) { + throw runtime_error("can't get file position pointer, write data failed"); + } + lastWriteOffset = offset_t(pos); + } + dataCnt += dLen; +} diff --git a/src/core/ssd_engine/file.h b/src/core/ssd_engine/file.h index 949859db..bc2b1fcb 100644 --- a/src/core/ssd_engine/file.h +++ b/src/core/ssd_engine/file.h @@ -33,30 +33,31 @@ namespace MxRec { using offset_t = uint32_t; class File { - static const uint64_t keyDataLen = sizeof(emb_key_t); - static const uint64_t offsetDataLen = sizeof(offset_t); + static constexpr uint64_t keyDataLen = sizeof(emb_cache_key_t); + static constexpr uint64_t offsetDataLen = sizeof(offset_t); public: - File(uint64_t fileID, string &fileDir); + File(uint64_t fileID, string& fileDir); - File(uint64_t fileID, string &fileDir, string &loadDir, int step); // initialize with loading specific step data + File(uint64_t fileID, string& fileDir, string& loadDir, + int step); // initialize with loading specific step data File(const File&) = delete; File& operator=(const File&) = delete; ~File(); - bool IsKeyExist(emb_key_t key); + bool IsKeyExist(emb_cache_key_t key) const; - void InsertEmbeddings(vector &keys, vector> &embeddings); + void InsertEmbeddings(vector& keys, vector>& embeddings); - vector> FetchEmbeddings(vector &keys); + vector> FetchEmbeddings(vector& keys); - void DeleteEmbedding(emb_key_t key); + void DeleteEmbedding(emb_cache_key_t key); - void Save(const string &saveDir, int step); + void Save(const string& saveDir, int step); - vector GetKeys(); + vector GetKeys(); uint64_t GetDataCnt() const; @@ -64,6 +65,9 @@ namespace MxRec { uint64_t GetStaleDataCnt() const; + void InsertEmbeddingsByAddr(vector& keys, vector& embeddingsAddr, + uint64_t extEmbeddingSize); + private: uint64_t fileID; // init by constructor string fileDir; // init by constructor @@ -77,7 +81,7 @@ namespace MxRec { uint64_t dataCnt = 0; uint64_t staleDataCnt = 0; - unordered_map keyToOffset{}; // offset_t >> maxDataNumInFile * embDataSize + unordered_map keyToOffset{}; // offset_t >> maxDataNumInFile * embDataSize offset_t lastWriteOffset = 0; void Load(); diff --git a/src/core/ssd_engine/ssd_engine.cpp b/src/core/ssd_engine/ssd_engine.cpp index 65708792..bbf55e66 100644 --- a/src/core/ssd_engine/ssd_engine.cpp +++ b/src/core/ssd_engine/ssd_engine.cpp @@ -27,7 +27,7 @@ bool SSDEngine::IsTableExist(const string &tableName) return !(it == tableMap.end()); } -bool SSDEngine::IsKeyExist(const string &tableName, emb_key_t key) +bool SSDEngine::IsKeyExist(const string &tableName, emb_cache_key_t key) { if (!isRunning) { throw runtime_error("SSDEngine not running"); @@ -54,7 +54,8 @@ void SSDEngine::CreateTable(const string &tableName, vector savePaths, u tableMap[tableName] = make_shared(tableName, savePaths, maxTableSize, compactThreshold); } -void SSDEngine::InsertEmbeddings(const string &tableName, vector &keys, vector> &embeddings) +void SSDEngine::InsertEmbeddings(const string& tableName, vector& keys, + vector>& embeddings) { if (!isRunning) { throw runtime_error("SSDEngine not 
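InsertEmbeddingsByAddr writes each record as a uint64_t element count followed by that many floats, and keyToOffset remembers where each record starts. A sketch of reading one record back under that layout; the function is illustrative, not part of the File API:

    #include <cstdint>
    #include <fstream>
    #include <stdexcept>
    #include <vector>

    // Read one [uint64_t count][count x float] record at a known offset.
    std::vector<float> ReadRecord(std::fstream& dataFile, uint32_t offset)
    {
        dataFile.seekg(offset);
        uint64_t len = 0;
        dataFile.read(reinterpret_cast<char*>(&len), sizeof(len));
        std::vector<float> emb(len);
        dataFile.read(reinterpret_cast<char*>(emb.data()), len * sizeof(float));
        if (!dataFile) {
            throw std::runtime_error("short read: record truncated or offset stale");
        }
        return emb;
    }
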
running"); @@ -71,7 +72,7 @@ void SSDEngine::InsertEmbeddings(const string &tableName, vector &key it->second->InsertEmbeddings(keys, embeddings); } -void SSDEngine::DeleteEmbeddings(const string &tableName, vector &keys) +void SSDEngine::DeleteEmbeddings(const string &tableName, vector &keys) { if (!isRunning) { throw runtime_error("SSDEngine not running"); @@ -154,7 +155,7 @@ void SSDEngine::CompactMonitor() LOG_DEBUG("SSDEngine end CompactMonitor"); } -vector> SSDEngine::FetchEmbeddings(const string &tableName, vector &keys) +vector> SSDEngine::FetchEmbeddings(const string &tableName, vector &keys) { if (!isRunning) { throw runtime_error("SSDEngine not running"); @@ -209,3 +210,30 @@ int64_t SSDEngine::GetTableEmbeddingSize(const string &tableName) } return static_cast(it->second->GetTableUsage()); } + +void SSDEngine::InsertEmbeddingsByAddr(const string& tableName, vector& keys, + vector& embeddingsAddr, uint64_t extEmbeddingSize) +{ + if (!isRunning) { + throw runtime_error("SSDEngine not running"); + } + auto it = as_const(tableMap).find(tableName); + if (it == tableMap.end()) { + throw invalid_argument("table not found"); + } + + if (keys.size() != embeddingsAddr.size()) { + throw invalid_argument("keys' length not equal to embeddings' length"); + } + + it->second->InsertEmbeddingsByAddr(keys, embeddingsAddr, extEmbeddingSize); +} + +vector>> SSDEngine::ExportTableKey() +{ + vector>> tableKeysVec; + for (const auto& p : tableMap) { + tableKeysVec.emplace_back(p.first, p.second->ExportKeys()); + } + return tableKeysVec; +} diff --git a/src/core/ssd_engine/ssd_engine.h b/src/core/ssd_engine/ssd_engine.h index 10f89d57..538f76e2 100644 --- a/src/core/ssd_engine/ssd_engine.h +++ b/src/core/ssd_engine/ssd_engine.h @@ -31,17 +31,18 @@ namespace MxRec { public: bool IsTableExist(const string &tableName); - bool IsKeyExist(const string &tableName, emb_key_t key); + bool IsKeyExist(const string &tableName, emb_cache_key_t key); void CreateTable(const string &tableName, vector savePaths, uint64_t maxTableSize); int64_t GetTableAvailableSpace(const string &tableName); - void InsertEmbeddings(const string &tableName, vector &keys, vector> &embeddings); + void InsertEmbeddings(const string &tableName, vector &keys, + vector> &embeddings); - void DeleteEmbeddings(const string &tableName, vector &keys); + void DeleteEmbeddings(const string &tableName, vector &keys); - vector> FetchEmbeddings(const string &tableName, vector &keys); + vector> FetchEmbeddings(const string &tableName, vector &keys); void Save(int step); @@ -57,6 +58,11 @@ namespace MxRec { int64_t GetTableEmbeddingSize(const string& tableName); + void InsertEmbeddingsByAddr(const string &tableName, vector &keys, + vector &embeddingsAddr, uint64_t extEmbeddingSize); + + vector>> ExportTableKey(); + private: bool isRunning = false; diff --git a/src/core/ssd_engine/table.cpp b/src/core/ssd_engine/table.cpp index c7ed5363..592cce0e 100644 --- a/src/core/ssd_engine/table.cpp +++ b/src/core/ssd_engine/table.cpp @@ -72,27 +72,27 @@ Table::Table(const string &name, vector &saveDirs, uint64_t maxTableSize LOG_INFO("load table:{} done. 
try store at path:{}", name, curTablePath); } -bool Table::IsKeyExist(emb_key_t key) +bool Table::IsKeyExist(emb_cache_key_t key) { lock_guard guard(rwLock); auto it = keyToFile.find(key); return !(it == keyToFile.end()); } -void Table::InsertEmbeddings(vector &keys, vector> &embeddings) +void Table::InsertEmbeddings(vector &keys, vector> &embeddings) { lock_guard guard(rwLock); InsertEmbeddingsInner(keys, embeddings); } -vector> Table::FetchEmbeddings(vector &keys) +vector> Table::FetchEmbeddings(vector &keys) { lock_guard guard(rwLock); return FetchEmbeddingsInner(keys); } -void Table::DeleteEmbeddings(vector &keys) +void Table::DeleteEmbeddings(vector &keys) { lock_guard guard(rwLock); DeleteEmbeddingsInner(keys); @@ -205,7 +205,7 @@ void Table::LoadDataFileSet(const shared_ptr &metaFile, int step) throw invalid_argument("table size too small, key quantity exceed while loading data"); } - for (emb_key_t k: keys) { + for (emb_cache_key_t k: keys) { if (keyToFile.find(k) != keyToFile.end()) { throw invalid_argument( "find duplicate key in files, compaction already done before saving, file may broken or modified"); @@ -267,7 +267,7 @@ void Table::Load(const string &metaFilePath, int step) LOG_INFO("table:{}, end load data file", name); } -void Table::InsertEmbeddingsInner(vector &keys, vector> &embeddings) +void Table::InsertEmbeddingsInner(vector &keys, vector> &embeddings) { if (totalKeyCnt > maxTableSize) { throw invalid_argument("table size too small, key quantity exceed while loading data"); @@ -281,7 +281,7 @@ void Table::InsertEmbeddingsInner(vector &keys, vector> curMaxFileID++; } - for (emb_key_t k: keys) { + for (emb_cache_key_t k: keys) { auto it = keyToFile.find(k); if (it != keyToFile.end()) { it->second->DeleteEmbedding(k); @@ -294,25 +294,25 @@ void Table::InsertEmbeddingsInner(vector &keys, vector> totalKeyCnt += keys.size(); } -vector> Table::FetchEmbeddingsInner(vector &keys) +vector> Table::FetchEmbeddingsInner(vector &keys) { // build mini batch for each file, first element for keys, second for index size_t dLen = keys.size(); - unordered_map, shared_ptr, vector>>> miniBatch; + unordered_map, shared_ptr, vector>>> miniBatch; for (size_t i = 0; i < dLen; ++i) { auto it = as_const(keyToFile).find(keys[i]); if (it == keyToFile.end()) { throw invalid_argument(StringFormat("failed to find the key, {key=%d} not exist!", keys[i])); } if (miniBatch[it->second] == nullptr) { - miniBatch[it->second] = make_shared, vector>>(); + miniBatch[it->second] = make_shared, vector>>(); } miniBatch[it->second]->first.emplace_back(keys[i]); miniBatch[it->second]->second.emplace_back(i); } // must convert map to list to perform parallel query, omp not support to iterate map - vector, vector, vector>> queryList; + vector, vector, vector>> queryList; queryList.reserve(miniBatch.size()); for (auto [f, info]: miniBatch) { queryList.emplace_back(f, info->first, info->second); @@ -368,7 +368,7 @@ void Table::Compact(bool fullCompact) for (const auto &f: compactFileList) { staleDataFileSet.erase(f); fileSet.erase(f); - vector validKeys = f->GetKeys(); + vector validKeys = f->GetKeys(); vector> validEmbs = f->FetchEmbeddings(validKeys); InsertEmbeddingsInner(validKeys, validEmbs); } @@ -381,9 +381,9 @@ uint64_t Table::GetTableAvailableSpace() return maxTableSize - totalKeyCnt; } -void Table::DeleteEmbeddingsInner(vector &keys) +void Table::DeleteEmbeddingsInner(vector &keys) { - for (emb_key_t k: keys) { + for (emb_cache_key_t k: keys) { auto it = keyToFile.find(k); if (it != keyToFile.end()) { 
it->second->DeleteEmbedding(k); @@ -441,3 +441,46 @@ void Table::CreateTableDir(const string &path) LOG_DEBUG("create table dir:{}", path); } +void Table::InsertEmbeddingsByAddr(vector& keys, vector& embeddingsAddr, + uint32_t extEmbeddingSize) +{ + lock_guard guard(rwLock); + InsertEmbeddingsByAddrInner(keys, embeddingsAddr, extEmbeddingSize); +} + +void Table::InsertEmbeddingsByAddrInner(vector& keys, vector& embeddingsAddr, + uint64_t extEmbeddingSize) +{ + if (totalKeyCnt > maxTableSize) { + throw invalid_argument("table size too small, key quantity exceed while loading data"); + } + + if (curFile == nullptr || (curFile != nullptr && curFile->GetDataCnt() >= maxDataNumInFile)) { + SetTablePathToDiskWithSpace(); + CreateTableDir(curTablePath); + curFile = make_shared(curMaxFileID, curTablePath); + fileSet.insert(curFile); + curMaxFileID++; + } + + for (emb_cache_key_t k : keys) { + auto it = keyToFile.find(k); + if (it != keyToFile.end()) { + it->second->DeleteEmbedding(k); + staleDataFileSet.insert(it->second); + totalKeyCnt -= 1; + } + keyToFile[k] = curFile; + } + curFile->InsertEmbeddingsByAddr(keys, embeddingsAddr, extEmbeddingSize); + totalKeyCnt += keys.size(); +} + +vector Table::ExportKeys() +{ + vector vec; + for (const auto& p : keyToFile) { + vec.push_back(p.first); + } + return vec; +} \ No newline at end of file diff --git a/src/core/ssd_engine/table.h b/src/core/ssd_engine/table.h index 87fa6f35..c34837dc 100644 --- a/src/core/ssd_engine/table.h +++ b/src/core/ssd_engine/table.h @@ -32,18 +32,18 @@ namespace MxRec { class Table { public: - Table(const string &name, vector &savePaths, uint64_t maxTableSize, double compactThreshold); + Table(const string& name, vector& savePaths, uint64_t maxTableSize, double compactThreshold); // initialize with loading specific step data - Table(const string &name, vector &saveDirs, uint64_t maxTableSize, double compactThreshold, int step); + Table(const string& name, vector& saveDirs, uint64_t maxTableSize, double compactThreshold, int step); - bool IsKeyExist(emb_key_t key); + bool IsKeyExist(emb_cache_key_t key); - void InsertEmbeddings(vector &keys, vector> &embeddings); + void InsertEmbeddings(vector& keys, vector>& embeddings); - vector> FetchEmbeddings(vector &keys); + vector> FetchEmbeddings(vector& keys); - void DeleteEmbeddings(vector &keys); + void DeleteEmbeddings(vector& keys); void Save(int step); @@ -53,26 +53,34 @@ namespace MxRec { uint64_t GetTableUsage(); + void InsertEmbeddingsByAddr(vector& keys, vector& embeddingsAddr, + uint32_t extEmbeddingSize); + + vector ExportKeys(); + private: static void CreateTableDir(const string& path); void Load(const string& metaFilePath, int step); - void InsertEmbeddingsInner(vector &keys, vector> &embeddings); + void InsertEmbeddingsInner(vector& keys, vector>& embeddings); - void DeleteEmbeddingsInner(vector &keys); + void DeleteEmbeddingsInner(vector& keys); - vector> FetchEmbeddingsInner(vector &keys); + vector> FetchEmbeddingsInner(vector& keys); void LoadDataFileSet(const shared_ptr& metaFile, int step); void SetTablePathToDiskWithSpace(); + void InsertEmbeddingsByAddrInner(vector& keys, vector& embeddingsAddr, + uint64_t extEmbeddingSize); + string name; // init by constructor vector savePaths; // init by constructor, support Save and Load from multiple path uint64_t maxTableSize; // init by constructor, maximum key-value volume uint64_t totalKeyCnt = 0; - unordered_map> keyToFile{}; // max mem cost 1.5G*2 for 100m keys + unordered_map> keyToFile{}; // max mem cost 1.5G*2 for 
100m keys set> staleDataFileSet{}; string curTablePath = ""; uint32_t curSavePathIdx = 0; diff --git a/src/core/utils/common.cpp b/src/core/utils/common.cpp index abd50f56..d281162c 100644 --- a/src/core/utils/common.cpp +++ b/src/core/utils/common.cpp @@ -37,6 +37,8 @@ namespace MxRec { int GlogConfig::gGlogLevel; string GlogConfig::gRankId; + ock::ctr::FactoryPtr factory {}; + RankInfo::RankInfo(int rankId, int deviceId, int localRankSize, int option, const vector& ctrlSteps) : rankId(rankId), deviceId(deviceId), localRankSize(localRankSize), option(option), ctrlSteps(ctrlSteps) { diff --git a/src/core/utils/common.h b/src/core/utils/common.h index 4f1d076c..5bb93a41 100644 --- a/src/core/utils/common.h +++ b/src/core/utils/common.h @@ -35,6 +35,8 @@ See the License for the specific language governing permissions and #include "initializer/constant_initializer/constant_initializer.h" #include "initializer/truncated_normal_initializer/truncated_normal_initializer.h" #include "initializer/random_normal_initializer/random_normal_initializer.h" +#include "ock_ctr_common/include/factory.h" +#include "ock_ctr_common/include/embedding_cache.h" #if defined(BUILD_WITH_EASY_PROFILER) #include @@ -53,6 +55,7 @@ namespace MxRec { #define MGMT_CPY_THREADS 4 #define PROFILING using namespace tensorflow; + extern ock::ctr::FactoryPtr factory; constexpr int TRAIN_CHANNEL_ID = 0; constexpr int EVAL_CHANNEL_ID = 1; @@ -65,6 +68,7 @@ namespace MxRec { constexpr size_t MAX_VOCABULARY_SIZE = 1e10; constexpr int SSD_SIZE_INDEX = 2; constexpr int MAX_FILE_NUM = 1000; + constexpr int EMBEDDING_THREAD_NUM = 2; // for GLOG struct GlogConfig { static bool gStatOn; @@ -111,10 +115,13 @@ namespace MxRec { const string COMBINE_HISTORY_NAME = "combine_table_history"; using emb_key_t = int64_t; + using emb_cache_key_t = uint64_t; using freq_num_t = int64_t; using EmbNameT= std::string; using KeysT = std::vector; using LookupKeyT = std::tuple; // batch_id quarry_lable keys_vector + using UinqueKeyT = std::tuple>; + using RestoreVecSecT = std::tuple>; using TensorInfoT = std::tuple>>::iterator>; namespace HybridOption { @@ -228,12 +235,17 @@ namespace MxRec { int localRankSize {}; bool useStatic { false }; uint32_t option {}; - int nBatch {}; bool isDDR { false }; bool isSSDEnabled { false }; bool useDynamicExpansion {false}; bool useSumSameIdGradients {true}; - std::vector ctrlSteps; // 包含三个步数: train_steps, eval_steps, save_steps + std::vector ctrlSteps; // 包含4个步数: train_steps, eval_steps, save_steps, max_train_steps + }; + + struct EmbBaseInfo { + int batchId; + int channelId; + string name; }; enum TensorIndex : uint32_t { @@ -445,7 +457,7 @@ namespace MxRec { EmbInfo(const EmbInfoParams& embInfoParams, std::vector vocabsize, - std::vector initializeInfos, + std::vector initializeInfos, std::vector ssdDataPath) : name(embInfoParams.name), sendCount(embInfoParams.sendCount), @@ -456,7 +468,7 @@ namespace MxRec { devVocabSize(vocabsize[0]), hostVocabSize(vocabsize[1]), ssdVocabSize(vocabsize[SSD_SIZE_INDEX]), - initializeInfos(initializeInfos), + initializeInfos(std::move(initializeInfos)), ssdDataPath(std::move(ssdDataPath)) { } @@ -470,7 +482,7 @@ namespace MxRec { size_t devVocabSize; size_t hostVocabSize; size_t ssdVocabSize; - std::vector initializeInfos; + std::vector initializeInfos; std::vector ssdDataPath; }; @@ -479,45 +491,6 @@ namespace MxRec { std::vector> embData; }; - struct EmbHashMapInfo { - absl::flat_hash_map hostHashMap; // key在HBM中的偏移 - std::vector devOffset2Batch; // has -1 - std::vector 
devOffset2Key; - size_t currentUpdatePos; - size_t currentUpdatePosStart; - size_t hostVocabSize; - size_t devVocabSize; - size_t freeSize; - std::vector lookUpVec; - std::vector missingKeysHostPos; // 用于记录当前batch在host上需要换出的偏移 - std::vector swapPos; // 记录从HBM换出到DDR的offset - /* - * 取值范围:[0,devVocabSize+hostVocabSize); - * [0,devVocabSize-1]时存储在HBM, [devVocabSize,devVocabSize+hostVocabSize)存储在DDR - */ - size_t maxOffset { 0 }; - /* - * 记录DDR内淘汰列表,其值为相对HBM+DDR大表的;hostHashMap可直接使用;操作ddr内emb时需减掉devVocabSize - * 例如:HBM表大小20(offset:0~19),DDR表大小为100(offset:0~99); - * 若DDR内0位置被淘汰,记录到evictPos的值为0+20=20 - */ - std::vector evictPos; - std::vector evictDevPos; // 记录HBM内淘汰列表 - size_t maxOffsetOld { 0 }; - std::vector evictPosChange; - std::vector evictDevPosChange; - std::vector> devOffset2KeyOld; - std::vector> oldSwap; // (old on dev, old on host) - /* - * HBM与DDR换入换出时,已存在于DDR且要转移到HBM的key(不包含新key); 用于SSD模式 - * (区别于oldSwap: pair.second为已存在于DDR key + 换入换出前映射到DDR的新key) - */ - std::vector ddr2HbmKeys; - void SetStartCount(); - - bool HasFree(size_t i) const; - }; - struct All2AllInfo { KeysT keyRecv; vector scAll; @@ -542,7 +515,6 @@ namespace MxRec { }; using EmbMemT = absl::flat_hash_map; - using EmbHashMemT = absl::flat_hash_map; using OffsetMemT = std::map; using KeyOffsetMemT = std::map>; using KeyCountMemT = std::map>; @@ -551,7 +523,8 @@ namespace MxRec { using OffsetMapT = std::map>; using OffsetT = std::vector; using AllKeyOffsetMapT = std::map>; - using KeyFreqMemT = unordered_map>; + using KeyFreqMemT = unordered_map>; + using EmbLocalTableT = EmbCache::EmbCacheManager; enum class CkptFeatureType { HOST_EMB = 0, @@ -561,12 +534,12 @@ namespace MxRec { FEAT_ADMIT_N_EVICT = 4, DDR_KEY_FREQ_MAP = 5, EXCLUDE_DDR_KEY_FREQ_MAP = 6, - KEY_COUNT_MAP = 7 + KEY_COUNT_MAP = 7, + EMB_LOCAL_TABLE = 8 }; struct CkptData { EmbMemT* hostEmbs = nullptr; - EmbHashMemT embHashMaps; OffsetMemT maxOffset; KeyOffsetMemT keyOffsetMap; OffsetMapT offsetMap; @@ -581,7 +554,6 @@ namespace MxRec { struct CkptTransData { std::vector int64Arr; std::vector addressArr; - std::vector floatArr; std::vector int32Arr; std::vector transDataset; // may all use this to transfer data std::vector attribute; // may need to use other form for attributes @@ -606,6 +578,33 @@ namespace MxRec { KEY_COUNT_MAP = 13 }; + enum CTRLogLevel { // can't use enum class due to compatibility for AccCTR + DEBUG = 0, + INFO, + WARN, + ERROR, + }; + + static void CTRLog(int level, const char *msg) + { + switch (level) { + case CTRLogLevel::DEBUG: + LOG_DEBUG(msg); + break; + case CTRLogLevel::INFO: + LOG_INFO(msg); + break; + case CTRLogLevel::WARN: + LOG_WARN(msg); + break; + case CTRLogLevel::ERROR: + LOG_ERROR(msg); + break; + default: + break; + } + } + ostream& operator<<(ostream& ss, MxRec::CkptDataType type); bool CheckFilePermission(const string& filePath); } // end namespace MxRec diff --git a/src/core/utils/task_queue.h b/src/core/utils/task_queue.h new file mode 100644 index 00000000..a42e5147 --- /dev/null +++ b/src/core/utils/task_queue.h @@ -0,0 +1,110 @@ +/* Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
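The extern factory handle and the CTRLog bridge added to common.h/common.cpp let the dynamically loaded AccCTR library log through MxRec's logger. A sketch of the wiring, assuming an initialization site of this shape exists elsewhere in the series:

    #include "utils/common.h"  // for MxRec::factory and MxRec::CTRLog

    // Sketch: create the factory once and route AccCTR's log lines (mapped by
    // CTRLogLevel) into the LOG_* macros via CTRLog.
    int InitCtrCommon()
    {
        int rc = ock::ctr::Factory::Create(MxRec::factory);
        if (rc != 0) {
            return rc;  // lib_ock_ctr_common.so missing or symbol not found
        }
        return MxRec::factory->SetExternalLogFuncInner(MxRec::CTRLog);
    }
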
diff --git a/src/core/utils/task_queue.h b/src/core/utils/task_queue.h
new file mode 100644
index 00000000..a42e5147
--- /dev/null
+++ b/src/core/utils/task_queue.h
@@ -0,0 +1,110 @@
+/* Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+ limitations under the License.
+==============================================================================*/
+
+#ifndef TASK_QUEUE_H
+#define TASK_QUEUE_H
+
+#include <condition_variable>
+#include <list>
+#include <mutex>
+#include <utility>
+
+namespace MxRec {
+    namespace Common {
+        template <typename T>
+        class TaskQueue {
+        public:
+            TaskQueue() = default;
+
+            ~TaskQueue() = default;
+
+            TaskQueue(TaskQueue const &other)
+            {
+                std::lock_guard<std::mutex> lk(other.mut);
+                dataQueue = other.dataQueue;
+            }
+
+            TaskQueue &operator=(TaskQueue const &other)
+            {
+                if (this == &other) {
+                    return *this;
+                }
+                std::lock_guard<std::mutex> lk(other.mut);
+                dataQueue = other.dataQueue;
+                return *this;
+            }
+
+            void Pushv(T &t)
+            {
+                std::lock_guard<std::mutex> lk(mut);
+                dataQueue.push_back(std::move(t));
+                dataCond.notify_one();
+            }
+
+            void Pushv(T &&t)
+            {
+                std::lock_guard<std::mutex> lk(mut);
+                dataQueue.emplace_back(t);
+                dataCond.notify_one();
+            }
+
+            T WaitAndPop()
+            {
+                std::unique_lock<std::mutex> lk(mut);
+                dataCond.wait(lk, [this] {
+                    if (!finished) {
+                        return !dataQueue.empty();
+                    } else {
+                        return true;
+                    }
+                });
+                T res;
+                if (finished) {
+                    return std::move(res);
+                }
+                res = std::move(dataQueue.front());
+                dataQueue.pop_front();
+                return std::move(res);
+            }
+
+            void DestroyQueue()
+            {
+                finished = true;
+                dataCond.notify_one();
+            }
+
+            bool Empty() const
+            {
+                std::lock_guard<std::mutex> lk(mut);
+                return dataQueue.empty();
+            }
+
+            size_t Size() const
+            {
+                std::lock_guard<std::mutex> lk(mut);
+                return dataQueue.size();
+            }
+
+        private:
+            mutable std::mutex mut;
+            std::list<T> dataQueue;
+            std::condition_variable dataCond;
+            bool finished = false;
+        };
+    }
+}
+
+
+#endif
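The TaskQueue above is a small mutex-plus-condition-variable blocking queue. A minimal usage sketch, assuming only the header added by this patch (the payload type, task strings, and include path are illustrative, not part of the patch):

    #include <iostream>
    #include <string>
    #include <thread>

    #include "utils/task_queue.h"  // path relative to src/core, as laid out above

    int main()
    {
        MxRec::Common::TaskQueue<std::string> queue;

        // Consumer: WaitAndPop() blocks until a producer pushes an item.
        std::thread consumer([&queue] {
            for (int i = 0; i < 2; ++i) {
                std::cout << "got task: " << queue.WaitAndPop() << std::endl;
            }
        });

        queue.Pushv(std::string("lookup batch 0"));  // rvalue overload
        std::string task = "lookup batch 1";
        queue.Pushv(task);                           // lvalue overload; note it moves from `task`
        consumer.join();

        // DestroyQueue() wakes any thread still blocked in WaitAndPop(),
        // which then returns a default-constructed T.
        queue.DestroyQueue();
        return 0;
    }

One behavior worth noting: once DestroyQueue() sets `finished`, WaitAndPop() returns a default-constructed value even if items remain queued, so callers need their own drain-before-shutdown convention.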
diff --git a/src/pybind/module_main.cpp b/src/pybind/module_main.cpp
index 351d19a4..767cf4e0 100644
--- a/src/pybind/module_main.cpp
+++ b/src/pybind/module_main.cpp
@@ -138,7 +138,7 @@ namespace {
     {
         pybind11::class_(m, "EmbInfo")
             .def(pybind11::init,
-                std::vector<InitializeInfo>&, std::vector<std::string>&>(),
+                std::vector<EmbCache::InitializerInfo>&, std::vector<std::string>&>(),
                 py::arg("embInfoParams"),
                 py::arg("vocab_size"),
                 py::arg("initialize_infos"),
@@ -176,36 +176,38 @@ namespace {
 
     void GetInitializeInfo(pybind11::module_ &m)
     {
-        pybind11::class_<InitializeInfo>(m, "InitializeInfo")
-            .def(py::init(), py::arg("name"), py::arg("start"),
-                py::arg("len"), py::arg("constant_initializer_info"))
-            .def(py::init(), py::arg("name"), py::arg("start"),
-                py::arg("len"), py::arg("normal_initializer_info"))
-            .def_readwrite("name", &InitializeInfo::name)
-            .def_readwrite("start", &InitializeInfo::start)
-            .def_readwrite("len", &InitializeInfo::len)
-            .def_readwrite("ConstantInitializerInfo", &InitializeInfo::constantInitializerInfo)
-            .def_readwrite("NormalInitializerInfo", &InitializeInfo::normalInitializerInfo);
+        pybind11::class_<EmbCache::InitializerInfo>(m, "InitializeInfo")
+            .def(py::init(),
+                py::arg("name"), py::arg("start"), py::arg("len"), py::arg("constant_initializer_info"))
+            .def(py::init(),
+                py::arg("name"), py::arg("start"), py::arg("len"), py::arg("normal_initializer_info"))
+            .def_readwrite("name", &EmbCache::InitializerInfo::name)
+            .def_readwrite("start", &EmbCache::InitializerInfo::start)
+            .def_readwrite("len", &EmbCache::InitializerInfo::len)
+            .def_readwrite("ConstantInitializerInfo", &EmbCache::InitializerInfo::constantInitializerInfo)
+            .def_readwrite("NormalInitializerInfo", &EmbCache::InitializerInfo::normalInitializerInfo);
     }
 
     void GetConstantInitializerInfo(pybind11::module_ &m)
     {
-        pybind11::class_<ConstantInitializerInfo>(m, "ConstantInitializerInfo")
-            .def(py::init(), py::arg("constant_val") = 0, py::arg("initK") = 1.0)
-            .def_readwrite("constant_val", &ConstantInitializerInfo::constantValue)
-            .def_readwrite("initK", &ConstantInitializerInfo::initK);
+        pybind11::class_<EmbCache::ConstantInitializerInfo>(m, "ConstantInitializerInfo")
+            .def(py::init(), py::arg("constant_val") = 0, py::arg("initK") = 1.0)
+            .def_readwrite("constant_val", &EmbCache::ConstantInitializerInfo::constantValue)
+            .def_readwrite("initK", &EmbCache::ConstantInitializerInfo::initK);
     }
 
     void GetNormalInitializerInfo(pybind11::module_ &m)
     {
-        pybind11::class_<NormalInitializerInfo>(m, "NormalInitializerInfo")
-            .def(py::init(), py::arg("mean") = 0.0,
-                py::arg("stddev") = 1.0, py::arg("seed") = 0,
-                py::arg("initK") = 1.0)
-            .def_readwrite("mean", &NormalInitializerInfo::mean)
-            .def_readwrite("stddev", &NormalInitializerInfo::stddev)
-            .def_readwrite("seed", &NormalInitializerInfo::seed)
-            .def_readwrite("initK", &NormalInitializerInfo::initK);
+        pybind11::class_<EmbCache::NormalInitializerInfo>(m, "NormalInitializerInfo")
+            .def(py::init(),
+                py::arg("mean") = 0.0,
+                py::arg("stddev") = 1.0,
+                py::arg("seed") = 0,
+                py::arg("initK") = 1.0)
+            .def_readwrite("mean", &EmbCache::NormalInitializerInfo::mean)
+            .def_readwrite("stddev", &EmbCache::NormalInitializerInfo::stddev)
+            .def_readwrite("seed", &EmbCache::NormalInitializerInfo::seed)
+            .def_readwrite("initK", &EmbCache::NormalInitializerInfo::initK);
     }
 
     void GetHybridMgmt(pybind11::module_& m)
@@ -220,6 +222,7 @@ namespace {
             py::arg("warm_start_tables") = vector {})
         .def("destroy", &MxRec::HybridMgmt::Destroy)
         .def("evict", &MxRec::HybridMgmt::Evict)
+        .def("fetch_device_emb", &MxRec::HybridMgmt::FetchDeviceEmb)
         .def("send", &MxRec::HybridMgmt::SendHostMap, py::arg("table_name") = "")
         .def("send_load_offset", &MxRec::HybridMgmt::SendLoadMap, py::arg("table_name") = "")
         .def("receive", &MxRec::HybridMgmt::ReceiveHostMap, py::arg("key_offset_map"))
diff --git a/src/tests/checkpoint/checkpoint_test.cpp b/src/tests/checkpoint/checkpoint_test.cpp
index ad7bf34d..8d296363 100644
--- a/src/tests/checkpoint/checkpoint_test.cpp
+++ b/src/tests/checkpoint/checkpoint_test.cpp
@@ -143,7 +143,7 @@ protected:
         }
     }
 
-    void SetDDRKeyFreqMap(unordered_map<emb_key_t, freq_num_t>& testDDRKeyFreqMap)
+    void SetDDRKeyFreqMap(unordered_map<emb_cache_key_t, freq_num_t>& testDDRKeyFreqMap)
     {
         for (int64_t i { 0 }; i < hostVocabSize; ++i) {
             testDDRKeyFreqMap[featMem] = i;
@@ -159,7 +159,7 @@ protected:
         }
     }
 
-    void SetExcludeDDRKeyFreqMap(unordered_map<emb_key_t, freq_num_t>& testExcludeDDRKeyFreqMap)
+    void SetExcludeDDRKeyFreqMap(unordered_map<emb_cache_key_t, freq_num_t>& testExcludeDDRKeyFreqMap)
     {
         for (int64_t i { 0 }; i < hostVocabSize; ++i) {
             testExcludeDDRKeyFreqMap[featMem] = i;
@@ -169,7 +169,7 @@ protected:
 
     void SetDDRKeyFreqMaps(KeyFreqMemT& testDDRKeyFreqMaps)
     {
-        unordered_map<emb_key_t, freq_num_t> testDDRKeyFreqMap;
+        unordered_map<emb_cache_key_t, freq_num_t> testDDRKeyFreqMap;
         for (const auto& testEmbInfo : testEmbInfos) {
             SetDDRKeyFreqMap(testDDRKeyFreqMap);
             testDDRKeyFreqMaps[testEmbInfo.name] = std::move(testDDRKeyFreqMap);
@@ -187,7 +187,7 @@ protected:
 
     void SetExcludeDDRKeyFreqMaps(KeyFreqMemT& testExcludeDDRKeyFreqMaps)
     {
-        unordered_map<emb_key_t, freq_num_t> testExcludeDDRKeyFreqMap;
+        unordered_map<emb_cache_key_t, freq_num_t> testExcludeDDRKeyFreqMap;
         for (const auto& testEmbInfo : testEmbInfos) {
             SetExcludeDDRKeyFreqMap(testExcludeDDRKeyFreqMap);
             testExcludeDDRKeyFreqMaps[testEmbInfo.name] = std::move(testExcludeDDRKeyFreqMap);
diff --git a/src/tests/emb_hashmap/emb_hashmap_test.cpp
b/src/tests/emb_hashmap/emb_hashmap_test.cpp deleted file mode 100644 index ac2f1583..00000000 --- a/src/tests/emb_hashmap/emb_hashmap_test.cpp +++ /dev/null @@ -1,185 +0,0 @@ -/* Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and - limitations under the License. -==============================================================================*/ - -#include -#include - -#include "emb_hashmap/emb_hashmap.h" -#include "hybrid_mgmt/hybrid_mgmt_block.h" -#include "ssd_cache/cache_manager.h" -#include "utils/common.h" - -using namespace std; -using namespace MxRec; -using namespace testing; - -const int HBM_VOCAB_SIZE = 10; -const int DDR_VOCAB_SIZE = 100; -const int SSD_VOCAB_SIZE = 100; -const int INT_2 = 2; -const int INT_4 = 4; -const int INT_21 = 21; -const int INT_42 = 42; -const int NEGATIVE_INT_1 = -1; - -// 刷新换入换出频次和打印信息 -void RefreshSwapFreqInfoAndPrint(EmbHashMap& hostHashMaps, string embTableName, int opTimes) -{ - auto& embHashMap = hostHashMaps.embHashMaps[embTableName]; - hostHashMaps.RefreshFreqInfoWithSwap(embTableName, embHashMap); - vector hbm2DdrKeyList; - vector ddr2HbmKeyList; - for (auto it : embHashMap.oldSwap) { - hbm2DdrKeyList.emplace_back(it.first); - ddr2HbmKeyList.emplace_back(it.second); - } - LOG_INFO("embHashMap hbm2DdrKeyList: {}", VectorToString(hbm2DdrKeyList)); - LOG_INFO("embHashMap ddr2HbmKeyList: {}", VectorToString(ddr2HbmKeyList)); - embHashMap.oldSwap.clear(); - LOG_INFO("RefreshSwapFreqInfoAndPrint end, opTimes:{}", opTimes); -} - -vector GetEmbInfoList() -{ - EmbInfo embInfo; - embInfo.name = "table1"; - embInfo.devVocabSize = HBM_VOCAB_SIZE; - embInfo.hostVocabSize = DDR_VOCAB_SIZE; - embInfo.ssdVocabSize = SSD_VOCAB_SIZE; - embInfo.ssdDataPath = {"ssd_data"}; - vector embInfos; - embInfos.emplace_back(embInfo); - return embInfos; -} - -// 测试HBM与DDR换入换出时CacheManager模块频次刷新 -TEST(EmbHashMap, TestFindOffset) -{ - LOG_INFO("start TestFindOffset"); - string embTableName = "table1"; - EmbHashMap hostHashMaps; - RankInfo rankInfo; - rankInfo.isDDR = true; - auto embInfo = GetEmbInfoList(); - hostHashMaps.Init(rankInfo, embInfo, false); - CacheManager cacheManager; - cacheManager.Init(nullptr, embInfo); - bool isSSDEnabled = true; - hostHashMaps.isSSDEnabled = isSSDEnabled; - hostHashMaps.cacheManager = &cacheManager; - int channelId = 0; - size_t currentBatchId = 0; - size_t keepBatchId = 0; - int opTimes = 0; - - vector keys = {1, 2, 3, 4, 5}; - hostHashMaps.FindOffset(embTableName, keys, currentBatchId++, keepBatchId++, channelId); - RefreshSwapFreqInfoAndPrint(hostHashMaps, embTableName, opTimes++); - - vector keys2 = {6, 7, 8, 9, 10}; - hostHashMaps.FindOffset(embTableName, keys2, currentBatchId++, keepBatchId++, channelId); - RefreshSwapFreqInfoAndPrint(hostHashMaps, embTableName, opTimes++); - - auto& excludeKeyMap = cacheManager.excludeDDRKeyCountMap[embTableName]; - auto& ddrKeyMap = cacheManager.ddrKeyFreqMap[embTableName]; - - auto logLevelTemp = Logger::GetLevel(); - Logger::SetLevel(Logger::TRACE); - vector keys4 = {21, 21, 21, 21}; // 
新key重复值, 且需要换入换出 - hostHashMaps.FindOffset(embTableName, keys4, currentBatchId++, keepBatchId++, channelId); - RefreshSwapFreqInfoAndPrint(hostHashMaps, embTableName, opTimes++); - ASSERT_EQ(excludeKeyMap[INT_21], INT_4); - ASSERT_EQ(ddrKeyMap.Get(1), 1); - - keys4 = {41, 42, 43, 44, 45, 46, 47, 48, 49, 50}; // 整个hbm大小key换入换出 - hostHashMaps.FindOffset(embTableName, keys4, currentBatchId++, keepBatchId++, channelId); - RefreshSwapFreqInfoAndPrint(hostHashMaps, embTableName, opTimes++); - ASSERT_EQ(ddrKeyMap.Get(INT_21), INT_4); - - keys4 = {51, 52, 53, 1, 2, 21, 41, 42, 43, 44}; // 3个新key, 3个在ddr, 4个在hbm - hostHashMaps.FindOffset(embTableName, keys4, currentBatchId, keepBatchId, channelId); - RefreshSwapFreqInfoAndPrint(hostHashMaps, embTableName, opTimes); - ASSERT_EQ(excludeKeyMap[1], INT_2); - ASSERT_EQ(excludeKeyMap[INT_42], INT_2); - ASSERT_EQ(ddrKeyMap.Get(INT_21), NEGATIVE_INT_1); - ASSERT_EQ(ddrKeyMap.Get(1), NEGATIVE_INT_1); - Logger::SetLevel(logLevelTemp); // 恢复日志级别 - LOG_INFO("test TestFindOffset end."); -} - -TEST(EmbHashMap, TESTGetHashMaps) -{ - string embTableName = "table1"; - EmbHashMap hostHashMaps; - RankInfo rankInfo; - rankInfo.isDDR = true; - auto embInfo = GetEmbInfoList(); - hostHashMaps.Init(rankInfo, embInfo, false); - CacheManager cacheManager; - cacheManager.Init(nullptr, embInfo); - hostHashMaps.isSSDEnabled = true; - hostHashMaps.cacheManager = &cacheManager; - int channelId = 0; - size_t currentBatchId = 0; - size_t keepBatchId = 0; - int opTimes = 0; - - vector keys = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; - hostHashMaps.FindOffset(embTableName, keys, currentBatchId++, keepBatchId++, channelId); - RefreshSwapFreqInfoAndPrint(hostHashMaps, embTableName, opTimes++); - auto testEmbHashMap = hostHashMaps.GetHashMaps().at(embTableName); - hostHashMaps.embHashMaps.at(embTableName).maxOffsetOld = testEmbHashMap.maxOffset; - // 增加10个key, offset长度变为10 - ASSERT_EQ(testEmbHashMap.maxOffset, 10); - - keys = {11, 12, 13, 14, 15, 16, 17, 18, 19, 20}; - hostHashMaps.FindOffset(embTableName, keys, currentBatchId++, keepBatchId++, channelId); - RefreshSwapFreqInfoAndPrint(hostHashMaps, embTableName, opTimes++); - testEmbHashMap = hostHashMaps.GetHashMaps().at(embTableName); - // 再增加10个key,offset变为20 - ASSERT_EQ(testEmbHashMap.maxOffset, 20); - - HybridMgmtBlock* hybridMgmtBlock = Singleton::GetInstance(); - hybridMgmtBlock->lastRunChannelId = channelId; - hybridMgmtBlock->hybridBatchId[0] = 1; - testEmbHashMap = hostHashMaps.GetHashMaps().at(embTableName); - // 回退一步,offset变回10 - ASSERT_EQ(testEmbHashMap.maxOffset, 10); - - hybridMgmtBlock->hybridBatchId[0] = 2; - // 回退2步,抛出异常 - ASSERT_THROW(hostHashMaps.GetHashMaps(), HybridMgmtBlockingException); - hybridMgmtBlock->hybridBatchId[0] = 0; - - keys = {10, 11}; - hostHashMaps.EvictDeleteEmb(embTableName, keys); - testEmbHashMap = hostHashMaps.GetHashMaps().at(embTableName); - // 淘汰1个hbm key和1个ddr key,表中无法查找到该key - ASSERT_EQ(testEmbHashMap.hostHashMap.find(10), testEmbHashMap.hostHashMap.end()); - ASSERT_EQ(testEmbHashMap.hostHashMap.find(11), testEmbHashMap.hostHashMap.end()); - ASSERT_EQ(cacheManager.excludeDDRKeyCountMap[embTableName][11], 0); - ASSERT_EQ(cacheManager.ddrKeyFreqMap[embTableName].Get(10), -1); - - keys = {1, 2}; - hostHashMaps.FindOffset(embTableName, keys, currentBatchId++, keepBatchId++, channelId); - RefreshSwapFreqInfoAndPrint(hostHashMaps, embTableName, opTimes++); - testEmbHashMap = hostHashMaps.GetHashMaps().at(embTableName); - // 从ddr中换回2个key到hbm,交换变量长度为2 - ASSERT_EQ(testEmbHashMap.ddr2HbmKeys.size(), 2); - 
hostHashMaps.ClearLookupAndSwapOffset(hostHashMaps.embHashMaps.at(embTableName)); - testEmbHashMap = hostHashMaps.GetHashMaps().at(embTableName); - // 清理后,交换变量长度为0 - ASSERT_EQ(testEmbHashMap.ddr2HbmKeys.size(), 0); -} \ No newline at end of file diff --git a/src/tests/emb_mgmt/emb_mgmt_test.cpp b/src/tests/emb_mgmt/emb_mgmt_test.cpp index e47f3b4f..4924abf1 100644 --- a/src/tests/emb_mgmt/emb_mgmt_test.cpp +++ b/src/tests/emb_mgmt/emb_mgmt_test.cpp @@ -15,7 +15,6 @@ See the License for the specific language governing permissions and #include #include "hybrid_mgmt/hybrid_mgmt.h" -#include "host_emb/host_emb.h" #include "utils/common.h" using namespace std; @@ -62,30 +61,6 @@ protected: string constantInitializerName = "constant_initializer"; int nBatch = 10; - void UpdateEmb(vector &missingKeysHostPos, int channelId, const string &embName, - std::unique_ptr &hostEmb, vector &d2h_emb) - { - LOG_INFO(HD + "update emb start"); - if (d2h_emb.size() == 0) { - LOG_INFO(HD + "emb is none channelId:{}", channelId); - return; - } - - auto tensorPtr = d2h_emb[0].flat().data(); - for (size_t i = 0; i < missingKeysHostPos.size(); i++) { - (hostEmb->GetEmb(embName).embData[missingKeysHostPos[i]]).assign( - tensorPtr, - tensorPtr + hostEmb->GetEmb(embName).hostEmbInfo.extEmbeddingSize); - tensorPtr = tensorPtr + hostEmb->GetEmb(embName).hostEmbInfo.extEmbeddingSize; - } - for (size_t i = 0; i < hostEmb->GetEmb(embName).embData.size(); ++i) { - LOG_INFO("hostEmb: embName {}, {} is: {}", embName, i, - VectorToString(hostEmb->GetEmb(embName).embData[i])); - } - LOG_INFO(HD + "update emb end"); - d2h_emb.clear(); - } - bool Float2TensorVec(const vector>& Datas, vector& tensors) { tensors.clear(); @@ -116,63 +91,6 @@ protected: // delete } }; -#ifndef GTEST -TEST_F(EmbMgmtTest, Initialize) -{ - vector vocabsize = { devVocabSize, hostVocabSize }; - aoto param = EmbInfoParams(name, sendCount, embeddingSize, extEmbeddingSize, isSave) - embInfo = EmbInfo(param, vocabsize, initializeInfos); - embInfos.emplace_back(embInfo); - vector thresholdValues = {}; - - auto hybridMgmt = Singleton::GetInstance(); - cout << "setup..." 
<< endl; - - allRank = RankInfo(GlogConfig::gRankId, deviceId, localRankSize, useStatic, nBatch, maxStep); - hybridMgmt->Initialize(allRank, embInfos, seed, thresholdValues, false); - auto hostEmbs = make_unique(); - hostEmbs->Initialize(embInfos, seed); - auto hostHashMaps = make_unique(); - hostHashMaps->Init(allRank, embInfos, false); - - int currentBatchId = 0; - vector lookupKeys = { 1, 3, 5, 7 }; - vector d2h_emb; - vector> tmpDatas; - vector tmpData; - hostHashMaps->Process(embInfo.name, lookupKeys, currentBatchId, tmpData); - auto missingKeys = hostHashMaps->embHashMaps[embInfo.name].missingKeysHostPos; - LOG_INFO("missingKeys {}", missingKeys); - hostEmbs->EmbDataGenerator(initializeInfos, seed, missingKeys.size(), embeddingSize, tmpDatas); - auto status = Float2TensorVec(tmpDatas, d2h_emb); - ASSERT_EQ(status, true); - UpdateEmb(missingKeys, 0, embInfo.name, hostEmbs, d2h_emb); - hostHashMaps->embHashMaps[embInfo.name].missingKeysHostPos.clear(); - - lookupKeys = { 2, 3, 5, 6 }; - hostHashMaps->Process(embInfo.name, lookupKeys, currentBatchId, tmpData); - missingKeys = hostHashMaps->embHashMaps[embInfo.name].missingKeysHostPos; - LOG_INFO("missingKeys {}", missingKeys); - hostEmbs->EmbDataGenerator(initializeInfos, seed, missingKeys.size(), embeddingSize, tmpDatas); - status = Float2TensorVec(tmpDatas, d2h_emb); - ASSERT_EQ(status, true); - UpdateEmb(missingKeys, 0, embInfo.name, hostEmbs, d2h_emb); - hostHashMaps->embHashMaps[embInfo.name].missingKeysHostPos.clear(); - - lookupKeys = { 1, 7, 9, 10 }; - hostHashMaps->Process(embInfo.name, lookupKeys, currentBatchId, tmpData); - missingKeys = hostHashMaps->embHashMaps[embInfo.name].missingKeysHostPos; - LOG_INFO("missingKeys {}", missingKeys); - hostEmbs->EmbDataGenerator(initializeInfos, seed, missingKeys.size(), embeddingSize, tmpDatas); - Float2TensorVec(tmpDatas, d2h_emb); - status = Float2TensorVec(tmpDatas, d2h_emb); - ASSERT_EQ(status, true); - UpdateEmb(missingKeys, 0, embInfo.name, hostEmbs, d2h_emb); - hostHashMaps->embHashMaps[embInfo.name].missingKeysHostPos.clear(); - - hybridMgmt->Destroy(); -} -#endif #ifndef GTEST TEST_F(EmbMgmtTest, Initialize_HBM) diff --git a/src/tests/emb_table/embedding_ddr_test.cpp b/src/tests/emb_table/embedding_ddr_test.cpp index 374a1392..ddad3905 100644 --- a/src/tests/emb_table/embedding_ddr_test.cpp +++ b/src/tests/emb_table/embedding_ddr_test.cpp @@ -24,7 +24,6 @@ See the License for the specific language governing permissions and #include "utils/common.h" #include "emb_table/emb_table.h" #include "emb_table/embedding_ddr.h" -#include "host_emb/host_emb.h" using namespace std; using namespace MxRec; @@ -37,7 +36,7 @@ protected: { struct EmbInfoParams embParam(string("test1"), 0, 1000, 2000, true, true); std::vector vocabsize = {100}; - std::vector initializeInfos = {}; + vector initializeInfos = {}; std::vector ssdDataPath = {""}; vector maxStep = {1000}; embInfo_ = EmbInfo(embParam, vocabsize, initializeInfos, ssdDataPath); @@ -75,79 +74,6 @@ protected: */ TEST_F(EmbeddingDDRTest, SaveLoadEmbeddingData) { - vector embInfos = {embInfo_}; - HostEmb* hostEmbs = Singleton::GetInstance(); - hostEmbs->Initialize(embInfos, 0); - HostEmbTable& table = hostEmbs->GetEmb("test1"); - - vector tmp1 {1.1, 2.1, 3.1}; - vector tmp2 {1.2, 2.2, 3.2}; - vector tmp3 {1.3, 2.3, 3.3}; - vector> testData; - testData.push_back(tmp1); - testData.push_back(tmp2); - testData.push_back(tmp3); - - for (vector& tmp : testData) { - table.embData.push_back(tmp); - } - - shared_ptr ddr1 = std::make_shared(embInfo_, 
rankInfo_, 0); - shared_ptr ddr2 = std::make_shared(embInfo_, rankInfo_, 0); - ddr1->Save("test_dir"); - // 修改成0 - for (vector& tmp: table.embData) { - for (float& t : tmp) { - t = 0; - } - } - bool fileExist = false; - if (access("./test_dir/test1/embedding", F_OK) == 0) { - fileExist = true; - } - EXPECT_EQ(fileExist, true); -} - -/** - * 测试基本查找 - */ -TEST_F(EmbeddingDDRTest, DDRBasic) -{ - shared_ptr table = std::make_shared(embInfo_, rankInfo_, 0); - const size_t testNum = 100; - vector testKeys; - vector testSwap; - for (size_t i = 0; i < testNum; ++i) { - testKeys.push_back(i); - } - table->FindOffset(testKeys, 0, TRAIN_CHANNEL_ID, testSwap); - EXPECT_EQ(testKeys.size(), 100); - EXPECT_EQ(testSwap.size(), 0); -} - -TEST_F(EmbeddingDDRTest, evict) -{ - shared_ptr table = std::make_shared(embInfo_, rankInfo_, 0); - const size_t testNum = 100; - vector testKeys; - vector testSwap; - for (size_t i = 0; i < testNum; ++i) { - testKeys.push_back(i); - } - table->FindOffset(testKeys, 0, TRAIN_CHANNEL_ID, testSwap); - table->EvictKeys(testKeys); - EXPECT_EQ(table->evictDevPos.size(), 100); - EXPECT_EQ(testKeys.size(), 100); - EXPECT_EQ(testSwap.size(), 0); -} - -TEST_F(EmbeddingDDRTest, FindSwap) -{ - shared_ptr table = std::make_shared(embInfo_, rankInfo_, 0); - const size_t testNum = 100; - vector testSwap; - table->FindSwapPosOld(0, 0, 0, testSwap); - EXPECT_EQ(testSwap.size(), 1); } TEST_F(EmbeddingDDRTest, EvictDeleteEmb) diff --git a/src/tests/emb_table/embedding_mgmt_test.cpp b/src/tests/emb_table/embedding_mgmt_test.cpp index 9374b078..49f10b4f 100644 --- a/src/tests/emb_table/embedding_mgmt_test.cpp +++ b/src/tests/emb_table/embedding_mgmt_test.cpp @@ -36,7 +36,7 @@ protected: { struct EmbInfoParams embParam(string("test1"), 0, 1000, 2000, true, true); std::vector vocabsize = {100}; - std::vector initializeInfos = {}; + vector initializeInfos = {}; std::vector ssdDataPath = {""}; vector maxStep = {1000}; embInfo_ = EmbInfo(embParam, vocabsize, initializeInfos, ssdDataPath); @@ -75,7 +75,7 @@ TEST_F(EmbeddingMgmtTest, Init) ThresholdValue thvalue(tableName, 0, 0, 0, false); vector embInfos = {embInfo_}; vector thresholds = {thvalue}; - EmbeddingMgmt::Instance()->Init(rankInfo_, embInfos, thresholds, 0); + EmbeddingMgmt::Instance()->Init(rankInfo_, embInfos, 0); constexpr int testNum = 100; vector testKeys; @@ -95,7 +95,7 @@ TEST_F(EmbeddingMgmtTest, GetAttributes) ThresholdValue thvalue(tableName, 0, 0, 0, false); vector embInfos = {embInfo_}; vector thresholds = {thvalue}; - EmbeddingMgmt::Instance()->Init(rankInfo_, embInfos, thresholds, 0); + EmbeddingMgmt::Instance()->Init(rankInfo_, embInfos, 0); constexpr int testNum = 100; vector testKeys; diff --git a/src/tests/emb_table/embedding_static_test.cpp b/src/tests/emb_table/embedding_static_test.cpp index 09e72ca0..c8a5e252 100644 --- a/src/tests/emb_table/embedding_static_test.cpp +++ b/src/tests/emb_table/embedding_static_test.cpp @@ -35,7 +35,7 @@ protected: { struct EmbInfoParams embParam(string("test1"), 0, 1000, 2000, true, true); std::vector vocabsize = {100}; - std::vector initializeInfos = {}; + vector initializeInfos = {}; std::vector ssdDataPath = {""}; vector maxStep = {1000}; embInfo_ = EmbInfo(embParam, vocabsize, initializeInfos, ssdDataPath); @@ -136,7 +136,8 @@ TEST_F(EmbeddingStaticTest, Key2OffsetEvict) } table->Key2Offset(testData, TRAIN_CHANNEL_ID); // 全部淘汰 - table->EvictKeys(testData); + vector testDataAdapt(testData.cbegin(), testData.cend()); + table->EvictKeys(testDataAdapt); vector new_data; for (size_t i = 
0; i < testNum; ++i) { diff --git a/src/tests/file_system/hdfs_file_system_test.cpp b/src/tests/file_system/hdfs_file_system_test.cpp index 3794d14d..0d469ca5 100644 --- a/src/tests/file_system/hdfs_file_system_test.cpp +++ b/src/tests/file_system/hdfs_file_system_test.cpp @@ -17,7 +17,6 @@ See the License for the specific language governing permissions and #include #include "file_system/file_system_handler.h" -#include "file_system/hdfs_file_system/hdfs_file_system.h" #include "file_system/hdfs_file_system/hdfs_wrapper.h" using namespace std; diff --git a/src/tests/file_system/local_file_system_test.cpp b/src/tests/file_system/local_file_system_test.cpp index dfe5d483..2ea0d9d3 100644 --- a/src/tests/file_system/local_file_system_test.cpp +++ b/src/tests/file_system/local_file_system_test.cpp @@ -16,7 +16,6 @@ See the License for the specific language governing permissions and #include #include "file_system/file_system_handler.h" -#include "file_system/local_file_system/local_file_system.h" using namespace std; using namespace MxRec; @@ -42,10 +41,10 @@ TEST(LocalFileSystem, WriteAndReadFile) TEST(LocalFileSystem, WriteEmbedding) { string filePath = "./write.data"; - float p[5] = {1.1, 2.2, 3.3, 4.4, 5.5}; - vector writeData = {p, p+1, p+2, p+3, p+4}; + vector writeData = {1.1, 2.2, 3.3, 4.4, 5.5}; + vector> writeData1 = {writeData}; auto fileSystemHandler = make_unique(); auto fileSystemPtr = fileSystemHandler->Create(filePath); - ssize_t res = fileSystemPtr->Write(filePath, writeData, sizeof(float)); + ssize_t res = fileSystemPtr->Write(filePath, writeData1, sizeof(float)); ASSERT_EQ(writeData.size() * sizeof(float), res); } diff --git a/src/tests/host_emb/host_emb_test.cpp b/src/tests/host_emb/host_emb_test.cpp deleted file mode 100644 index 05a636d9..00000000 --- a/src/tests/host_emb/host_emb_test.cpp +++ /dev/null @@ -1,107 +0,0 @@ -/* Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and - limitations under the License. 
-==============================================================================*/ - -#include -#include - -#include "host_emb/host_emb.h" -#include "tensorflow/core/framework/tensor.h" -#include "hd_transfer/hd_transfer.h" -#include "utils/singleton.h" - -using namespace std; -using namespace tensorflow; -using namespace MxRec; - -namespace { -bool operator==(const Tensor& tensor1, const Tensor& tensor2) -{ - if (tensor1.shape() != tensor2.shape()) { - return false; - } - auto tensor1_data = tensor1.flat(); - auto tensor2_data = tensor2.flat(); - for (int j = 0; j < tensor1_data.size(); j++) { - if (tensor1_data(j) != tensor2_data(j)) { - return false; - } - } - return true; -} - -bool operator==(const vector& p1, const vector& p2) -{ - if (p1.size() != p2.size()) { - return false; - } - for (int i = 0; i>> lookups; - vector host_emb; - host_emb.resize(15); - vector> p(5, vector(3)); - host_emb[0] = 1; - host_emb[1] = 3; - std::cout << host_emb[0] << std::endl; - for (int i = 0; i < 5; i++) { - p[i].assign(host_emb.begin() + i * 3, host_emb.begin() + (i + 1) * 3); - } - std::cout << p[0][0] << std::endl; - std::cout << '5' << std::endl; - vector q; - std::cout << '0' << std::endl; - for (int i = 0; i < 2; i++) { - Tensor tmpTensor(tensorflow::DT_INT32, { 3 }); - std::cout << '1' << std::endl; - auto tmpData = tmpTensor.flat(); - std::cout << '2' << std::endl; - for (int j = 0; j < 3; j++) { - tmpData(j) = p[i][j]; - std::cout << '3' << std::endl; - } - - q.emplace_back(tmpTensor); - std::cout << '4' << std::endl; - } - std::cout << '1' << std::endl; - std::cout << q[0].flat()(0) << std::endl; - std::cout << q[0].flat()(1) << std::endl; - std::cout << q[1].flat()(0) << std::endl; - ASSERT_EQ(1, 1); -} - -TEST(HostEmb, DefaultConstructor) -{ - HostEmb h; - h.procThreadsForTrain.emplace_back(make_unique([] {})); - h.Join(TRAIN_CHANNEL_ID); - ASSERT_EQ(h.procThreadsForTrain.size(), 0); - - h.procThreadsForEval.emplace_back(make_unique([] {})); - h.Join(EVAL_CHANNEL_ID); - ASSERT_EQ(h.procThreadsForEval.size(), 0); -} - -} \ No newline at end of file diff --git a/src/tests/key_process/feature_admit_and_evict_test.cpp b/src/tests/key_process/feature_admit_and_evict_test.cpp index 09cadc7f..dffce96c 100644 --- a/src/tests/key_process/feature_admit_and_evict_test.cpp +++ b/src/tests/key_process/feature_admit_and_evict_test.cpp @@ -248,7 +248,7 @@ protected: currTime = time(nullptr); if (currTime - lastTime >= SleepTime::SLEEP_SECOND_4) { LOG_INFO("Evict-thread doing at currTime[{}] ...", currTime); - map> evictPosMap {}; + map> evictPosMap {}; faae.FeatureEvict(evictPosMap); lastTime = currTime; } @@ -258,7 +258,7 @@ protected: } void WaitEvictThread() { - map> evictPosMap {}; + map> evictPosMap {}; faae.FeatureEvict(evictPosMap); // 退出前保证执行了一次“淘汰” isExitFlag = true; if (evictThr.joinable()) { diff --git a/src/tests/key_process/key_process_test.cpp b/src/tests/key_process/key_process_test.cpp index a68f4787..fb2be40b 100644 --- a/src/tests/key_process/key_process_test.cpp +++ b/src/tests/key_process/key_process_test.cpp @@ -30,7 +30,6 @@ using namespace MxRec; using namespace testing; static constexpr size_t BATCH_NUM_EACH_THREAD = 3; -ock::ctr::FactoryPtr factory; class SimpleThreadPool { public: @@ -46,17 +45,6 @@ public: } }; -static void CTRLog(int level, const char *msg) -{ - switch (level) { - case 0: - LOG_DEBUG(msg); - break; - default: - break; - } -} - class KeyProcessTest : public testing::Test { protected: void SetUp() @@ -352,7 +340,7 @@ TEST_F(KeyProcessTest, Start) 
ASSERT_EQ(process.Start(), 0); setenv("keyProcessThreadNum", "abc", 1); ASSERT_EQ(process.Start(), 0); - CTRLog(0, "key process start successful"); + LOG_INFO("key process start successful"); process.Destroy(); } diff --git a/src/tests/ssd_cache/cache_manager_test.cpp b/src/tests/ssd_cache/cache_manager_test.cpp index 677939d2..7cb5e032 100644 --- a/src/tests/ssd_cache/cache_manager_test.cpp +++ b/src/tests/ssd_cache/cache_manager_test.cpp @@ -18,11 +18,9 @@ See the License for the specific language governing permissions and #include #include "absl/container/flat_hash_map.h" -#include "host_emb/host_emb.h" #include "ssd_cache/lfu_cache.h" #include "ssd_cache/cache_manager.h" #include "utils/common.h" -#include "emb_table/embedding_ddr.h" using namespace std; using namespace MxRec; @@ -39,10 +37,10 @@ void InitSSDEngine(CacheManager& manager, string embTableName, uint64_t ssdSize) manager.ssdEngine->SetCompactPeriod(period); manager.ssdEngine->SetCompactThreshold(1); manager.ssdEngine->CreateTable(embTableName, {SSD_SAVE_PATH}, ssdSize); - vector ssdKeys = {15, 25}; // 预设15, 25存储在SSD + vector ssdKeys = {15, 25}; // 预设15, 25存储在SSD std::vector> ssdEmbData = {{15.0f}, {25.0f}}; - auto& excludeMap = manager.excludeDDRKeyCountMap[embTableName]; + auto& excludeMap = manager.preProcessMapper[embTableName].excludeDDRKeyCountMap; excludeMap[15] = 3; // 初始化次数 excludeMap[25] = 5; manager.ssdEngine->InsertEmbeddings(embTableName, ssdKeys, ssdEmbData); @@ -94,7 +92,7 @@ protected: LFUCache cache2; cacheManager.ddrKeyFreqMap[embTableName2] = cache2; PutKeyInfo(cacheManager.ddrKeyFreqMap[embTableName2], input_keys); - unordered_map excludeDDRKeyFreq; + unordered_map excludeDDRKeyFreq; excludeDDRKeyFreq[27] = 10; excludeDDRKeyFreq[30] = 10; cacheManager.excludeDDRKeyCountMap[embTableName] = excludeDDRKeyFreq; @@ -105,14 +103,13 @@ protected: InitDDREmbData(loadData, embTableName, mgmtEmbInfos); InitDDREmbData(loadData, embTableName2, mgmtEmbInfos); - cacheManager.Init(hEmb, mgmtEmbInfos); + ock::ctr::EmbCacheManagerPtr embCachePtr = nullptr; + + cacheManager.Init(embCachePtr, mgmtEmbInfos); InitSSDEngine(cacheManager, embTableName, 5); InitSSDEngine(cacheManager, embTableName2, 10); // load ddr emb data - cacheManager.hostEmbs->hostEmbs = loadData; - - auto& embMap = cacheManager.hostEmbs->hostEmbs; } CacheManager cacheManager; @@ -126,49 +123,12 @@ protected: vector input_keys = {1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 6, 6, 8, 9}; string embTableName = "table1"; string embTableName2 = "table2"; - HostEmb* hEmb = Singleton::GetInstance(); void TearDown() { } }; -TEST_F(CacheManagerTest, RefreshFreqInfo) -{ - vector ddr2HbmKeys = {8, 9}; - cacheManager.RefreshFreqInfoCommon(embTableName, ddr2HbmKeys, TransferType::DDR_2_HBM); - ASSERT_EQ(cacheManager.ddrKeyFreqMap[embTableName].minFreq, 2); - ASSERT_EQ(cacheManager.ddrKeyFreqMap[embTableName].keyTable.size(), 5); - ASSERT_EQ(cacheManager.ddrKeyFreqMap[embTableName].freqTable.size(), 2); - ASSERT_EQ(cacheManager.ddrKeyFreqMap[embTableName].Get(8), -1); - ASSERT_EQ(cacheManager.excludeDDRKeyCountMap[embTableName].size(), 6); - - // HBM转移到DDR 频次数据设置构造 - cacheManager.excludeDDRKeyCountMap[embTableName][150] = 4; - cacheManager.excludeDDRKeyCountMap[embTableName][151] = 1; - vector hbm2DdrKeys = {150, 151}; - ASSERT_EQ(cacheManager.ddrKeyFreqMap[embTableName].Get(151), -1); - cacheManager.RefreshFreqInfoCommon(embTableName, hbm2DdrKeys, TransferType::HBM_2_DDR); - ASSERT_EQ(cacheManager.ddrKeyFreqMap[embTableName].Get(150), 4); - 
ASSERT_EQ(cacheManager.ddrKeyFreqMap[embTableName].Get(151), 1); - ASSERT_EQ(cacheManager.ddrKeyFreqMap[embTableName].minFreq, 1); - ASSERT_EQ(cacheManager.excludeDDRKeyCountMap[embTableName].size(), 6); - - vector ddr2EvictKeys = {151}; - cacheManager.RefreshFreqInfoCommon(embTableName, ddr2EvictKeys, TransferType::DDR_2_EVICT); - ASSERT_EQ(cacheManager.ddrKeyFreqMap[embTableName].Get(151), -1); - ASSERT_EQ(cacheManager.ddrKeyFreqMap[embTableName].freqTable.size(), 3); - ASSERT_EQ(cacheManager.ddrKeyFreqMap[embTableName].minFreq, 2); - - // HBM2Evict - cacheManager.excludeDDRKeyCountMap[embTableName][160] = 1; - vector hbm2EvictKeys = {160}; - cacheManager.RefreshFreqInfoCommon(embTableName, hbm2EvictKeys, TransferType::HBM_2_EVICT); - const auto it = cacheManager.excludeDDRKeyCountMap[embTableName].find(160); - ASSERT_EQ(it, cacheManager.excludeDDRKeyCountMap[embTableName].end()); - LOG_INFO("test RefreshFreqInfo end."); -} - TEST_F(CacheManagerTest, PutKey) { vector putDDRKeys = {1, 9, 8, 15}; @@ -191,193 +151,17 @@ TEST_F(CacheManagerTest, IsKeyInSSD) LOG_INFO("test IsKeyInSSD end."); } -TEST_F(CacheManagerTest, TransferDDREmbWithSSDByEmptyExternalKey) -{ - EmbeddingDDR table; - - vector currentKeys = {55, 65, 75}; - table.keyOffsetMap[55] = 119; - table.keyOffsetMap[65] = 118; - table.keyOffsetMap[75] = 116; - - TableInfo ti = table.GetTableInfo(); - - auto ret = cacheManager.TransferDDREmbWithSSD(ti, currentKeys, TRAIN_CHANNEL_ID); - ASSERT_EQ(ret, TransferRet::TRANSFER_OK); - LOG_INFO("test TransferDDREmbWithSSDByEmptyExternalKey end."); -} - -TEST_F(CacheManagerTest, TransferDDREmbWithSSDByAllProcess) -{ - vector ssdKeys = {15, 25}; - vector> ssdKeyEmbInfo = {{1.5f}, {2.5f}}; - - // init EmbeddingDDR - EmbeddingDDR table; - table.name = embTableName; - table.devVocabSize = 20; - table.hostVocabSize = 100; - table.maxOffset = 118; - table.evictHostPos.emplace_back(110); // 淘汰列表 - - TableInfo ti = table.GetTableInfo(); - - // 构造已经存储早DDR中key和offset对应关系; DDR的offset在映射表中范围是 20~119 - table.keyOffsetMap[9] = 117; // DDR中相对位置: 97 - table.keyOffsetMap[8] = 116; // DDR中相对位置: 96 - table.keyOffsetMap[6] = 114; // DDR中相对位置: 94 - table.keyOffsetMap[4] = 112; // DDR中相对位置: 92 - table.keyOffsetMap[3] = 111; // DDR中相对位置: 91 - table.keyOffsetMap[2] = 21; // DDR中相对位置: 1 - table.keyOffsetMap[1] = 20; // DDR中相对位置: 0 - - // 检查构造数据正确性 - auto& embMap = cacheManager.hostEmbs->hostEmbs; - const auto& it = embMap.find(embTableName); - auto& hostData = it->second.embData; - ASSERT_TRUE(fabs(hostData[0][0] - 1.0f) < EPSILON); - ASSERT_TRUE(fabs(hostData[1][0] - 2.0f) < EPSILON); - ASSERT_TRUE(fabs(hostData[94][0] - 6.0f) < EPSILON); - ASSERT_TRUE(fabs(hostData[97][0] - 9.0f) < EPSILON); - auto& excludeKeyCountMap = cacheManager.excludeDDRKeyCountMap[embTableName]; - ASSERT_EQ(excludeKeyCountMap[15], 3); - ASSERT_EQ(excludeKeyCountMap[25], 5); - ASSERT_FALSE(cacheManager.ssdEngine->IsKeyExist(embTableName, 9)); - ASSERT_FALSE(cacheManager.ssdEngine->IsKeyExist(embTableName, 8)); - ASSERT_TRUE(cacheManager.IsKeyInSSD(embTableName, 15)); - - // externalKeys: SSD(15, 25) + newKey(55, 65, 75) - // 训练场景,构造结果:offsetAvailableSize=20+100-118+evictPos.size()=3 - // cacheManager中的频次数据(低-高): 9 8 6 4 3 2 1 - // 构造空间超出SSD可用上限 - vector exceedKeys = {15, 25, 6, 4, 55, 65, 75, 85, 95, 105, 115}; - auto spaceError1 = cacheManager.TransferDDREmbWithSSD(ti, exceedKeys, TRAIN_CHANNEL_ID); - ASSERT_EQ(spaceError1, TransferRet::SSD_SPACE_NOT_ENOUGH); - - // 构造训练+超SSD可用+当前批次中不包含报错在SSD的key - vector keys2 = {6, 4, 55, 65, 75, 85, 95, 
105, 115, 125, 135}; - auto spaceError2 = cacheManager.TransferDDREmbWithSSD(ti, exceedKeys, TRAIN_CHANNEL_ID); - ASSERT_EQ(spaceError2, TransferRet::SSD_SPACE_NOT_ENOUGH); - - // 构造当前批次key 存储位置: SSD(15, 25) DDR(6, 4) newKey(55, 65, 75) - vector currentKeys = {15, 25, 6, 4, 55, 65, 75}; - // 需要从ddr转移4个key到ssd, 低频数据中6 4在当前批次key中,不会被转移,构造的数据转移key:9, 8, 3, 2 - auto ret = cacheManager.TransferDDREmbWithSSD(ti, currentKeys, TRAIN_CHANNEL_ID); - - // 检查处理后数据正确性 - ASSERT_EQ(ret, TransferRet::TRANSFER_OK); - ASSERT_TRUE(fabs(hostData[94][0] - 6.0f) < EPSILON); // DDR内未移动的数据 - ASSERT_TRUE(fabs(hostData[96][0] - 25.0f) < EPSILON); // SSD转移到DDR的数据 - ASSERT_TRUE(fabs(hostData[97][0] - 15.0f) < EPSILON); // SSD转移到DDR的数据 - ASSERT_EQ(table.evictHostPos.size(), 1); - ASSERT_EQ(table.evictHostPos.back(), 110); - - // 原DDR中最小频次key(9,8)次数(1)被转移到SSD,SSD转移到DDR的key(15,25)次数(3,5), DDR内频次索引应变为2 - ASSERT_EQ(cacheManager.ddrKeyFreqMap[embTableName].minFreq, 2); - ASSERT_TRUE(cacheManager.IsKeyInSSD(embTableName, 9)); - ASSERT_TRUE(cacheManager.IsKeyInSSD(embTableName, 8)); - ASSERT_FALSE(cacheManager.IsKeyInSSD(embTableName, 15)); - LOG_INFO("test TransferDDREmbWithSSDByAllProcess end."); -} - -TEST_F(CacheManagerTest, TransferDDREmbWithSSDByEmptyExternalSSDKey) -{ - // 训练+评估:构造DDR剩余空间足够,externalSSDKeys为空 - EmbeddingDDR table; - table.name = embTableName; - table.devVocabSize = 20; - table.hostVocabSize = 100; - table.keyOffsetMap[6] = 114; // DDR中相对位置: 94 - table.keyOffsetMap[4] = 112; // DDR中相对位置: 92 - // 剩余3个可用空间(DDR剩余2个, 相对位置:98 99; DDR淘汰列表1个) - table.maxOffset = 118; - table.evictHostPos.emplace_back(110); - - TableInfo ti = table.GetTableInfo(); - - vector currentKeys = {6, 4, 55, 65, 75}; - auto ret = cacheManager.TransferDDREmbWithSSD(ti, currentKeys, TRAIN_CHANNEL_ID); - ASSERT_EQ(ret, TransferRet::TRANSFER_OK); - auto retByEval = cacheManager.TransferDDREmbWithSSD(ti, currentKeys, EVAL_CHANNEL_ID); - ASSERT_EQ(retByEval, TransferRet::TRANSFER_OK); - - // 评估场景, DDR剩余空间不足, externalSSDKeys为空 - vector currentKeys2 = {6, 4, 55, 65, 75, 85, 95, 105, 115}; - auto ret2 = cacheManager.TransferDDREmbWithSSD(ti, currentKeys2, EVAL_CHANNEL_ID); - ASSERT_EQ(ret2, TransferRet::TRANSFER_OK); - // 训练场景,返回ssd空间不足 - auto ret3 = cacheManager.TransferDDREmbWithSSD(ti, currentKeys2, TRAIN_CHANNEL_ID); - ASSERT_EQ(ret3, TransferRet::SSD_SPACE_NOT_ENOUGH); - LOG_INFO("test TransferDDREmbWithSSDByEmptyExternalSSDKey end."); -} - -TEST_F(CacheManagerTest, TransferDDREmbWithSSDByEval) -{ - // 评估+DDR剩余空间足够+externalSSDKeys为空 - EmbeddingDDR table; - table.name = embTableName; - table.devVocabSize = 20; - table.hostVocabSize = 100; - table.keyOffsetMap[9] = 117; // DDR中相对位置: 97 - table.keyOffsetMap[8] = 116; // DDR中相对位置: 96 - table.keyOffsetMap[6] = 114; // DDR中相对位置: 94 - table.keyOffsetMap[4] = 112; // DDR中相对位置: 92 - // 剩余3个可用空间(DDR剩余2个, 相对位置:98 99; DDR淘汰列表1个) - table.maxOffset = 118; - table.evictHostPos.emplace_back(110); // 淘汰列表 - - TableInfo ti = table.GetTableInfo(); - - vector currentKeys = {6, 4, 55, 65, 75}; - auto ret = cacheManager.TransferDDREmbWithSSD(ti, currentKeys, EVAL_CHANNEL_ID); - ASSERT_EQ(ret, TransferRet::TRANSFER_OK); - LOG_INFO("test eval+space enough+externalSSDKeysEmpty ok."); - - // 评估+DDR剩余空间足够+externalSSDKeys非空 - vector currentKeys2 = {15, 25, 6, 4, 55, 65, 75, 85, 95, 105, 115}; - auto ret2 = cacheManager.TransferDDREmbWithSSD(ti, currentKeys2, EVAL_CHANNEL_ID); - ASSERT_EQ(ret2, TransferRet::TRANSFER_OK); - // 检查处理后数据正确性 - const auto& it = cacheManager.hostEmbs->hostEmbs.find(embTableName); - auto& 
hostData = it->second.embData; - ASSERT_TRUE(fabs(hostData[94][0] - 6.0f) < EPSILON); // DDR内未移动的数据 - ASSERT_TRUE(fabs(hostData[98][0] - 25.0f) < EPSILON); // SSD转移到DDR的数据 - ASSERT_TRUE(fabs(hostData[90][0] - 15.0f) < EPSILON); // SSD转移到DDR的数据 - ASSERT_EQ(table.evictHostPos.size(), 0); - // 原DDR中最小频次key(9,8)次数(1)被转移到SSD,SSD转移到DDR的key(15,25)次数(3,5), DDR内频次索引应变为2 - ASSERT_EQ(cacheManager.ddrKeyFreqMap[embTableName].minFreq, 1); - ASSERT_FALSE(cacheManager.IsKeyInSSD(embTableName, 9)); - ASSERT_FALSE(cacheManager.IsKeyInSSD(embTableName, 8)); - ASSERT_FALSE(cacheManager.IsKeyInSSD(embTableName, 15)); - LOG_INFO("test eval+space enough+externalSSDKeysNotEmpty ok."); -} - -TEST_F(CacheManagerTest, TransferDDREmbWithSSDByDDRSpaceNotEnough) -{ - // 构造DDR所有空间不满足存放当前批次数据 - EmbeddingDDR table; - table.name = embTableName2; - table.devVocabSize = 20; - table.hostVocabSize = 10; - table.maxOffset = 30; - table.keyOffsetMap[6] = 9; - table.keyOffsetMap[4] = 8; - - TableInfo ti = table.GetTableInfo(); - - // keys size:10, ddr keys:2 externalKeys:8 externalSSDKeys:0 - vector currentKeys = {6, 4, 101, 102, 103, 104, 105, 106, 107, 108}; - auto ret = cacheManager.TransferDDREmbWithSSD(ti, currentKeys, TRAIN_CHANNEL_ID); - ASSERT_EQ(ret, TransferRet::DDR_SPACE_NOT_ENOUGH); - LOG_INFO("test train+ddr space enough+externalSSDKeysEmpty ok."); -} - TEST_F(CacheManagerTest, EvictSSDEmbedding) { // 构造时ssd中已存在的key: 15 25 - emb_key_t key = 15; - vector ssdKeys = {key}; + emb_cache_key_t key = 15; + vector ssdKeys = {key}; cacheManager.EvictSSDEmbedding(embTableName, ssdKeys); + int maxLoop = 1000; + while (!cacheManager.ssdEvictThreads.empty() && maxLoop > 0) { + this_thread::sleep_for(1ms); + maxLoop--; + } ASSERT_FALSE(cacheManager.IsKeyInSSD(embTableName, key)); const auto it = cacheManager.excludeDDRKeyCountMap[embTableName].find(key); ASSERT_EQ(it, cacheManager.excludeDDRKeyCountMap[embTableName].end()); @@ -386,31 +170,4 @@ TEST_F(CacheManagerTest, EvictSSDEmbedding) TEST_F(CacheManagerTest, LoadTest) { - cacheManager.ddrKeyFreqMap.clear(); - cacheManager.excludeDDRKeyCountMap.clear(); - unordered_map> ddrMap; - string embTableName = "table1"; - unordered_map ddrTableMap; - ddrTableMap.emplace(1, 3); - ddrTableMap.emplace(2, 3); - ddrTableMap.emplace(3, 3); - ddrTableMap.emplace(4, 2); - ddrTableMap.emplace(6, 2); - ddrTableMap.emplace(8, 1); - ddrTableMap.emplace(9, 1); - ddrMap.emplace(embTableName, ddrTableMap); - unordered_map> excludeDdrMap; - unordered_map excludeDdrTableMap; - excludeDdrTableMap.emplace(15, 1); - excludeDdrTableMap.emplace(25, 5); - excludeDdrMap.emplace(embTableName, excludeDdrTableMap); - cacheManager.Load(ddrMap, excludeDdrMap, 0, 1, 0); - // 数据检查 - auto& ddrKeyFreqMap = cacheManager.ddrKeyFreqMap; - auto& excludeDDRKeyCountMap = cacheManager.excludeDDRKeyCountMap; - ASSERT_EQ(ddrKeyFreqMap[embTableName].minFreq, 1); - ASSERT_EQ(ddrKeyFreqMap[embTableName].freqTable.size(), 3); - ASSERT_EQ(ddrKeyFreqMap[embTableName].Get(2), 3); - ASSERT_EQ(ddrKeyFreqMap[embTableName].Get(12), -1); - ASSERT_EQ(excludeDDRKeyCountMap[embTableName][25], 5); } \ No newline at end of file diff --git a/src/tests/ssd_cache/lfu_cache_test.cpp b/src/tests/ssd_cache/lfu_cache_test.cpp index 1adf4aad..7f8a7820 100644 --- a/src/tests/ssd_cache/lfu_cache_test.cpp +++ b/src/tests/ssd_cache/lfu_cache_test.cpp @@ -31,7 +31,7 @@ using namespace testing; */ vector INPUT_KEYS = {1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 6, 6, 8, 9}; -inline void CompareHandleRet(vector& leastFreqKeys, vector& leastFreq, +inline void 
CompareHandleRet(vector& leastFreqKeys, vector& leastFreq, vector& expectKeys, vector& expectFreq) { @@ -81,8 +81,8 @@ TEST(LFUCache, PutInitTest) cache.PutWithInit(6, 2); cache.PutWithInit(8, 1); cache.PutWithInit(9, 1); - vector retainedKeys = {4, 6}; - vector leastFreqKeys; + vector retainedKeys = {4, 6}; + vector leastFreqKeys; vector leastFreq; cache.GetAndDeleteLeastFreqKeyInfo(2, retainedKeys, leastFreqKeys, leastFreq); vector expectKeys = {9, 8}; @@ -95,8 +95,8 @@ TEST(LFUCache, LFUDeleteTotalFreqListTest) { LFUCache cache; PutKeys(cache, INPUT_KEYS); - vector retainedKeys = {4, 6, 8, 9}; - vector leastFreqKeys; + vector retainedKeys = {4, 6, 8, 9}; + vector leastFreqKeys; vector leastFreq; cache.GetAndDeleteLeastFreqKeyInfo(2, retainedKeys, leastFreqKeys, leastFreq); vector expectKeys = {3, 2}; @@ -108,8 +108,8 @@ TEST(LFUCache, BaseCacheTest) { LFUCache cache; PutKeys(cache, INPUT_KEYS); - vector retainedKeys = {8, 4, 6, 2}; - vector leastFreqKeys; + vector retainedKeys = {8, 4, 6, 2}; + vector leastFreqKeys; vector leastFreq; cache.GetAndDeleteLeastFreqKeyInfo(2, retainedKeys, leastFreqKeys, leastFreq); vector expectKeys = {9, 3}; @@ -120,5 +120,5 @@ TEST(LFUCache, BaseCacheTest) cache.Put(9); ASSERT_EQ(cache.Get(9), 1); cache.Put(9); - ASSERT_EQ(cache.minFreq, 2); + ASSERT_EQ(cache.minFreq, 1); } diff --git a/src/tests/ssd_engine/engine_test.cpp b/src/tests/ssd_engine/engine_test.cpp index aad64a99..be57ad2f 100644 --- a/src/tests/ssd_engine/engine_test.cpp +++ b/src/tests/ssd_engine/engine_test.cpp @@ -47,9 +47,9 @@ TEST(SSDEngine, CreateAndWriteAndReadAndAutoCompactAndSave) ASSERT_EQ(eng->IsTableExist(tbName), true); // write - vector keys; + vector keys; vector> embeddings; - for (emb_key_t k = 0; k < 10; k++) { + for (emb_cache_key_t k = 0; k < 10; k++) { keys.emplace_back(k); vector emb = {static_cast(k + 0.1), static_cast(k + 0.2)}; embeddings.emplace_back(emb); @@ -64,7 +64,7 @@ TEST(SSDEngine, CreateAndWriteAndReadAndAutoCompactAndSave) ASSERT_EQ(eng->GetTableAvailableSpace(tbName), maxTableSize - keys.size()); // delete and wait auto compact - vector deleteKeys = {0}; + vector deleteKeys = {0}; eng->DeleteEmbeddings(tbName, deleteKeys); this_thread::sleep_for(compactPeriod); @@ -124,9 +124,9 @@ TEST(SSDEngine, LoadAndRead) engSave->CreateTable(tbName, savePath, maxTableSize); // write - vector keys; + vector keys; vector> embeddings; - for (emb_key_t k = 0; k < 10; k++) { + for (emb_cache_key_t k = 0; k < 10; k++) { keys.emplace_back(k); vector emb = {static_cast(k + 0.1), static_cast(k + 0.2)}; embeddings.emplace_back(emb); @@ -141,7 +141,7 @@ TEST(SSDEngine, LoadAndRead) shared_ptr engLoad = make_shared(); engLoad->Start(); engLoad->Load(tbName, savePath, maxTableSize, saveStep); - for (emb_key_t k: keys) { + for (emb_cache_key_t k: keys) { ASSERT_EQ(engLoad->IsKeyExist(tbName, k), true); } auto ret = engLoad->FetchEmbeddings(tbName, keys); diff --git a/src/tests/ssd_engine/file_test.cpp b/src/tests/ssd_engine/file_test.cpp index 599b5975..cdd80fc5 100644 --- a/src/tests/ssd_engine/file_test.cpp +++ b/src/tests/ssd_engine/file_test.cpp @@ -100,9 +100,9 @@ TEST(File, WriteAndRead) string savePath = GlogConfig::gRankId; auto f = make_shared(0, savePath); - vector keys; + vector keys; vector> embeddings; - for (emb_key_t k = 0; k < 10; k++) { + for (emb_cache_key_t k = 0; k < 10; k++) { keys.emplace_back(k); vector emb = {static_cast(k + 0.1), static_cast(k + 0.2)}; embeddings.emplace_back(emb); @@ -129,7 +129,7 @@ TEST(File, SaveAndLoad) string fileDir = 
GlogConfig::gRankId; auto fTmp = make_shared(0, fileDir); - vector key = {0}; + vector key = {0}; vector> expect = {{1.0, 1.1}}; fTmp->InsertEmbeddings(key, expect); string saveDir = fileDir; // for test convenience @@ -142,3 +142,40 @@ TEST(File, SaveAndLoad) fs::remove_all(fileDir); } + +TEST(File, WriteByAddrAndRead) +{ + int rankId; + MPI_Comm_rank(MPI_COMM_WORLD, &rankId); + GlogConfig::gRankId = to_string(rankId); + + string savePath = GlogConfig::gRankId; + auto f = make_shared(0, savePath); + + vector keys; + vector embeddings; + uint64_t extEmbeddingSize = 1; + for (emb_cache_key_t k = 0; k < 10; k++) { + keys.emplace_back(k); + float* emb = new float; + *emb = static_cast(k + 0.1); + embeddings.emplace_back(emb); + } + + f->InsertEmbeddingsByAddr(keys, embeddings, extEmbeddingSize); + auto ret = f->FetchEmbeddings(keys); + for (int i = 0; i < 10; i++) { + if (std::abs(ret[i][0] - *embeddings[i]) > std::numeric_limits::epsilon()) { + FAIL() << "embedding result not equal to input"; + } + } + + for (auto emb : embeddings) + { + delete emb; + emb = nullptr; + } + + + fs::remove_all(savePath); +} \ No newline at end of file diff --git a/src/tests/ssd_engine/table_test.cpp b/src/tests/ssd_engine/table_test.cpp index 2e180c13..20a66f2f 100644 --- a/src/tests/ssd_engine/table_test.cpp +++ b/src/tests/ssd_engine/table_test.cpp @@ -41,13 +41,13 @@ TEST(Table, WriteAndReadAndDeleteAndCompact) // write emb_key_t nData = 1000000; emb_key_t batchSize = 10000; - vector allKeys; + vector allKeys; vector> allEmbs; - vector batchKeys; + vector batchKeys; vector> batchEmbs; chrono::milliseconds writeCost = 0ms; - for (emb_key_t k = 0; k < nData; k++) { + for (emb_cache_key_t k = 0; k < nData; k++) { vector emb; emb.resize(embDim); for (uint64_t i = 0; i < embDim; ++i) { @@ -122,9 +122,9 @@ TEST(Table, SaveAndLoad) // write and save emb_key_t nData = 10; - vector keys; + vector keys; vector> embs; - for (emb_key_t k = 0; k < nData; k++) { + for (emb_cache_key_t k = 0; k < nData; k++) { vector emb = {static_cast(k + 0.1), static_cast(k + 0.2)}; keys.emplace_back(k); embs.emplace_back(emb); @@ -160,7 +160,7 @@ TEST(Table, GetTableUsage) // write uint64_t expectKeyCnt = 2; - vector keys = {1, 2}; + vector keys = {1, 2}; vector> embs = {{0.1}, {0.2}}; tbSave->InsertEmbeddings(keys, embs); diff --git a/src/tests/utils/common_h_test.cpp b/src/tests/utils/common_h_test.cpp index 2e86b88d..bf089198 100644 --- a/src/tests/utils/common_h_test.cpp +++ b/src/tests/utils/common_h_test.cpp @@ -113,12 +113,6 @@ TEST(TestHostEmbTable, DefaultConstructor) MxRec::HostEmbTable hostEmbTable; } -// 测试 EmbHashMapInfo 结构的默认构造函数 -TEST(TestEmbHashMapInfo, DefaultConstructor) -{ - MxRec::EmbHashMapInfo embHashMapInfo; -} - // 测试 All2AllInfo 结构的默认构造函数 TEST(TestAll2AllInfo, DefaultConstructor) { diff --git a/tests/mx_rec/core/test_build_graph.py b/tests/mx_rec/core/test_build_graph.py index 5360f908..5a24fd74 100644 --- a/tests/mx_rec/core/test_build_graph.py +++ b/tests/mx_rec/core/test_build_graph.py @@ -21,6 +21,7 @@ from unittest import mock import tensorflow as tf from mx_rec.util.global_env_conf import global_env +from mx_rec.core.asc.build_graph import SwapInfo from tests.mx_rec.core.mock_class import MockConfigInitializer @@ -134,10 +135,12 @@ class TestGetIdOffsetsFunc(unittest.TestCase): with tf.Graph().as_default(): mock_get_next.return_value = [0] - id_offsets, swap_pos, swap_len = get_id_offsets(self.max_lookup_vec_size, self.config) + id_offsets, swap_info = get_id_offsets(self.max_lookup_vec_size, self.config) 
self.assertEqual(id_offsets, 0) - self.assertListEqual(swap_pos, []) - self.assertEqual(swap_len, 0) + self.assertListEqual(swap_info.swap_in_pos, []) + self.assertEqual(swap_info.swap_in_len, 0) + self.assertListEqual(swap_info.swap_out_pos, []) + self.assertEqual(swap_info.swap_out_len, 0) @mock.patch("mx_rec.core.asc.build_graph.npu_ops.gen_npu_ops.get_next") def test_get_id_offsets_case2(self, mock_get_next): @@ -150,10 +153,12 @@ class TestGetIdOffsetsFunc(unittest.TestCase): with tf.Graph().as_default(): self.config["use_dynamic_expansion"] = False mock_get_next.return_value = [0] - id_offsets, swap_pos, swap_len = get_id_offsets(self.max_lookup_vec_size, self.config) + id_offsets, swap_info = get_id_offsets(self.max_lookup_vec_size, self.config) self.assertEqual(id_offsets, 0) - self.assertListEqual(swap_pos, []) - self.assertEqual(swap_len, 0) + self.assertListEqual(swap_info.swap_in_pos, []) + self.assertEqual(swap_info.swap_in_len, 0) + self.assertListEqual(swap_info.swap_out_pos, []) + self.assertEqual(swap_info.swap_out_len, 0) class TestGetAll2allArgsFunc(unittest.TestCase): @@ -217,7 +222,7 @@ class TestGetPreProcessedTensorForAscFunc(unittest.TestCase): @mock.patch.multiple("mx_rec.core.asc.build_graph", get_restore_vector=mock.MagicMock(return_value=[0, 0]), - get_id_offsets=mock.MagicMock(return_value=[0, 0, 0]), + get_id_offsets=mock.MagicMock(return_value=[0, SwapInfo()]), get_all2all_args=mock.MagicMock(return_value=0)) @mock.patch("mx_rec.core.asc.build_graph.ConfigInitializer") def test_get_preprocessed_tensor_for_asc_case1(self, build_graph_config_initializer): @@ -236,7 +241,7 @@ class TestGetPreProcessedTensorForAscFunc(unittest.TestCase): @mock.patch.multiple("mx_rec.core.asc.build_graph", get_restore_vector=mock.MagicMock(return_value=[0, 0]), - get_id_offsets=mock.MagicMock(return_value=[0, 0, 0]), + get_id_offsets=mock.MagicMock(return_value=[0, SwapInfo()]), get_all2all_args=mock.MagicMock(return_value=0)) @mock.patch("mx_rec.core.asc.build_graph.ConfigInitializer") def test_get_preprocessed_tensor_for_asc_case2(self, build_graph_config_initializer): @@ -255,7 +260,7 @@ class TestGetPreProcessedTensorForAscFunc(unittest.TestCase): @mock.patch.multiple("mx_rec.core.asc.build_graph", get_restore_vector=mock.MagicMock(return_value=[0, 0]), - get_id_offsets=mock.MagicMock(return_value=[0, 0, 0]), + get_id_offsets=mock.MagicMock(return_value=[0, SwapInfo]), get_all2all_args=mock.MagicMock(return_value=0)) @mock.patch("mx_rec.core.asc.build_graph.ConfigInitializer") def test_get_preprocessed_tensor_for_asc_case3(self, build_graph_config_initializer): diff --git a/tests/mx_rec/saver/test_saver.py b/tests/mx_rec/saver/test_saver.py index c0436a72..bcfa0948 100644 --- a/tests/mx_rec/saver/test_saver.py +++ b/tests/mx_rec/saver/test_saver.py @@ -41,6 +41,7 @@ class TestSaver(unittest.TestCase): @mock.patch.multiple("mx_rec.saver.saver", get_rank_id=mock.MagicMock(return_value=0), + get_rank_size=mock.MagicMock(return_value=1), get_local_rank_size=mock.MagicMock(return_value=1)) @mock.patch("mx_rec.saver.saver.ConfigInitializer") def test_save_and_load_is_consistent(self, saver_config_initializer): diff --git a/tests/run_python_dt.sh b/tests/run_python_dt.sh old mode 100644 new mode 100755 index 139e7ff7..475fd788 --- a/tests/run_python_dt.sh +++ b/tests/run_python_dt.sh @@ -36,7 +36,7 @@ cd - # set environment variable export PYTHONPATH="${TOP_PATH}"/output:$PYTHONPATH -export LD_LIBRARY_PATH="${TOP_PATH}"/output:/usr/local/lib:$LD_LIBRARY_PATH +export 
LD_LIBRARY_PATH="${TOP_PATH}"/output:/usr/local/lib:"${TOP_PATH}"/mx_rec/libasc:$LD_LIBRARY_PATH rm -rf result mkdir -p result -- Gitee From 41698044eff1712808d44d4231f31f4343e0e76d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=BD=95=E9=9C=96?= Date: Mon, 27 May 2024 14:49:19 +0800 Subject: [PATCH 165/302] =?UTF-8?q?C++=E6=B5=8B=E8=AF=95=E7=94=A8=E4=BE=8B?= =?UTF-8?q?=E6=B7=BB=E5=8A=A0ASAN=EF=BC=88=E5=9C=B0=E5=9D=80=E6=B6=88?= =?UTF-8?q?=E6=AF=92=EF=BC=89=E8=BF=9B=E8=A1=8C=E5=86=85=E5=AD=98=E6=B3=84?= =?UTF-8?q?=E9=9C=B2=E6=A3=80=E6=B5=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/CMakeLists.txt | 2 +- src/test_ut.sh | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 84505d15..757745a8 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -56,7 +56,7 @@ else () message("==EASY_PROFILER_FOUND===") ADD_DEFINITIONS(-DBUILD_WITH_EASY_PROFILER) endif () -set(CMAKE_CXX_FLAGS_DEBUG "$ENV{CXXFLAGS} -ffunction-sections -O0 -Wall -g2 -ggdb") +set(CMAKE_CXX_FLAGS_DEBUG "$ENV{CXXFLAGS} -ffunction-sections -O0 -Wall -g2 -ggdb -fsanitize=address -fsanitize-recover=address,all") set(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} -ffunction-sections -O3 -Wfatal-errors -DNDEBUG -fPIC -fstack-protector-all -D_FORTIFY_SOURCE=2 -s") set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,-z,relro -Wl,-z,now -Wl,-z,noexecstack") diff --git a/src/test_ut.sh b/src/test_ut.sh index 6146aaab..cc163baf 100644 --- a/src/test_ut.sh +++ b/src/test_ut.sh @@ -38,6 +38,15 @@ opensource_path="${ROOT_DIR}"/../opensource acc_ctr_path="${ROOT_DIR}"/src/AccCTR export LD_LIBRARY_PATH="${acc_ctr_path}"/output/ock_ctr_common/lib:$LD_LIBRARY_PATH +# config asan report dir and environment variable +if [ ! -d asan_report ]; then + mkdir -p asan_report +else + rm -rf ./asan_report/* +fi +export ASAN_OPTIONS=halt_on_error=0:detect_leaks=1:log_path="${CUR_DIR}"/asan_report/asan.log + + function prepare_googletest(){ cd ${opensource_path} if [ ! -d googletest-release-1.8.1 ]; then -- Gitee From d74b737e4b24517e557b9551ef1115ae1a08f6f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=BD=95=E9=9C=96?= Date: Mon, 27 May 2024 16:00:35 +0800 Subject: [PATCH 166/302] =?UTF-8?q?C++=E6=B5=8B=E8=AF=95=E7=94=A8=E4=BE=8B?= =?UTF-8?q?=E6=B7=BB=E5=8A=A0ASAN=EF=BC=88=E5=9C=B0=E5=9D=80=E6=B6=88?= =?UTF-8?q?=E6=AF=92=EF=BC=89=E8=BF=9B=E8=A1=8C=E5=86=85=E5=AD=98=E6=B3=84?= =?UTF-8?q?=E9=9C=B2=E6=A3=80=E6=B5=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/test_ut.sh | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/src/test_ut.sh b/src/test_ut.sh index cc163baf..20c6898a 100644 --- a/src/test_ut.sh +++ b/src/test_ut.sh @@ -38,13 +38,8 @@ opensource_path="${ROOT_DIR}"/../opensource acc_ctr_path="${ROOT_DIR}"/src/AccCTR export LD_LIBRARY_PATH="${acc_ctr_path}"/output/ock_ctr_common/lib:$LD_LIBRARY_PATH -# config asan report dir and environment variable -if [ ! 
-d asan_report ]; then - mkdir -p asan_report -else - rm -rf ./asan_report/* -fi -export ASAN_OPTIONS=halt_on_error=0:detect_leaks=1:log_path="${CUR_DIR}"/asan_report/asan.log +# config asan environment variable +export ASAN_OPTIONS=halt_on_error=1:detect_leaks=1 function prepare_googletest(){ -- Gitee From 638ea9dba6f305d92a5753e6689ca5c5fd6b62fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=BD=95=E9=9C=96?= Date: Mon, 27 May 2024 19:27:38 +0800 Subject: [PATCH 167/302] =?UTF-8?q?C++=E6=B5=8B=E8=AF=95=E7=94=A8=E4=BE=8B?= =?UTF-8?q?=E6=B7=BB=E5=8A=A0ASAN=EF=BC=88=E5=9C=B0=E5=9D=80=E6=B6=88?= =?UTF-8?q?=E6=AF=92=EF=BC=89=E8=BF=9B=E8=A1=8C=E5=86=85=E5=AD=98=E6=B3=84?= =?UTF-8?q?=E9=9C=B2=E6=A3=80=E6=B5=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/CMakeLists.txt | 2 +- src/tests/emb_table/embedding_ddr_test.cpp | 2 +- src/tests/emb_table/embedding_mgmt_test.cpp | 2 +- src/tests/emb_table/embedding_static_test.cpp | 2 +- src/tests/file_system/hdfs_file_system_test.cpp | 3 ++- 5 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 757745a8..a5cd76da 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -56,7 +56,7 @@ else () message("==EASY_PROFILER_FOUND===") ADD_DEFINITIONS(-DBUILD_WITH_EASY_PROFILER) endif () -set(CMAKE_CXX_FLAGS_DEBUG "$ENV{CXXFLAGS} -ffunction-sections -O0 -Wall -g2 -ggdb -fsanitize=address -fsanitize-recover=address,all") +set(CMAKE_CXX_FLAGS_DEBUG "$ENV{CXXFLAGS} -ffunction-sections -O0 -Wall -g2 -ggdb -fsanitize=address -fsanitize-recover=address,all -fno-omit-frame-pointer -fno-stack-protector") set(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} -ffunction-sections -O3 -Wfatal-errors -DNDEBUG -fPIC -fstack-protector-all -D_FORTIFY_SOURCE=2 -s") set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,-z,relro -Wl,-z,now -Wl,-z,noexecstack") diff --git a/src/tests/emb_table/embedding_ddr_test.cpp b/src/tests/emb_table/embedding_ddr_test.cpp index ddad3905..60ec5af6 100644 --- a/src/tests/emb_table/embedding_ddr_test.cpp +++ b/src/tests/emb_table/embedding_ddr_test.cpp @@ -35,7 +35,7 @@ protected: EmbeddingDDRTest() { struct EmbInfoParams embParam(string("test1"), 0, 1000, 2000, true, true); - std::vector vocabsize = {100}; + std::vector vocabsize = {100, 100, 100}; vector initializeInfos = {}; std::vector ssdDataPath = {""}; vector maxStep = {1000}; diff --git a/src/tests/emb_table/embedding_mgmt_test.cpp b/src/tests/emb_table/embedding_mgmt_test.cpp index 49f10b4f..055cf5c5 100644 --- a/src/tests/emb_table/embedding_mgmt_test.cpp +++ b/src/tests/emb_table/embedding_mgmt_test.cpp @@ -35,7 +35,7 @@ protected: EmbeddingMgmtTest() { struct EmbInfoParams embParam(string("test1"), 0, 1000, 2000, true, true); - std::vector vocabsize = {100}; + std::vector vocabsize = {100, 100, 100}; vector initializeInfos = {}; std::vector ssdDataPath = {""}; vector maxStep = {1000}; diff --git a/src/tests/emb_table/embedding_static_test.cpp b/src/tests/emb_table/embedding_static_test.cpp index c8a5e252..9e250f64 100644 --- a/src/tests/emb_table/embedding_static_test.cpp +++ b/src/tests/emb_table/embedding_static_test.cpp @@ -34,7 +34,7 @@ protected: EmbeddingStaticTest() { struct EmbInfoParams embParam(string("test1"), 0, 1000, 2000, true, true); - std::vector vocabsize = {100}; + std::vector vocabsize = {100, 100, 100}; vector initializeInfos = {}; std::vector ssdDataPath = {""}; vector maxStep = {1000}; diff --git a/src/tests/file_system/hdfs_file_system_test.cpp 
b/src/tests/file_system/hdfs_file_system_test.cpp index 0d469ca5..1f94e1c7 100644 --- a/src/tests/file_system/hdfs_file_system_test.cpp +++ b/src/tests/file_system/hdfs_file_system_test.cpp @@ -18,6 +18,7 @@ See the License for the specific language governing permissions and #include "file_system/file_system_handler.h" #include "file_system/hdfs_file_system/hdfs_wrapper.h" +#include "utils/logger.h" using namespace std; using namespace MxRec; @@ -75,7 +76,7 @@ TEST_F(HdfsFileSystemTest, CreateDirFailed) TEST_F(HdfsFileSystemTest, GetFileSize) { - hdfsFileInfo* fileInfo; + auto* fileInfo = new hdfsFileInfo(); EMOCK(&HdfsWrapper::GetPathInfo).stubs().will(returnValue(fileInfo)); string filePath = "hdfs://master:9000/test_dir/"; auto fileSystemHandler = make_unique(); -- Gitee From 83d59d6154b429a97e7abc92109f4bf0a82a6d81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=BD=95=E9=9C=96?= Date: Mon, 27 May 2024 19:44:14 +0800 Subject: [PATCH 168/302] =?UTF-8?q?C++=E6=B5=8B=E8=AF=95=E7=94=A8=E4=BE=8B?= =?UTF-8?q?=E6=B7=BB=E5=8A=A0ASAN=EF=BC=88=E5=9C=B0=E5=9D=80=E6=B6=88?= =?UTF-8?q?=E6=AF=92=EF=BC=89=E8=BF=9B=E8=A1=8C=E5=86=85=E5=AD=98=E6=B3=84?= =?UTF-8?q?=E9=9C=B2=E6=A3=80=E6=B5=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/tests/file_system/hdfs_file_system_test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tests/file_system/hdfs_file_system_test.cpp b/src/tests/file_system/hdfs_file_system_test.cpp index 1f94e1c7..0a642d44 100644 --- a/src/tests/file_system/hdfs_file_system_test.cpp +++ b/src/tests/file_system/hdfs_file_system_test.cpp @@ -18,7 +18,6 @@ See the License for the specific language governing permissions and #include "file_system/file_system_handler.h" #include "file_system/hdfs_file_system/hdfs_wrapper.h" -#include "utils/logger.h" using namespace std; using namespace MxRec; @@ -77,6 +76,7 @@ TEST_F(HdfsFileSystemTest, CreateDirFailed) TEST_F(HdfsFileSystemTest, GetFileSize) { auto* fileInfo = new hdfsFileInfo(); + fileInfo->mSize = 1; EMOCK(&HdfsWrapper::GetPathInfo).stubs().will(returnValue(fileInfo)); string filePath = "hdfs://master:9000/test_dir/"; auto fileSystemHandler = make_unique(); -- Gitee From 9119986cab1b10efad92ba1563e0a4951a77f26c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=BD=95=E9=9C=96?= Date: Mon, 27 May 2024 19:45:53 +0800 Subject: [PATCH 169/302] =?UTF-8?q?C++=E6=B5=8B=E8=AF=95=E7=94=A8=E4=BE=8B?= =?UTF-8?q?=E6=B7=BB=E5=8A=A0ASAN=EF=BC=88=E5=9C=B0=E5=9D=80=E6=B6=88?= =?UTF-8?q?=E6=AF=92=EF=BC=89=E8=BF=9B=E8=A1=8C=E5=86=85=E5=AD=98=E6=B3=84?= =?UTF-8?q?=E9=9C=B2=E6=A3=80=E6=B5=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/tests/file_system/hdfs_file_system_test.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/tests/file_system/hdfs_file_system_test.cpp b/src/tests/file_system/hdfs_file_system_test.cpp index 0a642d44..46fb0753 100644 --- a/src/tests/file_system/hdfs_file_system_test.cpp +++ b/src/tests/file_system/hdfs_file_system_test.cpp @@ -82,5 +82,6 @@ TEST_F(HdfsFileSystemTest, GetFileSize) auto fileSystemHandler = make_unique(); auto fileSystemPtr = fileSystemHandler->Create(filePath); EXPECT_NO_THROW(fileSystemPtr->GetFileSize(filePath)); + delete fileInfo; } -- Gitee From 76b678eab589f75fdf47939c6884ad85ccdbc2d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=BD=95=E9=9C=96?= Date: Mon, 27 May 2024 21:10:48 +0800 Subject: [PATCH 170/302] =?UTF-8?q?C++=E6=B5=8B=E8=AF=95=E7=94=A8=E4=BE=8B?= 
=?UTF-8?q?=E6=B7=BB=E5=8A=A0ASAN=EF=BC=88=E5=9C=B0=E5=9D=80=E6=B6=88?= =?UTF-8?q?=E6=AF=92=EF=BC=89=E8=BF=9B=E8=A1=8C=E5=86=85=E5=AD=98=E6=B3=84?= =?UTF-8?q?=E9=9C=B2=E6=A3=80=E6=B5=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/tests/file_system/hdfs_file_system_test.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/tests/file_system/hdfs_file_system_test.cpp b/src/tests/file_system/hdfs_file_system_test.cpp index 46fb0753..3c1a2561 100644 --- a/src/tests/file_system/hdfs_file_system_test.cpp +++ b/src/tests/file_system/hdfs_file_system_test.cpp @@ -75,13 +75,11 @@ TEST_F(HdfsFileSystemTest, CreateDirFailed) TEST_F(HdfsFileSystemTest, GetFileSize) { - auto* fileInfo = new hdfsFileInfo(); - fileInfo->mSize = 1; - EMOCK(&HdfsWrapper::GetPathInfo).stubs().will(returnValue(fileInfo)); + std::unique_ptr fileInfo = std::make_unique(); + EMOCK(&HdfsWrapper::GetPathInfo).stubs().will(returnValue(fileInfo.get())); string filePath = "hdfs://master:9000/test_dir/"; auto fileSystemHandler = make_unique(); auto fileSystemPtr = fileSystemHandler->Create(filePath); EXPECT_NO_THROW(fileSystemPtr->GetFileSize(filePath)); - delete fileInfo; } -- Gitee From d61def25dc0e4a59eb036e01475aac00fb8b4073 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=BD=95=E9=9C=96?= Date: Tue, 28 May 2024 19:54:42 +0800 Subject: [PATCH 171/302] =?UTF-8?q?C++=E6=B5=8B=E8=AF=95=E7=94=A8=E4=BE=8B?= =?UTF-8?q?=E6=B7=BB=E5=8A=A0ASAN=EF=BC=88=E5=9C=B0=E5=9D=80=E6=B6=88?= =?UTF-8?q?=E6=AF=92=EF=BC=89=E8=BF=9B=E8=A1=8C=E5=86=85=E5=AD=98=E6=B3=84?= =?UTF-8?q?=E9=9C=B2=E6=A3=80=E6=B5=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/AccCTR/CMakeLists.txt | 4 +++ src/AccCTR/build/build_test.sh | 3 ++ src/AccCTR/tests/ut/src/CMakeLists.txt | 2 +- src/AccCTR/tests/ut/src/unique_test.cpp | 42 +++++++++++++++++++++++++ 4 files changed, 50 insertions(+), 1 deletion(-) diff --git a/src/AccCTR/CMakeLists.txt b/src/AccCTR/CMakeLists.txt index 60e2d638..febf1740 100644 --- a/src/AccCTR/CMakeLists.txt +++ b/src/AccCTR/CMakeLists.txt @@ -73,6 +73,10 @@ elseif (${BUILD_MODE} MATCHES "ut") -Wfloat-equal -Wextra -std=c++17 + -fsanitize=address + -fsanitize-recover=address,all + -fno-omit-frame-pointer + -fstack-protector-all ) else () message(FATAL_ERROR "======BUILD_MODE not found") diff --git a/src/AccCTR/build/build_test.sh b/src/AccCTR/build/build_test.sh index 9441efe3..4001b825 100644 --- a/src/AccCTR/build/build_test.sh +++ b/src/AccCTR/build/build_test.sh @@ -24,6 +24,9 @@ TOOL_FILE="create_fake_id.py" CPU_TYPE=$(arch) BUILD_MODE=$1 +# config asan environment variable +export ASAN_OPTIONS=halt_on_error=1:detect_leaks=1 + create_data() { cd ${TOOL_PATH} diff --git a/src/AccCTR/tests/ut/src/CMakeLists.txt b/src/AccCTR/tests/ut/src/CMakeLists.txt index 3da58244..93f8f6c2 100644 --- a/src/AccCTR/tests/ut/src/CMakeLists.txt +++ b/src/AccCTR/tests/ut/src/CMakeLists.txt @@ -24,7 +24,7 @@ include("${CMAKE_CURRENT_SOURCE_DIR}/../conf/toolchain.cmake") set(SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../../../src) set(TOP_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../../../) -file(GLOB_RECURSE TEST_UNIQUE_FILES *.cpp *.h) +file(GLOB_RECURSE TEST_UNIQUE_FILES unique_test.cpp *.h) add_executable(test_unique_files ${TEST_UNIQUE_FILES}) include_directories(${OCK_CTR_UTIL_INSTALL_DIR}/googletest-release-1.8.1/include) link_directories(${OCK_CTR_UTIL_INSTALL_DIR}/googletest-release-1.8.1/lib64) diff --git a/src/AccCTR/tests/ut/src/unique_test.cpp 
b/src/AccCTR/tests/ut/src/unique_test.cpp index a94ebaf7..fe7d0242 100644 --- a/src/AccCTR/tests/ut/src/unique_test.cpp +++ b/src/AccCTR/tests/ut/src/unique_test.cpp @@ -95,6 +95,13 @@ TEST_F(UniqueTest, Conf) ASSERT_EQ(unique->DoEnhancedUnique(uniqueIn, uniqueOut), 3); // idCntFill空指针 uniqueOut.idCntFill = idCntFill; ASSERT_EQ(unique->DoEnhancedUnique(uniqueIn, uniqueOut), 7); // padding长度过小 + + unique->UnInitialize(); + delete[] idCnt; + delete[] idCntFill; + delete[] uniqueIdCntInBucket; + delete[] uniqueIdInBucket; + std::cout << "===========Conf end=============" << std::endl; } @@ -115,6 +122,9 @@ TEST_F(UniqueTest, usePaddingNoShardingErr) conf.outputType = OutputType::ENHANCED; ASSERT_EQ(unique->Initialize(conf), 9); + + unique->UnInitialize(); + std::cout << "===========usePaddingNoShardingErr end=============" << std::endl; } @@ -132,6 +142,8 @@ TEST_F(UniqueTest, useNegativeDesiredSize) ASSERT_EQ(unique->Initialize(conf), 1); + unique->UnInitialize(); + std::cout << "===========useNegativeDesiredSize end=============" << std::endl; } @@ -404,6 +416,9 @@ TEST_F(UniqueTest, DoEnhancedUniqueErr) ASSERT_EQ(uniqueOut.uniqueIdCnt, (int)idsSet.size()); unique->UnInitialize(); + delete[] uniqueIdInBucket; + delete[] idCnt; + std::cout << "===========DoEnhancedUniqueErr end=============" << std::endl; } @@ -544,6 +559,9 @@ TEST_F(UniqueTest, idCntIsNullSharding) ASSERT_EQ(ret, 3); unique->UnInitialize(); + delete[] uniqueIdCntInBucket; + delete[] uniqueIdInBucket; + std::cout << "===========idCntIsNullSharding end=============" << std::endl; } @@ -620,6 +638,7 @@ TEST_F(UniqueTest, DoUniqueShard) ASSERT_THAT(uniqueIdCntInBucket, testing::ElementsAreArray(expectedUniqueIdCnt)); ASSERT_THAT(idCnt, testing::ElementsAreArray(expectedIdCnt)); unique->UnInitialize(); + delete[] uniqueIdInBucket; std::cout << "===========DoUniqueShard end=============" << std::endl; } @@ -685,6 +704,7 @@ TEST_F(UniqueTest, DoUniqueOnlyShard) ASSERT_THAT(inputId, testing::ElementsAreArray(restoreIds)); ASSERT_THAT(uniqueIdCntInBucket, testing::ElementsAreArray(expectedUniqueIdCnt)); unique->UnInitialize(); + delete[] uniqueIdInBucket; std::cout << "===========DoUniqueOnlyShard end=============" << std::endl; } @@ -769,6 +789,8 @@ TEST_F(UniqueTest, DoUniquePadding) ASSERT_THAT(idCntFill, testing::ElementsAreArray(expectedIdCnt)); ASSERT_EQ(uniqueOut.uniqueIdCnt, conf.paddingSize * conf.shardingNum); unique->UnInitialize(); + delete[] idCnt; + delete[] uniqueIdInBucket; std::cout << "===========DoUniquePadding end=============" << std::endl; } @@ -913,6 +935,7 @@ TEST_F(UniqueTest, DoUniqueShardNumberOversize) ASSERT_THAT(uniqueIdCntInBucket, testing::ElementsAreArray(expectedUniqueIdCnt)); ASSERT_THAT(idCnt, testing::ElementsAreArray(expectedIdCnt)); unique->UnInitialize(); + delete[] uniqueIdInBucket; std::cout << "===========DoUniqueShardNumberOversize end=============" << std::endl; } @@ -981,6 +1004,12 @@ TEST_F(UniqueTest, DoUniqueSpecial) } unique->UnInitialize(); + delete[] uniqueData; + delete[] index; + delete[] idCnt; + delete[] idCntFill; + delete[] uniqueIdCntInBucket; + delete[] uniqueIdInBucket; std::cout << "===========DoUniqueSpecial end=============" << std::endl; } @@ -1020,6 +1049,10 @@ TEST_F(UniqueTest, IdLarge) uniqueOut.idCnt = idCnt; ASSERT_EQ(unique->DoEnhancedUnique(uniqueIn, uniqueOut), 6); // ID太大 + + unique->UnInitialize(); + delete[] idCnt; + std::cout << "===========IdLarge end=============" << std::endl; } @@ -1095,6 +1128,8 @@ TEST_F(UniqueTest, DoUniqueNormalInt32) 
ASSERT_THAT(idCnt, testing::ElementsAreArray(expectedIdCnt)); unique->UnInitialize(); + delete[] uniqueIdInBucket; + std::cout << "===========DoUniqueNormalInt32 end=============" << std::endl; } @@ -1228,6 +1263,7 @@ TEST_F(UniqueTest, DoUniqueShardMultipleTimes) ASSERT_THAT(idCnt, testing::ElementsAreArray(expectedIdCnt)); } unique->UnInitialize(); + delete[] uniqueIdInBucket; std::cout << "===========DoUniqueShardMultipleTimes end=============" << std::endl; } @@ -1312,6 +1348,9 @@ TEST_F(UniqueTest, DoUniquePaddingMultipleTimes) } unique->UnInitialize(); + delete[] idCnt; + delete[] uniqueIdInBucket; + std::cout << "===========DoUniquePaddingMultipleTimes end=============" << std::endl; } @@ -1348,6 +1387,9 @@ TEST_F(UniqueTest, IdCntSmall) uniqueOut.idCnt = idCnt; ASSERT_EQ(unique->DoEnhancedUnique(uniqueIn, uniqueOut), 4); // idcnt过小 + + unique->UnInitialize(); + std::cout << "===========IdCntSmall end=============" << std::endl; } -- Gitee From 24129d804f5f85a3fdfa79e139735198eae0d782 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=BD=95=E9=9C=96?= Date: Tue, 28 May 2024 20:30:54 +0800 Subject: [PATCH 172/302] =?UTF-8?q?C++=E6=B5=8B=E8=AF=95=E7=94=A8=E4=BE=8B?= =?UTF-8?q?=E6=B7=BB=E5=8A=A0ASAN=EF=BC=88=E5=9C=B0=E5=9D=80=E6=B6=88?= =?UTF-8?q?=E6=AF=92=EF=BC=89=E8=BF=9B=E8=A1=8C=E5=86=85=E5=AD=98=E6=B3=84?= =?UTF-8?q?=E9=9C=B2=E6=A3=80=E6=B5=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/AccCTR/tests/ut/src/unique_test.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/AccCTR/tests/ut/src/unique_test.cpp b/src/AccCTR/tests/ut/src/unique_test.cpp index fe7d0242..1b663ba9 100644 --- a/src/AccCTR/tests/ut/src/unique_test.cpp +++ b/src/AccCTR/tests/ut/src/unique_test.cpp @@ -1389,6 +1389,7 @@ TEST_F(UniqueTest, IdCntSmall) ASSERT_EQ(unique->DoEnhancedUnique(uniqueIn, uniqueOut), 4); // idcnt过小 unique->UnInitialize(); + delete[] idCnt; std::cout << "===========IdCntSmall end=============" << std::endl; } @@ -1491,6 +1492,7 @@ TEST_F(UniqueTest, DoUniqueLotsDataFunction) ASSERT_THAT(idCnt, testing::ElementsAreArray(expectedIdCnt)); unique->UnInitialize(); + delete[] uniqueIdInBucket; if (path) { free(path); } @@ -1599,6 +1601,8 @@ TEST_F(UniqueTest, DoUniqueLotsDataPaddingFunction) unique->UnInitialize(); ASSERT_EQ(unique->DoEnhancedUnique(uniqueIn, uniqueOut), 11); + delete[] idCnt; + delete[] uniqueIdInBucket; if (path) { free(path); } -- Gitee From b85bf288341ac1f0791b8e58728819390d0e1ed4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=BD=95=E9=9C=96?= Date: Tue, 28 May 2024 20:37:22 +0800 Subject: [PATCH 173/302] =?UTF-8?q?C++=E6=B5=8B=E8=AF=95=E7=94=A8=E4=BE=8B?= =?UTF-8?q?=E6=B7=BB=E5=8A=A0ASAN=EF=BC=88=E5=9C=B0=E5=9D=80=E6=B6=88?= =?UTF-8?q?=E6=AF=92=EF=BC=89=E8=BF=9B=E8=A1=8C=E5=86=85=E5=AD=98=E6=B3=84?= =?UTF-8?q?=E9=9C=B2=E6=A3=80=E6=B5=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/AccCTR/tests/ut/src/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/AccCTR/tests/ut/src/CMakeLists.txt b/src/AccCTR/tests/ut/src/CMakeLists.txt index 93f8f6c2..3da58244 100644 --- a/src/AccCTR/tests/ut/src/CMakeLists.txt +++ b/src/AccCTR/tests/ut/src/CMakeLists.txt @@ -24,7 +24,7 @@ include("${CMAKE_CURRENT_SOURCE_DIR}/../conf/toolchain.cmake") set(SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../../../src) set(TOP_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../../../) -file(GLOB_RECURSE TEST_UNIQUE_FILES unique_test.cpp *.h) +file(GLOB_RECURSE TEST_UNIQUE_FILES *.cpp *.h) 
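A note on the cleanup pattern in the test patches above, offered as a hedged sketch rather than code from the repository: the trailing delete[] calls plug the leaks that LeakSanitizer reports inside the tests themselves, but gtest's fatal ASSERT_* macros return from the test body as soon as they fail, so a delete[] placed after the assertions is skipped on any failing run and that run leaks again. Holding the buffers in an RAII owner releases them on every exit path. The test name, buffer names and sizes below are illustrative assumptions only:

#include <cstddef>
#include <cstdint>
#include <memory>
#include <gtest/gtest.h>

TEST(UniqueSketch, BuffersFreedEvenWhenAnAssertFails)
{
    constexpr std::size_t kBucketLen = 128;
    // unique_ptr<T[]> owns the arrays (value-initialized); the destructor
    // frees them on every return path, including a failing fatal assertion
    auto idCnt = std::make_unique<std::int64_t[]>(kBucketLen);
    auto uniqueIdInBucket = std::make_unique<std::int64_t[]>(kBucketLen);

    uniqueIdInBucket[0] = idCnt[0];
    ASSERT_EQ(uniqueIdInBucket[0], 0);  // even if this failed and returned
                                        // early, both arrays are released
}

Where the API under test needs a raw pointer, idCnt.get() (or a std::vector's data()) hands one over without transferring ownership, so no explicit delete[] is needed at the end of the test body.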
add_executable(test_unique_files ${TEST_UNIQUE_FILES}) include_directories(${OCK_CTR_UTIL_INSTALL_DIR}/googletest-release-1.8.1/include) link_directories(${OCK_CTR_UTIL_INSTALL_DIR}/googletest-release-1.8.1/lib64) -- Gitee From 14e125ff0bcc6a3fea01fc3f05f6208a0e715afc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=BD=95=E9=9C=96?= Date: Wed, 29 May 2024 10:36:45 +0800 Subject: [PATCH 174/302] =?UTF-8?q?C++=E6=B5=8B=E8=AF=95=E7=94=A8=E4=BE=8B?= =?UTF-8?q?=E6=B7=BB=E5=8A=A0ASAN=EF=BC=88=E5=9C=B0=E5=9D=80=E6=B6=88?= =?UTF-8?q?=E6=AF=92=EF=BC=89=E8=BF=9B=E8=A1=8C=E5=86=85=E5=AD=98=E6=B3=84?= =?UTF-8?q?=E9=9C=B2=E6=A3=80=E6=B5=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/AccCTR/tests/ut/src/unique_test.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/AccCTR/tests/ut/src/unique_test.cpp b/src/AccCTR/tests/ut/src/unique_test.cpp index 1b663ba9..94e8d92c 100644 --- a/src/AccCTR/tests/ut/src/unique_test.cpp +++ b/src/AccCTR/tests/ut/src/unique_test.cpp @@ -219,6 +219,9 @@ TEST_F(UniqueTest, DoUniqueNormal) ASSERT_EQ(uniqueOut.uniqueIdCnt, (int)idsSet.size()); unique->UnInitialize(); + if (path) { + free(path); + } std::cout << "===========DoUniqueNormal end=============" << std::endl; } -- Gitee From cecd7ed0f932f5f54f08d1868fd255976217c943 Mon Sep 17 00:00:00 2001 From: yangzhen_BIG Date: Wed, 29 May 2024 03:38:24 +0000 Subject: [PATCH 175/302] =?UTF-8?q?!160=20=E7=89=B9=E6=80=A7=EF=BC=88?= =?UTF-8?q?=E4=BF=9D=E5=AD=98=E4=B8=8E=E5=8A=A0=E8=BD=BD=EF=BC=89=EF=BC=9A?= =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E6=97=A0slot=E4=BC=98=E5=8C=96=E5=99=A8?= =?UTF-8?q?=E4=BF=9D=E5=AD=98=E5=BC=82=E5=B8=B8=E9=97=AE=E9=A2=98=20*=20?= =?UTF-8?q?=E7=89=B9=E6=80=A7=EF=BC=88=E4=BF=9D=E5=AD=98=E4=B8=8E=E5=8A=A0?= =?UTF-8?q?=E8=BD=BD=EF=BC=89=EF=BC=9A=E4=BF=AE=E5=A4=8D=E6=97=A0slot?= =?UTF-8?q?=E4=BC=98=E5=8C=96=E5=99=A8=E4=BF=9D=E5=AD=98=E5=BC=82=E5=B8=B8?= =?UTF-8?q?=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mx_rec/saver/saver.py | 99 +++++++++++++++------------- src/core/emb_table/embedding_ddr.cpp | 10 +++ src/core/hybrid_mgmt/hybrid_mgmt.cpp | 4 +- 3 files changed, 66 insertions(+), 47 deletions(-) diff --git a/mx_rec/saver/saver.py b/mx_rec/saver/saver.py index a91599bc..f9dfd0dc 100644 --- a/mx_rec/saver/saver.py +++ b/mx_rec/saver/saver.py @@ -260,52 +260,61 @@ class Saver(object): table_instance0 = self.config_instance.sparse_embed_config.get_table_instance(self.var_list[0]) if table_instance0.is_hbm: - self.config_instance.hybrid_manager_config.save_host_data(root_dir) - if self.config_instance.use_dynamic_expansion: - # Data related to dynamic expansion needs to be saved only on the host side. 
- return - - result = self.save_op_dict - threads = [] - for table_name in result.keys(): - thread = SaveModelThread(self, sess, result, root_dir, table_name) - threads.append(thread) - - for thread in threads: - thread.start() - - for thread in threads: - thread.join() + self._save_hbm(sess, root_dir) else: - # 接受host侧传来的需要swap_out的offset用于更新host侧并保存 - self.config_instance.hybrid_manager_config.fetch_device_emb() - for var in self.var_list: - table_instance = self.config_instance.sparse_embed_config.get_table_instance(var) - table_name = table_instance.table_name - - use_static = ConfigInitializer.get_instance().use_static - max_lookup_vec_size = None - if use_static: - max_lookup_vec_size = table_instance.send_count * self.rank_size - swap_out_pos, swap_out_len = npu_ops.gen_npu_ops.get_next( - output_types=[tf.int32, tf.int32], - output_shapes=[[max_lookup_vec_size], []], - channel_name=f'{table_name}_save_h2d_{TRAIN_CHANNEL_ID}') - if use_static: - swap_out_pos = swap_out_pos[:swap_out_len] - - optimizer = ConfigInitializer.get_instance().optimizer_config.get_optimizer_by_table_name(table_name) - table = [var] + [slot_var for slots in optimizer.values() for slot_var in slots.values()] - - swap_outs = [tf.gather(one_table, swap_out_pos) for one_table in table] - swap_out = tf.concat(swap_outs, axis=1) - channel_name = f'{table_name}_save_d2h_{TRAIN_CHANNEL_ID}' - logger.debug('channel %s was built for op swap_out_op.', channel_name) - swap_out_op = npu_ops.outfeed_enqueue_op(channel_name=channel_name, inputs=[swap_out]) - # 发送host需要的embedding - sess.run(swap_out_op) - self.config_instance.hybrid_manager_config.save_host_data(root_dir) - logger.debug(f"host data was saved.") + self._save_ddr(sess, root_dir) + logger.debug(f"Host data was saved.") + + def _save_hbm(self, sess, root_dir): + self.config_instance.hybrid_manager_config.save_host_data(root_dir) + if self.config_instance.use_dynamic_expansion: + # Data related to dynamic expansion needs to be saved only on the host side. 
+ return + + result = self.save_op_dict + threads = [] + for table_name in result.keys(): + thread = SaveModelThread(self, sess, result, root_dir, table_name) + threads.append(thread) + + for thread in threads: + thread.start() + + for thread in threads: + thread.join() + + def _save_ddr(self, sess, root_dir): + # 接受host侧传来的需要swap_out的offset用于更新host侧并保存 + self.config_instance.hybrid_manager_config.fetch_device_emb() + for var in self.var_list: + table_instance = self.config_instance.sparse_embed_config.get_table_instance(var) + table_name = table_instance.table_name + + use_static = ConfigInitializer.get_instance().use_static + max_lookup_vec_size = None + if use_static: + max_lookup_vec_size = table_instance.send_count * self.rank_size + swap_out_pos, swap_out_len = npu_ops.gen_npu_ops.get_next( + output_types=[tf.int32, tf.int32], + output_shapes=[[max_lookup_vec_size], []], + channel_name=f'{table_name}_save_h2d_{TRAIN_CHANNEL_ID}') + if use_static: + swap_out_pos = swap_out_pos[:swap_out_len] + + table = [var] + optimizer = ConfigInitializer.get_instance().optimizer_config.get_optimizer_by_table_name(table_name) + if optimizer is not None: + for slots in optimizer.values(): + table += list(slots.values()) + + swap_outs = [tf.gather(one_table, swap_out_pos) for one_table in table] + swap_out = tf.concat(swap_outs, axis=1) + channel_name = f'{table_name}_save_d2h_{TRAIN_CHANNEL_ID}' + logger.debug('channel %s was built for op swap_out_op.', channel_name) + swap_out_op = npu_ops.outfeed_enqueue_op(channel_name=channel_name, inputs=[swap_out]) + # 发送host需要的embedding + sess.run(swap_out_op) + self.config_instance.hybrid_manager_config.save_host_data(root_dir) def _get_valid_dict_data(self, dump_data_dict, table_name): host_data = self.config_instance.hybrid_manager_config.get_host_data(table_name) diff --git a/src/core/emb_table/embedding_ddr.cpp b/src/core/emb_table/embedding_ddr.cpp index caec0229..f069e5c7 100644 --- a/src/core/emb_table/embedding_ddr.cpp +++ b/src/core/emb_table/embedding_ddr.cpp @@ -156,6 +156,11 @@ void EmbeddingDDR::LoadEmbedding(const string &savePath, vector> & void EmbeddingDDR::LoadOptimizerSlot(const string &savePath, vector> &optimizerSlots) { + if (optimParams.size() == 0) { + LOG_DEBUG("optimizer has no slot data to load"); + return; + } + // must init first for (size_t i = 0; i < hostLoadOffset.size(); i++) { vector tmp(extEmbSize_ - embSize_); @@ -293,6 +298,11 @@ void EmbeddingDDR::SaveEmbedding(const string& savePath, vector>& void EmbeddingDDR::SaveOptimizerSlot(const string& savePath, vector>& optimizerSlots, size_t keySize) { + if (optimizerSlots.size() == 0) { + LOG_DEBUG("optimizer has no slot data to save"); + return; + } + if (optimizerSlots.size() != keySize) { string errMsg = StringFormat("optimizer slot data size not equal to key size, " "optimizerSlots.size:%d, keySize:%d", diff --git a/src/core/hybrid_mgmt/hybrid_mgmt.cpp b/src/core/hybrid_mgmt/hybrid_mgmt.cpp index 123b2c79..6b998205 100644 --- a/src/core/hybrid_mgmt/hybrid_mgmt.cpp +++ b/src/core/hybrid_mgmt/hybrid_mgmt.cpp @@ -557,7 +557,7 @@ bool HybridMgmt::IsEvalEndBatch(int batchId) const bool HybridMgmt::ParseKeys(int channelId, int& batchId, TaskType type) { #ifndef GTEST - LOG_INFO(MGMT + "channelId:{} batchId:{}, DDR mode, ParseKeys start.", channelId, batchId); + LOG_INFO(MGMT + "channelId:{} batchId:{}, ParseKeys start.", channelId, batchId); TimeCost parseKeyTC; bool remainBatch = true; // 是否从通道获取了数据 @@ -1328,7 +1328,7 @@ void HybridMgmt::InitEmbeddingCache(const vector& 
embInfos) specialProcessStatus[embInfo.name] = ProcessStatus::NORMAL; // 初始化embedding cache - LOG_INFO("create cache for table:{}, hostVocabSize:{}, embSize:{}, maxCacheSize:{}", + LOG_INFO("create cache for table:{}, hostVocabSize:{}, extEmbeddingSize:{}, maxCacheSize(devVocabSize):{}", embInfo.name, embInfo.hostVocabSize, embInfo.extEmbeddingSize, embInfo.devVocabSize); EmbCache::EmbCacheInfo embCacheInfo(embInfo.name, embInfo.hostVocabSize, embInfo.embeddingSize, embInfo.extEmbeddingSize, embInfo.devVocabSize); -- Gitee From 2781f72ed2815a7c8a1d9d12b0affa3bd0fb6593 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=BD=95=E9=9C=96?= Date: Wed, 29 May 2024 17:16:53 +0800 Subject: [PATCH 176/302] =?UTF-8?q?C++=E6=B5=8B=E8=AF=95=E7=94=A8=E4=BE=8B?= =?UTF-8?q?=E6=B7=BB=E5=8A=A0ASAN=EF=BC=88=E5=9C=B0=E5=9D=80=E6=B6=88?= =?UTF-8?q?=E6=AF=92=EF=BC=89=E8=BF=9B=E8=A1=8C=E5=86=85=E5=AD=98=E6=B3=84?= =?UTF-8?q?=E9=9C=B2=E6=A3=80=E6=B5=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/test_ut.sh | 7 +++---- src/tests/leaks.supp | 21 +++++++++++++++++++++ 2 files changed, 24 insertions(+), 4 deletions(-) create mode 100644 src/tests/leaks.supp diff --git a/src/test_ut.sh b/src/test_ut.sh index 20c6898a..c7f8d9c0 100644 --- a/src/test_ut.sh +++ b/src/test_ut.sh @@ -38,10 +38,6 @@ opensource_path="${ROOT_DIR}"/../opensource acc_ctr_path="${ROOT_DIR}"/src/AccCTR export LD_LIBRARY_PATH="${acc_ctr_path}"/output/ock_ctr_common/lib:$LD_LIBRARY_PATH -# config asan environment variable -export ASAN_OPTIONS=halt_on_error=1:detect_leaks=1 - - function prepare_googletest(){ cd ${opensource_path} if [ ! -d googletest-release-1.8.1 ]; then @@ -133,6 +129,9 @@ mkdir build cd build python_path="$(dirname "$(dirname "$(which python3.7)")")" +# config asan environment variable +export ASAN_OPTIONS=halt_on_error=1:detect_leaks=1 +export LSAN_OPTIONS=suppressions=../tests/leaks.supp cmake -DCMAKE_BUILD_TYPE=Debug \ -DTF_PATH="${python_path}"/lib/python3.7/site-packages/"${TF_DIR}" \ diff --git a/src/tests/leaks.supp b/src/tests/leaks.supp new file mode 100644 index 00000000..c192bc92 --- /dev/null +++ b/src/tests/leaks.supp @@ -0,0 +1,21 @@ +/* Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and + limitations under the License. +==============================================================================*/ + +# There are known leaks. +# 1.known mpi leaks. 
+leak:libmpi.so* +leak:libopen-pal.so* +leak:libpmix.so* +leak:libc.so* \ No newline at end of file -- Gitee From e0f5391d9353286645a9c63f0669a7462de7eb05 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=BD=95=E9=9C=96?= Date: Wed, 29 May 2024 17:39:23 +0800 Subject: [PATCH 177/302] =?UTF-8?q?C++=E6=B5=8B=E8=AF=95=E7=94=A8=E4=BE=8B?= =?UTF-8?q?=E6=B7=BB=E5=8A=A0ASAN=EF=BC=88=E5=9C=B0=E5=9D=80=E6=B6=88?= =?UTF-8?q?=E6=AF=92=EF=BC=89=E8=BF=9B=E8=A1=8C=E5=86=85=E5=AD=98=E6=B3=84?= =?UTF-8?q?=E9=9C=B2=E6=A3=80=E6=B5=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/tests/leaks.supp | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/src/tests/leaks.supp b/src/tests/leaks.supp index c192bc92..ebe0718d 100644 --- a/src/tests/leaks.supp +++ b/src/tests/leaks.supp @@ -1,17 +1,17 @@ -/* Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and - limitations under the License. -==============================================================================*/ +# Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== # There are known leaks. # 1.known mpi leaks. 
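How the pieces added across these ASan patches fit together: -fsanitize=address compiles and links the AddressSanitizer runtime, ASAN_OPTIONS with detect_leaks=1 turns on LeakSanitizer at process exit, halt_on_error decides whether the first error aborts the run, and LSAN_OPTIONS points at the suppression file above, whose leak: lines are substring patterns matched against the modules and functions in a leaking allocation's stack. Below is a minimal sketch of the kind of defect this setup reports; the file name, build line and suppression pattern are illustrative assumptions, not repository contents:

// leak_demo.cpp: build roughly the way the Debug flags above do, e.g.
//   g++ -O0 -g -fno-omit-frame-pointer -fsanitize=address leak_demo.cpp -o leak_demo
// and run with the options the scripts export:
//   ASAN_OPTIONS=halt_on_error=1:detect_leaks=1 LSAN_OPTIONS=suppressions=leaks.supp ./leak_demo
#include <cstddef>

int* MakeBuffer(std::size_t n)
{
    return new int[n];  // allocated here and never freed
}

int main()
{
    int* buf = MakeBuffer(100);
    buf[0] = 1;   // touch the buffer so the allocation is clearly live code
    return 0;     // at exit, LeakSanitizer prints the allocation stack through
                  // MakeBuffer; a "leak:MakeBuffer" line in leaks.supp would
                  // silence it, which is exactly how the libmpi, libopen-pal
                  // and libpmix entries above filter the known MPI leaks
}

The fast_unwind_on_malloc=0 option that a later patch in this series appends trades some allocation-time speed for more complete stack traces, which keeps reports like this attributable and lets the suppression list stay narrow.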
-- Gitee From 0df1f96d631e2d5a85beba383e60c74b4ea01724 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=BD=95=E9=9C=96?= Date: Wed, 29 May 2024 19:34:19 +0800 Subject: [PATCH 178/302] =?UTF-8?q?C++=E6=B5=8B=E8=AF=95=E7=94=A8=E4=BE=8B?= =?UTF-8?q?=E6=B7=BB=E5=8A=A0ASAN=EF=BC=88=E5=9C=B0=E5=9D=80=E6=B6=88?= =?UTF-8?q?=E6=AF=92=EF=BC=89=E8=BF=9B=E8=A1=8C=E5=86=85=E5=AD=98=E6=B3=84?= =?UTF-8?q?=E9=9C=B2=E6=A3=80=E6=B5=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/test_ut.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test_ut.sh b/src/test_ut.sh index c7f8d9c0..7305c081 100644 --- a/src/test_ut.sh +++ b/src/test_ut.sh @@ -130,7 +130,7 @@ cd build python_path="$(dirname "$(dirname "$(which python3.7)")")" # config asan environment variable -export ASAN_OPTIONS=halt_on_error=1:detect_leaks=1 +export ASAN_OPTIONS=halt_on_error=1:detect_leaks=1:fast_unwind_on_malloc=0 export LSAN_OPTIONS=suppressions=../tests/leaks.supp cmake -DCMAKE_BUILD_TYPE=Debug \ -- Gitee From 8387ff18a54c4d6226dd50ea3c0560277e5ad92b Mon Sep 17 00:00:00 2001 From: steepcurve Date: Thu, 30 May 2024 02:22:56 +0000 Subject: [PATCH 179/302] =?UTF-8?q?!161=20=E3=80=90bugfix=E3=80=91DCNv2?= =?UTF-8?q?=E5=88=87=E6=8D=A2=E4=BC=98=E5=8C=96=E5=99=A8=E6=8A=A5=E9=94=99?= =?UTF-8?q?=E4=BF=AE=E5=A4=8D=20*=20update=20examples/DCNv2/delay=5Floss?= =?UTF-8?q?=5Fscale.py.=20*=20update=20examples/DCNv2/delay=5Floss=5Fscale?= =?UTF-8?q?.py.=20*=20update=20examples/DCNv2/delay=5Floss=5Fscale.py.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/DCNv2/delay_loss_scale.py | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/examples/DCNv2/delay_loss_scale.py b/examples/DCNv2/delay_loss_scale.py index a9ee5e64..821b2210 100644 --- a/examples/DCNv2/delay_loss_scale.py +++ b/examples/DCNv2/delay_loss_scale.py @@ -21,13 +21,13 @@ from tensorflow.compat.v1.train import Optimizer class DenseLossScaleOptimizer: def __init__(self, opt, loss_scale): if not isinstance(opt, Optimizer): - raise ValueError('"opt" must be an instance of Optimizer, but got: %s' % type(opt)) + raise ValueError("`opt` must be an instance of Optimizer, but got: %s" % type(opt)) self._optimizer = opt self._loss_scale = tf.convert_to_tensor(loss_scale, tf.float32) - self._optimizer._lr = self._optimizer._lr / self._loss_scale + _scale_learning_rate(self._optimizer, loss_scale) def compute_gradients(self, loss, var_list=None): - return self._optimizer.compute_gradients(loss*self._loss_scale, var_list=var_list) + return self._optimizer.compute_gradients(loss * self._loss_scale, var_list=var_list) def apply_gradients(self, avg_grads): return self._optimizer.apply_gradients(avg_grads) @@ -36,13 +36,26 @@ class DenseLossScaleOptimizer: class SparseLossScaleOptimizer: def __init__(self, opt, loss_scale): if not isinstance(opt, Optimizer): - raise ValueError('"opt" must be an instance of Optimizer, but got: %s' % type(opt)) + raise ValueError("`opt` must be an instance of Optimizer, but got: %s" % type(opt)) self._optimizer = opt self._loss_scale = tf.convert_to_tensor(loss_scale, tf.float32) - self._optimizer._lr = self._optimizer._lr / self._loss_scale + _scale_learning_rate(self._optimizer, loss_scale) def compute_gradients(self, loss, var_list=None): - return tf.gradients(loss*self._loss_scale, var_list) + return tf.gradients(loss * self._loss_scale, var_list) def apply_gradients(self, grads_and_vars): - return 
self._optimizer.apply_gradients(grads_and_vars) \ No newline at end of file + return self._optimizer.apply_gradients(grads_and_vars) + + +def _scale_learning_rate(opt: Optimizer, loss_scale: float) -> None: + if loss_scale == 0: + raise ValueError("`loss_scale` can not be zero") + if hasattr(opt, "_learning_rate"): + # `SGD` or `Adagrad` + opt._learning_rate = opt._learning_rate / tf.convert_to_tensor(loss_scale, tf.float32) + elif hasattr(opt, "_lr"): + # `Adam` + opt._lr = opt._lr / tf.convert_to_tensor(loss_scale, tf.float32) + else: + raise ValueError("`opt` should have a `_learning_rate` or `_lr` named field") -- Gitee From d3a388c03af109ac4230a4caf2e857e1d4869be5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=BD=95=E9=9C=96?= Date: Thu, 30 May 2024 10:53:07 +0800 Subject: [PATCH 180/302] =?UTF-8?q?C++=E6=B5=8B=E8=AF=95=E7=94=A8=E4=BE=8B?= =?UTF-8?q?=E6=B7=BB=E5=8A=A0ASAN=EF=BC=88=E5=9C=B0=E5=9D=80=E6=B6=88?= =?UTF-8?q?=E6=AF=92=EF=BC=89=E8=BF=9B=E8=A1=8C=E5=86=85=E5=AD=98=E6=B3=84?= =?UTF-8?q?=E9=9C=B2=E6=A3=80=E6=B5=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/tests/file_system/hdfs_file_system_test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tests/file_system/hdfs_file_system_test.cpp b/src/tests/file_system/hdfs_file_system_test.cpp index 3c1a2561..98f733f0 100644 --- a/src/tests/file_system/hdfs_file_system_test.cpp +++ b/src/tests/file_system/hdfs_file_system_test.cpp @@ -26,10 +26,10 @@ using namespace emock; void MockHdfs() { + EMOCK(&HdfsWrapper::LoadHdfsLib).stubs().will(ignoreReturnValue()); hdfsFS ConnectFs; hdfsFile hdfsFileHandler; hdfsFileInfo* fileInfo; - EMOCK(&HdfsWrapper::LoadHdfsLib).stubs().will(ignoreReturnValue()); EMOCK(&HdfsWrapper::CloseHdfsLib).stubs().will(ignoreReturnValue()); EMOCK(&HdfsWrapper::Connect).stubs().will(returnValue(ConnectFs)); EMOCK(&HdfsWrapper::Disconnect).stubs().will(returnValue(1)); -- Gitee From 7d246a26eca111b836e1e692e3d22bde33a09aaf Mon Sep 17 00:00:00 2001 From: yxy1684 <2270320041@qq.com> Date: Thu, 30 May 2024 14:28:06 +0000 Subject: [PATCH 181/302] =?UTF-8?q?!164=20xdeepFM=20Github=E5=8E=9F?= =?UTF-8?q?=E5=A7=8B=E4=BB=A3=E7=A0=81=20*=20xdeepFM=20Github=E5=8E=9F?= =?UTF-8?q?=E5=A7=8B=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/xDeepFM/IO/base_cache.py | 13 + examples/xDeepFM/IO/ffm_cache.py | 162 ++++++++++++ examples/xDeepFM/IO/iterator.py | 207 +++++++++++++++ examples/xDeepFM/main.py | 187 +++++++++++++ examples/xDeepFM/src/base_model.py | 193 ++++++++++++++ examples/xDeepFM/src/exDeepFM.py | 409 +++++++++++++++++++++++++++++ examples/xDeepFM/train.py | 304 +++++++++++++++++++++ examples/xDeepFM/utils/log.py | 20 ++ examples/xDeepFM/utils/metric.py | 97 +++++++ examples/xDeepFM/utils/util.py | 83 ++++++ 10 files changed, 1675 insertions(+) create mode 100644 examples/xDeepFM/IO/base_cache.py create mode 100644 examples/xDeepFM/IO/ffm_cache.py create mode 100644 examples/xDeepFM/IO/iterator.py create mode 100644 examples/xDeepFM/main.py create mode 100644 examples/xDeepFM/src/base_model.py create mode 100644 examples/xDeepFM/src/exDeepFM.py create mode 100644 examples/xDeepFM/train.py create mode 100644 examples/xDeepFM/utils/log.py create mode 100644 examples/xDeepFM/utils/metric.py create mode 100644 examples/xDeepFM/utils/util.py diff --git a/examples/xDeepFM/IO/base_cache.py b/examples/xDeepFM/IO/base_cache.py new file mode 100644 index 00000000..11187de9 --- 
/dev/null +++ b/examples/xDeepFM/IO/base_cache.py @@ -0,0 +1,13 @@ +"""define abstract base class""" +import abc + +__all__ = ["BaseCache"] + + +class BaseCache(object): + """abstract base class""" + + @abc.abstractmethod + def write_tfrecord(self, infile, outfile, hparams): + """Subclass must implement this.""" + pass diff --git a/examples/xDeepFM/IO/ffm_cache.py b/examples/xDeepFM/IO/ffm_cache.py new file mode 100644 index 00000000..1f3d505c --- /dev/null +++ b/examples/xDeepFM/IO/ffm_cache.py @@ -0,0 +1,162 @@ +"""define FfmCache class for cache the format dataset""" +from IO.base_cache import BaseCache +import tensorflow as tf +import numpy as np +from collections import defaultdict +import utils.util as util + +__all__ = ["FfmCache"] + + +class FfmCache(BaseCache): + # field index start by 1, feat index start by 1 + def _load_batch_data_from_file(self, file, hparams): + batch_size = hparams.batch_size + labels = [] + features = [] + impression_id = [] + cnt = 0 + with open(file, 'r') as rd: + while True: + line = rd.readline().strip(' ') + if not line: + break + tmp = line.strip().split(util.USER_ID_SPLIT) + if len(tmp) == 2: + impression_id.append(tmp[1].strip()) + line = tmp[0] + cols = line.strip().split(' ') + label = float(cols[0].strip()) + if label > 0: + label = 1 + else: + label = 0 + cur_feature_list = [] + for word in cols[1:]: + if not word.strip(): + continue + tokens = word.strip().split(':') + cur_feature_list.append( \ + [int(tokens[0]) - 1, \ + int(tokens[1]) - 1, \ + float(tokens[2])]) + features.append(cur_feature_list) + labels.append(label) + cnt += 1 + if cnt == batch_size: + yield labels, features, impression_id + labels = [] + features = [] + impression_id = [] + cnt = 0 + if cnt > 0: + yield labels, features, impression_id + + def _convert_data(self, labels, features, hparams): + dim = hparams.FEATURE_COUNT + FIELD_COUNT = hparams.FIELD_COUNT + instance_cnt = len(labels) + + fm_feat_indices = [] + fm_feat_values = [] + fm_feat_shape = [instance_cnt, dim] + + dnn_feat_indices = [] + dnn_feat_values = [] + dnn_feat_weights = [] + dnn_feat_shape = [instance_cnt * FIELD_COUNT, -1] + + for i in range(instance_cnt): + m = len(features[i]) + dnn_feat_dic = {} + for j in range(m): + fm_feat_indices.append([i, features[i][j][1]]) + fm_feat_values.append(features[i][j][2]) + if features[i][j][0] not in dnn_feat_dic: + dnn_feat_dic[features[i][j][0]] = 0 + else: + dnn_feat_dic[features[i][j][0]] += 1 + dnn_feat_indices.append([i * FIELD_COUNT + features[i][j][0], \ + dnn_feat_dic[features[i][j][0]]]) + dnn_feat_values.append(features[i][j][1]) + dnn_feat_weights.append(features[i][j][2]) + if dnn_feat_shape[1] < dnn_feat_dic[features[i][j][0]]: + dnn_feat_shape[1] = dnn_feat_dic[features[i][j][0]] + dnn_feat_shape[1] += 1 + + sorted_index = sorted(range(len(dnn_feat_indices)), + key=lambda k: (dnn_feat_indices[k][0], \ + dnn_feat_indices[k][1])) + + res = {} + res['fm_feat_indices'] = np.asarray(fm_feat_indices, dtype=np.int64) + res['fm_feat_values'] = np.asarray(fm_feat_values, dtype=np.float32) + res['fm_feat_shape'] = np.asarray(fm_feat_shape, dtype=np.int64) + res['labels'] = np.asarray([[label] for label in labels], dtype=np.float32) + + res['dnn_feat_indices'] = np.asarray(dnn_feat_indices, dtype=np.int64)[sorted_index] + res['dnn_feat_values'] = np.asarray(dnn_feat_values, dtype=np.int64)[sorted_index] + res['dnn_feat_weights'] = np.asarray(dnn_feat_weights, dtype=np.float32)[sorted_index] + res['dnn_feat_shape'] = np.asarray(dnn_feat_shape, dtype=np.int64) + 
return res + + def write_tfrecord(self, infile, outfile, hparams): + sample_num = 0 + FEATURE_COUNT = hparams.FEATURE_COUNT + writer = tf.python_io.TFRecordWriter(outfile) + feature_cnt = defaultdict(lambda: 0) + impression_id_list = [] + try: + for labels, features, impression_id in self._load_batch_data_from_file(infile, hparams): + impression_id_list.extend(impression_id) + sample_num += len(labels) + input_in_sp = self._convert_data(labels, features, hparams) + fm_feat_indices = input_in_sp['fm_feat_indices'] + + for feat in fm_feat_indices: + feature_cnt[feat[1]] += 1 + + fm_feat_values = input_in_sp['fm_feat_values'] + fm_feat_shape = input_in_sp['fm_feat_shape'] + labels = input_in_sp['labels'] + dnn_feat_indices = input_in_sp['dnn_feat_indices'] + dnn_feat_values = input_in_sp['dnn_feat_values'] + dnn_feat_weights = input_in_sp['dnn_feat_weights'] + dnn_feat_shape = input_in_sp['dnn_feat_shape'] + + fm_feat_indices_str = fm_feat_indices.tostring() + labels_str = labels.tostring() + dnn_feat_indices_str = dnn_feat_indices.tostring() + + example = tf.train.Example( + features=tf.train.Features( + feature={ + 'fm_feat_indices': tf.train.Feature( + bytes_list=tf.train.BytesList(value=[fm_feat_indices_str])), + 'fm_feat_values': tf.train.Feature( + float_list=tf.train.FloatList(value=fm_feat_values)), + 'fm_feat_shape': tf.train.Feature( + int64_list=tf.train.Int64List(value=fm_feat_shape)), + 'labels': tf.train.Feature( + bytes_list=tf.train.BytesList(value=[labels_str])), + 'dnn_feat_indices': tf.train.Feature( + bytes_list=tf.train.BytesList(value=[dnn_feat_indices_str])), + 'dnn_feat_values': tf.train.Feature( + int64_list=tf.train.Int64List(value=dnn_feat_values)), + 'dnn_feat_weights': tf.train.Feature( + float_list=tf.train.FloatList(value=dnn_feat_weights)), + 'dnn_feat_shape': tf.train.Feature( + int64_list=tf.train.Int64List(value=dnn_feat_shape)) + } + ) + ) + serialized = example.SerializeToString() + writer.write(serialized) + except: + raise ValueError('train data format must be libffm, for example 1 2:1:0.1 2:3:0.2 3:4:0.4') + writer.close() + sort_feature_cnt = sorted(feature_cnt.items(), key=lambda x: x[0]) + with open(util.FEAT_COUNT_FILE, 'w') as f: + for item in sort_feature_cnt: + f.write(str(item[0]) + ',' + str(item[1]) + '\n') + return sample_num, impression_id_list diff --git a/examples/xDeepFM/IO/iterator.py b/examples/xDeepFM/IO/iterator.py new file mode 100644 index 00000000..c7e50032 --- /dev/null +++ b/examples/xDeepFM/IO/iterator.py @@ -0,0 +1,207 @@ +"""define iterator""" +import collections +import tensorflow as tf +import abc + +BUFFER_SIZE = 256 +__all__ = ["BaseIterator", "FfmIterator", "DinIterator", "CCCFNetIterator"] + + +class BaseIterator(object): + @abc.abstractmethod + def get_iterator(self, src_dataset): + """Subclass must implement this.""" + pass + + @abc.abstractmethod + def parser(self, record): + pass + + +class FfmIterator(BaseIterator): + def __init__(self, src_dataset): + self.get_iterator(src_dataset) + + def get_iterator(self, src_dataset): + src_dataset = src_dataset.map(self.parser) + # src_dataset = src_dataset.shuffle(buffer_size=BUFFER_SIZE) + iterator = src_dataset.make_initializable_iterator() + _fm_feat_indices, _fm_feat_values, \ + _fm_feat_shape, _labels, _dnn_feat_indices, \ + _dnn_feat_values, _dnn_feat_weights, _dnn_feat_shape = iterator.get_next() + self.initializer = iterator.initializer + self.fm_feat_indices = _fm_feat_indices + self.fm_feat_values = _fm_feat_values + self.fm_feat_shape = _fm_feat_shape + self.labels 
= _labels + self.dnn_feat_indices = _dnn_feat_indices + self.dnn_feat_values = _dnn_feat_values + self.dnn_feat_weights = _dnn_feat_weights + self.dnn_feat_shape = _dnn_feat_shape + + def parser(self, record): + keys_to_features = { + 'fm_feat_indices': tf.FixedLenFeature([], tf.string), + 'fm_feat_values': tf.VarLenFeature(tf.float32), + 'fm_feat_shape': tf.FixedLenFeature([2], tf.int64), + 'labels': tf.FixedLenFeature([], tf.string), + 'dnn_feat_indices': tf.FixedLenFeature([], tf.string), + 'dnn_feat_values': tf.VarLenFeature(tf.int64), + 'dnn_feat_weights': tf.VarLenFeature(tf.float32), + 'dnn_feat_shape': tf.FixedLenFeature([2], tf.int64), + } + parsed = tf.parse_single_example(record, keys_to_features) + fm_feat_indices = tf.reshape(tf.decode_raw(parsed['fm_feat_indices'], tf.int64), [-1, 2]) + fm_feat_values = tf.sparse_tensor_to_dense(parsed['fm_feat_values']) + fm_feat_shape = parsed['fm_feat_shape'] + labels = tf.reshape(tf.decode_raw(parsed['labels'], tf.float32), [-1, 1]) + dnn_feat_indices = tf.reshape(tf.decode_raw(parsed['dnn_feat_indices'], tf.int64), [-1, 2]) + dnn_feat_values = tf.sparse_tensor_to_dense(parsed['dnn_feat_values']) + dnn_feat_weights = tf.sparse_tensor_to_dense(parsed['dnn_feat_weights']) + dnn_feat_shape = parsed['dnn_feat_shape'] + return fm_feat_indices, fm_feat_values, \ + fm_feat_shape, labels, dnn_feat_indices, \ + dnn_feat_values, dnn_feat_weights, dnn_feat_shape + + +class DinIterator(BaseIterator): + def __init__(self, src_dataset): + self.get_iterator(src_dataset) + + def get_iterator(self, src_dataset): + src_dataset = src_dataset.map(self.parser) + # src_dataset = src_dataset.shuffle(buffer_size=BUFFER_SIZE) + iterator = src_dataset.make_initializable_iterator() + output = iterator.get_next() + (_attention_news_indices, _attention_news_values, _attention_news_shape, \ + _attention_user_indices, _attention_user_values, _attention_user_weights, \ + _attention_user_shape, _fm_feat_indices, _fm_feat_val, \ + _fm_feat_shape, _labels, _dnn_feat_indices, _dnn_feat_values, \ + _dnn_feat_weight, _dnn_feat_shape) = output + self.initializer = iterator.initializer + self.attention_news_indices = _attention_news_indices + self.attention_news_values = _attention_news_values + self.attention_news_shape = _attention_news_shape + self.attention_user_indices = _attention_user_indices + self.attention_user_values = _attention_user_values + self.attention_user_weights = _attention_user_weights + self.attention_user_shape = _attention_user_shape + self.fm_feat_indices = _fm_feat_indices + self.fm_feat_val = _fm_feat_val + self.fm_feat_shape = _fm_feat_shape + self.labels = _labels + self.dnn_feat_indices = _dnn_feat_indices + self.dnn_feat_values = _dnn_feat_values + self.dnn_feat_weight = _dnn_feat_weight + self.dnn_feat_shape = _dnn_feat_shape + + def parser(self, record): + keys_to_features = { + 'attention_news_indices': tf.FixedLenFeature([], tf.string), + 'attention_news_values': tf.VarLenFeature(tf.float32), + 'attention_news_shape': tf.FixedLenFeature([2], tf.int64), + + 'attention_user_indices': tf.FixedLenFeature([], tf.string), + 'attention_user_values': tf.VarLenFeature(tf.int64), + 'attention_user_weights': tf.VarLenFeature(tf.float32), + 'attention_user_shape': tf.FixedLenFeature([2], tf.int64), + + 'fm_feat_indices': tf.FixedLenFeature([], tf.string), + 'fm_feat_val': tf.VarLenFeature(tf.float32), + 'fm_feat_shape': tf.FixedLenFeature([2], tf.int64), + + 'labels': tf.FixedLenFeature([], tf.string), + + 'dnn_feat_indices': tf.FixedLenFeature([], 
tf.string), + 'dnn_feat_values': tf.VarLenFeature(tf.int64), + 'dnn_feat_weight': tf.VarLenFeature(tf.float32), + 'dnn_feat_shape': tf.FixedLenFeature([2], tf.int64), + } + parsed = tf.parse_single_example(record, keys_to_features) + + attention_news_indices = tf.reshape(tf.decode_raw(parsed['attention_news_indices'], \ + tf.int64), [-1, 2]) + attention_news_values = tf.sparse_tensor_to_dense(parsed['attention_news_values']) + attention_news_shape = parsed['attention_news_shape'] + + attention_user_indices = tf.reshape(tf.decode_raw(parsed['attention_user_indices'], \ + tf.int64), [-1, 2]) + attention_user_values = tf.sparse_tensor_to_dense(parsed['attention_user_values']) + attention_user_weights = tf.sparse_tensor_to_dense(parsed['attention_user_weights']) + attention_user_shape = parsed['attention_user_shape'] + + fm_feat_indices = tf.reshape(tf.decode_raw(parsed['fm_feat_indices'], \ + tf.int64), [-1, 2]) + fm_feat_val = tf.sparse_tensor_to_dense(parsed['fm_feat_val']) + fm_feat_shape = parsed['fm_feat_shape'] + + labels = tf.reshape(tf.decode_raw(parsed['labels'], tf.float32), [-1, 1]) + + dnn_feat_indices = tf.reshape(tf.decode_raw(parsed['dnn_feat_indices'], \ + tf.int64), [-1, 2]) + dnn_feat_values = tf.sparse_tensor_to_dense(parsed['dnn_feat_values']) + dnn_feat_weight = tf.sparse_tensor_to_dense(parsed['dnn_feat_weight']) + dnn_feat_shape = parsed['dnn_feat_shape'] + return (attention_news_indices, attention_news_values, attention_news_shape, \ + attention_user_indices, attention_user_values, attention_user_weights, \ + attention_user_shape, fm_feat_indices, fm_feat_val, \ + fm_feat_shape, labels, dnn_feat_indices, dnn_feat_values, \ + dnn_feat_weight, dnn_feat_shape) + + +class CCCFNetIterator(BaseIterator): + def __init__(self, src_dataset): + self.get_iterator(src_dataset) + + def get_iterator(self, src_dataset): + src_dataset = src_dataset.map(self.parser) + # src_dataset = src_dataset.shuffle(buffer_size=BUFFER_SIZE) + iterator = src_dataset.make_initializable_iterator() + _labels, _userIds, _itemIds, \ + _user_profiles_indices, _user_profiles_values, _user_profiles_weights, _user_profiles_shape, \ + _item_profiles_indices, _item_profiles_values, _item_profiles_weights, _item_profiles_shape = iterator.get_next() + self.initializer = iterator.initializer + self.labels = _labels + self.userIds = _userIds + self.itemIds = _itemIds + self.user_profiles_indices = _user_profiles_indices + self.user_profiles_values = _user_profiles_values + self.user_profiles_weights = _user_profiles_weights + self.user_profiles_shape = _user_profiles_shape + self.item_profiles_indices = _item_profiles_indices + self.item_profiles_values = _item_profiles_values + self.item_profiles_weights = _item_profiles_weights + self.item_profiles_shape = _item_profiles_shape + + def parser(self, record): + keys_to_features = { + 'labels': tf.FixedLenFeature([], tf.string), + 'userIds': tf.VarLenFeature(tf.int64), + 'itemIds': tf.VarLenFeature(tf.int64), + 'user_profiles_indices': tf.FixedLenFeature([], tf.string), + 'user_profiles_values': tf.VarLenFeature(tf.int64), + 'user_profiles_weights': tf.VarLenFeature(tf.float32), + 'user_profiles_shape': tf.FixedLenFeature([2], tf.int64), + 'item_profiles_indices': tf.FixedLenFeature([], tf.string), + 'item_profiles_values': tf.VarLenFeature(tf.int64), + 'item_profiles_weights': tf.VarLenFeature(tf.float32), + 'item_profiles_shape': tf.FixedLenFeature([2], tf.int64) + } + parsed = tf.parse_single_example(record, keys_to_features) + labels = 
tf.reshape(tf.decode_raw(parsed['labels'], tf.float32), [-1, 1]) + userIds = tf.sparse_tensor_to_dense(parsed['userIds']) + itemIds = tf.sparse_tensor_to_dense(parsed['itemIds']) + + user_profiles_indices = tf.reshape(tf.decode_raw(parsed['user_profiles_indices'], tf.int64), [-1, 2]) + user_profiles_values = tf.sparse_tensor_to_dense(parsed['user_profiles_values']) + user_profiles_weights = tf.sparse_tensor_to_dense(parsed['user_profiles_weights']) + user_profiles_shape = parsed['user_profiles_shape'] + + item_profiles_indices = tf.reshape(tf.decode_raw(parsed['item_profiles_indices'], tf.int64), [-1, 2]) + item_profiles_values = tf.sparse_tensor_to_dense(parsed['item_profiles_values']) + item_profiles_weights = tf.sparse_tensor_to_dense(parsed['item_profiles_weights']) + item_profiles_shape = parsed['item_profiles_shape'] + + return labels, userIds, itemIds, \ + user_profiles_indices, user_profiles_values, user_profiles_weights, user_profiles_shape, \ + item_profiles_indices, item_profiles_values, item_profiles_weights, item_profiles_shape diff --git a/examples/xDeepFM/main.py b/examples/xDeepFM/main.py new file mode 100644 index 00000000..265faca9 --- /dev/null +++ b/examples/xDeepFM/main.py @@ -0,0 +1,187 @@ +"""This script parse and run train function""" +import train +import utils.util as util +import tensorflow as tf +import sys +from utils.log import Log + +#yaml = sys.argv[1] + + + +def flat_config(config): + """flat config to a dict""" + f_config = {} + category = ['data', 'model', 'train', 'info'] + for cate in category: + for key, val in config[cate].items(): + f_config[key] = val + return f_config + + +def create_hparams(FLAGS): + """Create hparams.""" + FLAGS = flat_config(FLAGS) + return tf.contrib.training.HParams( + # data + train_file=FLAGS['train_file'] if 'train_file' in FLAGS else None, + eval_file=FLAGS['eval_file'] if 'eval_file' in FLAGS else None, + test_file=FLAGS['test_file'] if 'test_file' in FLAGS else None, + infer_file=FLAGS['infer_file'] if 'infer_file' in FLAGS else None, + FEATURE_COUNT=FLAGS['FEATURE_COUNT'] if 'FEATURE_COUNT' in FLAGS else None, + FIELD_COUNT=FLAGS['FIELD_COUNT'] if 'FIELD_COUNT' in FLAGS else None, + data_format=FLAGS['data_format'] if 'data_format' in FLAGS else None, + PAIR_NUM=FLAGS['PAIR_NUM'] if 'PAIR_NUM' in FLAGS else None, + DNN_FIELD_NUM=FLAGS['DNN_FIELD_NUM'] if 'DNN_FIELD_NUM' in FLAGS else None, + n_user=FLAGS['n_user'] if 'n_user' in FLAGS else None, + n_item=FLAGS['n_item'] if 'n_item' in FLAGS else None, + n_user_attr=FLAGS['n_user_attr'] if 'n_user_attr' in FLAGS else None, + n_item_attr=FLAGS['n_item_attr'] if 'n_item_attr' in FLAGS else None, + # model + dim=FLAGS['dim'] if 'dim' in FLAGS else None, + layer_sizes=FLAGS['layer_sizes'] if 'layer_sizes' in FLAGS else None, + cross_layer_sizes=FLAGS['cross_layer_sizes'] if 'cross_layer_sizes' in FLAGS else None, + cross_layers = FLAGS['cross_layers'] if 'cross_layers' in FLAGS else None, + activation=FLAGS['activation'] if 'activation' in FLAGS else None, + cross_activation=FLAGS['cross_activation'] if 'cross_activation' in FLAGS else "identity", + dropout=FLAGS['dropout'] if 'dropout' in FLAGS else None, + attention_layer_sizes=FLAGS['attention_layer_sizes'] if 'attention_layer_sizes' in FLAGS else None, + attention_activation=FLAGS['attention_activation'] if 'attention_activation' in FLAGS else None, + model_type=FLAGS['model_type'] if 'model_type' in FLAGS else None, + method=FLAGS['method'] if 'method' in FLAGS else None, + load_model_name=FLAGS['load_model_name'] if 
'load_model_name' in FLAGS else None, + mu=FLAGS['mu'] if 'mu' in FLAGS else None, + # train + init_method=FLAGS['init_method'] if 'init_method' in FLAGS else 'tnormal', + init_value=FLAGS['init_value'] if 'init_value' in FLAGS else 0.01, + embed_l2=FLAGS['embed_l2'] if 'embed_l2' in FLAGS else 0.0000, + embed_l1=FLAGS['embed_l1'] if 'embed_l1' in FLAGS else 0.0000, + layer_l2=FLAGS['layer_l2'] if 'layer_l2' in FLAGS else 0.0000, + layer_l1=FLAGS['layer_l1'] if 'layer_l1' in FLAGS else 0.0000, + cross_l2=FLAGS['cross_l2'] if 'cross_l2' in FLAGS else 0.0000, + cross_l1=FLAGS['cross_l1'] if 'cross_l1' in FLAGS else 0.0000, + learning_rate=FLAGS['learning_rate'] if 'learning_rate' in FLAGS else 0.001, + loss=FLAGS['loss'] if 'loss' in FLAGS else None, + optimizer=FLAGS['optimizer'] if 'optimizer' in FLAGS else 'adam', + epochs=FLAGS['epochs'] if 'epochs' in FLAGS else 10, + batch_size=FLAGS['batch_size'] if 'batch_size' in FLAGS else 1, + # show info + log=FLAGS['log'] if 'log' in FLAGS else "log", + logger=None, + show_step=FLAGS['show_step'] if 'show_step' in FLAGS else 1, + save_epoch=FLAGS['save_epoch'] if 'save_epoch' in FLAGS else 5, + metrics=FLAGS['metrics'] if 'metrics' in FLAGS else None + ) + + +def check_type(config): + """check config type""" + # check parameter type + int_parameters = ['FEATURE_COUNT', 'FIELD_COUNT', 'dim', 'epochs', 'batch_size', 'show_step', \ + 'save_epoch', 'PAIR_NUM', 'DNN_FIELD_NUM', 'attention_layer_sizes', \ + 'n_user', 'n_item', 'n_user_attr', 'n_item_attr'] + for param in int_parameters: + if param in config and not isinstance(config[param], int): + raise TypeError("parameters {0} must be int".format(param)) + + float_parameters = ['init_value', 'learning_rate', 'embed_l2', \ + 'embed_l1', 'layer_l2', 'layer_l1', 'mu'] + for param in float_parameters: + if param in config and not isinstance(config[param], float): + raise TypeError("parameters {0} must be float".format(param)) + + str_parameters = ['train_file', 'eval_file', 'test_file', 'infer_file', 'method', \ + 'load_model_name', 'loss', 'optimizer', 'init_method', 'attention_activation'] + for param in str_parameters: + if param in config and not isinstance(config[param], str): + raise TypeError("parameters {0} must be str".format(param)) + + list_parameters = ['layer_sizes', 'activation', 'dropout'] + for param in list_parameters: + if param in config and not isinstance(config[param], list): + raise TypeError("parameters {0} must be list".format(param)) + + if ('data_format' in config) and (not config['data_format'] in ['ffm', 'din', 'cccfnet']): + raise TypeError("parameters data_format must be din" \ + ",ffm, cccfnet but is {0}".format(config['data_format'])) + + +def check_nn_config(config): + """check neural networks config""" + if config['model']['model_type'] in ['fm']: + required_parameters = ['train_file', 'eval_file', 'FEATURE_COUNT', 'dim', 'loss', 'data_format', 'method'] + elif config['model']['model_type'] in ['lr']: + required_parameters = ['train_file', 'eval_file', 'FEATURE_COUNT', 'loss', 'data_format', 'method'] + elif config['model']['model_type'] in ['din']: + required_parameters = ['train_file', 'eval_file', 'PAIR_NUM', 'DNN_FIELD_NUM', 'FEATURE_COUNT', 'dim', \ + 'layer_sizes', 'activation', 'attention_layer_sizes', 'attention_activation', 'loss', \ + 'data_format', 'dropout', 'method'] + elif config['model']['model_type'] in ['cccfnet']: + required_parameters = ['train_file', 'eval_file', 'dim', 'layer_sizes', 'n_user', 'n_item', 'n_user_attr', + 'n_item_attr', + 
'activation', 'loss', 'data_format', 'dropout', 'mu', 'method']
+    elif config['model']['model_type'] in ['exDeepFM']:
+        required_parameters = ['train_file', 'eval_file', 'FIELD_COUNT', 'FEATURE_COUNT', 'method',
+                               'dim', 'layer_sizes', 'cross_layer_sizes', 'activation', 'loss', 'data_format', 'dropout']
+    elif config['model']['model_type'] in ['deepcross']:
+        required_parameters = ['train_file', 'eval_file', 'FIELD_COUNT', 'FEATURE_COUNT', 'method',
+                               'dim', 'layer_sizes', 'cross_layers', 'activation', 'loss', 'data_format',
+                               'dropout']
+    else:
+        required_parameters = ['train_file', 'eval_file', 'FIELD_COUNT', 'FEATURE_COUNT', 'method',
+                               'dim', 'layer_sizes', 'activation', 'loss', 'data_format', 'dropout']
+    f_config = flat_config(config)
+    # check required parameters
+    for param in required_parameters:
+        if param not in f_config:
+            raise ValueError("parameters {0} must be set".format(param))
+    if f_config['model_type'] == 'din':
+        if f_config['data_format'] != 'din':
+            raise ValueError(
+                "for din model, data format must be din, but it is set to {0}".format(f_config['data_format']))
+    elif f_config['model_type'] == 'cccfnet':
+        if f_config['data_format'] != 'cccfnet':
+            raise ValueError(
+                "for cccfnet model, data format must be cccfnet, but it is set to {0}".format(f_config['data_format']))
+    else:
+        if f_config['data_format'] != 'ffm':
+            raise ValueError("data format must be ffm, but it is set to {0}".format(f_config['data_format']))
+    check_type(f_config)
+
+
+def check_config(config):
+    """check networks config"""
+    if config['model']['model_type'] not in ['deepFM', 'deepWide', 'dnn', 'ipnn', \
+                                             'opnn', 'fm', 'lr', 'din', 'cccfnet', 'deepcross', 'exDeepFM', "cross", "CIN"]:
+        raise ValueError(
+            "model type must be cccfnet, deepFM, deepWide, dnn, ipnn, opnn, fm, lr, din, deepcross, exDeepFM, cross, CIN but it is set to {0}".format(
+                config['model']['model_type']))
+    check_nn_config(config)
+
+
+# the train process loads its config from yaml
+def load_yaml():
+    """load config from yaml"""
+    yaml_name = util.CONFIG_DIR + util.TRAIN_YAML
+    print('training network configuration file is {0}'.format(yaml_name))
+    util.check_file_exist(yaml_name)
+    config = util.load_yaml_file(yaml_name)
+    return config
+
+
+def main():
+    """main function"""
+    # flag = True
+    util.check_tensorflow_version()
+    util.check_and_mkdir()
+    #util.TRAIN_YAML = yaml
+    config = load_yaml()
+    check_config(config)
+    hparams = create_hparams(config)
+    print(hparams.values())
+    log = Log(hparams)
+    hparams.logger = log.logger
+    train.train(hparams)
+
+
+main()
diff --git a/examples/xDeepFM/src/base_model.py b/examples/xDeepFM/src/base_model.py
new file mode 100644
index 00000000..320f0876
--- /dev/null
+++ b/examples/xDeepFM/src/base_model.py
@@ -0,0 +1,193 @@
+"""define base class model"""
+import abc
+import math
+import tensorflow as tf
+import utils.util as util
+from IO.iterator import BaseIterator
+
+__all__ = ["BaseModel"]
+
+
+class BaseModel(object):
+    def __init__(self, hparams, iterator, scope=None):
+        assert isinstance(iterator, BaseIterator)
+        tf.set_random_seed(1234)
+        self.iterator = iterator
+        self.layer_params = []
+        self.embed_params = []
+        self.cross_params = []
+        self.layer_keeps = None
+        self.keep_prob_train = None
+        self.keep_prob_test = None
+        self.initializer = self._get_initializer(hparams)
+        self.logit = self._build_graph(hparams)
+        self.pred = self._get_pred(self.logit, hparams)
+        self.data_loss = self._compute_data_loss(hparams)
+        self.regular_loss = self._compute_regular_loss(hparams)
+        self.loss =
tf.add(self.data_loss, self.regular_loss) + self.saver = tf.train.Saver(max_to_keep=hparams.epochs) + self.update = self._build_train_opt(hparams) + self.init_op = tf.global_variables_initializer() + self.merged = self._add_summaries() + + def _get_pred(self, logit, hparams): + if hparams.method == 'regression': + pred = tf.identity(logit) + elif hparams.method == 'classification': + pred = tf.sigmoid(logit) + else: + raise ValueError("method must be regression or classification, but now is {0}".format(hparams.method)) + return pred + + def _add_summaries(self): + tf.summary.scalar("data_loss", self.data_loss) + tf.summary.scalar("regular_loss", self.regular_loss) + tf.summary.scalar("loss", self.loss) + merged = tf.summary.merge_all() + return merged + + @abc.abstractmethod + def _build_graph(self, hparams): + """Subclass must implement this.""" + pass + + def _l2_loss(self, hparams): + l2_loss = tf.zeros([1], dtype=tf.float32) + # embedding_layer l2 loss + for param in self.embed_params: + l2_loss = tf.add(l2_loss, tf.multiply(hparams.embed_l2, tf.nn.l2_loss(param))) + params = self.layer_params + for param in params: + l2_loss = tf.add(l2_loss, tf.multiply(hparams.layer_l2, tf.nn.l2_loss(param))) + return l2_loss + + def _l1_loss(self, hparams): + l1_loss = tf.zeros([1], dtype=tf.float32) + # embedding_layer l2 loss + for param in self.embed_params: + l1_loss = tf.add(l1_loss, tf.multiply(hparams.embed_l1, tf.norm(param, ord=1))) + params = self.layer_params + for param in params: + l1_loss = tf.add(l1_loss, tf.multiply(hparams.layer_l1, tf.norm(param, ord=1))) + return l1_loss + + def _cross_l_loss(self, hparams): + cross_l_loss = tf.zeros([1], dtype=tf.float32) + for param in self.cross_params: + cross_l_loss = tf.add(cross_l_loss, tf.multiply(hparams.cross_l1, tf.norm(param, ord=1))) + cross_l_loss = tf.add(cross_l_loss, tf.multiply(hparams.cross_l2, tf.norm(param, ord=1))) + return cross_l_loss + + def _get_initializer(self, hparams): + if hparams.init_method == 'tnormal': + return tf.truncated_normal_initializer(stddev=hparams.init_value) + elif hparams.init_method == 'uniform': + return tf.random_uniform_initializer(-hparams.init_value, hparams.init_value) + elif hparams.init_method == 'normal': + return tf.random_normal_initializer(stddev=hparams.init_value) + elif hparams.init_method == 'xavier_normal': + return tf.contrib.layers.xavier_initializer(uniform=False) + elif hparams.init_method == 'xavier_uniform': + return tf.contrib.layers.xavier_initializer(uniform=True) + elif hparams.init_method == 'he_normal': + return tf.contrib.layers.variance_scaling_initializer( \ + factor=2.0, mode='FAN_IN', uniform=False) + elif hparams.init_method == 'he_uniform': + return tf.contrib.layers.variance_scaling_initializer( \ + factor=2.0, mode='FAN_IN', uniform=True) + else: + return tf.truncated_normal_initializer(stddev=hparams.init_value) + + def _compute_data_loss(self, hparams): + if hparams.loss == 'cross_entropy_loss': + data_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits( \ + logits=tf.reshape(self.logit, [-1]), \ + labels=tf.reshape(self.iterator.labels, [-1]))) + elif hparams.loss == 'square_loss': + data_loss = tf.sqrt(tf.reduce_mean( + tf.squared_difference(tf.reshape(self.pred, [-1]), tf.reshape(self.iterator.labels, [-1])))) + elif hparams.loss == 'log_loss': + data_loss = tf.reduce_mean(tf.losses.log_loss(predictions=tf.reshape(self.pred, [-1]), + labels=tf.reshape(self.iterator.labels, [-1]))) + else: + raise ValueError("this loss not defined 
{0}".format(hparams.loss)) + return data_loss + + def _compute_regular_loss(self, hparams): + regular_loss = self._l2_loss(hparams) + self._l1_loss(hparams) + self._cross_l_loss(hparams) + regular_loss = tf.reduce_sum(regular_loss) + return regular_loss + + def _build_train_opt(self, hparams): + def train_opt(hparams): + if hparams.optimizer == 'adadelta': + train_step = tf.train.AdadeltaOptimizer( \ + hparams.learning_rate).minimize(self.loss) + elif hparams.optimizer == 'adagrad': + train_step = tf.train.AdagradOptimizer( \ + hparams.learning_rate).minimize(self.loss) + elif hparams.optimizer == 'sgd': + train_step = tf.train.GradientDescentOptimizer( \ + hparams.learning_rate).minimize(self.loss) + elif hparams.optimizer == 'adam': + train_step = tf.train.AdamOptimizer( \ + hparams.learning_rate).minimize(self.loss) + elif hparams.optimizer == 'ftrl': + train_step = tf.train.FtrlOptimizer( \ + hparams.learning_rate).minimize(self.loss) + elif hparams.optimizer == 'gd': + train_step = tf.train.GradientDescentOptimizer( \ + hparams.learning_rate).minimize(self.loss) + elif hparams.optimizer == 'padagrad': + train_step = tf.train.ProximalAdagradOptimizer( \ + hparams.learning_rate).minimize(self.loss) + elif hparams.optimizer == 'pgd': + train_step = tf.train.ProximalGradientDescentOptimizer( \ + hparams.learning_rate).minimize(self.loss) + elif hparams.optimizer == 'rmsprop': + train_step = tf.train.RMSPropOptimizer( \ + hparams.learning_rate).minimize(self.loss) + else: + train_step = tf.train.GradientDescentOptimizer( \ + hparams.learning_rate).minimize(self.loss) + return train_step + + train_step = train_opt(hparams) + return train_step + + def _active_layer(self, logit, scope, activation, layer_idx): + logit = self._dropout(logit, layer_idx) + logit = self._activate(logit, activation) + return logit + + def _activate(self, logit, activation): + if activation == 'sigmoid': + return tf.nn.sigmoid(logit) + elif activation == 'softmax': + return tf.nn.softmax(logit) + elif activation == 'relu': + return tf.nn.relu(logit) + elif activation == 'tanh': + return tf.nn.tanh(logit) + elif activation == 'elu': + return tf.nn.elu(logit) + elif activation == 'identity': + return tf.identity(logit) + else: + raise ValueError("this activations not defined {0}".format(activation)) + + def _dropout(self, logit, layer_idx): + logit = tf.nn.dropout(x=logit, keep_prob=self.layer_keeps[layer_idx]) + return logit + + def train(self, sess): + return sess.run([self.update, self.loss, self.data_loss, self.merged], \ + feed_dict={self.layer_keeps: self.keep_prob_train}) + + def eval(self, sess): + return sess.run([self.loss, self.data_loss, self.pred, self.iterator.labels], \ + feed_dict={self.layer_keeps: self.keep_prob_test}) + + def infer(self, sess): + return sess.run([self.pred], \ + feed_dict={self.layer_keeps: self.keep_prob_test}) diff --git a/examples/xDeepFM/src/exDeepFM.py b/examples/xDeepFM/src/exDeepFM.py new file mode 100644 index 00000000..7167d460 --- /dev/null +++ b/examples/xDeepFM/src/exDeepFM.py @@ -0,0 +1,409 @@ +"""define Factorization-Machine based Neural Network Model""" +import math +import numpy as np +import tensorflow as tf +from src.base_model import BaseModel + +__all__ = ["ExtremeDeepFMModel"] + + +class ExtremeDeepFMModel(BaseModel): + """define Factorization-Machine based Neural Network Model""" + + def _build_graph(self, hparams): + self.keep_prob_train = 1 - np.array(hparams.dropout) + self.keep_prob_test = np.ones_like(hparams.dropout) + self.layer_keeps = 
tf.placeholder(tf.float32) + with tf.variable_scope("exDeepFm") as scope: + with tf.variable_scope("embedding", initializer=self.initializer) as escope: + self.embedding = tf.get_variable(name='embedding_layer', + shape=[hparams.FEATURE_COUNT, hparams.dim], + dtype=tf.float32) + self.embed_params.append(self.embedding) + embed_out, embed_layer_size = self._build_embedding(hparams) + logit = self._build_linear(hparams) + # logit = tf.add(logit, self._build_fm(hparams)) + # res: use resnet? direct: without split? reduce_D: Dimension reduction? f_dim: dimension of reduce_D + logit = tf.add(logit, self._build_extreme_FM(hparams, embed_out, res=False, direct=False, bias=False, reduce_D=False, f_dim=2)) + # logit = tf.add(logit, self._build_extreme_FM_quick(hparams, embed_out)) + logit = tf.add(logit, self._build_dnn(hparams, embed_out, embed_layer_size)) + return logit + + def _build_embedding(self, hparams): + fm_sparse_index = tf.SparseTensor(self.iterator.dnn_feat_indices, + self.iterator.dnn_feat_values, + self.iterator.dnn_feat_shape) + fm_sparse_weight = tf.SparseTensor(self.iterator.dnn_feat_indices, + self.iterator.dnn_feat_weights, + self.iterator.dnn_feat_shape) + w_fm_nn_input_orgin = tf.nn.embedding_lookup_sparse(self.embedding, + fm_sparse_index, + fm_sparse_weight, + combiner="sum") + embedding = tf.reshape(w_fm_nn_input_orgin, [-1, hparams.dim * hparams.FIELD_COUNT]) + embedding_size = hparams.FIELD_COUNT * hparams.dim + return embedding, embedding_size + + def _build_linear(self, hparams): + with tf.variable_scope("linear_part", initializer=self.initializer) as scope: + w_linear = tf.get_variable(name='w', + shape=[hparams.FEATURE_COUNT, 1], + dtype=tf.float32) + b_linear = tf.get_variable(name='b', + shape=[1], + dtype=tf.float32, + initializer=tf.zeros_initializer()) + x = tf.SparseTensor(self.iterator.fm_feat_indices, + self.iterator.fm_feat_values, + self.iterator.fm_feat_shape) + linear_output = tf.add(tf.sparse_tensor_dense_matmul(x, w_linear), b_linear) + self.layer_params.append(w_linear) + self.layer_params.append(b_linear) + tf.summary.histogram("linear_part/w", w_linear) + tf.summary.histogram("linear_part/b", b_linear) + return linear_output + + def _build_fm(self, hparams): + with tf.variable_scope("fm_part") as scope: + x = tf.SparseTensor(self.iterator.fm_feat_indices, + self.iterator.fm_feat_values, + self.iterator.fm_feat_shape) + xx = tf.SparseTensor(self.iterator.fm_feat_indices, + tf.pow(self.iterator.fm_feat_values, 2), + self.iterator.fm_feat_shape) + fm_output = 0.5 * tf.reduce_sum( + tf.pow(tf.sparse_tensor_dense_matmul(x, self.embedding), 2) - \ + tf.sparse_tensor_dense_matmul(xx, + tf.pow(self.embedding, 2)), 1, + keep_dims=True) + return fm_output + """ + def _build_extreme_FM_slow_bad(self, hparams, nn_input): + hidden_nn_layers = [] + field_nums = [] + final_len = 0 + field_num = hparams.FIELD_COUNT + nn_input = tf.reshape(nn_input, shape=[-1, int(field_num), hparams.dim]) + field_nums.append(int(field_num)) + hidden_nn_layers.append(nn_input) + final_result = [] + with tf.variable_scope("exfm_part", initializer=self.initializer) as scope: + for idx, layer_size in enumerate(hparams.cross_layer_sizes): + dot_results = [] + split_tensor = tf.split(hidden_nn_layers[-1], field_nums[-1]*[1], 1) + for s in split_tensor: + s = tf.tile(s, [1, field_nums[0], 1]) + dot_results.append(tf.multiply(s, hidden_nn_layers[0])) + dot_result = tf.concat(dot_results, axis=1) + filters = tf.get_variable(name="f_"+str(idx), + shape=[1, len(dot_results)*field_nums[0], 
layer_size], + dtype=tf.float32) + dot_result = tf.transpose(dot_result, perm=[0, 2, 1]) + curr_out = tf.nn.conv1d(dot_result, filters=filters, stride=1, padding='VALID') + curr_out = tf.transpose(curr_out, perm=[0, 2, 1]) + + if idx != len(hparams.cross_layer_sizes)-1: + next_hidden, direct_connect = tf.split(curr_out, 2*[int(layer_size / 2)], 1) + final_len += int(layer_size / 2) + else: + direct_connect = curr_out + next_hidden=0 + final_len += layer_size + + ### + direct_connect = curr_out + next_hidden = curr_out + final_len += layer_size + ### + + final_result.append(direct_connect) + hidden_nn_layers.append(next_hidden) + field_nums.append(int(layer_size / 2)) + # field_nums.append(int(layer_size)) + self.cross_params.append(filters) + result = tf.concat(final_result, axis=1) + result = tf.reduce_sum(result, -1) + ### + # residual network + w_nn_output1 = tf.get_variable(name='w_nn_output1', + shape=[final_len, 128], + dtype=tf.float32) + b_nn_output1 = tf.get_variable(name='b_nn_output1', + shape=[128], + dtype=tf.float32, + initializer=tf.zeros_initializer()) + self.layer_params.append(w_nn_output1) + self.layer_params.append(b_nn_output1) + exFM_out0 = tf.nn.xw_plus_b(result, w_nn_output1, b_nn_output1) + exFM_out1 = self._active_layer(logit=exFM_out0, + scope=scope, + activation="relu", + layer_idx=0) + w_nn_output2 = tf.get_variable(name='w_nn_output2', + shape=[128 + final_len, 1], + dtype=tf.float32) + b_nn_output2 = tf.get_variable(name='b_nn_output2', + shape=[1], + dtype=tf.float32, + initializer=tf.zeros_initializer()) + self.layer_params.append(w_nn_output2) + self.layer_params.append(b_nn_output2) + exFM_in = tf.concat([exFM_out1, result], axis=1, name="user_emb") + exFM_out = tf.nn.xw_plus_b(exFM_in, w_nn_output2, b_nn_output2) + + ### + w_nn_output = tf.get_variable(name='w_nn_output', + shape=[final_len, 1], + dtype=tf.float32) + b_nn_output = tf.get_variable(name='b_nn_output', + shape=[1], + dtype=tf.float32) + self.layer_params.append(w_nn_output) + self.layer_params.append(b_nn_output) + exFM_out = tf.nn.xw_plus_b(result, w_nn_output, b_nn_output) + + return exFM_out + """ + + def _build_extreme_FM(self, hparams, nn_input, res=False, direct=False, bias=False, reduce_D=False, f_dim=2): + hidden_nn_layers = [] + field_nums = [] + final_len = 0 + field_num = hparams.FIELD_COUNT + nn_input = tf.reshape(nn_input, shape=[-1, int(field_num), hparams.dim]) + field_nums.append(int(field_num)) + hidden_nn_layers.append(nn_input) + final_result = [] + split_tensor0 = tf.split(hidden_nn_layers[0], hparams.dim * [1], 2) + with tf.variable_scope("exfm_part", initializer=self.initializer) as scope: + for idx, layer_size in enumerate(hparams.cross_layer_sizes): + split_tensor = tf.split(hidden_nn_layers[-1], hparams.dim * [1], 2) + dot_result_m = tf.matmul(split_tensor0, split_tensor, transpose_b=True) + dot_result_o = tf.reshape(dot_result_m, shape=[hparams.dim, -1, field_nums[0]*field_nums[-1]]) + dot_result = tf.transpose(dot_result_o, perm=[1, 0, 2]) + + if reduce_D: + hparams.logger.info("reduce_D") + filters0 = tf.get_variable("f0_" + str(idx), + shape=[1, layer_size, field_nums[0], f_dim], + dtype=tf.float32) + filters_ = tf.get_variable("f__" + str(idx), + shape=[1, layer_size, f_dim, field_nums[-1]], + dtype=tf.float32) + filters_m = tf.matmul(filters0, filters_) + filters_o = tf.reshape(filters_m, shape=[1, layer_size, field_nums[0] * field_nums[-1]]) + filters = tf.transpose(filters_o, perm=[0, 2, 1]) + else: + filters = tf.get_variable(name="f_"+str(idx), + shape=[1, 
field_nums[-1]*field_nums[0], layer_size], + dtype=tf.float32) + # dot_result = tf.transpose(dot_result, perm=[0, 2, 1]) + curr_out = tf.nn.conv1d(dot_result, filters=filters, stride=1, padding='VALID') + + # BIAS ADD + if bias: + hparams.logger.info("bias") + b = tf.get_variable(name="f_b" + str(idx), + shape=[layer_size], + dtype=tf.float32, + initializer=tf.zeros_initializer()) + curr_out = tf.nn.bias_add(curr_out, b) + self.cross_params.append(b) + self.layer_params.append(b) + + curr_out = self._activate(curr_out, hparams.cross_activation) + + curr_out = tf.transpose(curr_out, perm=[0, 2, 1]) + + if direct: + hparams.logger.info("all direct connect") + direct_connect = curr_out + next_hidden = curr_out + final_len += layer_size + field_nums.append(int(layer_size)) + + else: + hparams.logger.info("split connect") + if idx != len(hparams.cross_layer_sizes) - 1: + next_hidden, direct_connect = tf.split(curr_out, 2 * [int(layer_size / 2)], 1) + final_len += int(layer_size / 2) + else: + direct_connect = curr_out + next_hidden = 0 + final_len += layer_size + field_nums.append(int(layer_size / 2)) + + final_result.append(direct_connect) + hidden_nn_layers.append(next_hidden) + + self.cross_params.append(filters) + self.layer_params.append(filters) + + result = tf.concat(final_result, axis=1) + result = tf.reduce_sum(result, -1) + if res: + hparams.logger.info("residual network") + w_nn_output1 = tf.get_variable(name='w_nn_output1', + shape=[final_len, 128], + dtype=tf.float32) + b_nn_output1 = tf.get_variable(name='b_nn_output1', + shape=[128], + dtype=tf.float32, + initializer=tf.zeros_initializer()) + self.layer_params.append(w_nn_output1) + self.layer_params.append(b_nn_output1) + exFM_out0 = tf.nn.xw_plus_b(result, w_nn_output1, b_nn_output1) + exFM_out1 = self._active_layer(logit=exFM_out0, + scope=scope, + activation="relu", + layer_idx=0) + w_nn_output2 = tf.get_variable(name='w_nn_output2', + shape=[128 + final_len, 1], + dtype=tf.float32) + b_nn_output2 = tf.get_variable(name='b_nn_output2', + shape=[1], + dtype=tf.float32, + initializer=tf.zeros_initializer()) + self.layer_params.append(w_nn_output2) + self.layer_params.append(b_nn_output2) + exFM_in = tf.concat([exFM_out1, result], axis=1, name="user_emb") + exFM_out = tf.nn.xw_plus_b(exFM_in, w_nn_output2, b_nn_output2) + + else: + hparams.logger.info("no residual network") + w_nn_output = tf.get_variable(name='w_nn_output', + shape=[final_len, 1], + dtype=tf.float32) + b_nn_output = tf.get_variable(name='b_nn_output', + shape=[1], + dtype=tf.float32, + initializer=tf.zeros_initializer()) + self.layer_params.append(w_nn_output) + self.layer_params.append(b_nn_output) + exFM_out = tf.nn.xw_plus_b(result, w_nn_output, b_nn_output) + + return exFM_out + + def _build_extreme_FM_quick(self, hparams, nn_input): + hidden_nn_layers = [] + field_nums = [] + final_len = 0 + field_num = hparams.FIELD_COUNT + nn_input = tf.reshape(nn_input, shape=[-1, int(field_num), hparams.dim]) + field_nums.append(int(field_num)) + hidden_nn_layers.append(nn_input) + final_result = [] + split_tensor0 = tf.split(hidden_nn_layers[0], hparams.dim * [1], 2) + with tf.variable_scope("exfm_part", initializer=self.initializer) as scope: + for idx, layer_size in enumerate(hparams.cross_layer_sizes): + split_tensor = tf.split(hidden_nn_layers[-1], hparams.dim * [1], 2) + dot_result_m = tf.matmul(split_tensor0, split_tensor, transpose_b=True) + dot_result_o = tf.reshape(dot_result_m, shape=[hparams.dim, -1, field_nums[0]*field_nums[-1]]) + dot_result = 
tf.transpose(dot_result_o, perm=[1, 0, 2]) + + filters = tf.get_variable(name="f_"+str(idx), + shape=[1, field_nums[-1]*field_nums[0], layer_size], + dtype=tf.float32) + # dot_result = tf.transpose(dot_result, perm=[0, 2, 1]) + curr_out = tf.nn.conv1d(dot_result, filters=filters, stride=1, padding='VALID') + + + curr_out = tf.transpose(curr_out, perm=[0, 2, 1]) + + + hparams.logger.info("split connect") + if idx != len(hparams.cross_layer_sizes) - 1: + next_hidden, direct_connect = tf.split(curr_out, 2 * [int(layer_size / 2)], 1) + final_len += int(layer_size / 2) + else: + direct_connect = curr_out + next_hidden = 0 + final_len += layer_size + field_nums.append(int(layer_size / 2)) + + final_result.append(direct_connect) + hidden_nn_layers.append(next_hidden) + + self.cross_params.append(filters) + + result = tf.concat(final_result, axis=1) + result = tf.reduce_sum(result, -1) + + hparams.logger.info("no residual network") + w_nn_output = tf.get_variable(name='w_nn_output', + shape=[final_len, 1], + dtype=tf.float32) + b_nn_output = tf.get_variable(name='b_nn_output', + shape=[1], + dtype=tf.float32, + initializer=tf.zeros_initializer()) + self.layer_params.append(w_nn_output) + self.layer_params.append(b_nn_output) + exFM_out = tf.nn.xw_plus_b(result, w_nn_output, b_nn_output) + + return exFM_out + + + def _build_dnn(self, hparams, embed_out, embed_layer_size): + """ + fm_sparse_index = tf.SparseTensor(self.iterator.dnn_feat_indices, + self.iterator.dnn_feat_values, + self.iterator.dnn_feat_shape) + fm_sparse_weight = tf.SparseTensor(self.iterator.dnn_feat_indices, + self.iterator.dnn_feat_weights, + self.iterator.dnn_feat_shape) + w_fm_nn_input_orgin = tf.nn.embedding_lookup_sparse(self.embedding, + fm_sparse_index, + fm_sparse_weight, + combiner="sum") + w_fm_nn_input = tf.reshape(w_fm_nn_input_orgin, [-1, hparams.dim * hparams.FIELD_COUNT]) + last_layer_size = hparams.FIELD_COUNT * hparams.dim + """ + w_fm_nn_input = embed_out + last_layer_size = embed_layer_size + layer_idx = 0 + hidden_nn_layers = [] + hidden_nn_layers.append(w_fm_nn_input) + with tf.variable_scope("nn_part", initializer=self.initializer) as scope: + for idx, layer_size in enumerate(hparams.layer_sizes): + curr_w_nn_layer = tf.get_variable(name='w_nn_layer' + str(layer_idx), + shape=[last_layer_size, layer_size], + dtype=tf.float32) + curr_b_nn_layer = tf.get_variable(name='b_nn_layer' + str(layer_idx), + shape=[layer_size], + dtype=tf.float32, + initializer=tf.zeros_initializer()) + tf.summary.histogram("nn_part/" + 'w_nn_layer' + str(layer_idx), + curr_w_nn_layer) + tf.summary.histogram("nn_part/" + 'b_nn_layer' + str(layer_idx), + curr_b_nn_layer) + curr_hidden_nn_layer = tf.nn.xw_plus_b(hidden_nn_layers[layer_idx], + curr_w_nn_layer, + curr_b_nn_layer) + scope = "nn_part" + str(idx) + activation = hparams.activation[idx] + curr_hidden_nn_layer = self._active_layer(logit=curr_hidden_nn_layer, + scope=scope, + activation=activation, + layer_idx=idx) + hidden_nn_layers.append(curr_hidden_nn_layer) + layer_idx += 1 + last_layer_size = layer_size + self.layer_params.append(curr_w_nn_layer) + self.layer_params.append(curr_b_nn_layer) + + w_nn_output = tf.get_variable(name='w_nn_output', + shape=[last_layer_size, 1], + dtype=tf.float32) + b_nn_output = tf.get_variable(name='b_nn_output', + shape=[1], + dtype=tf.float32, + initializer=tf.zeros_initializer()) + tf.summary.histogram("nn_part/" + 'w_nn_output' + str(layer_idx), + w_nn_output) + tf.summary.histogram("nn_part/" + 'b_nn_output' + str(layer_idx), + 
b_nn_output) + self.layer_params.append(w_nn_output) + self.layer_params.append(b_nn_output) + nn_output = tf.nn.xw_plus_b(hidden_nn_layers[-1], w_nn_output, b_nn_output) + return nn_output diff --git a/examples/xDeepFM/train.py b/examples/xDeepFM/train.py new file mode 100644 index 00000000..2a9a0a31 --- /dev/null +++ b/examples/xDeepFM/train.py @@ -0,0 +1,304 @@ +"""define train, infer, eval, test process""" +import numpy as np +import os, time, collections +import tensorflow as tf +from IO.iterator import FfmIterator #, DinIterator, CCCFNetIterator +#from IO.din_cache import DinCache +from IO.ffm_cache import FfmCache +#from IO.cccfnet_cache import CCCFNetCache +#from src.deep_fm import DeepfmModel +#from src.deep_wide import DeepWideModel +#from src.fm import FmModel +#from src.dnn import DnnModel +#from src.opnn import OpnnModel +#from src.ipnn import IpnnModel +#from src.lr import LrModel +#from src.din import DinModel +#from src.cccfnet import CCCFModel +#from src.deepcross import DeepCrossModel +from src.exDeepFM import ExtremeDeepFMModel +from src.CIN import CINModel +#from src.cross import CrossModel +import utils.util as util +import utils.metric as metric +# from utils.log import Log + +# log = Log(hparams) + +class TrainModel(collections.namedtuple("TrainModel", ("graph", "model", "iterator", "filenames"))): + """define train class, include graph, model, iterator""" + pass + + +def create_train_model(model_creator, hparams, scope=None): + graph = tf.Graph() + with graph.as_default(): + # feed train file name, valid file name, or test file name + filenames = tf.placeholder(tf.string, shape=[None]) + #src_dataset = tf.contrib.data.TFRecordDataset(filenames) + src_dataset = tf.data.TFRecordDataset(filenames) + + if hparams.data_format == 'ffm': + batch_input = FfmIterator(src_dataset) + elif hparams.data_format == 'din': + batch_input = DinIterator(src_dataset) + elif hparams.data_format == 'cccfnet': + batch_input = CCCFNetIterator(src_dataset) + else: + raise ValueError("not support {0} format data".format(hparams.data_format)) + # build model + model = model_creator( + hparams, + iterator=batch_input, + scope=scope) + + return TrainModel( + graph=graph, + model=model, + iterator=batch_input, + filenames=filenames) + + +# run evaluation and get evaluted loss +def run_eval(load_model, load_sess, filename, sample_num_file, hparams, flag): + # load sample num + with open(sample_num_file, 'r') as f: + sample_num = int(f.readlines()[0].strip()) + load_sess.run(load_model.iterator.initializer, feed_dict={load_model.filenames: [filename]}) + preds = [] + labels = [] + while True: + try: + _, _, step_pred, step_labels = load_model.model.eval(load_sess) + preds.extend(np.reshape(step_pred, -1)) + labels.extend(np.reshape(step_labels, -1)) + except tf.errors.OutOfRangeError: + break + preds = preds[:sample_num] + labels = labels[:sample_num] + hparams.logger.info("data num:{0:d}".format(len(labels))) + res = metric.cal_metric(labels, preds, hparams, flag) + return res + + +# run infer +def run_infer(load_model, load_sess, filename, hparams, sample_num_file): + # load sample num + with open(sample_num_file, 'r') as f: + sample_num = int(f.readlines()[0].strip()) + if not os.path.exists(util.RES_DIR): + os.mkdir(util.RES_DIR) + load_sess.run(load_model.iterator.initializer, feed_dict={load_model.filenames: [filename]}) + preds = [] + while True: + try: + step_pred = load_model.model.infer(load_sess) + preds.extend(np.reshape(step_pred, -1)) + except tf.errors.OutOfRangeError: + break + 
preds = preds[:sample_num]
+    hparams.res_name = util.convert_res_name(hparams.infer_file)
+    # print('result name:', hparams.res_name)
+    with open(hparams.res_name, 'w') as out:
+        out.write('\n'.join(map(str, preds)))
+
+
+# cache data
+def cache_data(hparams, filename, flag):
+    if hparams.data_format == 'ffm':
+        cache_obj = FfmCache()
+    elif hparams.data_format == 'din':
+        cache_obj = DinCache()
+    elif hparams.data_format == 'cccfnet':
+        cache_obj = CCCFNetCache()
+    else:
+        raise ValueError(
+            "data format must be ffm, din or cccfnet; format {0} is not defined".format(hparams.data_format))
+    if not os.path.exists(util.CACHE_DIR):
+        os.mkdir(util.CACHE_DIR)
+    if flag == 'train':
+        hparams.train_file_cache = util.convert_cached_name(hparams.train_file, hparams.batch_size)
+        cached_name = hparams.train_file_cache
+        sample_num_path = util.TRAIN_NUM
+        impression_id_path = util.TRAIN_IMPRESSION_ID
+    elif flag == 'eval':
+        hparams.eval_file_cache = util.convert_cached_name(hparams.eval_file, hparams.batch_size)
+        cached_name = hparams.eval_file_cache
+        sample_num_path = util.EVAL_NUM
+        impression_id_path = util.EVAL_IMPRESSION_ID
+    elif flag == 'test':
+        hparams.test_file_cache = util.convert_cached_name(hparams.test_file, hparams.batch_size)
+        cached_name = hparams.test_file_cache
+        sample_num_path = util.TEST_NUM
+        impression_id_path = util.TEST_IMPRESSION_ID
+    elif flag == 'infer':
+        hparams.infer_file_cache = util.convert_cached_name(hparams.infer_file, hparams.batch_size)
+        cached_name = hparams.infer_file_cache
+        sample_num_path = util.INFER_NUM
+        impression_id_path = util.INFER_IMPRESSION_ID
+    else:
+        raise ValueError("flag must be train, eval, test or infer")
+    print('cache filename:', filename)
+    if not os.path.isfile(cached_name):
+        print('no cached file found, begin caching...')
+        start_time = time.time()
+        sample_num, impression_id_list = cache_obj.write_tfrecord(filename, cached_name, hparams)
+        util.print_time("caching file used time", start_time)
+        print("data sample num:{0}".format(sample_num))
+        with open(sample_num_path, 'w') as f:
+            f.write(str(sample_num) + '\n')
+        with open(impression_id_path, 'w') as f:
+            for impression_id in impression_id_list:
+                f.write(str(impression_id) + '\n')
+
+
+def train(hparams, scope=None, target_session=""):
+    params = hparams.values()
+    for key, val in params.items():
+        hparams.logger.info(str(key) + ':' + str(val))
+
+    print('load and cache data...')
+    if hparams.train_file is not None:
+        cache_data(hparams, hparams.train_file, flag='train')
+    if hparams.eval_file is not None:
+        cache_data(hparams, hparams.eval_file, flag='eval')
+    if hparams.test_file is not None:
+        cache_data(hparams, hparams.test_file, flag='test')
+    if hparams.infer_file is not None:
+        cache_data(hparams, hparams.infer_file, flag='infer')
+
+    if hparams.model_type == 'deepFM':
+        model_creator = DeepfmModel
+        print("run deepfm model!")
+    elif hparams.model_type == 'deepWide':
+        model_creator = DeepWideModel
+        print("run deepWide model!")
+    elif hparams.model_type == 'dnn':
+        print("run dnn model!")
+        model_creator = DnnModel
+    elif hparams.model_type == 'ipnn':
+        print("run ipnn model!")
+        model_creator = IpnnModel
+    elif hparams.model_type == 'opnn':
+        print("run opnn model!")
+        model_creator = OpnnModel
+    elif hparams.model_type == 'din':
+        print("run din model!")
+        model_creator = DinModel
+    elif hparams.model_type == 'fm':
+        print("run fm model!")
+        model_creator = FmModel
+    elif hparams.model_type == 'lr':
+        print("run lr model!")
+        model_creator = LrModel
+    elif
hparams.model_type == 'din': + print("run din model!") + model_creator = DinModel + elif hparams.model_type == 'cccfnet': + print("run cccfnet model!") + model_creator = CCCFModel + elif hparams.model_type == 'deepcross': + print("run deepcross model!") + model_creator = DeepCrossModel + elif hparams.model_type == 'exDeepFM': + print("run extreme deepFM model!") + model_creator = ExtremeDeepFMModel + elif hparams.model_type == 'cross': + print("run extreme cross model!") + model_creator = CrossModel + elif hparams.model_type == 'CIN': + print("run extreme cin model!") + model_creator = CINModel + + else: + raise ValueError("model type should be cccfnet, deepFM, deepWide, dnn, fm, lr, ipnn, opnn, din") + + # define train,eval,infer graph + # define train session, eval session, infer session + train_model = create_train_model(model_creator, hparams, scope) + gpuconfig = tf.ConfigProto() + gpuconfig.gpu_options.allow_growth = True + tf.set_random_seed(1234) + train_sess = tf.Session(target=target_session, graph=train_model.graph, config=gpuconfig) + + train_sess.run(train_model.model.init_op) + # load model from checkpoint + if not hparams.load_model_name is None: + checkpoint_path = hparams.load_model_name + try: + train_model.model.saver.restore(train_sess, checkpoint_path) + print('load model', checkpoint_path) + except: + raise IOError("Failed to find any matching files for {0}".format(checkpoint_path)) + print('total_loss = data_loss+regularization_loss, data_loss = {rmse or logloss ..}') + writer = tf.summary.FileWriter(util.SUMMARIES_DIR, train_sess.graph) + last_eval = 0 + for epoch in range(hparams.epochs): + step = 0 + train_sess.run(train_model.iterator.initializer, feed_dict={train_model.filenames: [hparams.train_file_cache]}) + epoch_loss = 0 + train_start = time.time() + train_load_time = 0 + while True: + try: + t1 = time.time() + step_result = train_model.model.train(train_sess) + t3 = time.time() + train_load_time += t3 - t1 + (_, step_loss, step_data_loss, summary) = step_result + writer.add_summary(summary, step) + epoch_loss += step_loss + step += 1 + if step % hparams.show_step == 0: + print('step {0:d} , total_loss: {1:.4f}, data_loss: {2:.4f}' \ + .format(step, step_loss, step_data_loss)) + except tf.errors.OutOfRangeError: + print('finish one epoch!') + break + train_end = time.time() + train_time = train_end - train_start + if epoch % hparams.save_epoch == 0: + checkpoint_path = train_model.model.saver.save( + sess=train_sess, + save_path=util.MODEL_DIR + 'epoch_' + str(epoch)) + # print(checkpoint_path) + train_res = dict() + train_res["loss"] = epoch_loss / step + eval_start = time.time() + # train_res = run_eval(train_model, train_sess, hparams.train_file_cache, util.TRAIN_NUM, hparams, flag='train') + eval_res = run_eval(train_model, train_sess, hparams.eval_file_cache, util.EVAL_NUM, hparams, flag='eval') + train_info = ', '.join( + [str(item[0]) + ':' + str(item[1]) + for item in sorted(train_res.items(), key=lambda x: x[0])]) + eval_info = ', '.join( + [str(item[0]) + ':' + str(item[1]) + for item in sorted(eval_res.items(), key=lambda x: x[0])]) + if hparams.test_file is not None: + test_res = run_eval(train_model, train_sess, hparams.test_file_cache, util.TEST_NUM, hparams, flag='test') + test_info = ', '.join( + [str(item[0]) + ':' + str(item[1]) + for item in sorted(test_res.items(), key=lambda x: x[0])]) + eval_end = time.time() + eval_time = eval_end - eval_start + if hparams.test_file is not None: + print('at epoch {0:d}'.format( + epoch) + ' train info: 
' + train_info + ' eval info: ' + eval_info + ' test info: ' + test_info) + hparams.logger.info('at epoch {0:d}'.format( + epoch) + ' train info: ' + train_info + ' eval info: ' + eval_info + ' test info: ' + test_info) + else: + print('at epoch {0:d}'.format(epoch) + ' train info: ' + train_info + ' eval info: ' + eval_info) + hparams.logger.info('at epoch {0:d}'.format(epoch) + ' train info: ' + train_info + ' eval info: ' + eval_info) + print('at epoch {0:d} , train time: {1:.1f} eval time: {2:.1f}'.format(epoch, train_time, eval_time)) + + hparams.logger.info('at epoch {0:d} , train time: {1:.1f} eval time: {2:.1f}' \ + .format(epoch, train_time, eval_time)) + hparams.logger.info('\n') + + if eval_res["auc"] - last_eval < - 0.003: + break + if eval_res["auc"] > last_eval: + last_eval = eval_res["auc"] + writer.close() + # after train,run infer + if hparams.infer_file is not None: + run_infer(train_model, train_sess, hparams.infer_file_cache, hparams, util.INFER_NUM) diff --git a/examples/xDeepFM/utils/log.py b/examples/xDeepFM/utils/log.py new file mode 100644 index 00000000..a3fa891f --- /dev/null +++ b/examples/xDeepFM/utils/log.py @@ -0,0 +1,20 @@ +"""define logging configure""" +import logging +from datetime import datetime, timedelta, timezone +import platform + +__all__ = ["Log"] +class Log(object): + def __init__(self, hparams): + # UTC To Beijing Time + utc_dt = datetime.utcnow().replace(tzinfo=timezone.utc) + bj_dt = utc_dt.astimezone(timezone(timedelta(hours=8))) + + logging_filename = "logs/"+hparams.log + '__' + bj_dt.strftime('%Y-%m-%d_%H_%M_%S') + '.log' + self.logger = logging.getLogger(__name__) + self.logger.setLevel(logging.INFO) + handler = logging.FileHandler(logging_filename) + handler.setLevel(logging.INFO) + formatter = logging.Formatter('%(message)s') + handler.setFormatter(formatter) + self.logger.addHandler(handler) diff --git a/examples/xDeepFM/utils/metric.py b/examples/xDeepFM/utils/metric.py new file mode 100644 index 00000000..c2749da1 --- /dev/null +++ b/examples/xDeepFM/utils/metric.py @@ -0,0 +1,97 @@ +"""define metrics""" +from collections import defaultdict +from sklearn.metrics import roc_auc_score, log_loss, mean_squared_error +import numpy as np +import utils.util as util + + +def cal_metric(labels, preds, hparams, flag): + """Calculate metrics,such as auc, logloss, group auc""" + res = {} + + def load_impression_id(file_name): + """load impression id, such as user id, news id""" + id_list = [] + with open(file_name, 'r') as f_in: + for line in f_in: + id_list.append(line.strip()) + return id_list + + for metric in hparams.metrics: + if metric == 'auc': + auc = roc_auc_score(np.asarray(labels), np.asarray(preds)) + res['auc'] = round(auc, 4) + elif metric == 'rmse': + rmse = mean_squared_error(np.asarray(labels), np.asarray(preds)) + res['rmse'] = np.sqrt(round(rmse, 4)) + elif metric == 'logloss': + # avoid logloss nan + preds = [max(min(p, 1. 
- 10e-12), 10e-12) for p in preds]
+            logloss = log_loss(np.asarray(labels), np.asarray(preds))
+            res['logloss'] = round(logloss, 4)
+        elif metric == 'group_auc':
+            if flag == 'train':
+                impression_id_list = load_impression_id(util.TRAIN_IMPRESSION_ID)
+                if len(impression_id_list) == 0:
+                    raise ValueError("train data does not have impressionId," \
+                                     "so the group auc can not be calculated!")
+                group_auc = cal_group_auc(labels, preds, impression_id_list)
+                res['group_auc'] = group_auc
+            elif flag == 'eval':
+                impression_id_list = load_impression_id(util.EVAL_IMPRESSION_ID)
+                if len(impression_id_list) == 0:
+                    raise ValueError("eval data does not have impressionId," \
+                                     "so the group auc can not be calculated!")
+                group_auc = cal_group_auc(labels, preds, impression_id_list)
+                res['group_auc'] = group_auc
+            elif flag == 'test':
+                impression_id_list = load_impression_id(util.INFER_IMPRESSION_ID)
+                if len(impression_id_list) == 0:
+                    raise ValueError("infer data does not have impressionId," \
+                                     "so the group auc can not be calculated!")
+                group_auc = cal_group_auc(labels, preds, impression_id_list)
+                res['group_auc'] = group_auc
+            else:
+                raise ValueError("cal metric dataSet should be train, eval or test")
+
+        else:
+            raise ValueError("metric {0} is not defined".format(metric))
+    return res
+
+
+def cal_group_auc(labels, preds, impression_id_list):
+    """Calculate group auc"""
+    if len(impression_id_list) != len(labels):
+        raise ValueError(
+            "impression id num should equal the sample num," \
+            "impression id num is {0}".format(len(impression_id_list)))
+    group_score = defaultdict(lambda: [])
+    group_truth = defaultdict(lambda: [])
+    for idx, truth in enumerate(labels):
+        user_id = impression_id_list[idx]
+        score = preds[idx]
+        truth = labels[idx]
+        group_score[user_id].append(score)
+        group_truth[user_id].append(truth)
+
+    group_flag = defaultdict(lambda: False)
+    for user_id in set(impression_id_list):
+        truths = group_truth[user_id]
+        flag = False
+        for i in range(len(truths) - 1):
+            if truths[i] != truths[i + 1]:
+                flag = True
+                break
+        group_flag[user_id] = flag
+
+    impression_total = 0
+    total_auc = 0
+    #
+    for user_id in group_flag:
+        if group_flag[user_id]:
+            auc = roc_auc_score(np.asarray(group_truth[user_id]), np.asarray(group_score[user_id]))
+            total_auc += auc * len(group_truth[user_id])
+            impression_total += len(group_truth[user_id])
+    group_auc = float(total_auc) / impression_total
+    group_auc = round(group_auc, 4)
+    return group_auc
diff --git a/examples/xDeepFM/utils/util.py b/examples/xDeepFM/utils/util.py
new file mode 100644
index 00000000..36e2ab10
--- /dev/null
+++ b/examples/xDeepFM/utils/util.py
@@ -0,0 +1,83 @@
+"""define util function and global variable"""
+import tensorflow as tf
+import os, sys
+import time, yaml
+from packaging import version
+
+RES_DIR = './res/'
+CACHE_DIR = './cache/'
+MODEL_DIR = './checkpoint/'
+CONFIG_DIR = './config/'
+TRAIN_YAML = 'network.yaml'
+TRAIN_NUM = './cache/train_num.csv'
+EVAL_NUM = './cache/eval_num.csv'
+TEST_NUM = './cache/test_num.csv'
+INFER_NUM = './cache/infer_num.csv'
+LOG_DIR = './logs/'
+FEAT_COUNT_FILE = './cache/feat_cnt.csv'
+TRAIN_IMPRESSION_ID = './cache/train_impressionId.csv'
+EVAL_IMPRESSION_ID = './cache/eval_impressionId.csv'
+TEST_IMPRESSION_ID = './cache/test_impressionId.csv'
+INFER_IMPRESSION_ID = './cache/infer_impressionId.csv'
+SUMMARIES_DIR = './logs/'
+# define din format feature
+DIN_FORMAT_SPLIT = '#'
+# split feature and userid
+USER_ID_SPLIT = '%'
+
+
+def check_and_mkdir():
+    def make_dir(DIR):
+        if not os.path.exists(DIR):
+            os.mkdir(DIR)
+
+    make_dir(RES_DIR)
+    make_dir(CACHE_DIR)
+    make_dir(MODEL_DIR)
+    make_dir(CONFIG_DIR)
+    make_dir(LOG_DIR)
+
+
+def check_tensorflow_version():
+    if version.parse(tf.__version__) < version.parse("1.2.0"):
+        raise EnvironmentError("Tensorflow version must be >= 1.2.0, but version is {0}". \
+                               format(tf.__version__))
+
+
+def print_time(s, start_time):
+    """Take a start time, print elapsed duration, and return a new time."""
+    print("%s, %ds, %s." % (s, (time.time() - start_time), time.ctime()))
+    sys.stdout.flush()
+    return time.time()
+
+
+def check_file_exist(filename):
+    if not os.path.isfile(filename):
+        raise ValueError("{0} does not exist".format(filename))
+
+
+def load_yaml_file(filename):
+    with open(filename) as f:
+        try:
+            config = yaml.load(f)
+        except:
+            raise IOError("load {0} error!".format(filename))
+    return config
+
+
+def convert_cached_name(file_name, batch_size):
+    prefix = CACHE_DIR + 'batch_size_' + str(batch_size) + '_'
+    prefix += (file_name.strip().split('/'))[-1]
+    train_cache_name = prefix.replace(".txt", ".tfrecord"). \
+        replace(".csv", ".tfrecord"). \
+        replace(".libsvm", ".tfrecord")
+    return train_cache_name
+
+
+def convert_res_name(file_name):
+    prefix = RES_DIR
+    inferfile = file_name.split('/')[-1]
+    res_name = prefix + inferfile.replace("tfrecord", "res.csv"). \
+        replace(".csv", ".tfrecord"). \
+        replace(".libsvm", ".tfrecord")
+    return res_name
-- 
Gitee

From 774444030a4dcca59595eedf8a3843e985acc1aa Mon Sep 17 00:00:00 2001
From: yxy1684 <2270320041@qq.com>
Date: Fri, 31 May 2024 06:28:44 +0000
Subject: [PATCH 182/302] =?UTF-8?q?!165=20xdeepFM=20CANN=E8=BD=AC=E6=8D=A2?=
 =?UTF-8?q?=20*=20xdeepFM=20CANN=E8=BD=AC=E6=8D=A2?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 examples/xDeepFM/IO/base_cache.py  | 2 ++
 examples/xDeepFM/IO/ffm_cache.py   | 2 ++
 examples/xDeepFM/IO/iterator.py    | 2 ++
 examples/xDeepFM/main.py           | 2 ++
 examples/xDeepFM/src/base_model.py | 4 +++-
 examples/xDeepFM/src/exDeepFM.py   | 2 ++
 examples/xDeepFM/train.py          | 4 +++-
 examples/xDeepFM/utils/log.py      | 2 ++
 examples/xDeepFM/utils/metric.py   | 2 ++
 examples/xDeepFM/utils/util.py     | 2 ++
 10 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/examples/xDeepFM/IO/base_cache.py b/examples/xDeepFM/IO/base_cache.py
index 11187de9..d0d8b5fc 100644
--- a/examples/xDeepFM/IO/base_cache.py
+++ b/examples/xDeepFM/IO/base_cache.py
@@ -1,4 +1,5 @@
 """define abstract base class"""
+from npu_bridge.npu_init import *
 import abc
 
 __all__ = ["BaseCache"]
@@ -11,3 +12,4 @@ class BaseCache(object):
     def write_tfrecord(self, infile, outfile, hparams):
         """Subclass must implement this."""
         pass
+
diff --git a/examples/xDeepFM/IO/ffm_cache.py b/examples/xDeepFM/IO/ffm_cache.py
index 1f3d505c..9a694d8f 100644
--- a/examples/xDeepFM/IO/ffm_cache.py
+++ b/examples/xDeepFM/IO/ffm_cache.py
@@ -1,4 +1,5 @@
 """define FfmCache class for cache the format dataset"""
+from npu_bridge.npu_init import *
 from IO.base_cache import BaseCache
 import tensorflow as tf
 import numpy as np
@@ -160,3 +161,4 @@ class FfmCache(BaseCache):
         for item in sort_feature_cnt:
             f.write(str(item[0]) + ',' + str(item[1]) + '\n')
         return sample_num, impression_id_list
+
diff --git a/examples/xDeepFM/IO/iterator.py b/examples/xDeepFM/IO/iterator.py
index c7e50032..b044b3b0 100644
--- a/examples/xDeepFM/IO/iterator.py
+++ b/examples/xDeepFM/IO/iterator.py
@@ -1,4 +1,5 @@
 """define iterator"""
+from npu_bridge.npu_init import *
 import collections
 import tensorflow as tf
 import abc
@@ -205,3
+206,4 @@ class CCCFNetIterator(BaseIterator): return labels, userIds, itemIds, \ user_profiles_indices, user_profiles_values, user_profiles_weights, user_profiles_shape, \ item_profiles_indices, item_profiles_values, item_profiles_weights, item_profiles_shape + diff --git a/examples/xDeepFM/main.py b/examples/xDeepFM/main.py index 265faca9..f0b93bd6 100644 --- a/examples/xDeepFM/main.py +++ b/examples/xDeepFM/main.py @@ -1,4 +1,5 @@ """This script parse and run train function""" +from npu_bridge.npu_init import * import train import utils.util as util import tensorflow as tf @@ -185,3 +186,4 @@ def main(): main() + diff --git a/examples/xDeepFM/src/base_model.py b/examples/xDeepFM/src/base_model.py index 320f0876..5481012f 100644 --- a/examples/xDeepFM/src/base_model.py +++ b/examples/xDeepFM/src/base_model.py @@ -1,4 +1,5 @@ """define base class model""" +from npu_bridge.npu_init import * import abc import math import tensorflow as tf @@ -177,7 +178,7 @@ class BaseModel(object): raise ValueError("this activations not defined {0}".format(activation)) def _dropout(self, logit, layer_idx): - logit = tf.nn.dropout(x=logit, keep_prob=self.layer_keeps[layer_idx]) + logit = npu_ops.dropout(x=logit, keep_prob=self.layer_keeps[layer_idx]) return logit def train(self, sess): @@ -191,3 +192,4 @@ class BaseModel(object): def infer(self, sess): return sess.run([self.pred], \ feed_dict={self.layer_keeps: self.keep_prob_test}) + diff --git a/examples/xDeepFM/src/exDeepFM.py b/examples/xDeepFM/src/exDeepFM.py index 7167d460..b8d235b7 100644 --- a/examples/xDeepFM/src/exDeepFM.py +++ b/examples/xDeepFM/src/exDeepFM.py @@ -1,4 +1,5 @@ """define Factorization-Machine based Neural Network Model""" +from npu_bridge.npu_init import * import math import numpy as np import tensorflow as tf @@ -407,3 +408,4 @@ class ExtremeDeepFMModel(BaseModel): self.layer_params.append(b_nn_output) nn_output = tf.nn.xw_plus_b(hidden_nn_layers[-1], w_nn_output, b_nn_output) return nn_output + diff --git a/examples/xDeepFM/train.py b/examples/xDeepFM/train.py index 2a9a0a31..1c434194 100644 --- a/examples/xDeepFM/train.py +++ b/examples/xDeepFM/train.py @@ -1,4 +1,5 @@ """define train, infer, eval, test process""" +from npu_bridge.npu_init import * import numpy as np import os, time, collections import tensorflow as tf @@ -219,7 +220,7 @@ def train(hparams, scope=None, target_session=""): gpuconfig = tf.ConfigProto() gpuconfig.gpu_options.allow_growth = True tf.set_random_seed(1234) - train_sess = tf.Session(target=target_session, graph=train_model.graph, config=gpuconfig) + train_sess = tf.Session(target=target_session, graph=train_model.graph, config=npu_config_proto(config_proto=gpuconfig)) train_sess.run(train_model.model.init_op) # load model from checkpoint @@ -302,3 +303,4 @@ def train(hparams, scope=None, target_session=""): # after train,run infer if hparams.infer_file is not None: run_infer(train_model, train_sess, hparams.infer_file_cache, hparams, util.INFER_NUM) + diff --git a/examples/xDeepFM/utils/log.py b/examples/xDeepFM/utils/log.py index a3fa891f..9b0c2c92 100644 --- a/examples/xDeepFM/utils/log.py +++ b/examples/xDeepFM/utils/log.py @@ -1,4 +1,5 @@ """define logging configure""" +from npu_bridge.npu_init import * import logging from datetime import datetime, timedelta, timezone import platform @@ -18,3 +19,4 @@ class Log(object): formatter = logging.Formatter('%(message)s') handler.setFormatter(formatter) self.logger.addHandler(handler) + diff --git a/examples/xDeepFM/utils/metric.py 
b/examples/xDeepFM/utils/metric.py index c2749da1..2a44b47b 100644 --- a/examples/xDeepFM/utils/metric.py +++ b/examples/xDeepFM/utils/metric.py @@ -1,4 +1,5 @@ """define metrics""" +from npu_bridge.npu_init import * from collections import defaultdict from sklearn.metrics import roc_auc_score, log_loss, mean_squared_error import numpy as np @@ -95,3 +96,4 @@ def cal_group_auc(labels, preds, impression_id_list): group_auc = float(total_auc) / impression_total group_auc = round(group_auc, 4) return group_auc + diff --git a/examples/xDeepFM/utils/util.py b/examples/xDeepFM/utils/util.py index 36e2ab10..7a52b6a5 100644 --- a/examples/xDeepFM/utils/util.py +++ b/examples/xDeepFM/utils/util.py @@ -1,4 +1,5 @@ """define util function and global variable""" +from npu_bridge.npu_init import * import tensorflow as tf import os, sys import time, yaml @@ -81,3 +82,4 @@ def convert_res_name(file_name): replace(".csv", ".tfrecord"). \ replace(".libsvm", ".tfrecord") return res_name + -- Gitee From 2c68138ebd7d64be3625d33e1ed1e5770c409914 Mon Sep 17 00:00:00 2001 From: yxy1684 <2270320041@qq.com> Date: Fri, 31 May 2024 09:35:27 +0000 Subject: [PATCH 183/302] =?UTF-8?q?!162=20xDeepFM=E8=BF=81=E7=A7=BB=20*=20?= =?UTF-8?q?xdeepFM=20*=20xdeepFM=20*=20xdeepFM=20*=20xdeepFM=20*=20xdeepFM?= =?UTF-8?q?=20*=20xdeepFM=20*=20xdeepFM=20*=20xdeepFM=20*=20xdeepFM=20*=20?= =?UTF-8?q?xdeepFM=20*=20xdeepFM=20*=20xdeepFM=20*=20xdeepFM=20*=20xdeepFM?= =?UTF-8?q?=20*=20xdeepFM=20*=20Merge=20branch=20'develop'=20of=20https://?= =?UTF-8?q?gitee.com/ascend/mxrec=20into=20xdeepfm=5Fdevelop=20*=20xdeepFM?= =?UTF-8?q?=20*=20exdeepfm=20*=20exdeepfm=20*=20exdeepfm=20*=20exdeepfm=20?= =?UTF-8?q?*=20exdeepfm=20*=20exdeepfm=20*=20exdeepfm=20*=20exdeepfm=20*?= =?UTF-8?q?=20exdeepfm=20*=20exdeepfm?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/xDeepFM/IO/iterator.py | 31 ++- examples/xDeepFM/README.md | 296 +++++++++++++++++++++++++++++ examples/xDeepFM/main.py | 12 +- examples/xDeepFM/run.sh | 130 +++++++++++++ examples/xDeepFM/src/base_model.py | 8 +- examples/xDeepFM/src/exDeepFM.py | 22 ++- examples/xDeepFM/train.py | 60 +++--- examples/xDeepFM/utils/util.py | 2 +- 8 files changed, 505 insertions(+), 56 deletions(-) create mode 100644 examples/xDeepFM/README.md create mode 100644 examples/xDeepFM/run.sh diff --git a/examples/xDeepFM/IO/iterator.py b/examples/xDeepFM/IO/iterator.py index b044b3b0..ad6a9145 100644 --- a/examples/xDeepFM/IO/iterator.py +++ b/examples/xDeepFM/IO/iterator.py @@ -27,18 +27,16 @@ class FfmIterator(BaseIterator): src_dataset = src_dataset.map(self.parser) # src_dataset = src_dataset.shuffle(buffer_size=BUFFER_SIZE) iterator = src_dataset.make_initializable_iterator() - _fm_feat_indices, _fm_feat_values, \ - _fm_feat_shape, _labels, _dnn_feat_indices, \ - _dnn_feat_values, _dnn_feat_weights, _dnn_feat_shape = iterator.get_next() + batch = iterator.get_next() self.initializer = iterator.initializer - self.fm_feat_indices = _fm_feat_indices - self.fm_feat_values = _fm_feat_values - self.fm_feat_shape = _fm_feat_shape - self.labels = _labels - self.dnn_feat_indices = _dnn_feat_indices - self.dnn_feat_values = _dnn_feat_values - self.dnn_feat_weights = _dnn_feat_weights - self.dnn_feat_shape = _dnn_feat_shape + self.fm_feat_indices = batch.get('fm_feat_indices') + self.fm_feat_values = batch.get('fm_feat_values') + self.fm_feat_shape = batch.get('fm_feat_shape') + self.labels = batch.get('labels') + self.dnn_feat_indices = batch.get('dnn_feat_indices') + 
self.dnn_feat_values = batch.get('dnn_feat_values')
+        self.dnn_feat_weights = batch.get('dnn_feat_weights')
+        self.dnn_feat_shape = batch.get('dnn_feat_shape')
 
     def parser(self, record):
         keys_to_features = {
@@ -60,9 +58,11 @@ class FfmIterator(BaseIterator):
         dnn_feat_values = tf.sparse_tensor_to_dense(parsed['dnn_feat_values'])
         dnn_feat_weights = tf.sparse_tensor_to_dense(parsed['dnn_feat_weights'])
         dnn_feat_shape = parsed['dnn_feat_shape']
-        return fm_feat_indices, fm_feat_values, \
-               fm_feat_shape, labels, dnn_feat_indices, \
-               dnn_feat_values, dnn_feat_weights, dnn_feat_shape
+        return {
+            'fm_feat_indices': fm_feat_indices, 'fm_feat_values': fm_feat_values, 'fm_feat_shape': fm_feat_shape,
+            'labels': labels, 'dnn_feat_indices': dnn_feat_indices, 'dnn_feat_values': dnn_feat_values,
+            'dnn_feat_weights': dnn_feat_weights, 'dnn_feat_shape': dnn_feat_shape
+        }
 
 
 class DinIterator(BaseIterator):
@@ -205,5 +205,4 @@ class CCCFNetIterator(BaseIterator):
         return labels, userIds, itemIds, \
                user_profiles_indices, user_profiles_values, user_profiles_weights, user_profiles_shape, \
-               item_profiles_indices, item_profiles_values, item_profiles_weights, item_profiles_shape
-
+               item_profiles_indices, item_profiles_values, item_profiles_weights, item_profiles_shape
\ No newline at end of file
diff --git a/examples/xDeepFM/README.md b/examples/xDeepFM/README.md
new file mode 100644
index 00000000..d9a93744
--- /dev/null
+++ b/examples/xDeepFM/README.md
@@ -0,0 +1,296 @@
+# xDeepFM Migration Example
+
+## Reference open-source links for the model
+
+1. https://github.com/Leavingseason/xDeepFM
+
+2. Commits on Oct 15, 2018, SHA-1 hash of the commit (commit ID): 114c4c45b1cb6144b2540f92a2b357c3f445e98e
+
+3. Only the code and files required for execution are kept; everything else has been removed.
+4. The config/network.yaml configuration file and data files such as data/dnn/infer.userid.txt and res/infer.userid.txt must be downloaded from the open-source link and imported by the user.
+
+## Migrating to NPU
+
+Please refer to the CANN commercial edition documentation in the Ascend community and first use the migration tool to perform automatic NPU migration: https://www.hiascend.com/document/detail/zh/canncommercial/700/modeldev/tfmigr1/tfmigr1_000009.html
+
+
+## Migrating to mxRec
+
+1. Modify IO/iterator.py: change lines 30-41
+
+
+```python
+    _fm_feat_indices, _fm_feat_values,
+    _fm_feat_shape, _labels, _dnn_feat_indices,
+    _dnn_feat_values, _dnn_feat_weights, _dnn_feat_shape = iterator.get_next()
+    self.initializer = iterator.initializer
+    self.fm_feat_indices = _fm_feat_indices
+    self.fm_feat_values = _fm_feat_values
+    self.fm_feat_shape = _fm_feat_shape
+    self.labels = _labels
+    self.dnn_feat_indices = _dnn_feat_indices
+    self.dnn_feat_values = _dnn_feat_values
+    self.dnn_feat_weights = _dnn_feat_weights
+    self.dnn_feat_shape = _dnn_feat_shape
+```
+` ` ` `to:
+```python
+    batch = iterator.get_next()
+    self.initializer = iterator.initializer
+    self.fm_feat_indices = batch.get('fm_feat_indices')
+    self.fm_feat_values = batch.get('fm_feat_values')
+    self.fm_feat_shape = batch.get('fm_feat_shape')
+    self.labels = batch.get('labels')
+    self.dnn_feat_indices = batch.get('dnn_feat_indices')
+    self.dnn_feat_values = batch.get('dnn_feat_values')
+    self.dnn_feat_weights = batch.get('dnn_feat_weights')
+    self.dnn_feat_shape = batch.get('dnn_feat_shape')
+```
+
+` ` ` `and change lines 63-65
+```python
+    return fm_feat_indices, fm_feat_values,
+    fm_feat_shape, labels, dnn_feat_indices,
+    dnn_feat_values, dnn_feat_weights, dnn_feat_shape
+```
+` ` ` `to:
+```python
+    return {
+        'fm_feat_indices': fm_feat_indices, 'fm_feat_values': fm_feat_values, 'fm_feat_shape': fm_feat_shape,
+        'labels': labels, 'dnn_feat_indices': dnn_feat_indices, 'dnn_feat_values': dnn_feat_values,
+        'dnn_feat_weights': dnn_feat_weights, 'dnn_feat_shape': dnn_feat_shape
+    }
+```
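+
+A short aside (added for clarity; an illustrative sketch, not part of the original repository): returning a dict keyed by feature name, instead of a positional tuple, is what lets mxRec's automatic graph modification rebuild the input pipeline and lets downstream code look tensors up by name. The following self-contained TF1-style snippet only demonstrates the dict-returning iterator pattern; the feature names and values here are hypothetical:
+```python
+import tensorflow as tf
+
+# Build a tiny dataset whose elements are dicts of tensors.
+dataset = tf.data.Dataset.from_tensor_slices({
+    'labels': [[1.0], [0.0]],
+    'fm_feat_values': [[0.5], [0.3]],
+}).batch(1)
+iterator = dataset.make_initializable_iterator()
+batch = iterator.get_next()  # a dict of tensors, keyed by feature name
+
+with tf.Session() as sess:
+    sess.run(iterator.initializer)
+    print(sess.run(batch.get('labels')))  # [[1.]]
+```
+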
+2. Modify src/base_model.py. Set the embedding initializer to tf.zeros_initializer(): change line 84
+```python
+            return tf.truncated_normal_initializer(stddev=hparams.init_value)
+```
+` ` ` `to (to allow a like-for-like comparison with CPU, make the same change in the original xDeepFM source code):
+```python
+            return tf.zeros_initializer()
+```
+
+` ` ` `Update how the label of each batch is fetched from the new dataset generated in automatic graph-modification mode: change lines 188-189
+```python
+    def eval(self, sess):
+        return sess.run([self.loss, self.data_loss, self.pred, self.iterator.labels], \
+```
+` ` ` `to:
+```python
+    def eval(self, sess, eval_label):
+        return sess.run([self.loss, self.data_loss, self.pred, eval_label], \
+```
+
+3. Modify src/exDeepFM.py. Add at line 6
+```python
+from mx_rec.core.embedding import create_table
+from mx_rec.core.embedding import sparse_lookup
+```
+` ` ` `and change lines 40-43
+```python
+            w_fm_nn_input_orgin = tf.nn.embedding_lookup_sparse(self.embedding,
+                                                                fm_sparse_index,
+                                                                fm_sparse_weight,
+                                                                combiner="sum")
+```
+` ` ` `to:
+```python
+            dense_indices = tf.sparse.to_dense(fm_sparse_index, default_value=0)
+            dense_weights = tf.sparse.to_dense(fm_sparse_weight, default_value=0)
+
+            sparse_hashtable = create_table(key_dtype=tf.int32,
+                                            dim=tf.TensorShape([hparams.dim]),
+                                            name='sparse_embeddings_table',
+                                            emb_initializer=tf.zeros_initializer(),
+                                            device_vocabulary_size=hparams.FEATURE_COUNT,
+                                            host_vocabulary_size=0
+                                            )
+            embedded_values = sparse_lookup(sparse_hashtable,
+                                            dense_indices,
+                                            is_train=True,
+                                            name="sparse_embeddings",
+                                            modify_graph=True)
+            w_fm_nn_input_orgin = tf.reduce_sum(embedded_values * tf.expand_dims(dense_weights, axis=-1), axis=1)
+```
+
+4. Modify main.py. Add at line 176
+```python
+    # init
+    from mx_rec.util.initialize import init
+    init(use_dynamic=True,
+         use_dynamic_expansion=False)
+```
+
+5. Modify train.py. Change lines 35-57
+```python
+    graph = tf.Graph()
+    with graph.as_default():
+        # feed train file name, valid file name, or test file name
+        filenames = tf.placeholder(tf.string, shape=[None])
+        #src_dataset = tf.contrib.data.TFRecordDataset(filenames)
+        src_dataset = tf.data.TFRecordDataset(filenames)
+
+        if hparams.data_format == 'ffm':
+            batch_input = FfmIterator(src_dataset)
+        elif hparams.data_format == 'din':
+            batch_input = DinIterator(src_dataset)
+        elif hparams.data_format == 'cccfnet':
+            batch_input = CCCFNetIterator(src_dataset)
+        else:
+            raise ValueError("not support {0} format data".format(hparams.data_format))
+        # build model
+        model = model_creator(
+            hparams,
+            iterator=batch_input,
+            scope=scope)
+
+        return TrainModel(
+            graph=graph,
+```
+` ` ` `to:
+```python
+    # feed train file name, valid file name, or test file name
+    filenames = tf.placeholder(tf.string, shape=[None])
+    # src_dataset = tf.contrib.data.TFRecordDataset(filenames)
+    src_dataset = tf.data.TFRecordDataset(filenames)
+
+    if hparams.data_format == 'ffm':
+        batch_input = FfmIterator(src_dataset)
+    elif hparams.data_format == 'din':
+        batch_input = DinIterator(src_dataset)
+    elif hparams.data_format == 'cccfnet':
+        batch_input = CCCFNetIterator(src_dataset)
+    else:
+        raise ValueError("not support {0} format data".format(hparams.data_format))
+    # build model
+    model = model_creator(
+        hparams,
+        iterator=batch_input,
+        scope=scope)
+
+    return TrainModel(
+        graph=tf.get_default_graph(),
+```
+` ` ` `and change lines 68-73
+```python
+    load_sess.run(load_model.iterator.initializer, feed_dict={load_model.filenames: [filename]})
+    preds = []
+    labels = []
+    while True:
+        try:
+            _, _, step_pred, step_labels = load_model.model.eval(load_sess)
+```
+` ` ` `to:
+```python
+    from mx_rec.util.initialize import ConfigInitializer
ConfigInitializer.get_instance().train_params_config.get_target_batch(True).get("labels") + initializer = ConfigInitializer.get_instance().train_params_config.get_initializer(True) + load_sess.run(initializer, feed_dict={load_model.filenames: [filename]}) + preds = [] + labels = [] + while True: + try: + _, _, step_pred, step_labels = load_model.model.eval(load_sess, eval_label) +``` + +` ` ` `在第223行添加 +```python + from mx_rec.graph.modifier import modify_graph_and_start_emb_cache + modify_graph_and_start_emb_cache(dump_graph=True) +``` +` ` ` `把第239行 +```python + train_sess.run(train_model.iterator.initializer, feed_dict={train_model.filenames: [hparams.train_file_cache]}) +``` +` ` ` `改为: +```python + from mx_rec.util.initialize import ConfigInitializer + initializer = ConfigInitializer.get_instance().train_params_config.get_initializer(True) + train_sess.run(initializer, feed_dict={train_model.filenames: [hparams.train_file_cache]}) +``` +6、为了适配mxRec运行环境,添加了run.sh。 + +## 适配其他代码 + +1、修改utils/util.py。把第63行 + + +```python + config = yaml.load(f) +``` +` ` ` `改为(为了xDeepFM源代码在CPU上能跑通,这里也要一起修改): +```python + config = yaml.safe_load(f) +``` + +2、由于去掉了无关代码src/CIN.py,修改main.py适配。把第156~158行 + +```python + 'opnn', 'fm', 'lr', 'din', 'cccfnet', 'deepcross', 'exDeepFM', "cross", "CIN"]: + raise ValueError( + "model type must be cccfnet, deepFM, deepWide, dnn, ipnn, opnn, fm, lr, din, deepcross, exDeepFM, cross, CIN but you set is {0}".format( +``` +` ` ` `改为: +```python + 'opnn', 'fm', 'lr', 'din', 'cccfnet', 'deepcross', 'exDeepFM', "cross"]: + raise ValueError( + "model type must be cccfnet, deepFM, deepWide, dnn, ipnn, opnn, fm, lr, din, deepcross, exDeepFM, " + "cross, but you set is {0}".format(config['model']['model_type'])) +``` + +` ` ` `修改train.py适配。删除第21行代码 +```python +from src.CIN import CINModel +``` + +` ` ` `删除第210~212行代码 +```python + elif hparams.model_type == 'CIN': + print("run extreme cin model!") + model_creator = CINModel +``` + +## 运行命令 +```shell +bash run.sh main.py 10.10.10.10 +``` +其中,10.10.10.10为服务器IP,请替换成对应服务器IP。 + +## 验证结果 +1、CPU: +```log +step 1 , total_loss: 0.6931, data_loss: 0.6931 +step 2 , total_loss: 0.6905, data_loss: 0.6905 +finish one epoch! +at epoch 0 train info: loss:0.6918214857578278 eval info: auc:0.4867, logloss:0.6865 test info: auc:0.4867, logloss:0.6865 +at epoch 0 , train time: 0.6 eval time: 0.3 +step 1 , total_loss: 0.6845, data_loss: 0.6845 +step 2 , total_loss: 0.6818, data_loss: 0.6818 +finish one epoch! +at epoch 1 train info: loss:0.6831814646720886 eval info: auc:0.485, logloss:0.6801 test info: auc:0.485, logloss:0.6801 +at epoch 1 , train time: 0.2 eval time: 0.1 +step 1 , total_loss: 0.6766, data_loss: 0.6766 +step 2 , total_loss: 0.6732, data_loss: 0.6732 +finish one epoch! +at epoch 2 train info: loss:0.6748818755149841 eval info: auc:0.4832, logloss:0.6738 test info: auc:0.4832, logloss:0.6738 +at epoch 2 , train time: 0.1 eval time: 0.1 +``` +2、mxRec: +```log +[1,0]:step 1 , total_loss: 0.6931, data_loss: 0.6931 +[1,0]:step 2 , total_loss: 0.6905, data_loss: 0.6905 +[1,0]:finish one epoch! +[1,0]:at epoch 0 train info: loss:0.6918215453624725 eval info: auc:0.4867, logloss:0.6865 test info: auc:0.4867, logloss:0.6865 +[1,0]:at epoch 0 , train time: 15.9 eval time: 3.1 +[1,0]:step 1 , total_loss: 0.6845, data_loss: 0.6845 +[1,0]:step 2 , total_loss: 0.6818, data_loss: 0.6818 +[1,0]:finish one epoch! 
+[1,0]:at epoch 1 train info: loss:0.6831814646720886 eval info: auc:0.485, logloss:0.6801 test info: auc:0.485, logloss:0.6801 +[1,0]:at epoch 1 , train time: 7.8 eval time: 0.7 +[1,0]:step 1 , total_loss: 0.6766, data_loss: 0.6766 +[1,0]:step 2 , total_loss: 0.6732, data_loss: 0.6732 +[1,0]:finish one epoch! +[1,0]:at epoch 2 train info: loss:0.6748818457126617 eval info: auc:0.4832, logloss:0.6738 test info: auc:0.4832, logloss:0.6738 +[1,0]:at epoch 2 , train time: 0.5 eval time: 0.7 +``` diff --git a/examples/xDeepFM/main.py b/examples/xDeepFM/main.py index f0b93bd6..0752d18b 100644 --- a/examples/xDeepFM/main.py +++ b/examples/xDeepFM/main.py @@ -153,10 +153,10 @@ def check_nn_config(config): def check_config(config): """check networks config""" if config['model']['model_type'] not in ['deepFM', 'deepWide', 'dnn', 'ipnn', \ - 'opnn', 'fm', 'lr', 'din', 'cccfnet', 'deepcross', 'exDeepFM', "cross", "CIN"]: + 'opnn', 'fm', 'lr', 'din', 'cccfnet', 'deepcross', 'exDeepFM', "cross"]: raise ValueError( - "model type must be cccfnet, deepFM, deepWide, dnn, ipnn, opnn, fm, lr, din, deepcross, exDeepFM, cross, CIN but you set is {0}".format( - config['model']['model_type'])) + "model type must be cccfnet, deepFM, deepWide, dnn, ipnn, opnn, fm, lr, din, deepcross, exDeepFM, " + "cross, but you set is {0}".format(config['model']['model_type'])) check_nn_config(config) @@ -172,6 +172,12 @@ def load_yaml(): def main(): """main function""" + + # init + from mx_rec.util.initialize import init + init(use_dynamic=True, + use_dynamic_expansion=False) + # flag = True util.check_tensorflow_version() util.check_and_mkdir() diff --git a/examples/xDeepFM/run.sh b/examples/xDeepFM/run.sh new file mode 100644 index 00000000..613c440e --- /dev/null +++ b/examples/xDeepFM/run.sh @@ -0,0 +1,130 @@ +kill -9 `ps -ef | grep python | grep -v grep | awk '{print $2}'` > /dev/null 2>&1 + +# 获取输入参数:py、ip +if [ $# -ge 1 ]; then + py=$1 + ip=$2 +else + echo "for example: bash run.sh main.py 10.10.10.10 or bash run.sh main.py" + exit 1 +fi + +# 检查输入的python文件是否合法 +if [[ $py =~ ^[a-z0-9_]+\.py$ ]]; then + echo "File $py is a valid Python file" +else + echo "File $py is not a Python file" + exit 1 +fi + +# 判断IP地址是否有效 +if [ -n "$ip" ]; then + if [[ $ip =~ ^([0-9]{1,3}\.){3}[0-9]{1,3}$ ]]; then + # 将IP地址拆分成四个数字 + ip_array=(${ip//./ }) + # 判断每个数字是否在0-255之间 + valid=true + for i in "${ip_array[@]}"; do + if ((i < 0 || i > 255)); then + valid=false + break + fi + done + if $valid; then + echo "ip: $ip is valid" + else + echo "ip: $ip is not valid" + exit 1 + fi + else + echo "ip: $ip is not valid." 
+ exit 1 + fi +fi + +cur_path=`pwd` +mx_rec_package_path="/usr/local/python3.7.5/lib/python3.7/site-packages/mx_rec" # please config +so_path=${mx_rec_package_path}/libasc +# GLOG_stderrthreshold -2:TRACE -1:DEBUG 0:INFO 1:WARN 2.ERROR, 默认为INFO +mpi_args='-x BIND_INFO="0:12 12:48 60:48" -x GLOG_stderrthreshold=2 -x GLOG_logtostderr=true -bind-to none -x NCCL_SOCKET_IFNAME=docker0 -mca btl_tcp_if_exclude docker0' +interface="lo" +local_rank_size=1 # 每个节点使用的NPU卡数 +num_server=1 # 训练节点数 +num_process=$((${num_server} * ${local_rank_size})) # 训练总的进程数,等于使用的NPU卡的总数 + +export HCCL_CONNECT_TIMEOUT=1200 # HCCL集合通信 建链超时时间,取值范围[120,7200] +export PYTHONPATH=${so_path}:$PYTHONPATH # 环境python安装路径 +export LD_PRELOAD=/usr/lib64/libgomp.so.1:/usr/local/python3.7.5/lib/python3.7/site-packages/scikit_learn.libs/libgomp-d22c30c5.so.1.0.0 +export LD_LIBRARY_PATH=${so_path}:/usr/local/lib:$LD_LIBRARY_PATH +# 集合通信文件,格式请参考昇腾官网CANN文档,“准备资源配置文件”章节。 +export JOB_ID=10086 +# 训练任务使用的NPU卡数总数 +export MXREC_LOG_LEVEL="ERROR" # 框架日志等级 +export TF_CPP_MIN_LOG_LEVEL=3 # tensorflow日志级别,3对应FATAL +# 设置应用类日志的全局日志级别及各模块日志级别,具体请参考昇腾官网CANN文档 +export ASCEND_GLOBAL_LOG_LEVEL=3 # “设置日志级别”章节0:debug, 1:info, 2:warning, 3:error, 4:NULL +export MXREC_MODE="ASC" +export USE_MPI=1 + +# 帮助信息,不需要修改 +if [[ $1 == --help || $1 == -h ]];then + echo "Usage: ./run.sh [OPTION]... [IP]..." + echo " " + echo "parameter explain: + [OPTION] main.py + [IP] IP address of the host + -h/--help show help message + " + exit 1 +fi + +# 使用ranktable方案 +function rankTableSolution() { + echo "The ranktable solution" + export RANK_TABLE_FILE="${cur_path}/hccl_json_${local_rank_size}p.json" + export RANK_SIZE=$num_process + export ASCEND_VISIBLE_DEVICES="0" + export RANK_ID=0 + export ASCEND_DEVICE_ID=$RANK_ID + echo "RANK_TABLE_FILE=$RANK_TABLE_FILE" + if [ ! -f "$RANK_TABLE_FILE" ];then + echo "the rank table file does not exit. Please reference {hccl_json_${local_rank_size}p.json} to correctly config rank table file" + exit 1 + fi +} + +if [ ! -n "$ip" ]; then + rankTableSolution +else + VALID_CHECK=$(echo $ip|awk -F. '$1<=255&&$2<=255&&$3<=255&&$4<=255{print "yes"}') + if echo $ip|grep -E "^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$">/dev/null; then + if [ "$VALID_CHECK" == "yes" ]; then + #################使用去除ranktable方案时开启###################### + echo "ip: $ip available." + echo "The ranktable solution is removed." + export CM_CHIEF_IP=$ip # 主节点ip + export CM_CHIEF_PORT=6000 # 主节点监听端口 + export CM_CHIEF_DEVICE=0 # 主节点device id + export CM_WORKER_IP=$ip # 当前节点ip + export CM_WORKER_SIZE=$num_process # 参与集群训练的device数量 + echo "CM_CHIEF_IP=$CM_CHIEF_IP" + echo "CM_CHIEF_PORT=$CM_CHIEF_PORT" + echo "CM_CHIEF_DEVICE=$CM_CHIEF_DEVICE" + echo "CM_WORKER_IP=$CM_WORKER_IP" + echo "CM_WORKER_SIZE=$CM_WORKER_SIZE" + echo "ASCEND_VISIBLE_DEVICES=$ASCEND_VISIBLE_DEVICES" + ######################################################### + else + echo "ip: $ip not available!" # 使用ranktable方案 + rankTableSolution + fi + else + echo "ip: $ip not available!" 
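+        # the supplied IP failed validation, so fall back to the ranktable-based startup below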
# 使用ranktable方案 + rankTableSolution + fi +fi + +echo "use horovod to start tasks" +DATE=$(date +%Y-%m-%d-%H-%M-%S) +horovodrun --network-interface ${interface} -np ${num_process} --mpi-args "${mpi_args}" --mpi -H localhost:${local_rank_size} \ +python3.7 ${py} 2>&1 | tee "temp_${local_rank_size}p_t_${DATE}.log" diff --git a/examples/xDeepFM/src/base_model.py b/examples/xDeepFM/src/base_model.py index 5481012f..96682ae6 100644 --- a/examples/xDeepFM/src/base_model.py +++ b/examples/xDeepFM/src/base_model.py @@ -77,11 +77,11 @@ class BaseModel(object): for param in self.cross_params: cross_l_loss = tf.add(cross_l_loss, tf.multiply(hparams.cross_l1, tf.norm(param, ord=1))) cross_l_loss = tf.add(cross_l_loss, tf.multiply(hparams.cross_l2, tf.norm(param, ord=1))) - return cross_l_loss + return cross_l_loss def _get_initializer(self, hparams): if hparams.init_method == 'tnormal': - return tf.truncated_normal_initializer(stddev=hparams.init_value) + return tf.zeros_initializer() elif hparams.init_method == 'uniform': return tf.random_uniform_initializer(-hparams.init_value, hparams.init_value) elif hparams.init_method == 'normal': @@ -185,8 +185,8 @@ class BaseModel(object): return sess.run([self.update, self.loss, self.data_loss, self.merged], \ feed_dict={self.layer_keeps: self.keep_prob_train}) - def eval(self, sess): - return sess.run([self.loss, self.data_loss, self.pred, self.iterator.labels], \ + def eval(self, sess, eval_label): + return sess.run([self.loss, self.data_loss, self.pred, eval_label], \ feed_dict={self.layer_keeps: self.keep_prob_test}) def infer(self, sess): diff --git a/examples/xDeepFM/src/exDeepFM.py b/examples/xDeepFM/src/exDeepFM.py index b8d235b7..9d5b5299 100644 --- a/examples/xDeepFM/src/exDeepFM.py +++ b/examples/xDeepFM/src/exDeepFM.py @@ -3,6 +3,8 @@ from npu_bridge.npu_init import * import math import numpy as np import tensorflow as tf +from mx_rec.core.embedding import create_table +from mx_rec.core.embedding import sparse_lookup from src.base_model import BaseModel __all__ = ["ExtremeDeepFMModel"] @@ -37,10 +39,22 @@ class ExtremeDeepFMModel(BaseModel): fm_sparse_weight = tf.SparseTensor(self.iterator.dnn_feat_indices, self.iterator.dnn_feat_weights, self.iterator.dnn_feat_shape) - w_fm_nn_input_orgin = tf.nn.embedding_lookup_sparse(self.embedding, - fm_sparse_index, - fm_sparse_weight, - combiner="sum") + dense_indices = tf.sparse.to_dense(fm_sparse_index, default_value=0) + dense_weights = tf.sparse.to_dense(fm_sparse_weight, default_value=0) + + sparse_hashtable = create_table(key_dtype=tf.int32, + dim=tf.TensorShape([hparams.dim]), + name='sparse_embeddings_table', + emb_initializer=tf.zeros_initializer(), + device_vocabulary_size=hparams.FEATURE_COUNT, + host_vocabulary_size=0 + ) + embedded_values = sparse_lookup(sparse_hashtable, + dense_indices, + is_train=True, + name="sparse_embeddings", + modify_graph=True) + w_fm_nn_input_orgin = tf.reduce_sum(embedded_values * tf.expand_dims(dense_weights, axis=-1), axis=1) embedding = tf.reshape(w_fm_nn_input_orgin, [-1, hparams.dim * hparams.FIELD_COUNT]) embedding_size = hparams.FIELD_COUNT * hparams.dim return embedding, embedding_size diff --git a/examples/xDeepFM/train.py b/examples/xDeepFM/train.py index 1c434194..39918b34 100644 --- a/examples/xDeepFM/train.py +++ b/examples/xDeepFM/train.py @@ -18,7 +18,6 @@ from IO.ffm_cache import FfmCache #from src.cccfnet import CCCFModel #from src.deepcross import DeepCrossModel from src.exDeepFM import ExtremeDeepFMModel -from src.CIN import CINModel #from src.cross 
import CrossModel import utils.util as util import utils.metric as metric @@ -32,29 +31,27 @@ class TrainModel(collections.namedtuple("TrainModel", ("graph", "model", "iterat def create_train_model(model_creator, hparams, scope=None): - graph = tf.Graph() - with graph.as_default(): - # feed train file name, valid file name, or test file name - filenames = tf.placeholder(tf.string, shape=[None]) - #src_dataset = tf.contrib.data.TFRecordDataset(filenames) - src_dataset = tf.data.TFRecordDataset(filenames) + # feed train file name, valid file name, or test file name + filenames = tf.placeholder(tf.string, shape=[None]) + # src_dataset = tf.contrib.data.TFRecordDataset(filenames) + src_dataset = tf.data.TFRecordDataset(filenames) - if hparams.data_format == 'ffm': - batch_input = FfmIterator(src_dataset) - elif hparams.data_format == 'din': - batch_input = DinIterator(src_dataset) - elif hparams.data_format == 'cccfnet': - batch_input = CCCFNetIterator(src_dataset) - else: - raise ValueError("not support {0} format data".format(hparams.data_format)) - # build model - model = model_creator( - hparams, - iterator=batch_input, - scope=scope) + if hparams.data_format == 'ffm': + batch_input = FfmIterator(src_dataset) + elif hparams.data_format == 'din': + batch_input = DinIterator(src_dataset) + elif hparams.data_format == 'cccfnet': + batch_input = CCCFNetIterator(src_dataset) + else: + raise ValueError("not support {0} format data".format(hparams.data_format)) + # build model + model = model_creator( + hparams, + iterator=batch_input, + scope=scope) return TrainModel( - graph=graph, + graph=tf.get_default_graph(), model=model, iterator=batch_input, filenames=filenames) @@ -65,12 +62,15 @@ def run_eval(load_model, load_sess, filename, sample_num_file, hparams, flag): # load sample num with open(sample_num_file, 'r') as f: sample_num = int(f.readlines()[0].strip()) - load_sess.run(load_model.iterator.initializer, feed_dict={load_model.filenames: [filename]}) + from mx_rec.util.initialize import ConfigInitializer + eval_label = ConfigInitializer.get_instance().train_params_config.get_target_batch(True).get("labels") + initializer = ConfigInitializer.get_instance().train_params_config.get_initializer(True) + load_sess.run(initializer, feed_dict={load_model.filenames: [filename]}) preds = [] labels = [] while True: try: - _, _, step_pred, step_labels = load_model.model.eval(load_sess) + _, _, step_pred, step_labels = load_model.model.eval(load_sess, eval_label) preds.extend(np.reshape(step_pred, -1)) labels.extend(np.reshape(step_labels, -1)) except tf.errors.OutOfRangeError: @@ -207,10 +207,7 @@ def train(hparams, scope=None, target_session=""): elif hparams.model_type == 'cross': print("run extreme cross model!") model_creator = CrossModel - elif hparams.model_type == 'CIN': - print("run extreme cin model!") - model_creator = CINModel - + else: raise ValueError("model type should be cccfnet, deepFM, deepWide, dnn, fm, lr, ipnn, opnn, din") @@ -220,6 +217,10 @@ def train(hparams, scope=None, target_session=""): gpuconfig = tf.ConfigProto() gpuconfig.gpu_options.allow_growth = True tf.set_random_seed(1234) + + from mx_rec.graph.modifier import modify_graph_and_start_emb_cache + modify_graph_and_start_emb_cache(dump_graph=True) + train_sess = tf.Session(target=target_session, graph=train_model.graph, config=npu_config_proto(config_proto=gpuconfig)) train_sess.run(train_model.model.init_op) @@ -236,7 +237,10 @@ def train(hparams, scope=None, target_session=""): last_eval = 0 for epoch in 
range(hparams.epochs): step = 0 - train_sess.run(train_model.iterator.initializer, feed_dict={train_model.filenames: [hparams.train_file_cache]}) + from mx_rec.util.initialize import ConfigInitializer + initializer = ConfigInitializer.get_instance().train_params_config.get_initializer(True) + train_sess.run(initializer, feed_dict={train_model.filenames: [hparams.train_file_cache]}) + epoch_loss = 0 train_start = time.time() train_load_time = 0 diff --git a/examples/xDeepFM/utils/util.py b/examples/xDeepFM/utils/util.py index 7a52b6a5..e4d88636 100644 --- a/examples/xDeepFM/utils/util.py +++ b/examples/xDeepFM/utils/util.py @@ -60,7 +60,7 @@ def check_file_exist(filename): def load_yaml_file(filename): with open(filename) as f: try: - config = yaml.load(f) + config = yaml.safe_load(f) except: raise IOError("load {0} error!".format(filename)) return config -- Gitee From e26c32e0c345dd6d80ec690781f2fb41ba00b63f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=BD=95=E9=9C=96?= Date: Mon, 3 Jun 2024 02:07:37 +0000 Subject: [PATCH 184/302] =?UTF-8?q?!158=20mxRec=E6=B5=8B=E8=AF=95=E7=94=A8?= =?UTF-8?q?=E4=BE=8B=EF=BC=88=E5=8C=85=E6=8B=ACAccCTR=EF=BC=89=E6=B7=BB?= =?UTF-8?q?=E5=8A=A0=E3=80=81=E9=80=82=E9=85=8DASan=EF=BC=88=E5=9C=B0?= =?UTF-8?q?=E5=9D=80=E6=B6=88=E6=AF=92=EF=BC=89=E8=BF=9B=E8=A1=8C=E5=86=85?= =?UTF-8?q?=E5=AD=98=E6=B3=84=E6=BC=8F=E6=A3=80=E6=B5=8B=EF=BC=8C=E5=B9=B6?= =?UTF-8?q?=E8=A7=A3=E5=86=B3=E6=89=AB=E6=8F=8F=E5=87=BA=E6=9D=A5=E7=9A=84?= =?UTF-8?q?=E6=B5=8B=E8=AF=95=E7=94=A8=E4=BE=8B=E4=B8=AD=E7=9A=84=E5=86=85?= =?UTF-8?q?=E5=AD=98=E6=B3=84=E6=BC=8F=E9=97=AE=E9=A2=98=20*=20C++?= =?UTF-8?q?=E6=B5=8B=E8=AF=95=E7=94=A8=E4=BE=8B=E6=B7=BB=E5=8A=A0ASAN?= =?UTF-8?q?=EF=BC=88=E5=9C=B0=E5=9D=80=E6=B6=88=E6=AF=92=EF=BC=89=E8=BF=9B?= =?UTF-8?q?=E8=A1=8C=E5=86=85=E5=AD=98=E6=B3=84=E9=9C=B2=E6=A3=80=E6=B5=8B?= =?UTF-8?q?=20*=20C++=E6=B5=8B=E8=AF=95=E7=94=A8=E4=BE=8B=E6=B7=BB?= =?UTF-8?q?=E5=8A=A0ASAN=EF=BC=88=E5=9C=B0=E5=9D=80=E6=B6=88=E6=AF=92?= =?UTF-8?q?=EF=BC=89=E8=BF=9B=E8=A1=8C=E5=86=85=E5=AD=98=E6=B3=84=E9=9C=B2?= =?UTF-8?q?=E6=A3=80=E6=B5=8B=20*=20C++=E6=B5=8B=E8=AF=95=E7=94=A8?= =?UTF-8?q?=E4=BE=8B=E6=B7=BB=E5=8A=A0ASAN=EF=BC=88=E5=9C=B0=E5=9D=80?= =?UTF-8?q?=E6=B6=88=E6=AF=92=EF=BC=89=E8=BF=9B=E8=A1=8C=E5=86=85=E5=AD=98?= =?UTF-8?q?=E6=B3=84=E9=9C=B2=E6=A3=80=E6=B5=8B=20*=20C++=E6=B5=8B?= =?UTF-8?q?=E8=AF=95=E7=94=A8=E4=BE=8B=E6=B7=BB=E5=8A=A0ASAN=EF=BC=88?= =?UTF-8?q?=E5=9C=B0=E5=9D=80=E6=B6=88=E6=AF=92=EF=BC=89=E8=BF=9B=E8=A1=8C?= =?UTF-8?q?=E5=86=85=E5=AD=98=E6=B3=84=E9=9C=B2=E6=A3=80=E6=B5=8B=20*=20C+?= =?UTF-8?q?+=E6=B5=8B=E8=AF=95=E7=94=A8=E4=BE=8B=E6=B7=BB=E5=8A=A0ASAN?= =?UTF-8?q?=EF=BC=88=E5=9C=B0=E5=9D=80=E6=B6=88=E6=AF=92=EF=BC=89=E8=BF=9B?= =?UTF-8?q?=E8=A1=8C=E5=86=85=E5=AD=98=E6=B3=84=E9=9C=B2=E6=A3=80=E6=B5=8B?= =?UTF-8?q?=20*=20C++=E6=B5=8B=E8=AF=95=E7=94=A8=E4=BE=8B=E6=B7=BB?= =?UTF-8?q?=E5=8A=A0ASAN=EF=BC=88=E5=9C=B0=E5=9D=80=E6=B6=88=E6=AF=92?= =?UTF-8?q?=EF=BC=89=E8=BF=9B=E8=A1=8C=E5=86=85=E5=AD=98=E6=B3=84=E9=9C=B2?= =?UTF-8?q?=E6=A3=80=E6=B5=8B=20*=20C++=E6=B5=8B=E8=AF=95=E7=94=A8?= =?UTF-8?q?=E4=BE=8B=E6=B7=BB=E5=8A=A0ASAN=EF=BC=88=E5=9C=B0=E5=9D=80?= =?UTF-8?q?=E6=B6=88=E6=AF=92=EF=BC=89=E8=BF=9B=E8=A1=8C=E5=86=85=E5=AD=98?= =?UTF-8?q?=E6=B3=84=E9=9C=B2=E6=A3=80=E6=B5=8B=20*=20C++=E6=B5=8B?= =?UTF-8?q?=E8=AF=95=E7=94=A8=E4=BE=8B=E6=B7=BB=E5=8A=A0ASAN=EF=BC=88?= =?UTF-8?q?=E5=9C=B0=E5=9D=80=E6=B6=88=E6=AF=92=EF=BC=89=E8=BF=9B=E8=A1=8C?= =?UTF-8?q?=E5=86=85=E5=AD=98=E6=B3=84=E9=9C=B2=E6=A3=80=E6=B5=8B=20*=20C+?= 
=?UTF-8?q?+=E6=B5=8B=E8=AF=95=E7=94=A8=E4=BE=8B=E6=B7=BB=E5=8A=A0ASAN?= =?UTF-8?q?=EF=BC=88=E5=9C=B0=E5=9D=80=E6=B6=88=E6=AF=92=EF=BC=89=E8=BF=9B?= =?UTF-8?q?=E8=A1=8C=E5=86=85=E5=AD=98=E6=B3=84=E9=9C=B2=E6=A3=80=E6=B5=8B?= =?UTF-8?q?=20*=20C++=E6=B5=8B=E8=AF=95=E7=94=A8=E4=BE=8B=E6=B7=BB?= =?UTF-8?q?=E5=8A=A0ASAN=EF=BC=88=E5=9C=B0=E5=9D=80=E6=B6=88=E6=AF=92?= =?UTF-8?q?=EF=BC=89=E8=BF=9B=E8=A1=8C=E5=86=85=E5=AD=98=E6=B3=84=E9=9C=B2?= =?UTF-8?q?=E6=A3=80=E6=B5=8B=20*=20C++=E6=B5=8B=E8=AF=95=E7=94=A8?= =?UTF-8?q?=E4=BE=8B=E6=B7=BB=E5=8A=A0ASAN=EF=BC=88=E5=9C=B0=E5=9D=80?= =?UTF-8?q?=E6=B6=88=E6=AF=92=EF=BC=89=E8=BF=9B=E8=A1=8C=E5=86=85=E5=AD=98?= =?UTF-8?q?=E6=B3=84=E9=9C=B2=E6=A3=80=E6=B5=8B=20*=20C++=E6=B5=8B?= =?UTF-8?q?=E8=AF=95=E7=94=A8=E4=BE=8B=E6=B7=BB=E5=8A=A0ASAN=EF=BC=88?= =?UTF-8?q?=E5=9C=B0=E5=9D=80=E6=B6=88=E6=AF=92=EF=BC=89=E8=BF=9B=E8=A1=8C?= =?UTF-8?q?=E5=86=85=E5=AD=98=E6=B3=84=E9=9C=B2=E6=A3=80=E6=B5=8B=20*=20C+?= =?UTF-8?q?+=E6=B5=8B=E8=AF=95=E7=94=A8=E4=BE=8B=E6=B7=BB=E5=8A=A0ASAN?= =?UTF-8?q?=EF=BC=88=E5=9C=B0=E5=9D=80=E6=B6=88=E6=AF=92=EF=BC=89=E8=BF=9B?= =?UTF-8?q?=E8=A1=8C=E5=86=85=E5=AD=98=E6=B3=84=E9=9C=B2=E6=A3=80=E6=B5=8B?= =?UTF-8?q?=20*=20C++=E6=B5=8B=E8=AF=95=E7=94=A8=E4=BE=8B=E6=B7=BB?= =?UTF-8?q?=E5=8A=A0ASAN=EF=BC=88=E5=9C=B0=E5=9D=80=E6=B6=88=E6=AF=92?= =?UTF-8?q?=EF=BC=89=E8=BF=9B=E8=A1=8C=E5=86=85=E5=AD=98=E6=B3=84=E9=9C=B2?= =?UTF-8?q?=E6=A3=80=E6=B5=8B=20*=20Merge=20branch=20'develop'=20of=20http?= =?UTF-8?q?s://gitee.com/ascend/mxrec=20into=20develop=20*=20Merge=20branc?= =?UTF-8?q?h=20'develop'=20of=20https://gitee.com/ascend/mxrec=20into=20de?= =?UTF-8?q?velop=20*=20Merge=20remote-tracking=20branch=20'origin/develop'?= =?UTF-8?q?=20into=20develop=20*=20README=E4=B8=AD=E6=B7=BB=E5=8A=A0mxRec?= =?UTF-8?q?=E7=94=A8=E6=88=B7=E6=8C=87=E5=8D=97=E7=A4=BE=E5=8C=BA=E9=93=BE?= =?UTF-8?q?=E6=8E=A5=E4=BB=A5=E5=8F=8A=E6=9B=B4=E6=96=B0=E5=85=AC=E7=BD=91?= =?UTF-8?q?=E5=9C=B0=E5=9D=80=20*=20README=E4=B8=AD=E6=B7=BB=E5=8A=A0mxRec?= =?UTF-8?q?=E7=94=A8=E6=88=B7=E6=8C=87=E5=8D=97=E7=A4=BE=E5=8C=BA=E9=93=BE?= =?UTF-8?q?=E6=8E=A5=20*=20README=E4=B8=AD=E6=B7=BB=E5=8A=A0mxRec=E7=94=A8?= =?UTF-8?q?=E6=88=B7=E6=8C=87=E5=8D=97=E7=A4=BE=E5=8C=BA=E9=93=BE=E6=8E=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/AccCTR/CMakeLists.txt | 4 ++ src/AccCTR/build/build_test.sh | 3 ++ src/AccCTR/tests/ut/src/unique_test.cpp | 49 +++++++++++++++++++ src/CMakeLists.txt | 2 +- src/test_ut.sh | 3 ++ src/tests/emb_table/embedding_ddr_test.cpp | 2 +- src/tests/emb_table/embedding_mgmt_test.cpp | 2 +- src/tests/emb_table/embedding_static_test.cpp | 2 +- .../file_system/hdfs_file_system_test.cpp | 6 +-- src/tests/leaks.supp | 21 ++++++++ 10 files changed, 87 insertions(+), 7 deletions(-) create mode 100644 src/tests/leaks.supp diff --git a/src/AccCTR/CMakeLists.txt b/src/AccCTR/CMakeLists.txt index 60e2d638..febf1740 100644 --- a/src/AccCTR/CMakeLists.txt +++ b/src/AccCTR/CMakeLists.txt @@ -73,6 +73,10 @@ elseif (${BUILD_MODE} MATCHES "ut") -Wfloat-equal -Wextra -std=c++17 + -fsanitize=address + -fsanitize-recover=address,all + -fno-omit-frame-pointer + -fstack-protector-all ) else () message(FATAL_ERROR "======BUILD_MODE not found") diff --git a/src/AccCTR/build/build_test.sh b/src/AccCTR/build/build_test.sh index 9441efe3..4001b825 100644 --- a/src/AccCTR/build/build_test.sh +++ b/src/AccCTR/build/build_test.sh @@ -24,6 +24,9 @@ TOOL_FILE="create_fake_id.py" CPU_TYPE=$(arch) BUILD_MODE=$1 +# config asan environment variable +export 
ASAN_OPTIONS=halt_on_error=1:detect_leaks=1 + create_data() { cd ${TOOL_PATH} diff --git a/src/AccCTR/tests/ut/src/unique_test.cpp b/src/AccCTR/tests/ut/src/unique_test.cpp index a94ebaf7..94e8d92c 100644 --- a/src/AccCTR/tests/ut/src/unique_test.cpp +++ b/src/AccCTR/tests/ut/src/unique_test.cpp @@ -95,6 +95,13 @@ TEST_F(UniqueTest, Conf) ASSERT_EQ(unique->DoEnhancedUnique(uniqueIn, uniqueOut), 3); // idCntFill空指针 uniqueOut.idCntFill = idCntFill; ASSERT_EQ(unique->DoEnhancedUnique(uniqueIn, uniqueOut), 7); // padding长度过小 + + unique->UnInitialize(); + delete[] idCnt; + delete[] idCntFill; + delete[] uniqueIdCntInBucket; + delete[] uniqueIdInBucket; + std::cout << "===========Conf end=============" << std::endl; } @@ -115,6 +122,9 @@ TEST_F(UniqueTest, usePaddingNoShardingErr) conf.outputType = OutputType::ENHANCED; ASSERT_EQ(unique->Initialize(conf), 9); + + unique->UnInitialize(); + std::cout << "===========usePaddingNoShardingErr end=============" << std::endl; } @@ -132,6 +142,8 @@ TEST_F(UniqueTest, useNegativeDesiredSize) ASSERT_EQ(unique->Initialize(conf), 1); + unique->UnInitialize(); + std::cout << "===========useNegativeDesiredSize end=============" << std::endl; } @@ -207,6 +219,9 @@ TEST_F(UniqueTest, DoUniqueNormal) ASSERT_EQ(uniqueOut.uniqueIdCnt, (int)idsSet.size()); unique->UnInitialize(); + if (path) { + free(path); + } std::cout << "===========DoUniqueNormal end=============" << std::endl; } @@ -404,6 +419,9 @@ TEST_F(UniqueTest, DoEnhancedUniqueErr) ASSERT_EQ(uniqueOut.uniqueIdCnt, (int)idsSet.size()); unique->UnInitialize(); + delete[] uniqueIdInBucket; + delete[] idCnt; + std::cout << "===========DoEnhancedUniqueErr end=============" << std::endl; } @@ -544,6 +562,9 @@ TEST_F(UniqueTest, idCntIsNullSharding) ASSERT_EQ(ret, 3); unique->UnInitialize(); + delete[] uniqueIdCntInBucket; + delete[] uniqueIdInBucket; + std::cout << "===========idCntIsNullSharding end=============" << std::endl; } @@ -620,6 +641,7 @@ TEST_F(UniqueTest, DoUniqueShard) ASSERT_THAT(uniqueIdCntInBucket, testing::ElementsAreArray(expectedUniqueIdCnt)); ASSERT_THAT(idCnt, testing::ElementsAreArray(expectedIdCnt)); unique->UnInitialize(); + delete[] uniqueIdInBucket; std::cout << "===========DoUniqueShard end=============" << std::endl; } @@ -685,6 +707,7 @@ TEST_F(UniqueTest, DoUniqueOnlyShard) ASSERT_THAT(inputId, testing::ElementsAreArray(restoreIds)); ASSERT_THAT(uniqueIdCntInBucket, testing::ElementsAreArray(expectedUniqueIdCnt)); unique->UnInitialize(); + delete[] uniqueIdInBucket; std::cout << "===========DoUniqueOnlyShard end=============" << std::endl; } @@ -769,6 +792,8 @@ TEST_F(UniqueTest, DoUniquePadding) ASSERT_THAT(idCntFill, testing::ElementsAreArray(expectedIdCnt)); ASSERT_EQ(uniqueOut.uniqueIdCnt, conf.paddingSize * conf.shardingNum); unique->UnInitialize(); + delete[] idCnt; + delete[] uniqueIdInBucket; std::cout << "===========DoUniquePadding end=============" << std::endl; } @@ -913,6 +938,7 @@ TEST_F(UniqueTest, DoUniqueShardNumberOversize) ASSERT_THAT(uniqueIdCntInBucket, testing::ElementsAreArray(expectedUniqueIdCnt)); ASSERT_THAT(idCnt, testing::ElementsAreArray(expectedIdCnt)); unique->UnInitialize(); + delete[] uniqueIdInBucket; std::cout << "===========DoUniqueShardNumberOversize end=============" << std::endl; } @@ -981,6 +1007,12 @@ TEST_F(UniqueTest, DoUniqueSpecial) } unique->UnInitialize(); + delete[] uniqueData; + delete[] index; + delete[] idCnt; + delete[] idCntFill; + delete[] uniqueIdCntInBucket; + delete[] uniqueIdInBucket; std::cout << "===========DoUniqueSpecial 
end=============" << std::endl; } @@ -1020,6 +1052,10 @@ TEST_F(UniqueTest, IdLarge) uniqueOut.idCnt = idCnt; ASSERT_EQ(unique->DoEnhancedUnique(uniqueIn, uniqueOut), 6); // ID太大 + + unique->UnInitialize(); + delete[] idCnt; + std::cout << "===========IdLarge end=============" << std::endl; } @@ -1095,6 +1131,8 @@ TEST_F(UniqueTest, DoUniqueNormalInt32) ASSERT_THAT(idCnt, testing::ElementsAreArray(expectedIdCnt)); unique->UnInitialize(); + delete[] uniqueIdInBucket; + std::cout << "===========DoUniqueNormalInt32 end=============" << std::endl; } @@ -1228,6 +1266,7 @@ TEST_F(UniqueTest, DoUniqueShardMultipleTimes) ASSERT_THAT(idCnt, testing::ElementsAreArray(expectedIdCnt)); } unique->UnInitialize(); + delete[] uniqueIdInBucket; std::cout << "===========DoUniqueShardMultipleTimes end=============" << std::endl; } @@ -1312,6 +1351,9 @@ TEST_F(UniqueTest, DoUniquePaddingMultipleTimes) } unique->UnInitialize(); + delete[] idCnt; + delete[] uniqueIdInBucket; + std::cout << "===========DoUniquePaddingMultipleTimes end=============" << std::endl; } @@ -1348,6 +1390,10 @@ TEST_F(UniqueTest, IdCntSmall) uniqueOut.idCnt = idCnt; ASSERT_EQ(unique->DoEnhancedUnique(uniqueIn, uniqueOut), 4); // idcnt过小 + + unique->UnInitialize(); + delete[] idCnt; + std::cout << "===========IdCntSmall end=============" << std::endl; } @@ -1449,6 +1495,7 @@ TEST_F(UniqueTest, DoUniqueLotsDataFunction) ASSERT_THAT(idCnt, testing::ElementsAreArray(expectedIdCnt)); unique->UnInitialize(); + delete[] uniqueIdInBucket; if (path) { free(path); } @@ -1557,6 +1604,8 @@ TEST_F(UniqueTest, DoUniqueLotsDataPaddingFunction) unique->UnInitialize(); ASSERT_EQ(unique->DoEnhancedUnique(uniqueIn, uniqueOut), 11); + delete[] idCnt; + delete[] uniqueIdInBucket; if (path) { free(path); } diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 84505d15..a5cd76da 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -56,7 +56,7 @@ else () message("==EASY_PROFILER_FOUND===") ADD_DEFINITIONS(-DBUILD_WITH_EASY_PROFILER) endif () -set(CMAKE_CXX_FLAGS_DEBUG "$ENV{CXXFLAGS} -ffunction-sections -O0 -Wall -g2 -ggdb") +set(CMAKE_CXX_FLAGS_DEBUG "$ENV{CXXFLAGS} -ffunction-sections -O0 -Wall -g2 -ggdb -fsanitize=address -fsanitize-recover=address,all -fno-omit-frame-pointer -fno-stack-protector") set(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} -ffunction-sections -O3 -Wfatal-errors -DNDEBUG -fPIC -fstack-protector-all -D_FORTIFY_SOURCE=2 -s") set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,-z,relro -Wl,-z,now -Wl,-z,noexecstack") diff --git a/src/test_ut.sh b/src/test_ut.sh index 6146aaab..7305c081 100644 --- a/src/test_ut.sh +++ b/src/test_ut.sh @@ -129,6 +129,9 @@ mkdir build cd build python_path="$(dirname "$(dirname "$(which python3.7)")")" +# config asan environment variable +export ASAN_OPTIONS=halt_on_error=1:detect_leaks=1:fast_unwind_on_malloc=0 +export LSAN_OPTIONS=suppressions=../tests/leaks.supp cmake -DCMAKE_BUILD_TYPE=Debug \ -DTF_PATH="${python_path}"/lib/python3.7/site-packages/"${TF_DIR}" \ diff --git a/src/tests/emb_table/embedding_ddr_test.cpp b/src/tests/emb_table/embedding_ddr_test.cpp index ddad3905..60ec5af6 100644 --- a/src/tests/emb_table/embedding_ddr_test.cpp +++ b/src/tests/emb_table/embedding_ddr_test.cpp @@ -35,7 +35,7 @@ protected: EmbeddingDDRTest() { struct EmbInfoParams embParam(string("test1"), 0, 1000, 2000, true, true); - std::vector vocabsize = {100}; + std::vector vocabsize = {100, 100, 100}; vector initializeInfos = {}; std::vector ssdDataPath = {""}; vector maxStep = {1000}; diff --git 
a/src/tests/emb_table/embedding_mgmt_test.cpp b/src/tests/emb_table/embedding_mgmt_test.cpp index 49f10b4f..055cf5c5 100644 --- a/src/tests/emb_table/embedding_mgmt_test.cpp +++ b/src/tests/emb_table/embedding_mgmt_test.cpp @@ -35,7 +35,7 @@ protected: EmbeddingMgmtTest() { struct EmbInfoParams embParam(string("test1"), 0, 1000, 2000, true, true); - std::vector vocabsize = {100}; + std::vector vocabsize = {100, 100, 100}; vector initializeInfos = {}; std::vector ssdDataPath = {""}; vector maxStep = {1000}; diff --git a/src/tests/emb_table/embedding_static_test.cpp b/src/tests/emb_table/embedding_static_test.cpp index c8a5e252..9e250f64 100644 --- a/src/tests/emb_table/embedding_static_test.cpp +++ b/src/tests/emb_table/embedding_static_test.cpp @@ -34,7 +34,7 @@ protected: EmbeddingStaticTest() { struct EmbInfoParams embParam(string("test1"), 0, 1000, 2000, true, true); - std::vector vocabsize = {100}; + std::vector vocabsize = {100, 100, 100}; vector initializeInfos = {}; std::vector ssdDataPath = {""}; vector maxStep = {1000}; diff --git a/src/tests/file_system/hdfs_file_system_test.cpp b/src/tests/file_system/hdfs_file_system_test.cpp index 0d469ca5..98f733f0 100644 --- a/src/tests/file_system/hdfs_file_system_test.cpp +++ b/src/tests/file_system/hdfs_file_system_test.cpp @@ -26,10 +26,10 @@ using namespace emock; void MockHdfs() { + EMOCK(&HdfsWrapper::LoadHdfsLib).stubs().will(ignoreReturnValue()); hdfsFS ConnectFs; hdfsFile hdfsFileHandler; hdfsFileInfo* fileInfo; - EMOCK(&HdfsWrapper::LoadHdfsLib).stubs().will(ignoreReturnValue()); EMOCK(&HdfsWrapper::CloseHdfsLib).stubs().will(ignoreReturnValue()); EMOCK(&HdfsWrapper::Connect).stubs().will(returnValue(ConnectFs)); EMOCK(&HdfsWrapper::Disconnect).stubs().will(returnValue(1)); @@ -75,8 +75,8 @@ TEST_F(HdfsFileSystemTest, CreateDirFailed) TEST_F(HdfsFileSystemTest, GetFileSize) { - hdfsFileInfo* fileInfo; - EMOCK(&HdfsWrapper::GetPathInfo).stubs().will(returnValue(fileInfo)); + std::unique_ptr fileInfo = std::make_unique(); + EMOCK(&HdfsWrapper::GetPathInfo).stubs().will(returnValue(fileInfo.get())); string filePath = "hdfs://master:9000/test_dir/"; auto fileSystemHandler = make_unique(); auto fileSystemPtr = fileSystemHandler->Create(filePath); diff --git a/src/tests/leaks.supp b/src/tests/leaks.supp new file mode 100644 index 00000000..ebe0718d --- /dev/null +++ b/src/tests/leaks.supp @@ -0,0 +1,21 @@ +# Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +# There are known leaks. +# 1.known mpi leaks. 
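+# Suppression format: each "leak:<pattern>" line makes LeakSanitizer drop
+# leak reports whose stack frames match the pattern (standard LSan
+# suppression-file syntax).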
+leak:libmpi.so* +leak:libopen-pal.so* +leak:libpmix.so* +leak:libc.so* \ No newline at end of file -- Gitee From 74be454e15670afd84926c576d9ffc182b38fb6a Mon Sep 17 00:00:00 2001 From: yangzhen_BIG Date: Mon, 3 Jun 2024 02:08:02 +0000 Subject: [PATCH 185/302] =?UTF-8?q?!169=20=E4=BF=AE=E5=A4=8D=EF=BC=88embCa?= =?UTF-8?q?che=EF=BC=89=EF=BC=9Asave=E5=BC=82=E5=B8=B8=E9=80=80=E5=87=BA?= =?UTF-8?q?=E5=9C=BA=E6=99=AF=E9=98=BB=E5=A1=9E=E5=9C=A8EvalTask=20*=20?= =?UTF-8?q?=E4=BF=AE=E5=A4=8D=EF=BC=88embCache=EF=BC=89=EF=BC=9Asave?= =?UTF-8?q?=E5=BC=82=E5=B8=B8=E9=80=80=E5=87=BA=E5=9C=BA=E6=99=AF=E9=98=BB?= =?UTF-8?q?=E5=A1=9E=E5=9C=A8EvalTask?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core/hybrid_mgmt/hybrid_mgmt.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/core/hybrid_mgmt/hybrid_mgmt.cpp b/src/core/hybrid_mgmt/hybrid_mgmt.cpp index 6b998205..4c64c2ec 100644 --- a/src/core/hybrid_mgmt/hybrid_mgmt.cpp +++ b/src/core/hybrid_mgmt/hybrid_mgmt.cpp @@ -424,6 +424,7 @@ void HybridMgmt::Destroy() cvLastRecvFinishMap[embInfo.name][index].notify_all(); } } + cvCheckSave.notify_all(); // 防止save异常退出场景阻塞在EvalTask { // 获取锁 避免KeyProcess中手动发送结束信息时通道关闭 -- Gitee From 5935823be7393f2803280a7f17414ed52d76e133 Mon Sep 17 00:00:00 2001 From: yangzhen_BIG Date: Mon, 3 Jun 2024 02:16:45 +0000 Subject: [PATCH 186/302] =?UTF-8?q?!166=20=E7=89=B9=E6=80=A7=EF=BC=88embCa?= =?UTF-8?q?che=EF=BC=89=EF=BC=9A=E4=BF=AE=E5=A4=8D=E9=9D=99=E6=80=81shape?= =?UTF-8?q?=20gather=E8=B6=8A=E7=95=8C=E9=97=AE=E9=A2=98=20*=20=E7=89=B9?= =?UTF-8?q?=E6=80=A7=EF=BC=88embCache=EF=BC=89=EF=BC=9A=E4=BF=AE=E5=A4=8D?= =?UTF-8?q?=E9=9D=99=E6=80=81shape=20gather=E8=B6=8A=E7=95=8C=E9=97=AE?= =?UTF-8?q?=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core/hybrid_mgmt/hybrid_mgmt.cpp | 6 ++++++ src/core/utils/common.h | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/core/hybrid_mgmt/hybrid_mgmt.cpp b/src/core/hybrid_mgmt/hybrid_mgmt.cpp index 4c64c2ec..b318f2d4 100644 --- a/src/core/hybrid_mgmt/hybrid_mgmt.cpp +++ b/src/core/hybrid_mgmt/hybrid_mgmt.cpp @@ -2047,9 +2047,15 @@ void HybridMgmt::SendRestoreVec(const EmbBaseInfo &info, bool &remainBatchOut) void HybridMgmt::SendLookupOffsets(const EmbBaseInfo &info, vector &uniqueKeys, vector &restoreVecSec) { + // uniqueKeys already transfer to offset in GetSwapPairsAndKey2Offset + // graph will filter out invalid offset(-1). 
see function _set_specific_value_for_non_valid_key TimeCost sendLookupOffsetsTC; std::vector lookupOffsets; for (const auto &index : restoreVecSec) { + if (index == INVALID_INDEX_VALUE) { + lookupOffsets.emplace_back(static_cast(INVALID_KEY_VALUE)); + continue; + } lookupOffsets.emplace_back(uniqueKeys[index]); } hdTransfer->Send(TransferChannel::LOOKUP, { Vec2TensorI32(lookupOffsets) }, info.channelId, info.name); diff --git a/src/core/utils/common.h b/src/core/utils/common.h index 5bb93a41..75837349 100644 --- a/src/core/utils/common.h +++ b/src/core/utils/common.h @@ -102,8 +102,8 @@ namespace MxRec { constexpr int EOS_TIMEOUT = 30; constexpr size_t DEFAULT_RANDOM_SEED = 10086; - // constexpr int INVALID_KEY_VALUE = -1; constexpr int64_t INVALID_KEY_VALUE = -1; + constexpr int32_t INVALID_INDEX_VALUE = -1; constexpr int ALLTOALLVC_ALIGN = 128; constexpr int PROFILING_START_BATCH_ID = 100; constexpr int PROFILING_END_BATCH_ID = 200; -- Gitee From 1a71bb5240518635ba17278faf1c3e712b06b957 Mon Sep 17 00:00:00 2001 From: penghuiyang <1060916628@qq.com> Date: Mon, 3 Jun 2024 03:38:00 +0000 Subject: [PATCH 187/302] =?UTF-8?q?!159=20dlrm=20sess=E9=80=82=E9=85=8Dlaz?= =?UTF-8?q?y=5Fadam=E4=BC=98=E5=8C=96=E5=99=A8=20*=20=E4=BD=BF=E7=94=A8has?= =?UTF-8?q?attr=E5=88=A4=E6=96=AD=E4=BC=98=E5=8C=96=E5=99=A8=20*=20dlrm=20?= =?UTF-8?q?sess=E9=80=82=E9=85=8Dlazy=5Fadam=E4=BC=98=E5=8C=96=E5=99=A8--?= =?UTF-8?q?=E6=A3=80=E8=A7=86=E4=BF=AE=E6=94=B9=20*=20dlrm=20sess=E9=80=82?= =?UTF-8?q?=E9=85=8Dlazy=5Fadam=E4=BC=98=E5=8C=96=E5=99=A8--=E6=A3=80?= =?UTF-8?q?=E8=A7=86=E4=BF=AE=E6=94=B9=20*=20dlrm=20sess=E9=80=82=E9=85=8D?= =?UTF-8?q?lazy=5Fadam=E4=BC=98=E5=8C=96=E5=99=A8--=E6=A3=80=E8=A7=86?= =?UTF-8?q?=E4=BF=AE=E6=94=B9=20*=20dlrm=20sess=E9=80=82=E9=85=8Dlazy=5Fad?= =?UTF-8?q?am=E4=BC=98=E5=8C=96=E5=99=A8=20*=20dlrm=20sess=E9=80=82?= =?UTF-8?q?=E9=85=8Dlazy=5Fadam=E4=BC=98=E5=8C=96=E5=99=A8-=E9=97=A8?= =?UTF-8?q?=E7=A6=81=E6=89=AB=E6=8F=8F=E4=BF=AE=E6=94=B9=20*=20dlrm=20sess?= =?UTF-8?q?=E9=80=82=E9=85=8Dlazy=5Fadam=E4=BC=98=E5=8C=96=E5=99=A8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/dlrm/model/config.py | 2 ++ examples/dlrm/model/delay_loss_scale.py | 30 +++++++++++++++++++------ examples/dlrm/model/optimizer.py | 24 +++++++++++++++----- 3 files changed, 43 insertions(+), 13 deletions(-) diff --git a/examples/dlrm/model/config.py b/examples/dlrm/model/config.py index 45e8af40..78115d61 100644 --- a/examples/dlrm/model/config.py +++ b/examples/dlrm/model/config.py @@ -128,6 +128,8 @@ class Config: self.hashtable_threshold = 1 self.USE_PIPELINE_TEST = False + # False indicates use SGD optimizer, else use LazyAdam. 
If True, is incompatible with dynamic_expansion + self.use_lazy_adam_optimizer = False # 动态学习率 GLOBAL_BATCH_SIZE = 8192 * 8 diff --git a/examples/dlrm/model/delay_loss_scale.py b/examples/dlrm/model/delay_loss_scale.py index 0cb50688..01bb0d8f 100644 --- a/examples/dlrm/model/delay_loss_scale.py +++ b/examples/dlrm/model/delay_loss_scale.py @@ -17,32 +17,48 @@ import tensorflow as tf from tensorflow.python.training import optimizer +from config import Config + class DenseLossScaleOptimizer: - def __init__(self, opt, loss_scale): + def __init__(self, opt: optimizer.Optimizer, loss_scale: int) -> None: if not isinstance(opt, optimizer.Optimizer): raise ValueError('"opt" must be an instance of Optimizer, but got: %s' % type(opt)) self._optimizer = opt self._loss_scale = tf.convert_to_tensor(loss_scale, tf.float32) - self._optimizer._learning_rate = self._optimizer._learning_rate / self._loss_scale + _update_lr_loss_scale(self._optimizer, loss_scale) def compute_gradients(self, loss, var_list=None): - return self._optimizer.compute_gradients(loss*self._loss_scale, var_list=var_list) + return self._optimizer.compute_gradients(loss * self._loss_scale, var_list=var_list) def apply_gradients(self, avg_grads): return self._optimizer.apply_gradients(avg_grads) class SparseLossScaleOptimizer: - def __init__(self, opt, loss_scale): + def __init__(self, opt: optimizer.Optimizer, loss_scale: int) -> None: if not isinstance(opt, optimizer.Optimizer): raise ValueError('"opt" must be an instance of Optimizer, but got: %s' % type(opt)) self._optimizer = opt self._loss_scale = tf.convert_to_tensor(loss_scale, tf.float32) - self._optimizer._learning_rate = self._optimizer._learning_rate / self._loss_scale + _update_lr_loss_scale(self._optimizer, loss_scale) def compute_gradients(self, loss, var_list=None): - return tf.gradients(loss*self._loss_scale, var_list) + return tf.gradients(loss * self._loss_scale, var_list) def apply_gradients(self, grads_and_vars): - return self._optimizer.apply_gradients(grads_and_vars) \ No newline at end of file + return self._optimizer.apply_gradients(grads_and_vars) + + +def _update_lr_loss_scale(opt, loss_scale): + if loss_scale <= 0: + raise RuntimeError("the loss_scale must be greater than zero.") + loss_scale = tf.convert_to_tensor(loss_scale, tf.float32) + if hasattr(opt, "_lr"): + # LazyAdam or Adam optimizer + opt._lr = opt._lr / loss_scale + elif hasattr(opt, "_learning_rate"): + # SGD optimizer + opt._learning_rate = opt._learning_rate / loss_scale + else: + raise RuntimeError("`opt` should have a `_learning_rate` or `_lr` named field.") diff --git a/examples/dlrm/model/optimizer.py b/examples/dlrm/model/optimizer.py index 7a6d6878..18dbe288 100644 --- a/examples/dlrm/model/optimizer.py +++ b/examples/dlrm/model/optimizer.py @@ -15,20 +15,32 @@ # ============================================================================== import tensorflow as tf + from delay_loss_scale import DenseLossScaleOptimizer, SparseLossScaleOptimizer from gradient_descent_w import create_hash_optimizer from mx_rec.util.initialize import ConfigInitializer from mx_rec.optimizers.gradient_descent_by_addr import create_hash_optimizer_by_addr +from mx_rec.optimizers import lazy_adam def get_dense_and_sparse_optimizer(cfg): - dense_optimizer = tf.train.GradientDescentOptimizer(learning_rate=cfg.learning_rate[0]) use_dynamic_expansion = ConfigInitializer.get_instance().use_dynamic_expansion - if use_dynamic_expansion: - sparse_optimizer = 
create_hash_optimizer_by_addr(learning_rate=cfg.learning_rate[1], weight_decay=0.0001) + if cfg.use_lazy_adam_optimizer: + if use_dynamic_expansion: + raise RuntimeError("model is incompatible with dynamic_expansion when use lazy_adam optimizer.") + # use lazy_adam optimizer + dense_optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=cfg.learning_rate[0]) + sparse_optimizer = lazy_adam.create_hash_optimizer(learning_rate=cfg.learning_rate[1]) + loss_scale = 65536 else: - sparse_optimizer = create_hash_optimizer(learning_rate=cfg.learning_rate[1], weight_decay=0.0001) - sparse_optimizer = SparseLossScaleOptimizer(sparse_optimizer, 1024) - dense_optimizer = DenseLossScaleOptimizer(dense_optimizer, 1024) + # use SGD optimizer + dense_optimizer = tf.compat.v1.train.GradientDescentOptimizer(learning_rate=cfg.learning_rate[0]) + if use_dynamic_expansion: + sparse_optimizer = create_hash_optimizer_by_addr(learning_rate=cfg.learning_rate[1], weight_decay=0.0001) + else: + sparse_optimizer = create_hash_optimizer(learning_rate=cfg.learning_rate[1], weight_decay=0.0001) + loss_scale = 1024 + sparse_optimizer = SparseLossScaleOptimizer(sparse_optimizer, loss_scale) + dense_optimizer = DenseLossScaleOptimizer(dense_optimizer, loss_scale) return dense_optimizer, sparse_optimizer -- Gitee From dd3deb69d0b6ef9fd0d7c638100fa8347d896c9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=83=AD=E6=9C=9B?= <1244372993@qq.com> Date: Mon, 3 Jun 2024 14:47:32 +0800 Subject: [PATCH 188/302] =?UTF-8?q?WideDeep=E6=A8=A1=E5=9E=8B=E8=BF=81?= =?UTF-8?q?=E7=A7=BB=20=E5=8E=9F=E5=A7=8Bdlrm=E6=A8=A1=E5=9E=8B=E4=BB=A3?= =?UTF-8?q?=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/WideDeep/model/config.py | 241 +++++++++ examples/WideDeep/model/delay_loss_scale.py | 64 +++ examples/WideDeep/model/gradient_descent_w.py | 71 +++ examples/WideDeep/model/main_mxrec.py | 469 ++++++++++++++++++ examples/WideDeep/model/mean_auc.py | 40 ++ examples/WideDeep/model/model.py | 94 ++++ examples/WideDeep/model/op_impl_mode.ini | 1 + examples/WideDeep/model/optimizer.py | 46 ++ examples/WideDeep/model/run.sh | 99 ++++ 9 files changed, 1125 insertions(+) create mode 100644 examples/WideDeep/model/config.py create mode 100644 examples/WideDeep/model/delay_loss_scale.py create mode 100644 examples/WideDeep/model/gradient_descent_w.py create mode 100644 examples/WideDeep/model/main_mxrec.py create mode 100644 examples/WideDeep/model/mean_auc.py create mode 100644 examples/WideDeep/model/model.py create mode 100644 examples/WideDeep/model/op_impl_mode.ini create mode 100644 examples/WideDeep/model/optimizer.py create mode 100644 examples/WideDeep/model/run.sh diff --git a/examples/WideDeep/model/config.py b/examples/WideDeep/model/config.py new file mode 100644 index 00000000..78115d61 --- /dev/null +++ b/examples/WideDeep/model/config.py @@ -0,0 +1,241 @@ +# coding=utf-8 +# Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +import enum +import os + +import tensorflow as tf +from tensorflow.core.protobuf.rewriter_config_pb2 import RewriterConfig +from npu_bridge.estimator.npu.npu_config import NPURunConfig + +SSD_DATA_PATH = ["ssd_data"] + + +class LearningRateScheduler: + """ + LR Scheduler combining Polynomial Decay with Warmup at the beginning. + TF-based cond operations necessary for performance in graph mode. + """ + + def __init__(self, base_lr_dense, base_lr_sparse, warmup_steps, decay_start_step, decay_steps): + self.warmup_steps = tf.constant(warmup_steps, dtype=tf.int32) + self.decay_start_step = tf.constant(decay_start_step, dtype=tf.int32) + self.decay_steps = tf.constant(decay_steps) + self.decay_end_step = decay_start_step + decay_steps # 65041 + self.poly_power = 2.0 + self.base_lr_dense = base_lr_dense + self.base_lr_sparse = base_lr_sparse + + def calc(self, global_step): + # used for the warmup stage + warmup_step = tf.cast(1 / self.warmup_steps, tf.float32) + lr_factor_warmup = 1 - tf.cast(self.warmup_steps - global_step, tf.float32) * warmup_step + lr_factor_warmup = tf.cast(lr_factor_warmup, tf.float32) + # used for the constant stage + lr_factor_constant = tf.cast(1.0, tf.float32) + + # used for the decay stage + lr_factor_decay = (self.decay_end_step - global_step) / self.decay_steps + lr_factor_decay = tf.math.pow(lr_factor_decay, self.poly_power) + lr_factor_decay = tf.cast(lr_factor_decay, tf.float32) + sparse_after_decay = tf.cast(1 / self.decay_steps, tf.float32) + + lr_factor_decay_sparse = tf.cond( + global_step < self.decay_end_step, + lambda: lr_factor_decay, + lambda: sparse_after_decay, + ) + + lr_factor_decay_dense = tf.cond( + global_step < self.decay_end_step, + lambda: lr_factor_decay, + lambda: sparse_after_decay, + ) + + poly_schedule_sparse = tf.cond( + global_step < self.decay_start_step, + lambda: lr_factor_constant, + lambda: lr_factor_decay_sparse, + ) + + poly_schedule_dense = tf.cond( + global_step < self.decay_start_step, + lambda: lr_factor_constant, + lambda: lr_factor_decay_dense, + ) + + lr_factor_sparse = tf.cond( + global_step < self.warmup_steps, lambda: lr_factor_warmup, lambda: poly_schedule_sparse + ) + + lr_factor_dense = tf.cond( + global_step < self.warmup_steps, lambda: lr_factor_warmup, lambda: poly_schedule_dense + ) + + lr_sparse = self.base_lr_sparse * lr_factor_sparse + lr_dense = self.base_lr_dense * lr_factor_dense + return lr_dense, lr_sparse + + +class CacheModeEnum(enum.Enum): + HBM = "HBM" + DDR = "DDR" + SSD = "SSD" + + +class Config: + def __init__(self, ): + self.rank_id = int(os.getenv("OMPI_COMM_WORLD_RANK")) if os.getenv("OMPI_COMM_WORLD_RANK") else None + tmp = os.getenv("TRAIN_RANK_SIZE") + if tmp is None: + raise ValueError("please export TRAIN_RANK_SIZE") + self.rank_size = int(tmp) + + self.data_path = os.getenv("DLRM_CRITEO_DATA_PATH") + self.train_file_pattern = "train" + self.test_file_pattern = "test" + + self.batch_size = 8192 + self.line_per_sample = 1024 + self.train_epoch = 3 + self.test_epoch = 1 + self.perform_shuffle = False + + self.key_type = tf.int64 + self.label_type = tf.float32 + self.value_type = tf.int64 + + self.feat_cnt = 26 + self.__set_emb_table_size() + + self.field_num = 26 + self.send_count = 46000 // self.rank_size + + self.emb_dim = 128 + self.hashtable_threshold = 1 + + self.USE_PIPELINE_TEST = False + # False 
indicates use SGD optimizer, else use LazyAdam. If True, is incompatible with dynamic_expansion + self.use_lazy_adam_optimizer = False + + # 动态学习率 + GLOBAL_BATCH_SIZE = 8192 * 8 + LR_SCHEDULE_STEPS = [ + int(2750 * 55296 / GLOBAL_BATCH_SIZE), + int(49315 * 55296 / GLOBAL_BATCH_SIZE), + int(27772 * 55296 / GLOBAL_BATCH_SIZE), + ] + self.global_step = tf.Variable(0, trainable=False) + _lr_scheduler = LearningRateScheduler( + 28.443, + 33.71193, + LR_SCHEDULE_STEPS[0], + LR_SCHEDULE_STEPS[1], + LR_SCHEDULE_STEPS[2], + ) + self.learning_rate = _lr_scheduler.calc(self.global_step) + + def __set_emb_table_size(self): + self.cache_mode = os.getenv("CACHE_MODE") + if self.cache_mode is None: + raise ValueError("please export CACHE_MODE environment variable, support:[HBM, DDR, SSD]") + + if self.cache_mode == CacheModeEnum.HBM.value: + self.dev_vocab_size = 24_000_000 * self.rank_size + self.host_vocab_size = 0 + elif self.cache_mode == CacheModeEnum.DDR.value: + self.dev_vocab_size = 500_000 * self.rank_size + self.host_vocab_size = 24_000_000 * self.rank_size + elif self.cache_mode == CacheModeEnum.SSD.value: + self.dev_vocab_size = 100_000 * self.rank_size + self.host_vocab_size = 2_000_000 * self.rank_size + self.ssd_vocab_size = 24_000_000 * self.rank_size + else: + raise ValueError(f"get CACHE_MODE:{self.cache_mode}, expect in [HBM, DDR, SSD]") + + def get_emb_table_cfg(self): + if self.cache_mode == CacheModeEnum.HBM.value: + return {"device_vocabulary_size": self.dev_vocab_size} + elif self.cache_mode == CacheModeEnum.DDR.value: + return {"device_vocabulary_size": self.dev_vocab_size, + "host_vocabulary_size": self.host_vocab_size} + elif self.cache_mode == CacheModeEnum.SSD.value: + return {"device_vocabulary_size": self.dev_vocab_size, + "host_vocabulary_size": self.host_vocab_size, + "ssd_vocabulary_size": self.ssd_vocab_size, + "ssd_data_path": SSD_DATA_PATH} + else: + raise RuntimeError(f"get CACHE_MODE:{self.cache_mode}, check Config.__set_emb_table_size implementation") + + +def sess_config(dump_data=False, dump_path="./dump_output", dump_steps="0|1|2"): + session_config = tf.ConfigProto(allow_soft_placement=False, + log_device_placement=False) + session_config.gpu_options.allow_growth = True + custom_op = session_config.graph_options.rewrite_options.custom_optimizers.add() + custom_op.name = "NpuOptimizer" + custom_op.parameter_map["mix_compile_mode"].b = False + custom_op.parameter_map["use_off_line"].b = True + custom_op.parameter_map["min_group_size"].b = 1 + # 可选配置level0:pairwise;level1:pairwise + custom_op.parameter_map["HCCL_algorithm"].s = tf.compat.as_bytes("level0:fullmesh;level1:fullmesh") + custom_op.parameter_map["enable_data_pre_proc"].b = True + custom_op.parameter_map["iterations_per_loop"].i = 10 + custom_op.parameter_map["precision_mode"].s = tf.compat.as_bytes("allow_mix_precision") + custom_op.parameter_map["hcom_parallel"].b = False + custom_op.parameter_map["op_precision_mode"].s = tf.compat.as_bytes("op_impl_mode.ini") + custom_op.parameter_map["op_execute_timeout"].i = 2000 + custom_op.parameter_map["variable_memory_max_size"].s = tf.compat.as_bytes( + str(13 * 1024 * 1024 * 1024)) # total 31 need 13; + custom_op.parameter_map["graph_memory_max_size"].s = tf.compat.as_bytes(str(18 * 1024 * 1024 * 1024)) # need 25 + custom_op.parameter_map["stream_max_parallel_num"].s = tf.compat.as_bytes("DNN_VM_AICPU:3,AIcoreEngine:3") + + if dump_data: + custom_op.parameter_map["enable_dump"].b = True + custom_op.parameter_map["dump_path"].s = tf.compat.as_bytes(dump_path) + 
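+        # dump_step takes "|"-separated step indices, matching the default
+        # dump_steps value "0|1|2" in this function's signature.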
custom_op.parameter_map["dump_step"].s = tf.compat.as_bytes(dump_steps) + custom_op.parameter_map["dump_mode"].s = tf.compat.as_bytes("all") + + session_config.graph_options.rewrite_options.remapping = RewriterConfig.OFF + session_config.graph_options.rewrite_options.memory_optimization = RewriterConfig.OFF + + return session_config + + +def get_npu_run_config(): + session_config = tf.ConfigProto(allow_soft_placement=False, + log_device_placement=False) + + session_config.gpu_options.allow_growth = True + custom_op = session_config.graph_options.rewrite_options.custom_optimizers.add() + custom_op.name = "NpuOptimizer" + session_config.graph_options.rewrite_options.remapping = RewriterConfig.OFF + session_config.graph_options.rewrite_options.memory_optimization = RewriterConfig.OFF + + run_config = NPURunConfig( + save_summary_steps=1000, + save_checkpoints_steps=100, + keep_checkpoint_max=5, + session_config=session_config, + log_step_count_steps=20, + precision_mode='allow_mix_precision', + enable_data_pre_proc=True, + iterations_per_loop=1, + jit_compile=False, + op_compiler_cache_mode="enable", + HCCL_algorithm="level0:fullmesh;level1:fullmesh" # 可选配置:level0:pairwise;level1:pairwise + ) + return run_config diff --git a/examples/WideDeep/model/delay_loss_scale.py b/examples/WideDeep/model/delay_loss_scale.py new file mode 100644 index 00000000..01bb0d8f --- /dev/null +++ b/examples/WideDeep/model/delay_loss_scale.py @@ -0,0 +1,64 @@ +# coding=utf-8 +# Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +import tensorflow as tf +from tensorflow.python.training import optimizer + +from config import Config + + +class DenseLossScaleOptimizer: + def __init__(self, opt: optimizer.Optimizer, loss_scale: int) -> None: + if not isinstance(opt, optimizer.Optimizer): + raise ValueError('"opt" must be an instance of Optimizer, but got: %s' % type(opt)) + self._optimizer = opt + self._loss_scale = tf.convert_to_tensor(loss_scale, tf.float32) + _update_lr_loss_scale(self._optimizer, loss_scale) + + def compute_gradients(self, loss, var_list=None): + return self._optimizer.compute_gradients(loss * self._loss_scale, var_list=var_list) + + def apply_gradients(self, avg_grads): + return self._optimizer.apply_gradients(avg_grads) + + +class SparseLossScaleOptimizer: + def __init__(self, opt: optimizer.Optimizer, loss_scale: int) -> None: + if not isinstance(opt, optimizer.Optimizer): + raise ValueError('"opt" must be an instance of Optimizer, but got: %s' % type(opt)) + self._optimizer = opt + self._loss_scale = tf.convert_to_tensor(loss_scale, tf.float32) + _update_lr_loss_scale(self._optimizer, loss_scale) + + def compute_gradients(self, loss, var_list=None): + return tf.gradients(loss * self._loss_scale, var_list) + + def apply_gradients(self, grads_and_vars): + return self._optimizer.apply_gradients(grads_and_vars) + + +def _update_lr_loss_scale(opt, loss_scale): + if loss_scale <= 0: + raise RuntimeError("the loss_scale must be greater than zero.") + loss_scale = tf.convert_to_tensor(loss_scale, tf.float32) + if hasattr(opt, "_lr"): + # LazyAdam or Adam optimizer + opt._lr = opt._lr / loss_scale + elif hasattr(opt, "_learning_rate"): + # SGD optimizer + opt._learning_rate = opt._learning_rate / loss_scale + else: + raise RuntimeError("`opt` should have a `_learning_rate` or `_lr` named field.") diff --git a/examples/WideDeep/model/gradient_descent_w.py b/examples/WideDeep/model/gradient_descent_w.py new file mode 100644 index 00000000..53adb996 --- /dev/null +++ b/examples/WideDeep/model/gradient_descent_w.py @@ -0,0 +1,71 @@ +# coding=utf-8 +# Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from collections import defaultdict + +import tensorflow as tf +from tensorflow.python.ops import math_ops +from tensorflow.python.training import gradient_descent +from mx_rec.optimizers.base import CustomizedOptimizer +from mx_rec.util.log import logger +from mx_rec.util.initialize import ConfigInitializer + + +def create_hash_optimizer(learning_rate, weight_decay=0.0001, use_locking=False, name="GradientDescent"): + optimizer = CustomizedGradientDescentWithWeighDecay(learning_rate=learning_rate, + weight_decay=weight_decay, + use_locking=use_locking, + name=name) + ConfigInitializer.get_instance().optimizer_config.optimizer_instance = optimizer + return optimizer + + +class CustomizedGradientDescentWithWeighDecay(gradient_descent.GradientDescentOptimizer, CustomizedOptimizer): + name_counter = defaultdict(int) + + def __init__(self, learning_rate, weight_decay, use_locking=False, name="GradientDescent"): + self.optimizer_type = "gradient_descent_with_weight_decay" + self.weight_decay = weight_decay + super(CustomizedGradientDescentWithWeighDecay, self)._get_name(name=name) + super(CustomizedGradientDescentWithWeighDecay, self).__init__( + learning_rate=learning_rate, use_locking=use_locking, name=self.unique_name + ) + self._slot_num = 0 + self._derivative = 1 + + def get_slot_init_values(self): + logger.info("no slot for gradient descent") + return [] + + def _apply_sparse_duplicate_indices(self, grad, var): + logger.debug(">>>> Enter _apply_sparse_duplicate_indices") + nd_indices = tf.expand_dims(grad.indices, 1) + logger.info(f"weigh_decay={self.weight_decay}") + if self.weight_decay is None: + nd_value = grad.values * math_ops.cast(self._learning_rate_tensor, var.dtype.base_dtype) + else: + nd_value = (grad.values + math_ops.cast(self.weight_decay, var.dtype.base_dtype) * + tf.gather(var, grad.indices)) * math_ops.cast(self._learning_rate_tensor, var.dtype.base_dtype) + var_update_op = tf.scatter_nd_add(var, nd_indices, -nd_value, use_locking=self._use_locking) + return var_update_op + + def _apply_dense(self, grad, var): + logger.debug(">>>> Enter _apply_dense") + raise NotImplementedError("You are using a wrong type of variable.") diff --git a/examples/WideDeep/model/main_mxrec.py b/examples/WideDeep/model/main_mxrec.py new file mode 100644 index 00000000..51ed7c4a --- /dev/null +++ b/examples/WideDeep/model/main_mxrec.py @@ -0,0 +1,469 @@ +# coding=utf-8 +# Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +import os +import shutil +import time +import warnings +import random +from glob import glob + +import tensorflow as tf +from sklearn.metrics import roc_auc_score +import numpy as np + +from optimizer import get_dense_and_sparse_optimizer +from config import sess_config, Config, SSD_DATA_PATH, CacheModeEnum +from model import MyModel +from mx_rec.constants.constants import ASCEND_SPARSE_LOOKUP_LOCAL_EMB, ASCEND_SPARSE_LOOKUP_ID_OFFSET +from mx_rec.core.asc.helper import FeatureSpec, get_asc_insert_func +from mx_rec.core.asc.manager import start_asc_pipeline +from mx_rec.core.embedding import create_table, sparse_lookup +from mx_rec.core.feature_process import EvictHook +from mx_rec.graph.modifier import modify_graph_and_start_emb_cache, GraphModifierHook +from mx_rec.constants.constants import ASCEND_TIMESTAMP +from mx_rec.util.initialize import ConfigInitializer, init, terminate_config_initializer +from mx_rec.util.ops import import_host_pipeline_ops +import mx_rec.util as mxrec_util +from mx_rec.util.variable import get_dense_and_sparse_variable +from mx_rec.util.log import logger +from npu_bridge.npu_init import * + +npu_plugin.set_device_sat_mode(0) + +dense_hashtable_seed = 128 +sparse_hashtable_seed = 128 +shuffle_seed = 128 +random.seed(shuffle_seed) + + +def add_timestamp_func(batch): + timestamp = import_host_pipeline_ops().return_timestamp(tf.cast(batch['label'], dtype=tf.int64)) + # tf.constant(np.random.randint(1,1688109060,1)), tf.int64)) + batch["timestamp"] = timestamp + return batch + + +def make_batch_and_iterator(config, feature_spec_list, is_training, dump_graph, is_use_faae=False): + if config.USE_PIPELINE_TEST: + num_parallel = 1 + else: + num_parallel = 8 + + def extract_fn(data_record): + features = { + # Extract features using the keys set during creation + 'label': tf.compat.v1.FixedLenFeature(shape=(config.line_per_sample,), dtype=tf.int64), + 'sparse_feature': tf.compat.v1.FixedLenFeature(shape=(26 * config.line_per_sample,), dtype=tf.int64), + 'dense_feature': tf.compat.v1.FixedLenFeature(shape=(13 * config.line_per_sample,), dtype=tf.float32), + } + sample = tf.compat.v1.parse_single_example(data_record, features) + return sample + + def reshape_fn(batch): + batch['label'] = tf.reshape(batch['label'], [-1, 1]) + batch['dense_feature'] = tf.reshape(batch['dense_feature'], [-1, 13]) + batch['dense_feature'] = tf.math.log(batch['dense_feature'] + 3.0) + batch['sparse_feature'] = tf.reshape(batch['sparse_feature'], [-1, 26]) + return batch + + if is_training: + files_list = glob(os.path.join(config.data_path, config.train_file_pattern) + '/*.tfrecord') + else: + files_list = glob(os.path.join(config.data_path, config.test_file_pattern) + '/*.tfrecord') + dataset = tf.data.TFRecordDataset(files_list, num_parallel_reads=num_parallel) + batch_size = config.batch_size // config.line_per_sample + + dataset = dataset.shard(config.rank_size, config.rank_id) + if is_training: + dataset = dataset.shuffle(batch_size * 1000, seed=shuffle_seed) + if is_training: + dataset = dataset.repeat(config.train_epoch) + else: + dataset = dataset.repeat(config.test_epoch) + dataset = dataset.map(extract_fn, num_parallel_calls=num_parallel).batch(batch_size, + drop_remainder=True) + dataset = dataset.map(reshape_fn, num_parallel_calls=num_parallel) + if is_use_faae: + dataset = dataset.map(add_timestamp_func) + + if not MODIFY_GRAPH_FLAG: + insert_fn = get_asc_insert_func(tgt_key_specs=feature_spec_list, 
is_training=is_training, dump_graph=dump_graph) + dataset = dataset.map(insert_fn) + + dataset = dataset.prefetch(100) + + iterator = dataset.make_initializable_iterator() + batch = iterator.get_next() + return batch, iterator + + +def model_forward(feature_list, hash_table_list, batch, is_train, modify_graph): + embedding_list = [] + logger.debug(f"In model_forward function, is_train: {is_train}, feature_list: {len(feature_list)}, " + f"hash_table_list: {len(hash_table_list)}") + for feature, hash_table in zip(feature_list, hash_table_list): + if MODIFY_GRAPH_FLAG: + feature = batch["sparse_feature"] + embedding = sparse_lookup(hash_table, feature, cfg.send_count, dim=None, is_train=is_train, + name="user_embedding_lookup", modify_graph=modify_graph, batch=batch, + access_and_evict_config=None) + embedding_list.append(embedding) + + if len(embedding_list) == 1: + emb = embedding_list[0] + elif len(embedding_list) > 1: + emb = tf.reduce_sum(embedding_list, axis=0, keepdims=False) + else: + raise ValueError("the length of embedding_list must be greater than or equal to 1.") + my_model = MyModel() + model_output = my_model.build_model(embedding=emb, + dense_feature=batch["dense_feature"], + label=batch["label"], + is_training=is_train, + seed=dense_hashtable_seed) + return model_output + + +def evaluate(): + print("read_test dataset") + if not MODIFY_GRAPH_FLAG: + eval_label = eval_model.get("label") + sess.run([eval_iterator.initializer]) + else: + # 在sess run模式下,若还是使用原来batch中的label去sess run,则会出现getnext超时报错,需要使用新数据集中的batch + eval_label = ConfigInitializer.get_instance().train_params_config.get_target_batch(False).get("label") + sess.run([ConfigInitializer.get_instance().train_params_config.get_initializer(False)]) + log_loss_list = [] + pred_list = [] + label_list = [] + eval_current_steps = 0 + finished = False + print("eval begin") + + while not finished: + try: + eval_current_steps += 1 + eval_start = time.time() + eval_loss, pred, label = sess.run([eval_model.get("loss"), eval_model.get("pred"), eval_label]) + eval_cost = time.time() - eval_start + qps_eval = (1 / eval_cost) * rank_size * cfg.batch_size + log_loss_list += list(eval_loss.reshape(-1)) + pred_list += list(pred.reshape(-1)) + label_list += list(label.reshape(-1)) + print(f"eval current_steps: {eval_current_steps}, qps: {qps_eval}") + if eval_current_steps == eval_steps: + finished = True + except tf.errors.OutOfRangeError: + finished = True + auc = roc_auc_score(label_list, pred_list) + mean_log_loss = np.mean(log_loss_list) + return auc, mean_log_loss + + +def evaluate_fix(step): + print("read_test dataset evaluate_fix") + if not MODIFY_GRAPH_FLAG: + sess.run([eval_iterator.initializer]) + else: + sess.run([ConfigInitializer.get_instance().train_params_config.get_initializer(False)]) + log_loss_list = [] + pred_list = [] + label_list = [] + eval_current_steps = 0 + finished = False + print("eval begin") + while not finished: + try: + eval_current_steps += 1 + eval_loss, pred, label = sess.run([eval_model.get("loss"), eval_model.get("pred"), eval_model.get("label")]) + log_loss_list += list(eval_loss.reshape(-1)) + pred_list += list(pred.reshape(-1)) + label_list += list(label.reshape(-1)) + print(f"eval current_steps: {eval_current_steps}") + + if eval_current_steps == eval_steps: + finished = True + except tf.errors.OutOfRangeError: + finished = True + + label_numpy = np.array(label_list) + pred_numpy = np.array(pred_list) + if not os.path.exists(os.path.abspath(".") + f"/interval_{interval}/numpy_{step}"): + 
os.makedirs(os.path.abspath(".") + f"/interval_{interval}/numpy_{step}") + + if os.path.exists(os.path.abspath(".") + f"/interval_{interval}/numpy_{step}/label_{rank_id}.npy"): + os.remove(os.path.abspath(".") + f"/interval_{interval}/numpy_{step}/label_{rank_id}.npy") + if os.path.exists(os.path.abspath(".") + f"/interval_{interval}/numpy_{step}/pred_{rank_id}.npy"): + os.remove(os.path.abspath(".") + f"/interval_{interval}/numpy_{step}/pred_{rank_id}.npy") + if os.path.exists(f"flag_{rank_id}.txt"): + os.remove(f"flag_{rank_id}.txt") + np.save(os.path.abspath(".") + f"/interval_{interval}/numpy_{step}/label_{rank_id}.npy", label_numpy) + np.save(os.path.abspath(".") + f"/interval_{interval}/numpy_{step}/pred_{rank_id}.npy", pred_numpy) + os.mknod(f"flag_{rank_id}.txt") + while True: + file_exists_list = [os.path.exists(f"flag_{i}.txt") for i in range(rank_size)] + if sum(file_exists_list) == rank_size: + print("All saved!!!!!!!!!!") + break + else: + print("Waitting for saving numpy!!!!!!!!") + time.sleep(1) + continue + + auc = roc_auc_score(label_list, pred_list) + mean_log_loss = np.mean(log_loss_list) + return auc, mean_log_loss + + +def create_feature_spec_list(use_timestamp=False): + access_threshold = None + eviction_threshold = None + if use_timestamp: + access_threshold = 1000 + eviction_threshold = 180 + + feature_spec_list = [FeatureSpec("sparse_feature", table_name="sparse_embeddings", batch_size=cfg.batch_size, + access_threshold=access_threshold, eviction_threshold=eviction_threshold)] + if use_multi_lookup: + feature_spec_list.append(FeatureSpec("sparse_feature", table_name="sparse_embeddings", + batch_size=cfg.batch_size, + access_threshold=access_threshold, + eviction_threshold=eviction_threshold)) + if use_timestamp: + feature_spec_list.append(FeatureSpec("timestamp", is_timestamp=True)) + return feature_spec_list + + +def _del_related_dir(del_path: str) -> None: + if not os.path.isabs(del_path): + del_path = os.path.join(os.getcwd(), del_path) + dirs = glob(del_path) + for sub_dir in dirs: + shutil.rmtree(sub_dir, ignore_errors=True) + logger.info(f"Delete dir:{sub_dir}") + + +def _clear_saved_model() -> None: + _del_related_dir("/root/ascend/log/*") + _del_related_dir("kernel*") + _del_related_dir("model_dir_rank*") + _del_related_dir("op_cache") + + if os.getenv("CACHE_MODE", "") != CacheModeEnum.SSD.value: + return + logger.info("Current cache mode is SSD, and file overwrite is not allowed in SSD mode, deleting exist directory" + " then create empty directory for this use case.") + for sub_path in SSD_DATA_PATH: + _del_related_dir(sub_path) + os.makedirs(sub_path, mode=0o550, exist_ok=True) + logger.info(f"Create dir:{sub_path}") + + +if __name__ == "__main__": + tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) + warnings.filterwarnings("ignore") + _clear_saved_model() + + rank_id = int(os.getenv("RANK_ID")) if os.getenv("RANK_ID") else None + rank_size = int(os.getenv("TRAIN_RANK_SIZE")) if os.getenv("TRAIN_RANK_SIZE") else None + interval = int(os.getenv("INTERVAL")) if os.getenv("INTERVAL") else None + train_steps = 10000 + eval_steps = 1360 + + try: + use_dynamic_expansion = bool(int(os.getenv("USE_DYNAMIC_EXPANSION", 0))) + use_multi_lookup = bool(int(os.getenv("USE_MULTI_LOOKUP", 0))) + MODIFY_GRAPH_FLAG = bool(int(os.getenv("USE_MODIFY_GRAPH", 0))) + use_faae = bool(int(os.getenv("USE_FAAE", 0))) + except ValueError as err: + raise ValueError("please correctly config USE_DYNAMIC_EXPANSION or USE_MULTI_LOOKUP or USE_FAAE " + "or USE_MODIFY_GRAPH 
only 0 or 1 is supported.") from err + + use_dynamic = bool(int(os.getenv("USE_DYNAMIC", 0))) + logger.info(f"USE_DYNAMIC:{use_dynamic}") + init(train_steps=train_steps, eval_steps=eval_steps, + use_dynamic=use_dynamic, use_dynamic_expansion=use_dynamic_expansion) + IF_LOAD = False + rank_id = mxrec_util.communication.hccl_ops.get_rank_id() + filelist = glob(f"./saved-model/sparse-model-0") + if filelist: + IF_LOAD = True + ConfigInitializer.get_instance().if_load = IF_LOAD + + cfg = Config() + feature_spec_list_train = None + feature_spec_list_eval = None + if use_faae: + feature_spec_list_train = create_feature_spec_list(use_timestamp=True) + feature_spec_list_eval = create_feature_spec_list(use_timestamp=True) + else: + feature_spec_list_train = create_feature_spec_list(use_timestamp=False) + feature_spec_list_eval = create_feature_spec_list(use_timestamp=False) + + train_batch, train_iterator = make_batch_and_iterator(cfg, feature_spec_list_train, is_training=True, + dump_graph=True, is_use_faae=use_faae) + eval_batch, eval_iterator = make_batch_and_iterator(cfg, feature_spec_list_eval, is_training=False, + dump_graph=False, is_use_faae=use_faae) + logger.info(f"train_batch: {train_batch}") + + if use_faae: + cfg.dev_vocab_size = cfg.dev_vocab_size // 2 + + optimizer_list = [get_dense_and_sparse_optimizer(cfg)] + + # note: variance_scaling_initializer only support HBM mode + emb_initializer = tf.compat.v1.truncated_normal_initializer(stddev=0.05, seed=sparse_hashtable_seed) \ + if cfg.cache_mode != "HBM" or use_dynamic_expansion else \ + tf.compat.v1.variance_scaling_initializer(mode="fan_avg", distribution='normal', seed=sparse_hashtable_seed) + sparse_hashtable = create_table( + key_dtype=cfg.key_type, + dim=tf.TensorShape([cfg.emb_dim]), + name="sparse_embeddings", + emb_initializer=emb_initializer, + **cfg.get_emb_table_cfg() + ) + if use_faae: + tf.compat.v1.add_to_collection(ASCEND_TIMESTAMP, train_batch["timestamp"]) + + sparse_hashtable_list = [sparse_hashtable, sparse_hashtable] if use_multi_lookup else [sparse_hashtable] + train_model = model_forward(feature_spec_list_train, sparse_hashtable_list, train_batch, + is_train=True, modify_graph=MODIFY_GRAPH_FLAG) + eval_model = model_forward(feature_spec_list_eval, sparse_hashtable_list, eval_batch, + is_train=False, modify_graph=MODIFY_GRAPH_FLAG) + + dense_variables, sparse_variables = get_dense_and_sparse_variable() + trainable_varibles = [] + trainable_varibles.extend(dense_variables) + if use_dynamic_expansion: + trainable_varibles.append(tf.compat.v1.get_collection(ASCEND_SPARSE_LOOKUP_LOCAL_EMB)[0]) + else: + trainable_varibles.extend(sparse_variables) + rank_size = mxrec_util.communication.hccl_ops.get_rank_size() + train_ops = [] + # multi task training + for loss, (dense_optimizer, sparse_optimizer) in zip([train_model.get("loss")], optimizer_list): + # do dense optimization + grads = dense_optimizer.compute_gradients(loss, var_list=trainable_varibles) + avg_grads = [] + for grad, var in grads[:-1]: + if rank_size > 1: + grad = hccl_ops.allreduce(grad, "sum") if grad is not None else None + if grad is not None: + avg_grads.append((grad / 8.0, var)) + # apply gradients: update variables + train_ops.append(dense_optimizer.apply_gradients(avg_grads)) + + if use_dynamic_expansion: + train_address_list = tf.compat.v1.get_collection(ASCEND_SPARSE_LOOKUP_ID_OFFSET) + # do sparse optimization by addr + sparse_grads = list(grads[-1]) # local_embedding + grads_and_vars = [(grad, address) for grad, address in zip(sparse_grads, 
train_address_list)] + train_ops.append(sparse_optimizer.apply_gradients(grads_and_vars)) + else: + # do sparse optimization + sparse_grads = list(grads[-1]) + print("sparse_grads_tensor:", sparse_grads) + grads_and_vars = [(grad, variable) for grad, variable in zip(sparse_grads, sparse_variables)] + train_ops.append(sparse_optimizer.apply_gradients(grads_and_vars)) + + # 动态学习率更新 + train_ops.extend([cfg.global_step.assign(cfg.global_step + 1), cfg.learning_rate[0], cfg.learning_rate[1]]) + + with tf.control_dependencies(train_ops): + train_ops = tf.no_op() + cfg.learning_rate = [cfg.learning_rate[0], cfg.learning_rate[1]] + + saver = tf.train.Saver() + if MODIFY_GRAPH_FLAG: + modify_graph_and_start_emb_cache(dump_graph=True) + else: + start_asc_pipeline() + + hook_list = [] + if use_faae: + hook_evict = EvictHook(evict_enable=True, evict_time_interval=120) + hook_list.append(hook_evict) + if MODIFY_GRAPH_FLAG: # 该场景添加hook处理校验问题 + hook_list.append(GraphModifierHook(modify_graph=False)) + + # with tf.compat.v1.Session(config=sess_config(dump_data=False)) as sess: + if use_faae: + sess = tf.compat.v1.train.MonitoredTrainingSession( + hooks=hook_list, + config=sess_config(dump_data=False) + ) + sess.graph._unsafe_unfinalize() + if not MODIFY_GRAPH_FLAG: + sess.run(train_iterator.initializer) + else: + sess.run(ConfigInitializer.get_instance().train_params_config.get_initializer(True)) + else: + sess = tf.compat.v1.Session(config=sess_config(dump_data=False)) + sess.run(tf.compat.v1.global_variables_initializer()) + if not MODIFY_GRAPH_FLAG: + sess.run(train_iterator.initializer) + else: + sess.run(ConfigInitializer.get_instance().train_params_config.get_initializer(True)) + + epoch = 0 + cost_sum = 0 + qps_sum = 0 + best_auc = 0 + iteration_per_loop = 10 + + train_ops = util.set_iteration_per_loop(sess, train_ops, 10) + + # for i in range(1, TRAIN_STEPS): + i = 0 + while True: + i += 1 + logger.info(f"################ training at step {i * iteration_per_loop} ################") + start_time = time.time() + + try: + grad, loss = sess.run([train_ops, train_model.get("loss")]) + lr = sess.run(cfg.learning_rate) + global_step = sess.run(cfg.global_step) + except tf.errors.OutOfRangeError: + logger.info(f"Encounter the end of Sequence for training.") + break + + end_time = time.time() + cost_time = end_time - start_time + qps = (1 / cost_time) * rank_size * cfg.batch_size * iteration_per_loop + cost_sum += cost_time + logger.info(f"step: {i * iteration_per_loop}; training loss: {loss}") + logger.info(f"step: {i * iteration_per_loop}; grad: {grad}") + logger.info(f"step: {i * iteration_per_loop}; lr: {lr}") + logger.info(f"global step: {global_step}") + logger.info(f"step: {i * iteration_per_loop}; current sess cost time: {cost_time:.10f}; current QPS: {qps}") + logger.info(f"training at step:{i * iteration_per_loop}, table[{sparse_hashtable.table_name}], " + f"table size:{sparse_hashtable.size()}, table capacity:{sparse_hashtable.capacity()}") + + if i % (train_steps // iteration_per_loop) == 0: + if interval is not None: + test_auc, test_mean_log_loss = evaluate_fix(i * iteration_per_loop) + else: + test_auc, test_mean_log_loss = evaluate() + print("Test auc: {}; log_loss: {} ".format(test_auc, test_mean_log_loss)) + best_auc = max(best_auc, test_auc) + logger.info(f"training step: {i * iteration_per_loop}, best auc: {best_auc}") + + sess.close() + + terminate_config_initializer() + logger.info("Demo done!") diff --git a/examples/WideDeep/model/mean_auc.py b/examples/WideDeep/model/mean_auc.py new 
file mode 100644 index 00000000..ff57df00 --- /dev/null +++ b/examples/WideDeep/model/mean_auc.py @@ -0,0 +1,40 @@ +# coding=utf-8 +# Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +import os +from glob import glob +import numpy as np + + +def split_auc(log_input): + with open(log_input, 'r') as log: + all_auc = [] + for line in log.readlines(): + if 'Test' in line: + all_auc.append(float(line.split(';')[0].split(':')[-1].strip())) + all_auc_len = len(all_auc) + all_auc_arr = np.array(all_auc)[:all_auc_len - all_auc_len % 8] + test_auc = np.mean(all_auc_arr.reshape(-1, 8), axis=-1) + return test_auc + + +log_path_all = 'latest_*.log' +log_path_list = glob(log_path_all) + +for log_path in log_path_list: + print(os.path.basename(log_path)) + print(split_auc(log_path)) + print('*'*20) \ No newline at end of file diff --git a/examples/WideDeep/model/model.py b/examples/WideDeep/model/model.py new file mode 100644 index 00000000..037fb276 --- /dev/null +++ b/examples/WideDeep/model/model.py @@ -0,0 +1,94 @@ +# coding=utf-8 +# Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +import time +from easydict import EasyDict as edict + +import tensorflow as tf + + +model_cfg = edict() +model_cfg.loss_mode = "batch" +LOSS_OP_NAME = "loss" +LABEL_OP_NAME = "label" +VAR_LIST = "variable" +PRED_OP_NAME = "pred" + + +class MyModel: + def __init__(self): + self.kernel_init = None + self._loss_fn = None + self.is_training = None + + @classmethod + def _dot_interaction(cls, _input): + num_features = tf.shape(_input)[1] + batch_size = tf.shape(_input)[0] + xactions = tf.matmul(_input, _input, transpose_b=True) + ones = tf.ones_like(xactions, dtype=tf.float32) + upper_tri_mask = tf.linalg.band_part(ones, 0, -1) + + activations = tf.where(condition=tf.cast(upper_tri_mask, tf.bool), + x=tf.zeros_like(xactions), + y=xactions) + out_dim = num_features * num_features + activations = tf.reshape(activations, (batch_size, out_dim)) + return activations + + def build_model(self, + embedding=None, + dense_feature=None, + label=None, + is_training=True, + seed=None): + with tf.variable_scope("mlp", reuse=tf.AUTO_REUSE): + self._loss_fn = tf.keras.losses.BinaryCrossentropy(from_logits=True) + self.is_training = is_training + dense_embedding_vec = self.bottom_stack(dense_feature, seed) + dense_embedding = tf.expand_dims(dense_embedding_vec, 1) + interaction_args = tf.concat([dense_embedding, embedding], axis=1) + interaction_output = self._dot_interaction(interaction_args) + feature_interaction_output = tf.concat([dense_embedding_vec, interaction_output], axis=1) + # (8192, 857) + logits = self.top_stack(feature_interaction_output, seed) + loss = self._loss_fn(label, logits) + prediction = tf.sigmoid(logits) + trainable_variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='mlp') + return {LOSS_OP_NAME: loss, + PRED_OP_NAME: prediction, + LABEL_OP_NAME: label, + VAR_LIST: trainable_variables} + + def bottom_stack(self, _input, seed): + dnn1 = tf.layers.dense(_input, 512, activation='relu', name='bs1', + kernel_initializer=tf.variance_scaling_initializer(mode="fan_avg", distribution='normal', seed=seed), + bias_initializer=tf.variance_scaling_initializer(mode="fan_avg", distribution='normal', seed=seed), + kernel_regularizer=tf.contrib.layers.l1_regularizer(1e-2)) + dnn2 = tf.layers.dense(dnn1, 256, activation='relu', name='bs2', kernel_initializer=tf.variance_scaling_initializer(mode="fan_avg", distribution='normal', seed=seed), bias_initializer=tf.variance_scaling_initializer(mode="fan_avg", distribution='normal', seed=seed), kernel_regularizer=tf.contrib.layers.l1_regularizer(1e-2)) + dnn3 = tf.layers.dense(dnn2, 128, activation='relu', name='bs3', kernel_initializer=tf.variance_scaling_initializer(mode="fan_avg", distribution='normal', seed=seed), bias_initializer=tf.variance_scaling_initializer(mode="fan_avg", distribution='normal', seed=seed), kernel_regularizer=tf.contrib.layers.l1_regularizer(1e-2)) + return dnn3 + + def top_stack(self, _input, seed): + dnn1 = tf.layers.dense(_input, 1024, activation='relu', name='ts1', kernel_initializer=tf.variance_scaling_initializer(mode="fan_avg", distribution='normal', seed=seed), bias_initializer=tf.variance_scaling_initializer(mode="fan_avg", distribution='normal', seed=seed), kernel_regularizer=tf.contrib.layers.l1_regularizer(1e-2)) + dnn2 = tf.layers.dense(dnn1, 1024, activation='relu', name='ts2', kernel_initializer=tf.variance_scaling_initializer(mode="fan_avg", distribution='normal', seed=seed), 
bias_initializer=tf.variance_scaling_initializer(mode="fan_avg", distribution='normal', seed=seed), kernel_regularizer=tf.contrib.layers.l1_regularizer(1e-2)) + dnn3 = tf.layers.dense(dnn2, 512, activation='relu', name='ts3', kernel_initializer=tf.variance_scaling_initializer(mode="fan_avg", distribution='normal', seed=seed), bias_initializer=tf.variance_scaling_initializer(mode="fan_avg", distribution='normal', seed=seed), kernel_regularizer=tf.contrib.layers.l1_regularizer(1e-2)) + dnn4 = tf.layers.dense(dnn3, 256, activation='relu', name='ts4', kernel_initializer=tf.variance_scaling_initializer(mode="fan_avg", distribution='normal', seed=seed), bias_initializer=tf.variance_scaling_initializer(mode="fan_avg", distribution='normal', seed=seed), kernel_regularizer=tf.contrib.layers.l1_regularizer(1e-2)) + dnn5 = tf.layers.dense(dnn4, 1, activation=None, name='ts5', kernel_initializer=tf.variance_scaling_initializer(mode="fan_avg", distribution='normal', seed=seed), bias_initializer=tf.variance_scaling_initializer(mode="fan_avg", distribution='normal', seed=seed), kernel_regularizer=tf.contrib.layers.l1_regularizer(1e-2)) + return dnn5 + + +my_model = MyModel() diff --git a/examples/WideDeep/model/op_impl_mode.ini b/examples/WideDeep/model/op_impl_mode.ini new file mode 100644 index 00000000..579dea43 --- /dev/null +++ b/examples/WideDeep/model/op_impl_mode.ini @@ -0,0 +1 @@ +ScatterNdAdd=support_out_of_bound_index \ No newline at end of file diff --git a/examples/WideDeep/model/optimizer.py b/examples/WideDeep/model/optimizer.py new file mode 100644 index 00000000..18dbe288 --- /dev/null +++ b/examples/WideDeep/model/optimizer.py @@ -0,0 +1,46 @@ +# coding=utf-8 +# Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +import tensorflow as tf + +from delay_loss_scale import DenseLossScaleOptimizer, SparseLossScaleOptimizer +from gradient_descent_w import create_hash_optimizer +from mx_rec.util.initialize import ConfigInitializer +from mx_rec.optimizers.gradient_descent_by_addr import create_hash_optimizer_by_addr +from mx_rec.optimizers import lazy_adam + + +def get_dense_and_sparse_optimizer(cfg): + use_dynamic_expansion = ConfigInitializer.get_instance().use_dynamic_expansion + if cfg.use_lazy_adam_optimizer: + if use_dynamic_expansion: + raise RuntimeError("model is incompatible with dynamic_expansion when use lazy_adam optimizer.") + # use lazy_adam optimizer + dense_optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=cfg.learning_rate[0]) + sparse_optimizer = lazy_adam.create_hash_optimizer(learning_rate=cfg.learning_rate[1]) + loss_scale = 65536 + else: + # use SGD optimizer + dense_optimizer = tf.compat.v1.train.GradientDescentOptimizer(learning_rate=cfg.learning_rate[0]) + if use_dynamic_expansion: + sparse_optimizer = create_hash_optimizer_by_addr(learning_rate=cfg.learning_rate[1], weight_decay=0.0001) + else: + sparse_optimizer = create_hash_optimizer(learning_rate=cfg.learning_rate[1], weight_decay=0.0001) + loss_scale = 1024 + sparse_optimizer = SparseLossScaleOptimizer(sparse_optimizer, loss_scale) + dense_optimizer = DenseLossScaleOptimizer(dense_optimizer, loss_scale) + + return dense_optimizer, sparse_optimizer diff --git a/examples/WideDeep/model/run.sh b/examples/WideDeep/model/run.sh new file mode 100644 index 00000000..6c142443 --- /dev/null +++ b/examples/WideDeep/model/run.sh @@ -0,0 +1,99 @@ +#!/bin/bash +# Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +cur_path=$(dirname "$(readlink -f "$0")") + +so_path=$1 +mx_rec_package_path=$2 +hccl_cfg_json=$3 +dlrm_criteo_data_path=$4 +ip=$5 # no ranktable时传入该参数 + +interface="lo" +num_server=1 +local_rank_size=8 +num_process=$((num_server * local_rank_size)) +export TRAIN_RANK_SIZE=$num_process + +################# 参数配置 ###################### +export USE_DYNAMIC=0 # 0:静态shape;1:动态shape +export CACHE_MODE="HBM" # HBM;DDR;SSD +export USE_FAAE=0 # 0:关闭准入淘汰;1:开启准入淘汰 +export USE_DYNAMIC_EXPANSION=0 # 0:关闭动态扩容;1: 开启动态扩容 +export USE_MULTI_LOOKUP=0 # 0:一表一查;1:一表多查 +export USE_MODIFY_GRAPH=0 # 0:feature spec模式;1:自动改图模式 +################################################ +echo "CACHE_MODE:${CACHE_MODE}" + +export HCCL_CONNECT_TIMEOUT=1200 +export DLRM_CRITEO_DATA_PATH=${dlrm_criteo_data_path} +export PYTHONPATH=${mx_rec_package_path}:${so_path}:$PYTHONPATH +export LD_PRELOAD=/usr/lib64/libgomp.so.1 +export LD_LIBRARY_PATH=${so_path}:/usr/local/lib:$LD_LIBRARY_PATH +export ASCEND_DEVICE_ID=0 +export RANK_ID_START=0 +export JOB_ID=10086 +export CUSTOMIZED_OPS_LIB_PATH=${so_path}/libcust_ops.so # Todo: please config +export MXREC_LOG_LEVEL="INFO" +export TF_CPP_MIN_LOG_LEVEL=3 +export ASCEND_GLOBAL_LOG_LEVEL=3 +#export USE_FAAE=1 +export ENABLE_FORCE_V2_CONTROL=1 + +export PROFILING_OPTIONS='{"output":"/home/yz/profiling", + "training_trace":"on", + "task_trace":"on", + "aicpu":"on", + "fp_point":"", + "bp_point":"", + "aic_metrics":"PipeUtilization"}' + +RANK_ID_START=0 + +export MXREC_MODE="ASC" +echo "MXREC_MODE is $MXREC_MODE" +export py=main_mxrec.py +echo "py is $py" + +# 区分ranktable和no ranktable +if [ -n "$ip" ]; then + # no ranktable分支 + echo "Current is no ranktable solution." + echo "Input node ip: $ip, please make sure this ip is available." 
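+    # Rank-table-free startup: instead of reading a pre-generated hccl json
+    # file, every worker rendezvouses with the chief process through the CM_*
+    # variables exported below (chief ip/port/device, worker ip and count).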
+ export CM_CHIEF_IP=$ip # 主节点ip + export CM_CHIEF_PORT=60001 # 主节点监听端口 + export CM_CHIEF_DEVICE=0 # 主节点device id + export CM_WORKER_IP=$ip # 当前节点ip + export CM_WORKER_SIZE=$num_process # 参与集群训练的device数量 + echo "CM_CHIEF_IP=$CM_CHIEF_IP" + echo "CM_CHIEF_PORT=$CM_CHIEF_PORT" + echo "CM_CHIEF_DEVICE=$CM_CHIEF_DEVICE" + echo "CM_WORKER_IP=$CM_WORKER_IP" + echo "CM_WORKER_SIZE=$CM_WORKER_SIZE" +else + # ranktable分支 + echo "Current is ranktable solution, hccl json file:${hccl_cfg_json}" + export RANK_SIZE=$num_process + echo "RANK_SIZE=${RANK_SIZE}, please make sure hccl configuration json file match this parameter" + export RANK_TABLE_FILE=${hccl_cfg_json} +fi + +echo "use horovod to start tasks" +# GLOG_stderrthreshold -2:TRACE -1:DEBUG 0:INFO 1:WARN 2.ERROR, 默认为INFO +mpi_args='-x BIND_INFO="0:12 12:48 60:48" -x GLOG_stderrthreshold=2 -x GLOG_logtostderr=true -bind-to none -x NCCL_SOCKET_IFNAME=docker0 -mca btl_tcp_if_exclude docker0' + +horovodrun --network-interface ${interface} -np ${num_process} --mpi-args "${mpi_args}" --mpi -H localhost:${local_rank_size} \ +python3.7 ${py} 2>&1 | tee temp_${CACHE_MODE}_${num_process}p.log -- Gitee From 7a05b033d41af51df9aed7414ad04216dff821cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=83=AD=E6=9C=9B?= <1244372993@qq.com> Date: Mon, 3 Jun 2024 16:42:26 +0800 Subject: [PATCH 189/302] =?UTF-8?q?WideDeep=E6=A8=A1=E5=9E=8B=20=E8=BF=81?= =?UTF-8?q?=E7=A7=BB=E5=BC=80=E6=BA=90=E9=A1=B9=E7=9B=AE=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/WideDeep/README_WD.md | 567 ++++++++++++++++++++ examples/WideDeep/criteo.py | 246 +++++++++ examples/WideDeep/model/config.py | 22 +- examples/WideDeep/model/delay_loss_scale.py | 30 +- examples/WideDeep/model/main_mxrec.py | 205 ++++--- examples/WideDeep/model/model.py | 91 ++-- examples/WideDeep/model/optimizer.py | 28 +- 7 files changed, 1022 insertions(+), 167 deletions(-) create mode 100644 examples/WideDeep/README_WD.md create mode 100644 examples/WideDeep/criteo.py diff --git a/examples/WideDeep/README_WD.md b/examples/WideDeep/README_WD.md new file mode 100644 index 00000000..261861f7 --- /dev/null +++ b/examples/WideDeep/README_WD.md @@ -0,0 +1,567 @@ +# wide&deep模型 迁移样例(基于DLRM模型框架) + +开源项目在保证原有结构不变的情况下,可采用替换相关API接口的方式将项目由GPU >> NPU >> mxrec。在模型迁移适配过程中可能因兼容性问题而导致模型迁移失败,此处提供另一种模型适配方案。 + +*** +## 开源项目链接 + +```shell +https://github.com/ZiyaoGeng/RecLearn +``` +*** +## 数据集 + +```shell +Criteo4500w数据集: +https://ailab.criteo.com/ressources/kaggle-display-advertising-challenge-dataset.tar.gz +``` +*** +## 数据集预处理 + +### 解压文件列表 +- train.txt +- test.txt +- readme.txt + +text.txt因缺少label列无法使用,将train.txt数据集切分为10份,train_01.txt~train_09.txt为训练集,train_10.txt为测试集。数据预处理文件:criteo.py。 + +*** +### 数据预处理运行脚本 +```shell +python critro.py --data_path data_path --output_path output_path +``` +参数说明: +- dataset_path: train.txt的路径,如:"D:\dat\train.txt" +- output_path: tfrecord存放路径,如:"D:\dat\tfrecord\ " +*** + +### criteo.py +#### 1. 分割数据集 +调用`criteo.py`文件中的`get_split_file_path(parent_path, dataset_path, sample_num=4600000)`方法将数据集分割,`sample_num=4600000`是每个子数据集的样本数量。返回包含全部子数据集名称的列表。 + +```python +# get txt_list +split_file_list = get_split_file_path(dataset_path = dataset_path) +``` +*** +#### 2. 建立特征映射 +调用`criteo.py`文件中的`get_fea_map()`方法,以`{'C1':{}, 'C2':{},..., 'I1':{},...}`形式储存dense_feature的最大最小值以及sparse_feature去重后的特征映射。 + +```python +# get feature_map +fea_map = get_fea_map(split_file_list=split_file_list) +``` +*** +#### 3. 
dense_feature分桶离散化 +调用`criteo.py`文件中的`rec_kbins_discretizer(data_df, n_bins, min_max_dict)`方法将dense_feature分桶化离散化,`nbins=1000`。 + +```python +# dense feature: Bin continuous data into intervals. +data_df[dense_features] = rec_kbins_discretizer(data_df[dense_features], 1000, fea_map) +``` +*** +#### 4. sparse_feature特征映射 +通过如下操作将原始的字符串数据映射为0~max的int64数据。 + +```python +# sparse feature: mapping +for col in sparse_features: + data_df[col] = data_df[col].map(lambda x: fea_map[col][x]) +``` +*** +#### 5. 39个特征增加偏移项 +开源项目deep部分对39个特征分别作了embedding,即建了39个表。本项目只建了一张表,因此需要对每个特征对应的值作偏移。`slot_size_array`中的值分别对应各特征去重后的类别数。 + +```python +# add offsets +slot_size_array = [1001, 1001, 1001, 1001, 1001, 1001, 1001, 1001, 1001, 1001, 1001, 1001, 1001, + 1462, 585, 10131228, 2202609, 307, 25, 12519, 635, 5, 93147, 5685, 8351594, 3196, + 29, 14994, 5461307, 12, 5654, 2174, 5, 7046548, 19, 17, 286182, 106, 142573] +offset_size_list = np.cumsum([0] + slot_size_array[:-1]) +for j in range(1,len(offset_size_list)+1): + data_df.iloc[:, j] += offset_size_list[j-1] +``` +*** +#### 6. 数据集格式转换:txt >> tfrecord +调用`criteo.py`文件中的`convert_input2tfrd(in_file_path, out_file_path)`方法将txt文件转换为tfrecord文件。 + +```python +# txt to tfrecords +convert_input2tfrd(in_file_path=file, out_file_path=output_path) +``` +*** + +## 模型运行 + +参考mxrec的`README.md`文件在NPU服务器上配置环境后,可按照[mxrec-tf1](https://ascendhub.huawei.com/#/detail/mxrec-tf1)中DLRM模型运行命令启动模型训练。`so_path`、`mx_rec_package_path`、`hccl_cfg_json`配置不变,根据实际数据集路径配置`dlrm_criteo_data_path`。 + +```shell +# 运行命令 +bash run.sh {so_path} {mx_rec_package_path} {hccl_cfg_json} {dlrm_criteo_data_path} +``` +*** + +## 模型结果 +[开源项目](https://github.com/ZiyaoGeng/RecLearn)使用Criteo4500W数据集在GPU上训练模型,结果为`Log Loss=0.4692`、`AUC=0.7930`。适配完成模型后,固定`CACHE_MODE="HBM"`、`USE_FAAE=0`,在`run.sh`中配置其他选项卡,运行结果如下。 + +
+| Model | Use_Dynamic | Use_Dynamic_Expansion | Use_Multi_Lookup | Use_Modify_Graph | Log Loss | AUC |
+| :---: | :---: | :---: | :---: | :---: | :---: | :---: |
+| WDL | 0 | 0 | 0 | 0 | 0.4592 | 0.7934 |
+| WDL | 0 | 1 | 0 | 0 | 0.4593 | 0.7933 |
+| WDL | 1 | 0 | 0 | 0 | 0.4594 | 0.7932 |
+| WDL | 1 | 1 | 0 | 0 | 0.4594 | 0.7932 |
+| WDL | 1 | 1 | 1 | 0 | 0.4590 | 0.7937 |
+| WDL | 0 | 0 | 0 | 1 | 0.4593 | 0.7934 |
+| WDL | 0 | 1 | 0 | 1 | 0.4593 | 0.7933 |
+| WDL | 1 | 0 | 0 | 1 | 0.4593 | 0.7933 |
+| WDL | 1 | 1 | 0 | 1 | 0.4594 | 0.7932 |
+| WDL | 1 | 1 | 1 | 1 | 0.4589 | 0.7937 |
+ + +*** +## 模型迁移 + +**迁移思路:** 参考开源项目,在现有已适配好的dlrm模型框架下,改动相关代码逻辑,完成Wide&deep模型的适配。 + +下文所提到的`动态扩容`、`动态shape`、`自动改图`、`一表多查`是mxrec提供的相关特性,开关选项见`run.sh`。 + +```shell +# run.sh: 32~37行 +export USE_DYNAMIC=0 # 0:静态shape;1:动态shape +export CACHE_MODE="HBM" # HBM;DDR;SSD +export USE_FAAE=0 # 0:关闭准入淘汰;1:开启准入淘汰 +export USE_DYNAMIC_EXPANSION=0 # 0:关闭动态扩容;1: 开启动态扩容 +export USE_MULTI_LOOKUP=0 # 0:一表一查;1:一表多查 +export USE_MODIFY_GRAPH=0 # 0:feature spec模式;1:自动改图模式 +``` + +*** +### DLRM模型框架 +**迁移说明:** 迁移过程中未使用`gradient_descent_w.py`、`mean_auc.py`。 + +- config.py +- delay_loss_scale.py +- gradient_descent_w.py +- main_mxrec.py +- mean_auc.py +- model.py +- optimizer.py +- run.sh + +*** + +#### 1. config.py +实验超参数配置如下:取消动态学习率逻辑,学习率固定为0.001。 + +```python +# 88~89行 +lr_sparse = self.base_lr_sparse * lr_factor_constant +lr_dense = self.base_lr_dense * lr_factor_constant +# 140~146行 +_lr_scheduler = LearningRateScheduler( + 0.001, + 0.001, + LR_SCHEDULE_STEPS[0], + LR_SCHEDULE_STEPS[1], + LR_SCHEDULE_STEPS[2], +) +# 超参数 +self.batch_size = 4096 +self.line_per_sample = 1 +self.train_epoch = 1 +self.test_epoch = 9 +self.emb_dim = 8 +``` +*** + + +#### 2. model.py +迁移过程中,`model.py`需参考开源项目文件`reclearn/models/ranking/wdl.py`的代码逻辑,使用tensorflow的低阶API重新编写。输出参数必须包括`loss`,`prediction`,`label`,`trainable_variables`。**迁移重点:mxRec对推荐模型中sparse_feature的创表查表操作作了加速,使用`create_table`与`sparse_lookup`接口替换tensorflow中的`tf.nn.embedding_lookup`接口。** 因此在适配开源项目时,会将sparse_feature的embedding操作放在模型结构外。 + +**reclearn开源项目原始代码:** +```python +# wdl.py +import tensorflow as tf +from tensorflow.keras import Model +from tensorflow.keras.layers import Dense, Embedding, Dropout, Input +from tensorflow.keras.regularizers import l2 + +from reclearn.layers import Linear, MLP +from reclearn.layers.utils import index_mapping + +class WideDeep(Model): + def __init__(self, feature_columns, hidden_units, activation='relu', + dnn_dropout=0., embed_reg=0., w_reg=0.): + """Wide&Deep. + Args: + :param feature_columns: A list. [{'feat_name':, 'feat_num':, 'embed_dim':}, ...] + :param hidden_units: A list. Neural network hidden units. + :param activation: A string. Activation function of MLP. + :param dnn_dropout: A scalar. Dropout of MLP. + :param embed_reg: A scalar. The regularization coefficient of embedding. + :param w_reg: A scalar. The regularization coefficient of Linear. 
+ :return + """ + super(WideDeep, self).__init__() + self.feature_columns = feature_columns + self.embed_layers = { + feat['feat_name']: Embedding(input_dim=feat['feat_num'], + input_length=1, + output_dim=feat['embed_dim'], + embeddings_initializer='random_normal', + embeddings_regularizer=l2(embed_reg)) + for feat in self.feature_columns + } + self.map_dict = {} + self.feature_length = 0 + for feat in self.feature_columns: + self.map_dict[feat['feat_name']] = self.feature_length + self.feature_length += feat['feat_num'] + self.dnn_network = MLP(hidden_units, activation, dnn_dropout) + self.linear = Linear(self.feature_length, w_reg=w_reg) + self.final_dense = Dense(1, activation=None) + + def call(self, inputs): + sparse_embed = tf.concat([self.embed_layers[feat_name](value) for feat_name, value in inputs.items()], axis=-1) + x = sparse_embed # (batch_size, field * embed_dim) + # Wide + wide_inputs = index_mapping(inputs, self.map_dict) + wide_inputs = tf.concat([value for _, value in wide_inputs.items()], axis=-1) + wide_out = self.linear(wide_inputs) + # Deep + deep_out = self.dnn_network(x) + deep_out = self.final_dense(deep_out) + # out + outputs = tf.nn.sigmoid(0.5 * wide_out + 0.5 * deep_out) + return outputs + + def summary(self): + inputs = { + feat['feat_name']: Input(shape=(), dtype=tf.int32, name=feat['feat_name']) + for feat in self.feature_columns + } + Model(inputs=inputs, outputs=self.call(inputs)).summary() + +``` +`self.embed_layers`是对数据集中39个特征分别建表作embedding的操作,迁移后对应的代码逻辑见`main_mxrec.py`。 +`self.map_dict`统计了各特征需增加的偏移量。 +`index_mapping`是对数据增加偏移量的操作,迁移后对应的代码逻辑见`criteo.py`。 + +**迁移后代码:** +```python +# model.py +import time +from easydict import EasyDict as edict + +import tensorflow as tf + + +model_cfg = edict() +model_cfg.loss_mode = "batch" +LOSS_OP_NAME = "loss" +LABEL_OP_NAME = "label" +VAR_LIST = "variable" +PRED_OP_NAME = "pred" + + +class MyModel: + def __init__(self): + self.kernel_init = None + self._loss_fn = None + self.is_training = None + + def build_model(self, + wide_embedding=None, + deep_embedding=None, + label=None, + is_training=True, + seed=None, + dropout_rate=None, + batch_norm=False): + + with tf.variable_scope("wide_deep", reuse=tf.AUTO_REUSE): + self._loss_fn = tf.keras.losses.BinaryCrossentropy(from_logits=True) + self.is_training = is_training + + # wide + batch_size, wide_num, wide_emb_dim = wide_embedding.shape + wide_input = tf.reshape(wide_embedding[:,0], shape=(batch_size, wide_num * 1)) + wide_output = tf.reshape(tf.reduce_sum(wide_input, axis=1), shape=(-1,1)) + + # deep + batch_size, deep_num, deep_emb_dim = deep_embedding.shape + deep_input = tf.reshape(deep_embedding, shape=(batch_size, deep_num * deep_emb_dim)) + + ## MLP + hidden_units = [256,128,64] + net = deep_input + for i,unit in enumerate(hidden_units): + + net = tf.layers.dense(net, units=unit, activation='relu', name=f'hidden_layer_{i}', + kernel_initializer=tf.glorot_uniform_initializer(seed=seed), + bias_initializer=tf.zeros_initializer()) + + if dropout_rate is not None and 0.0 < dropout_rate < 1.0: + net = tf.layers.dropout(net,dropout_rate,training=self.is_training) + if batch_norm: + net = tf.layers.batch_normalization(net, training=self.is_training) + + deep_output = tf.layers.dense(net, units=1, activation=None, name='deep_output', + kernel_initializer=tf.glorot_uniform_initializer(seed=seed), + bias_initializer=tf.zeros_initializer()) + + total_logits = 0.5 * tf.add(wide_output,deep_output,name='total_logits') + loss = self._loss_fn(label, total_logits) + prediction = 
tf.sigmoid(total_logits) + trainable_variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='wide_deep') + return {LOSS_OP_NAME: loss, + PRED_OP_NAME: prediction, + LABEL_OP_NAME: label, + VAR_LIST: trainable_variables} + + +my_model = MyModel() + +``` +*** +#### 3. main_mxrec.py + +`main_mxrec.py`文件中的函数如下所示。`make_batch_and_iterator()`是读取数据集以及对数据作处理的函数;`model_forward()`是前向过程函数;`evaluate()`与`evaluate_fix()`是评估函数,用于计算测试集的AUC与loss。`add_timestamp_func()`与特征准入、淘汰有关;`create_feature_spec_list()`是生成元素为FeatureSpec类的列表的函数,其返回值是`make_batch_and_iterator()`所需的传参。特征准入与淘汰、FeatureSpec类、自动改图等解释见[mxRec用户指南](https://www.hiascend.com/document/detail/zh/mind-sdk/60rc1/mxRec/mxrecug/mxrecug_0001.html)。 + +- `add_timestamp_func()` +- `make_batch_and_iterator()` +- `model_forward()` +- `evaluate()` +- `evaluate_fix()` +- `create_feature_spec_list()` + +**迁移代码改动说明:** `add_timestamp_func()`、`evaluate()`、`evaluate_fix()`未作修改。 +
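+
+Before walking through these functions, here is a minimal sketch of the API substitution highlighted in the model.py section above, i.e. replacing a native `tf.nn.embedding_lookup` with one shared mxRec table. The `create_table`/`sparse_lookup` arguments mirror the calls quoted elsewhere in this README; `embedding_matrix`, `sparse_ids`, `emb_initializer`, `batch`, `cfg` and `MODIFY_GRAPH_FLAG` stand in for objects defined by the surrounding script, so treat this as an illustration rather than a drop-in snippet:
+
+```python
+import tensorflow as tf
+
+from mx_rec.core.embedding import create_table, sparse_lookup
+
+# Open-source project: one embedding matrix per feature, queried natively.
+embedding = tf.nn.embedding_lookup(params=embedding_matrix, ids=sparse_ids)
+
+# mxRec migration: one shared hash table plus the accelerated lookup.
+table = create_table(key_dtype=cfg.key_type,
+                     dim=tf.TensorShape([cfg.emb_dim]),
+                     name="sparse_embeddings",
+                     emb_initializer=emb_initializer,
+                     **cfg.get_emb_table_cfg())
+embedding = sparse_lookup(table, sparse_ids, cfg.send_count, dim=None,
+                          is_train=True, name="embedding_lookup",
+                          modify_graph=MODIFY_GRAPH_FLAG, batch=batch,
+                          access_and_evict_config=None)
+```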
+ +3.1 读取数据集:`make_batch_and_iterator()` + +```python +# main_mxrec.py:100~104行 +def map_fn(batch): + new_batch = batch + new_batch['sparse_feature'] = tf.concat([batch['dense_feature'], batch['sparse_feature']], axis=1) + return new_batch +dataset = dataset.map(map_fn, num_parallel_calls=num_parallel) +``` +`map_fn()`:该函数是将分桶后的dense_feature与sparse_feature合并为新sparse_feature。该操作主要与`FeatureSpec()`、`sparse_lookup()`传入参数有关。 + +```python +# main_mxrec.py:109~118行 +if not MODIFY_GRAPH_FLAG: + + # Enable EOSDataset manually. + librec = import_host_pipeline_ops(LIBREC_EOS_OPS_SO) + channel_id = 0 if is_training else 1 + # 此处eos_map的调用必须先于insert_func,避免多卡数据不均匀的情况 + dataset = dataset.eos_map(librec, channel_id, kwargs.get("max_train_steps", max_train_steps), + kwargs.get("max_eval_steps", eval_steps)) + insert_fn = get_asc_insert_func(tgt_key_specs=feature_spec_list, is_training=is_training, dump_graph=dump_graph) + dataset = dataset.map(insert_fn) +``` +`dataset.eos_map()`:该函数主要是为了解决FeatureSpec模式下开`动态shape`选项卡,训练结束无法正常退出的问题。 + +*** +3.2 模型前向传播过程 + +```python +# main_mxrec.py:127~179行 +def model_forward(feature_list, wide_hash_table_list, deep_hash_table_list, batch, is_train, modify_graph, is_use_faae=False): + wide_embedding_list = [] + deep_embedding_list = [] + wide_feature_list = [] + deep_feature_list = [] + if is_use_faae: + feature_list_copy = feature_list[:-1] + else: + feature_list_copy = feature_list + + for i,item in enumerate(feature_list_copy): + if i % 2 == 0: + wide_feature_list.append(item) + else: + deep_feature_list.append(item) + + logger.debug(f"In model_forward function, is_train: {is_train}, feature_list: {len(feature_list)}, " + f"wide_hash_table_list: {len(wide_hash_table_list)}, deep_hash_table_list: {len(deep_hash_table_list)}") + + # wide + for wide_feature, wide_hash_table in zip(wide_feature_list, wide_hash_table_list): + if MODIFY_GRAPH_FLAG: + wide_feature = batch["sparse_feature"] + wide_embedding = sparse_lookup(wide_hash_table, wide_feature, cfg.send_count, dim=None, is_train=is_train, + name="wide_embedding_lookup", modify_graph=modify_graph, batch=batch, + access_and_evict_config=None) + wide_embedding_list.append(wide_embedding) + + # deep + for deep_feature, deep_hash_table in zip(deep_feature_list, deep_hash_table_list): + if MODIFY_GRAPH_FLAG: + deep_feature = batch["sparse_feature"] + deep_embedding = sparse_lookup(deep_hash_table, deep_feature, cfg.send_count, dim=None, is_train=is_train, + name="deep_embedding_lookup", modify_graph=modify_graph, batch=batch, + access_and_evict_config=None) + deep_embedding_list.append(deep_embedding) + + if len(wide_embedding_list) == 1: + wide_emb = wide_embedding_list[0] + deep_emb = deep_embedding_list[0] + elif len(wide_embedding_list) > 1: + wide_emb = tf.reduce_sum(wide_embedding_list, axis=0, keepdims=False) + deep_emb = tf.reduce_sum(deep_embedding_list, axis=0, keepdims=False) + else: + raise ValueError("the length of embedding_list must be greater than or equal to 1.") + my_model = MyModel() + model_output = my_model.build_model(wide_embedding=wide_emb, + deep_embedding=deep_emb, + label=batch["label"], + is_training=is_train, + seed=dense_hashtable_seed, + dropout_rate=0.5) + return model_output +``` +该函数是前向传播函数,主要包括sparse_feature的embedding操作(查表)与model前向操作。130~141行代码是预处理`sparse_lookup`传参的逻辑。147~162行代码对应开源项目中wide部分`self.linear`与deep部分`self.embed_layers`对39个特征作embedding的逻辑。164~171行是配置mxrec中`一表多查`特性的逻辑。 + +*** +3.3 创表操作 + +```python +# main_mxrec.py: 273~296行 +def create_feature_spec_list(use_timestamp=False): + 
access_threshold = None + eviction_threshold = None + if use_timestamp: + access_threshold = 1000 + eviction_threshold = 180 + + feature_spec_list = [FeatureSpec("sparse_feature", table_name="wide_embeddings", batch_size=cfg.batch_size, + access_threshold=access_threshold, eviction_threshold=eviction_threshold), + FeatureSpec("sparse_feature", table_name="deep_embeddings", batch_size=cfg.batch_size, + access_threshold=access_threshold, eviction_threshold=eviction_threshold)] + + if use_multi_lookup: + feature_spec_list.extend([FeatureSpec("sparse_feature", table_name="wide_embeddings", + batch_size=cfg.batch_size, + access_threshold=access_threshold, + eviction_threshold=eviction_threshold), + FeatureSpec("sparse_feature", table_name="deep_embeddings", + batch_size=cfg.batch_size, + access_threshold=access_threshold, + eviction_threshold=eviction_threshold)]) + if use_timestamp: + feature_spec_list.append(FeatureSpec("timestamp", is_timestamp=True)) + return feature_spec_list + +``` + +```python +# main_mxrec.py: 379~397行 +# 创表操作 +wide_emb_initializer = tf.compat.v1.truncated_normal_initializer(stddev=0.05, seed=sparse_hashtable_seed) +deep_emb_initializer = tf.compat.v1.truncated_normal_initializer(stddev=0.05, seed=sparse_hashtable_seed) + +sparse_hashtable_wide = create_table( + key_dtype=cfg.key_type, + dim=tf.TensorShape([cfg.emb_dim]), + name="wide_embeddings", + emb_initializer=wide_emb_initializer, + **cfg.get_emb_table_cfg() +) + +sparse_hashtable_deep = create_table( + key_dtype=cfg.key_type, + dim=tf.TensorShape([cfg.emb_dim]), + name="deep_embeddings", + emb_initializer=deep_emb_initializer, + **cfg.get_emb_table_cfg() +) +``` +`create_feature_spec_list()`的返回值是`make_batch_and_iterator()`、`model_forward()`的传参;`create_table()`的返回值是`sparse_lookup()`的传参。 +**注意:`len(feature_spec_list)`应与使用`create_table()`接口创建的表数相等;开启`一表多查`选项卡,feature_spec_list中的元素重复添加一次;开启`特征淘汰`选项卡,feature_spec_list增加时间戳的FeatureSpec类元素**。 + +*** + +3.4 模型反向传播过程 +```python +# main_mxrec.py: 410~442行 +train_variables, emb_variables = get_dense_and_sparse_variable() + +rank_size = mxrec_util.communication.hccl_ops.get_rank_size() +train_ops = [] +# multi task training +for loss, (model_optimizer, emb_optimizer) in zip([train_model.get("loss")], optimizer_list): + # do model optimization + grads = model_optimizer.compute_gradients(loss, var_list=train_variables) + avg_grads = [] + for grad, var in grads: + if rank_size > 1: + grad = hccl_ops.allreduce(grad, "sum") if grad is not None else None + if grad is not None: + avg_grads.append((grad / 8.0, var)) + # apply gradients: update variables + train_ops.append(model_optimizer.apply_gradients(avg_grads)) + + if use_dynamic_expansion: + train_address_list = tf.compat.v1.get_collection(ASCEND_SPARSE_LOOKUP_ID_OFFSET) + train_emb_list = tf.compat.v1.get_collection(ASCEND_SPARSE_LOOKUP_LOCAL_EMB) + # do embedding optimization by addr + sparse_grads = emb_optimizer.compute_gradients(loss, train_emb_list) # local_embedding + grads_and_vars = [(grad, address) for grad, address in zip(sparse_grads, train_address_list)] + train_ops.append(emb_optimizer.apply_gradients(grads_and_vars)) + else: + # do embedding optimization + sparse_grads = emb_optimizer.compute_gradients(loss, emb_variables) + print("sparse_grads_tensor:", sparse_grads) + grads_and_vars = [(grad, variable) for grad, variable in zip(sparse_grads, emb_variables)] + train_ops.append(emb_optimizer.apply_gradients(grads_and_vars)) + +# 动态学习率更新 +train_ops.extend([cfg.global_step.assign(cfg.global_step + 1), 
cfg.learning_rate[0], cfg.learning_rate[1]]) +``` +410~442行代码是模型的反向过程操作。mxRec对推荐模型中sparse_feature的创表查表操作作了加速,使用`create_table`与`sparse_lookup`接口替换tensorflow中的`tf.nn.embedding_lookup`接口。因此模型反向更新分为两部分:417~425行代码是对`model.py`内的模型部分的反向;427~439行代码是对sparse_feature作embedding操作部分的反向过程,根据是否开启`动态扩容`选择不同的参数计算梯度并更新权重。 + +*** + +#### 4. optimizer.py +如上所述,模型反向过程分为`model.py`与`embedding`两部分;`model.py`可使用tf原生的优化器,`embedding`部分选择mxrec提供的`lazy_adam`或`lazy_adam_by_addr`优化器。`delay_loss_scale.py`包装`dense_optimizer`与`sparse_optimizer`并对其应用损失缩放技术,该技术主要作用于混合精度训练过程中。 + +```python +import tensorflow as tf +from delay_loss_scale import DenseLossScaleOptimizer, SparseLossScaleOptimizer +from mx_rec.util.initialize import ConfigInitializer +from mx_rec.optimizers.lazy_adam import create_hash_optimizer +from mx_rec.optimizers.lazy_adam_by_addr import create_hash_optimizer_by_address + + +def get_dense_and_sparse_optimizer(cfg): + dense_optimizer = tf.train.AdamOptimizer(learning_rate=cfg.learning_rate[0]) + use_dynamic_expansion = ConfigInitializer.get_instance().use_dynamic_expansion + if use_dynamic_expansion: + sparse_optimizer = create_hash_optimizer_by_address(learning_rate=cfg.learning_rate[1]) + else: + sparse_optimizer = create_hash_optimizer(learning_rate=cfg.learning_rate[1]) + sparse_optimizer = SparseLossScaleOptimizer(sparse_optimizer, 1) + dense_optimizer = DenseLossScaleOptimizer(dense_optimizer, 1) + + return dense_optimizer, sparse_optimizer +``` + + diff --git a/examples/WideDeep/criteo.py b/examples/WideDeep/criteo.py new file mode 100644 index 00000000..9be82c35 --- /dev/null +++ b/examples/WideDeep/criteo.py @@ -0,0 +1,246 @@ +import os +import stat +import pickle +import argparse +import pandas as pd +import numpy as np +import tensorflow as tf +from tqdm import tqdm + +NAMES = ['label', 'I1', 'I2', 'I3', 'I4', 'I5', 'I6', 'I7', 'I8', 'I9', 'I10', 'I11', + 'I12', 'I13', 'C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9', 'C10', 'C11', + 'C12', 'C13', 'C14', 'C15', 'C16', 'C17', 'C18', 'C19', 'C20', 'C21', 'C22', + 'C23', 'C24', 'C25', 'C26'] + +def make_sub_file(lines, head, src_name, sub_dir_name, sub): + """Write sub-data. + Args: + :param lines: A list. Several pieces of data. + :param head: A string. ['label', 'I1', 'I2', ...]. + :param src_name: A string. The name of data. + :param sub_dir_name: A string. + :param sub: A scalar(Int). Record the current number of sub file. + :return: sub + 1. + """ + root_path, file_path = os.path.split(src_name) + file_name, suffix = file_path.split('.') + split_file_name = file_name + "_" + str(sub).zfill(2) + "." + suffix + split_file = os.path.join(root_path, sub_dir_name, split_file_name) + if not os.path.exists(os.path.join(root_path, sub_dir_name)): + os.mkdir(os.path.join(root_path, sub_dir_name)) + + modes = stat.S_IWUSR | stat.S_IRUSR + flags = os.O_WRONLY | os.O_TRUNC | os.O_CREAT + f = os.fdopen(os.open(split_file, flags, modes), 'w') + try: + f.writelines([head]) + f.writelines(lines) + return sub + 1 + finally: + os.close(f) + +def split_byline_count(filename, count, sub_dir_name): + """Split File. + Note: You can specify how many rows of data each sub file contains. + Args: + :param filename: A string. + :param count: A scalar(int). + :param sub_dir_name: A string. 
+ :return: + """ + f = open(filename, 'r') + try: + head = f.readline() + buf = [] + sub = 1 + for line in f: + buf.append(line) + if len(buf) == count: + sub = make_sub_file(buf, head, filename, sub_dir_name, sub) + buf = [] + if len(buf) != 0: + make_sub_file(buf, head, filename, sub_dir_name, sub) + finally: + f.close() + +def get_split_file_path(parent_path=None, dataset_path=None, sample_num=4600000): + """Get the list of split file path. + Note: Either parent_path or dataset_path must be valid. + If exists dataset_path + "/split", parent_path = dataset_path + "/split". + Args: + :param parent_path: A string. split file's parent path. + :param dataset_path: A string. + :param sample_num: A int. The sample number of every split file. + :return: A list. [file1_path, file2_path, ...] + """ + sub_dir_name = 'split' + if parent_path is None and dataset_path is None: + raise ValueError('Please give parent path or file path.') + if parent_path is None and os.path.exists(os.path.join(os.path.dirname(dataset_path), sub_dir_name)): + parent_path = os.path.join(os.path.dirname(dataset_path), sub_dir_name) + elif parent_path is None or not os.path.exists(parent_path): + split_byline_count(dataset_path, sample_num, sub_dir_name) + parent_path = os.path.join(os.path.dirname(dataset_path), sub_dir_name) + split_file_name = os.listdir(parent_path) + split_file_name.sort() + split_file_list = [parent_path + "/" + file_name for file_name in split_file_name if file_name[-3:] == 'txt'] + return split_file_list + +def get_fea_map(fea_map_path=None, split_file_list=None): + """Get feature map. + Note: Either parent_path or dataset_path must be valid. + If exists dir(split_file_list[0]) + "/fea_map.pkl", fea_map_path is valid. + If fea_map_path is None and you want to build the feature map, + the default file path is the parent directory of split file + "fea_map.pkl". + Args: + :param fea_map_path: A string. + :param split_file_list: A list. [file1_path, file2_path, ...] + :return: A dict. 
{'C1':{}, 'C2':{}, ...} + """ + if fea_map_path is None and split_file_list is None: + raise ValueError('Please give feature map path or split file list.') + if fea_map_path is None and os.path.join(os.path.dirname(split_file_list[0]), "fea_map.pkl"): + fea_map_path = os.path.join(os.path.dirname(split_file_list[0]), "fea_map.pkl") + if os.path.exists(fea_map_path) and fea_map_path[-3:] == 'pkl': + with open(fea_map_path, 'rb') as f: + fea_map = pickle.load(f) + return fea_map + fea_map = {} + for file_open in tqdm(split_file_list): + f = open(file_open) + for line in f: + row = line.strip('\n').split('\t') + for i in range(14, 40): + if row[i] == '': + continue + name = NAMES[i] + fea_map.setdefault(name, {}) + if fea_map[name].get(row[i]) is None: + fea_map[name][row[i]] = len(fea_map[name]) + for j in range(1, 14): + if row[j] == '': + continue + name = NAMES[j] + fea_map.setdefault(name, {}) + fea_map[name].setdefault('min', float(row[j])) + fea_map[name].setdefault('max', float(row[j])) + fea_map[name]['min'] = min(fea_map[name]['min'], float(row[j])) + fea_map[name]['max'] = max(fea_map[name]['max'], float(row[j])) + f.close() + for i in range(14, 40): + fea_map[NAMES[i]]['-1'] = len(fea_map[NAMES[i]]) + fea_map_path = os.path.join(os.path.dirname(split_file_list[0]), "fea_map.pkl") + + + modes = stat.S_IWUSR | stat.S_IRUSR + flags = os.O_WRONLY | os.O_TRUNC | os.O_CREAT + with os.fdopen(os.open(fea_map_path, flags, modes), 'wb') as fd: + pickle.dump(fea_map, fd, pickle.HIGHEST_PROTOCOL) + + fd.close() + return fea_map + +def rec_kbins_discretizer(dat, n_bins, min_max_dict): + """Bin continuous data into intervals. + Note: The strategy is "uniform". + Args: + :param dat: A dataframe. + :param n_bins: A scalar(int). + :param min_max_dict: A dict such as {'min': , 'max': }. + :return: The new dataframe. 
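+    Note: bin edges are np.linspace(min, max, n_bins + 1) per feature, and
+        values are mapped to bin ids with np.digitize.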
+ """ + features = dat.columns + n_features = len(features) + bin_edges = np.zeros(n_features, dtype=object) + for idx, feature in enumerate(features): + bin_edges[idx] = np.linspace(min_max_dict[feature]['min'], min_max_dict[feature]['max'], n_bins + 1) + rtol = 1.e-5 + atol = 1.e-8 + eps = atol + rtol * np.abs(dat[feature]) + np.digitize(dat[feature] + eps, bin_edges[idx][1:]) + return dat + +def convert_input2tfrd(in_file_path, out_file_path): + """ + txt to tfrecords + """ + def make_example(label_list, dense_feat_list, sparse_feat_list): + # '1.0' >> 1.0 >> 1 + dense_feature = np.array(np.array(dense_feat_list, dtype=np.float32), dtype=np.int64).reshape(-1) + sparse_feature = np.array(np.array(sparse_feat_list, dtype=np.float32), dtype=np.int64).reshape(-1) + label = np.array(np.array(label_list, dtype=np.float32), dtype=np.int64).reshape(-1) + feature_dict = {"dense_feature": tf.train.Feature(int64_list=tf.train.Int64List(value=dense_feature)), + "sparse_feature": tf.train.Feature(int64_list=tf.train.Int64List(value=sparse_feature)), + "label": tf.train.Feature(int64_list=tf.train.Int64List(value=label)) + } + example = tf.train.Example(features=tf.train.Features(feature=feature_dict)) + + return example + + file_name = out_file_path + in_file_path[-12:-4] + '.tfrecords' + file_writer = tf.io.TFRecordWriter(file_name) + + with open(in_file_path, encoding='utf-8') as file_in: + + for i, line in tqdm(enumerate(file_in)): + + line = line.strip('\n') + items = line.split('\t') + if len(items) != 40: + continue + label = int(items[0]) + dense = items[1:14] + sparse = items[14:] + + ex = make_example(label, dense, sparse) + serialized = ex.SerializeToString() + file_writer.write(serialized) + + file_writer.close() + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Get datasets') + parser.add_argument('--data_path') + parser.add_argument('--output_path') + + args, _ = parser.parse_known_args() + data_path = args.data_path + output_path = args.output_path + + # get txt_list + file_split_list = get_split_file_path(dataset_path=data_path) + # get feature_map + feature_map = get_fea_map(split_file_list=file_split_list) + + for file in tqdm(file_split_list): + + # read data + data_df = pd.read_csv(file, sep='\t', header=None, names=NAMES) + # name feature + sparse_features = ['C' + str(i) for i in range(1, 27)] + dense_features = ['I' + str(i) for i in range(1, 14)] + # data processing + data_df[sparse_features] = data_df[sparse_features].fillna('-1') + data_df[dense_features] = data_df[dense_features].fillna(0) + # sparse feature: mapping + for col in sparse_features: + data_df[col] = data_df[col].map(lambda x: feature_map[col][x]) + # dense feature: Bin continuous data into intervals. 
+ data_df[dense_features] = rec_kbins_discretizer(data_df[dense_features], 1000, feature_map) + # add offsets + slot_size_array = [1001, 1001, 1001, 1001, 1001, 1001, 1001, 1001, 1001, 1001, 1001, 1001, 1001, + 1462, 585, 10131228, 2202609, 307, 25, 12519, 635, 5, 93147, 5685, 8351594, 3196, + 29, 14994, 5461307, 12, 5654, 2174, 5, 7046548, 19, 17, 286182, 106, 142573] + offset_size_list = np.cumsum([0] + slot_size_array[:-1]) + for col_index in range(1, len(offset_size_list) + 1): + data_df.iloc[:, col_index] += offset_size_list[col_index - 1] + # save to txt + data_df.to_csv(file, sep='\t', index=False, header=False) + # txt to tfrecords + convert_input2tfrd(in_file_path=file, out_file_path=output_path) + + + + + diff --git a/examples/WideDeep/model/config.py b/examples/WideDeep/model/config.py index 78115d61..fae850f9 100644 --- a/examples/WideDeep/model/config.py +++ b/examples/WideDeep/model/config.py @@ -85,8 +85,8 @@ class LearningRateScheduler: global_step < self.warmup_steps, lambda: lr_factor_warmup, lambda: poly_schedule_dense ) - lr_sparse = self.base_lr_sparse * lr_factor_sparse - lr_dense = self.base_lr_dense * lr_factor_dense + lr_sparse = self.base_lr_sparse * lr_factor_constant + lr_dense = self.base_lr_dense * lr_factor_constant return lr_dense, lr_sparse @@ -108,10 +108,10 @@ class Config: self.train_file_pattern = "train" self.test_file_pattern = "test" - self.batch_size = 8192 - self.line_per_sample = 1024 - self.train_epoch = 3 - self.test_epoch = 1 + self.batch_size = 4096 + self.line_per_sample = 1 + self.train_epoch = 1 + self.test_epoch = 9 self.perform_shuffle = False self.key_type = tf.int64 @@ -124,12 +124,10 @@ class Config: self.field_num = 26 self.send_count = 46000 // self.rank_size - self.emb_dim = 128 + self.emb_dim = 8 self.hashtable_threshold = 1 self.USE_PIPELINE_TEST = False - # False indicates use SGD optimizer, else use LazyAdam. 
If True, is incompatible with dynamic_expansion - self.use_lazy_adam_optimizer = False # 动态学习率 GLOBAL_BATCH_SIZE = 8192 * 8 @@ -140,8 +138,8 @@ class Config: ] self.global_step = tf.Variable(0, trainable=False) _lr_scheduler = LearningRateScheduler( - 28.443, - 33.71193, + 0.001, + 0.001, LR_SCHEDULE_STEPS[0], LR_SCHEDULE_STEPS[1], LR_SCHEDULE_STEPS[2], @@ -154,7 +152,7 @@ class Config: raise ValueError("please export CACHE_MODE environment variable, support:[HBM, DDR, SSD]") if self.cache_mode == CacheModeEnum.HBM.value: - self.dev_vocab_size = 24_000_000 * self.rank_size + self.dev_vocab_size = 14_000_000 * self.rank_size self.host_vocab_size = 0 elif self.cache_mode == CacheModeEnum.DDR.value: self.dev_vocab_size = 500_000 * self.rank_size diff --git a/examples/WideDeep/model/delay_loss_scale.py b/examples/WideDeep/model/delay_loss_scale.py index 01bb0d8f..a99a2db3 100644 --- a/examples/WideDeep/model/delay_loss_scale.py +++ b/examples/WideDeep/model/delay_loss_scale.py @@ -17,48 +17,32 @@ import tensorflow as tf from tensorflow.python.training import optimizer -from config import Config - class DenseLossScaleOptimizer: - def __init__(self, opt: optimizer.Optimizer, loss_scale: int) -> None: + def __init__(self, opt, loss_scale): if not isinstance(opt, optimizer.Optimizer): raise ValueError('"opt" must be an instance of Optimizer, but got: %s' % type(opt)) self._optimizer = opt self._loss_scale = tf.convert_to_tensor(loss_scale, tf.float32) - _update_lr_loss_scale(self._optimizer, loss_scale) + self._optimizer._learning_rate = self._optimizer._lr / self._loss_scale def compute_gradients(self, loss, var_list=None): - return self._optimizer.compute_gradients(loss * self._loss_scale, var_list=var_list) + return self._optimizer.compute_gradients(loss*self._loss_scale, var_list=var_list) def apply_gradients(self, avg_grads): return self._optimizer.apply_gradients(avg_grads) class SparseLossScaleOptimizer: - def __init__(self, opt: optimizer.Optimizer, loss_scale: int) -> None: + def __init__(self, opt, loss_scale): if not isinstance(opt, optimizer.Optimizer): raise ValueError('"opt" must be an instance of Optimizer, but got: %s' % type(opt)) self._optimizer = opt self._loss_scale = tf.convert_to_tensor(loss_scale, tf.float32) - _update_lr_loss_scale(self._optimizer, loss_scale) + self._optimizer._learning_rate = self._optimizer._lr / self._loss_scale def compute_gradients(self, loss, var_list=None): - return tf.gradients(loss * self._loss_scale, var_list) + return tf.gradients(loss*self._loss_scale, var_list) def apply_gradients(self, grads_and_vars): - return self._optimizer.apply_gradients(grads_and_vars) - - -def _update_lr_loss_scale(opt, loss_scale): - if loss_scale <= 0: - raise RuntimeError("the loss_scale must be greater than zero.") - loss_scale = tf.convert_to_tensor(loss_scale, tf.float32) - if hasattr(opt, "_lr"): - # LazyAdam or Adam optimizer - opt._lr = opt._lr / loss_scale - elif hasattr(opt, "_learning_rate"): - # SGD optimizer - opt._learning_rate = opt._learning_rate / loss_scale - else: - raise RuntimeError("`opt` should have a `_learning_rate` or `_lr` named field.") + return self._optimizer.apply_gradients(grads_and_vars) \ No newline at end of file diff --git a/examples/WideDeep/model/main_mxrec.py b/examples/WideDeep/model/main_mxrec.py index 51ed7c4a..088aac84 100644 --- a/examples/WideDeep/model/main_mxrec.py +++ b/examples/WideDeep/model/main_mxrec.py @@ -16,6 +16,7 @@ import os import shutil +import collections import time import warnings import random @@ -34,7 
+35,7 @@ from mx_rec.core.asc.manager import start_asc_pipeline from mx_rec.core.embedding import create_table, sparse_lookup from mx_rec.core.feature_process import EvictHook from mx_rec.graph.modifier import modify_graph_and_start_emb_cache, GraphModifierHook -from mx_rec.constants.constants import ASCEND_TIMESTAMP +from mx_rec.constants.constants import ASCEND_TIMESTAMP, LIBREC_EOS_OPS_SO from mx_rec.util.initialize import ConfigInitializer, init, terminate_config_initializer from mx_rec.util.ops import import_host_pipeline_ops import mx_rec.util as mxrec_util @@ -57,7 +58,7 @@ def add_timestamp_func(batch): return batch -def make_batch_and_iterator(config, feature_spec_list, is_training, dump_graph, is_use_faae=False): +def make_batch_and_iterator(config, feature_spec_list, is_training, dump_graph, is_use_faae=False, **kwargs): if config.USE_PIPELINE_TEST: num_parallel = 1 else: @@ -68,7 +69,7 @@ def make_batch_and_iterator(config, feature_spec_list, is_training, dump_graph, # Extract features using the keys set during creation 'label': tf.compat.v1.FixedLenFeature(shape=(config.line_per_sample,), dtype=tf.int64), 'sparse_feature': tf.compat.v1.FixedLenFeature(shape=(26 * config.line_per_sample,), dtype=tf.int64), - 'dense_feature': tf.compat.v1.FixedLenFeature(shape=(13 * config.line_per_sample,), dtype=tf.float32), + 'dense_feature': tf.compat.v1.FixedLenFeature(shape=(13 * config.line_per_sample,), dtype=tf.int64), } sample = tf.compat.v1.parse_single_example(data_record, features) return sample @@ -76,7 +77,6 @@ def make_batch_and_iterator(config, feature_spec_list, is_training, dump_graph, def reshape_fn(batch): batch['label'] = tf.reshape(batch['label'], [-1, 1]) batch['dense_feature'] = tf.reshape(batch['dense_feature'], [-1, 13]) - batch['dense_feature'] = tf.math.log(batch['dense_feature'] + 3.0) batch['sparse_feature'] = tf.reshape(batch['sparse_feature'], [-1, 26]) return batch @@ -97,10 +97,24 @@ def make_batch_and_iterator(config, feature_spec_list, is_training, dump_graph, dataset = dataset.map(extract_fn, num_parallel_calls=num_parallel).batch(batch_size, drop_remainder=True) dataset = dataset.map(reshape_fn, num_parallel_calls=num_parallel) + + def map_fn(batch): + new_batch = batch + new_batch['sparse_feature'] = tf.concat([batch['dense_feature'], batch['sparse_feature']], axis=1) + return new_batch + dataset = dataset.map(map_fn, num_parallel_calls=num_parallel) + if is_use_faae: dataset = dataset.map(add_timestamp_func) if not MODIFY_GRAPH_FLAG: + + # Enable EOSDataset manually. 
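+        # EOS (end-of-stream) markers let every device stop after the same
+        # number of batches, so uneven shards cannot stall collective ops.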
+ librec = import_host_pipeline_ops(LIBREC_EOS_OPS_SO) + channel_id = 0 if is_training else 1 + # 此处eos_map的调用必须先于insert_func,避免多卡数据不均匀的情况 + dataset = dataset.eos_map(librec, channel_id, kwargs.get("max_train_steps", max_train_steps), + kwargs.get("max_eval_steps", eval_steps)) insert_fn = get_asc_insert_func(tgt_key_specs=feature_spec_list, is_training=is_training, dump_graph=dump_graph) dataset = dataset.map(insert_fn) @@ -111,32 +125,69 @@ def make_batch_and_iterator(config, feature_spec_list, is_training, dump_graph, return batch, iterator -def model_forward(feature_list, hash_table_list, batch, is_train, modify_graph): - embedding_list = [] + + +def model_forward(model_args): + feature_list = model_args.feature_list + wide_hash_table_list = model_args.wide_hash_table_list + deep_hash_table_list = model_args.deep_hash_table_list + batch = model_args.batch + is_train = model_args.is_train + modify_graph = model_args.modify_graph + is_use_faae = model_args.is_use_faae + + wide_embedding_list = [] + deep_embedding_list = [] + wide_feature_list = [] + deep_feature_list = [] + if is_use_faae: + feature_list_copy = feature_list[:-1] + else: + feature_list_copy = feature_list + + for i, item in enumerate(feature_list_copy): + if i % 2 == 0: + wide_feature_list.append(item) + else: + deep_feature_list.append(item) + logger.debug(f"In model_forward function, is_train: {is_train}, feature_list: {len(feature_list)}, " - f"hash_table_list: {len(hash_table_list)}") - for feature, hash_table in zip(feature_list, hash_table_list): + f"wide_hash_table_list: {len(wide_hash_table_list)}, " + f"deep_hash_table_list: {len(deep_hash_table_list)}") + + # wide + for wide_feature, wide_hash_table in zip(wide_feature_list, wide_hash_table_list): if MODIFY_GRAPH_FLAG: - feature = batch["sparse_feature"] - embedding = sparse_lookup(hash_table, feature, cfg.send_count, dim=None, is_train=is_train, - name="user_embedding_lookup", modify_graph=modify_graph, batch=batch, + wide_feature = batch["sparse_feature"] + wide_embedding = sparse_lookup(wide_hash_table, wide_feature, cfg.send_count, dim=None, is_train=is_train, + name="wide_embedding_lookup", modify_graph=modify_graph, batch=batch, access_and_evict_config=None) - embedding_list.append(embedding) + wide_embedding_list.append(wide_embedding) - if len(embedding_list) == 1: - emb = embedding_list[0] - elif len(embedding_list) > 1: - emb = tf.reduce_sum(embedding_list, axis=0, keepdims=False) + # deep + for deep_feature, deep_hash_table in zip(deep_feature_list, deep_hash_table_list): + if MODIFY_GRAPH_FLAG: + deep_feature = batch["sparse_feature"] + deep_embedding = sparse_lookup(deep_hash_table, deep_feature, cfg.send_count, dim=None, is_train=is_train, + name="deep_embedding_lookup", modify_graph=modify_graph, batch=batch, + access_and_evict_config=None) + deep_embedding_list.append(deep_embedding) + + if len(wide_embedding_list) == 1: + wide_emb = wide_embedding_list[0] + deep_emb = deep_embedding_list[0] + elif len(wide_embedding_list) > 1: + wide_emb = tf.reduce_sum(wide_embedding_list, axis=0, keepdims=False) + deep_emb = tf.reduce_sum(deep_embedding_list, axis=0, keepdims=False) else: raise ValueError("the length of embedding_list must be greater than or equal to 1.") my_model = MyModel() - model_output = my_model.build_model(embedding=emb, - dense_feature=batch["dense_feature"], - label=batch["label"], - is_training=is_train, - seed=dense_hashtable_seed) - return model_output + BuildModel = collections.namedtuple("BuildModel", ["wide_embedding", 
"deep_embedding", "label", "is_training", + "seed", "dropout_rate", "batch_norm"]) + build_model_args = BuildModel(wide_emb, deep_emb, batch["label"], is_train, dense_hashtable_seed, 0.5, False) + model_output = my_model.build_model(build_model_args) + return model_output def evaluate(): print("read_test dataset") @@ -236,13 +287,20 @@ def create_feature_spec_list(use_timestamp=False): access_threshold = 1000 eviction_threshold = 180 - feature_spec_list = [FeatureSpec("sparse_feature", table_name="sparse_embeddings", batch_size=cfg.batch_size, + feature_spec_list = [FeatureSpec("sparse_feature", table_name="wide_embeddings", batch_size=cfg.batch_size, + access_threshold=access_threshold, eviction_threshold=eviction_threshold), + FeatureSpec("sparse_feature", table_name="deep_embeddings", batch_size=cfg.batch_size, access_threshold=access_threshold, eviction_threshold=eviction_threshold)] + if use_multi_lookup: - feature_spec_list.append(FeatureSpec("sparse_feature", table_name="sparse_embeddings", + feature_spec_list.extend([FeatureSpec("sparse_feature", table_name="wide_embeddings", + batch_size=cfg.batch_size, + access_threshold=access_threshold, + eviction_threshold=eviction_threshold), + FeatureSpec("sparse_feature", table_name="deep_embeddings", batch_size=cfg.batch_size, access_threshold=access_threshold, - eviction_threshold=eviction_threshold)) + eviction_threshold=eviction_threshold)]) if use_timestamp: feature_spec_list.append(FeatureSpec("timestamp", is_timestamp=True)) return feature_spec_list @@ -281,8 +339,9 @@ if __name__ == "__main__": rank_id = int(os.getenv("RANK_ID")) if os.getenv("RANK_ID") else None rank_size = int(os.getenv("TRAIN_RANK_SIZE")) if os.getenv("TRAIN_RANK_SIZE") else None interval = int(os.getenv("INTERVAL")) if os.getenv("INTERVAL") else None - train_steps = 10000 - eval_steps = 1360 + max_train_steps = 1270 + train_steps = 1120 + eval_steps = 1080 try: use_dynamic_expansion = bool(int(os.getenv("USE_DYNAMIC_EXPANSION", 0))) @@ -315,9 +374,11 @@ if __name__ == "__main__": feature_spec_list_eval = create_feature_spec_list(use_timestamp=False) train_batch, train_iterator = make_batch_and_iterator(cfg, feature_spec_list_train, is_training=True, - dump_graph=True, is_use_faae=use_faae) + dump_graph=True, is_use_faae=use_faae, + max_train_steps=max_train_steps, max_eval_steps=eval_steps) eval_batch, eval_iterator = make_batch_and_iterator(cfg, feature_spec_list_eval, is_training=False, - dump_graph=False, is_use_faae=use_faae) + dump_graph=False, is_use_faae=use_faae, + max_train_steps=max_train_steps, max_eval_steps=eval_steps) logger.info(f"train_batch: {train_batch}") if use_faae: @@ -325,60 +386,75 @@ if __name__ == "__main__": optimizer_list = [get_dense_and_sparse_optimizer(cfg)] - # note: variance_scaling_initializer only support HBM mode - emb_initializer = tf.compat.v1.truncated_normal_initializer(stddev=0.05, seed=sparse_hashtable_seed) \ - if cfg.cache_mode != "HBM" or use_dynamic_expansion else \ - tf.compat.v1.variance_scaling_initializer(mode="fan_avg", distribution='normal', seed=sparse_hashtable_seed) - sparse_hashtable = create_table( + # 创表操作 + wide_emb_initializer = tf.compat.v1.truncated_normal_initializer(stddev=0.05, seed=sparse_hashtable_seed) + deep_emb_initializer = tf.compat.v1.truncated_normal_initializer(stddev=0.05, seed=sparse_hashtable_seed) + + sparse_hashtable_wide = create_table( key_dtype=cfg.key_type, dim=tf.TensorShape([cfg.emb_dim]), - name="sparse_embeddings", - emb_initializer=emb_initializer, + name="wide_embeddings", 
+ emb_initializer=wide_emb_initializer, **cfg.get_emb_table_cfg() ) + + sparse_hashtable_deep = create_table( + key_dtype=cfg.key_type, + dim=tf.TensorShape([cfg.emb_dim]), + name="deep_embeddings", + emb_initializer=deep_emb_initializer, + **cfg.get_emb_table_cfg() + ) + if use_faae: tf.compat.v1.add_to_collection(ASCEND_TIMESTAMP, train_batch["timestamp"]) - sparse_hashtable_list = [sparse_hashtable, sparse_hashtable] if use_multi_lookup else [sparse_hashtable] - train_model = model_forward(feature_spec_list_train, sparse_hashtable_list, train_batch, - is_train=True, modify_graph=MODIFY_GRAPH_FLAG) - eval_model = model_forward(feature_spec_list_eval, sparse_hashtable_list, eval_batch, - is_train=False, modify_graph=MODIFY_GRAPH_FLAG) - - dense_variables, sparse_variables = get_dense_and_sparse_variable() - trainable_varibles = [] - trainable_varibles.extend(dense_variables) - if use_dynamic_expansion: - trainable_varibles.append(tf.compat.v1.get_collection(ASCEND_SPARSE_LOOKUP_LOCAL_EMB)[0]) - else: - trainable_varibles.extend(sparse_variables) + # 一表多查 + wide_hashtable_list = [sparse_hashtable_wide, sparse_hashtable_wide] if use_multi_lookup else \ + [sparse_hashtable_wide] + deep_hashtable_list = [sparse_hashtable_deep, sparse_hashtable_deep] if use_multi_lookup else \ + [sparse_hashtable_deep] + + + Forward = collections.namedtuple("Forward", ["feature_list", "wide_hash_table_list", "deep_hash_table_list", + "batch", "is_train", "modify_graph", "is_use_faae"]) + train_forward_args = Forward(feature_spec_list_train, wide_hashtable_list, deep_hashtable_list, train_batch, + True, MODIFY_GRAPH_FLAG, use_faae) + eval_forward_args = Forward(feature_spec_list_eval, wide_hashtable_list, deep_hashtable_list, eval_batch, + False, MODIFY_GRAPH_FLAG, use_faae) + train_model = model_forward(train_forward_args) + eval_model = model_forward(eval_forward_args) + + train_variables, emb_variables = get_dense_and_sparse_variable() + rank_size = mxrec_util.communication.hccl_ops.get_rank_size() train_ops = [] # multi task training - for loss, (dense_optimizer, sparse_optimizer) in zip([train_model.get("loss")], optimizer_list): - # do dense optimization - grads = dense_optimizer.compute_gradients(loss, var_list=trainable_varibles) + for loss, (model_optimizer, emb_optimizer) in zip([train_model.get("loss")], optimizer_list): + # do model optimization + grads = model_optimizer.compute_gradients(loss, var_list=train_variables) avg_grads = [] - for grad, var in grads[:-1]: + for grad, var in grads: if rank_size > 1: grad = hccl_ops.allreduce(grad, "sum") if grad is not None else None if grad is not None: avg_grads.append((grad / 8.0, var)) # apply gradients: update variables - train_ops.append(dense_optimizer.apply_gradients(avg_grads)) + train_ops.append(model_optimizer.apply_gradients(avg_grads)) if use_dynamic_expansion: train_address_list = tf.compat.v1.get_collection(ASCEND_SPARSE_LOOKUP_ID_OFFSET) - # do sparse optimization by addr - sparse_grads = list(grads[-1]) # local_embedding + train_emb_list = tf.compat.v1.get_collection(ASCEND_SPARSE_LOOKUP_LOCAL_EMB) + # do embedding optimization by addr + sparse_grads = emb_optimizer.compute_gradients(loss, train_emb_list) # local_embedding grads_and_vars = [(grad, address) for grad, address in zip(sparse_grads, train_address_list)] - train_ops.append(sparse_optimizer.apply_gradients(grads_and_vars)) + train_ops.append(emb_optimizer.apply_gradients(grads_and_vars)) else: - # do sparse optimization - sparse_grads = list(grads[-1]) + # do embedding 
optimization + sparse_grads = emb_optimizer.compute_gradients(loss, emb_variables) print("sparse_grads_tensor:", sparse_grads) - grads_and_vars = [(grad, variable) for grad, variable in zip(sparse_grads, sparse_variables)] - train_ops.append(sparse_optimizer.apply_gradients(grads_and_vars)) + grads_and_vars = [(grad, variable) for grad, variable in zip(sparse_grads, emb_variables)] + train_ops.append(emb_optimizer.apply_gradients(grads_and_vars)) # 动态学习率更新 train_ops.extend([cfg.global_step.assign(cfg.global_step + 1), cfg.learning_rate[0], cfg.learning_rate[1]]) @@ -451,8 +527,11 @@ if __name__ == "__main__": logger.info(f"step: {i * iteration_per_loop}; lr: {lr}") logger.info(f"global step: {global_step}") logger.info(f"step: {i * iteration_per_loop}; current sess cost time: {cost_time:.10f}; current QPS: {qps}") - logger.info(f"training at step:{i * iteration_per_loop}, table[{sparse_hashtable.table_name}], " - f"table size:{sparse_hashtable.size()}, table capacity:{sparse_hashtable.capacity()}") + logger.info(f"training at step:{i * iteration_per_loop}, " + f"table[{sparse_hashtable_wide.table_name}], " + f"table size:{sparse_hashtable_wide.size()}, table capacity:{sparse_hashtable_wide.capacity()}, " + f"table[{sparse_hashtable_deep.table_name}], " + f"table size:{sparse_hashtable_deep.size()}, table capacity:{sparse_hashtable_deep.capacity()}") if i % (train_steps // iteration_per_loop) == 0: if interval is not None: diff --git a/examples/WideDeep/model/model.py b/examples/WideDeep/model/model.py index 037fb276..61b1fdea 100644 --- a/examples/WideDeep/model/model.py +++ b/examples/WideDeep/model/model.py @@ -34,61 +34,54 @@ class MyModel: self._loss_fn = None self.is_training = None - @classmethod - def _dot_interaction(cls, _input): - num_features = tf.shape(_input)[1] - batch_size = tf.shape(_input)[0] - xactions = tf.matmul(_input, _input, transpose_b=True) - ones = tf.ones_like(xactions, dtype=tf.float32) - upper_tri_mask = tf.linalg.band_part(ones, 0, -1) - - activations = tf.where(condition=tf.cast(upper_tri_mask, tf.bool), - x=tf.zeros_like(xactions), - y=xactions) - out_dim = num_features * num_features - activations = tf.reshape(activations, (batch_size, out_dim)) - return activations - - def build_model(self, - embedding=None, - dense_feature=None, - label=None, - is_training=True, - seed=None): - with tf.variable_scope("mlp", reuse=tf.AUTO_REUSE): + def build_model(self,model_args): + wide_embedding = model_args.wide_embedding + deep_embedding = model_args.deep_embedding + label = model_args.label + is_training = model_args.is_training + seed = model_args.seed + dropout_rate = model_args.dropout_rate + batch_norm = model_args.batch_norm + + with tf.variable_scope("wide_deep", reuse=tf.AUTO_REUSE): self._loss_fn = tf.keras.losses.BinaryCrossentropy(from_logits=True) self.is_training = is_training - dense_embedding_vec = self.bottom_stack(dense_feature, seed) - dense_embedding = tf.expand_dims(dense_embedding_vec, 1) - interaction_args = tf.concat([dense_embedding, embedding], axis=1) - interaction_output = self._dot_interaction(interaction_args) - feature_interaction_output = tf.concat([dense_embedding_vec, interaction_output], axis=1) - # (8192, 857) - logits = self.top_stack(feature_interaction_output, seed) - loss = self._loss_fn(label, logits) - prediction = tf.sigmoid(logits) - trainable_variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='mlp') + + # wide + batch_size, wide_num, wide_emb_dim = wide_embedding.shape + wide_input = 
tf.reshape(wide_embedding[:, :, 0], shape=(batch_size, wide_num * 1)) + wide_output = tf.reshape(tf.reduce_sum(wide_input, axis=1), shape=(-1, 1)) + + # deep + batch_size, deep_num, deep_emb_dim = deep_embedding.shape + deep_input = tf.reshape(deep_embedding, shape=(batch_size, deep_num * deep_emb_dim)) + + ## MLP + hidden_units = [256, 128, 64] + net = deep_input + for i, unit in enumerate(hidden_units): + + net = tf.layers.dense(net, units=unit, activation='relu', name=f'hidden_layer_{i}', + kernel_initializer=tf.glorot_uniform_initializer(seed=seed), + bias_initializer=tf.zeros_initializer()) + + if dropout_rate is not None and 0.0 < dropout_rate < 1.0: + net = tf.layers.dropout(net, dropout_rate, training=self.is_training) + if batch_norm: + net = tf.layers.batch_normalization(net, training=self.is_training) + + deep_output = tf.layers.dense(net, units=1, activation=None, name='deep_output', + kernel_initializer=tf.glorot_uniform_initializer(seed=seed), + bias_initializer=tf.zeros_initializer()) + + total_logits = 0.5 * tf.add(wide_output, deep_output, name='total_logits') + loss = self._loss_fn(label, total_logits) + prediction = tf.sigmoid(total_logits) + trainable_variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='wide_deep') return {LOSS_OP_NAME: loss, PRED_OP_NAME: prediction, LABEL_OP_NAME: label, VAR_LIST: trainable_variables} - def bottom_stack(self, _input, seed): - dnn1 = tf.layers.dense(_input, 512, activation='relu', name='bs1', - kernel_initializer=tf.variance_scaling_initializer(mode="fan_avg", distribution='normal', seed=seed), - bias_initializer=tf.variance_scaling_initializer(mode="fan_avg", distribution='normal', seed=seed), - kernel_regularizer=tf.contrib.layers.l1_regularizer(1e-2)) - dnn2 = tf.layers.dense(dnn1, 256, activation='relu', name='bs2', kernel_initializer=tf.variance_scaling_initializer(mode="fan_avg", distribution='normal', seed=seed), bias_initializer=tf.variance_scaling_initializer(mode="fan_avg", distribution='normal', seed=seed), kernel_regularizer=tf.contrib.layers.l1_regularizer(1e-2)) - dnn3 = tf.layers.dense(dnn2, 128, activation='relu', name='bs3', kernel_initializer=tf.variance_scaling_initializer(mode="fan_avg", distribution='normal', seed=seed), bias_initializer=tf.variance_scaling_initializer(mode="fan_avg", distribution='normal', seed=seed), kernel_regularizer=tf.contrib.layers.l1_regularizer(1e-2)) - return dnn3 - - def top_stack(self, _input, seed): - dnn1 = tf.layers.dense(_input, 1024, activation='relu', name='ts1', kernel_initializer=tf.variance_scaling_initializer(mode="fan_avg", distribution='normal', seed=seed), bias_initializer=tf.variance_scaling_initializer(mode="fan_avg", distribution='normal', seed=seed), kernel_regularizer=tf.contrib.layers.l1_regularizer(1e-2)) - dnn2 = tf.layers.dense(dnn1, 1024, activation='relu', name='ts2', kernel_initializer=tf.variance_scaling_initializer(mode="fan_avg", distribution='normal', seed=seed), bias_initializer=tf.variance_scaling_initializer(mode="fan_avg", distribution='normal', seed=seed), kernel_regularizer=tf.contrib.layers.l1_regularizer(1e-2)) - dnn3 = tf.layers.dense(dnn2, 512, activation='relu', name='ts3', kernel_initializer=tf.variance_scaling_initializer(mode="fan_avg", distribution='normal', seed=seed), bias_initializer=tf.variance_scaling_initializer(mode="fan_avg", distribution='normal', seed=seed), kernel_regularizer=tf.contrib.layers.l1_regularizer(1e-2)) - dnn4 = tf.layers.dense(dnn3, 256, activation='relu', name='ts4', 
kernel_initializer=tf.variance_scaling_initializer(mode="fan_avg", distribution='normal', seed=seed), bias_initializer=tf.variance_scaling_initializer(mode="fan_avg", distribution='normal', seed=seed), kernel_regularizer=tf.contrib.layers.l1_regularizer(1e-2)) - dnn5 = tf.layers.dense(dnn4, 1, activation=None, name='ts5', kernel_initializer=tf.variance_scaling_initializer(mode="fan_avg", distribution='normal', seed=seed), bias_initializer=tf.variance_scaling_initializer(mode="fan_avg", distribution='normal', seed=seed), kernel_regularizer=tf.contrib.layers.l1_regularizer(1e-2)) - return dnn5 - my_model = MyModel() diff --git a/examples/WideDeep/model/optimizer.py b/examples/WideDeep/model/optimizer.py index 18dbe288..1a781a01 100644 --- a/examples/WideDeep/model/optimizer.py +++ b/examples/WideDeep/model/optimizer.py @@ -15,32 +15,20 @@ # ============================================================================== import tensorflow as tf - from delay_loss_scale import DenseLossScaleOptimizer, SparseLossScaleOptimizer -from gradient_descent_w import create_hash_optimizer from mx_rec.util.initialize import ConfigInitializer -from mx_rec.optimizers.gradient_descent_by_addr import create_hash_optimizer_by_addr -from mx_rec.optimizers import lazy_adam +from mx_rec.optimizers.lazy_adam import create_hash_optimizer +from mx_rec.optimizers.lazy_adam_by_addr import create_hash_optimizer_by_address def get_dense_and_sparse_optimizer(cfg): + dense_optimizer = tf.train.AdamOptimizer(learning_rate=cfg.learning_rate[0]) use_dynamic_expansion = ConfigInitializer.get_instance().use_dynamic_expansion - if cfg.use_lazy_adam_optimizer: - if use_dynamic_expansion: - raise RuntimeError("model is incompatible with dynamic_expansion when use lazy_adam optimizer.") - # use lazy_adam optimizer - dense_optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=cfg.learning_rate[0]) - sparse_optimizer = lazy_adam.create_hash_optimizer(learning_rate=cfg.learning_rate[1]) - loss_scale = 65536 + if use_dynamic_expansion: + sparse_optimizer = create_hash_optimizer_by_address(learning_rate=cfg.learning_rate[1]) else: - # use SGD optimizer - dense_optimizer = tf.compat.v1.train.GradientDescentOptimizer(learning_rate=cfg.learning_rate[0]) - if use_dynamic_expansion: - sparse_optimizer = create_hash_optimizer_by_addr(learning_rate=cfg.learning_rate[1], weight_decay=0.0001) - else: - sparse_optimizer = create_hash_optimizer(learning_rate=cfg.learning_rate[1], weight_decay=0.0001) - loss_scale = 1024 - sparse_optimizer = SparseLossScaleOptimizer(sparse_optimizer, loss_scale) - dense_optimizer = DenseLossScaleOptimizer(dense_optimizer, loss_scale) + sparse_optimizer = create_hash_optimizer(learning_rate=cfg.learning_rate[1]) + sparse_optimizer = SparseLossScaleOptimizer(sparse_optimizer, 1) + dense_optimizer = DenseLossScaleOptimizer(dense_optimizer, 1) return dense_optimizer, sparse_optimizer -- Gitee From 91aa31c4db18d261a98cc34ae1d432a4dbb34643 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=83=AD=E6=9C=9B?= <1244372993@qq.com> Date: Mon, 3 Jun 2024 19:03:20 +0800 Subject: [PATCH 190/302] =?UTF-8?q?WideDeep=E6=A8=A1=E5=9E=8B=20=E8=BF=81?= =?UTF-8?q?=E7=A7=BB=E4=BB=A3=E7=A0=81=E4=BF=AE=E6=94=B9cleancode=E9=97=AE?= =?UTF-8?q?=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/WideDeep/README_WD.md | 7 ++--- examples/WideDeep/criteo.py | 5 ++++ examples/WideDeep/model/delay_loss_scale.py | 29 ++++++++++++++++----- examples/WideDeep/model/main_mxrec.py | 9 +++---- 
 examples/WideDeep/model/model.py            |  2 +-
 examples/WideDeep/model/optimizer.py        |  5 ++--
 6 files changed, 39 insertions(+), 18 deletions(-)

diff --git a/examples/WideDeep/README_WD.md b/examples/WideDeep/README_WD.md
index 261861f7..beb592c9 100644
--- a/examples/WideDeep/README_WD.md
+++ b/examples/WideDeep/README_WD.md
@@ -135,7 +135,7 @@ bash run.sh {so_path} {mx_rec_package_path} {hccl_cfg_json} {dlrm_criteo_data_pa
 ***
 
 ## Model migration
-**Migration approach:** Following the open-source project, adapt the Wide&Deep model within the existing, already-adapted dlrm model framework by changing the relevant code logic.
+**Migration approach:** Adapt the Wide&Deep model within the existing, already-adapted dlrm model framework by changing the relevant code logic. **Core idea: modify `model.py` based on the open-source project's model code; move part of the data processing into `criteo.py` and part into `make_batch_and_iterator()` in `main_mxrec.py`; the other related changes in `main_mxrec.py` mainly adapt the code to the features provided by mxrec.**
 
 The `dynamic expansion`, `dynamic shape`, `auto graph modification` and `multi-lookup on one table` mentioned below are features provided by mxrec; see `run.sh` for their switches.
 
@@ -164,6 +164,7 @@ export USE_MODIFY_GRAPH=0        # 0: feature spec mode; 1: auto graph-modify
 
 ***
 
+### Code change notes
 #### 1. config.py
 The experiment hyper-parameters are configured as follows: the dynamic learning-rate logic is removed, and the learning rate is fixed at 0.001.
 
@@ -438,7 +439,7 @@ def model_forward(feature_list, wide_hash_table_list, deep_hash_table_list, batc
                                 dropout_rate=0.5)
     return model_output
 ```
-This function is the forward pass; it mainly covers the embedding (table lookup) of sparse_feature and the model's forward computation. Lines 130~141 preprocess the arguments passed to `sparse_lookup`. Lines 147~162 correspond to the open-source project's logic where the wide part `self.linear` and the deep part `self.embed_layers` embed the 39 features. Lines 164~171 configure mxrec's `multi-lookup on one table` feature.
+This function is the forward pass; it mainly covers the embedding (table lookup) of sparse_feature and the model's forward computation. Lines 130-141 preprocess the arguments passed to `sparse_lookup`. Lines 147-162 correspond to the open-source project's logic where the wide part `self.linear` and the deep part `self.embed_layers` embed the 39 features. Lines 164-171 configure mxrec's `multi-lookup on one table` feature.
 
 ***
 
 3.3 Table creation
@@ -536,7 +537,7 @@ for loss, (model_optimizer, emb_optimizer) in zip([train_model.get("loss")], opt
 # dynamic learning-rate update
 train_ops.extend([cfg.global_step.assign(cfg.global_step + 1), cfg.learning_rate[0], cfg.learning_rate[1]])
 ```
-Lines 410~442 implement the model's backward pass. mxRec accelerates the table-creation and table-lookup operations on sparse_feature in recommendation models, replacing TensorFlow's `tf.nn.embedding_lookup` interface with the `create_table` and `sparse_lookup` interfaces. The backward update is therefore split into two parts: lines 417~425 back-propagate through the model part defined in `model.py`; lines 427~439 back-propagate through the embedding of sparse_feature, choosing different parameters for gradient computation and weight update depending on whether `dynamic expansion` is enabled.
+Lines 410-442 implement the model's backward pass. mxRec accelerates the table-creation and table-lookup operations on sparse_feature in recommendation models, replacing TensorFlow's `tf.nn.embedding_lookup` interface with the `create_table` and `sparse_lookup` interfaces. The backward update is therefore split into two parts: lines 417-425 back-propagate through the model part defined in `model.py`; lines 427-439 back-propagate through the embedding of sparse_feature, choosing different parameters for gradient computation and weight update depending on whether `dynamic expansion` is enabled.
 
 ***
 
diff --git a/examples/WideDeep/criteo.py b/examples/WideDeep/criteo.py
index 9be82c35..055c41ec 100644
--- a/examples/WideDeep/criteo.py
+++ b/examples/WideDeep/criteo.py
@@ -12,6 +12,7 @@ NAMES = ['label', 'I1', 'I2', 'I3', 'I4', 'I5', 'I6', 'I7', 'I8', 'I9', 'I10', '
          'C12', 'C13', 'C14', 'C15', 'C16', 'C17', 'C18', 'C19', 'C20', 'C21', 'C22',
          'C23', 'C24', 'C25', 'C26']
 
+
 def make_sub_file(lines, head, src_name, sub_dir_name, sub):
     """Write sub-data.
     Args:
@@ -39,6 +40,7 @@ def make_sub_file(lines, head, src_name, sub_dir_name, sub):
     finally:
         os.close(f)
 
+
 def split_byline_count(filename, count, sub_dir_name):
     """Split File.
     Note: You can specify how many rows of data each sub file contains.
@@ -86,6 +88,7 @@ def get_split_file_path(parent_path=None, dataset_path=None, sample_num=4600000)
     split_file_list = [parent_path + "/" + file_name for file_name in split_file_name if file_name[-3:] == 'txt']
     return split_file_list
 
+
 def get_fea_map(fea_map_path=None, split_file_list=None):
     """Get feature map.
     Note: Either parent_path or dataset_path must be valid.
@@ -140,6 +143,7 @@ def get_fea_map(fea_map_path=None, split_file_list=None):
     fd.close()
     return fea_map
 
+
 def rec_kbins_discretizer(dat, n_bins, min_max_dict):
     """Bin continuous data into intervals.
     Note: The strategy is "uniform".
@@ -160,6 +164,7 @@ def rec_kbins_discretizer(dat, n_bins, min_max_dict): np.digitize(dat[feature] + eps, bin_edges[idx][1:]) return dat + def convert_input2tfrd(in_file_path, out_file_path): """ txt to tfrecords diff --git a/examples/WideDeep/model/delay_loss_scale.py b/examples/WideDeep/model/delay_loss_scale.py index a99a2db3..be5b9d58 100644 --- a/examples/WideDeep/model/delay_loss_scale.py +++ b/examples/WideDeep/model/delay_loss_scale.py @@ -17,32 +17,47 @@ import tensorflow as tf from tensorflow.python.training import optimizer +from config import Config class DenseLossScaleOptimizer: - def __init__(self, opt, loss_scale): + def __init__(self, opt: optimizer.Optimizer, loss_scale: int) -> None: if not isinstance(opt, optimizer.Optimizer): raise ValueError('"opt" must be an instance of Optimizer, but got: %s' % type(opt)) self._optimizer = opt self._loss_scale = tf.convert_to_tensor(loss_scale, tf.float32) - self._optimizer._learning_rate = self._optimizer._lr / self._loss_scale + _update_lr_loss_scale(self._optimizer, loss_scale) def compute_gradients(self, loss, var_list=None): - return self._optimizer.compute_gradients(loss*self._loss_scale, var_list=var_list) + return self._optimizer.compute_gradients(loss * self._loss_scale, var_list=var_list) def apply_gradients(self, avg_grads): return self._optimizer.apply_gradients(avg_grads) class SparseLossScaleOptimizer: - def __init__(self, opt, loss_scale): + def __init__(self, opt: optimizer.Optimizer, loss_scale: int) -> None: if not isinstance(opt, optimizer.Optimizer): raise ValueError('"opt" must be an instance of Optimizer, but got: %s' % type(opt)) self._optimizer = opt self._loss_scale = tf.convert_to_tensor(loss_scale, tf.float32) - self._optimizer._learning_rate = self._optimizer._lr / self._loss_scale + _update_lr_loss_scale(self._optimizer, loss_scale) def compute_gradients(self, loss, var_list=None): - return tf.gradients(loss*self._loss_scale, var_list) + return tf.gradients(loss * self._loss_scale, var_list) def apply_gradients(self, grads_and_vars): - return self._optimizer.apply_gradients(grads_and_vars) \ No newline at end of file + return self._optimizer.apply_gradients(grads_and_vars) + + +def _update_lr_loss_scale(opt, loss_scale): + if loss_scale <= 0: + raise RuntimeError("the loss_scale must be greater than zero.") + loss_scale = tf.convert_to_tensor(loss_scale, tf.float32) + if hasattr(opt, "_lr"): + # LazyAdam or Adam optimizer + opt._lr = opt._lr / loss_scale + elif hasattr(opt, "_learning_rate"): + # SGD optimizer + opt._learning_rate = opt._learning_rate / loss_scale + else: + raise RuntimeError("`opt` should have a `_learning_rate` or `_lr` named field.") \ No newline at end of file diff --git a/examples/WideDeep/model/main_mxrec.py b/examples/WideDeep/model/main_mxrec.py index 088aac84..37663b14 100644 --- a/examples/WideDeep/model/main_mxrec.py +++ b/examples/WideDeep/model/main_mxrec.py @@ -125,14 +125,12 @@ def make_batch_and_iterator(config, feature_spec_list, is_training, dump_graph, return batch, iterator - - def model_forward(model_args): feature_list = model_args.feature_list wide_hash_table_list = model_args.wide_hash_table_list deep_hash_table_list = model_args.deep_hash_table_list batch = model_args.batch - is_train = model_args.is_train + is_train = model_args.is_train modify_graph = model_args.modify_graph is_use_faae = model_args.is_use_faae @@ -145,8 +143,8 @@ def model_forward(model_args): else: feature_list_copy = feature_list - for i, item in enumerate(feature_list_copy): - if i % 2 == 0: 
+ for index, item in enumerate(feature_list_copy): + if index % 2 == 0: wide_feature_list.append(item) else: deep_feature_list.append(item) @@ -189,6 +187,7 @@ def model_forward(model_args): model_output = my_model.build_model(build_model_args) return model_output + def evaluate(): print("read_test dataset") if not MODIFY_GRAPH_FLAG: diff --git a/examples/WideDeep/model/model.py b/examples/WideDeep/model/model.py index 61b1fdea..bfe2177e 100644 --- a/examples/WideDeep/model/model.py +++ b/examples/WideDeep/model/model.py @@ -34,7 +34,7 @@ class MyModel: self._loss_fn = None self.is_training = None - def build_model(self,model_args): + def build_model(self, model_args): wide_embedding = model_args.wide_embedding deep_embedding = model_args.deep_embedding label = model_args.label diff --git a/examples/WideDeep/model/optimizer.py b/examples/WideDeep/model/optimizer.py index 1a781a01..2c7685bb 100644 --- a/examples/WideDeep/model/optimizer.py +++ b/examples/WideDeep/model/optimizer.py @@ -28,7 +28,8 @@ def get_dense_and_sparse_optimizer(cfg): sparse_optimizer = create_hash_optimizer_by_address(learning_rate=cfg.learning_rate[1]) else: sparse_optimizer = create_hash_optimizer(learning_rate=cfg.learning_rate[1]) - sparse_optimizer = SparseLossScaleOptimizer(sparse_optimizer, 1) - dense_optimizer = DenseLossScaleOptimizer(dense_optimizer, 1) + loss_scale = 1 + sparse_optimizer = SparseLossScaleOptimizer(sparse_optimizer, loss_scale) + dense_optimizer = DenseLossScaleOptimizer(dense_optimizer, loss_scale) return dense_optimizer, sparse_optimizer -- Gitee From ffeefc4660fcf7f5bdc53f24a81032318fcc44f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=83=AD=E6=9C=9B?= <1244372993@qq.com> Date: Mon, 3 Jun 2024 19:52:50 +0800 Subject: [PATCH 191/302] =?UTF-8?q?WideDeep=E6=A8=A1=E5=9E=8B=20cleancode?= =?UTF-8?q?=E4=BF=AE=E6=94=B92?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/WideDeep/criteo.py | 1 + examples/WideDeep/model/delay_loss_scale.py | 1 + 2 files changed, 2 insertions(+) diff --git a/examples/WideDeep/criteo.py b/examples/WideDeep/criteo.py index 055c41ec..d6c493c0 100644 --- a/examples/WideDeep/criteo.py +++ b/examples/WideDeep/criteo.py @@ -65,6 +65,7 @@ def split_byline_count(filename, count, sub_dir_name): finally: f.close() + def get_split_file_path(parent_path=None, dataset_path=None, sample_num=4600000): """Get the list of split file path. Note: Either parent_path or dataset_path must be valid. 
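A quick aside on the loss-scale wrappers being tidied up below: the `_update_lr_loss_scale` helper divides the optimizer's learning rate by the loss scale to compensate for `compute_gradients` multiplying the loss up. The following standalone sketch is illustrative only (it is not part of this repository) and assumes TF 1.15, the version these examples target; for plain SGD the compensation is exact:

```python
import tensorflow as tf

tf.compat.v1.disable_eager_execution()  # keeps the sketch in graph mode

S = 1024.0                               # hypothetical loss scale
w = tf.Variable(3.0, name="w")
loss = tf.square(w)                      # d(loss)/dw = 2w

plain = tf.compat.v1.train.GradientDescentOptimizer(learning_rate=0.1)
scaled = tf.compat.v1.train.GradientDescentOptimizer(learning_rate=0.1 / S)

plain_step = plain.minimize(loss)        # update: lr * 2w
scaled_step = scaled.minimize(S * loss)  # update: (lr / S) * (S * 2w) == lr * 2w

with tf.compat.v1.Session() as sess:
    sess.run(tf.compat.v1.global_variables_initializer())
    sess.run(plain_step)
    print(sess.run(w))                   # 3.0 - 0.1 * 6.0 = 2.4
    sess.run(w.assign(3.0))              # reset and apply the scaled variant
    sess.run(scaled_step)
    print(sess.run(w))                   # same value: 2.4
```

With `loss_scale = 1`, which is what `optimizer.py` passes in these examples, both wrappers reduce to pass-throughs; the division only matters when a larger scale is configured for mixed-precision runs.
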
diff --git a/examples/WideDeep/model/delay_loss_scale.py b/examples/WideDeep/model/delay_loss_scale.py index be5b9d58..f73baf68 100644 --- a/examples/WideDeep/model/delay_loss_scale.py +++ b/examples/WideDeep/model/delay_loss_scale.py @@ -19,6 +19,7 @@ from tensorflow.python.training import optimizer from config import Config + class DenseLossScaleOptimizer: def __init__(self, opt: optimizer.Optimizer, loss_scale: int) -> None: if not isinstance(opt, optimizer.Optimizer): -- Gitee From 5bd6d6811bcbe871e69f0dedebad4564ec71143c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=83=AD=E6=9C=9B?= <1244372993@qq.com> Date: Mon, 3 Jun 2024 20:35:07 +0800 Subject: [PATCH 192/302] =?UTF-8?q?WideDeep=E6=A8=A1=E5=9E=8B=20Issues?= =?UTF-8?q?=E9=97=AE=E9=A2=98=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/WideDeep/criteo.py | 12 ++++-------- examples/WideDeep/model/main_mxrec.py | 1 - 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/examples/WideDeep/criteo.py b/examples/WideDeep/criteo.py index d6c493c0..ffb07289 100644 --- a/examples/WideDeep/criteo.py +++ b/examples/WideDeep/criteo.py @@ -7,10 +7,7 @@ import numpy as np import tensorflow as tf from tqdm import tqdm -NAMES = ['label', 'I1', 'I2', 'I3', 'I4', 'I5', 'I6', 'I7', 'I8', 'I9', 'I10', 'I11', - 'I12', 'I13', 'C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9', 'C10', 'C11', - 'C12', 'C13', 'C14', 'C15', 'C16', 'C17', 'C18', 'C19', 'C20', 'C21', 'C22', - 'C23', 'C24', 'C25', 'C26'] +NAMES = ['label'] + [f'I{i}' for i in range(1, 14)] + [f'C{i}' for i in range(1, 27)] def make_sub_file(lines, head, src_name, sub_dir_name, sub): @@ -38,7 +35,7 @@ def make_sub_file(lines, head, src_name, sub_dir_name, sub): f.writelines(lines) return sub + 1 finally: - os.close(f) + f.close() def split_byline_count(filename, count, sub_dir_name): @@ -103,7 +100,7 @@ def get_fea_map(fea_map_path=None, split_file_list=None): """ if fea_map_path is None and split_file_list is None: raise ValueError('Please give feature map path or split file list.') - if fea_map_path is None and os.path.join(os.path.dirname(split_file_list[0]), "fea_map.pkl"): + if fea_map_path is None and split_file_list is not None: fea_map_path = os.path.join(os.path.dirname(split_file_list[0]), "fea_map.pkl") if os.path.exists(fea_map_path) and fea_map_path[-3:] == 'pkl': with open(fea_map_path, 'rb') as f: @@ -141,7 +138,6 @@ def get_fea_map(fea_map_path=None, split_file_list=None): with os.fdopen(os.open(fea_map_path, flags, modes), 'wb') as fd: pickle.dump(fea_map, fd, pickle.HIGHEST_PROTOCOL) - fd.close() return fea_map @@ -162,7 +158,7 @@ def rec_kbins_discretizer(dat, n_bins, min_max_dict): rtol = 1.e-5 atol = 1.e-8 eps = atol + rtol * np.abs(dat[feature]) - np.digitize(dat[feature] + eps, bin_edges[idx][1:]) + dat[feature] = np.digitize(dat[feature] + eps, bin_edges[idx][1:]) return dat diff --git a/examples/WideDeep/model/main_mxrec.py b/examples/WideDeep/model/main_mxrec.py index 37663b14..d81ff215 100644 --- a/examples/WideDeep/model/main_mxrec.py +++ b/examples/WideDeep/model/main_mxrec.py @@ -53,7 +53,6 @@ random.seed(shuffle_seed) def add_timestamp_func(batch): timestamp = import_host_pipeline_ops().return_timestamp(tf.cast(batch['label'], dtype=tf.int64)) - # tf.constant(np.random.randint(1,1688109060,1)), tf.int64)) batch["timestamp"] = timestamp return batch -- Gitee From 09ffbdf80301eaae28d42a25115da1a9f9eb243a Mon Sep 17 00:00:00 2001 From: yangzhen_BIG Date: Mon, 3 Jun 2024 12:47:50 +0000 
Subject: [PATCH 193/302] !170 cleancode * cleancode * cleancode --- .../offset_mapper/offset_mapper.h | 2 +- src/AccCTR/src/unique/unique_func.h | 23 +++++++++---------- src/AccCTR/tests/ut/src/unique_test.cpp | 8 +++---- src/core/ssd_engine/file.cpp | 4 ++-- src/core/ssd_engine/file.h | 4 ++-- 5 files changed, 20 insertions(+), 21 deletions(-) diff --git a/src/AccCTR/src/embedding_cache/offset_mapper/offset_mapper.h b/src/AccCTR/src/embedding_cache/offset_mapper/offset_mapper.h index 80170989..f42a0d3f 100644 --- a/src/AccCTR/src/embedding_cache/offset_mapper/offset_mapper.h +++ b/src/AccCTR/src/embedding_cache/offset_mapper/offset_mapper.h @@ -91,7 +91,7 @@ public: std::vector swapInKeysID = FilterKeys(keys, swapInKoPair); uint64_t swapInCnt = 0; - int ret = FindInUsedPos(keys, swapInCnt, swapInKeysID, swapInKoPair, swapOutKoPair); + auto ret = FindInUsedPos(keys, swapInCnt, swapInKeysID, swapInKoPair, swapOutKoPair); if (ret != ock::ctr::H_OK) { return ret; } diff --git a/src/AccCTR/src/unique/unique_func.h b/src/AccCTR/src/unique/unique_func.h index 4812f74c..0222e4eb 100644 --- a/src/AccCTR/src/unique/unique_func.h +++ b/src/AccCTR/src/unique/unique_func.h @@ -175,22 +175,24 @@ public: } bucket->replaceBase = replaceOffset; for (int j = 0; j < bucket->count; ++j) { - out[total++] = bucket->data[j]; + out[total] = static_cast(bucket->data[j]); + ++total; } replaceOffset += bucket->count; } auto it = overflow_.begin(); int32_t totalOverflow = 0; while (it != overflow_.end()) { - out[total++] = it->first; + out[total] = it->first; it->second = replaceOffset++; + ++total; ++it; ++totalOverflow; } // set total overflow count stats_.totalUniques = static_cast(total - priorTotal); - stats_.totalOverflowUniques = totalOverflow; + stats_.totalOverflowUniques = static_cast(totalOverflow); return total - priorTotal; } @@ -241,17 +243,14 @@ public: { const int numOfGroupsInShard = groupMethod_.GroupCount(); uint32_t totalSize = conf.desiredSize + (conf.desiredSize >> 1); - while (bucketCountPower2_ * K_BUCKET_WIDTH * numOfGroupsInShard * estimatedDuplicateRatio < totalSize) { + while (static_cast(bucketCountPower2_ * K_BUCKET_WIDTH * numOfGroupsInShard * + estimatedDuplicateRatio) < totalSize) { bucketCountPower2_ <<= 1; } idCountEnable_ = (conf.outputType == OutputType::ENHANCED) && conf.useIdCount; for (int32_t i = 0; i < numOfGroupsInShard; ++i) { auto obj = new DedupT(bucketCountPower2_, numOfGroupsInShard, idCountEnable_); - if (obj == nullptr) { - ExternalLogger::PrintLog(LogLevel::ERROR, "creat object error"); - throw NullptrError(); - } dedupShards_.emplace_back(obj); } } @@ -302,7 +301,7 @@ public: if (conf.outputType == OutputType::ENHANCED) { int totalNumber = 0; for (int i = 0; i < conf.shardingNum; i++) { - totalUniqueSize[i] = totalNumber; + totalUniqueSize[i] = static_cast(totalNumber); if (conf.useSharding) { totalNumber += uniqueOut.uniqueIdCntInBucket[i]; } @@ -376,14 +375,14 @@ private: if (conf.useSharding && conf.useIdCount) { inGroupTotal = dedupShards_[j]->UniqueRaw(uniqueOut.uniqueIdInBucket, total); // 特征计数使能和shard同时使能 - uniqueOut.uniqueIdCntInBucket[j] = inGroupTotal; + uniqueOut.uniqueIdCntInBucket[j] = static_cast(inGroupTotal); } else if (!conf.useSharding && conf.useIdCount) { inGroupTotal = dedupShards_[j]->UniqueRaw(uniqueOut.uniqueId, total); // 特征计数使能和shard不使能 } else if (conf.useSharding && !conf.useIdCount) { inGroupTotal = dedupShards_[j]->UniqueRaw(uniqueOut.uniqueIdInBucket, total); // 特征计数使能和shard不使能 - uniqueOut.uniqueIdCntInBucket[j] = inGroupTotal; + 
uniqueOut.uniqueIdCntInBucket[j] = static_cast(inGroupTotal); } else { inGroupTotal = dedupShards_[j]->UniqueRaw(uniqueOut.uniqueId, total); // 特征计数不使能和shard不使能,跟普通unique对等 @@ -391,7 +390,7 @@ private: } else { inGroupTotal = dedupShards_[j]->UniqueRaw(uniqueOut.uniqueId, total); } - total += inGroupTotal; + total += static_cast(inGroupTotal); } uniqueOut.uniqueIdCnt = total; } diff --git a/src/AccCTR/tests/ut/src/unique_test.cpp b/src/AccCTR/tests/ut/src/unique_test.cpp index 94e8d92c..df5950e1 100644 --- a/src/AccCTR/tests/ut/src/unique_test.cpp +++ b/src/AccCTR/tests/ut/src/unique_test.cpp @@ -11,10 +11,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#include "unique_test.h" #include #include -#include "unique_test.h" #include "common.h" void UniqueTest::SetUpTestCase() @@ -155,7 +155,7 @@ TEST_F(UniqueTest, DoUniqueNormal) std::string input_path(path); std::cout << "input_path:" + input_path + "/data30.txt" << std::endl; std::ifstream input(input_path + "/data30.txt"); - if(!input.good()) { + if (!input.good()) { std::cout << "Failed to open file:" + input_path + "/data30.txt" << std::endl; return; } @@ -1404,7 +1404,7 @@ TEST_F(UniqueTest, DoUniqueLotsDataFunction) std::string input_path(path); std::cout << "input_path:" + input_path + "/data40.txt" << std::endl; std::ifstream input(input_path + "/data40.txt"); - if(!input.good()) { + if (!input.good()) { std::cout << "Failed to open file:" + input_path + "/data40.txt" << std::endl; return; } @@ -1510,7 +1510,7 @@ TEST_F(UniqueTest, DoUniqueLotsDataPaddingFunction) std::string input_path(path); std::cout << "input_path:" + input_path + "/data30.txt" << std::endl; std::ifstream input(input_path + "/data30.txt"); - if(!input.good()) { + if (!input.good()) { std::cout << "Failed to open file:" + input_path + "/data30.txt" << std::endl; return; } diff --git a/src/core/ssd_engine/file.cpp b/src/core/ssd_engine/file.cpp index cc9ec206..8c7da24e 100644 --- a/src/core/ssd_engine/file.cpp +++ b/src/core/ssd_engine/file.cpp @@ -281,12 +281,12 @@ void File::Load() emb_cache_key_t key; offset_t offset; do { - localFileMeta.read(reinterpret_cast(&key), keyDataLen); + localFileMeta.read(reinterpret_cast(&key), KEY_DATA_LEN); if (!localFileMeta.eof() && localFileMeta.fail()) { throw invalid_argument("file broken while reading key"); } - localFileMeta.read(reinterpret_cast(&offset), offsetDataLen); + localFileMeta.read(reinterpret_cast(&offset), OFFSET_DATA_LEN); if (!localFileMeta.eof() && localFileMeta.fail()) { throw invalid_argument("file broken while reading offset"); } diff --git a/src/core/ssd_engine/file.h b/src/core/ssd_engine/file.h index bc2b1fcb..5789ab8b 100644 --- a/src/core/ssd_engine/file.h +++ b/src/core/ssd_engine/file.h @@ -33,8 +33,8 @@ namespace MxRec { using offset_t = uint32_t; class File { - static constexpr uint64_t keyDataLen = sizeof(emb_cache_key_t); - static constexpr uint64_t offsetDataLen = sizeof(offset_t); + static constexpr uint64_t KEY_DATA_LEN = sizeof(emb_cache_key_t); + static constexpr uint64_t OFFSET_DATA_LEN = sizeof(offset_t); public: File(uint64_t fileID, string& fileDir); -- Gitee From 9de52b8e20f19516d55a2a4f9bed941b6b443e70 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=BD=97=E5=B9=B8=E8=BF=90?= Date: Tue, 4 Jun 2024 02:02:17 +0000 Subject: [PATCH 194/302] 
 !167 [Modification] DDR accuracy issue: add control edges to optimizers that
 carry slot variables * [Modification] DDR accuracy issue: matching change for
 the dynamic-expansion-mode optimizer * [Modification] DDR accuracy issue:
 switch the optimizer to sqrt(v_t_slice + temp_epsilon) * [Modification] DDR
 accuracy issue: resolve the issue * [Modification] DDR accuracy issue:
 training bug where training hangs * [Modification] DDR accuracy issue:
 training bug where training hangs * Merge remote-tracking branch
 'upstream/develop' into develop * [Modification] DDR accuracy issue: logging
 and cleancode * [Modification] DDR accuracy issue: logging and cleancode *
 [Modification] DDR accuracy issue: logging and cleancode * [Modification] DDR
 accuracy issue: add control edges to optimizers that carry slot variables *
 [Modification] DDR accuracy issue: add control edges to optimizers that carry
 slot variables * [Modification] DDR accuracy issue: add control edges to
 optimizers that carry slot variables * [Modification] DDR accuracy issue: add
 control edges to optimizers that carry slot variables * [Modification] DDR
 accuracy issue: add control edges to optimizers that carry slot variables
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mx_rec/constants/constants.py | 2 ++ mx_rec/core/asc/build_graph.py | 36 ++++++++++++++++---------- mx_rec/core/asc/swap_args.py | 7 +++++ mx_rec/graph/modifier.py | 11 +++++++- mx_rec/optimizers/adagrad.py | 4 ++- mx_rec/optimizers/base.py | 13 ++++++++++ mx_rec/optimizers/ftrl.py | 4 ++- mx_rec/optimizers/lazy_adam.py | 6 +++-- mx_rec/optimizers/lazy_adam_by_addr.py | 2 +- src/core/hybrid_mgmt/hybrid_mgmt.cpp | 22 +++++++++++++--- src/core/utils/common.cpp | 11 ++++++++ src/core/utils/common.h | 7 ++++- 12 files changed, 100 insertions(+), 25 deletions(-) diff --git a/mx_rec/constants/constants.py b/mx_rec/constants/constants.py index 13b3d583..becba0ab 100644 --- a/mx_rec/constants/constants.py +++ b/mx_rec/constants/constants.py @@ -170,6 +170,8 @@ class ASCAnchorAttr(Enum): RESTORE_VECTOR_SECOND = "restore_vector_second" UNIQUE_KEYS = "unique_keys" IS_GRAD = "is_grad" + TABLE_NAME = "table_name" + CHANNEL_ID = "channel_id" class OptimizerType(Enum): diff --git a/mx_rec/core/asc/build_graph.py b/mx_rec/core/asc/build_graph.py index 0ddf313e..00b9d282 100644 --- a/mx_rec/core/asc/build_graph.py +++ b/mx_rec/core/asc/build_graph.py @@ -21,6 +21,7 @@ from typing import Optional, List, Dict, Union, Tuple import tensorflow as tf import mxrec_pybind +from mx_rec.constants.constants import ASCAnchorAttr from mx_rec.util.initialize import ConfigInitializer from mx_rec.util.tf_version_adapter import npu_ops from mx_rec.util.log import logger @@ -36,7 +37,8 @@ class SwapInfo: def get_restore_vector(config): - logger.debug('Channel %s_restore_%s was built for getnext', config.get("table_name"), config.get("channel_id")) + logger.debug('Channel %s_restore_%s was built for getnext', config.get(ASCAnchorAttr.TABLE_NAME.value), + config.get(ASCAnchorAttr.CHANNEL_ID.value)) if config.get("is_hbm"): if not isinstance(config.get("emb_size"), int) or config.get("emb_size") < 1: raise TypeError(f"emb_size must be an int") @@ -58,32 +60,36 @@ def get_restore_vector(config): restore_size = None hot_size = None - with tf.compat.v1.variable_scope(config.get("table_name"), reuse=tf.compat.v1.AUTO_REUSE): + with tf.compat.v1.variable_scope(config.get(ASCAnchorAttr.TABLE_NAME.value), reuse=tf.compat.v1.AUTO_REUSE): restore_vector, hot_pos = npu_ops.gen_npu_ops.get_next( output_types=[tf.int32, tf.int32], output_shapes=[restore_size, [hot_size]], - channel_name=f'{config.get("table_name")}_restore_{config.get("channel_id")}') + channel_name=f'{config.get(ASCAnchorAttr.TABLE_NAME.value)}' + f'_restore_{config.get(ASCAnchorAttr.CHANNEL_ID.value)}') return restore_vector, hot_pos def get_id_offsets(max_lookup_vec_size: int, config: dict) -> Tuple[int, SwapInfo]: logger.debug('Channel %s_lookup_%s was built for getnext', config.get(ASCAnchorAttr.TABLE_NAME.value), config.get(ASCAnchorAttr.CHANNEL_ID.value)) # dynamic expansion currently supports only HBM mode; by default there is no swap-in/swap-out swap_info = SwapInfo() - with tf.compat.v1.variable_scope(config.get("table_name"), reuse=tf.compat.v1.AUTO_REUSE): + with tf.compat.v1.variable_scope(config.get(ASCAnchorAttr.TABLE_NAME.value), reuse=tf.compat.v1.AUTO_REUSE): if config.get("use_dynamic_expansion"): [id_offsets] = npu_ops.gen_npu_ops.get_next( output_types=[tf.int64], output_shapes=[[max_lookup_vec_size]], - channel_name=f'{config.get("table_name")}_lookup_{config.get("channel_id")}') + channel_name=f'{config.get(ASCAnchorAttr.TABLE_NAME.value)}' + f'_lookup_{config.get(ASCAnchorAttr.CHANNEL_ID.value)}')
return id_offsets, swap_info [id_offsets] = npu_ops.gen_npu_ops.get_next( output_types=[tf.int32], output_shapes=[[max_lookup_vec_size]], - channel_name=f'{config.get("table_name")}_lookup_{config.get("channel_id")}') + channel_name=f'{config.get(ASCAnchorAttr.TABLE_NAME.value)}' + f'_lookup_{config.get(ASCAnchorAttr.CHANNEL_ID.value)}') if config.get("is_hbm"): return id_offsets, swap_info ( @@ -94,9 +100,9 @@ def get_id_offsets(max_lookup_vec_size: int, config: dict) -> Tuple[int, SwapInf ) = npu_ops.gen_npu_ops.get_next( output_types=[tf.int32, tf.int32, tf.int32, tf.int32], output_shapes=[[max_lookup_vec_size], [max_lookup_vec_size], [], []], - channel_name=f'{config.get("table_name")}_swap_all', + channel_name=f'{config.get(ASCAnchorAttr.TABLE_NAME.value)}_swap_all', ) - logger.debug('Channel %s_swap_all was built for getnext', config.get("table_name")) + logger.debug('Channel %s_swap_all was built for getnext', config.get(ASCAnchorAttr.TABLE_NAME.value)) return id_offsets, swap_info @@ -111,13 +117,15 @@ def get_all2all_args(use_static: bool, config: dict) -> Optional[list]: if use_static: return all2all_args - with tf.compat.v1.variable_scope(config.get("table_name"), reuse=tf.compat.v1.AUTO_REUSE): + with tf.compat.v1.variable_scope(config.get(ASCAnchorAttr.TABLE_NAME.value), reuse=tf.compat.v1.AUTO_REUSE): with tf.compat.v1.variable_scope("all2all"): - logger.debug('Channel %s_a2a_%s was built for getnext', config.get("table_name"), config.get("channel_id")) + logger.debug('Channel %s_a2a_%s was built for getnext', config.get(ASCAnchorAttr.TABLE_NAME.value), + config.get(ASCAnchorAttr.CHANNEL_ID.value)) all2all_args = npu_ops.gen_npu_ops.get_next( output_types=[tf.int64], output_shapes=[[config.get("rank_size"), config.get("rank_size")]], - channel_name=f'{config.get("table_name")}_all2all_{config.get("channel_id")}', + channel_name=f'{config.get(ASCAnchorAttr.TABLE_NAME.value)}' + f'_all2all_{config.get(ASCAnchorAttr.CHANNEL_ID.value)}', name="a2a_get_next")[0] * config.get("emb_size") return all2all_args @@ -139,8 +147,8 @@ def get_preprocessed_tensor_for_asc(table, config): # when one table is looked up several times, get_preprocessed_tensor_for_asc is entered repeatedly; the last, largest lookup simply replaces the map's key-value entries swap_args = SwapArgs() - swap_args.set_data(SwapDataType.CONFIG.value, var_name=config.get("table_name"), - var_channel=config.get("channel_id"), config=config, swap_info=swap_info) + swap_args.set_data(SwapDataType.CONFIG.value, var_name=config.get(ASCAnchorAttr.TABLE_NAME.value), + var_channel=config.get(ASCAnchorAttr.CHANNEL_ID.value), config=config, swap_info=swap_info) all2all_args = get_all2all_args(use_static, config) diff --git a/mx_rec/core/asc/swap_args.py b/mx_rec/core/asc/swap_args.py index 5bcba234..1d206b95 100644 --- a/mx_rec/core/asc/swap_args.py +++ b/mx_rec/core/asc/swap_args.py @@ -41,6 +41,7 @@ class SwapArgs: def __init__(self): self.swap_config_dict = defaultdict(dict) self.swap_control_dict = defaultdict(dict) + self.slot_control_dict = defaultdict(dict) def set_data(self, data_type: str, **kwargs): if "var_name" not in kwargs: @@ -56,3 +57,9 @@ class SwapArgs: self.swap_control_dict[var_name][var_channel] = kwargs else: raise ValueError(f"Error data type in swap args: {data_type}") + + def set_slot_control(self, **kwargs): + if "var_name" not in kwargs: + raise ValueError("Missing Required key: var_name") + var_name = kwargs.pop("var_name") + self.slot_control_dict[var_name] = kwargs
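The pieces in this patch cooperate as follows: the optimizer side parks a placeholder no-op, records it in slot_control_dict via set_slot_control, and the graph modifier (next hunk) later rewires that placeholder's control input to the real swap op. A minimal, self-contained TF1-style sketch of the placeholder-plus-control-dependency idea, with illustrative names only (this is not mx_rec API):

    import tensorflow as tf

    tf.compat.v1.disable_eager_execution()
    v = tf.compat.v1.get_variable("v", shape=[], initializer=tf.compat.v1.zeros_initializer())
    # Named placeholder control op; a graph rewriter can later redirect its input.
    barrier = tf.no_op(name="place_holder_slot_control_op")
    with tf.control_dependencies([barrier]):
        update = tf.compat.v1.assign_add(v, 1.0)  # cannot run before `barrier`

Because the dependency hangs on a named no-op rather than on the swap op itself, the optimizer code stays decoupled from graph-construction order; the modifier only needs the registered op.

diff --git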
a/mx_rec/graph/modifier.py b/mx_rec/graph/modifier.py index 01aeda94..8629b350 100644 --- a/mx_rec/graph/modifier.py +++ b/mx_rec/graph/modifier.py @@ -253,12 +253,21 @@ class _GraphModifier: swap_args_dict = swap_args.swap_config_dict[table_instance.table_name][channel_id] swap_op = _get_swap_info( table_instance, variable_and_slot_list, swap_args_dict["swap_info"], channel_id) + # gather for id_offset needs to be executed after swap_op swap_control_dict = swap_args.swap_control_dict[table_instance.table_name][channel_id] if "control_ops" not in swap_control_dict: - raise ValueError("Missing Required key in modify_graph_for_asc: control_ops") + raise ValueError("swap control missing key [control_ops] in modify_graph_for_asc") control_ops = swap_control_dict["control_ops"] utils.replace_anchor_control(self._full_graph, control_ops, swap_op) + if is_training: + # gather for slot needs to be executed after swap_op + slot_control_dict = swap_args.slot_control_dict[table_instance.variable] + if "control_ops" not in slot_control_dict: + raise ValueError("slot control missing key [control_ops] in modify_graph_for_asc") + slot_control_ops = slot_control_dict["control_ops"] + utils.replace_anchor_control(self._full_graph, slot_control_ops, swap_op) + def _generate_get_next_op_specs(self, cutting_point_list: List[Tensor]) -> Dict[Tensor, _AnchorRecord]: get_next_op_map = defaultdict(dict) diff --git a/mx_rec/optimizers/adagrad.py b/mx_rec/optimizers/adagrad.py index 9998ec1f..df1fe2a3 100644 --- a/mx_rec/optimizers/adagrad.py +++ b/mx_rec/optimizers/adagrad.py @@ -26,7 +26,7 @@ from tensorflow.python.ops import init_ops from tensorflow.python.ops import math_ops from tensorflow.python.training import adagrad, training_ops -from mx_rec.optimizers.base import CustomizedOptimizer +from mx_rec.optimizers.base import CustomizedOptimizer, control_update_op_decorator from mx_rec.util.initialize import ConfigInitializer from mx_rec.validator.validator import para_checker_decorator, StringValidator, ClassValidator, FloatValidator @@ -111,6 +111,7 @@ class CustomizedAdagrad(adagrad.AdagradOptimizer, CustomizedOptimizer): unique_local_grad, unique_keys = self.sum_same_id_gradients(grad=grad, var=handle, is_expansion=False) return self._resource_apply_sparse(unique_local_grad, handle, unique_keys) + @control_update_op_decorator def _apply_sparse(self, grad, var): acc = self.get_slot(var, "acc") return training_ops.sparse_apply_adagrad( @@ -119,6 +120,7 @@ class CustomizedAdagrad(adagrad.AdagradOptimizer, CustomizedOptimizer): grad.indices, use_locking=self._use_locking) + @control_update_op_decorator def _resource_apply_sparse(self, grad, var, indices): acc = self.get_slot(var, "acc") return training_ops.resource_sparse_apply_adagrad( diff --git a/mx_rec/optimizers/base.py b/mx_rec/optimizers/base.py index f74e9778..49629641 100644 --- a/mx_rec/optimizers/base.py +++ b/mx_rec/optimizers/base.py @@ -26,6 +26,7 @@ from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.training.optimizer import _TensorProcessor +from mx_rec.core.asc.swap_args import SwapArgs from mx_rec.constants.constants import ASCAnchorAttr from mx_rec.util.tf_version_adapter import npu_ops from mx_rec.util.initialize import ConfigInitializer @@ -143,6 +144,18 @@ def custom_update_op(self, opt, grad): raise RuntimeError("Only support g with type Tensor.") +def control_update_op_decorator(apply_sparse): + def wrapper(*args, **kwargs): + second_arg = args[2] if len(args) > 2 else None  # args[2] must be the var
+ slot_control_ops = tf.no_op(name="place_holder_slot_control_op") + swap_args = SwapArgs() + swap_args.set_slot_control(var_name=second_arg, control_ops=slot_control_ops) + with tf.control_dependencies([slot_control_ops]): + result = apply_sparse(*args, **kwargs) + return result + return wrapper + + def patch_for_optimizer(): _TensorProcessor.update_op = custom_update_op logger.debug("update_op in Class optimizer._TensorProcessor has been patched.") \ No newline at end of file diff --git a/mx_rec/optimizers/ftrl.py b/mx_rec/optimizers/ftrl.py index 30287abd..ad4f9880 100644 --- a/mx_rec/optimizers/ftrl.py +++ b/mx_rec/optimizers/ftrl.py @@ -30,7 +30,7 @@ from tensorflow.python.ops import math_ops from tensorflow.python.ops import gen_state_ops from tensorflow.python.training import ftrl -from mx_rec.optimizers.base import CustomizedOptimizer +from mx_rec.optimizers.base import CustomizedOptimizer, control_update_op_decorator from mx_rec.util.initialize import ConfigInitializer from mx_rec.constants.constants import MAX_INT32 from mx_rec.validator.validator import para_checker_decorator, ClassValidator, StringValidator, \ @@ -128,6 +128,7 @@ class CustomizedFtrl(ftrl.FtrlOptimizer, CustomizedOptimizer): grad.indices, lambda x, i, v: tf.compat.v1.scatter_nd_update(x, i, v)) + @control_update_op_decorator def _apply_sparse_shared(self, grad, var, indices, scatter_nd_update): accum = self.get_slot(var, "accum") linear = self.get_slot(var, "linear") @@ -169,6 +170,7 @@ class CustomizedFtrl(ftrl.FtrlOptimizer, CustomizedOptimizer): return control_flow_ops.group(accum_update_op, linear_update_op, var_update_op) + @control_update_op_decorator def _apply_sparse_shared_v2(self, grad, var, indices, scatter_nd_update): accum = self.get_slot(var, "accum") linear = self.get_slot(var, "linear") diff --git a/mx_rec/optimizers/lazy_adam.py b/mx_rec/optimizers/lazy_adam.py index 9aee0204..0684a715 100644 --- a/mx_rec/optimizers/lazy_adam.py +++ b/mx_rec/optimizers/lazy_adam.py @@ -29,7 +29,7 @@ from tensorflow.python.ops import gen_state_ops from tensorflow.python.ops import math_ops from tensorflow.python.training import adam -from mx_rec.optimizers.base import CustomizedOptimizer +from mx_rec.optimizers.base import CustomizedOptimizer, control_update_op_decorator from mx_rec.util.initialize import ConfigInitializer from mx_rec.util.ops import import_host_pipeline_ops from mx_rec.validator.validator import para_checker_decorator, StringValidator, FloatValidator, ClassValidator @@ -120,6 +120,7 @@ class CustomizedLazyAdam(adam.AdamOptimizer, CustomizedOptimizer): } return temp + @control_update_op_decorator def _resource_apply_sparse(self, grad, handle, indices): return self._apply_sparse_shared( grad, @@ -127,6 +128,7 @@ class CustomizedLazyAdam(adam.AdamOptimizer, CustomizedOptimizer): indices, self._resource_scatter_nd_add) + @control_update_op_decorator def _apply_sparse(self, grad, var): return self._apply_sparse_shared( grad.values, @@ -168,7 +170,7 @@ class CustomizedLazyAdam(adam.AdamOptimizer, CustomizedOptimizer): v_t_slice = temp_b2 * old_v_slice + (1 - temp_b2) * math_ops.square(grad) v_update_op = scatter_nd_add(velocity, nd_indices, v_t_slice - old_v_slice) - denominator_slice = math_ops.sqrt(v_t_slice) + temp_epsilon + denominator_slice = math_ops.sqrt(v_t_slice + temp_epsilon) var_update_op = scatter_nd_add(var, nd_indices, tf.divide(-learning_rate * m_t_slice, denominator_slice)) return control_flow_ops.group(m_update_op, v_update_op, var_update_op)
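The one-line denominator change above (and the matching change in lazy_adam_by_addr.py below) moves epsilon inside the square root. For sparse ids whose second-moment estimate is still near zero this is much better conditioned, which is the DDR-precision motivation of the patch. A standalone numeric illustration (plain Python, not mx_rec code):

    import math

    eps = 1e-8
    for v in (0.0, 1e-12, 1e-8):
        old = math.sqrt(v) + eps   # divisor as small as 1e-8 when v == 0
        new = math.sqrt(v + eps)   # never smaller than 1e-4 here
        print(f"v={v:g}  sqrt(v)+eps={old:.3e}  sqrt(v+eps)={new:.3e}")

For v == 0 the old form divides by 1e-8 (an update amplified roughly 1e8-fold), while the new form divides by 1e-4.

diff --git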
a/mx_rec/optimizers/lazy_adam_by_addr.py b/mx_rec/optimizers/lazy_adam_by_addr.py index f1f8a2df..b7887052 100644 --- a/mx_rec/optimizers/lazy_adam_by_addr.py +++ b/mx_rec/optimizers/lazy_adam_by_addr.py @@ -136,7 +136,7 @@ class CustomizedLazyAdamByAddress(adam.AdamOptimizer, CustomizedOptimizer): old_v_slice = split_tensors[2] v_t_slice = temp_b2 * old_v_slice + (1 - temp_b2) * math_ops.square(grad) - denominator_slice = math_ops.sqrt(v_t_slice) + temp_epsilon + denominator_slice = math_ops.sqrt(v_t_slice + temp_epsilon) update_list = [tf.divide(-learning_rate * m_t_slice, denominator_slice)] + [m_t_slice - old_m_slice] + \ [v_t_slice - old_v_slice] update_tensor = tf.concat(update_list, axis=1) diff --git a/src/core/hybrid_mgmt/hybrid_mgmt.cpp b/src/core/hybrid_mgmt/hybrid_mgmt.cpp index b318f2d4..b96f4eb9 100644 --- a/src/core/hybrid_mgmt/hybrid_mgmt.cpp +++ b/src/core/hybrid_mgmt/hybrid_mgmt.cpp @@ -1515,8 +1515,15 @@ void HybridMgmt::EmbeddingUpdateDDR(const EmbTaskInfo& info, const float* embPtr throw runtime_error("memcpy_s failed, error code:" + to_string(rc)); } } - LOG_DEBUG("table:{}, batchId:{}, thread:{}, EmbeddingUpdateTC(ms):{}", - info.name, info.batchId, info.threadIdx, EmbeddingUpdateTC.ElapsedMS()); + if (MxRec::Logger::GetLevel() <= MxRec::Logger::DEBUG) { + string sample; + if (!swapOutAddrs.empty()) { + sample = FloatPtrToLimitStr(swapOutAddrs.front(), info.extEmbeddingSize); // print first element + } + LOG_DEBUG("table:{}, batchId:{}, thread:{}, receive d2hEmb, ext emb:{}, emb size:{}, emb samples:{}, " "EmbeddingUpdateTC(ms):{}", info.name.c_str(), info.batchId, info.threadIdx, + info.extEmbeddingSize, swapOutAddrs.size(), sample, EmbeddingUpdateTC.ElapsedMS()); + } lastUpdateFinishStepMap[info.name]++; cvLastUpdateFinishMap[info.name][info.cvNotifyIndex].notify_all(); @@ -1953,8 +1960,10 @@ bool HybridMgmt::BuildH2DEmbedding(const EmbTaskInfo &info, vector &h2dE throw runtime_error("memcpy_s failed, error code:" + to_string(rc)); } } - LOG_DEBUG("table:{}, thread:{}, embeddingLookupTC(ms):{}", - info.name.c_str(), info.threadIdx, embeddingLookupTC.ElapsedMS()); + LOG_DEBUG("table:{}, thread:{}, batchId:{}, send h2dEmb, emb size:{}, emb samples:{}, embeddingLookupTC(ms):{}", + info.name.c_str(), info.threadIdx, info.batchId, swapInAddrs.size(), + FloatPtrToLimitStr(h2dEmbAddr, swapInAddrs.size() * info.extEmbeddingSize), + embeddingLookupTC.ElapsedMS()); return true; } @@ -2196,6 +2205,11 @@ void HybridMgmt::GetSwapPairsAndKey2Offset(const EmbBaseInfo &info, vector(std::min(prtSize, maxDispLen)); + std::string s; + for (int i = 0; i < maxLen; i++) { + s += std::to_string(*(ptr + i)) + " "; + } + return s; + } + ostream& operator<<(ostream& ss, MxRec::CkptDataType type) { ss << static_cast<int>(type); diff --git a/src/core/utils/common.h b/src/core/utils/common.h index 75837349..4fdb7c8d 100644 --- a/src/core/utils/common.h +++ b/src/core/utils/common.h @@ -361,9 +361,12 @@ namespace MxRec { template std::string VectorToString(const std::vector& vec) { + constexpr size_t maxDispLen = 20; // max display number + int maxLen = static_cast<int>(std::min(vec.size(), maxDispLen)); + std::stringstream ss; ss << "["; - for (size_t i = 0; i < vec.size(); ++i) { + for (size_t i = 0; i < maxLen; ++i) { ss << vec[i]; if (i != vec.size() - 1) { ss << ", "; @@ -373,6 +376,8 @@ namespace MxRec { return ss.str(); } + std::string FloatPtrToLimitStr(float* ptr, const size_t& prtSize); + template std::string MapToString(const std::map& map) { -- Gitee
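Worth noting about the logging changes in this patch: both FloatPtrToLimitStr and the amended VectorToString cap debug output at the first 20 elements, so a hot-path log line stays bounded regardless of embedding width. The same idea in a few lines of Python, purely as an illustration:

    def preview(values, max_disp_len=20):
        """Space-separated preview of at most max_disp_len values."""
        return " ".join(str(v) for v in values[:max_disp_len])

    print(preview([0.25] * 1000))  # logs 20 numbers, not 1000
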
From e713f2c9f202757deb4f326dc3ccdac44e8d1378 Mon Sep 17 00:00:00 2001 From: 郭望 <1244372993@qq.com> Date: Tue, 4 Jun 2024 11:06:34 +0800 Subject: [PATCH 195/302] WideDeep model: fixes for reported Issues, part 2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/WideDeep/criteo.py | 2 +- examples/WideDeep/model/main_mxrec.py | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/examples/WideDeep/criteo.py b/examples/WideDeep/criteo.py index ffb07289..137dac5c 100644 --- a/examples/WideDeep/criteo.py +++ b/examples/WideDeep/criteo.py @@ -179,7 +179,7 @@ def convert_input2tfrd(in_file_path, out_file_path): return example - file_name = out_file_path + in_file_path[-12:-4] + '.tfrecords' + file_name = out_file_path + in_file_path[-12:-4] + '.tfrecord' file_writer = tf.io.TFRecordWriter(file_name) with open(in_file_path, encoding='utf-8') as file_in: diff --git a/examples/WideDeep/model/main_mxrec.py b/examples/WideDeep/model/main_mxrec.py index d81ff215..b1d77698 100644 --- a/examples/WideDeep/model/main_mxrec.py +++ b/examples/WideDeep/model/main_mxrec.py @@ -89,7 +89,6 @@ def make_batch_and_iterator(config, feature_spec_list, is_training, dump_graph, dataset = dataset.shard(config.rank_size, config.rank_id) if is_training: dataset = dataset.shuffle(batch_size * 1000, seed=shuffle_seed) - if is_training: dataset = dataset.repeat(config.train_epoch) else: dataset = dataset.repeat(config.test_epoch) @@ -382,8 +381,6 @@ if __name__ == "__main__": if use_faae: cfg.dev_vocab_size = cfg.dev_vocab_size // 2 - optimizer_list = [get_dense_and_sparse_optimizer(cfg)] # create the embedding tables wide_emb_initializer = tf.compat.v1.truncated_normal_initializer(stddev=0.05, seed=sparse_hashtable_seed) deep_emb_initializer = tf.compat.v1.truncated_normal_initializer(stddev=0.05, seed=sparse_hashtable_seed) @@ -424,6 +421,7 @@ if __name__ == "__main__": eval_model = model_forward(eval_forward_args) train_variables, emb_variables = get_dense_and_sparse_variable() + optimizer_list = [get_dense_and_sparse_optimizer(cfg)] rank_size = mxrec_util.communication.hccl_ops.get_rank_size() train_ops = [] -- Gitee From e62e62b14c9115097354ee49d225815153c9fa75 Mon Sep 17 00:00:00 2001 From: longfeifei <962977793@qq.com> Date: Tue, 4 Jun 2024 11:16:10 +0000 Subject: [PATCH 196/302] !172 Patch TF's native write_graph function; fix the DDR save logic; rework hdfsConnect handling * Patch TF's native write_graph function; fix the DDR save logic; rework hdfsConnect handling * Merge remote-tracking branch 'upstream/develop' into hdfs-fix-0530 * Patch TF's native write_graph function; fix the DDR save logic; rework hdfsConnect handling * Patch TF's native write_graph function: in a multi-process run only one process is allowed to perform write_graph * Patch TF's native write_graph function: in a multi-process run only one process is allowed to perform write_graph MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mx_rec/saver/patch.py | 16 +++++- mx_rec/saver/saver.py | 3 + src/core/emb_table/embedding_ddr.cpp | 51 ++++++++--------- src/core/emb_table/embedding_dynamic.cpp | 47 ++++++++------- src/core/emb_table/embedding_mgmt.cpp | 18 +++++- src/core/emb_table/embedding_static.cpp | 19 ++++--- src/core/emb_table/embedding_table.cpp | 18 +++++- src/core/emb_table/embedding_table.h | 9 ++- .../hdfs_file_system/hdfs_file_system.cpp | 57 ++----------------- .../hdfs_file_system/hdfs_file_system.h | 11 ++-- src/tests/emb_table/embedding_static_test.cpp | 1 + 11 files changed, 128 insertions(+), 122 deletions(-) diff --git a/mx_rec/saver/patch.py b/mx_rec/saver/patch.py index 6cffcc18..dcdf95ca 100644 --- a/mx_rec/saver/patch.py +++ b/mx_rec/saver/patch.py @@ -30,6 +30,7 @@ from tensorflow.python.client import session from tensorflow.python.eager import context from tensorflow.python.framework import errors from tensorflow.python.framework import ops +from tensorflow.python.framework import graph_io from tensorflow.python.ops import variables from tensorflow.python.ops import io_ops from tensorflow.python.platform import gfile @@ -41,6 +42,7 @@ from tensorflow.python.training.tracking import base as trackable from tensorflow.python.training.saving import saveable_object from tensorflow.python.training.saving import saveable_object_util import numpy as np +from mpi4py import MPI from mx_rec.saver.saver import Saver as SparseSaver, check_file_system_is_valid from mx_rec.util.initialize import ConfigInitializer @@ -248,7 +250,6 @@ def save(self, sess, save_path, global_step=None, latest_filename=None, meta_gra self.sparse_saver.save(sess, save_path=checkpoint_file) logger.info("Save sparse model into dir %s", checkpoint_file) - from mpi4py import MPI comm = MPI.COMM_WORLD rank = comm.Get_rank() comm.Barrier() @@ -447,6 +448,18 @@ class BulkSaverBuilder(BaseSaverBuilder): return io_ops.restore_v2(filename_tensor, tensor_names, tensor_slices, tensor_dtypes) +def patch_for_write_graph_func(func): + def wrapper(*args, **kwargs): + comm = MPI.COMM_WORLD + rank = comm.Get_rank() + # In the case of multiple processes, choose one process to write graph. + if rank == 0: + return func(*args, **kwargs) + else: + return None + return wrapper + + def patch_for_saver(): dense_saver = tf.compat.v1.train.Saver dense_saver.__init__ = saver_init @@ -454,3 +467,4 @@ def patch_for_saver(): dense_saver.restore = restore dense_saver.build = build logger.debug("Class tf.train.Saver has been patched.") + training_util.write_graph = patch_for_write_graph_func(graph_io.write_graph)
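The wrapper above is a generic rank-gating decorator; it can be exercised standalone, independent of the saver. A minimal sketch (requires mpi4py; the write_marker helper is hypothetical, not part of mx_rec):

    from mpi4py import MPI

    def rank_zero_only(func):
        """Run func on MPI rank 0 only; other ranks return None."""
        def wrapper(*args, **kwargs):
            if MPI.COMM_WORLD.Get_rank() == 0:
                return func(*args, **kwargs)
            return None
        return wrapper

    @rank_zero_only
    def write_marker(path):  # hypothetical helper for demonstration
        with open(path, "w") as f:
            f.write("written by exactly one process\n")

Launched under mpirun with N processes, write_marker runs once instead of N times, which is exactly the duplicate-writer race the write_graph patch avoids.

diff --git a/mx_rec/saver/saver.py b/mx_rec/saver/saver.py index f9dfd0dc..f7c6b9a2 100644 --- a/mx_rec/saver/saver.py +++ b/mx_rec/saver/saver.py @@ -286,6 +286,9 @@ class Saver(object): def _save_ddr(self, sess, root_dir): # receive the swap_out offsets passed from the host side, used to update and save the host-side table self.config_instance.hybrid_manager_config.fetch_device_emb() + # In DDR mode, within the save process, the graph has been fixed and cannot execute the get_next op. + # The _unsafe_unfinalize operation can modify the state of the graph being fixed.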
+ sess.graph._unsafe_unfinalize() for var in self.var_list: table_instance = self.config_instance.sparse_embed_config.get_table_instance(var) table_name = table_instance.table_name diff --git a/src/core/emb_table/embedding_ddr.cpp b/src/core/emb_table/embedding_ddr.cpp index f069e5c7..092ad0c5 100644 --- a/src/core/emb_table/embedding_ddr.cpp +++ b/src/core/emb_table/embedding_ddr.cpp @@ -18,7 +18,6 @@ See the License for the specific language governing permissions and #include "utils/logger.h" #include "utils/singleton.h" -#include "file_system/file_system_handler.h" #include "ssd_cache/cache_manager.h" #include "ock_ctr_common/include/error_code.h" @@ -86,12 +85,13 @@ void EmbeddingDDR::LoadKey(const string &savePath, vector &keys stringstream ss; ss << savePath << "/" << name << "/key/slice.data"; - unique_ptr fileSystemHandler = make_unique(); - unique_ptr fileSystemPtr = fileSystemHandler->Create(ss.str()); + if (fileSystemPtr_ == nullptr) { + throw runtime_error("failed to obtain the file system pointer, the file system pointer is null."); + } size_t fileSize = 0; try { - fileSize = fileSystemPtr->GetFileSize(ss.str()); + fileSize = fileSystemPtr_->GetFileSize(ss.str()); } catch (exception& e) { string errMsg = StringFormat("open file failed:%s, error code:%s", ss.str().c_str(), strerror(errno)); throw runtime_error(errMsg); @@ -107,7 +107,7 @@ void EmbeddingDDR::LoadKey(const string &savePath, vector &keys string errMsg = StringFormat("malloc buffer failed, error code:%s", strerror(errno)); throw runtime_error(errMsg); } - ssize_t result = fileSystemPtr->Read(ss.str(), reinterpret_cast<char*>(buf), fileSize); + ssize_t result = fileSystemPtr_->Read(ss.str(), reinterpret_cast<char*>(buf), fileSize); if (result == -1) { free(static_cast(buf)); string errMsg = StringFormat("read buffer failed, error code:%s", strerror(errno)); @@ -144,13 +144,13 @@ void EmbeddingDDR::LoadEmbedding(const string &savePath, vector> & stringstream ss; ss << savePath << "/" << name; - - unique_ptr fileSystemHandler = make_unique(); - unique_ptr fileSystemPtr = fileSystemHandler->Create(ss.str()); - stringstream embedStream; embedStream << ss.str() << "/" << "embedding/slice.data"; - ssize_t res = fileSystemPtr->Read(embedStream.str(), embeddings, 0, hostLoadOffset, embSize_); + + if (fileSystemPtr_ == nullptr) { + throw runtime_error("failed to obtain the file system pointer, the file system pointer is null."); + } + ssize_t res = fileSystemPtr_->Read(embedStream.str(), embeddings, 0, hostLoadOffset, embSize_); LOG_DEBUG("load embedding done, table:{}, read bytes:{}", name, res); } @@ -170,14 +170,14 @@ void EmbeddingDDR::LoadOptimizerSlot(const string &savePath, vector fileSystemHandler = make_unique(); - unique_ptr fileSystemPtr = fileSystemHandler->Create(ss.str()); + if (fileSystemPtr_ == nullptr) { + throw runtime_error("failed to obtain the file system pointer, the file system pointer is null."); + } int64_t slotIdx = 0; for (const auto &param: optimParams) { stringstream paramStream; paramStream << ss.str() << "/" << optimName + "_" + param << "/slice.data"; - ssize_t res = fileSystemPtr->Read(paramStream.str(), optimizerSlots, slotIdx, hostLoadOffset, embSize_); + ssize_t res = fileSystemPtr_->Read(paramStream.str(), optimizerSlots, slotIdx, hostLoadOffset, embSize_); slotIdx++; LOG_DEBUG("load optimizer slot, table:{}, slot:{}, read bytes:{}", name, param, res); } @@ -264,14 +264,14 @@ void EmbeddingDDR::SaveKey(const string& savePath, vector& keys MakeDir(ss.str()); ss << "slice_" << rankId_ << ".data"; - 
unique_ptr fileSystemHandler = make_unique(); - unique_ptr fileSystemPtr = fileSystemHandler->Create(ss.str()); - // temporary HBM compatibility: convert to int64_t; the key type will later be unified to uint64_t vector keysCompat(keys.cbegin(), keys.cend()); - ssize_t res = fileSystemPtr->Write(ss.str(), reinterpret_cast<const char*>(keysCompat.data()), - static_cast(keys.size() * sizeof(int64_t))); + if (fileSystemPtr_ == nullptr) { + throw runtime_error("failed to obtain the file system pointer, the file system pointer is null."); + } + ssize_t res = fileSystemPtr_->Write(ss.str(), reinterpret_cast<const char*>(keysCompat.data()), + static_cast(keys.size() * sizeof(int64_t))); if (res == -1) { throw runtime_error("save key failed!"); } @@ -284,10 +284,10 @@ void EmbeddingDDR::SaveEmbedding(const string& savePath, vector>& MakeDir(ss.str()); ss << "slice_" << rankId_ << ".data"; - unique_ptr fileSystemHandler = make_unique(); - unique_ptr fileSystemPtr = fileSystemHandler->Create(ss.str()); - - ssize_t writeBytesNum = fileSystemPtr->Write(ss.str(), embeddings, embSize_); + if (fileSystemPtr_ == nullptr) { + throw runtime_error("failed to obtain the file system pointer, the file system pointer is null."); + } + ssize_t writeBytesNum = fileSystemPtr_->Write(ss.str(), embeddings, embSize_); ssize_t expectWriteBytes = embeddings.size() * embSize_ * sizeof(float); if (writeBytesNum != expectWriteBytes) { string errMsg = StringFormat("save embedding failed, write expect:%d, actual:%d, path:%s", @@ -317,15 +317,12 @@ void EmbeddingDDR::SaveOptimizerSlot(const string& savePath, vector fileSystemHandler = make_unique(); - unique_ptr fileSystemPtr = fileSystemHandler->Create(ss.str()); - vector> slotData; for (const auto &data: optimizerSlots) { vector tmp(data.cbegin() + slotIdx * embSize_, data.cbegin() + (slotIdx+1) * embSize_); slotData.emplace_back(tmp); } - ssize_t writeBytesNum = fileSystemPtr->Write(ss.str(), slotData, embSize_); + ssize_t writeBytesNum = fileSystemPtr_->Write(ss.str(), slotData, embSize_); ssize_t expectWriteBytes = slotData.size() * embSize_ * sizeof(float); if (writeBytesNum != expectWriteBytes) { string errMsg = StringFormat("save optimizer slot failed, write expect:%d, actual:%d, path:%s", diff --git a/src/core/emb_table/embedding_dynamic.cpp b/src/core/emb_table/embedding_dynamic.cpp index 78c94862..a69cf930 100644 --- a/src/core/emb_table/embedding_dynamic.cpp +++ b/src/core/emb_table/embedding_dynamic.cpp @@ -17,7 +17,6 @@ See the License for the specific language governing permissions and #include "utils/logger.h" #include "utils/singleton.h" #include "hd_transfer/hd_transfer.h" -#include "file_system/file_system_handler.h" #include "utils/common.h" using namespace MxRec; @@ -139,9 +138,6 @@ void EmbeddingDynamic::SaveKey(const string& savePath) MakeDir(ss.str()); ss << "slice_" << rankId_ << ".data"; - unique_ptr fileSystemHandler = make_unique(); - unique_ptr fileSystemPtr = fileSystemHandler->Create(ss.str()); - deviceKey.clear(); embAddress.clear(); for (auto& it: key2AddressMap) { deviceKey.push_back(it.first); embAddress.push_back(it.second); } + if (fileSystemPtr_ == nullptr) { + throw runtime_error("failed to obtain the file system pointer, the file system pointer is null."); + } size_t writeSize = static_cast(deviceKey.size() * sizeof(int64_t)); ssize_t res = fileSystemPtr_->Write(ss.str(), reinterpret_cast<const char*>(deviceKey.data()), writeSize); if (res == -1) { throw runtime_error(StringFormat("Error: Save keys failed. 
" "An error occurred while writing file: {}.", ss.str())); @@ -195,22 +194,25 @@ void EmbeddingDynamic::SaveEmbData(const string& savePath) MakeDir(ss.str()); ss << "slice_" << rankId_ << ".data"; - unique_ptr fileSystemHandler = make_unique(); - unique_ptr fileSystemPtr = fileSystemHandler->Create(ss.str()); - fileSystemPtr->WriteEmbedding(ss.str(), embSize_, embAddress, deviceId); + if (fileSystemPtr_ == nullptr) { + throw runtime_error("failed to obtain the file system pointer, the file system pointer is null."); + } + fileSystemPtr_->WriteEmbedding(ss.str(), embSize_, embAddress, deviceId); } void EmbeddingDynamic::SaveOptimData(const string &savePath) { + if (fileSystemPtr_ == nullptr) { + throw runtime_error("failed to obtain the file system pointer, the file system pointer is null."); + } + for (const auto &content: optimAddressMap) { stringstream ss; ss << savePath << "/" << name << "/" << optimName + "_" + content.first << "/"; MakeDir(ss.str()); ss << "slice_" << rankId_ << ".data"; - unique_ptr fileSystemHandler = make_unique(); - unique_ptr fileSystemPtr = fileSystemHandler->Create(ss.str()); - fileSystemPtr->WriteEmbedding(ss.str(), embSize_, content.second, deviceId); + fileSystemPtr_->WriteEmbedding(ss.str(), embSize_, content.second, deviceId); } } @@ -225,22 +227,23 @@ void EmbeddingDynamic::LoadEmbAndOptim(const string& savePath) stringstream ss; ss << savePath << "/" << name; - unique_ptr fileSystemHandler = make_unique(); - unique_ptr fileSystemPtr = fileSystemHandler->Create(ss.str()); - // 读embedding stringstream embedStream; embedStream << ss.str() << "/" << "embedding/slice.data"; + + if (fileSystemPtr_ == nullptr) { + throw runtime_error("failed to obtain the file system pointer, the file system pointer is null."); + } EmbeddingSizeInfo embeddingSizeInfo = {embSize_, extEmbSize_}; - fileSystemPtr->ReadEmbedding(embedStream.str(), embeddingSizeInfo, firstAddress, rankId_, loadOffset); + fileSystemPtr_->ReadEmbedding(embedStream.str(), embeddingSizeInfo, firstAddress, rankId_, loadOffset); // 读optim int optimIndex = 1; for (const auto ¶m: optimParams) { stringstream paramStream; paramStream << ss.str() << "/" << optimName + "_" + param << "/slice.data"; - fileSystemPtr->ReadEmbedding(paramStream.str(), embeddingSizeInfo, - firstAddress + optimIndex * embSize_ * sizeof(float), deviceId, loadOffset); + fileSystemPtr_->ReadEmbedding(paramStream.str(), embeddingSizeInfo, + firstAddress + optimIndex * embSize_ * sizeof(float), deviceId, loadOffset); optimIndex++; } } @@ -250,10 +253,10 @@ void EmbeddingDynamic::LoadKey(const string& savePath) stringstream ss; ss << savePath << "/" << name << "/key/slice.data"; - unique_ptr fileSystemHandler = make_unique(); - unique_ptr fileSystemPtr = fileSystemHandler->Create(ss.str()); - - size_t fileSize = fileSystemPtr->GetFileSize(ss.str()); + if (fileSystemPtr_ == nullptr) { + throw runtime_error("failed to obtain the file system pointer, the file system pointer is null."); + } + size_t fileSize = fileSystemPtr_->GetFileSize(ss.str()); if (fileSize >= FILE_MAX_SIZE) { throw runtime_error(StringFormat("Error: Load keys failed. 
file {} size {} is too big.", ss.str(), fileSize)); } @@ -264,7 +267,7 @@ void EmbeddingDynamic::LoadKey(const string& savePath) "failed to allocate {} bytes using malloc.", fileSize)); } - ssize_t res = fileSystemPtr->Read(ss.str(), reinterpret_cast<char*>(buf), fileSize); + ssize_t res = fileSystemPtr_->Read(ss.str(), reinterpret_cast<char*>(buf), fileSize); if (res == -1) { throw runtime_error(StringFormat("Error: Load keys failed. " "An error occurred while reading file: {}.", ss.str())); diff --git a/src/core/emb_table/embedding_mgmt.cpp b/src/core/emb_table/embedding_mgmt.cpp index 33e1c671..9e7dcbb0 100644 --- a/src/core/emb_table/embedding_mgmt.cpp +++ b/src/core/emb_table/embedding_mgmt.cpp @@ -19,6 +19,7 @@ See the License for the specific language governing permissions and #include "emb_table/embedding_static.h" #include "emb_table/embedding_dynamic.h" #include "emb_table/embedding_ddr.h" +#include "file_system/file_system_handler.h" #include "utils/logger.h" using namespace MxRec; @@ -111,23 +112,32 @@ int64_t EmbeddingMgmt::GetCapacity(const std::string &name) void EmbeddingMgmt::Load(const string& name, const string& filePath, map>& trainKeySet) { - return embeddings[name]->Load(filePath, trainKeySet); + embeddings[name]->SetFileSystemPtr(filePath); + embeddings[name]->Load(filePath, trainKeySet); + embeddings[name]->UnsetFileSystemPtr(); } void EmbeddingMgmt::Load(const string& filePath, map>& trainKeySet) { for (auto& tablePair: embeddings) { + tablePair.second->SetFileSystemPtr(filePath); tablePair.second->Load(filePath, trainKeySet); + tablePair.second->UnsetFileSystemPtr(); } } void EmbeddingMgmt::Save(const string& name, const string& filePath) { + embeddings[name]->SetFileSystemPtr(filePath); + embeddings[name]->Save(filePath); + embeddings[name]->UnsetFileSystemPtr(); } void EmbeddingMgmt::Save(const string& filePath) { + for (auto& tablePair: embeddings) { + tablePair.second->SetFileSystemPtr(filePath); + } // use multiple threads to prevent the save_d2h receive from blocking when the table order differs between the C++ and Python sides vector> futures; for (auto& tablePair: embeddings) { futures.emplace_back(async(launch::async, [&tablePair, &filePath]() { tablePair.second->Save(filePath); })); } for (auto& f: futures) { f.get(); // get() rethrows the exception if one happened } + + for (auto& tablePair: embeddings) { + tablePair.second->UnsetFileSystemPtr(); + } } OffsetMapT EmbeddingMgmt::GetDeviceOffsets() diff --git a/src/core/emb_table/embedding_static.cpp b/src/core/emb_table/embedding_static.cpp index fdda5ede..ab66a42c 100644 --- a/src/core/emb_table/embedding_static.cpp +++ b/src/core/emb_table/embedding_static.cpp @@ -83,9 +83,6 @@ void EmbeddingStatic::SaveKey(const string& savePath) MakeDir(ss.str()); ss << "slice_" << rankId_ << ".data"; - unique_ptr fileSystemHandler = make_unique(); - unique_ptr fileSystemPtr = fileSystemHandler->Create(ss.str()); - deviceKey.clear(); deviceOffset.clear(); for (auto& it: key2OffsetMap) { deviceKey.push_back(it.first); deviceOffset.push_back(it.second); } + if (fileSystemPtr_ == nullptr) { + throw runtime_error("failed to obtain the file system pointer, the file system pointer is null."); + } + size_t writeSize = static_cast(deviceKey.size() * sizeof(int64_t)); ssize_t res = fileSystemPtr_->Write(ss.str(), reinterpret_cast<const char*>(deviceKey.data()), writeSize); if (res == -1) { throw runtime_error(StringFormat("Error: Save keys failed. 
" "An error occurred while writing file: {}.", ss.str())); @@ -116,10 +117,10 @@ void EmbeddingStatic::LoadKey(const string& savePath) stringstream ss; ss << savePath << "/" << name << "/key/slice.data"; - unique_ptr fileSystemHandler = make_unique(); - unique_ptr fileSystemPtr = fileSystemHandler->Create(ss.str()); - - size_t fileSize = fileSystemPtr->GetFileSize(ss.str()); + if (fileSystemPtr_ == nullptr) { + throw runtime_error("failed to obtain the file system pointer, the file system pointer is null."); + } + size_t fileSize = fileSystemPtr_->GetFileSize(ss.str()); if (fileSize >= FILE_MAX_SIZE) { throw runtime_error(StringFormat("Error: Load keys failed. file {} size {} is too big.", ss.str(), fileSize)); } @@ -130,7 +131,7 @@ void EmbeddingStatic::LoadKey(const string& savePath) "failed to allocate {} bytes using malloc.", fileSize)); } - ssize_t res = fileSystemPtr->Read(ss.str(), reinterpret_cast(buf), fileSize); + ssize_t res = fileSystemPtr_->Read(ss.str(), reinterpret_cast(buf), fileSize); if (res == -1) { throw runtime_error(StringFormat("Error: Load keys failed. " "An error occurred while reading file: {}.", ss.str())); diff --git a/src/core/emb_table/embedding_table.cpp b/src/core/emb_table/embedding_table.cpp index 1579282f..b4eb2379 100644 --- a/src/core/emb_table/embedding_table.cpp +++ b/src/core/emb_table/embedding_table.cpp @@ -119,6 +119,17 @@ absl::flat_hash_map EmbeddingTable::GetKeyOffsetMap() return keyOffsetMap; } +void EmbeddingTable::SetFileSystemPtr(const string& savePath) +{ + unique_ptr fileSystemHandler = make_unique(); + fileSystemPtr_ = fileSystemHandler->Create(savePath); +} + +void EmbeddingTable::UnsetFileSystemPtr() +{ + fileSystemPtr_ = nullptr; +} + vector EmbeddingTable::GetLoadOffset() { return loadOffset; @@ -134,9 +145,10 @@ void EmbeddingTable::Save(const string& filePath) void EmbeddingTable::MakeDir(const string& dirName) { - auto fileSystemHandler = make_unique(); - unique_ptr fileSystemPtr = fileSystemHandler->Create(dirName); - fileSystemPtr->CreateDir(dirName); + if (fileSystemPtr_ == nullptr) { + throw runtime_error("failed to obtain the file system pointer, the file system pointer is null."); + } + fileSystemPtr_->CreateDir(dirName); } void EmbeddingTable::SetCacheManager(CacheManager *cm) diff --git a/src/core/emb_table/embedding_table.h b/src/core/emb_table/embedding_table.h index 1fa9008b..8b622194 100644 --- a/src/core/emb_table/embedding_table.h +++ b/src/core/emb_table/embedding_table.h @@ -21,6 +21,7 @@ See the License for the specific language governing permissions and #include "utils/common.h" #include "ssd_cache/cache_manager.h" +#include "file_system/file_system_handler.h" namespace MxRec { @@ -65,11 +66,15 @@ public: absl::flat_hash_map GetKeyOffsetMap(); + void SetFileSystemPtr(const string& savePath); + + void UnsetFileSystemPtr(); + virtual void Load(const string& savePath, map>& trainKeySet); virtual void Save(const string& savePath); - static void MakeDir(const string& dirName); + void MakeDir(const string& dirName); virtual vector GetDeviceOffset(); @@ -116,6 +121,8 @@ protected: std::vector missingKeysHostPos_; // 用于记录当前batch在host上需要换出的偏移 CacheManager* cacheManager_; bool isSSDEnabled_ = false; + + unique_ptr fileSystemPtr_; }; } diff --git a/src/core/file_system/hdfs_file_system/hdfs_file_system.cpp b/src/core/file_system/hdfs_file_system/hdfs_file_system.cpp index 68fc47a8..3cbf4a44 100644 --- a/src/core/file_system/hdfs_file_system/hdfs_file_system.cpp +++ 
b/src/core/file_system/hdfs_file_system/hdfs_file_system.cpp @@ -28,19 +28,15 @@ using namespace MxRec; void HdfsFileSystem::CreateDir(const string& dirName) { - hdfsFS fs = ConnectHdfs(); int ret = hdfs->CreateDirectory(fs, dirName.c_str()); if (ret == -1) { LOG_DEBUG("Unable to create hdfs directory: {}", dirName); } - hdfs->Disconnect(fs); } vector HdfsFileSystem::ListDir(const string& dirName) { vector dirs; - hdfsFS fs = ConnectHdfs(); - int numEntries = 0; hdfsFileInfo* subDirs = hdfs->ListDirectory(fs, dirName.c_str(), &numEntries); for (int i = 0; i < numEntries; ++i) { @@ -50,15 +46,12 @@ vector HdfsFileSystem::ListDir(const string& dirName) } hdfs->FreeFileInfo(subDirs, numEntries); - hdfs->Disconnect(fs); return dirs; } size_t HdfsFileSystem::GetFileSize(const string& filePath) { - hdfsFS fs = ConnectHdfs(); hdfsFileInfo* fileInfo = hdfs->GetPathInfo(fs, filePath.c_str()); - hdfs->Disconnect(fs); if (fileInfo == nullptr) { throw runtime_error(StringFormat("Error: Unable to get hdfs file info : {}.", filePath.c_str())); } @@ -68,15 +61,8 @@ size_t HdfsFileSystem::GetFileSize(const string& filePath) ssize_t HdfsFileSystem::Write(const string& filePath, const char* fileContent, size_t dataSize) { - hdfsFS fs = ConnectHdfs(); - int flag = O_WRONLY | O_CREAT; - hdfsFileInfo* fileInfo = hdfs->GetPathInfo(fs, filePath.c_str()); - if (fileInfo) { - flag = O_WRONLY | O_APPEND; - } - hdfsFile file = hdfs->OpenFile(fs, filePath.c_str(), flag, 0, 0, 0); + hdfsFile file = hdfs->OpenFile(fs, filePath.c_str(), O_WRONLY | O_CREAT, 0, 0, 0); if (!file) { - hdfs->Disconnect(fs); throw runtime_error(StringFormat("Error: Unable to open hdfs file : {}.", filePath.c_str())); } @@ -84,39 +70,32 @@ ssize_t HdfsFileSystem::Write(const string& filePath, const char* fileContent, s tSize res = hdfs->Write(fs, file, fileContent, dataSize); if (res == -1) { hdfs->CloseFile(fs, file); - hdfs->Disconnect(fs); return static_cast(res); } writeBytesNum += res; hdfs->CloseFile(fs, file); - hdfs->Disconnect(fs); return static_cast(writeBytesNum); } ssize_t HdfsFileSystem::Write(const string& filePath, vector>& fileContent, size_t dataSize) { - hdfsFS fs = ConnectHdfs(); - hdfsFile file = hdfs->OpenFile(fs, filePath.c_str(), O_WRONLY | O_CREAT, 0, 0, 0); if (!file) { - hdfs->Disconnect(fs); throw runtime_error(StringFormat("Error: Unable to open hdfs file : {}.", filePath.c_str())); } tSize writeBytesNum = 0; size_t loops = fileContent.size(); for (size_t i = 0; i < loops; i++) { - tSize res = hdfs->Write(fs, file, reinterpret_cast(&fileContent[i]), dataSize); + tSize res = hdfs->Write(fs, file, reinterpret_cast(&fileContent[i]), dataSize * sizeof(float)); if (res == -1) { hdfs->CloseFile(fs, file); - hdfs->Disconnect(fs); return static_cast(res); } writeBytesNum += res; } hdfs->CloseFile(fs, file); - hdfs->Disconnect(fs); return static_cast(writeBytesNum); } @@ -129,11 +108,8 @@ ssize_t HdfsFileSystem::Write(const string& filePath, vector>& fil void HdfsFileSystem::WriteEmbedding(const string& filePath, const int& embeddingSize, const vector& addressArr, int deviceId) { - hdfsFS fs = ConnectHdfs(); - hdfsFile file = hdfs->OpenFile(fs, filePath.c_str(), O_WRONLY | O_CREAT, 0, 0, 0); if (!file) { - hdfs->Disconnect(fs); throw runtime_error(StringFormat("Error: Unable to open hdfs file : {}.", filePath.c_str())); } @@ -141,7 +117,6 @@ void HdfsFileSystem::WriteEmbedding(const string& filePath, const int& embedding auto res = aclrtSetDevice(static_cast(deviceId)); if (res != ACL_ERROR_NONE) { hdfs->CloseFile(fs, file); 
- hdfs->Disconnect(fs); throw runtime_error(StringFormat("Set device failed, device_id:%d", deviceId).c_str()); } @@ -155,20 +130,17 @@ void HdfsFileSystem::WriteEmbedding(const string& filePath, const int& embedding ACL_MEMCPY_DEVICE_TO_HOST); if (ret != ACL_SUCCESS) { hdfs->CloseFile(fs, file); - hdfs->Disconnect(fs); throw runtime_error("Error: Execute aclrtmemcpy from device to host failed."); } tSize res = hdfs->Write(fs, file, row.data(), embeddingSize * sizeof(float)); if (res == -1) { hdfs->CloseFile(fs, file); - hdfs->Disconnect(fs); throw runtime_error(StringFormat("Error: An error occurred while writing file: {}.", filePath.c_str())); } if (res != embeddingSize * sizeof(float)) { hdfs->CloseFile(fs, file); - hdfs->Disconnect(fs); throw runtime_error(StringFormat("Error: Expected to write {} bytes, " "but actually write {} bytes to file {}.", embeddingSize * sizeof(float), res, filePath.c_str())); @@ -176,16 +148,12 @@ void HdfsFileSystem::WriteEmbedding(const string& filePath, const int& embedding } #endif hdfs->CloseFile(fs, file); - hdfs->Disconnect(fs); } ssize_t HdfsFileSystem::Read(const string& filePath, char* fileContent, size_t datasetSize) { - hdfsFS fs = ConnectHdfs(); - hdfsFile file = hdfs->OpenFile(fs, filePath.c_str(), O_RDONLY, 0, 0, 0); if (!file) { - hdfs->Disconnect(fs); throw runtime_error(StringFormat("Error: Unable to open hdfs file : {}.", filePath.c_str())); } @@ -193,24 +161,19 @@ ssize_t HdfsFileSystem::Read(const string& filePath, char* fileContent, size_t d tSize res = hdfs->Read(fs, file, fileContent, datasetSize); if (res == -1) { hdfs->CloseFile(fs, file); - hdfs->Disconnect(fs); return static_cast(res); } readBytesNum += res; hdfs->CloseFile(fs, file); - hdfs->Disconnect(fs); return static_cast(readBytesNum); } ssize_t HdfsFileSystem::Read(const string& filePath, vector>& fileContent, int64_t contentOffset, vector offsetArr, const size_t& embeddingSize) { - hdfsFS fs = ConnectHdfs(); - hdfsFile file = hdfs->OpenFile(fs, filePath.c_str(), O_RDONLY, 0, 0, 0); if (!file) { - hdfs->Disconnect(fs); throw runtime_error(StringFormat("Error: Unable to open hdfs file : {}.", filePath.c_str())); } @@ -223,7 +186,6 @@ ssize_t HdfsFileSystem::Read(const string& filePath, vector>& file embeddingSize * sizeof(float)); if (res == -1) { hdfs->CloseFile(fs, file); - hdfs->Disconnect(fs); return static_cast(res); } embeddingCount++; @@ -231,7 +193,6 @@ ssize_t HdfsFileSystem::Read(const string& filePath, vector>& file } hdfs->CloseFile(fs, file); - hdfs->Disconnect(fs); return static_cast(readBytesNum); } @@ -245,11 +206,8 @@ void HdfsFileSystem::ReadEmbedding(const string& filePath, EmbeddingSizeInfo& em int deviceId, vector offsetArr) { #ifndef GTEST - hdfsFS fs = ConnectHdfs(); - hdfsFile file = hdfs->OpenFile(fs, filePath.c_str(), O_RDONLY, 0, 0, 0); if (!file) { - hdfs->Disconnect(fs); throw runtime_error(StringFormat("Error: Unable to open hdfs file : {}.", filePath.c_str())); } @@ -265,7 +223,6 @@ void HdfsFileSystem::ReadEmbedding(const string& filePath, EmbeddingSizeInfo& em int seekRes = hdfs->Seek(fs, file, offset * embedSizeInfo.embeddingSize * sizeof(float)); if (seekRes == -1) { hdfs->CloseFile(fs, file); - hdfs->Disconnect(fs); throw runtime_error(StringFormat("Error: hdfsSeek failed with error. 
file offset: {}", offset * embedSizeInfo.embeddingSize * sizeof(float))); } @@ -273,12 +230,10 @@ void HdfsFileSystem::ReadEmbedding(const string& filePath, EmbeddingSizeInfo& em tSize res = hdfs->Read(fs, file, row.data(), embedSizeInfo.embeddingSize * sizeof(float)); if (res == -1) { hdfs->CloseFile(fs, file); - hdfs->Disconnect(fs); throw runtime_error(StringFormat("Error: An error occurred while reading file: {}.", filePath.c_str())); } if (res != embedSizeInfo.embeddingSize * sizeof(float)) { hdfs->CloseFile(fs, file); - hdfs->Disconnect(fs); throw runtime_error(StringFormat("Error: Expected to read {} bytes, " "but actually read {} bytes from file {}.", embedSizeInfo.embeddingSize * sizeof(float), res, filePath.c_str())); @@ -289,21 +244,19 @@ void HdfsFileSystem::ReadEmbedding(const string& filePath, EmbeddingSizeInfo& em row.data(), embedSizeInfo.embeddingSize * sizeof(float), ACL_MEMCPY_HOST_TO_DEVICE); if (ret != ACL_SUCCESS) { hdfs->CloseFile(fs, file); - hdfs->Disconnect(fs); throw runtime_error("Error: Execute aclrtmemcpy from host to device failed."); } i++; } hdfs->CloseFile(fs, file); - hdfs->Disconnect(fs); #endif } hdfsFS HdfsFileSystem::ConnectHdfs() { - hdfsFS fs = hdfs->Connect("default", 0); - if (!fs) { + hdfsFS hdfsClient = hdfs->Connect("default", 0); + if (!hdfsClient) { throw runtime_error("Connect hdfs file system failed."); } - return fs; + return hdfsClient; } \ No newline at end of file diff --git a/src/core/file_system/hdfs_file_system/hdfs_file_system.h b/src/core/file_system/hdfs_file_system/hdfs_file_system.h index f6c6a489..bf56062f 100644 --- a/src/core/file_system/hdfs_file_system/hdfs_file_system.h +++ b/src/core/file_system/hdfs_file_system/hdfs_file_system.h @@ -24,11 +24,11 @@ namespace MxRec { class HdfsFileSystem : public FileSystem { public: - HdfsFileSystem() + HdfsFileSystem() {}; + ~HdfsFileSystem() { - hdfs = make_unique(); - }; - ~HdfsFileSystem() override {} + hdfs->Disconnect(fs); + } void CreateDir(const string& dirName) override; vector ListDir(const string& dirName) override; @@ -47,7 +47,8 @@ namespace MxRec { hdfsFS ConnectHdfs(); - unique_ptr hdfs; + unique_ptr hdfs = make_unique(); + hdfsFS fs = ConnectHdfs(); }; } diff --git a/src/tests/emb_table/embedding_static_test.cpp b/src/tests/emb_table/embedding_static_test.cpp index 9e250f64..a08569b3 100644 --- a/src/tests/emb_table/embedding_static_test.cpp +++ b/src/tests/emb_table/embedding_static_test.cpp @@ -156,6 +156,7 @@ TEST_F(EmbeddingStaticTest, SaveKeyData) { vector embInfos = {embInfo_}; shared_ptr hbm = std::make_shared(embInfo_, rankInfo_, 0); + hbm->SetFileSystemPtr("test_dir"); hbm->Save("test_dir"); bool fileExist = false; if (access("./test_dir/test1/key", F_OK) == 0) { -- Gitee From 80b3718559001df56b389f73e4c0992a87f12b0b Mon Sep 17 00:00:00 2001 From: 郭望 <1244372993@qq.com> Date: Tue, 4 Jun 2024 20:42:43 +0800 Subject: [PATCH 197/302] WideDeep model: revise the data preprocessing script criteo.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/WideDeep/criteo.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/examples/WideDeep/criteo.py b/examples/WideDeep/criteo.py index 137dac5c..f9624d21 100644 --- a/examples/WideDeep/criteo.py +++ b/examples/WideDeep/criteo.py @@ -167,10 +167,9 @@ def convert_input2tfrd(in_file_path, out_file_path): txt to tfrecords """ def make_example(label_list, dense_feat_list, sparse_feat_list): - # '1.0' >> 1.0 >> 1 - dense_feature = np.array(np.array(dense_feat_list, dtype=np.float32), dtype=np.int64).reshape(-1) - sparse_feature = np.array(np.array(sparse_feat_list, dtype=np.float32), dtype=np.int64).reshape(-1) - label = np.array(np.array(label_list, dtype=np.float32), dtype=np.int64).reshape(-1) + dense_feature = np.array(dense_feat_list, dtype=np.int64).reshape(-1) + sparse_feature = np.array(sparse_feat_list, dtype=np.int64).reshape(-1) + label = np.array(label_list, dtype=np.int64).reshape(-1) feature_dict = {"dense_feature": tf.train.Feature(int64_list=tf.train.Int64List(value=dense_feature)), "sparse_feature": tf.train.Feature(int64_list=tf.train.Int64List(value=sparse_feature)), "label": tf.train.Feature(int64_list=tf.train.Int64List(value=label)) -- Gitee From 6fbabb2abf1a9b9d7a0c45613ce2b3d3cefbd7b8 Mon Sep 17 00:00:00 2001 From: 罗幸运 Date: Wed, 5 Jun 2024 10:31:34 +0800 Subject: [PATCH 198/302] [Modification] DDR precision issue: fix a DDR-mode bug when using the sgd optimizer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mx_rec/graph/modifier.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mx_rec/graph/modifier.py b/mx_rec/graph/modifier.py index 8629b350..15c240e5 100644 --- a/mx_rec/graph/modifier.py +++ b/mx_rec/graph/modifier.py @@ -260,7 +260,7 @@ class _GraphModifier: control_ops = swap_control_dict["control_ops"] utils.replace_anchor_control(self._full_graph, control_ops, swap_op) - if is_training: + if is_training and slot_num > 1: # gather for slot needs to be executed after swap_op slot_control_dict = swap_args.slot_control_dict[table_instance.variable] if "control_ops" not in slot_control_dict: -- Gitee
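For readers reproducing the criteo.py change above: make_example relies on tf.train's protobuf helpers, which can be exercised without the rest of the pipeline. A minimal round-trip sketch (assumes only TensorFlow and NumPy; the feature values are made up):

    import numpy as np
    import tensorflow as tf

    label = np.array([1], dtype=np.int64)
    feature_dict = {
        "label": tf.train.Feature(int64_list=tf.train.Int64List(value=label)),
    }
    example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
    blob = example.SerializeToString()
    # Parse it back to confirm the schema round-trips.
    parsed = tf.io.parse_single_example(
        blob, {"label": tf.io.FixedLenFeature([1], tf.int64)})

From 3dbc71e41d9c24b864000170b55f9fe73274f7b1 Mon Sep 17 00:00:00 2001 From: 罗幸运 Date: Thu, 6 Jun 2024 16:07:28 +0800 Subject: [PATCH 199/302] [Modification] Support for multi-level caches in expansion mode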
b/examples/WideDeep/model/config.py index fae850f9..0072dc59 100644 --- a/examples/WideDeep/model/config.py +++ b/examples/WideDeep/model/config.py @@ -21,6 +21,8 @@ import tensorflow as tf from tensorflow.core.protobuf.rewriter_config_pb2 import RewriterConfig from npu_bridge.estimator.npu.npu_config import NPURunConfig +from mx_rec.constants.constants import CacheModeEnum + SSD_DATA_PATH = ["ssd_data"] @@ -90,12 +92,6 @@ class LearningRateScheduler: return lr_dense, lr_sparse -class CacheModeEnum(enum.Enum): - HBM = "HBM" - DDR = "DDR" - SSD = "SSD" - - class Config: def __init__(self, ): self.rank_id = int(os.getenv("OMPI_COMM_WORLD_RANK")) if os.getenv("OMPI_COMM_WORLD_RANK") else None diff --git a/examples/demo/little_demo/main.py b/examples/demo/little_demo/main.py index ff09bc50..cfaecbde 100644 --- a/examples/demo/little_demo/main.py +++ b/examples/demo/little_demo/main.py @@ -24,7 +24,7 @@ from glob import glob import numpy as np import tensorflow as tf -from mx_rec.constants.constants import ASCEND_TIMESTAMP +from mx_rec.constants.constants import ASCEND_TIMESTAMP, CacheModeEnum from mx_rec.core.asc.feature_spec import FeatureSpec from mx_rec.core.asc.helper import get_asc_insert_func from mx_rec.core.asc.manager import start_asc_pipeline @@ -46,12 +46,6 @@ tf.compat.v1.disable_eager_execution() _SSD_SAVE_PATH = ["ssd_data"] # user should make sure directory exist and clean before training -class CacheModeEnum(enum.Enum): - HBM = "HBM" - DDR = "DDR" - SSD = "SSD" - - def make_batch_and_iterator(is_training, feature_spec_list=None, use_timestamp=False, dump_graph=False, batch_number=100): dataset = generate_dataset(cfg, use_timestamp=use_timestamp, batch_number=batch_number) diff --git a/examples/dlrm/model/config.py b/examples/dlrm/model/config.py index 78115d61..c30a22d4 100644 --- a/examples/dlrm/model/config.py +++ b/examples/dlrm/model/config.py @@ -21,6 +21,8 @@ import tensorflow as tf from tensorflow.core.protobuf.rewriter_config_pb2 import RewriterConfig from npu_bridge.estimator.npu.npu_config import NPURunConfig +from mx_rec.constants.constants import CacheModeEnum + SSD_DATA_PATH = ["ssd_data"] @@ -90,12 +92,6 @@ class LearningRateScheduler: return lr_dense, lr_sparse -class CacheModeEnum(enum.Enum): - HBM = "HBM" - DDR = "DDR" - SSD = "SSD" - - class Config: def __init__(self, ): self.rank_id = int(os.getenv("OMPI_COMM_WORLD_RANK")) if os.getenv("OMPI_COMM_WORLD_RANK") else None diff --git a/mx_rec/constants/constants.py b/mx_rec/constants/constants.py index becba0ab..fa34fddc 100644 --- a/mx_rec/constants/constants.py +++ b/mx_rec/constants/constants.py @@ -14,7 +14,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== - +import sys from enum import Enum import numpy as np @@ -26,6 +26,12 @@ ASCEND_TIMESTAMP = "ASCEND_TIMESTAMP" ASCEND_SPARSE_LOOKUP_LOCAL_EMB = "ASCEND_SPARSE_LOOKUP_LOCAL_EMB" EMPTY_STR = "" +# default emb memory size for hbm、ddr、ssd +DEFAULT_DEVICE_CACHE_MEMORY_SIZE = 2 * 12024 * 1024 * 1024 +DEFAULT_HOST_CACHE_MEMORY_SIZE = 20 * 12024 * 1024 * 1024 +DEFAULT_SSD_CACHE_MEMORY_SIZE = sys.maxsize + + # 获取ConfigInitializer对象实例失败提示信息 GET_CONFIG_INSTANCE_ERR_MSG = "Please init the environment for mx_rec at first." 
@@ -142,6 +148,12 @@ class EnvOption(Enum): OMPI_COMM_WORLD_RANK = "OMPI_COMM_WORLD_RANK" +class CacheModeEnum(Enum): + HBM = "HBM" + DDR = "DDR" + SSD = "SSD" + + class DataName(Enum): KEY = "key" EMBEDDING = "embedding" diff --git a/mx_rec/core/embedding.py b/mx_rec/core/embedding.py index 348ab9d6..51c1231d 100644 --- a/mx_rec/core/embedding.py +++ b/mx_rec/core/embedding.py @@ -16,7 +16,8 @@ # ============================================================================== import os -from typing import Optional, Union +import psutil +from typing import Optional, Union, List import tensorflow as tf from tensorflow import Tensor @@ -28,7 +29,9 @@ from mx_rec.core.asc.feature_spec import FeatureSpec from mx_rec.core.emb.base_sparse_embedding import BaseSparseEmbedding from mx_rec.core.emb.emb_factory import HBMDynamicSparseEmbeddingFactory, HBMSparseEmbeddingFactory, \ ExternalStorageSparseEmbeddingFactory -from mx_rec.constants.constants import MAX_INT32, All2allGradientsOp, MAX_VOCABULARY_SIZE, MAX_DEVICE_VOCABULARY_SIZE +from mx_rec.constants.constants import (MAX_INT32, All2allGradientsOp, MAX_VOCABULARY_SIZE, MAX_DEVICE_VOCABULARY_SIZE, + CacheModeEnum, DEFAULT_DEVICE_CACHE_MEMORY_SIZE, DEFAULT_HOST_CACHE_MEMORY_SIZE, + DEFAULT_SSD_CACHE_MEMORY_SIZE) from mx_rec.graph.constants import AnchorIteratorOp from mx_rec.util.initialize import ConfigInitializer from mx_rec.validator.validator import ClassValidator, StringValidator, SSDFeatureValidator, \ @@ -51,19 +54,19 @@ from mx_rec.util.log import logger ("host_vocabulary_size", IntValidator, {"min_value": 0, "max_value": MAX_VOCABULARY_SIZE}, ["check_value"]), ("ssd_vocabulary_size", IntValidator, {"min_value": 0, "max_value": MAX_VOCABULARY_SIZE}, ["check_value"]), ("ssd_data_path", ClassValidator, {"classes": (list, tuple)}), - ("is_save", ClassValidator, {"classes": (bool, )}), + ("is_save", ClassValidator, {"classes": (bool,)}), ("init_param", FloatValidator, {"min_value": -10, "max_value": 10}, ["check_value"]), ("all2all_gradients_op", OptionValidator, {"options": [i.value for i in list(All2allGradientsOp)]}), ("value_dtype", OptionValidator, {"options": [tf.float32]}), ("shard_num", IntValidator, {"min_value": 1, "max_value": 8192}, ["check_value"]), - ("fusion_optimizer_var", ClassValidator, {"classes": (bool, )}), + ("fusion_optimizer_var", ClassValidator, {"classes": (bool,)}), ("hashtable_threshold", IntValidator, {"min_value": 0, "max_value": MAX_INT32}, ["check_value"]) ]) def create_table(key_dtype, dim, name, emb_initializer, device_vocabulary_size=1, host_vocabulary_size=0, ssd_vocabulary_size=0, - ssd_data_path=(os.getcwd(), ), + ssd_data_path=(os.getcwd(),), is_save=True, init_param=1., all2all_gradients_op=All2allGradientsOp.SUM_GRADIENTS.value, @@ -91,24 +94,28 @@ def create_table(key_dtype, dim, name, emb_initializer, """ name = fix_invalid_table_name(name) + voc_size_list = [device_vocabulary_size, host_vocabulary_size, ssd_vocabulary_size] + if check_and_set_default_voc_size(voc_size_list, dim): + raise ValueError("voc_size_lis does not fit this cache mode") + config = dict(key_dtype=key_dtype, embedding_size=dim, table_name=name, emb_initializer=emb_initializer, - device_vocabulary_size=device_vocabulary_size, host_vocabulary_size=host_vocabulary_size, - ssd_vocabulary_size=ssd_vocabulary_size, ssd_data_path=ssd_data_path, + device_vocabulary_size=voc_size_list[0], host_vocabulary_size=voc_size_list[1], + ssd_vocabulary_size=voc_size_list[2], ssd_data_path=ssd_data_path, init_param=init_param, is_save=is_save, 
all2all_gradients_op=all2all_gradients_op) # 动态扩容 if ConfigInitializer.get_instance().use_dynamic_expansion: return HBMDynamicSparseEmbeddingFactory().create_embedding(config) # DDR or SSD - if host_vocabulary_size > 0: + if voc_size_list[1] > 0: return ExternalStorageSparseEmbeddingFactory().create_embedding(config) # HBM return HBMSparseEmbeddingFactory().create_embedding(config) @para_checker_decorator(check_option_list=[ - ("hashtable", ClassValidator, {"classes": (BaseSparseEmbedding, )}), + ("hashtable", ClassValidator, {"classes": (BaseSparseEmbedding,)}), ("ids", ClassValidator, {"classes": (FeatureSpec, tf.Tensor)}), - ("is_train", ClassValidator, {"classes": (bool, )}), + ("is_train", ClassValidator, {"classes": (bool,)}), ("send_count", ClassValidator, {"classes": (int, type(None))}), ("send_count", OptionalIntValidator, {"min_value": 1, "max_value": MAX_INT32}, ["check_value"]), ("name", ClassValidator, {"classes": (str, type(None))}), @@ -116,7 +123,7 @@ def create_table(key_dtype, dim, name, emb_initializer, ("modify_graph", ClassValidator, {"classes": (bool, type(None))}), ("batch", ClassValidator, {"classes": (dict, type(None))}), ("access_and_evict_config", ClassValidator, {"classes": (dict, type(None))}), - ("is_grad", ClassValidator, {"classes": (bool, )}), + ("is_grad", ClassValidator, {"classes": (bool,)}), ("serving_default_value", ClassValidator, {"classes": (tf.Tensor, type(None))}) ]) def sparse_lookup(hashtable: BaseSparseEmbedding, @@ -201,3 +208,34 @@ def mark_orphan_lookup_key(lookup_key: Tensor) -> Tensor: logger.info('Mark orphan lookup key %s as %s.', lookup_key, marked_lookup_key) return marked_lookup_key + + +def check_and_set_default_voc_size(voc_size_list: List[int], dim: int) -> bool: + if ConfigInitializer.get_instance().use_dynamic_expansion: + voc_size_list[1] = 0 + voc_size_list[2] = 0 + return True + cache_mode = os.getenv("CACHE_MODE") + if cache_mode is None and voc_size_list[0] <= 1: # no cache mode, no use_dynamic_expansion, must input dev-voc + return False + if cache_mode is None and voc_size_list[1] == 0: # no cache mode, dev-voc not None, use HBM + return True + if cache_mode is None and voc_size_list[2] == 0: # no cache mode, dev-voc/host-voc not None, use DDR + return True + if cache_mode is None: # no cache mode, dev-voc/host-voc/ssd-voc not None, use SSD + return True + + if cache_mode not in [mode.value for mode in CacheModeEnum]: + return False + if cache_mode == CacheModeEnum.HBM.value and (voc_size_list[1] > 0 or voc_size_list[2]) > 0: + return False + if cache_mode == CacheModeEnum.DDR.value and voc_size_list[2] > 0: + return False + if voc_size_list[0] == 1: + voc_size_list[0] = int(DEFAULT_DEVICE_CACHE_MEMORY_SIZE / dim / 4) # float32 4 bytes + if (cache_mode == CacheModeEnum.DDR.value or cache_mode == CacheModeEnum.SSD.value) and voc_size_list[1] == 0: + sys_mem = psutil.virtual_memory().total / dim / 4 # float32 4 bytes + voc_size_list[1] = sys_mem if sys_mem is not None else int(DEFAULT_HOST_CACHE_MEMORY_SIZE / dim / 4) + if cache_mode == CacheModeEnum.SSD.value and voc_size_list[2] == 0: + voc_size_list[2] = DEFAULT_SSD_CACHE_MEMORY_SIZE + return True -- Gitee From 6512fca2b5f49505891d37b39a1b1c50b71344a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=BD=97=E5=B9=B8=E8=BF=90?= Date: Thu, 6 Jun 2024 16:10:21 +0800 Subject: [PATCH 200/302] =?UTF-8?q?=E3=80=90=E4=BF=AE=E6=94=B9=E8=AF=B4?= =?UTF-8?q?=E6=98=8E=20Modification=E3=80=91=E6=89=A9=E5=AE=B9=E6=A8=A1?= 
=?UTF-8?q?=E5=BC=8F=E5=AF=B9=E5=A4=9A=E7=BA=A7=E7=BC=93=E5=AD=98=E7=9A=84?= =?UTF-8?q?=E6=94=AF=E6=8C=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mx_rec/core/embedding.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mx_rec/core/embedding.py b/mx_rec/core/embedding.py index 51c1231d..206b6af3 100644 --- a/mx_rec/core/embedding.py +++ b/mx_rec/core/embedding.py @@ -95,7 +95,7 @@ def create_table(key_dtype, dim, name, emb_initializer, name = fix_invalid_table_name(name) voc_size_list = [device_vocabulary_size, host_vocabulary_size, ssd_vocabulary_size] - if check_and_set_default_voc_size(voc_size_list, dim): + if not check_and_set_default_voc_size(voc_size_list, dim): raise ValueError("voc_size_lis does not fit this cache mode") config = dict(key_dtype=key_dtype, embedding_size=dim, table_name=name, emb_initializer=emb_initializer, -- Gitee From 74c1fe6adfb0bfbfd9141c0fc28ad9e33ca3628b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=BD=97=E5=B9=B8=E8=BF=90?= Date: Thu, 6 Jun 2024 17:28:00 +0800 Subject: [PATCH 201/302] =?UTF-8?q?=E3=80=90=E4=BF=AE=E6=94=B9=E8=AF=B4?= =?UTF-8?q?=E6=98=8E=20Modification=E3=80=91=E6=89=A9=E5=AE=B9=E6=A8=A1?= =?UTF-8?q?=E5=BC=8F=E5=AF=B9=E5=A4=9A=E7=BA=A7=E7=BC=93=E5=AD=98=E7=9A=84?= =?UTF-8?q?=E6=94=AF=E6=8C=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mx_rec/core/embedding.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/mx_rec/core/embedding.py b/mx_rec/core/embedding.py index 206b6af3..872491da 100644 --- a/mx_rec/core/embedding.py +++ b/mx_rec/core/embedding.py @@ -94,8 +94,12 @@ def create_table(key_dtype, dim, name, emb_initializer, """ name = fix_invalid_table_name(name) + if isinstance(dim, tf.TensorShape): + dim_bytes = dim.as_list()[0] * 4 # float32 4 bytes + else: + dim_bytes = dim * 4 # float32 4 bytes voc_size_list = [device_vocabulary_size, host_vocabulary_size, ssd_vocabulary_size] - if not check_and_set_default_voc_size(voc_size_list, dim): + if not check_and_set_default_voc_size(voc_size_list, dim_bytes): raise ValueError("voc_size_lis does not fit this cache mode") config = dict(key_dtype=key_dtype, embedding_size=dim, table_name=name, emb_initializer=emb_initializer, @@ -210,7 +214,7 @@ def mark_orphan_lookup_key(lookup_key: Tensor) -> Tensor: return marked_lookup_key -def check_and_set_default_voc_size(voc_size_list: List[int], dim: int) -> bool: +def check_and_set_default_voc_size(voc_size_list: List[int], dim_bytes: int) -> bool: if ConfigInitializer.get_instance().use_dynamic_expansion: voc_size_list[1] = 0 voc_size_list[2] = 0 @@ -232,10 +236,12 @@ def check_and_set_default_voc_size(voc_size_list: List[int], dim: int) -> bool: if cache_mode == CacheModeEnum.DDR.value and voc_size_list[2] > 0: return False if voc_size_list[0] == 1: - voc_size_list[0] = int(DEFAULT_DEVICE_CACHE_MEMORY_SIZE / dim / 4) # float32 4 bytes + voc_size_list[0] = int(DEFAULT_DEVICE_CACHE_MEMORY_SIZE / dim_bytes) if (cache_mode == CacheModeEnum.DDR.value or cache_mode == CacheModeEnum.SSD.value) and voc_size_list[1] == 0: - sys_mem = psutil.virtual_memory().total / dim / 4 # float32 4 bytes - voc_size_list[1] = sys_mem if sys_mem is not None else int(DEFAULT_HOST_CACHE_MEMORY_SIZE / dim / 4) + sys_voc = int(psutil.virtual_memory().total * 0.8 / dim_bytes) # max host mem equal (0.8 * sys mem) + default_host_voc_size = int(DEFAULT_HOST_CACHE_MEMORY_SIZE / dim_bytes) + max_host_voc_size = 
MAX_VOCABULARY_SIZE if (sys_voc is not None and sys_voc > MAX_VOCABULARY_SIZE) else sys_voc + voc_size_list[1] = max_host_voc_size if sys_voc is not None else default_host_voc_size if cache_mode == CacheModeEnum.SSD.value and voc_size_list[2] == 0: voc_size_list[2] = DEFAULT_SSD_CACHE_MEMORY_SIZE return True -- Gitee From 6620de17237f8396987c5f4a262ced42b2d2f2e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=BD=97=E5=B9=B8=E8=BF=90?= Date: Thu, 6 Jun 2024 18:47:45 +0800 Subject: [PATCH 202/302] =?UTF-8?q?=E3=80=90=E4=BF=AE=E6=94=B9=E8=AF=B4?= =?UTF-8?q?=E6=98=8E=20Modification=E3=80=91=E6=89=A9=E5=AE=B9=E6=A8=A1?= =?UTF-8?q?=E5=BC=8F=E5=AF=B9=E5=A4=9A=E7=BA=A7=E7=BC=93=E5=AD=98=E7=9A=84?= =?UTF-8?q?=E6=94=AF=E6=8C=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mx_rec/constants/constants.py | 6 +++--- mx_rec/core/embedding.py | 14 +++++--------- 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/mx_rec/constants/constants.py b/mx_rec/constants/constants.py index fa34fddc..fd27fc27 100644 --- a/mx_rec/constants/constants.py +++ b/mx_rec/constants/constants.py @@ -27,8 +27,8 @@ ASCEND_SPARSE_LOOKUP_LOCAL_EMB = "ASCEND_SPARSE_LOOKUP_LOCAL_EMB" EMPTY_STR = "" # default emb memory size for hbm、ddr、ssd -DEFAULT_DEVICE_CACHE_MEMORY_SIZE = 2 * 12024 * 1024 * 1024 -DEFAULT_HOST_CACHE_MEMORY_SIZE = 20 * 12024 * 1024 * 1024 +DEFAULT_DEVICE_CACHE_MEMORY_SIZE = 2 * 1024 * 1024 * 1024 +DEFAULT_HOST_CACHE_MEMORY_SIZE = 40 * 1024 * 1024 * 1024 DEFAULT_SSD_CACHE_MEMORY_SIZE = sys.maxsize @@ -74,7 +74,7 @@ DEFAULT_EVICT_TIME_INTERVAL = 60 * 60 * 24 TRAIN_CHANNEL_ID = 0 EVAL_CHANNEL_ID = 1 HASHTABLE_COLLECTION_NAME_LENGTH = 30 -MAX_VOCABULARY_SIZE = 10**10 +MAX_VOCABULARY_SIZE = 10**9 MAX_DEVICE_VOCABULARY_SIZE = 10 ** 9 # RANK INFO diff --git a/mx_rec/core/embedding.py b/mx_rec/core/embedding.py index 872491da..0422f893 100644 --- a/mx_rec/core/embedding.py +++ b/mx_rec/core/embedding.py @@ -16,7 +16,6 @@ # ============================================================================== import os -import psutil from typing import Optional, Union, List import tensorflow as tf @@ -94,10 +93,7 @@ def create_table(key_dtype, dim, name, emb_initializer, """ name = fix_invalid_table_name(name) - if isinstance(dim, tf.TensorShape): - dim_bytes = dim.as_list()[0] * 4 # float32 4 bytes - else: - dim_bytes = dim * 4 # float32 4 bytes + dim_bytes = dim.as_list()[0] * 4 if isinstance(dim, tf.TensorShape) else dim * 4 # float32 4 bytes voc_size_list = [device_vocabulary_size, host_vocabulary_size, ssd_vocabulary_size] if not check_and_set_default_voc_size(voc_size_list, dim_bytes): raise ValueError("voc_size_lis does not fit this cache mode") @@ -236,12 +232,12 @@ def check_and_set_default_voc_size(voc_size_list: List[int], dim_bytes: int) -> if cache_mode == CacheModeEnum.DDR.value and voc_size_list[2] > 0: return False if voc_size_list[0] == 1: - voc_size_list[0] = int(DEFAULT_DEVICE_CACHE_MEMORY_SIZE / dim_bytes) + default_device_voc_size = int(DEFAULT_DEVICE_CACHE_MEMORY_SIZE / dim_bytes) + voc_size_list[0] = default_device_voc_size if default_device_voc_size < MAX_VOCABULARY_SIZE \ + else MAX_VOCABULARY_SIZE if (cache_mode == CacheModeEnum.DDR.value or cache_mode == CacheModeEnum.SSD.value) and voc_size_list[1] == 0: - sys_voc = int(psutil.virtual_memory().total * 0.8 / dim_bytes) # max host mem equal (0.8 * sys mem) default_host_voc_size = int(DEFAULT_HOST_CACHE_MEMORY_SIZE / dim_bytes) - max_host_voc_size = MAX_VOCABULARY_SIZE if (sys_voc is not None and 
sys_voc > MAX_VOCABULARY_SIZE) else sys_voc - voc_size_list[1] = max_host_voc_size if sys_voc is not None else default_host_voc_size + voc_size_list[1] = default_host_voc_size if default_host_voc_size < MAX_VOCABULARY_SIZE else MAX_VOCABULARY_SIZE if cache_mode == CacheModeEnum.SSD.value and voc_size_list[2] == 0: voc_size_list[2] = DEFAULT_SSD_CACHE_MEMORY_SIZE return True -- Gitee From 6a275d1dcef4773fc5426a3f92d8de55d3becbe9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=BD=97=E5=B9=B8=E8=BF=90?= Date: Thu, 6 Jun 2024 18:48:03 +0800 Subject: [PATCH 203/302] =?UTF-8?q?=E3=80=90=E4=BF=AE=E6=94=B9=E8=AF=B4?= =?UTF-8?q?=E6=98=8E=20Modification=E3=80=91=E6=89=A9=E5=AE=B9=E6=A8=A1?= =?UTF-8?q?=E5=BC=8F=E5=AF=B9=E5=A4=9A=E7=BA=A7=E7=BC=93=E5=AD=98=E7=9A=84?= =?UTF-8?q?=E6=94=AF=E6=8C=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mx_rec/constants/constants.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mx_rec/constants/constants.py b/mx_rec/constants/constants.py index fd27fc27..60985115 100644 --- a/mx_rec/constants/constants.py +++ b/mx_rec/constants/constants.py @@ -74,7 +74,7 @@ DEFAULT_EVICT_TIME_INTERVAL = 60 * 60 * 24 TRAIN_CHANNEL_ID = 0 EVAL_CHANNEL_ID = 1 HASHTABLE_COLLECTION_NAME_LENGTH = 30 -MAX_VOCABULARY_SIZE = 10**9 +MAX_VOCABULARY_SIZE = 10**8 MAX_DEVICE_VOCABULARY_SIZE = 10 ** 9 # RANK INFO -- Gitee From 99f8838efc2b0c2abd49bb8c4fd036c76f6142ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=BD=97=E5=B9=B8=E8=BF=90?= Date: Thu, 6 Jun 2024 19:49:54 +0800 Subject: [PATCH 204/302] =?UTF-8?q?=E3=80=90=E4=BF=AE=E6=94=B9=E8=AF=B4?= =?UTF-8?q?=E6=98=8E=20Modification=E3=80=91=E6=89=A9=E5=AE=B9=E6=A8=A1?= =?UTF-8?q?=E5=BC=8F=E5=AF=B9=E5=A4=9A=E7=BA=A7=E7=BC=93=E5=AD=98=E7=9A=84?= =?UTF-8?q?=E6=94=AF=E6=8C=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mx_rec/constants/constants.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mx_rec/constants/constants.py b/mx_rec/constants/constants.py index 60985115..fd27fc27 100644 --- a/mx_rec/constants/constants.py +++ b/mx_rec/constants/constants.py @@ -74,7 +74,7 @@ DEFAULT_EVICT_TIME_INTERVAL = 60 * 60 * 24 TRAIN_CHANNEL_ID = 0 EVAL_CHANNEL_ID = 1 HASHTABLE_COLLECTION_NAME_LENGTH = 30 -MAX_VOCABULARY_SIZE = 10**8 +MAX_VOCABULARY_SIZE = 10**9 MAX_DEVICE_VOCABULARY_SIZE = 10 ** 9 # RANK INFO -- Gitee From 4d37925e6a15d0c387220498572c569610bd252e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=BD=97=E5=B9=B8=E8=BF=90?= Date: Thu, 6 Jun 2024 20:20:58 +0800 Subject: [PATCH 205/302] =?UTF-8?q?=E3=80=90=E4=BF=AE=E6=94=B9=E8=AF=B4?= =?UTF-8?q?=E6=98=8E=20Modification=E3=80=91issure=20bug=20fix?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mx_rec/core/embedding.py | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/mx_rec/core/embedding.py b/mx_rec/core/embedding.py index 0422f893..8c12eb4c 100644 --- a/mx_rec/core/embedding.py +++ b/mx_rec/core/embedding.py @@ -95,8 +95,7 @@ def create_table(key_dtype, dim, name, emb_initializer, dim_bytes = dim.as_list()[0] * 4 if isinstance(dim, tf.TensorShape) else dim * 4 # float32 4 bytes voc_size_list = [device_vocabulary_size, host_vocabulary_size, ssd_vocabulary_size] - if not check_and_set_default_voc_size(voc_size_list, dim_bytes): - raise ValueError("voc_size_lis does not fit this cache mode") + check_and_set_default_voc_size(voc_size_list, dim_bytes) config = 
dict(key_dtype=key_dtype, embedding_size=dim, table_name=name, emb_initializer=emb_initializer,
                   device_vocabulary_size=voc_size_list[0], host_vocabulary_size=voc_size_list[1],
@@ -210,27 +209,29 @@ def mark_orphan_lookup_key(lookup_key: Tensor) -> Tensor:
     return marked_lookup_key
 
 
-def check_and_set_default_voc_size(voc_size_list: List[int], dim_bytes: int) -> bool:
+def check_and_set_default_voc_size(voc_size_list: List[int], dim_bytes: int):
     if ConfigInitializer.get_instance().use_dynamic_expansion:
         voc_size_list[1] = 0
         voc_size_list[2] = 0
-        return True
+        return
     cache_mode = os.getenv("CACHE_MODE")
-    if cache_mode is None and voc_size_list[0] <= 1:  # no cache mode, no use_dynamic_expansion, must input dev-voc
-        return False
-    if cache_mode is None and voc_size_list[1] == 0:  # no cache mode, dev-voc not None, use HBM
-        return True
-    if cache_mode is None and voc_size_list[2] == 0:  # no cache mode, dev-voc/host-voc not None, use DDR
-        return True
-    if cache_mode is None:  # no cache mode, dev-voc/host-voc/ssd-voc not None, use SSD
-        return True
+    if not cache_mode and voc_size_list[0] <= 1:
+        raise ValueError("device_vocabulary_size is required when neither CACHE_MODE nor use_dynamic_expansion is set")
+    if not cache_mode and voc_size_list[1] == 0 and voc_size_list[2] == 0:  # no cache mode, dev-voc not None, use HBM
+        return
+    if not cache_mode and voc_size_list[1] == 0 and voc_size_list[2] > 0:
+        raise ValueError("when CACHE_MODE is unset and host_vocabulary_size is 0, ssd_vocabulary_size must be 0 too")
+    if not cache_mode and voc_size_list[2] == 0:  # no cache mode, dev-voc/host-voc not None, use DDR
+        return
+    if not cache_mode:  # no cache mode, dev-voc/host-voc/ssd-voc not None, use SSD
+        return
 
     if cache_mode not in [mode.value for mode in CacheModeEnum]:
-        return False
-    if cache_mode == CacheModeEnum.HBM.value and (voc_size_list[1] > 0 or voc_size_list[2]) > 0:
-        return False
+        raise ValueError("CACHE_MODE must be one of HBM, DDR or SSD")
+    if cache_mode == CacheModeEnum.HBM.value and (voc_size_list[1] > 0 or voc_size_list[2] > 0):
+        raise ValueError("in HBM cache mode, host_vocabulary_size and ssd_vocabulary_size must be 0")
     if cache_mode == CacheModeEnum.DDR.value and voc_size_list[2] > 0:
-        return False
+        raise ValueError("in DDR cache mode, ssd_vocabulary_size must be 0")
     if voc_size_list[0] == 1:
         default_device_voc_size = int(DEFAULT_DEVICE_CACHE_MEMORY_SIZE / dim_bytes)
         voc_size_list[0] = default_device_voc_size if default_device_voc_size < MAX_VOCABULARY_SIZE \
@@ -240,4 +241,4 @@ def check_and_set_default_voc_size(voc_size_list: List[int], dim_bytes: int) ->
         voc_size_list[1] = default_host_voc_size if default_host_voc_size < MAX_VOCABULARY_SIZE else MAX_VOCABULARY_SIZE
     if cache_mode == CacheModeEnum.SSD.value and voc_size_list[2] == 0:
         voc_size_list[2] = DEFAULT_SSD_CACHE_MEMORY_SIZE
-    return True
+    return
-- 
Gitee

From c864ba8124b8f45d3fc0540bd4e60cba9e40134c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E7=BD=97=E5=B9=B8=E8=BF=90?=
Date: Thu, 6 Jun 2024 20:42:03 +0800
Subject: [PATCH 206/302] =?UTF-8?q?=E3=80=90=E4=BF=AE=E6=94=B9=E8=AF=B4?=
 =?UTF-8?q?=E6=98=8E=20Modification=E3=80=91issue=20bug=20fix?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 tests/mx_rec/core/test_embedding.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tests/mx_rec/core/test_embedding.py b/tests/mx_rec/core/test_embedding.py
index 5bc762f4..509b9ae7 100644
--- a/tests/mx_rec/core/test_embedding.py
+++ b/tests/mx_rec/core/test_embedding.py
@@ -88,7 +88,8 @@ class TestCreateTableFunc(unittest.TestCase):
test_table = create_table(key_dtype=tf.int64, dim=8, name='test_table', - emb_initializer=tf.compat.v1.truncated_normal_initializer()) + emb_initializer=tf.compat.v1.truncated_normal_initializer(), + device_vocabulary_size=8) self.assertIsInstance(test_table, HBMSparseEmbedding) @mock.patch.multiple("mx_rec.core.emb.base_sparse_embedding", @@ -120,6 +121,7 @@ class TestCreateTableFunc(unittest.TestCase): dim=8, name='test_table', emb_initializer=tf.compat.v1.truncated_normal_initializer(), + device_vocabulary_size=8, host_vocabulary_size=8) self.assertIsInstance(test_table, ExternalStorageSparseEmbedding) -- Gitee From 0af09c050417adb28125ba6ecebe4cabc506cd36 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=83=AD=E6=9C=9B?= <1244372993@qq.com> Date: Thu, 6 Jun 2024 14:08:39 +0000 Subject: [PATCH 207/302] =?UTF-8?q?!177=20WideDeep=E6=A8=A1=E5=9E=8B?= =?UTF-8?q?=EF=BC=9A=E7=89=88=E6=9C=AC=E6=A3=80=E6=9F=A5cleancode=E4=BF=AE?= =?UTF-8?q?=E6=94=B9=20*=20WideDeep=E6=A8=A1=E5=9E=8B=EF=BC=9A=20=E7=89=88?= =?UTF-8?q?=E6=9C=AC=E6=A3=80=E6=9F=A5cleancode=E4=BF=AE=E6=94=B93?= =?UTF-8?q?=E8=AF=84=E8=AE=BA=E6=84=8F=E8=A7=81=E4=BF=AE=E6=94=B9=20*=20Wi?= =?UTF-8?q?deDeep=E6=A8=A1=E5=9E=8B=EF=BC=9A=E7=89=88=E6=9C=AC=E6=A3=80?= =?UTF-8?q?=E6=9F=A5cleancode=E4=BF=AE=E6=94=B93=20*=20WideDeep=E6=A8=A1?= =?UTF-8?q?=E5=9E=8B=EF=BC=9A=E7=89=88=E6=9C=AC=E6=A3=80=E6=9F=A5cleancode?= =?UTF-8?q?=E4=BF=AE=E6=94=B92=20*=20WideDeep=E6=A8=A1=E5=9E=8B=EF=BC=9A?= =?UTF-8?q?=E7=89=88=E6=9C=AC=E6=A3=80=E6=9F=A5cleancode=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/WideDeep/criteo.py | 45 +++++++++++++++++++++------ examples/WideDeep/model/main_mxrec.py | 8 +++-- 2 files changed, 40 insertions(+), 13 deletions(-) diff --git a/examples/WideDeep/criteo.py b/examples/WideDeep/criteo.py index f9624d21..617c76f6 100644 --- a/examples/WideDeep/criteo.py +++ b/examples/WideDeep/criteo.py @@ -1,3 +1,19 @@ +# coding=utf-8 +# Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
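
Stepping back from the diff for a moment: across patches 199 to 206 above, create_table's storage-tier selection converges on a small set of rules driven by the CACHE_MODE environment variable and the three vocabulary sizes. A minimal, self-contained restatement of those rules (illustrative only; the real entry point is check_and_set_default_voc_size in mx_rec/core/embedding.py):

    def storage_tier(cache_mode, dev_voc, host_voc, ssd_voc):
        # Mirrors the acceptance rules patch 205 settles on.
        if not cache_mode:
            if dev_voc <= 1:
                raise ValueError("device vocabulary is required when CACHE_MODE is unset")
            if host_voc == 0 and ssd_voc > 0:
                raise ValueError("an SSD vocabulary requires a host vocabulary")
            return "HBM" if host_voc == 0 else ("DDR" if ssd_voc == 0 else "SSD")
        if cache_mode not in ("HBM", "DDR", "SSD"):
            raise ValueError("CACHE_MODE must be one of HBM, DDR or SSD")
        if cache_mode == "HBM" and (host_voc > 0 or ssd_voc > 0):
            raise ValueError("HBM mode forbids host and SSD vocabularies")
        if cache_mode == "DDR" and ssd_voc > 0:
            raise ValueError("DDR mode forbids an SSD vocabulary")
        return cache_mode

    assert storage_tier(None, 8, 0, 0) == "HBM"
    assert storage_tier(None, 8, 8, 0) == "DDR"
    assert storage_tier("SSD", 8, 8, 8) == "SSD"

Vocabulary sizes left at their defaults are then back-filled from the byte budgets noted earlier, capped at MAX_VOCABULARY_SIZE.
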
+# ============================================================================== + import os import stat import pickle @@ -58,7 +74,10 @@ def split_byline_count(filename, count, sub_dir_name): sub = make_sub_file(buf, head, filename, sub_dir_name, sub) buf = [] if len(buf) != 0: - make_sub_file(buf, head, filename, sub_dir_name, sub) + try: + make_sub_file(buf, head, filename, sub_dir_name, sub) + except FileNotFoundError as err: + raise FileNotFoundError("please check the filename of data") from err finally: f.close() @@ -170,10 +189,11 @@ def convert_input2tfrd(in_file_path, out_file_path): dense_feature = np.array(dense_feat_list, dtype=np.int64).reshape(-1) sparse_feature = np.array(sparse_feat_list, dtype=np.int64).reshape(-1) label = np.array(label_list, dtype=np.int64).reshape(-1) - feature_dict = {"dense_feature": tf.train.Feature(int64_list=tf.train.Int64List(value=dense_feature)), - "sparse_feature": tf.train.Feature(int64_list=tf.train.Int64List(value=sparse_feature)), - "label": tf.train.Feature(int64_list=tf.train.Int64List(value=label)) - } + feature_dict = { + "dense_feature": tf.train.Feature(int64_list=tf.train.Int64List(value=dense_feature)), + "sparse_feature": tf.train.Feature(int64_list=tf.train.Int64List(value=sparse_feature)), + "label": tf.train.Feature(int64_list=tf.train.Int64List(value=label)) + } example = tf.train.Example(features=tf.train.Features(feature=feature_dict)) return example @@ -183,7 +203,7 @@ def convert_input2tfrd(in_file_path, out_file_path): with open(in_file_path, encoding='utf-8') as file_in: - for i, line in tqdm(enumerate(file_in)): + for _, line in tqdm(enumerate(file_in)): line = line.strip('\n') items = line.split('\t') @@ -226,13 +246,18 @@ if __name__ == '__main__': data_df[dense_features] = data_df[dense_features].fillna(0) # sparse feature: mapping for col in sparse_features: - data_df[col] = data_df[col].map(lambda x: feature_map[col][x]) + try: + data_df[col] = data_df[col].map(lambda x: feature_map[col][x]) + except KeyError as er: + raise KeyError("Feature {} not found in dataset".format(col)) from er # dense feature: Bin continuous data into intervals. 
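
The offset step that follows (slot_size_array plus np.cumsum) is the usual trick for packing many categorical slots into a single id space: each slot's raw ids are shifted by the cumulative size of all preceding slots, so every slot owns a disjoint range of the shared embedding table. A toy illustration with a three-slot subset of the array below:

    import numpy as np

    slot_size_array = [1001, 1462, 585]                       # per-slot cardinalities (subset)
    offset_size_list = np.cumsum([0] + slot_size_array[:-1])  # -> [0, 1001, 2463]

    raw_ids = np.array([[7, 3, 9]])       # one sample, one id per slot
    global_ids = raw_ids + offset_size_list
    print(global_ids)                     # [[   7 1004 2472]]
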
data_df[dense_features] = rec_kbins_discretizer(data_df[dense_features], 1000, feature_map) # add offsets - slot_size_array = [1001, 1001, 1001, 1001, 1001, 1001, 1001, 1001, 1001, 1001, 1001, 1001, 1001, - 1462, 585, 10131228, 2202609, 307, 25, 12519, 635, 5, 93147, 5685, 8351594, 3196, - 29, 14994, 5461307, 12, 5654, 2174, 5, 7046548, 19, 17, 286182, 106, 142573] + slot_size_array = [ + 1001, 1001, 1001, 1001, 1001, 1001, 1001, 1001, 1001, 1001, 1001, 1001, 1001, + 1462, 585, 10131228, 2202609, 307, 25, 12519, 635, 5, 93147, 5685, 8351594, 3196, + 29, 14994, 5461307, 12, 5654, 2174, 5, 7046548, 19, 17, 286182, 106, 142573 + ] offset_size_list = np.cumsum([0] + slot_size_array[:-1]) for col_index in range(1, len(offset_size_list) + 1): data_df.iloc[:, col_index] += offset_size_list[col_index - 1] diff --git a/examples/WideDeep/model/main_mxrec.py b/examples/WideDeep/model/main_mxrec.py index b1d77698..0a7c2f87 100644 --- a/examples/WideDeep/model/main_mxrec.py +++ b/examples/WideDeep/model/main_mxrec.py @@ -284,10 +284,12 @@ def create_feature_spec_list(use_timestamp=False): access_threshold = 1000 eviction_threshold = 180 - feature_spec_list = [FeatureSpec("sparse_feature", table_name="wide_embeddings", batch_size=cfg.batch_size, + feature_spec_list = [ + FeatureSpec("sparse_feature", table_name="wide_embeddings", batch_size=cfg.batch_size, access_threshold=access_threshold, eviction_threshold=eviction_threshold), - FeatureSpec("sparse_feature", table_name="deep_embeddings", batch_size=cfg.batch_size, - access_threshold=access_threshold, eviction_threshold=eviction_threshold)] + FeatureSpec("sparse_feature", table_name="deep_embeddings", batch_size=cfg.batch_size, + access_threshold=access_threshold, eviction_threshold=eviction_threshold) + ] if use_multi_lookup: feature_spec_list.extend([FeatureSpec("sparse_feature", table_name="wide_embeddings", -- Gitee From 747c21b273a76b42a9addd37c1dd9462bbff1dcd Mon Sep 17 00:00:00 2001 From: sihaixianyu Date: Fri, 7 Jun 2024 10:40:29 +0800 Subject: [PATCH 208/302] =?UTF-8?q?[CleanCode]=20=E6=B8=85=E7=90=86?= =?UTF-8?q?=E6=94=B9=E5=9B=BE=E9=83=A8=E5=88=86=E7=9A=84=E9=AD=94=E6=B3=95?= =?UTF-8?q?=E5=AD=97=E7=AC=A6=E4=B8=B2=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mx_rec/core/asc/swap_args.py | 1 + mx_rec/graph/modifier.py | 24 ++++++++++++------------ 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/mx_rec/core/asc/swap_args.py b/mx_rec/core/asc/swap_args.py index 1d206b95..3157e1e0 100644 --- a/mx_rec/core/asc/swap_args.py +++ b/mx_rec/core/asc/swap_args.py @@ -22,6 +22,7 @@ from enum import Enum class SwapDataType(Enum): CONFIG = "config" CONTROL = "control" + CONTROL_OPS = "control_ops" def singleton(cls): diff --git a/mx_rec/graph/modifier.py b/mx_rec/graph/modifier.py index 15c240e5..97205481 100644 --- a/mx_rec/graph/modifier.py +++ b/mx_rec/graph/modifier.py @@ -38,7 +38,7 @@ from mx_rec.constants.constants import ( from mx_rec.core.asc.feature_spec import FeatureSpec from mx_rec.core.asc.helper import get_asc_insert_func from mx_rec.core.asc.manager import start_asc_pipeline -from mx_rec.core.asc.swap_args import SwapArgs +from mx_rec.core.asc.swap_args import SwapArgs, SwapDataType from mx_rec.core.asc.build_graph import SwapInfo from mx_rec.core.emb.base_sparse_embedding import BaseSparseEmbedding from mx_rec.graph.merge_lookup import do_merge_lookup @@ -255,17 +255,17 @@ class _GraphModifier: table_instance, variable_and_slot_list, swap_args_dict["swap_info"], 
channel_id) # gather for id_offset need to be executed after swap_op swap_control_dict = swap_args.swap_control_dict[table_instance.table_name][channel_id] - if "control_ops" not in swap_control_dict: + if SwapDataType.CONTROL_OPS.value not in swap_control_dict: raise ValueError("swap control missing key [control_ops] in modify_graph_for_asc") - control_ops = swap_control_dict["control_ops"] + control_ops = swap_control_dict[SwapDataType.CONTROL_OPS.value] utils.replace_anchor_control(self._full_graph, control_ops, swap_op) if is_training and slot_num > 1: # gather for slot need to be executed after swap_op slot_control_dict = swap_args.slot_control_dict[table_instance.variable] - if "control_ops" not in slot_control_dict: + if SwapDataType.CONTROL_OPS.value not in slot_control_dict: raise ValueError("slot control missing key [control_ops] in modify_graph_for_asc") - slot_control_ops = slot_control_dict["control_ops"] + slot_control_ops = slot_control_dict[SwapDataType.CONTROL_OPS.value] utils.replace_anchor_control(self._full_graph, slot_control_ops, swap_op) def _generate_get_next_op_specs(self, cutting_point_list: List[Tensor]) -> Dict[Tensor, _AnchorRecord]: @@ -728,8 +728,8 @@ def _get_variable_and_slot_list(each_var, slot_num, table_name, channel_id): return variable_and_slot_list -def _get_swap_info(table_instance: BaseSparseEmbedding, variable_and_slot_list: list, - swap_info: SwapInfo, channel_id: int) -> list: +def _get_swap_info(table_instance: BaseSparseEmbedding, variable_and_slot_list: list, + swap_info: SwapInfo, channel_id: int) -> list: """ Get swap op. :param table_instance: BaseSparseEmbedding @@ -740,10 +740,10 @@ def _get_swap_info(table_instance: BaseSparseEmbedding, variable_and_slot_list: """ if table_instance.is_hbm: return [tf.no_op()] - + if len(variable_and_slot_list) == 0: raise RuntimeError("When enable emb_transfer, optimizer should have slots") - + use_static = ConfigInitializer.get_instance().use_static max_lookup_vec_size = None if use_static: @@ -756,7 +756,7 @@ def _get_swap_info(table_instance: BaseSparseEmbedding, variable_and_slot_list: output_shapes=[[max_lookup_vec_size, table_instance.ext_emb_size]], channel_name=f'{table_instance.table_name}_h2d_all')[0] logger.debug("h2d_emb shape: %s", h2d_emb) - + swap_out_pos = swap_info.swap_out_pos swap_in_pos = swap_info.swap_in_pos if use_static: @@ -766,14 +766,14 @@ def _get_swap_info(table_instance: BaseSparseEmbedding, variable_and_slot_list: swap_outs = [tf.gather(one_table, swap_out_pos) for one_table in variable_and_slot_list] swap_out = tf.concat(swap_outs, axis=1) logger.debug('Channel %s_d2h_all was built for op outfeed.', table_instance.table_name) - + swap_out_op = npu_ops.outfeed_enqueue_op( channel_name=f'{table_instance.table_name}_d2h_all', inputs=[swap_out]) with tf.control_dependencies([swap_out_op]): nd_swap_pos = tf.expand_dims(swap_in_pos, 1) var_num = len(variable_and_slot_list) h2d_emb_split = tf.split(h2d_emb, var_num, axis=1) - + optimizer = ConfigInitializer.get_instance().optimizer_config.get_optimizer_by_table_name( table_instance.table_name) if optimizer is None and channel_id == 1: -- Gitee From 11b42c08a357d4ea5924403daa357587f4d8b5e2 Mon Sep 17 00:00:00 2001 From: yangzhen_BIG Date: Tue, 4 Jun 2024 03:57:47 +0000 Subject: [PATCH 209/302] =?UTF-8?q?=E6=8A=BD=E8=B1=A1L3=E5=AD=98=E5=82=A8?= =?UTF-8?q?=E6=8E=A5=E5=8F=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core/emb_table/embedding_ddr.cpp | 4 +- 
src/core/emb_table/embedding_table.h | 2 +- src/core/hybrid_mgmt/hybrid_mgmt.cpp | 254 +++++++++--------- src/core/hybrid_mgmt/hybrid_mgmt.h | 44 +-- .../cache_manager.cpp | 137 +++++----- .../{ssd_cache => l3_storage}/cache_manager.h | 37 +-- src/core/l3_storage/l3_storage.cpp | 69 +++++ src/core/l3_storage/l3_storage.h | 63 +++++ .../{ssd_cache => l3_storage}/lfu_cache.cpp | 0 .../{ssd_cache => l3_storage}/lfu_cache.h | 0 .../preprocess_mapper.h | 56 ++-- src/core/ssd_engine/ssd_engine.cpp | 2 +- src/core/ssd_engine/ssd_engine.h | 6 +- src/tests/ssd_cache/cache_manager_test.cpp | 46 ++-- src/tests/ssd_cache/lfu_cache_test.cpp | 2 +- 15 files changed, 438 insertions(+), 284 deletions(-) rename src/core/{ssd_cache => l3_storage}/cache_manager.cpp (61%) rename src/core/{ssd_cache => l3_storage}/cache_manager.h (71%) create mode 100644 src/core/l3_storage/l3_storage.cpp create mode 100644 src/core/l3_storage/l3_storage.h rename src/core/{ssd_cache => l3_storage}/lfu_cache.cpp (100%) rename src/core/{ssd_cache => l3_storage}/lfu_cache.h (100%) rename src/core/{ssd_cache => l3_storage}/preprocess_mapper.h (54%) diff --git a/src/core/emb_table/embedding_ddr.cpp b/src/core/emb_table/embedding_ddr.cpp index 092ad0c5..3898a7da 100644 --- a/src/core/emb_table/embedding_ddr.cpp +++ b/src/core/emb_table/embedding_ddr.cpp @@ -18,7 +18,7 @@ See the License for the specific language governing permissions and #include "utils/logger.h" #include "utils/singleton.h" -#include "ssd_cache/cache_manager.h" +#include "l3_storage/cache_manager.h" #include "ock_ctr_common/include/error_code.h" using namespace MxRec; @@ -253,7 +253,7 @@ void EmbeddingDDR::SyncLatestEmbedding() throw std::invalid_argument(errMsg); } } - cacheManager_->UpdateSSDEmb(name, ptr, embInfo_.extEmbeddingSize, info.swapOutSSDKeys, info.swapOutSSDAddrOffs); + cacheManager_->UpdateL3StorageEmb(name, ptr, embInfo_.extEmbeddingSize, info.swapOutL3StorageKeys, info.swapOutL3StorageAddrOffs); } } diff --git a/src/core/emb_table/embedding_table.h b/src/core/emb_table/embedding_table.h index 8b622194..cbf15a7a 100644 --- a/src/core/emb_table/embedding_table.h +++ b/src/core/emb_table/embedding_table.h @@ -20,7 +20,7 @@ See the License for the specific language governing permissions and #include #include "utils/common.h" -#include "ssd_cache/cache_manager.h" +#include "l3_storage/cache_manager.h" #include "file_system/file_system_handler.h" namespace MxRec { diff --git a/src/core/hybrid_mgmt/hybrid_mgmt.cpp b/src/core/hybrid_mgmt/hybrid_mgmt.cpp index b96f4eb9..973831a2 100644 --- a/src/core/hybrid_mgmt/hybrid_mgmt.cpp +++ b/src/core/hybrid_mgmt/hybrid_mgmt.cpp @@ -48,17 +48,17 @@ void HybridMgmt::InitRankInfo(RankInfo& rankInfo, const vector& embInfo // 计算训练任务涉及的所有表在DDR中需要分配的key数量 size_t totHostVocabSize = 0; - size_t totalSsdVocabSize = 0; + size_t totalL3StorageVocabSize = 0; for (const auto& emb : embInfos) { totHostVocabSize += emb.hostVocabSize; - totalSsdVocabSize += emb.ssdVocabSize; + totalL3StorageVocabSize += emb.ssdVocabSize; } // 根据DDR的key数量,配置存储模式HBM/DDR if (totHostVocabSize != 0) { rankInfo.isDDR = true; } - if (totalSsdVocabSize != 0) { + if (totalL3StorageVocabSize != 0) { rankInfo.isSSDEnabled = true; } #endif @@ -115,16 +115,18 @@ bool HybridMgmt::Initialize(RankInfo rankInfo, const vector& embInfos, KEY_PROCESS_INSTANCE->Initialize(rankInfo, embInfos, thresholdValues, seed); isRunning = true; - isSSDEnabled = rankInfo.isSSDEnabled; + isL3StorageEnabled = rankInfo.isSSDEnabled; EmbeddingMgmt::Instance()->Init(rankInfo, embInfos, 
seed); if (rankInfo.isDDR) { InitEmbeddingCache(embInfos); } - if (isSSDEnabled) { + if (isL3StorageEnabled) { cacheManager = Singleton::GetInstance(); - cacheManager->Init(embCache, mgmtEmbInfo); + // 用户可实现L3Storage接口替换SSDEngine以对接外部存储服务 + auto ssdEngine = std::make_shared(); + cacheManager->Init(embCache, mgmtEmbInfo, ssdEngine); EmbeddingMgmt::Instance()->SetCacheManagerForEmbTable(cacheManager); } isLoad = ifLoad; @@ -170,10 +172,10 @@ void HybridMgmt::Save(const string& savePath) offsetMapToSend = EmbeddingMgmt::Instance()->GetDeviceOffsets(); } - if (isSSDEnabled) { - LOG_DEBUG(MGMT + "start save SSD data"); + if (isL3StorageEnabled) { + LOG_DEBUG(MGMT + "start save L3Storage data"); auto step = GetStepFromPath(savePath); - cacheManager->SaveSSDEngine(step); + cacheManager->Save(step); } // 保存特征准入淘汰相关的数据 @@ -248,8 +250,8 @@ bool HybridMgmt::Load(const string& loadPath, vector warmStartTables) featAdmitNEvict.LoadHistoryRecords(loadData.histRec); } - if (isSSDEnabled) { - LOG_DEBUG(MGMT + "Start host side load: ssd key freq map"); + if (isL3StorageEnabled) { + LOG_DEBUG(MGMT + "Start host side load: L3Storage key freq map"); auto step = GetStepFromPath(loadPath); cacheManager->Load(mgmtEmbInfo, step, trainKeysSet); } @@ -572,13 +574,13 @@ bool HybridMgmt::ParseKeys(int channelId, int& batchId, TaskType type) }); break; case TaskType::DDR: - if (!isSSDEnabled) { + if (!isL3StorageEnabled) { parseKeyThreadPool.emplace_back([this, info, &remainBatch, embInfo]() { ProcessEmbInfoDDR(info, remainBatch); }); } else { parseKeyThreadPool.emplace_back([this, info, &remainBatch, embInfo]() { - ProcessEmbInfoSSD(info, remainBatch); + ProcessEmbInfoL3Storage(info, remainBatch); }); } break; @@ -780,12 +782,12 @@ bool HybridMgmt::Evict() } for (const string& embName : allTableNames) { EvictKeys(embName, evictKeyMap[COMBINE_HISTORY_NAME]); - EvictSSDKeys(embName, evictKeyMap[COMBINE_HISTORY_NAME]); + EvictL3StorageKeys(embName, evictKeyMap[COMBINE_HISTORY_NAME]); } } else { for (const auto& evict : as_const(evictKeyMap)) { EvictKeys(evict.first, evict.second); - EvictSSDKeys(evict.first, evict.second); + EvictL3StorageKeys(evict.first, evict.second); } } } @@ -809,12 +811,12 @@ void HybridMgmt::EvictKeys(const string& embName, const vector& } } -void HybridMgmt::EvictSSDKeys(const string& embName, const vector& keys) const +void HybridMgmt::EvictL3StorageKeys(const string& embName, const vector& keys) const { - if (!isSSDEnabled) { + if (!isL3StorageEnabled) { return; } - cacheManager->EvictSSDEmbedding(embName, keys); + cacheManager->EvictL3StorageEmbedding(embName, keys); } int HybridMgmt::GetStepFromPath(const string& loadPath) const @@ -885,14 +887,14 @@ int64_t HybridMgmt::GetTableSize(const string& embName) const LOG_INFO(MGMT + "HBM mode, get emb:[{}] size:{}", embName, size); return size; } - int64_t ssdSize = 0; - if (mgmtRankInfo.isSSDEnabled) { - ssdSize = cacheManager->GetTableEmbeddingSize(embName); + int64_t l3StorageUsage = 0; + if (isL3StorageEnabled) { + l3StorageUsage = cacheManager->GetTableUsage(embName); } uint32_t ddrSize = embCache->GetUsage(embName); - size = static_cast(ddrSize) + ssdSize; - LOG_INFO(MGMT + "DDR/SSD mode, get emb:[{}] size:{}", embName, size); + size = static_cast(ddrSize) + l3StorageUsage; + LOG_INFO(MGMT + "DDR/L3Storage mode, get emb:[{}] size:{}", embName, size); #endif return size; } @@ -1118,7 +1120,7 @@ void HybridMgmt::EmbeddingReceiveAndUpdateDDR(int batchId, int index, const EmbI EmbeddingUpdateDDR(info, ptr, swapOutAddrs); } -void 
HybridMgmt::EmbeddingLookUpAndSendSSD(int batchId, int index, const EmbInfo& embInfo) +void HybridMgmt::EmbeddingLookUpAndSendL3Storage(int batchId, int index, const EmbInfo& embInfo) { int cvNotifyIndex = 0; if (index + 1 != EMBEDDING_THREAD_NUM) { @@ -1134,16 +1136,16 @@ void HybridMgmt::EmbeddingLookUpAndSendSSD(int batchId, int index, const EmbInfo }; vector h2dEmb; - auto isSuccess = EmbeddingLookUpSSD(info, h2dEmb); + auto isSuccess = EmbeddingLookUpL3Storage(info, h2dEmb); if (!isSuccess) { LOG_INFO("HybridMgmt is not running"); return; } - EmbeddingSendSSD(info, h2dEmb); + EmbeddingSendL3Storage(info, h2dEmb); } -void HybridMgmt::EmbeddingReceiveAndUpdateSSD(int batchId, int index, const EmbInfo& embInfo) +void HybridMgmt::EmbeddingReceiveAndUpdateL3Storage(int batchId, int index, const EmbInfo& embInfo) { int cvNotifyIndex = 0; if (index + 1 != EMBEDDING_THREAD_NUM) { @@ -1160,9 +1162,9 @@ void HybridMgmt::EmbeddingReceiveAndUpdateSSD(int batchId, int index, const EmbI float* ptr = nullptr; vector swapOutAddrs; int64_t dims0 = 0; - EmbeddingReceiveSSD(info, ptr, swapOutAddrs, dims0); + EmbeddingReceiveL3Storage(info, ptr, swapOutAddrs, dims0); - EmbeddingUpdateSSD(info, ptr, swapOutAddrs, dims0); + EmbeddingUpdateL3Storage(info, ptr, swapOutAddrs, dims0); } @@ -1172,11 +1174,11 @@ void HybridMgmt::EmbeddingReceiveAndUpdateSSD(int batchId, int index, const EmbI /// \param channelId 通道索引(训练/推理) /// \param remainBatchOut 是否从通道获取了数据 /// \return 是否处理成功 -void HybridMgmt::ProcessEmbInfoSSD(const EmbBaseInfo& info, bool& remainBatchOut) +void HybridMgmt::ProcessEmbInfoL3Storage(const EmbBaseInfo& info, bool& remainBatchOut) { #ifndef GTEST TimeCost getAndSendTensorsTC; - LOG_DEBUG("ProcessEmbInfoSSD table:{}, channel:{}, batchId:{}", info.name, info.channelId, info.batchId); + LOG_DEBUG("ProcessEmbInfoL3Storage table:{}, channel:{}, batchId:{}", info.name, info.channelId, info.batchId); if (info.channelId == TRAIN_CHANNEL_ID && info.batchId == hybridMgmtBlock->maxTrainStep) { HandleReachMaxStepCase(info, remainBatchOut); @@ -1184,7 +1186,7 @@ void HybridMgmt::ProcessEmbInfoSSD(const EmbBaseInfo& info, bool& remainBatchOut } // 只有在每次GetUniqueKeys的时候才知道上游是否已经EOS - // 注意GetUniqueKeys与EOS关联,需要在ProcessEmbInfoSSD最先调用,如需调整位置,请参考并适配其他函数 + // 注意GetUniqueKeys与EOS关联,需要在ProcessEmbInfoL3Storage最先调用,如需调整位置,请参考并适配其他函数 // 获取GlobalUnique向量 auto uniqueKeys = GetUniqueKeys(info, remainBatchOut); if (uniqueKeys.empty()) { @@ -1215,7 +1217,7 @@ void HybridMgmt::ProcessEmbInfoSSD(const EmbBaseInfo& info, bool& remainBatchOut SendGlobalUniqueVec(info, uniqueKeys, restoreVecSec); - auto isNeedReturn = HandleSpecialProcessStatusSSD(info, getAndSendTensorsTC, swapInKoPair, swapOutKoPair); + auto isNeedReturn = HandleSpecialProcessStatusL3Storage(info, getAndSendTensorsTC, swapInKoPair, swapOutKoPair); if (isNeedReturn) { return; } @@ -1226,7 +1228,7 @@ void HybridMgmt::ProcessEmbInfoSSD(const EmbBaseInfo& info, bool& remainBatchOut auto &swapOutKeys = swapOutKoPair.first; auto &swapOutPos = swapOutKoPair.second; - HandleDataSwapForSSD(info, swapInKeys, swapOutKeys); + HandleDataSwapForL3Storage(info, swapInKeys, swapOutKeys); auto lastSwapInPos = lastSwapInPosMap[info.name]; lastSwapInPosMap[info.name] = swapInPos; // 暂存待下一步发送 @@ -1238,13 +1240,13 @@ void HybridMgmt::ProcessEmbInfoSSD(const EmbBaseInfo& info, bool& remainBatchOut HandleEndBatchCase(info, swapInPos); - CheckLookupAddrSuccessSSD(); + CheckLookupAddrSuccessL3Storage(); if (info.channelId == TRAIN_CHANNEL_ID) { alreadyTrainOnce = true; } - 
LOG_DEBUG("ProcessEmbInfoSSD end, table:{}, batchId:{}, swapProcessTC(ms):{}, getAndSendTensorsTC(ms):{}", + LOG_DEBUG("ProcessEmbInfoL3Storage end, table:{}, batchId:{}, swapProcessTC(ms):{}, getAndSendTensorsTC(ms):{}", info.name, info.batchId, swapProcessTC.ElapsedMS(), getAndSendTensorsTC.ElapsedMS()); #endif } @@ -1286,7 +1288,7 @@ void HybridMgmt::InitDataPipelineForDDR(const string &embName) LOG_DEBUG("data pipeline for ddr init"); } -void HybridMgmt::InitDataPipelineForSSD(const string &embName, int extEmbeddingSize) +void HybridMgmt::InitDataPipelineForL3Storage(const string &embName, int extEmbeddingSize) { // 初始化公共队列 HBMSwapKeyQue[embName+SWAP_IN_STR]; @@ -1295,21 +1297,21 @@ void HybridMgmt::InitDataPipelineForSSD(const string &embName, int extEmbeddingS tableToQueueLookup[embName+SWAP_OUT_STR]; HBMSwapKeyQue[embName + ADDR_STR]; - SwapOut2SSDKeyQue[embName + SWAP_IN_STR]; - SwapOut2SSDKeyQue[embName + ADDR_STR]; - SwapOut2SSDKeyQue[embName + SWAP_OUT_STR]; + SwapOut2L3StorageKeyQue[embName + SWAP_IN_STR]; + SwapOut2L3StorageKeyQue[embName + ADDR_STR]; + SwapOut2L3StorageKeyQue[embName + SWAP_OUT_STR]; DDRSwapKeyQue[embName + SWAP_OUT_STR]; DDRSwapKeyQue[embName + SWAP_IN_STR]; - DDRSwapKeyForSSDQue[embName + SWAP_OUT_STR]; - DDRSwapKeyForSSDQue[embName + SWAP_IN_STR]; + DDRSwapKeyForL3StorageQue[embName + SWAP_OUT_STR]; + DDRSwapKeyForL3StorageQue[embName + SWAP_IN_STR]; DDRSwapAddrsQue[embName + SWAP_OUT_STR]; DDRSwapAddrsQue[embName + SWAP_IN_STR]; // 初始化lookup线程 lookUpThreads.emplace_back( std::async(std::launch::async, [=] { LookUpAddrs(embName, extEmbeddingSize); })); - LOG_DEBUG("data pipeline for ssd init"); + LOG_DEBUG("data pipeline for L3Storage init"); } void HybridMgmt::InitEmbeddingCache(const vector& embInfos) @@ -1320,8 +1322,8 @@ void HybridMgmt::InitEmbeddingCache(const vector& embInfos) EmbeddingMgmt::Instance()->SetHDTransferForEmbTable(hdTransfer); for (auto embInfo: embInfos) { - if (isSSDEnabled) { - InitDataPipelineForSSD(embInfo.name, embInfo.extEmbeddingSize); + if (isL3StorageEnabled) { + InitDataPipelineForL3Storage(embInfo.name, embInfo.extEmbeddingSize); } else { InitDataPipelineForDDR(embInfo.name); } @@ -1349,13 +1351,13 @@ void HybridMgmt::JoinEmbeddingCacheThread() for (auto &p : HBMSwapKeyQue) { p.second.DestroyQueue(); } - for (auto &p : SwapOut2SSDKeyQue) { + for (auto &p : SwapOut2L3StorageKeyQue) { p.second.DestroyQueue(); } for (auto &p : DDRSwapKeyQue) { p.second.DestroyQueue(); } - for (auto &p : DDRSwapKeyForSSDQue) { + for (auto &p : DDRSwapKeyForL3StorageQue) { p.second.DestroyQueue(); } for (auto &p : DDRSwapAddrsQue) { @@ -1585,10 +1587,10 @@ void HybridMgmt::CreateEmbeddingLookUpAndSendThread(int index, const EmbInfo& em int cur_batch_id = lookUpAndSendTableBatchMap[embInfo.name]; lookUpAndSendTableBatchMap[embInfo.name]++; lookUpAndSendBatchIdMtx.unlock(); - if (!isSSDEnabled) { + if (!isL3StorageEnabled) { EmbeddingLookUpAndSendDDR(cur_batch_id, index, embInfo); } else { - EmbeddingLookUpAndSendSSD(cur_batch_id, index, embInfo); + EmbeddingLookUpAndSendL3Storage(cur_batch_id, index, embInfo); } } else { lookUpAndSendBatchIdMtx.unlock(); @@ -1609,10 +1611,10 @@ void HybridMgmt::CreateEmbeddingReceiveAndUpdateThread(int index, const EmbInfo& int cur_batch_id = receiveAndUpdateTableBatchMap[embInfo.name]; receiveAndUpdateTableBatchMap[embInfo.name]++; receiveAndUpdateBatchIdMtx.unlock(); - if (!isSSDEnabled) { + if (!isL3StorageEnabled) { EmbeddingReceiveAndUpdateDDR(cur_batch_id, index, embInfo); } else { - 
EmbeddingReceiveAndUpdateSSD(cur_batch_id, index, embInfo); + EmbeddingReceiveAndUpdateL3Storage(cur_batch_id, index, embInfo); } } else { receiveAndUpdateBatchIdMtx.unlock(); @@ -1624,8 +1626,8 @@ void HybridMgmt::CreateEmbeddingReceiveAndUpdateThread(int index, const EmbInfo& }); } -bool HybridMgmt::EmbeddingReceiveSSD(const EmbTaskInfo &info, float *&ptr, - vector &swapOutAddrs, int64_t& dims0) +bool HybridMgmt::EmbeddingReceiveL3Storage(const EmbTaskInfo &info, float *&ptr, + vector &swapOutAddrs, int64_t& dims0) { std::unique_lock lastRecvFinishLocker(lastRecvFinishMutexMap[info.name][info.threadIdx]); cvLastRecvFinishMap[info.name][info.threadIdx].wait(lastRecvFinishLocker, [info, this] { @@ -1671,8 +1673,8 @@ bool HybridMgmt::EmbeddingReceiveSSD(const EmbTaskInfo &info, float *&ptr, return true; } -void HybridMgmt::EmbeddingUpdateSSD(const EmbTaskInfo& info, float *embPtr, - vector& swapOutAddrs, int64_t& dims0) +void HybridMgmt::EmbeddingUpdateL3Storage(const EmbTaskInfo& info, float *embPtr, + vector& swapOutAddrs, int64_t& dims0) { std::unique_lock lastUpdateFinishLocker(lastUpdateFinishMutexMap[info.name][info.threadIdx]); cvLastUpdateFinishMap[info.name][info.threadIdx].wait(lastUpdateFinishLocker, [info, this] { @@ -1698,26 +1700,26 @@ void HybridMgmt::EmbeddingUpdateSSD(const EmbTaskInfo& info, float *embPtr, LOG_DEBUG("table:{}, batchId:{}, thread:{}, EmbeddingUpdateTC(ms):{}", info.name.c_str(), info.batchId, info.threadIdx, EmbeddingUpdateTC.ElapsedMS()); - // SSD更新 - TimeCost SSDUpdateTC = TimeCost(); - std::vector swapOutSSDAddrOffs = SwapOut2SSDKeyQue[info.name + ADDR_STR].WaitAndPop(); - std::vector swapOutSSDKeys = SwapOut2SSDKeyQue[info.name + SWAP_OUT_STR].WaitAndPop(); + // L3Storage更新 + TimeCost L3StorageUpdateTC = TimeCost(); + std::vector swapOutL3StorageAddrOffs = SwapOut2L3StorageKeyQue[info.name + ADDR_STR].WaitAndPop(); + std::vector swapOutL3StorageKeys = SwapOut2L3StorageKeyQue[info.name + SWAP_OUT_STR].WaitAndPop(); if (!isRunning) { return; } - if (dims0 != static_cast(swapOutAddrs.size() + swapOutSSDKeys.size())) { + if (dims0 != static_cast(swapOutAddrs.size() + swapOutL3StorageKeys.size())) { throw runtime_error("data dims[0] != swapOutKeys.size"); } - cacheManager->UpdateSSDEmb(info.name, embPtr, extEmbeddingSize, swapOutSSDKeys, swapOutSSDAddrOffs); - LOG_DEBUG("table:{}, batchId:{}, thread{}, SSDUpdateTC(ms):{}", - info.name.c_str(), info.batchId, info.threadIdx, SSDUpdateTC.ElapsedMS()); + cacheManager->UpdateL3StorageEmb(info.name, embPtr, extEmbeddingSize, swapOutL3StorageKeys, swapOutL3StorageAddrOffs); + LOG_DEBUG("table:{}, batchId:{}, thread{}, L3StorageUpdateTC(ms):{}", + info.name.c_str(), info.batchId, info.threadIdx, L3StorageUpdateTC.ElapsedMS()); lastUpdateFinishStepMap[info.name]++; cvLastUpdateFinishMap[info.name][info.cvNotifyIndex].notify_all(); } -bool HybridMgmt::EmbeddingLookUpSSD(const EmbTaskInfo& info, vector& h2dEmb) +bool HybridMgmt::EmbeddingLookUpL3Storage(const EmbTaskInfo& info, vector& h2dEmb) { std::unique_lock lastUpdateFinishLocker(lastUpdateFinishMutexMap[info.name][info.threadIdx]); cvLastUpdateFinishMap[info.name][info.threadIdx].wait(lastUpdateFinishLocker, [info, this] { @@ -1735,27 +1737,27 @@ bool HybridMgmt::EmbeddingLookUpSSD(const EmbTaskInfo& info, vector& h2d return false; } - TimeCost transferDDR2SSDTC = TimeCost(); + TimeCost transferDDR2L3StorageTC = TimeCost(); // DDR腾空间 - std::vector DDR2SSDKeys = DDRSwapKeyForSSDQue[info.name + SWAP_OUT_STR].WaitAndPop(); - std::vector DDR2SSDAddrs = 
DDRSwapAddrsQue[info.name + SWAP_OUT_STR].WaitAndPop(); + std::vector DDR2L3StorageKeys = DDRSwapKeyForL3StorageQue[info.name + SWAP_OUT_STR].WaitAndPop(); + std::vector DDR2L3StorageAddrs = DDRSwapAddrsQue[info.name + SWAP_OUT_STR].WaitAndPop(); if (!isRunning) { return false; } - cacheManager->TransferDDR2SSD(info.name, info.extEmbeddingSize, DDR2SSDKeys, DDR2SSDAddrs); - LOG_DEBUG("table:{}, thread:{}, transferDDR2SSDTC(ms):{}", - info.name.c_str(), info.threadIdx, transferDDR2SSDTC.ElapsedMS()); + cacheManager->TransferDDR2L3Storage(info.name, info.extEmbeddingSize, DDR2L3StorageKeys, DDR2L3StorageAddrs); + LOG_DEBUG("table:{}, thread:{}, transferDDR2L3StorageTC(ms):{}", + info.name.c_str(), info.threadIdx, transferDDR2L3StorageTC.ElapsedMS()); - TimeCost fetchSSDEmb2DDRTC = TimeCost(); - // swapInKeys中在SSD的到DDR - std::vector SSD2DDRKeys = DDRSwapKeyForSSDQue[info.name + SWAP_IN_STR].WaitAndPop(); - std::vector SSD2DDRAddrs = DDRSwapAddrsQue[info.name + SWAP_IN_STR].WaitAndPop(); + TimeCost fetchL3StorageEmb2DDRTC = TimeCost(); + // swapInKeys中在L3Storage的挪到DDR + std::vector L3Storage2DDRKeys = DDRSwapKeyForL3StorageQue[info.name + SWAP_IN_STR].WaitAndPop(); + std::vector L3Storage2DDRAddrs = DDRSwapAddrsQue[info.name + SWAP_IN_STR].WaitAndPop(); if (!isRunning) { return false; } - cacheManager->FetchSSDEmb2DDR(info.name, info.extEmbeddingSize, SSD2DDRKeys, SSD2DDRAddrs); - LOG_DEBUG("table:{}, thread:{}, fetchSSDEmb2DDRTC(ms):{}", - info.name.c_str(), info.threadIdx, fetchSSDEmb2DDRTC.ElapsedMS()); + cacheManager->FetchL3StorageEmb2DDR(info.name, info.extEmbeddingSize, L3Storage2DDRKeys, L3Storage2DDRAddrs); + LOG_DEBUG("table:{}, thread:{}, fetchL3StorageEmb2DDRTC(ms):{}", + info.name.c_str(), info.threadIdx, fetchL3StorageEmb2DDRTC.ElapsedMS()); bool isSuccess = BuildH2DEmbedding(info, h2dEmb); if (!isSuccess) { @@ -1768,7 +1770,7 @@ bool HybridMgmt::EmbeddingLookUpSSD(const EmbTaskInfo& info, vector& h2d return true; } -void HybridMgmt::EmbeddingSendSSD(const EmbTaskInfo& info, vector& h2dEmb) +void HybridMgmt::EmbeddingSendL3Storage(const EmbTaskInfo& info, vector& h2dEmb) { std::unique_lock lastSendFinishLocker(lastSendFinishMutexMap[info.name][info.threadIdx]); cvLastSendFinishMap[info.name][info.threadIdx].wait(lastSendFinishLocker, [info, this] { @@ -1847,9 +1849,9 @@ void HybridMgmt::HandleFirstBatchCaseDDR(const EmbBaseInfo& info, HBMSwapKeyQue[info.name + SWAP_IN_STR].Pushv(swapInKeys); } -void HybridMgmt::HandleFirstBatchCaseSSD(const EmbBaseInfo& info, - std::pair, vector>& swapInKoPair, - std::pair, vector>& swapOutKoPair) +void HybridMgmt::HandleFirstBatchCaseL3Storage(const EmbBaseInfo& info, + std::pair, vector>& swapInKoPair, + std::pair, vector>& swapOutKoPair) { // 发现train、save、eval切换,先保存状态,发emptySwapOutKeys以对应上一步的emptySwapOutPos vector emptySwapOutKeys; @@ -1858,51 +1860,51 @@ void HybridMgmt::HandleFirstBatchCaseSSD(const EmbBaseInfo& info, trainTestSwitchInfoStore[info.name] = {swapOutKoPair.first, swapOutKoPair.second}; TimeCost ProcessSwapInKeysTC = TimeCost(); - vector SSDToDDRKeys; - vector DDRToSSDKeys; - cacheManager->ProcessSwapInKeys(info.name, swapInKoPair.first, DDRToSSDKeys, SSDToDDRKeys); + vector L3StorageToDDRKeys; + vector DDRToL3StorageKeys; + cacheManager->ProcessSwapInKeys(info.name, swapInKoPair.first, DDRToL3StorageKeys, L3StorageToDDRKeys); LOG_DEBUG("ProcessSwapInKeysTC(ms):{} ", ProcessSwapInKeysTC.ElapsedMS()); vector emptySwapOutDDRKeys; vector emptySwapOutDDRAddrOffs; - vector emptySwapOutSSDKeys; - vector emptySwapOutSSDAddrOff; + 
vector emptySwapOutL3StorageKeys; + vector emptySwapOutL3StorageAddrOff; LOG_DEBUG("table:{}, batchId:{}, channelId:{}, swapInSize:{}, swapOutSize:{}", info.name, info.batchId, info.channelId, swapInKoPair.first.size(), swapOutKoPair.first.size()); LOG_DEBUG("table:{}, batchId:{}, channelId:{}, swapOutDDRKeys.size:{}, swapOutDDRAddrOffs.size:{}, " - "swapOutSSDKeys.size:{}, swapOutSSDAddrOff.size:{}", + "swapOutL3StorageKeys.size:{}, swapOutL3StorageAddrOff.size:{}", info.name, info.batchId, info.channelId, emptySwapOutDDRKeys.size(), emptySwapOutDDRAddrOffs.size(), - emptySwapOutSSDKeys.size(), emptySwapOutSSDAddrOff.size()); - LOG_DEBUG("table:{}, batchId:{}, channelId:{}, DDRToSSDKeys.size:{}, SSDToDDRKeys.size:{}", - info.name, info.batchId, info.channelId, DDRToSSDKeys.size(), SSDToDDRKeys.size()); + emptySwapOutL3StorageKeys.size(), emptySwapOutL3StorageAddrOff.size()); + LOG_DEBUG("table:{}, batchId:{}, channelId:{}, DDRToL3StorageKeys.size:{}, L3StorageToDDRKeys.size:{}", + info.name, info.batchId, info.channelId, DDRToL3StorageKeys.size(), L3StorageToDDRKeys.size()); - auto DDRToSSDKeysForSSD = DDRToSSDKeys; - auto SSDToDDRKeysForSSD = SSDToDDRKeys; - // DDR<->SSD - DDRSwapKeyQue[info.name + SWAP_OUT_STR].Pushv(DDRToSSDKeys); - DDRSwapKeyQue[info.name + SWAP_IN_STR].Pushv(SSDToDDRKeys); + auto DDRToL3StorageKeysForL3S = DDRToL3StorageKeys; + auto L3StorageToDDRKeysForL3S = L3StorageToDDRKeys; + // DDR<->L3Storage + DDRSwapKeyQue[info.name + SWAP_OUT_STR].Pushv(DDRToL3StorageKeys); + DDRSwapKeyQue[info.name + SWAP_IN_STR].Pushv(L3StorageToDDRKeys); - DDRSwapKeyForSSDQue[info.name + SWAP_OUT_STR].Pushv(DDRToSSDKeysForSSD); - DDRSwapKeyForSSDQue[info.name + SWAP_IN_STR].Pushv(SSDToDDRKeysForSSD); + DDRSwapKeyForL3StorageQue[info.name + SWAP_OUT_STR].Pushv(DDRToL3StorageKeysForL3S); + DDRSwapKeyForL3StorageQue[info.name + SWAP_IN_STR].Pushv(L3StorageToDDRKeysForL3S); // HBM<->DDR HBMSwapKeyQue[info.name + SWAP_OUT_STR].Pushv(emptySwapOutDDRKeys); HBMSwapKeyQue[info.name + ADDR_STR].Pushv(emptySwapOutDDRAddrOffs); HBMSwapKeyQue[info.name + SWAP_IN_STR].Pushv(swapInKoPair.first); - // HBM->SSD - SwapOut2SSDKeyQue[info.name + SWAP_OUT_STR].Pushv(emptySwapOutSSDKeys); - SwapOut2SSDKeyQue[info.name + ADDR_STR].Pushv(emptySwapOutSSDAddrOff); + // HBM->L3Storage + SwapOut2L3StorageKeyQue[info.name + SWAP_OUT_STR].Pushv(emptySwapOutL3StorageKeys); + SwapOut2L3StorageKeyQue[info.name + ADDR_STR].Pushv(emptySwapOutL3StorageAddrOff); } -void HybridMgmt::HandleDataSwapForSSD(const EmbBaseInfo& info, - vector &swapInKeys, vector &swapOutKeys) +void HybridMgmt::HandleDataSwapForL3Storage(const EmbBaseInfo& info, + vector &swapInKeys, vector &swapOutKeys) { TimeCost ProcessSwapInKeysTC; - vector SSDToDDRKeys; - vector DDRToSSDKeys; - cacheManager->ProcessSwapInKeys(info.name, swapInKeys, DDRToSSDKeys, SSDToDDRKeys); + vector L3StorageToDDRKeys; + vector DDRToL3StorageKeys; + cacheManager->ProcessSwapInKeys(info.name, swapInKeys, DDRToL3StorageKeys, L3StorageToDDRKeys); LOG_DEBUG("ProcessSwapInKeysTC(ms):{} ", ProcessSwapInKeysTC.ElapsedMS()); TimeCost ProcessSwapOutKeysTC; @@ -1913,29 +1915,29 @@ void HybridMgmt::HandleDataSwapForSSD(const EmbBaseInfo& info, LOG_DEBUG("table:{}, batchId:{}, channelId:{}, swapInSize:{}, swapOutSize:{}", info.name, info.batchId, info.channelId, swapInKeys.size(), swapOutKeys.size()); LOG_DEBUG("table:{}, batchId:{}, channelId:{}, swapOutDDRKeys:{}, swapOutDDRAddrOffs:{}, " - "swapOutSSDKeys:{}, swapOutSSDAddrOff:{}", + "swapOutL3StorageKeys:{}, 
swapOutL3StorageAddrOff:{}", info.name, info.batchId, info.channelId, swapInfo.swapOutDDRKeys.size(), - swapInfo.swapOutDDRAddrOffs.size(), swapInfo.swapOutSSDKeys.size(), swapInfo.swapOutSSDAddrOffs.size()); - LOG_DEBUG("table:{}, batchId:{}, channelId:{}, DDRToSSDKeys:{}, SSDToDDRKeys:{}", - info.name, info.batchId, info.channelId, DDRToSSDKeys.size(), SSDToDDRKeys.size()); + swapInfo.swapOutDDRAddrOffs.size(), swapInfo.swapOutL3StorageKeys.size(), swapInfo.swapOutL3StorageAddrOffs.size()); + LOG_DEBUG("table:{}, batchId:{}, channelId:{}, DDRToL3StorageKeys:{}, L3StorageToDDRKeys:{}", + info.name, info.batchId, info.channelId, DDRToL3StorageKeys.size(), L3StorageToDDRKeys.size()); - auto DDRToSSDKeysForSSD = DDRToSSDKeys; - auto SSDToDDRKeysForSSD = SSDToDDRKeys; - // DDR<->SSD - DDRSwapKeyQue[info.name + SWAP_OUT_STR].Pushv(DDRToSSDKeys); - DDRSwapKeyQue[info.name + SWAP_IN_STR].Pushv(SSDToDDRKeys); + auto DDRToL3StorageKeysForL3S = DDRToL3StorageKeys; + auto L3StorageToDDRKeysForL3S = L3StorageToDDRKeys; + // DDR<->L3Storage + DDRSwapKeyQue[info.name + SWAP_OUT_STR].Pushv(DDRToL3StorageKeys); + DDRSwapKeyQue[info.name + SWAP_IN_STR].Pushv(L3StorageToDDRKeys); - DDRSwapKeyForSSDQue[info.name + SWAP_OUT_STR].Pushv(DDRToSSDKeysForSSD); - DDRSwapKeyForSSDQue[info.name + SWAP_IN_STR].Pushv(SSDToDDRKeysForSSD); + DDRSwapKeyForL3StorageQue[info.name + SWAP_OUT_STR].Pushv(DDRToL3StorageKeysForL3S); + DDRSwapKeyForL3StorageQue[info.name + SWAP_IN_STR].Pushv(L3StorageToDDRKeysForL3S); // HBM<->DDR HBMSwapKeyQue[info.name + SWAP_OUT_STR].Pushv(swapInfo.swapOutDDRKeys); HBMSwapKeyQue[info.name + ADDR_STR].Pushv(swapInfo.swapOutDDRAddrOffs); HBMSwapKeyQue[info.name + SWAP_IN_STR].Pushv(swapInKeys); - // HBM->SSD - SwapOut2SSDKeyQue[info.name + SWAP_OUT_STR].Pushv(swapInfo.swapOutSSDKeys); - SwapOut2SSDKeyQue[info.name + ADDR_STR].Pushv(swapInfo.swapOutSSDAddrOffs); + // HBM->L3Storage + SwapOut2L3StorageKeyQue[info.name + SWAP_OUT_STR].Pushv(swapInfo.swapOutL3StorageKeys); + SwapOut2L3StorageKeyQue[info.name + ADDR_STR].Pushv(swapInfo.swapOutL3StorageAddrOffs); } bool HybridMgmt::BuildH2DEmbedding(const EmbTaskInfo &info, vector &h2dEmb) @@ -2130,9 +2132,9 @@ bool HybridMgmt::HandleSpecialProcessStatusDDR(const EmbBaseInfo &info, TimeCost return false; } -bool HybridMgmt::HandleSpecialProcessStatusSSD(const EmbBaseInfo &info, TimeCost &getAndSendTensorsTC, - pair, vector> &swapInKoPair, - pair, vector> &swapOutKoPair) +bool HybridMgmt::HandleSpecialProcessStatusL3Storage(const EmbBaseInfo &info, TimeCost &getAndSendTensorsTC, + pair, vector> &swapInKoPair, + pair, vector> &swapOutKoPair) { TimeCost swapProcessTC; auto &swapInPos = swapInKoPair.second; @@ -2141,19 +2143,19 @@ bool HybridMgmt::HandleSpecialProcessStatusSSD(const EmbBaseInfo &info, TimeCost if (specialProcessStatus[info.name] == ProcessStatus::AFTER_SWITCH_FIRST_BATCH) { // 发现train、save、eval切换,先保存状态,发emptySwapOutKeys以对应上一步的emptySwapOutPos - HandleFirstBatchCaseSSD(info, swapInKoPair, swapOutKoPair); + HandleFirstBatchCaseL3Storage(info, swapInKoPair, swapOutKoPair); LOG_DEBUG("handle channel switch case:afterSwitchFirstBatch, table:{}, channelId:{}, batchId:{}", info.name, info.channelId, info.batchId); if (mgmtRankInfo.ctrlSteps[info.channelId] == 1) { vector emptySwapOutPos; SendTensorForSwap(info, swapInPos, emptySwapOutPos); - LOG_DEBUG("ProcessEmbInfoSSD special case, user only run one step, table:{}, channelId:{}, batchId:{}", + LOG_DEBUG("ProcessEmbInfoL3Storage special case, user only run one step, table:{}, channelId:{}, 
batchId:{}", info.name, info.channelId, info.batchId); } specialProcessStatus[info.name] = ProcessStatus::AFTER_SWITCH_SECOND_BATCH; - LOG_DEBUG("ProcessEmbInfoSSD end, table:{}, batchId:{}, swapProcessTC(ms):{}, getAndSendTensorsTC(ms):{}", + LOG_DEBUG("ProcessEmbInfoL3Storage end, table:{}, batchId:{}, swapProcessTC(ms):{}, getAndSendTensorsTC(ms):{}", info.name, info.batchId, swapProcessTC.ElapsedMS(), getAndSendTensorsTC.ElapsedMS()); return true; } @@ -2183,7 +2185,7 @@ void HybridMgmt::CheckLookupAddrSuccessDDR() } } -void HybridMgmt::CheckLookupAddrSuccessSSD() +void HybridMgmt::CheckLookupAddrSuccessL3Storage() { if (!lookupAddrSuccess) { for (auto& t : lookUpThreads) { diff --git a/src/core/hybrid_mgmt/hybrid_mgmt.h b/src/core/hybrid_mgmt/hybrid_mgmt.h index 2b4b2fc8..02829896 100644 --- a/src/core/hybrid_mgmt/hybrid_mgmt.h +++ b/src/core/hybrid_mgmt/hybrid_mgmt.h @@ -33,7 +33,7 @@ See the License for the specific language governing permissions and #include "ock_ctr_common/include/error_code.h" #include "hd_transfer/hd_transfer.h" -#include "ssd_cache/cache_manager.h" +#include "l3_storage/cache_manager.h" #include "hybrid_mgmt_block.h" #include "emb_table/embedding_table.h" @@ -131,7 +131,7 @@ namespace MxRec { void ProcessEmbInfoDDR(const EmbBaseInfo& info, bool& remainBatchOut); - void ProcessEmbInfoSSD(const EmbBaseInfo& info, bool& remainBatchOut); + void ProcessEmbInfoL3Storage(const EmbBaseInfo& info, bool& remainBatchOut); GTEST_PRIVATE: bool mutexDestroy { false }; @@ -160,9 +160,9 @@ namespace MxRec { std::vector> lookUpThreads; std::map>> HBMSwapKeyQue; - std::map>> SwapOut2SSDKeyQue; + std::map>> SwapOut2L3StorageKeyQue; std::map>> DDRSwapKeyQue; - std::map>> DDRSwapKeyForSSDQue; + std::map>> DDRSwapKeyForL3StorageQue; std::map>> DDRSwapAddrsQue; std::mutex evictMut; @@ -188,7 +188,7 @@ namespace MxRec { void InitRankInfo(RankInfo& rankInfo, const vector& embInfos) const; - void EvictSSDKeys(const string& embName, const vector& keys) const; + void EvictL3StorageKeys(const string& embName, const vector& keys) const; int GetStepFromPath(const string& loadPath) const; @@ -204,9 +204,9 @@ namespace MxRec { void EmbeddingReceiveAndUpdateDDR(int batchId, int index, const EmbInfo& embInfo); - void EmbeddingLookUpAndSendSSD(int batchId, int index, const EmbInfo& embInfo); + void EmbeddingLookUpAndSendL3Storage(int batchId, int index, const EmbInfo& embInfo); - void EmbeddingReceiveAndUpdateSSD(int batchId, int index, const EmbInfo& embInfo); + void EmbeddingReceiveAndUpdateL3Storage(int batchId, int index, const EmbInfo& embInfo); void SendTensorForSwap(const EmbBaseInfo& info, const vector &swapInPosUint, @@ -222,7 +222,7 @@ namespace MxRec { HDTransfer *hdTransfer; OffsetMapT offsetMapToSend; OffsetMapT loadOffsetToSend; - bool isSSDEnabled { false }; + bool isL3StorageEnabled { false }; bool isRunning; bool isLoad { false }; bool isInitialized { false }; @@ -247,7 +247,7 @@ namespace MxRec { void InitDataPipelineForDDR(const string &embName); - void InitDataPipelineForSSD(const string &embName, int extEmbeddingSize); + void InitDataPipelineForL3Storage(const string &embName, int extEmbeddingSize); void JoinEmbeddingCacheThread(); @@ -265,13 +265,13 @@ namespace MxRec { void EmbeddingSendDDR(const EmbTaskInfo& info, vector& h2dEmb); - bool EmbeddingReceiveSSD(const EmbTaskInfo& info, float*& ptr, vector& swapOutAddrs, int64_t& dims0); + bool EmbeddingReceiveL3Storage(const EmbTaskInfo& info, float*& ptr, vector& swapOutAddrs, int64_t& dims0); - void EmbeddingUpdateSSD(const 
EmbTaskInfo& info, float* embPtr, vector& swapOutAddrs, int64_t& dims0); + void EmbeddingUpdateL3Storage(const EmbTaskInfo& info, float* embPtr, vector& swapOutAddrs, int64_t& dims0); - bool EmbeddingLookUpSSD(const EmbTaskInfo& info, vector& h2dEmb); + bool EmbeddingLookUpL3Storage(const EmbTaskInfo& info, vector& h2dEmb); - void EmbeddingSendSSD(const EmbTaskInfo& info, vector& h2dEmb); + void EmbeddingSendL3Storage(const EmbTaskInfo& info, vector& h2dEmb); void CreateEmbeddingLookUpAndSendThread(int index, const EmbInfo& embInfo); @@ -281,12 +281,12 @@ namespace MxRec { std::pair, vector>& swapInKoPair, std::pair, vector>& swapOutKoPair); - void HandleFirstBatchCaseSSD(const EmbBaseInfo& info, - std::pair, vector>& swapInKoPair, - std::pair, vector>& swapOutKoPair); + void HandleFirstBatchCaseL3Storage(const EmbBaseInfo& info, + std::pair, vector>& swapInKoPair, + std::pair, vector>& swapOutKoPair); - void HandleDataSwapForSSD(const EmbBaseInfo& info, - vector &swapInKeys, vector &swapOutKeys); + void HandleDataSwapForL3Storage(const EmbBaseInfo& info, + vector &swapInKeys, vector &swapOutKeys); bool BuildH2DEmbedding(const EmbTaskInfo& info, vector& h2dEmb); @@ -306,13 +306,13 @@ namespace MxRec { std::pair, vector>& swapInKoPair, std::pair, vector>& swapOutKoPair); - bool HandleSpecialProcessStatusSSD(const EmbBaseInfo& info, TimeCost& getAndSendTensorsTC, - std::pair, vector>& swapInKoPair, - std::pair, vector>& swapOutKoPair); + bool HandleSpecialProcessStatusL3Storage(const EmbBaseInfo& info, TimeCost& getAndSendTensorsTC, + std::pair, vector>& swapInKoPair, + std::pair, vector>& swapOutKoPair); void CheckLookupAddrSuccessDDR(); - void CheckLookupAddrSuccessSSD(); + void CheckLookupAddrSuccessL3Storage(); void GetSwapPairsAndKey2Offset(const EmbBaseInfo& info, vector &uniqueKeys, std::pair, vector>& swapInKoPair, diff --git a/src/core/ssd_cache/cache_manager.cpp b/src/core/l3_storage/cache_manager.cpp similarity index 61% rename from src/core/ssd_cache/cache_manager.cpp rename to src/core/l3_storage/cache_manager.cpp index a82a65a7..a2cbfb32 100644 --- a/src/core/ssd_cache/cache_manager.cpp +++ b/src/core/l3_storage/cache_manager.cpp @@ -25,55 +25,60 @@ See the License for the specific language governing permissions and using namespace MxRec; -void CacheManager::Init(ock::ctr::EmbCacheManagerPtr embCachePtr, vector& mgmtEmbInfo) +void CacheManager::Init(ock::ctr::EmbCacheManagerPtr embCachePtr, vector& mgmtEmbInfo, shared_ptr level3Storage) { LOG_INFO("CacheManager Init method begin"); + if (level3Storage == nullptr) { + throw runtime_error("level3Storage is nullptr"); + } + this->embCache = std::move(embCachePtr); for (auto& emb : mgmtEmbInfo) { EmbBaseInfo baseInfo {emb.ssdVocabSize, emb.ssdDataPath, false}; embBaseInfos.emplace(emb.name, baseInfo); preProcessMapper[emb.name].Initialize(emb.name, emb.hostVocabSize, emb.ssdVocabSize); } - ssdEngine->Start(); + this->l3Storage = level3Storage; + this->l3Storage->Start(); LOG_INFO("CacheManager Init method end"); } -bool CacheManager::IsKeyInSSD(const string& embTableName, emb_cache_key_t key) +bool CacheManager::IsKeyInL3Storage(const string& embTableName, emb_cache_key_t key) { - return ssdEngine->IsKeyExist(embTableName, key); + return l3Storage->IsKeyExist(embTableName, key); } -/// 淘汰SSD中Emb信息 +/// 淘汰三级存储中Emb信息 /// \param embTableName emb表名 /// \param keys 淘汰key列表 -void CacheManager::EvictSSDEmbedding(const string& embTableName, const vector& keys) +void CacheManager::EvictL3StorageEmbedding(const string& embTableName, const 
vector& keys) { if (keys.empty()) { return; } int keyStep = preProcessStep; - unordered_map& ssdMap = preProcessMapper[embTableName].excludeDDRKeyCountMap; + unordered_map& l3StorageMap = preProcessMapper[embTableName].excludeDDRKeyCountMap; LFUCache& ddrLfu = preProcessMapper[embTableName].lfuCache; - std::vector ssdKeysToBeDeleted; + std::vector l3StorageKeysToBeDeleted; // 1 删除缓存中记录的key的次数 for (auto &key: keys) { - auto it = ssdMap.find(key); - if (it != ssdMap.end()) { - ssdMap.erase(it); - ssdKeysToBeDeleted.emplace_back(key); + auto it = l3StorageMap.find(key); + if (it != l3StorageMap.end()) { + l3StorageMap.erase(it); + l3StorageKeysToBeDeleted.emplace_back(key); } else { ddrLfu.Pop(key); } } - ssdEvictThreads.emplace_back([=]() mutable { - // 2 删除SSD中保存的Emb数据 + l3StorageEvictThreads.emplace_back([=]() mutable { + // 2 删除L3Storage中保存的Emb数据 std::unique_lock lk(evictWaitMut); evictWaitCond.wait(lk, [keyStep, this] { return embeddingTaskStep == keyStep; }); - ssdEngine->DeleteEmbeddings(embTableName, ssdKeysToBeDeleted); + l3Storage->DeleteEmbeddings(embTableName, l3StorageKeysToBeDeleted); }); } @@ -93,29 +98,29 @@ void CacheManager::PutKey(const string& embTableName, const emb_key_t& key, Reco hashMap[key] = count; } -void CacheManager::CreateSSDTableIfNotExist(const std::string& embTableName) +void CacheManager::CreateL3StorageTableIfNotExist(const std::string& embTableName) { if (embBaseInfos[embTableName].isExist) { return; } - if (!ssdEngine->IsTableExist(embTableName)) { - ssdEngine->CreateTable(embTableName, embBaseInfos[embTableName].savePath, + if (!l3Storage->IsTableExist(embTableName)) { + l3Storage->CreateTable(embTableName, embBaseInfos[embTableName].savePath, embBaseInfos[embTableName].maxTableSize); embBaseInfos[embTableName].isExist = true; - LOG_INFO("create ssd table end, embTableName:" + embTableName); + LOG_INFO("create l3Storage table end, embTableName:" + embTableName); return; } - // 续训场景:embBaseInfos 没有保存,不会初始化;SSD表会初始化,此时表已存在 + // 续训场景:embBaseInfos 没有保存,不会初始化;L3Storage表会初始化,此时表已存在 embBaseInfos[embTableName].isExist = true; - LOG_INFO("ssd table is exist, embTableName:" + embTableName); + LOG_INFO("l3Storage table is exist, embTableName:" + embTableName); } CacheManager::~CacheManager() { - for (auto &t : ssdEvictThreads) { + for (auto& t : l3StorageEvictThreads) { t.join(); } - ssdEngine->Stop(); + l3Storage->Stop(); ddrKeyFreqMap.clear(); excludeDDRKeyCountMap.clear(); } @@ -123,18 +128,18 @@ CacheManager::~CacheManager() /// 加载数据到CacheManager /// \param ddrFreqInitMap ddr内key频次数据 /// \param excludeDdrFreqInitMap 非DDR key频次数据 -/// \param step 加载SSDEngine传入步数 +/// \param step 加载L3Storage传入步数 void CacheManager::Load(const std::vector &mgmtEmbInfo, int step, map>& trainKeySet) { - // 加载SSDEngine数据 + // 加载L3Storage数据 #ifndef GTEST for (auto& it : embBaseInfos) { string embTableName = it.first; EmbBaseInfo& embBase = it.second; - ssdEngine->Load(embTableName, embBase.savePath, embBase.maxTableSize, step); + l3Storage->Load(embTableName, embBase.savePath, embBase.maxTableSize, step); } - auto tableKeysVec = ssdEngine->ExportTableKey(); + auto tableKeysVec = l3Storage->ExportTableKey(); for (auto &it: tableKeysVec) { auto &embTableName = it.first; auto &keys = it.second; @@ -159,19 +164,19 @@ void CacheManager::Load(const std::vector &mgmtEmbInfo, int step, #endif } -void CacheManager::SaveSSDEngine(int step) +void CacheManager::Save(int step) { #ifndef GTEST - ssdEngine->Save(step); + l3Storage->Save(step); #endif } -int64_t 
CacheManager::GetTableEmbeddingSize(const string& tableName) +int64_t CacheManager::GetTableUsage(const string& tableName) { - if (ssdEngine == nullptr) { - throw runtime_error("SSDEngine not init"); + if (l3Storage == nullptr) { + throw runtime_error("L3Storage not init"); } - return ssdEngine->GetTableEmbeddingSize(tableName); + return l3Storage->GetTableUsage(tableName); } void CacheManager::ProcessSwapOutKeys(const string& tableName, const vector& swapOutKeys, @@ -179,10 +184,10 @@ void CacheManager::ProcessSwapOutKeys(const string& tableName, const vector 0) { keyMapper.InsertDDRKey(key); swapOutDDRKeys.push_back(key); swapOutDDRAddrOffs.push_back(i); availableDDRSize--; } else { - keyMapper.InsertSSDKey(key); - swapOutSSDKeys.push_back(key); - swapOutSSDAddrOffs.push_back(i); + keyMapper.InsertL3StorageKey(key); + swapOutL3StorageKeys.push_back(key); + swapOutL3StorageAddrOffs.push_back(i); } } } void CacheManager::ProcessSwapInKeys(const string& tableName, const vector& swapInKeys, - vector& DDRToSSDKeys, vector& SSDToDDRKeys) + vector& DDRToL3StorageKeys, vector& L3StorageToDDRKeys) { auto& keyMapper = preProcessMapper[tableName]; size_t externalDDRSize = 0; @@ -219,28 +224,28 @@ void CacheManager::ProcessSwapInKeys(const string& tableName, const vector ddrAvailableSize) { // 需要DDR--->SSD + if (externalDDRSize > ddrAvailableSize) { // 需要DDR--->L3Storage size_t transNum = externalDDRSize - ddrAvailableSize; - if (transNum > keyMapper.SSDAvailableSize()) { - throw invalid_argument("SSD table size too small, key quantity exceed while transferring DDR data to SSD"); + if (transNum > keyMapper.L3StorageAvailableSize()) { + throw invalid_argument("L3Storage table size too small, key quantity exceed while transferring DDR data to L3Storage"); } - // DDR--->SSD - keyMapper.GetAndDeleteLeastFreqDDRKey2SSD(transNum, swapInKeys, DDRToSSDKeys); + // DDR--->L3Storage + keyMapper.GetAndDeleteLeastFreqDDRKey2L3Storage(transNum, swapInKeys, DDRToL3StorageKeys); } - // SSD--->DDR - for (uint64_t key : SSDToDDRKeys) { + // L3Storage--->DDR + for (uint64_t key : L3StorageToDDRKeys) { keyMapper.InsertDDRKey(key); - keyMapper.RemoveSSDKey(key); + keyMapper.RemoveL3StorageKey(key); } for (uint64_t key : firstSeenKeys) { keyMapper.InsertDDRKey(key); @@ -248,31 +253,31 @@ void CacheManager::ProcessSwapInKeys(const string& tableName, const vector& keys, const vector& swapOutSSDddrOffs) +void CacheManager::UpdateL3StorageEmb(string tableName, float* embPtr, uint32_t extEmbeddingSize, + vector& keys, const vector& swapOutL3StorageOffs) { vector embeddingsAddr(keys.size()); - for (uint64_t i = 0; i < swapOutSSDddrOffs.size(); i++) { - embeddingsAddr[i] = embPtr + swapOutSSDddrOffs[i] * extEmbeddingSize; + for (uint64_t i = 0; i < swapOutL3StorageOffs.size(); i++) { + embeddingsAddr[i] = embPtr + swapOutL3StorageOffs[i] * extEmbeddingSize; } - ssdEngine->InsertEmbeddingsByAddr(tableName, keys, embeddingsAddr, extEmbeddingSize); + l3Storage->InsertEmbeddingsByAddr(tableName, keys, embeddingsAddr, extEmbeddingSize); } -void CacheManager::TransferDDR2SSD(string tableName, uint32_t extEmbeddingSize, vector& keys, - vector& addrs) +void CacheManager::TransferDDR2L3Storage(string tableName, uint32_t extEmbeddingSize, vector& keys, + vector& addrs) { - CreateSSDTableIfNotExist(tableName); - ssdEngine->InsertEmbeddingsByAddr(tableName, keys, addrs, extEmbeddingSize); + CreateL3StorageTableIfNotExist(tableName); + l3Storage->InsertEmbeddingsByAddr(tableName, keys, addrs, extEmbeddingSize); for (auto addr : addrs) { 
free(addr); addr = nullptr; } } -void CacheManager::FetchSSDEmb2DDR(string tableName, uint32_t extEmbeddingSize, vector& keys, - const vector& addrs) +void CacheManager::FetchL3StorageEmb2DDR(string tableName, uint32_t extEmbeddingSize, vector& keys, + const vector& addrs) { - auto embeddings = ssdEngine->FetchEmbeddings(tableName, keys); + auto embeddings = l3Storage->FetchEmbeddings(tableName, keys); for (uint64_t i = 0; i < embeddings.size(); i++) { int rc = memcpy_s(addrs[i], extEmbeddingSize * sizeof(float), embeddings[i].data(), extEmbeddingSize * sizeof(float)); @@ -280,7 +285,7 @@ void CacheManager::FetchSSDEmb2DDR(string tableName, uint32_t extEmbeddingSize, throw runtime_error("memcpy_s failed, rc: " + to_string(rc)); } } - ssdEngine->DeleteEmbeddings(tableName, keys); + l3Storage->DeleteEmbeddings(tableName, keys); embeddingTaskStep++; evictWaitCond.notify_all(); diff --git a/src/core/ssd_cache/cache_manager.h b/src/core/l3_storage/cache_manager.h similarity index 71% rename from src/core/ssd_cache/cache_manager.h rename to src/core/l3_storage/cache_manager.h index 89ed61d7..1571454b 100644 --- a/src/core/ssd_cache/cache_manager.h +++ b/src/core/l3_storage/cache_manager.h @@ -28,6 +28,7 @@ See the License for the specific language governing permissions and #include "utils/common.h" #include "preprocess_mapper.h" #include "ock_ctr_common/include/factory.h" +#include "l3_storage.h" namespace MxRec { @@ -42,14 +43,14 @@ namespace MxRec { struct SwapOutInfo { vector swapOutDDRKeys; vector swapOutDDRAddrOffs; - vector swapOutSSDKeys; - vector swapOutSSDAddrOffs; + vector swapOutL3StorageKeys; + vector swapOutL3StorageAddrOffs; }; enum class TransferRet { TRANSFER_OK = 0, // 转移成功或无需处理 TRANSFER_ERROR, - SSD_SPACE_NOT_ENOUGH, + L3Storage_SPACE_NOT_ENOUGH, DDR_SPACE_NOT_ENOUGH, }; @@ -73,16 +74,16 @@ namespace MxRec { ~CacheManager(); - void Init(ock::ctr::EmbCacheManagerPtr embCachePtr, vector& mgmtEmbInfo); + void Init(ock::ctr::EmbCacheManagerPtr embCachePtr, vector& mgmtEmbInfo, shared_ptr level3Storage); void Load(const std::vector& mgmtEmbInfo, int step, map>& trainKeySet); - void SaveSSDEngine(int step); + void Save(int step); - bool IsKeyInSSD(const string& embTableName, emb_cache_key_t key); + bool IsKeyInL3Storage(const string& embTableName, emb_cache_key_t key); - void EvictSSDEmbedding(const string& embTableName, const vector& keys); + void EvictL3StorageEmbedding(const string& embTableName, const vector& keys); void PutKey(const string& embTableName, const emb_key_t& key, RecordType type); @@ -90,18 +91,18 @@ namespace MxRec { SwapOutInfo& info); void ProcessSwapInKeys(const string& tableName, const vector& swapInKeys, - vector& DDRToSSDKeys, vector& SSDToDDRKeys); + vector& DDRToL3StorageKeys, vector& L3StorageToDDRKeys); - void UpdateSSDEmb(string tableName, float* embPtr, uint32_t extEmbeddingSize, vector& keys, - const vector& swapOutSSDAddrOffs); + void UpdateL3StorageEmb(string tableName, float* embPtr, uint32_t extEmbeddingSize, vector& keys, + const vector& swapOutL3StorageAddrOffs); - void TransferDDR2SSD(string tableName, uint32_t extEmbeddingSize, vector& keys, - vector& addrs); + void TransferDDR2L3Storage(string tableName, uint32_t extEmbeddingSize, vector& keys, + vector& addrs); - void FetchSSDEmb2DDR(string tableName, uint32_t extEmbeddingSize, vector& keys, - const vector& addrs); + void FetchL3StorageEmb2DDR(string tableName, uint32_t extEmbeddingSize, vector& keys, + const vector& addrs); - int64_t GetTableEmbeddingSize(const string& tableName); + int64_t 
GetTableUsage(const string& tableName); // DDR内每个表中emb数据频次缓存;map unordered_map ddrKeyFreqMap; @@ -123,13 +124,13 @@ namespace MxRec { bool isExist; }; - void CreateSSDTableIfNotExist(const std::string& embTableName); + void CreateL3StorageTableIfNotExist(const std::string& embTableName); unordered_map embBaseInfos; GTEST_PRIVATE: - shared_ptr ssdEngine = std::make_shared(); - vector ssdEvictThreads; + shared_ptr l3Storage; + vector l3StorageEvictThreads; ock::ctr::EmbCacheManagerPtr embCache {}; }; } diff --git a/src/core/l3_storage/l3_storage.cpp b/src/core/l3_storage/l3_storage.cpp new file mode 100644 index 00000000..4eb61c6e --- /dev/null +++ b/src/core/l3_storage/l3_storage.cpp @@ -0,0 +1,69 @@ +/* Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and + limitations under the License. +==============================================================================*/ + +#include "l3_storage.h" + +using MxRec::L3Storage; + +L3Storage::L3Storage() {} + +L3Storage::~L3Storage() {} + +bool L3Storage::IsTableExist(const string& tableName) +{ + return false; +} + +bool L3Storage::IsKeyExist(const string& tableName, emb_cache_key_t key) +{ + return false; +} + +void L3Storage::CreateTable(const string& tableName, vector savePaths, uint64_t maxTableSize) {} + +int64_t L3Storage::GetTableAvailableSpace(const string& tableName) +{ + return 0; +} + +void L3Storage::InsertEmbeddingsByAddr(const string& tableName, vector& keys, + vector& embeddingsAddr, uint64_t extEmbeddingSize) +{ +} + +void L3Storage::DeleteEmbeddings(const string& tableName, vector& keys) {} + +vector> L3Storage::FetchEmbeddings(const string& tableName, vector& keys) +{ + return vector>(); +} + +void L3Storage::Save(int step) {} + +void L3Storage::Load(const string& tableName, vector savePaths, uint64_t maxTableSize, int step) {} + +void L3Storage::Start() {} + +void L3Storage::Stop() {} + +int64_t L3Storage::GetTableUsage(const string& tableName) +{ + return 0; +} + +vector>> L3Storage::ExportTableKey() +{ + return vector>>(); +} diff --git a/src/core/l3_storage/l3_storage.h b/src/core/l3_storage/l3_storage.h new file mode 100644 index 00000000..606f2320 --- /dev/null +++ b/src/core/l3_storage/l3_storage.h @@ -0,0 +1,63 @@ +/* Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and + limitations under the License. 
+==============================================================================*/ + +#ifndef MX_REC_L3_STORAGE_H +#define MX_REC_L3_STORAGE_H + +#include +#include + +#include "utils/common.h" + +using MxRec::emb_cache_key_t; +using std::string; +using std::vector; + +namespace MxRec { + +class L3Storage { +public: + L3Storage(); + virtual ~L3Storage(); + + virtual bool IsTableExist(const string& tableName); + + virtual bool IsKeyExist(const string& tableName, emb_cache_key_t key); + + virtual void CreateTable(const string& tableName, vector savePaths, uint64_t maxTableSize); + + virtual int64_t GetTableAvailableSpace(const string& tableName); + + virtual void InsertEmbeddingsByAddr(const string& tableName, vector& keys, + vector& embeddingsAddr, uint64_t extEmbeddingSize); + + virtual void DeleteEmbeddings(const string& tableName, vector& keys); + + virtual vector> FetchEmbeddings(const string& tableName, vector& keys); + + virtual void Save(int step); + + virtual void Load(const string& tableName, vector savePaths, uint64_t maxTableSize, int step); + + virtual void Start(); + + virtual void Stop(); + + virtual int64_t GetTableUsage(const string& tableName); + + virtual vector>> ExportTableKey(); +}; +} // namespace MxRec +#endif // MX_REC_L3_STORAGE_H \ No newline at end of file diff --git a/src/core/ssd_cache/lfu_cache.cpp b/src/core/l3_storage/lfu_cache.cpp similarity index 100% rename from src/core/ssd_cache/lfu_cache.cpp rename to src/core/l3_storage/lfu_cache.cpp diff --git a/src/core/ssd_cache/lfu_cache.h b/src/core/l3_storage/lfu_cache.h similarity index 100% rename from src/core/ssd_cache/lfu_cache.h rename to src/core/l3_storage/lfu_cache.h diff --git a/src/core/ssd_cache/preprocess_mapper.h b/src/core/l3_storage/preprocess_mapper.h similarity index 54% rename from src/core/ssd_cache/preprocess_mapper.h rename to src/core/l3_storage/preprocess_mapper.h index 03860181..fd28677f 100644 --- a/src/core/ssd_cache/preprocess_mapper.h +++ b/src/core/l3_storage/preprocess_mapper.h @@ -1,9 +1,17 @@ -/* - * Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved. - * Description: ssd cache module - * Author: MindX SDK - * Date: 2024/2/18 - */ +/* Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and + limitations under the License. 
+==============================================================================*/ #ifndef MXREC_DDR_PREPROCESS_MAPPER_H #define MXREC_DDR_PREPROCESS_MAPPER_H @@ -18,12 +26,12 @@ namespace MxRec { */ class PreProcessMapper { public: - void Initialize(const string& embName, uint32_t vocabSize, uint32_t ssdVocabSize) + void Initialize(const string& embName, uint32_t ddrVocabSize, uint32_t l3StorageVocabSize) { tableName = embName; lfuCache = LFUCache(embName); - ddrAvailableSize = vocabSize; - ssdAvailableSize = ssdVocabSize; + ddrAvailableSize = ddrVocabSize; + l3StorageAvailableSize = l3StorageVocabSize; } bool IsDDRKeyExist(uint64_t key) @@ -31,7 +39,7 @@ namespace MxRec { return lfuCache.keyTable.find(key) != lfuCache.keyTable.end(); } - bool IsSSDKeyExist(uint64_t key) + bool IsL3StorageKeyExist(uint64_t key) { return excludeDDRKeyCountMap.find(key) != excludeDDRKeyCountMap.end(); } @@ -47,19 +55,19 @@ namespace MxRec { return true; } - bool InsertSSDKey(uint64_t key) + bool InsertL3StorageKey(uint64_t key) { - if (IsSSDKeyExist(key)) { - throw std::invalid_argument("InsertSSDKey failed! key already exist"); + if (IsL3StorageKeyExist(key)) { + throw std::invalid_argument("InsertL3StorageKey failed! key already exist"); } excludeDDRKeyCountMap[key] = 1; return true; } - bool RemoveSSDKey(uint64_t key) + bool RemoveL3StorageKey(uint64_t key) { - if (!IsSSDKeyExist(key)) { + if (!IsL3StorageKeyExist(key)) { throw std::invalid_argument("RemoveKey failed! key not exist"); } excludeDDRKeyCountMap.erase(key); @@ -74,18 +82,18 @@ namespace MxRec { return ddrAvailableSize - lfuCache.keyTable.size(); } - size_t SSDAvailableSize() + size_t L3StorageAvailableSize() { - if (ssdAvailableSize < excludeDDRKeyCountMap.size()) { - throw std::invalid_argument("ssdAvailableSize < existKeys.size()"); + if (l3StorageAvailableSize < excludeDDRKeyCountMap.size()) { + throw std::invalid_argument("l3StorageAvailableSize < existKeys.size()"); } - return ssdAvailableSize - excludeDDRKeyCountMap.size(); + return l3StorageAvailableSize - excludeDDRKeyCountMap.size(); } - void GetAndDeleteLeastFreqDDRKey2SSD(uint64_t transNum, const std::vector& keys, - std::vector& DDRSwapOutKeys) + void GetAndDeleteLeastFreqDDRKey2L3Storage(uint64_t transNum, const std::vector& keys, + std::vector& DDRSwapOutKeys) { - LOG_DEBUG("start GetAndDeleteLeastFreqDDRKey2SSD, table:{}", tableName); + LOG_DEBUG("start GetAndDeleteLeastFreqDDRKey2L3Storage, table:{}", tableName); std::vector DDRSwapOutCounts; lfuCache.GetAndDeleteLeastFreqKeyInfo(transNum, keys, DDRSwapOutKeys, DDRSwapOutCounts); for (uint64_t i = 0; i < DDRSwapOutKeys.size(); i++) { @@ -93,13 +101,13 @@ namespace MxRec { } if (DDRSwapOutCounts.size() != transNum) { throw std::invalid_argument( - "GetAndDeleteLeastFreqDDRKey2SSD failed! DDRSwapOutCounts.size()!=transNum"); + "GetAndDeleteLeastFreqDDRKey2L3Storage failed! 
DDRSwapOutCounts.size()!=transNum"); } } string tableName; uint64_t ddrAvailableSize = 0; - uint64_t ssdAvailableSize = 0; + uint64_t l3StorageAvailableSize = 0; LFUCache lfuCache; std::unordered_map excludeDDRKeyCountMap; }; diff --git a/src/core/ssd_engine/ssd_engine.cpp b/src/core/ssd_engine/ssd_engine.cpp index bbf55e66..e50ad43c 100644 --- a/src/core/ssd_engine/ssd_engine.cpp +++ b/src/core/ssd_engine/ssd_engine.cpp @@ -199,7 +199,7 @@ void SSDEngine::SetCompactThreshold(double threshold) throw invalid_argument("compact threshold should in range [0, 1]"); } -int64_t SSDEngine::GetTableEmbeddingSize(const string &tableName) +int64_t SSDEngine::GetTableUsage(const string &tableName) { if (!isRunning) { throw runtime_error("SSDEngine not running"); diff --git a/src/core/ssd_engine/ssd_engine.h b/src/core/ssd_engine/ssd_engine.h index 538f76e2..40b65843 100644 --- a/src/core/ssd_engine/ssd_engine.h +++ b/src/core/ssd_engine/ssd_engine.h @@ -22,12 +22,12 @@ See the License for the specific language governing permissions and #include #include -#include "utils/common.h" +#include "l3_storage/l3_storage.h" namespace MxRec { - class SSDEngine { + class SSDEngine : public L3Storage { public: bool IsTableExist(const string &tableName); @@ -56,7 +56,7 @@ namespace MxRec { void SetCompactThreshold(double threshold); - int64_t GetTableEmbeddingSize(const string& tableName); + int64_t GetTableUsage(const string& tableName); void InsertEmbeddingsByAddr(const string &tableName, vector &keys, vector &embeddingsAddr, uint64_t extEmbeddingSize); diff --git a/src/tests/ssd_cache/cache_manager_test.cpp b/src/tests/ssd_cache/cache_manager_test.cpp index 7cb5e032..164e667a 100644 --- a/src/tests/ssd_cache/cache_manager_test.cpp +++ b/src/tests/ssd_cache/cache_manager_test.cpp @@ -18,8 +18,8 @@ See the License for the specific language governing permissions and #include #include "absl/container/flat_hash_map.h" -#include "ssd_cache/lfu_cache.h" -#include "ssd_cache/cache_manager.h" +#include "l3_storage/lfu_cache.h" +#include "l3_storage/cache_manager.h" #include "utils/common.h" using namespace std; @@ -34,16 +34,21 @@ void InitSSDEngine(CacheManager& manager, string embTableName, uint64_t ssdSize) { // Init ssd engine data chrono::seconds period = chrono::seconds(120); - manager.ssdEngine->SetCompactPeriod(period); - manager.ssdEngine->SetCompactThreshold(1); - manager.ssdEngine->CreateTable(embTableName, {SSD_SAVE_PATH}, ssdSize); + auto ssdEngine = static_pointer_cast(manager.l3Storage); + ssdEngine->SetCompactPeriod(period); + ssdEngine->SetCompactThreshold(1); + ssdEngine->CreateTable(embTableName, {SSD_SAVE_PATH}, ssdSize); vector ssdKeys = {15, 25}; // 预设15, 25存储在SSD - std::vector> ssdEmbData = {{15.0f}, - {25.0f}}; + auto emb1 = new float(15.0f); + auto emb2 = new float(25.0f); + uint64_t extEmbeddingSize = 1; + std::vector ssdEmbData = {{emb1}, {emb2}}; auto& excludeMap = manager.preProcessMapper[embTableName].excludeDDRKeyCountMap; excludeMap[15] = 3; // 初始化次数 excludeMap[25] = 5; - manager.ssdEngine->InsertEmbeddings(embTableName, ssdKeys, ssdEmbData); + ssdEngine->InsertEmbeddingsByAddr(embTableName, ssdKeys, ssdEmbData, extEmbeddingSize); + delete emb1; + delete emb2; } void InitDDREmbData(absl::flat_hash_map& loadData, string& embTableName, @@ -105,7 +110,8 @@ protected: ock::ctr::EmbCacheManagerPtr embCachePtr = nullptr; - cacheManager.Init(embCachePtr, mgmtEmbInfos); + auto ssdEngine = make_shared(); + cacheManager.Init(embCachePtr, mgmtEmbInfos, ssdEngine); InitSSDEngine(cacheManager, 
embTableName, 5); InitSSDEngine(cacheManager, embTableName2, 10); @@ -141,31 +147,31 @@ TEST_F(CacheManagerTest, PutKey) LOG_INFO("test PutKey end."); } -TEST_F(CacheManagerTest, IsKeyInSSD) +TEST_F(CacheManagerTest, IsKeyInL3Storage) { vector checkKeys = {1, 2, 15, 25}; - ASSERT_FALSE(cacheManager.IsKeyInSSD(embTableName, checkKeys[0])); - ASSERT_FALSE(cacheManager.IsKeyInSSD(embTableName, checkKeys[1])); - ASSERT_TRUE(cacheManager.IsKeyInSSD(embTableName, checkKeys[2])); - ASSERT_TRUE(cacheManager.IsKeyInSSD(embTableName, checkKeys[3])); - LOG_INFO("test IsKeyInSSD end."); + ASSERT_FALSE(cacheManager.IsKeyInL3Storage(embTableName, checkKeys[0])); + ASSERT_FALSE(cacheManager.IsKeyInL3Storage(embTableName, checkKeys[1])); + ASSERT_TRUE(cacheManager.IsKeyInL3Storage(embTableName, checkKeys[2])); + ASSERT_TRUE(cacheManager.IsKeyInL3Storage(embTableName, checkKeys[3])); + LOG_INFO("test IsKeyInL3Storage end."); } -TEST_F(CacheManagerTest, EvictSSDEmbedding) +TEST_F(CacheManagerTest, EvictL3StorageEmbedding) { // 构造时ssd中已存在的key: 15 25 emb_cache_key_t key = 15; vector ssdKeys = {key}; - cacheManager.EvictSSDEmbedding(embTableName, ssdKeys); + cacheManager.EvictL3StorageEmbedding(embTableName, ssdKeys); int maxLoop = 1000; - while (!cacheManager.ssdEvictThreads.empty() && maxLoop > 0) { + while (!cacheManager.l3StorageEvictThreads.empty() && maxLoop > 0) { this_thread::sleep_for(1ms); maxLoop--; } - ASSERT_FALSE(cacheManager.IsKeyInSSD(embTableName, key)); + ASSERT_FALSE(cacheManager.IsKeyInL3Storage(embTableName, key)); const auto it = cacheManager.excludeDDRKeyCountMap[embTableName].find(key); ASSERT_EQ(it, cacheManager.excludeDDRKeyCountMap[embTableName].end()); - LOG_INFO("test EvictSSDEmbedding end."); + LOG_INFO("test EvictL3StorageEmbedding end."); } TEST_F(CacheManagerTest, LoadTest) diff --git a/src/tests/ssd_cache/lfu_cache_test.cpp b/src/tests/ssd_cache/lfu_cache_test.cpp index 7f8a7820..500e3989 100644 --- a/src/tests/ssd_cache/lfu_cache_test.cpp +++ b/src/tests/ssd_cache/lfu_cache_test.cpp @@ -16,7 +16,7 @@ See the License for the specific language governing permissions and #include #include -#include "ssd_cache/lfu_cache.h" +#include "l3_storage/lfu_cache.h" using namespace std; using namespace MxRec; -- Gitee From 8251c4e9ce25f59d273d837e731f00c9a233f4e4 Mon Sep 17 00:00:00 2001 From: yangzhen_BIG Date: Fri, 7 Jun 2024 03:35:51 +0000 Subject: [PATCH 210/302] cleancode --- src/core/emb_table/embedding_ddr.cpp | 3 ++- src/core/hybrid_mgmt/hybrid_mgmt.cpp | 10 ++++++---- src/core/hybrid_mgmt/hybrid_mgmt.h | 6 ++++-- src/core/l3_storage/cache_manager.cpp | 9 ++++++--- src/core/l3_storage/cache_manager.h | 8 +++++--- src/core/l3_storage/l3_storage.cpp | 3 ++- src/core/l3_storage/l3_storage.h | 1 - 7 files changed, 25 insertions(+), 15 deletions(-) diff --git a/src/core/emb_table/embedding_ddr.cpp b/src/core/emb_table/embedding_ddr.cpp index 3898a7da..257238b8 100644 --- a/src/core/emb_table/embedding_ddr.cpp +++ b/src/core/emb_table/embedding_ddr.cpp @@ -253,7 +253,8 @@ void EmbeddingDDR::SyncLatestEmbedding() throw std::invalid_argument(errMsg); } } - cacheManager_->UpdateL3StorageEmb(name, ptr, embInfo_.extEmbeddingSize, info.swapOutL3StorageKeys, info.swapOutL3StorageAddrOffs); + cacheManager_->UpdateL3StorageEmb(name, ptr, embInfo_.extEmbeddingSize, info.swapOutL3StorageKeys, + info.swapOutL3StorageAddrOffs); } } diff --git a/src/core/hybrid_mgmt/hybrid_mgmt.cpp b/src/core/hybrid_mgmt/hybrid_mgmt.cpp index 973831a2..895715c9 100644 --- a/src/core/hybrid_mgmt/hybrid_mgmt.cpp +++ 
b/src/core/hybrid_mgmt/hybrid_mgmt.cpp @@ -1711,7 +1711,8 @@ void HybridMgmt::EmbeddingUpdateL3Storage(const EmbTaskInfo& info, float *embPtr if (dims0 != static_cast(swapOutAddrs.size() + swapOutL3StorageKeys.size())) { throw runtime_error("data dims[0] != swapOutKeys.size"); } - cacheManager->UpdateL3StorageEmb(info.name, embPtr, extEmbeddingSize, swapOutL3StorageKeys, swapOutL3StorageAddrOffs); + cacheManager->UpdateL3StorageEmb(info.name, embPtr, extEmbeddingSize, swapOutL3StorageKeys, + swapOutL3StorageAddrOffs); LOG_DEBUG("table:{}, batchId:{}, thread{}, L3StorageUpdateTC(ms):{}", info.name.c_str(), info.batchId, info.threadIdx, L3StorageUpdateTC.ElapsedMS()); @@ -1917,7 +1918,8 @@ void HybridMgmt::HandleDataSwapForL3Storage(const EmbBaseInfo& info, LOG_DEBUG("table:{}, batchId:{}, channelId:{}, swapOutDDRKeys:{}, swapOutDDRAddrOffs:{}, " "swapOutL3StorageKeys:{}, swapOutL3StorageAddrOff:{}", info.name, info.batchId, info.channelId, swapInfo.swapOutDDRKeys.size(), - swapInfo.swapOutDDRAddrOffs.size(), swapInfo.swapOutL3StorageKeys.size(), swapInfo.swapOutL3StorageAddrOffs.size()); + swapInfo.swapOutDDRAddrOffs.size(), swapInfo.swapOutL3StorageKeys.size(), + swapInfo.swapOutL3StorageAddrOffs.size()); LOG_DEBUG("table:{}, batchId:{}, channelId:{}, DDRToL3StorageKeys:{}, L3StorageToDDRKeys:{}", info.name, info.batchId, info.channelId, DDRToL3StorageKeys.size(), L3StorageToDDRKeys.size()); @@ -2150,8 +2152,8 @@ bool HybridMgmt::HandleSpecialProcessStatusL3Storage(const EmbBaseInfo &info, Ti if (mgmtRankInfo.ctrlSteps[info.channelId] == 1) { vector emptySwapOutPos; SendTensorForSwap(info, swapInPos, emptySwapOutPos); - LOG_DEBUG("ProcessEmbInfoL3Storage special case, user only run one step, table:{}, channelId:{}, batchId:{}", - info.name, info.channelId, info.batchId); + LOG_DEBUG("ProcessEmbInfoL3Storage special case, user only run one step, " + "table:{}, channelId:{}, batchId:{}", info.name, info.channelId, info.batchId); } specialProcessStatus[info.name] = ProcessStatus::AFTER_SWITCH_SECOND_BATCH; diff --git a/src/core/hybrid_mgmt/hybrid_mgmt.h b/src/core/hybrid_mgmt/hybrid_mgmt.h index 02829896..4fd2b541 100644 --- a/src/core/hybrid_mgmt/hybrid_mgmt.h +++ b/src/core/hybrid_mgmt/hybrid_mgmt.h @@ -265,9 +265,11 @@ namespace MxRec { void EmbeddingSendDDR(const EmbTaskInfo& info, vector& h2dEmb); - bool EmbeddingReceiveL3Storage(const EmbTaskInfo& info, float*& ptr, vector& swapOutAddrs, int64_t& dims0); + bool EmbeddingReceiveL3Storage(const EmbTaskInfo& info, float*& ptr, vector& swapOutAddrs, + int64_t& dims0); - void EmbeddingUpdateL3Storage(const EmbTaskInfo& info, float* embPtr, vector& swapOutAddrs, int64_t& dims0); + void EmbeddingUpdateL3Storage(const EmbTaskInfo& info, float* embPtr, vector& swapOutAddrs, + int64_t& dims0); bool EmbeddingLookUpL3Storage(const EmbTaskInfo& info, vector& h2dEmb); diff --git a/src/core/l3_storage/cache_manager.cpp b/src/core/l3_storage/cache_manager.cpp index a2cbfb32..188f2aaf 100644 --- a/src/core/l3_storage/cache_manager.cpp +++ b/src/core/l3_storage/cache_manager.cpp @@ -25,7 +25,8 @@ See the License for the specific language governing permissions and using namespace MxRec; -void CacheManager::Init(ock::ctr::EmbCacheManagerPtr embCachePtr, vector& mgmtEmbInfo, shared_ptr level3Storage) +void CacheManager::Init(ock::ctr::EmbCacheManagerPtr embCachePtr, vector& mgmtEmbInfo, + shared_ptr level3Storage) { LOG_INFO("CacheManager Init method begin"); if (level3Storage == nullptr) { @@ -214,7 +215,8 @@ void CacheManager::ProcessSwapOutKeys(const 
string& tableName, const vector& swapInKeys, - vector& DDRToL3StorageKeys, vector& L3StorageToDDRKeys) + vector& DDRToL3StorageKeys, + vector& L3StorageToDDRKeys) { auto& keyMapper = preProcessMapper[tableName]; size_t externalDDRSize = 0; @@ -236,7 +238,8 @@ void CacheManager::ProcessSwapInKeys(const string& tableName, const vector keyMapper.L3StorageAvailableSize()) { - throw invalid_argument("L3Storage table size too small, key quantity exceed while transferring DDR data to L3Storage"); + throw invalid_argument( + "L3Storage table size too small, key quantity exceed while transferring DDR data to L3Storage"); } // DDR--->L3Storage keyMapper.GetAndDeleteLeastFreqDDRKey2L3Storage(transNum, swapInKeys, DDRToL3StorageKeys); diff --git a/src/core/l3_storage/cache_manager.h b/src/core/l3_storage/cache_manager.h index 1571454b..dda4c396 100644 --- a/src/core/l3_storage/cache_manager.h +++ b/src/core/l3_storage/cache_manager.h @@ -50,7 +50,7 @@ namespace MxRec { enum class TransferRet { TRANSFER_OK = 0, // 转移成功或无需处理 TRANSFER_ERROR, - L3Storage_SPACE_NOT_ENOUGH, + L3STORAGE_SPACE_NOT_ENOUGH, DDR_SPACE_NOT_ENOUGH, }; @@ -74,7 +74,8 @@ namespace MxRec { ~CacheManager(); - void Init(ock::ctr::EmbCacheManagerPtr embCachePtr, vector& mgmtEmbInfo, shared_ptr level3Storage); + void Init(ock::ctr::EmbCacheManagerPtr embCachePtr, vector& mgmtEmbInfo, + shared_ptr level3Storage); void Load(const std::vector& mgmtEmbInfo, int step, map>& trainKeySet); @@ -91,7 +92,8 @@ namespace MxRec { SwapOutInfo& info); void ProcessSwapInKeys(const string& tableName, const vector& swapInKeys, - vector& DDRToL3StorageKeys, vector& L3StorageToDDRKeys); + vector& DDRToL3StorageKeys, + vector& L3StorageToDDRKeys); void UpdateL3StorageEmb(string tableName, float* embPtr, uint32_t extEmbeddingSize, vector& keys, const vector& swapOutL3StorageAddrOffs); diff --git a/src/core/l3_storage/l3_storage.cpp b/src/core/l3_storage/l3_storage.cpp index 4eb61c6e..6a3ea668 100644 --- a/src/core/l3_storage/l3_storage.cpp +++ b/src/core/l3_storage/l3_storage.cpp @@ -16,6 +16,7 @@ See the License for the specific language governing permissions and #include "l3_storage.h" using MxRec::L3Storage; +using MxRec::emb_cache_key_t; L3Storage::L3Storage() {} @@ -39,7 +40,7 @@ int64_t L3Storage::GetTableAvailableSpace(const string& tableName) } void L3Storage::InsertEmbeddingsByAddr(const string& tableName, vector& keys, - vector& embeddingsAddr, uint64_t extEmbeddingSize) + vector& embeddingsAddr, uint64_t extEmbeddingSize) { } diff --git a/src/core/l3_storage/l3_storage.h b/src/core/l3_storage/l3_storage.h index 606f2320..6462409f 100644 --- a/src/core/l3_storage/l3_storage.h +++ b/src/core/l3_storage/l3_storage.h @@ -21,7 +21,6 @@ See the License for the specific language governing permissions and #include "utils/common.h" -using MxRec::emb_cache_key_t; using std::string; using std::vector; -- Gitee From 8b389399dc1b70801a2779271fa82092f81b6a6e Mon Sep 17 00:00:00 2001 From: yangzhen_BIG Date: Fri, 7 Jun 2024 03:45:31 +0000 Subject: [PATCH 211/302] cleancode --- src/core/l3_storage/cache_manager.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/core/l3_storage/cache_manager.h b/src/core/l3_storage/cache_manager.h index dda4c396..3f5b0a22 100644 --- a/src/core/l3_storage/cache_manager.h +++ b/src/core/l3_storage/cache_manager.h @@ -95,7 +95,8 @@ namespace MxRec { vector& DDRToL3StorageKeys, vector& L3StorageToDDRKeys); - void UpdateL3StorageEmb(string tableName, float* embPtr, uint32_t extEmbeddingSize, vector& keys, + void 
UpdateL3StorageEmb(string tableName, float* embPtr, uint32_t extEmbeddingSize,
+                            vector& keys,
                              const vector& swapOutL3StorageAddrOffs);
 
         void TransferDDR2L3Storage(string tableName, uint32_t extEmbeddingSize, vector& keys,
-- 
Gitee

From 167b0e1d9eeadb152ac5996681045b13c3b45eb5 Mon Sep 17 00:00:00 2001
From: yangzhen_BIG
Date: Wed, 12 Jun 2024 08:55:42 +0000
Subject: =?UTF-8?q?!180=20=E6=A0=B7=E4=BE=8B=EF=BC=88?=
 =?UTF-8?q?=E6=8E=A5=E5=85=A5PS=EF=BC=89=EF=BC=9A=E6=B7=BB=E5=8A=A0?=
 =?UTF-8?q?=E9=80=82=E9=85=8D=E6=8C=87=E5=AF=BC=20*=20=E6=A0=B7=E4=BE=8B?=
 =?UTF-8?q?=EF=BC=88=E6=8E=A5=E5=85=A5PS=EF=BC=89=EF=BC=9A=E6=B7=BB?=
 =?UTF-8?q?=E5=8A=A0=E9=80=82=E9=85=8D=E6=8C=87=E5=AF=BC?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../ps_adapt_to_mxrec/ps_adapt_to_mxrec.md    | 750 ++++++++++++++++++
 1 file changed, 750 insertions(+)
 create mode 100644 examples/ps_adapt_to_mxrec/ps_adapt_to_mxrec.md

diff --git a/examples/ps_adapt_to_mxrec/ps_adapt_to_mxrec.md b/examples/ps_adapt_to_mxrec/ps_adapt_to_mxrec.md
new file mode 100644
index 00000000..431133b7
--- /dev/null
+++ b/examples/ps_adapt_to_mxrec/ps_adapt_to_mxrec.md
@@ -0,0 +1,750 @@
+# Version Information
+
+1. ps-lite
+
+   [GitHub - dmlc/ps-lite: A lightweight parameter server interface](https://github.com/dmlc/ps-lite)
+
+   commit 11b42c08a357d4ea5924403daa357587f4d8b5e2 (this commit or any later one works)
+
+2. mxRec
+
+   [mxrec: Huawei Ascend MindX Recommendation SDK - Gitee.com](https://gitee.com/ascend/mxrec/tree/develop/)
+
+   commit ae36047f1dda8c03fa849184205bdc8bcfb4a137
+
+**Note: ps-lite does not support multi-table storage, so this document uses a single-table training scenario as its example.**
+
+# Adaptation Workflow
+
+## ps-lite
+
+### Download the ps-lite source
+
+```shell
+# starting from the directory that contains the mxrec repo
+cd mxrec/src
+mkdir 3rdparty
+cd 3rdparty
+git clone https://github.com/dmlc/ps-lite.git
+```
+
+### Modify ps-lite/make/deps.mk
+
+* Keep the downloaded source packages instead of deleting them, which cuts the cost of repeated builds
+* Align the dependency versions with ps-lite/CMakeLists.txt. protobuf 3.8.0 matches TensorFlow 1.15; adjust it to your own TF version if needed.
+
+```makefile
+# protobuf
+PROTOBUF = ${DEPS_PATH}/include/google/protobuf/message.h
+${PROTOBUF}:
+    $(eval FILE=protobuf-cpp-3.8.0.tar.gz)
+    $(eval DIR=protobuf-3.8.0)
+    rm -rf $(DIR)
+    $(WGET) -nc $(URL2)/$(FILE) && tar --no-same-owner -zxf $(FILE)
+    cd $(DIR) && export CFLAGS=-fPIC && export CXXFLAGS=-fPIC && ./configure -prefix=$(DEPS_PATH) && $(MAKE) && $(MAKE) install
+    rm -rf $(DIR)
+
+# zmq
+ZMQ = ${DEPS_PATH}/include/zmq.h
+
+${ZMQ}:
+    $(eval FILE=zeromq-4.3.2.tar.gz)
+    $(eval DIR=zeromq-4.3.2)
+    rm -rf $(DIR)
+    $(WGET) -nc $(URL1)/$(FILE) && tar --no-same-owner -zxf $(FILE)
+    cd $(DIR) && export CFLAGS=-fPIC && export CXXFLAGS=-fPIC && ./configure -prefix=$(DEPS_PATH) --with-libsodium=no --with-libgssapi_krb5=no && $(MAKE) && $(MAKE) install
+    rm -rf $(DIR)
+
+# lz4
+LZ4 = ${DEPS_PATH}/include/lz4.h
+${LZ4}:
+    $(eval FILE=lz4-r129.tar.gz)
+    $(eval DIR=lz4-r129)
+    rm -rf $(DIR)
+    wget -nc $(URL1)/$(FILE) && tar --no-same-owner -zxf $(FILE)
+    cd $(DIR) && $(MAKE) && PREFIX=$(DEPS_PATH) $(MAKE) install
+    rm -rf $(DIR)
+
+# cityhash
+CITYHASH = ${DEPS_PATH}/include/city.h
+${CITYHASH}:
+    $(eval FILE=cityhash-1.1.1.tar.gz)
+    $(eval DIR=cityhash-1.1.1)
+    rm -rf $(DIR)
+    wget -nc $(URL1)/$(FILE)&& tar --no-same-owner -zxf $(FILE)
+    cd $(DIR) && ./configure -prefix=$(DEPS_PATH) --enable-sse4.2 && $(MAKE) CXXFLAGS="-g -O3 -msse4.2" && $(MAKE) install
+    rm -rf $(DIR)
+```
+
+### Install dependencies
+
+* protobuf: the version must match TensorFlow's; search the tensorflow directory for `GOOGLE_PROTOBUF_VERSION` to find the protobuf version
+* zeromq: build it following its GitHub instructions, using the version shown in ps-lite/make/deps.mk
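+
+Before building, it is worth confirming that the locally installed protobuf really matches the one TensorFlow was built against. A minimal check (assuming a pip-installed TensorFlow; the header layout may differ for other builds):
+
+```shell
+# GOOGLE_PROTOBUF_VERSION lives in protobuf's stubs/common.h shipped inside the TF package
+TF_DIR=$(python3 -c "import tensorflow as tf, os; print(os.path.dirname(tf.__file__))")
+grep -rn "GOOGLE_PROTOBUF_VERSION" "${TF_DIR}/include/google/protobuf/stubs/common.h"
+protoc --version  # the locally installed protobuf, for comparison
+```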
+
+### Prepare the KVServerMxRecHandle source
+
+Add the following code to ps-lite/include/ps/kv_app.h:
+
+```c++
+// headers used by this handle, in case kv_app.h does not already pull them in
+#include <cstdlib>
+#include <stdexcept>
+#include <string>
+
+/**
+ * \brief for mxrec embedding storage
+ */
+template <typename Val>
+struct KVServerMxRecHandle {
+    void operator()(
+        const KVMeta& req_meta, const KVPairs<Val>& req_data, KVServer<Val>* server) {
+        LL << "KVServerMxRecHandle, customerId:" << req_meta.customer_id
+           << ", push:" << req_meta.push << ", pull:" << req_meta.pull;
+        auto es = std::getenv("EMB_SIZE");
+        if (es == nullptr) {
+            throw std::runtime_error("EMB_SIZE environment variable not found, please export");
+        }
+        int embeddingSize = std::stoi(es);
+        size_t keyCnt = req_data.keys.size();
+        KVPairs<Val> res;
+
+        if (req_meta.pull) {
+            LL << "pull, customerId:" << req_meta.customer_id << ", keys.size:" << keyCnt
+               << ", embeddingSize:" << embeddingSize;
+            res.keys = req_data.keys;
+            res.vals.resize(keyCnt * embeddingSize);  // flatten all data
+            for (size_t i = 0; i < keyCnt; ++i) {
+                Key key = req_data.keys[i];
+                std::vector<Val> emb = store[key];
+                if (emb.size() == 0) {
+                    emb = std::vector<Val>(embeddingSize, 0);
+                } else if (emb.size() != static_cast<size_t>(embeddingSize)) {
+                    throw std::runtime_error("embedding size in server not equal to request");
+                }
+                for (int j = 0; j < embeddingSize; j++) {
+                    res.vals[i * embeddingSize + j] = emb[j];
+                }
+            }
+        } else if (req_meta.push) {
+            LL << "push, customerId:" << req_meta.customer_id << ", keys.size:" << keyCnt
+               << ", vals.size:" << req_data.vals.size() << ", embeddingSize:" << embeddingSize;
+            for (size_t i = 0; i < keyCnt; i++) {
+                Key key = req_data.keys[i];
+                std::vector<Val> tmp(embeddingSize);
+                for (int j = 0; j < embeddingSize; j++) {
+                    // copy from the request payload (not from the still-empty response)
+                    tmp[j] = req_data.vals[i * embeddingSize + j];
+                }
+                store[key] = tmp;
+            }
+        } else {
+            LL << "error: request neither push nor pull";
+            throw std::runtime_error("request neither push nor pull");
+        }
+
+        server->Response(req_meta, res);
+    }
+    std::unordered_map<Key, std::vector<Val>> store;
+};
+```
+
+### Prepare the scheduler, server, and worker sources
+
+* ps-lite/tests/test_scheduler.cc
+
+  ```c++
+  #include <string>
+  #include "ps/ps.h"
+
+  using namespace ps;
+
+  void RunScheduler(int appId) {
+      // start system
+      LL << "start scheduler, appId:" << appId;
+      Start(appId);
+      Finalize(appId, true);
+      LL << "quit scheduler, appId:" << appId;
+  }
+
+  int main(int argc, char *argv[]) {
+      int appId = std::stoi(argv[1]);
+      RunScheduler(appId);
+      return 0;
+  }
+  ```
+
+* ps-lite/tests/test_server.cc
+
+  ```c++
+  #include <string>
+  #include "ps/ps.h"
+
+  using namespace ps;
+
+  void StartServer(int serverId) {
+      if (!IsServer()) {
+          return;
+      }
+      auto server = new KVServer<float>(serverId);
+      server->set_request_handle(KVServerMxRecHandle<float>());
+      RegisterExitCallback([server]() { delete server; });
+  }
+
+  void RunServer(int appId) {
+      LL << "start server, appId:" << appId;
+      Start(appId);
+      StartServer(appId);
+      // stop system
+      Finalize(appId, true);
+      LL << "quit server, appId:" << appId;
+  }
+
+  int main(int argc, char *argv[]) {
+      int appId = std::stoi(argv[1]);
+      RunServer(appId);
+      return 0;
+  }
+  ```
+
+* ps-lite/tests/test_worker.cc
+
+  ```c++
+  #include <cmath>
+  #include <cstdlib>
+  #include <limits>
+  #include <string>
+  #include <thread>
+  #include <vector>
+  #include "ps/ps.h"
+
+  using namespace ps;
+  using std::vector;
+
+  void RunWorker(int appId, int customerId) {
+      LL << "start worker, appId:" << appId << ", customerId:" << customerId;
+      Start(appId);
+      if (!IsWorker()) {
+          return;
+      }
+      KVWorker<float> kv(appId, customerId);
+
+      // init
+      int num = 10000;
+      int embSize = 2;
+      vector<Key> keys(num);
+      vector<float> vals(num * embSize);
+      int rank = MyRank();
+      srand(rank + 7);
+      for (int i = 0; i < num; ++i) {
+          keys[i] = kMaxKey / num * i + customerId;
+          for (int j = 0; j < embSize; ++j) {
+              vals[i * embSize + j] = rand() % 1000;
+          }
+      }
+
+      // push
+      LL << "start push";
+      kv.Wait(kv.Push(keys, vals));
+
+      // pull
+      LL << "start pull";
+      std::vector<float> rets;
+      kv.Wait(kv.Pull(keys, &rets));
+
+      LL << "start validation";
+      for (int i = 0; i < num; ++i) {
+          for (int j = 0; j < embSize; ++j) {
+              if (std::abs(vals[i * embSize + j] - rets[i * embSize + j]) > std::numeric_limits<float>::epsilon()) {
+                  LL << "error: embedding from server not equal to original data";
+                  Finalize(appId, true);
+                  return;
+              }
+          }
+      }
+
+      // stop system
+      Finalize(appId, true);
+      LL << "stop worker, appId:" << appId << ", customerId:" << customerId;
+  }
+
+  int main(int argc, char *argv[]) {
+      int customerId = std::stoi(argv[1]);
+      std::thread t0(RunWorker, 0, customerId);
+      t0.join();
+      return 0;
+  }
+  ```
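+
+ps-lite's default request slicer assumes the key list of every Push/Pull is sorted in ascending order; the test worker above satisfies this by construction, since its keys are generated monotonically. A tiny guard you could add to your own worker code before any request (a sketch, not part of ps-lite):
+
+```c++
+#include <algorithm>
+#include <cstdint>
+#include <vector>
+
+// Returns true when `keys` respects the ascending order ps-lite expects.
+bool KeysAreOrdered(const std::vector<uint64_t>& keys) {
+    return std::is_sorted(keys.begin(), keys.end());
+}
+```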
+
+### Modify ps-lite/tests/CMakeLists.txt
+
+Replace its contents with the following:
+
+```cmake
+add_executable(test_scheduler test_scheduler.cc)
+target_link_libraries(test_scheduler pslite)
+
+add_executable(test_server test_server.cc)
+target_link_libraries(test_server pslite)
+
+add_executable(test_worker test_worker.cc)
+target_link_libraries(test_worker pslite)
+```
+
+### Modify ps-lite/CMakeLists.txt
+
+Add the following line:
+
+```cmake
+target_link_libraries(pslite PUBLIC pthread)
+```
+
+### Build the scheduler, server, and worker
+
+Run in the ps-lite directory:
+
+```shell
+mkdir build
+cd build
+cmake ..
+make -j4
+```
+
+### Prepare the scheduler, server, and worker launch scripts
+
+* ps-lite/start_service.sh
+
+  ```shell
+  #!/bin/bash
+  # set -x
+  if [ $# -lt 2 ]; then
+      echo "usage: $0 bin_scheduler bin_server"
+      exit -1;
+  fi
+
+  export DMLC_NUM_SERVER=1
+  export DMLC_NUM_WORKER=1
+  # embedding dimension served by KVServerMxRecHandle; must match the worker
+  export EMB_SIZE=2
+  bin_scheduler=$1
+  bin_server=$2
+
+  # start the scheduler
+  export DMLC_PS_ROOT_URI='127.0.0.1'
+  export DMLC_ROLE='scheduler'
+  export DMLC_PS_ROOT_PORT=8000
+  ${bin_scheduler} 0 &
+
+  # start servers
+  export DMLC_ROLE='server'
+  ${bin_server} 0 &
+
+  wait
+  ```
+
+* ps-lite/start_worker.sh
+
+  ```shell
+  #!/bin/bash
+  # set -x
+  if [ $# -lt 1 ]; then
+      echo "usage: $0 bin_worker"
+      exit -1;
+  fi
+
+  export DMLC_NUM_SERVER=1
+  export DMLC_NUM_WORKER=1
+  bin_worker=$1
+
+  # scheduler info
+  export DMLC_PS_ROOT_URI='127.0.0.1'
+  export DMLC_PS_ROOT_PORT=8000
+  export DMLC_ROLE='worker'
+  ${bin_worker} 0 &
+
+  wait
+  ```
+
+### Build ps-lite
+
+In the ps-lite directory:
+
+```shell
+mkdir build
+cd build
+cmake ..
+make -j8
+```
+
+### Verify basic functionality
+
+Copy the built test binaries into the ps-lite directory, then run:
+
+```shell
+# run these in separate terminals
+./start_service.sh ./test_scheduler ./test_server
+./start_worker.sh ./test_worker
+```
+
+If no errors are reported, the setup works.
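+
+If a process hangs at startup instead of failing, the usual cause is a mismatch between the declared node counts and the processes actually launched: ps-lite blocks in `Start()` until the scheduler has registered every declared node. A quick way to inspect a single-host setup (assumptions as in the scripts above):
+
+```shell
+env | grep DMLC_      # compare against start_service.sh / start_worker.sh
+ss -lntp | grep 8000  # confirm the scheduler is listening on DMLC_PS_ROOT_PORT
+```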
+
+## mxrec
+
+### Adjust ps-lite
+
+1. Delete ps-lite/build
+2. In ps-lite/CMakeLists.txt, comment out `add_subdirectory(tests)`
+
+Search for the following code fragments, then add or replace the source as shown.
+
+### src/build.sh
+
+```shell
+cmake -DCMAKE_BUILD_TYPE=Release \
+      -DTF_PATH="$1" \
+      -DOMPI_PATH="$(whereis openmpi)" \
+      -DPYTHON_PATH="$python_path" \
+      -DEASY_PROFILER_PATH=/ \
+      -DASCEND_PATH="$ascend_path" \
+      -DABSEIL_PATH="$1" \
+      -DSECUREC_PATH="$2"/../opensource/securec \
+      -DCMAKE_INSTALL_PREFIX="$2"/output \
+      -DBUILD_CUST="$3" \
+      -DDEPS_PATH="$2"/src/3rdparty/ps-lite .. # new
+```
+
+### src/CMakeLists.txt
+
+```cmake
+add_subdirectory(dataset_tf)
+add_subdirectory(3rdparty/ps-lite) # new; matches the clone location under src/3rdparty
+```
+
+### src/core/CMakeLists.txt
+
+```cmake
+file(GLOB_RECURSE MXREC_SRC ./*.cpp ./*.h)
+add_library(ASC SHARED ${MXREC_SRC})
+
+target_include_directories(ASC PUBLIC ../3rdparty/ps-lite/include) # new
+```
+
+```cmake
+target_link_libraries(ASC PUBLIC ascendcl msprofiler ge_executor gert runtime ge_common register graph ascend_protobuf
+        profapi opt_feature error_manager exe_graph acl_tdt_channel acl_tdt_queue securec drvdsmi_host _ock_ctr_common
+        pslite # new
+)
+```
+
+### src/core/ps_store/ps_store.h **(new)**
+
+```c++
+#ifndef MXREC_PS_STORE_H
+#define MXREC_PS_STORE_H
+
+#include <map>
+#include <memory>
+
+#include "l3_storage/l3_storage.h"
+#include "ps/ps.h"  // must be included after any mxrec header file, otherwise compilation fails
+
+using MxRec::L3Storage;
+using ps::KVWorker;
+using std::map;
+using std::shared_ptr;
+using std::string;
+
+namespace MxRec {
+class PSStore : public L3Storage {
+public:
+    PSStore(int rankId);
+
+    bool IsTableExist(const string& tableName);
+
+    bool IsKeyExist(const string& tableName, emb_cache_key_t key);
+
+    void CreateTable(const string& tableName, vector<string> savePaths, uint64_t maxTableSize);
+
+    int64_t GetTableAvailableSpace(const string& tableName);
+
+    void InsertEmbeddingsByAddr(const string& tableName, vector<emb_cache_key_t>& keys,
+                                vector<float*>& embeddingsAddr, uint64_t extEmbeddingSize);
+
+    void DeleteEmbeddings(const string& tableName, vector<emb_cache_key_t>& keys);
+
+    vector<vector<float>> FetchEmbeddings(const string& tableName, vector<emb_cache_key_t>& keys);
+
+    void Save(int step);
+
+    void Load(const string& tableName, vector<string> savePaths, uint64_t maxTableSize, int step);
+
+    void Start();
+
+    void Stop();
+
+    int64_t GetTableUsage(const string& tableName);
+
+    vector<std::pair<string, vector<emb_cache_key_t>>> ExportTableKey();
+
+private:
+    // ps-lite does not support multiple tables yet, so this example uses a single client
+    int appId = 0;
+    int customerId = 0;
+
+    // table --> client
+    map<string, shared_ptr<KVWorker<float>>> cliMap;
+};
+}  // namespace MxRec
+#endif  // MXREC_PS_STORE_H
+```
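+
+Both `InsertEmbeddingsByAddr` and `FetchEmbeddings` in the implementation below repeat the same reorder step, because ps-lite wants ascending keys while mxRec hands them over in arbitrary order. If you extend this example, the step can be factored into one helper; a sketch (`SortKeysWithIndex` is not part of the mxRec or ps-lite APIs):
+
+```c++
+#include <algorithm>
+#include <cstdint>
+#include <numeric>
+#include <vector>
+
+// Sorts `keys` ascending in place and returns the permutation that maps each
+// sorted position back to the caller's original index.
+std::vector<size_t> SortKeysWithIndex(std::vector<uint64_t>& keys) {
+    std::vector<size_t> perm(keys.size());
+    std::iota(perm.begin(), perm.end(), 0);
+    std::sort(perm.begin(), perm.end(),
+              [&keys](size_t a, size_t b) { return keys[a] < keys[b]; });
+    std::sort(keys.begin(), keys.end());
+    return perm;
+}
+```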
+
+### src/core/ps_store/ps_store.h **(new)**
+
+```c++
+#ifndef MXREC_PS_STORE_H
+#define MXREC_PS_STORE_H
+
+#include <map>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "l3_storage/l3_storage.h"
+#include "ps/ps.h"  // must be included after the mxRec headers, otherwise compilation fails
+
+using MxRec::L3Storage;
+using ps::KVWorker;
+using std::map;
+using std::pair;
+using std::shared_ptr;
+using std::string;
+using std::vector;
+
+namespace MxRec {
+class PSStore : public L3Storage {
+public:
+    explicit PSStore(int rankId);
+
+    bool IsTableExist(const string& tableName);
+
+    bool IsKeyExist(const string& tableName, emb_cache_key_t key);
+
+    void CreateTable(const string& tableName, vector<string> savePaths, uint64_t maxTableSize);
+
+    int64_t GetTableAvailableSpace(const string& tableName);
+
+    void InsertEmbeddingsByAddr(const string& tableName, vector<emb_cache_key_t>& keys,
+        vector<uint8_t*>& embeddingsAddr, uint64_t extEmbeddingSize);
+
+    void DeleteEmbeddings(const string& tableName, vector<emb_cache_key_t>& keys);
+
+    vector<vector<float>> FetchEmbeddings(const string& tableName, vector<emb_cache_key_t>& keys);
+
+    void Save(int step);
+
+    void Load(const string& tableName, vector<string> savePaths, uint64_t maxTableSize, int step);
+
+    void Start();
+
+    void Stop();
+
+    int64_t GetTableUsage(const string& tableName);
+
+    vector<pair<string, vector<emb_cache_key_t>>> ExportTableKey();
+
+private:
+    // ps-lite does not support multiple tables yet, so this example code uses only one client
+    int appId = 0;
+    int customerId = 0;
+
+    // table --> client
+    map<string, shared_ptr<KVWorker<float>>> cliMap;
+};
+}  // namespace MxRec
+#endif  // MXREC_PS_STORE_H
+```
+
+### src/core/ps_store/ps_store.cpp **(new)**
+
+```c++
+#include "ps_store.h"
+
+#include <algorithm>
+#include <cstdlib>
+
+using MxRec::PSStore;
+using MxRec::emb_cache_key_t;
+using std::make_shared;
+using std::runtime_error;
+
+struct KeyWithIdx {
+    emb_cache_key_t key;
+    size_t index;
+};
+
+bool CompareKeyWithIdx(KeyWithIdx a, KeyWithIdx b)
+{
+    return a.key < b.key;
+}
+
+PSStore::PSStore(int rankId)
+{
+    this->customerId = rankId + std::stoi(std::getenv("REC_WORKER_ID_START_IDX"));
+}
+
+bool PSStore::IsTableExist(const string& tableName)
+{
+    auto iter = cliMap.find(tableName);
+    if (iter == cliMap.end()) {
+        return false;
+    }
+    return true;
+}
+
+bool PSStore::IsKeyExist(const string& tableName, emb_cache_key_t key)
+{
+    auto iter = cliMap.find(tableName);
+    if (iter == cliMap.end()) {
+        LOG_DEBUG("table:{} not created yet", tableName);
+        throw runtime_error("table not created yet");
+    }
+
+    auto worker = cliMap[tableName];
+    vector<emb_cache_key_t> keys = {key};
+    vector<float> rets;
+    worker->Wait(worker->Pull(keys, &rets));
+    if (rets.size() > 0) {
+        return true;
+    }
+    return false;
+}
+
+void PSStore::CreateTable(const string& tableName, vector<string> savePaths, uint64_t maxTableSize)
+{
+    static bool alreadyCreate = false;
+    if (alreadyCreate) {
+        throw runtime_error("ps-lite does not support multiple tables yet, "
+            "so this example code only supports one table");
+    }
+    LOG_DEBUG("start create table:{}, init ps-lite client, appId:{}, customerId:{}", tableName, appId, customerId);
+    ps::Start(appId);
+    auto worker = make_shared<KVWorker<float>>(appId, customerId);
+    cliMap[tableName] = worker;
+    LOG_DEBUG("finish create table:{}, worker appId:{}, customerId:{}", tableName, appId, customerId);
+    alreadyCreate = true;
+}
+
+int64_t PSStore::GetTableAvailableSpace(const string& tableName)
+{
+    // ps-lite does not expose capacity, so always report plenty of space
+    return 1000000000000;
+}
+
+void PSStore::InsertEmbeddingsByAddr(const string& tableName, vector<emb_cache_key_t>& keys,
+    vector<uint8_t*>& embeddingsAddr, uint64_t extEmbeddingSize)
+{
+    if (keys.size() == 0) {
+        return;
+    }
+
+    auto iter = cliMap.find(tableName);
+    if (iter == cliMap.end()) {
+        LOG_DEBUG("table:{} not created yet", tableName);
+        throw runtime_error("table not created yet");
+    }
+    auto psCli = cliMap[tableName];
+
+    // note: ps-lite needs the keys in ascending order
+    vector<KeyWithIdx> elements;
+    for (size_t i = 0; i < keys.size(); i++) {
+        KeyWithIdx e = {keys[i], i};
+        elements.push_back(e);
+    }
+    sort(elements.begin(), elements.end(), CompareKeyWithIdx);
+    vector<emb_cache_key_t> sortedKeys;
+    vector<uint8_t*> sortedEmbeddingsAddr;
+    for (size_t i = 0; i < elements.size(); i++) {
+        sortedKeys.push_back(elements[i].key);
+        sortedEmbeddingsAddr.push_back(embeddingsAddr[elements[i].index]);
+    }
+
+    vector<int> lens(keys.size(), extEmbeddingSize);
+    vector<float> vals(embeddingsAddr.size() * extEmbeddingSize);
+    for (size_t i = 0; i < embeddingsAddr.size(); i++) {
+        auto rc = memcpy_s(vals.data() + i * extEmbeddingSize, extEmbeddingSize * sizeof(float),
+            sortedEmbeddingsAddr[i], extEmbeddingSize * sizeof(float));
+        if (rc != 0) {
+            throw runtime_error("copy embedding data failed");
+        }
+    }
+
+    LOG_DEBUG("start push to server, table:{}, keys.size:{}, vals.size:{}", tableName, keys.size(), vals.size());
+    // push the sorted keys so that they stay aligned with the sorted values
+    int timeStamp = psCli->Push(sortedKeys, vals);
+    psCli->Wait(timeStamp);
+
+    LOG_DEBUG("end push embedding to server, table:{}", tableName);
+}
+
+void PSStore::DeleteEmbeddings(const string& tableName, vector<emb_cache_key_t>& keys)
+{
+    LOG_WARN("ps-lite does not have a delete function, just return");
+    return;
+}
+
+vector<vector<float>> PSStore::FetchEmbeddings(const string& tableName, vector<emb_cache_key_t>& keys)
+{
+    LOG_DEBUG("start pull embedding from server, table:{}, keys.size:{}", tableName, keys.size());
+    if (keys.size() == 0) {
+        return vector<vector<float>>();
+    }
+
+    auto iter = cliMap.find(tableName);
+    if (iter == cliMap.end()) {
+        LOG_DEBUG("table:{} not created yet", tableName);
+        throw runtime_error("table not created yet");
+    }
+    auto psCli = cliMap[tableName];
+
+    // note: ps-lite needs the keys in ascending order
+    vector<KeyWithIdx> elements;
+    for (size_t i = 0; i < keys.size(); i++) {
+        KeyWithIdx e = {keys[i], i};
+        elements.push_back(e);
+    }
+    sort(elements.begin(), elements.end(), CompareKeyWithIdx);
+    vector<emb_cache_key_t> sortedKeys;
+    for (size_t i = 0; i < elements.size(); i++) {
+        sortedKeys.push_back(elements[i].key);
+    }
+
+    // input lens will be stuck at req_data.lens, so we use an environment variable to work around it
+    std::vector<float> rets;
+    psCli->Wait(psCli->Pull(sortedKeys, &rets));
+
+    LOG_DEBUG("finish pull embedding, table:{}, embedding len:{}", tableName, rets.size());
+    if (rets.size() % keys.size() != 0) {
+        LOG_ERROR("can't split received embedding equally, keys.size:{}, embeddings.size:{}",
+            keys.size(), rets.size());
+        throw runtime_error("embedding from server incomplete");
+    }
+
+    auto extEmbSize = rets.size() / keys.size();
+    vector<vector<float>> embs(keys.size());
+    for (size_t i = 0; i < elements.size(); i++) {
+        auto& emb = embs[elements[i].index];
+        emb.insert(emb.cbegin(), rets.cbegin() + i * extEmbSize, rets.cbegin() + (i + 1) * extEmbSize);
+    }
+
+    LOG_DEBUG("end pull embedding from server, table:{}", tableName);
+    return embs;
+}
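+
+// ps-lite has no built-in persistence or table statistics, so the remaining
+// L3Storage hooks below are intentionally no-ops in this example; a production
+// backend would need to implement them.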
LOG_WARN("ps-lite don't have save function, just return"); +} + +void PSStore::Load(const string& tableName, vector savePaths, uint64_t maxTableSize, int step) +{ + LOG_WARN("ps-lite don't have save function, just return"); +} + +void PSStore::Start() +{ + LOG_INFO("start ps store"); +} + +void PSStore::Stop() +{ + LOG_INFO("start stop ps store"); + ps::Finalize(appId, true); + LOG_INFO("finish stop ps store"); +} + +int64_t PSStore::GetTableUsage(const string& tableName) +{ + LOG_WARN("ps-lite don't have GetTableUsage function, just return 0"); + return 0; +} + +vector>> PSStore::ExportTableKey() +{ + LOG_WARN("ps-lite don't have export key function, just return empty result"); + return vector>>(); +} +``` + +### src/core/hybrid_mgmt/hybrid_mgmt.cpp + +```c++ +#include "ps_store/ps_store.h" // new +``` + +```c++ +if (isL3StorageEnabled) { + cacheManager = Singleton::GetInstance(); + // 用户可实现L3Storage接口替换SSDEngine以对接外部存储服务 + auto psStore = std::make_shared(mgmtRankInfo.rankId); // replace + cacheManager->Init(embCache, mgmtEmbInfo, psStore); // replace + EmbeddingMgmt::Instance()->SetCacheManagerForEmbTable(cacheManager); +} +``` + +### 模型代码 + +以dcnV2为例,在run.sh中新增以下环境变量。 + +```shell +# ps-lite info +export DMLC_NUM_SERVER=1 +export DMLC_NUM_WORKER=8 # ausume we run 8 train process + +# scheduler info +export DMLC_PS_ROOT_URI='127.0.0.1' # user can set to remote server +export DMLC_PS_ROOT_PORT=8000 + +# set role as workers +export DMLC_ROLE='worker' + +# mark worker id for train process between multiple train server +# e.g. server A, worker id range [REC_WORKER_ID_START_IDX, +1, ..., +n]; server B, worker id range [REC_WORKER_ID_START_IDX +(n+1), +(n+2), ...] +export REC_WORKER_ID_START_IDX=0 +``` + +在ps-lite目录拉起存储服务 + +```shell +./start_service.sh ./test_schedular ./test_server +``` + +在模型目录拉起训练 + +```shell +# 修改缓存模式为SSD(按上述mxrec源码修改步骤,SSDEngine已被替换为ps-lite,为了不影响对外接口,未修改对外暴露的ssd参数,用户可自行修改) +export CACHE_MODE="SSD" + +./run.sh $LIBSAC_PATH $PYTHON_PATH $HCCL_JSON_PATH $DATA_PATH +``` + + + + + -- Gitee From 3ea537c40fa4f58edd27dff102f135a9ab940647 Mon Sep 17 00:00:00 2001 From: longfeifei <962977793@qq.com> Date: Tue, 11 Jun 2024 14:33:41 +0800 Subject: [PATCH 213/302] =?UTF-8?q?=E5=8E=BB=E6=8E=89=E8=B0=83=E7=94=A8hdf?= =?UTF-8?q?sRead=E3=80=81hdfsWrite=E6=8E=A5=E5=8F=A3=E7=9A=84=E5=BE=AA?= =?UTF-8?q?=E7=8E=AF=E6=AC=A1=E6=95=B0=E9=99=90=E5=88=B6=EF=BC=8C=E6=95=B4?= =?UTF-8?q?=E6=94=B9=E6=97=A5=E5=BF=97?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core/checkpoint/checkpoint.cpp | 20 ++++++------ src/core/emb_table/embedding_ddr.cpp | 4 +-- src/core/emb_table/embedding_dynamic.cpp | 19 ++++++------ src/core/emb_table/embedding_static.cpp | 19 ++++++------ .../hdfs_file_system/hdfs_file_system.cpp | 31 ++++++++++--------- .../hdfs_file_system/hdfs_wrapper.h | 16 +++------- .../local_file_system/local_file_system.cpp | 4 +-- src/core/ssd_engine/table.cpp | 4 +-- src/core/utils/common.h | 3 -- 9 files changed, 56 insertions(+), 64 deletions(-) diff --git a/src/core/checkpoint/checkpoint.cpp b/src/core/checkpoint/checkpoint.cpp index 8a6750d5..abd3a10e 100644 --- a/src/core/checkpoint/checkpoint.cpp +++ b/src/core/checkpoint/checkpoint.cpp @@ -210,13 +210,13 @@ void Checkpoint::WriteStream(CkptTransData& transData, const string& dataDir, si } if (writeBytesNum == -1) { - throw runtime_error(StringFormat("Error: Save data failed. data type: {}. 
" - "An error occurred while writing file: {}.", dataType, dataDir)); + throw runtime_error(StringFormat("Error: Save data failed. data type: %d. " + "An error occurred while writing file: %s.", dataType, dataDir.c_str())); } if (writeBytesNum != dataSize) { - throw runtime_error(StringFormat("Error: Save data failed. data type: {} ." - "Expected to write {} bytes, but actually write {} bytes to file {}.", - dataType, dataSize, writeBytesNum, dataDir)); + throw runtime_error(StringFormat("Error: Save data failed. data type: %d ." + "Expected to write %d bytes, but actually write %d bytes to file %s.", + dataType, dataSize, writeBytesNum, dataDir.c_str())); } } @@ -334,13 +334,13 @@ void Checkpoint::ReadStream(CkptTransData& transData, } if (readBytesNum == -1) { - throw runtime_error(StringFormat("Error: Load data failed. data type: {} ." - "An error occurred while reading file: {}.", dataType, dataDir)); + throw runtime_error(StringFormat("Error: Load data failed. data type: %d ." + "An error occurred while reading file: %s.", dataType, dataDir.c_str())); } if (readBytesNum != datasetSize) { - throw runtime_error(StringFormat("Error: Load data failed. data type: {} ." - "Expected to read {} bytes, but actually read {} bytes to file {}.", - dataType, datasetSize, readBytesNum, dataDir)); + throw runtime_error(StringFormat("Error: Load data failed. data type: %d ." + "Expected to read %d bytes, but actually read %d bytes to file %s.", + dataType, datasetSize, readBytesNum, dataDir.c_str())); } } diff --git a/src/core/emb_table/embedding_ddr.cpp b/src/core/emb_table/embedding_ddr.cpp index 257238b8..167894e5 100644 --- a/src/core/emb_table/embedding_ddr.cpp +++ b/src/core/emb_table/embedding_ddr.cpp @@ -115,8 +115,8 @@ void EmbeddingDDR::LoadKey(const string &savePath, vector &keys } if (result != fileSize) { free(static_cast(buf)); - throw runtime_error(StringFormat("Error: Load keys failed. Expected to read {} bytes, " - "but actually read {} bytes to file {}.", fileSize, result, ss.str())); + throw runtime_error(StringFormat("Error: Load keys failed. Expected to read %d bytes, " + "but actually read %d bytes to file %s.", fileSize, result, ss.str().c_str())); } hostLoadOffset.clear(); diff --git a/src/core/emb_table/embedding_dynamic.cpp b/src/core/emb_table/embedding_dynamic.cpp index a69cf930..7f8cd7e5 100644 --- a/src/core/emb_table/embedding_dynamic.cpp +++ b/src/core/emb_table/embedding_dynamic.cpp @@ -153,11 +153,11 @@ void EmbeddingDynamic::SaveKey(const string& savePath) ssize_t res = fileSystemPtr_->Write(ss.str(), reinterpret_cast(deviceKey.data()), writeSize); if (res == -1) { throw runtime_error(StringFormat("Error: Save keys failed. " - "An error occurred while writing file: {}.", ss.str())); + "An error occurred while writing file: %s.", ss.str().c_str())); } if (res != writeSize) { - throw runtime_error(StringFormat("Error: Save keys failed. Expected to write {} bytes, " - "but actually write {} bytes to file {}.", writeSize, res, ss.str())); + throw runtime_error(StringFormat("Error: Save keys failed. Expected to write %d bytes, " + "but actually write %d bytes to file %s.", writeSize, res, ss.str().c_str())); } } @@ -258,23 +258,24 @@ void EmbeddingDynamic::LoadKey(const string& savePath) } size_t fileSize = fileSystemPtr_->GetFileSize(ss.str()); if (fileSize >= FILE_MAX_SIZE) { - throw runtime_error(StringFormat("Error: Load keys failed. file {} size {} is too big.", ss.str(), fileSize)); + throw runtime_error(StringFormat("Error: Load keys failed. 
" + "file %s size %d is too big.", ss.str().c_str(), fileSize)); } int64_t* buf = static_cast(malloc(fileSize)); if (buf == nullptr) { throw runtime_error(StringFormat("Error: Load keys failed. " - "failed to allocate {} bytes using malloc.", fileSize)); + "failed to allocate %d bytes using malloc.", fileSize)); } ssize_t res = fileSystemPtr_->Read(ss.str(), reinterpret_cast(buf), fileSize); if (res == -1) { throw runtime_error(StringFormat("Error: Load keys failed. " - "An error occurred while reading file: {}.", ss.str())); + "An error occurred while reading file: %s.", ss.str().c_str())); } if (res != fileSize) { - throw runtime_error(StringFormat("Error: Load keys failed. Expected to read {} bytes, " - "but actually read {} bytes to file {}.", fileSize, res, ss.str())); + throw runtime_error(StringFormat("Error: Load keys failed. Expected to read %d bytes, " + "but actually read %d bytes to file %s.", fileSize, res, ss.str().c_str())); } size_t loadKeySize = fileSize / sizeof(int64_t); @@ -293,7 +294,7 @@ void EmbeddingDynamic::LoadKey(const string& savePath) aclError ret = aclrtMalloc(&newBlock, static_cast(datasetSize), ACL_MEM_MALLOC_HUGE_FIRST); if (ret != ACL_SUCCESS) { throw runtime_error(StringFormat("Error: in dynamic expansion mode, " - "aclrtMalloc failed, malloc size: {}.", datasetSize)); + "aclrtMalloc failed, malloc size: %d.", datasetSize)); } // 此处的 newBlock -> first address; // 对key_offset map 进行一个恢复操作 diff --git a/src/core/emb_table/embedding_static.cpp b/src/core/emb_table/embedding_static.cpp index ab66a42c..61874b1f 100644 --- a/src/core/emb_table/embedding_static.cpp +++ b/src/core/emb_table/embedding_static.cpp @@ -99,11 +99,11 @@ void EmbeddingStatic::SaveKey(const string& savePath) ssize_t res = fileSystemPtr_->Write(ss.str(), reinterpret_cast(deviceKey.data()), writeSize); if (res == -1) { throw runtime_error(StringFormat("Error: Save keys failed. " - "An error occurred while writing file: {}.", ss.str())); + "An error occurred while writing file: %s.", ss.str().c_str())); } if (res != writeSize) { - throw runtime_error(StringFormat("Error: Save keys failed. Expected to write {} bytes, " - "but actually write {} bytes to file {}.", writeSize, res, ss.str())); + throw runtime_error(StringFormat("Error: Save keys failed. Expected to write %d bytes, " + "but actually write %d bytes to file %s.", writeSize, res, ss.str().c_str())); } } @@ -122,23 +122,24 @@ void EmbeddingStatic::LoadKey(const string& savePath) } size_t fileSize = fileSystemPtr_->GetFileSize(ss.str()); if (fileSize >= FILE_MAX_SIZE) { - throw runtime_error(StringFormat("Error: Load keys failed. file {} size {} is too big.", ss.str(), fileSize)); + throw runtime_error(StringFormat("Error: Load keys failed. " + "file %s size %d is too big.", ss.str().c_str(), fileSize)); } int64_t* buf = static_cast(malloc(fileSize)); if (buf == nullptr) { throw runtime_error(StringFormat("Error: Load keys failed. " - "failed to allocate {} bytes using malloc.", fileSize)); + "failed to allocate %d bytes using malloc.", fileSize)); } ssize_t res = fileSystemPtr_->Read(ss.str(), reinterpret_cast(buf), fileSize); if (res == -1) { throw runtime_error(StringFormat("Error: Load keys failed. " - "An error occurred while reading file: {}.", ss.str())); + "An error occurred while reading file: %s.", ss.str().c_str())); } if (res != fileSize) { - throw runtime_error(StringFormat("Error: Load keys failed. 
Expected to read {} bytes, " - "but actually read {} bytes to file {}.", fileSize, res, ss.str())); + throw runtime_error(StringFormat("Error: Load keys failed. Expected to read %d bytes, " + "but actually read %d bytes to file %s.", fileSize, res, ss.str().c_str())); } size_t loadKeySize = fileSize / sizeof(int64_t); @@ -154,7 +155,7 @@ void EmbeddingStatic::LoadKey(const string& savePath) if (loadOffset.size() > devVocabSize) { free(static_cast(buf)); - throw runtime_error(StringFormat("Error: Load keys failed. Load key size :{} exceeds device vocab size: {}.", + throw runtime_error(StringFormat("Error: Load keys failed. Load key size :%d exceeds device vocab size: %d.", loadOffset.size(), devVocabSize)); } diff --git a/src/core/file_system/hdfs_file_system/hdfs_file_system.cpp b/src/core/file_system/hdfs_file_system/hdfs_file_system.cpp index 3cbf4a44..45c50f6f 100644 --- a/src/core/file_system/hdfs_file_system/hdfs_file_system.cpp +++ b/src/core/file_system/hdfs_file_system/hdfs_file_system.cpp @@ -53,7 +53,7 @@ size_t HdfsFileSystem::GetFileSize(const string& filePath) { hdfsFileInfo* fileInfo = hdfs->GetPathInfo(fs, filePath.c_str()); if (fileInfo == nullptr) { - throw runtime_error(StringFormat("Error: Unable to get hdfs file info : {}.", filePath.c_str())); + throw runtime_error(StringFormat("Error: Unable to get hdfs file info : %s.", filePath.c_str())); } auto fileSize = static_cast(fileInfo->mSize); return fileSize; @@ -63,7 +63,7 @@ ssize_t HdfsFileSystem::Write(const string& filePath, const char* fileContent, s { hdfsFile file = hdfs->OpenFile(fs, filePath.c_str(), O_WRONLY | O_CREAT, 0, 0, 0); if (!file) { - throw runtime_error(StringFormat("Error: Unable to open hdfs file : {}.", filePath.c_str())); + throw runtime_error(StringFormat("Error: Unable to open hdfs file : %s.", filePath.c_str())); } tSize writeBytesNum = 0; @@ -82,13 +82,13 @@ ssize_t HdfsFileSystem::Write(const string& filePath, vector>& fil { hdfsFile file = hdfs->OpenFile(fs, filePath.c_str(), O_WRONLY | O_CREAT, 0, 0, 0); if (!file) { - throw runtime_error(StringFormat("Error: Unable to open hdfs file : {}.", filePath.c_str())); + throw runtime_error(StringFormat("Error: Unable to open hdfs file : %s.", filePath.c_str())); } tSize writeBytesNum = 0; size_t loops = fileContent.size(); for (size_t i = 0; i < loops; i++) { - tSize res = hdfs->Write(fs, file, reinterpret_cast(&fileContent[i]), dataSize * sizeof(float)); + tSize res = hdfs->Write(fs, file, fileContent[i].data(), dataSize * sizeof(float)); if (res == -1) { hdfs->CloseFile(fs, file); return static_cast(res); @@ -110,7 +110,7 @@ void HdfsFileSystem::WriteEmbedding(const string& filePath, const int& embedding { hdfsFile file = hdfs->OpenFile(fs, filePath.c_str(), O_WRONLY | O_CREAT, 0, 0, 0); if (!file) { - throw runtime_error(StringFormat("Error: Unable to open hdfs file : {}.", filePath.c_str())); + throw runtime_error(StringFormat("Error: Unable to open hdfs file : %s.", filePath.c_str())); } #ifndef GTEST @@ -136,13 +136,13 @@ void HdfsFileSystem::WriteEmbedding(const string& filePath, const int& embedding tSize res = hdfs->Write(fs, file, row.data(), embeddingSize * sizeof(float)); if (res == -1) { hdfs->CloseFile(fs, file); - throw runtime_error(StringFormat("Error: An error occurred while writing file: {}.", filePath.c_str())); + throw runtime_error(StringFormat("Error: An error occurred while writing file: %s.", filePath.c_str())); } if (res != embeddingSize * sizeof(float)) { hdfs->CloseFile(fs, file); - throw 
runtime_error(StringFormat("Error: Expected to write {} bytes, " - "but actually write {} bytes to file {}.", + throw runtime_error(StringFormat("Error: Expected to write %d bytes, " + "but actually write %d bytes to file %s.", embeddingSize * sizeof(float), res, filePath.c_str())); } } @@ -154,10 +154,11 @@ ssize_t HdfsFileSystem::Read(const string& filePath, char* fileContent, size_t d { hdfsFile file = hdfs->OpenFile(fs, filePath.c_str(), O_RDONLY, 0, 0, 0); if (!file) { - throw runtime_error(StringFormat("Error: Unable to open hdfs file : {}.", filePath.c_str())); + throw runtime_error(StringFormat("Error: Unable to open hdfs file : %s.", filePath.c_str())); } tSize readBytesNum = 0; + LOG_INFO("Start to read file : {}", filePath); tSize res = hdfs->Read(fs, file, fileContent, datasetSize); if (res == -1) { hdfs->CloseFile(fs, file); @@ -174,7 +175,7 @@ ssize_t HdfsFileSystem::Read(const string& filePath, vector>& file { hdfsFile file = hdfs->OpenFile(fs, filePath.c_str(), O_RDONLY, 0, 0, 0); if (!file) { - throw runtime_error(StringFormat("Error: Unable to open hdfs file : {}.", filePath.c_str())); + throw runtime_error(StringFormat("Error: Unable to open hdfs file : %s.", filePath.c_str())); } ssize_t readBytesNum = 0; @@ -208,7 +209,7 @@ void HdfsFileSystem::ReadEmbedding(const string& filePath, EmbeddingSizeInfo& em #ifndef GTEST hdfsFile file = hdfs->OpenFile(fs, filePath.c_str(), O_RDONLY, 0, 0, 0); if (!file) { - throw runtime_error(StringFormat("Error: Unable to open hdfs file : {}.", filePath.c_str())); + throw runtime_error(StringFormat("Error: Unable to open hdfs file : %s.", filePath.c_str())); } auto res = aclrtSetDevice(static_cast(deviceId)); @@ -223,19 +224,19 @@ void HdfsFileSystem::ReadEmbedding(const string& filePath, EmbeddingSizeInfo& em int seekRes = hdfs->Seek(fs, file, offset * embedSizeInfo.embeddingSize * sizeof(float)); if (seekRes == -1) { hdfs->CloseFile(fs, file); - throw runtime_error(StringFormat("Error: hdfsSeek failed with error. file offset: {}", + throw runtime_error(StringFormat("Error: hdfsSeek failed with error. 
file offset: %d", offset * embedSizeInfo.embeddingSize * sizeof(float))); } tSize res = hdfs->Read(fs, file, row.data(), embedSizeInfo.embeddingSize * sizeof(float)); if (res == -1) { hdfs->CloseFile(fs, file); - throw runtime_error(StringFormat("Error: An error occurred while reading file: {}.", filePath.c_str())); + throw runtime_error(StringFormat("Error: An error occurred while reading file: %s.", filePath.c_str())); } if (res != embedSizeInfo.embeddingSize * sizeof(float)) { hdfs->CloseFile(fs, file); - throw runtime_error(StringFormat("Error: Expected to read {} bytes, " - "but actually read {} bytes from file {}.", + throw runtime_error(StringFormat("Error: Expected to read %d bytes, " + "but actually read %d bytes from file %s.", embedSizeInfo.embeddingSize * sizeof(float), res, filePath.c_str())); } diff --git a/src/core/file_system/hdfs_file_system/hdfs_wrapper.h b/src/core/file_system/hdfs_file_system/hdfs_wrapper.h index 6b9fe19c..b00913ff 100644 --- a/src/core/file_system/hdfs_file_system/hdfs_wrapper.h +++ b/src/core/file_system/hdfs_file_system/hdfs_wrapper.h @@ -140,11 +140,10 @@ namespace MxRec { throw runtime_error("Failed to obtain the pointer of the function hdfsRead from the libhdfs."); } - tSize reTryCount = 0; tSize unReadLength = length; tSize readBytes = 0; - while (unReadLength != 0 && reTryCount < RETRY_COUNT) { + while (unReadLength != 0) { tSize offset = (length - unReadLength) / sizeof(char); tSize res = hdfsRead(fs, file, buffer + offset, unReadLength); if (res == -1) { @@ -152,7 +151,6 @@ namespace MxRec { } unReadLength -= res; readBytes += res; - reTryCount++; } return readBytes; } @@ -163,11 +161,10 @@ namespace MxRec { throw runtime_error("Failed to obtain the pointer of the function hdfsRead from the libhdfs."); } - tSize reTryCount = 0; tSize unReadLength = length; tSize readBytes = 0; - while (unReadLength != 0 && reTryCount < RETRY_COUNT) { + while (unReadLength != 0) { tSize offset = (length - unReadLength) / sizeof(float); tSize res = hdfsRead(fs, file, buffer + offset, unReadLength); if (res == -1) { @@ -175,7 +172,6 @@ namespace MxRec { } unReadLength -= res; readBytes += res; - reTryCount++; } return readBytes; } @@ -185,11 +181,10 @@ namespace MxRec { if (hdfsWrite == nullptr) { throw runtime_error("Failed to obtain the pointer of the function hdfsWrite from the libhdfs."); } - tSize reTryCount = 0; tSize unWriteLength = length; tSize writeBytes = 0; - while (unWriteLength != 0 && reTryCount < RETRY_COUNT) { + while (unWriteLength != 0) { tSize offset = (length - unWriteLength) / sizeof(char); tSize res = hdfsWrite(fs, file, buffer + offset, unWriteLength); if (res == -1) { @@ -197,7 +192,6 @@ namespace MxRec { } unWriteLength -= res; writeBytes += res; - reTryCount++; } return writeBytes; } @@ -207,11 +201,10 @@ namespace MxRec { if (hdfsWrite == nullptr) { throw runtime_error("Failed to obtain the pointer of the function hdfsWrite from the libhdfs."); } - tSize reTryCount = 0; tSize unWriteLength = length; tSize writeBytes = 0; - while (unWriteLength != 0 && reTryCount < RETRY_COUNT) { + while (unWriteLength != 0) { tSize offset = (length - unWriteLength) / sizeof(float); tSize res = hdfsWrite(fs, file, buffer + offset, unWriteLength); if (res == -1) { @@ -219,7 +212,6 @@ namespace MxRec { } unWriteLength -= res; writeBytes += res; - reTryCount++; } return writeBytes; } diff --git a/src/core/file_system/local_file_system/local_file_system.cpp b/src/core/file_system/local_file_system/local_file_system.cpp index 6215d2ac..e9ddb8a4 100644 --- 
a/src/core/file_system/local_file_system/local_file_system.cpp +++ b/src/core/file_system/local_file_system/local_file_system.cpp @@ -38,13 +38,13 @@ void LocalFileSystem::CreateDir(const string& dirName) while (getline(input, tmp, '/')) { guard++; if (guard > maxDepth) { - throw runtime_error(StringFormat("create directory {} exceed max depth", dirName.c_str())); + throw runtime_error(StringFormat("create directory %s exceed max depth", dirName.c_str())); } ss << tmp << '/'; int ret = mkdir(ss.str().c_str(), dirMode); if (ret != 0 && errno != EEXIST) { LOG_ERROR("Unable to create directory: {} ret:{} error info: {}", dirName, ret, strerror(errno)); - throw runtime_error(StringFormat("create directory {} failed: {}", dirName.c_str(), strerror(errno))); + throw runtime_error(StringFormat("create directory %s failed: %s", dirName.c_str(), strerror(errno))); } } } diff --git a/src/core/ssd_engine/table.cpp b/src/core/ssd_engine/table.cpp index 592cce0e..9e48b0ef 100644 --- a/src/core/ssd_engine/table.cpp +++ b/src/core/ssd_engine/table.cpp @@ -137,7 +137,7 @@ void Table::Save(int step) SetTablePathToDiskWithSpace(); } catch (runtime_error &e) { metaFile.close(); - throw runtime_error(StringFormat("set table path to disk with space error:{}", e.what())); + throw runtime_error(StringFormat("set table path to disk with space error:%s", e.what())); } try { CreateTableDir(curTablePath); @@ -258,7 +258,7 @@ void Table::Load(const string &metaFilePath, int step) LoadDataFileSet(metaFile, step); } catch (exception &e) { metaFile->close(); - throw runtime_error(StringFormat("load data file set error:{}", e.what())); + throw runtime_error(StringFormat("load data file set error: %s", e.what())); } metaFile->close(); if (metaFile->fail()) { diff --git a/src/core/utils/common.h b/src/core/utils/common.h index 4fdb7c8d..0013f27e 100644 --- a/src/core/utils/common.h +++ b/src/core/utils/common.h @@ -81,9 +81,6 @@ namespace MxRec { constexpr int GLOG_TIME_WIDTH_6 = 6; constexpr char GLOG_STAT_FLAG[] = "statOn"; - // for file system - constexpr int RETRY_COUNT = 100; - // unique related config constexpr int UNIQUE_BUCKET = 6; constexpr int MIN_UNIQUE_THREAD_NUM = 1; -- Gitee From 425454bcfb3fbb0ab57d2566833c5ab0ba80aacf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=BD=97=E5=B9=B8=E8=BF=90?= Date: Thu, 13 Jun 2024 04:12:44 +0000 Subject: [PATCH 214/302] =?UTF-8?q?!182=20=E3=80=90=E4=BF=AE=E6=94=B9?= =?UTF-8?q?=E8=AF=B4=E6=98=8E=20Modification=E3=80=91=E6=89=A9=E5=AE=B9?= =?UTF-8?q?=E5=AF=B9=E5=A4=9A=E7=BA=A7=E7=BC=93=E5=AD=98=E7=9A=84=E6=94=AF?= =?UTF-8?q?=E6=8C=81=EF=BC=8Cbug=20fix=20*=20=E3=80=90=E4=BF=AE=E6=94=B9?= =?UTF-8?q?=E8=AF=B4=E6=98=8E=20Modification=E3=80=91=E6=89=A9=E5=AE=B9?= =?UTF-8?q?=E5=AF=B9=E5=A4=9A=E7=BA=A7=E7=BC=93=E5=AD=98=E7=9A=84=E6=94=AF?= =?UTF-8?q?=E6=8C=81=EF=BC=8Cbug=20fix=20*=20=E3=80=90=E4=BF=AE=E6=94=B9?= =?UTF-8?q?=E8=AF=B4=E6=98=8E=20Modification=E3=80=91=E6=89=A9=E5=AE=B9?= =?UTF-8?q?=E5=AF=B9=E5=A4=9A=E7=BA=A7=E7=BC=93=E5=AD=98=E7=9A=84=E6=94=AF?= =?UTF-8?q?=E6=8C=81=EF=BC=8Cbug=20fix=20*=20=E3=80=90=E4=BF=AE=E6=94=B9?= =?UTF-8?q?=E8=AF=B4=E6=98=8E=20Modification=E3=80=91=E6=89=A9=E5=AE=B9?= =?UTF-8?q?=E5=AF=B9=E5=A4=9A=E7=BA=A7=E7=BC=93=E5=AD=98=E7=9A=84=E6=94=AF?= =?UTF-8?q?=E6=8C=81=EF=BC=8Cbug=20fix=20*=20=E3=80=90=E4=BF=AE=E6=94=B9?= =?UTF-8?q?=E8=AF=B4=E6=98=8E=20Modification=E3=80=91=E6=89=A9=E5=AE=B9?= =?UTF-8?q?=E5=AF=B9=E5=A4=9A=E7=BA=A7=E7=BC=93=E5=AD=98=E7=9A=84=E6=94=AF?= =?UTF-8?q?=E6=8C=81=EF=BC=8Cbug=20fix=20*=20=E3=80=90=E4=BF=AE=E6=94=B9?= 
=?UTF-8?q?=E8=AF=B4=E6=98=8E=20Modification=E3=80=91=E6=89=A9=E5=AE=B9?= =?UTF-8?q?=E5=AF=B9=E5=A4=9A=E7=BA=A7=E7=BC=93=E5=AD=98=E7=9A=84=E6=94=AF?= =?UTF-8?q?=E6=8C=81=EF=BC=8Cbug=20fix?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mx_rec/core/embedding.py | 10 +++---- mx_rec/validator/emb_validator.py | 4 +-- src/AccCTR/src/embedding_cache/common.h | 1 + .../offset_mapper/mapper_base.h | 28 +++++++++++++++++-- 4 files changed, 33 insertions(+), 10 deletions(-) diff --git a/mx_rec/core/embedding.py b/mx_rec/core/embedding.py index 8c12eb4c..eaf0c759 100644 --- a/mx_rec/core/embedding.py +++ b/mx_rec/core/embedding.py @@ -32,6 +32,7 @@ from mx_rec.constants.constants import (MAX_INT32, All2allGradientsOp, MAX_VOCAB CacheModeEnum, DEFAULT_DEVICE_CACHE_MEMORY_SIZE, DEFAULT_HOST_CACHE_MEMORY_SIZE, DEFAULT_SSD_CACHE_MEMORY_SIZE) from mx_rec.graph.constants import AnchorIteratorOp +from mx_rec.util.communication.hccl_ops import get_rank_size from mx_rec.util.initialize import ConfigInitializer from mx_rec.validator.validator import ClassValidator, StringValidator, SSDFeatureValidator, \ para_checker_decorator, IntValidator, NumValidator, OptionValidator, OptionalIntValidator, \ @@ -233,12 +234,11 @@ def check_and_set_default_voc_size(voc_size_list: List[int], dim_bytes: int): if cache_mode == CacheModeEnum.DDR.value and voc_size_list[2] > 0: raise ValueError("cache mode DDR, ssd-voc is need to be none") if voc_size_list[0] == 1: - default_device_voc_size = int(DEFAULT_DEVICE_CACHE_MEMORY_SIZE / dim_bytes) - voc_size_list[0] = default_device_voc_size if default_device_voc_size < MAX_VOCABULARY_SIZE \ - else MAX_VOCABULARY_SIZE + default_device_voc_size = int(DEFAULT_DEVICE_CACHE_MEMORY_SIZE / dim_bytes * get_rank_size()) # single rank 2GB + voc_size_list[0] = min(default_device_voc_size, MAX_DEVICE_VOCABULARY_SIZE) if (cache_mode == CacheModeEnum.DDR.value or cache_mode == CacheModeEnum.SSD.value) and voc_size_list[1] == 0: - default_host_voc_size = int(DEFAULT_HOST_CACHE_MEMORY_SIZE / dim_bytes) - voc_size_list[1] = default_host_voc_size if default_host_voc_size < MAX_VOCABULARY_SIZE else MAX_VOCABULARY_SIZE + default_host_voc_size = int(DEFAULT_HOST_CACHE_MEMORY_SIZE / dim_bytes) # total 40GB + voc_size_list[1] = min(default_host_voc_size, MAX_VOCABULARY_SIZE) if cache_mode == CacheModeEnum.SSD.value and voc_size_list[2] == 0: voc_size_list[2] = DEFAULT_SSD_CACHE_MEMORY_SIZE return diff --git a/mx_rec/validator/emb_validator.py b/mx_rec/validator/emb_validator.py index c9d18f05..0c7d7e81 100644 --- a/mx_rec/validator/emb_validator.py +++ b/mx_rec/validator/emb_validator.py @@ -57,8 +57,8 @@ def check_emb_lookup_params(table_params: dict, feature_spec: Union[tf.Tensor, F slice_device_vocabulary_size = table_params.get("slice_device_vocabulary_size") slice_host_vocabulary_size = table_params.get("slice_host_vocabulary_size") table_name = table_params.get("table_name") - if slice_host_vocabulary_size + slice_device_vocabulary_size > MAX_VOCABULARY_SIZE: - raise ValueError(f"Given device_vocabulary_size and host_vocabulary_size was too big for table " + if slice_host_vocabulary_size > MAX_VOCABULARY_SIZE: + raise ValueError(f"given host_vocabulary_size was too big for table " f"'{table_name}', in which slice_device_vocabulary_size was " f"{slice_device_vocabulary_size} and slice_host_vocabulary_size was " f"{slice_host_vocabulary_size}.") diff --git a/src/AccCTR/src/embedding_cache/common.h b/src/AccCTR/src/embedding_cache/common.h index 
72433332..d9841541 100644 --- a/src/AccCTR/src/embedding_cache/common.h +++ b/src/AccCTR/src/embedding_cache/common.h @@ -61,5 +61,6 @@ constexpr float CONSTANT_VALUE_MIN = -1e9; constexpr float INIT_K_MAX = 10000; constexpr float INIT_K_MIN = -10000; const int INVALID_EMB_SIZE = -1; +const size_t MEMSET_S_MAX_SIZE = 2LL * 1024 * 1024 * 1024 - 1; } #endif // MXREC_COMMON_H diff --git a/src/AccCTR/src/embedding_cache/offset_mapper/mapper_base.h b/src/AccCTR/src/embedding_cache/offset_mapper/mapper_base.h index 969845ee..164daaab 100644 --- a/src/AccCTR/src/embedding_cache/offset_mapper/mapper_base.h +++ b/src/AccCTR/src/embedding_cache/offset_mapper/mapper_base.h @@ -313,10 +313,13 @@ public: } /* make physical page and set to zero */ - auto ret = memset_s(tmp, sizeof(NetHashBucket) * bucketCount, 0, sizeof(NetHashBucket) * bucketCount); + auto ret = SafeMemset(tmp, 0, sizeof(NetHashBucket) * bucketCount); if (ret != 0) { - ock::ExternalLogger::PrintLog(ock::LogLevel::ERROR, - "memset_s failed... size: " + std::to_string(sizeof(NetHashBucket) * bucketCount)); + delete[] tmp; + tmp = nullptr; + FreeSubMaps(); + ock::ExternalLogger::PrintLog(ock::LogLevel::ERROR, "memset_s failed... size: " + + std::to_string(sizeof(NetHashBucket) * bucketCount) + ", error code:" + std::to_string(ret)); return false; } @@ -693,6 +696,25 @@ private: } } + /* + * Description: SECUREC_MEM_MAX_LEN of memset_s function is 2GB + * Parameter: dest - destination address + * Parameter: c - the value to be copied + * Parameter: count - copies count bytes of value to dest + */ + int SafeMemset(void* dest, int c, size_t count) + { + char* destBytePtr = reinterpret_cast(dest); + for (size_t i = 0; i < count; i += MEMSET_S_MAX_SIZE) { + size_t bytesOnceSet = (i + MEMSET_S_MAX_SIZE <= count) ? 
MEMSET_S_MAX_SIZE : (count - i); + auto ret = memset_s(destBytePtr + i, bytesOnceSet, c, bytesOnceSet); + if (ret != 0) { + return ret; + } + } + return 0; + } + void FreeOverFlowedEntries() { for (auto &mSubMap : mSubMaps) { -- Gitee From 7b0cfa94f50de57e7c58299797e3e6f9955680fd Mon Sep 17 00:00:00 2001 From: yangzhen_BIG Date: Thu, 13 Jun 2024 03:42:15 +0000 Subject: [PATCH 215/302] cleancode --- src/core/l3_storage/cache_manager.cpp | 4 ++-- src/core/l3_storage/l3_storage.cpp | 2 ++ src/core/l3_storage/l3_storage.h | 27 +++++++++++++-------------- 3 files changed, 17 insertions(+), 16 deletions(-) diff --git a/src/core/l3_storage/cache_manager.cpp b/src/core/l3_storage/cache_manager.cpp index 188f2aaf..75d73b2d 100644 --- a/src/core/l3_storage/cache_manager.cpp +++ b/src/core/l3_storage/cache_manager.cpp @@ -108,12 +108,12 @@ void CacheManager::CreateL3StorageTableIfNotExist(const std::string& embTableNam l3Storage->CreateTable(embTableName, embBaseInfos[embTableName].savePath, embBaseInfos[embTableName].maxTableSize); embBaseInfos[embTableName].isExist = true; - LOG_INFO("create l3Storage table end, embTableName:" + embTableName); + LOG_INFO("create l3Storage table end, embTableName:{}", embTableName); return; } // 续训场景:embBaseInfos 没有保存,不会初始化;L3Storage表会初始化,此时表已存在 embBaseInfos[embTableName].isExist = true; - LOG_INFO("l3Storage table is exist, embTableName:" + embTableName); + LOG_INFO("l3Storage table is exist, embTableName:{}", embTableName); } CacheManager::~CacheManager() diff --git a/src/core/l3_storage/l3_storage.cpp b/src/core/l3_storage/l3_storage.cpp index 6a3ea668..cc26d8a4 100644 --- a/src/core/l3_storage/l3_storage.cpp +++ b/src/core/l3_storage/l3_storage.cpp @@ -17,6 +17,8 @@ See the License for the specific language governing permissions and using MxRec::L3Storage; using MxRec::emb_cache_key_t; +using std::vector; +using std::string; L3Storage::L3Storage() {} diff --git a/src/core/l3_storage/l3_storage.h b/src/core/l3_storage/l3_storage.h index 6462409f..5f7270c1 100644 --- a/src/core/l3_storage/l3_storage.h +++ b/src/core/l3_storage/l3_storage.h @@ -21,9 +21,6 @@ See the License for the specific language governing permissions and #include "utils/common.h" -using std::string; -using std::vector; - namespace MxRec { class L3Storage { @@ -31,32 +28,34 @@ public: L3Storage(); virtual ~L3Storage(); - virtual bool IsTableExist(const string& tableName); + virtual bool IsTableExist(const std::string& tableName); - virtual bool IsKeyExist(const string& tableName, emb_cache_key_t key); + virtual bool IsKeyExist(const std::string& tableName, emb_cache_key_t key); - virtual void CreateTable(const string& tableName, vector savePaths, uint64_t maxTableSize); + virtual void CreateTable(const std::string& tableName, std::vector savePaths, uint64_t maxTableSize); - virtual int64_t GetTableAvailableSpace(const string& tableName); + virtual int64_t GetTableAvailableSpace(const std::string& tableName); - virtual void InsertEmbeddingsByAddr(const string& tableName, vector& keys, - vector& embeddingsAddr, uint64_t extEmbeddingSize); + virtual void InsertEmbeddingsByAddr(const std::string& tableName, std::vector& keys, + std::vector& embeddingsAddr, uint64_t extEmbeddingSize); - virtual void DeleteEmbeddings(const string& tableName, vector& keys); + virtual void DeleteEmbeddings(const std::string& tableName, std::vector& keys); - virtual vector> FetchEmbeddings(const string& tableName, vector& keys); + virtual std::vector> FetchEmbeddings(const std::string& tableName, + std::vector& keys); 
virtual void Save(int step); - virtual void Load(const string& tableName, vector savePaths, uint64_t maxTableSize, int step); + virtual void Load(const std::string& tableName, std::vector savePaths, uint64_t maxTableSize, + int step); virtual void Start(); virtual void Stop(); - virtual int64_t GetTableUsage(const string& tableName); + virtual int64_t GetTableUsage(const std::string& tableName); - virtual vector>> ExportTableKey(); + virtual std::vector>> ExportTableKey(); }; } // namespace MxRec #endif // MX_REC_L3_STORAGE_H \ No newline at end of file -- Gitee From df4494d014cb117a6e31cdbfd5e185984df65a59 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=83=AD=E6=9C=9B?= <1244372993@qq.com> Date: Fri, 14 Jun 2024 16:18:42 +0800 Subject: [PATCH 216/302] =?UTF-8?q?=E9=97=AE=E9=A2=98=E5=8D=95=E5=8F=B7?= =?UTF-8?q?=E3=80=90DTS2024061404223=E3=80=91=EF=BC=9AWideDeep=E6=A8=A1?= =?UTF-8?q?=E5=9E=8B=E7=B2=BE=E5=BA=A6=E5=8A=A3=E5=8C=96=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mx_rec/optimizers/lazy_adam.py | 2 +- mx_rec/optimizers/lazy_adam_by_addr.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/mx_rec/optimizers/lazy_adam.py b/mx_rec/optimizers/lazy_adam.py index 0684a715..ac88afc9 100644 --- a/mx_rec/optimizers/lazy_adam.py +++ b/mx_rec/optimizers/lazy_adam.py @@ -170,7 +170,7 @@ class CustomizedLazyAdam(adam.AdamOptimizer, CustomizedOptimizer): v_t_slice = temp_b2 * old_v_slice + (1 - temp_b2) * math_ops.square(grad) v_update_op = scatter_nd_add(velocity, nd_indices, v_t_slice - old_v_slice) - denominator_slice = math_ops.sqrt(v_t_slice + temp_epsilon) + denominator_slice = math_ops.sqrt(tf.abs(v_t_slice)) + temp_epsilon var_update_op = scatter_nd_add(var, nd_indices, tf.divide(-learning_rate * m_t_slice, denominator_slice)) return control_flow_ops.group(m_update_op, v_update_op, var_update_op) diff --git a/mx_rec/optimizers/lazy_adam_by_addr.py b/mx_rec/optimizers/lazy_adam_by_addr.py index b7887052..1d5aacd2 100644 --- a/mx_rec/optimizers/lazy_adam_by_addr.py +++ b/mx_rec/optimizers/lazy_adam_by_addr.py @@ -136,7 +136,7 @@ class CustomizedLazyAdamByAddress(adam.AdamOptimizer, CustomizedOptimizer): old_v_slice = split_tensors[2] v_t_slice = temp_b2 * old_v_slice + (1 - temp_b2) * math_ops.square(grad) - denominator_slice = math_ops.sqrt(v_t_slice + temp_epsilon) + denominator_slice = math_ops.sqrt(tf.abs(v_t_slice)) + temp_epsilon update_list = [tf.divide(-learning_rate * m_t_slice, denominator_slice)] + [m_t_slice - old_m_slice] + \ [v_t_slice - old_v_slice] update_tensor = tf.concat(update_list, axis=1) -- Gitee From 7f0bd73f37d23788ca5b45aec25238feb7c0fc70 Mon Sep 17 00:00:00 2001 From: yangzhen_BIG Date: Mon, 17 Jun 2024 07:44:31 +0000 Subject: [PATCH 217/302] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=EF=BC=88embCache?= =?UTF-8?q?=EF=BC=89=EF=BC=9Aeval=20swapInPos=E4=B8=8Eh2dEmb=E4=B8=8D?= =?UTF-8?q?=E5=8C=B9=E9=85=8D=E5=AF=BC=E8=87=B4=E7=9A=84=E8=B6=8A=E7=95=8C?= =?UTF-8?q?=E6=88=96=E7=B2=BE=E5=BA=A6=E5=BC=82=E5=B8=B8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core/hybrid_mgmt/hybrid_mgmt.cpp | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/src/core/hybrid_mgmt/hybrid_mgmt.cpp b/src/core/hybrid_mgmt/hybrid_mgmt.cpp index 895715c9..409a0e92 100644 --- a/src/core/hybrid_mgmt/hybrid_mgmt.cpp +++ b/src/core/hybrid_mgmt/hybrid_mgmt.cpp @@ -709,19 +709,19 @@ void HybridMgmt::ProcessEmbInfoDDR(const EmbBaseInfo& 
info, bool& remainBatchOut SendGlobalUniqueVec(info, uniqueKeys, restoreVecSec); + TimeCost swapProcessTC; + auto &swapInPos = swapInKoPair.second; + auto &swapOutPos = swapOutKoPair.second; + auto lastSwapInPos = lastSwapInPosMap[info.name]; + lastSwapInPosMap[info.name] = swapInPos; // 暂存待下一步发送 + auto isNeedReturn = HandleSpecialProcessStatusDDR(info, getAndSendTensorsTC, swapInKoPair, swapOutKoPair); if (isNeedReturn) { return; } - TimeCost swapProcessTC; EnqueueSwapInfo(info, swapInKoPair, swapOutKoPair); - auto &swapInPos = swapInKoPair.second; - auto &swapOutPos = swapOutKoPair.second; - auto lastSwapInPos = lastSwapInPosMap[info.name]; - lastSwapInPosMap[info.name] = swapInPos; // 暂存待下一步发送 - // 下发swaptensor if (info.batchId != 0) { SendTensorForSwap(info, lastSwapInPos, swapOutPos); @@ -1217,22 +1217,21 @@ void HybridMgmt::ProcessEmbInfoL3Storage(const EmbBaseInfo& info, bool& remainBa SendGlobalUniqueVec(info, uniqueKeys, restoreVecSec); - auto isNeedReturn = HandleSpecialProcessStatusL3Storage(info, getAndSendTensorsTC, swapInKoPair, swapOutKoPair); - if (isNeedReturn) { - return; - } - TimeCost swapProcessTC; auto &swapInKeys = swapInKoPair.first; auto &swapInPos = swapInKoPair.second; auto &swapOutKeys = swapOutKoPair.first; auto &swapOutPos = swapOutKoPair.second; - - HandleDataSwapForL3Storage(info, swapInKeys, swapOutKeys); - auto lastSwapInPos = lastSwapInPosMap[info.name]; lastSwapInPosMap[info.name] = swapInPos; // 暂存待下一步发送 + auto isNeedReturn = HandleSpecialProcessStatusL3Storage(info, getAndSendTensorsTC, swapInKoPair, swapOutKoPair); + if (isNeedReturn) { + return; + } + + HandleDataSwapForL3Storage(info, swapInKeys, swapOutKeys); + // 下发swaptensor if (info.batchId != 0) { SendTensorForSwap(info, lastSwapInPos, swapOutPos); -- Gitee From 6804a8e59a945573e0c9901481eae63b9d405b6c Mon Sep 17 00:00:00 2001 From: yangzhen_BIG Date: Mon, 17 Jun 2024 04:32:14 +0000 Subject: [PATCH 218/302] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=EF=BC=88=E4=BF=9D?= =?UTF-8?q?=E5=AD=98=EF=BC=89=EF=BC=9Aestimator=E4=BF=9D=E5=AD=98=E6=AD=A5?= =?UTF-8?q?=E6=95=B0=E6=AD=A3=E5=88=99=E5=8C=B9=E9=85=8D=E6=97=A0=E6=95=88?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mx_rec/saver/saver.py | 9 ++++++--- src/core/hybrid_mgmt/hybrid_mgmt.cpp | 2 +- src/core/utils/common.h | 1 + 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/mx_rec/saver/saver.py b/mx_rec/saver/saver.py index f7c6b9a2..9e0e1d29 100644 --- a/mx_rec/saver/saver.py +++ b/mx_rec/saver/saver.py @@ -36,6 +36,9 @@ from mx_rec.optimizers.base import CustomizedOptimizer from mx_rec.util.tf_version_adapter import npu_ops +SAVE_SPARSE_PATH_PREFIX = "sparse" + + # define save model thread class SaveModelThread(threading.Thread): def __init__(self, saver, sess, result, root_dir, table_name): @@ -128,9 +131,9 @@ class Saver(object): if global_step: if not isinstance(global_step, compat.integral_types): global_step = int(sess.run(global_step)) - ckpt_name = f"sparse-{base_name}-{global_step}" + ckpt_name = f"{SAVE_SPARSE_PATH_PREFIX}-{base_name}-{global_step}" else: - ckpt_name = f"sparse-{base_name}" + ckpt_name = f"{SAVE_SPARSE_PATH_PREFIX}-{base_name}" saving_path = os.path.join(directory, ckpt_name) self.config_instance.train_params_config.sparse_dir = saving_path @@ -185,7 +188,7 @@ class Saver(object): "only local file system and hdfs file system supported. 
") directory, base_name = os.path.split(reading_path) - ckpt_name = f"sparse-{base_name}" + ckpt_name = f"{SAVE_SPARSE_PATH_PREFIX}-{base_name}" reading_path = os.path.join(directory, ckpt_name) if not tf.io.gfile.exists(reading_path): diff --git a/src/core/hybrid_mgmt/hybrid_mgmt.cpp b/src/core/hybrid_mgmt/hybrid_mgmt.cpp index 895715c9..c3b9cbec 100644 --- a/src/core/hybrid_mgmt/hybrid_mgmt.cpp +++ b/src/core/hybrid_mgmt/hybrid_mgmt.cpp @@ -821,7 +821,7 @@ void HybridMgmt::EvictL3StorageKeys(const string& embName, const vector(1. / 3); // hot emb cache percent const string COMBINE_HISTORY_NAME = "combine_table_history"; + const string SAVE_SPARSE_PATH_PREFIX = "sparse"; using emb_key_t = int64_t; using emb_cache_key_t = uint64_t; -- Gitee From 508b62839c5c065e4e5afb5f3bbb447231f3ec4b Mon Sep 17 00:00:00 2001 From: liangrenhao Date: Wed, 19 Jun 2024 11:31:41 +0800 Subject: [PATCH 219/302] add Infer Reference Cases Signed-off-by: liangrenhao --- examples/rec_infer/README.md | 134 +++++++++++++ examples/rec_infer/client.py | 76 ++++++++ examples/rec_infer/client.sh | 4 + examples/rec_infer/input_config.py | 178 ++++++++++++++++++ ...-Performance-optimization-referrence.patch | 72 +++++++ examples/rec_infer/optimize/README.md | 51 +++++ examples/rec_infer/server.sh | 6 + tools/graph_partition/gen_config.py | 54 ++++++ tools/graph_partition/graph_partition.py | 116 ++++++++++++ tools/graph_partition/template.cfg | 57 ++++++ 10 files changed, 748 insertions(+) create mode 100644 examples/rec_infer/README.md create mode 100644 examples/rec_infer/client.py create mode 100644 examples/rec_infer/client.sh create mode 100644 examples/rec_infer/input_config.py create mode 100644 examples/rec_infer/optimize/0001-Performance-optimization-referrence.patch create mode 100644 examples/rec_infer/optimize/README.md create mode 100644 examples/rec_infer/server.sh create mode 100644 tools/graph_partition/gen_config.py create mode 100644 tools/graph_partition/graph_partition.py create mode 100644 tools/graph_partition/template.cfg diff --git a/examples/rec_infer/README.md b/examples/rec_infer/README.md new file mode 100644 index 00000000..573ecafc --- /dev/null +++ b/examples/rec_infer/README.md @@ -0,0 +1,134 @@ +# 推理环境部署 +一、安装依赖包:

+安装开发套件包Ascend-cann-toolkit_{version}_linux-{arch}.run

+安装框架插件包Ascend-cann-tfplugin_{version}_linux-{arch}.run

+安装其他依赖包:

+|依赖包 | 版本限制| +|:---|:---:| +|gcc,g++|8.4及以上版本| +|zip,unzip,libtool,automake|无特定版本要求| +|python|3.7.5| +|TensorFlow| 1.15.0| +|tensorflow-serving-api|1.15.0| +|future|无特定版本要求| +|bazel|0.24.1| +|camake|3.14.0| +|swig|若操作系统为"aarch64",软件安装版本需大于或等于3.0.12。若操作系统架构为"X86_64",软件安装版本需大于或等于4.0.1| +|java|jdk-11| +||| + +二、编译serving +1. 下载TF-serving源码:https://github.com/tensorflow/serving/archive/1.15.0.zip +2. 解压后进入源码目录 +3. 添加TF-serving第三方依赖 + +a)执行如下命令,在“serving-1.15.0/third_party”目录下创建“tf_adapter”文件夹并进入。 +>cd third_party/
+mkdir tf_adapter
+cd tf_adapter
+b) Run the following commands to copy the "libpython3.7m.so.1.0" file into the "tf_adapter" directory and create a symlink.
+> cp /usr/local/python3.7.5/lib/libpython3.7m.so.1.0 .
+ln -s libpython3.7m.so.1.0 libpython3.7m.so
+
+c) Run the following commands to copy "_tf_adapter.so" into the "tf_adapter" directory and rename it to "lib_tf_adapter.so".
+>cp /home/HwHiAiUser/Ascend/tfplugin/latest/python/site-packages/npu_bridge/_tf_adapter.so .
+mv _tf_adapter.so lib_tf_adapter.so
+
+4. Build empty stub libtensorflow_framework.so and _pywrap_tensorflow_internal.so files.
+
+a) In the "tf_adapter" directory, run:
+>vim CMakeLists.txt
+
+b) Write the following content into it:
+```cmake
+file(TOUCH ${CMAKE_CURRENT_BINARY_DIR}/stub.c)
+add_library(_pywrap_tensorflow_internal SHARED ${CMAKE_CURRENT_BINARY_DIR}/stub.c)
+add_library(tensorflow_framework SHARED ${CMAKE_CURRENT_BINARY_DIR}/stub.c)
+```
+
+c) Run `:wq!` to save the file and exit.
+d) Run the following commands to build the empty .so files:
+> mkdir temp
+cd temp
+cmake ..
+make
+mv lib_pywrap_tensorflow_internal.so ../_pywrap_tensorflow_internal.so
+mv libtensorflow_framework.so ../libtensorflow_framework.so
+cd ..
+ln -s libtensorflow_framework.so libtensorflow_framework.so.1
+
+e) Configure the environment:
+```shell
+export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$(pwd)
+```
+
+5. Create a BUILD file in the "tf_adapter" directory and write the following content into it:
+```text
+licenses(["notice"])  # BSD/MIT.
+
+cc_import(
+    name = "tf_adapter",
+    shared_library = "lib_tf_adapter.so",
+    visibility = ["//visibility:public"]
+)
+
+cc_import(
+    name = "tf_python",
+    shared_library = "libpython3.7m.so",
+    visibility = ["//visibility:public"]
+)
+```
+
+6. Edit the BUILD file under "serving-1.15.0/tensorflow_serving/model_servers/" and add the lines marked with `__` (shown in bold in the original document) to the "cc_binary" target:
+
+```text
+cc_binary(
+    name = "tensorflow_model_server",
+    stamp = 1,
+    visibility = [
+        ":testing",
+        "//tensorflow_serving:internal",
+    ],
+    deps = [
+        ":tensorflow_model_server_main_lib",
+        __"//third_party/tf_adapter:tf_adapter",__
+        __"//third_party/tf_adapter:tf_python",__
+        __"@org_tensorflow//tensorflow/compiler/jit:xla_cpu_jit",__
+    ],
+)
+```
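+
+For reference, with the markdown bold markers (`__`) removed, the resulting deps list should read:
+
+```text
+deps = [
+    ":tensorflow_model_server_main_lib",
+    "//third_party/tf_adapter:tf_adapter",
+    "//third_party/tf_adapter:tf_python",
+    "@org_tensorflow//tensorflow/compiler/jit:xla_cpu_jit",
+],
+```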
+
+7. Build TF Serving. In the TF Serving source directory "serving-1.15.0", run:
+
+> bazel --output_user_root=/opt/tf_serving build -c opt --distdir=../depends --cxxopt="-D_GLIBCXX_USE_CXX11_ABI=0" tensorflow_serving/model_servers:tensorflow_model_server
+
+If dependency downloads fail during the build, the TF Serving build dependencies can be downloaded manually (https://www.hiascend.com/document/detail/zh/canncommercial/80RC1/developmentguide/moddevg/onlineinfer1/atlastfserv_26_0011.html).
+
+8. Create a symlink:
+> ln -s /opt/tf_serving/{tf_serving_ID}/execroot/tf_serving/bazel-out/xxx-opt/bin/tensorflow_serving/model_servers/tensorflow_model_server /usr/local/bin/tensorflow_model_server
+
++ {tf_serving_ID} is a random-looking string such as "063944eceea3e72745362a0b6eb12a3c"; substitute the actual value.
++ xxx-opt is a directory generated automatically by the build tool; use whatever name actually appears.
+
+# Script overview
+server.sh/client.sh
+The service launch script and the client request script.
+
+1. Starting the TF Serving server
+Enter the tf_serving_inerence directory, then follow the steps below (a sketch of the launch command follows the list):
+> Change the model path model_base_path in server.sh to the exported SavedModel path,
+> add the tf_adapter third-party dependency used when building tf_serving to the library path: export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/xxx/xxx/serving-1.15.0/third_party/tf_adapter/,
+> source /usr/local/Ascend/ascend-toolkit/set_env.sh
+> sh server.sh
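+
+As a reference, server.sh typically wraps a launch command of the following shape; the port, model name, paths, and config file here are placeholders matching client.py's defaults, not the shipped script:
+
+```shell
+source /usr/local/Ascend/ascend-toolkit/set_env.sh
+tensorflow_model_server \
+    --port=9999 \
+    --model_name=saved_model \
+    --model_base_path=/path/to/exported/savedmodel \
+    --platform_config_file=template.cfg
+```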
+
+If the log shows "Running gRPC ModelServer at 0.0.0.0:xxxx", the server started successfully.
+2. Sending requests to the server
+Run the script: sh client.sh
+A successful inference prints the end-to-end latency.
+
+# Using the graph partition tool
+1. Enter the graph_partition directory and set the model directory in gen_config.py.
+2. Run python3 gen_config.py and launch the model with the generated test1.cfg file; usage:
+> python3 gen_config.py --output_path . --output_filename test1.cfg --model_path savedmodel_path
++ Parameters: output_path (output directory), output_filename (output file name), model_path (input model path)
++ Once the output file is generated, point the --platform_config_file option of the service launch script at it for the configuration to take effect.
+
+# Performance optimization
+1. See the files under the optimize directory.
\ No newline at end of file
diff --git a/examples/rec_infer/client.py b/examples/rec_infer/client.py
new file mode 100644
index 00000000..62a15882
--- /dev/null
+++ b/examples/rec_infer/client.py
@@ -0,0 +1,79 @@
+import time
+
+import grpc
+import numpy as np
+
+import tensorflow as tf
+from input_config import config
+from tensorflow_serving.apis import predict_pb2, prediction_service_pb2_grpc
+
+
+class PredictModelGrpc():
+    def __init__(
+        self,
+        model_name,
+        inputs,
+        input_types,
+        output_name,
+        socket="xxx.xxx.xxx.xxx:8500",
+    ):
+        self.socket = socket
+        self.model_name = model_name
+        self.inputs = inputs
+        self.input_types = input_types
+        self.output_name = output_name
+        self.request, self.stub = self.__get_request()
+
+    def inference(self):
+        for name in self.inputs:
+            self.request.inputs[name].CopyFrom(
+                tf.make_tensor_proto(self.inputs[name], dtype=self.input_types[name])
+            )
+
+        # time 100 requests and report the average end-to-end latency
+        start = time.time()
+        for _ in range(100):
+            result = self.stub.Predict.future(self.request, 1000.0)
+            result.result()
+        elapsed = time.time() - start
+        print("average end-to-end latency: %.2f ms" % (elapsed / 100 * 1000))
+
+    def __get_request(self):
+        channel = grpc.insecure_channel(
+            self.socket,
+            options=[
+                ("grpc.max_send_message_length", 1024 * 1024 * 1024),
+                ("grpc.max_receive_message_length", 1024 * 1024 * 1024),
+            ],
+        )
+        stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)
+        request = predict_pb2.PredictRequest()
+        request.model_spec.name = self.model_name
+        request.model_spec.signature_name = "serving_default"
+
+        return request, stub
+
+
+def gen_inputs():
+    inputs = {}
+    input_types = {}
+    for name in config:
+        input_types[name] = config[name]["dtype"]
+        if config[name]["dtype"] == tf.int32:
+            inputs[name] = np.random.randint(0, 100, size=config[name]["shape"])
+        elif config[name]["dtype"] == tf.float32:
+            inputs[name] = np.random.randint(0, 2, size=config[name]["shape"]) * 1.0
+    return inputs, input_types
+
+
+if __name__ == "__main__":
+    input_datas, types = gen_inputs()
+    model = PredictModelGrpc(
+        model_name="saved_model",
+        inputs=input_datas,
+        input_types=types,
+        output_name="",
+        socket="127.0.0.1:9999",
+    )
+
+    model.inference()
diff --git a/examples/rec_infer/client.sh b/examples/rec_infer/client.sh
new file mode 100644
index 00000000..fa968858
--- /dev/null
+++ b/examples/rec_infer/client.sh
@@ -0,0 +1,4 @@
+source /usr/local/Ascend/ascend-toolkit/set_env.sh
+unset http_proxy
+unset https_proxy
+python3 client.py
\ No newline at end of file
diff --git a/examples/rec_infer/input_config.py b/examples/rec_infer/input_config.py
new file mode 100644
index 00000000..24e28f03
--- /dev/null
+++ b/examples/rec_infer/input_config.py
@@ -0,0 +1,178 @@
+import tensorflow as tf
+
+BATCH_SIZE = 9600
+config = {
+    "feat_0": {"dtype": tf.float32, "shape": [BATCH_SIZE, 40], "name": "feat_0"},
+    "feat_1": {"dtype": tf.int32, "shape": [BATCH_SIZE, 40], "name": "feat_1"},
+    "feat_2": {"dtype": tf.float32, "shape": [BATCH_SIZE, 40], "name": "feat_2"},
+    "feat_3": {"dtype": tf.int32, "shape": [BATCH_SIZE, 1], "name": "feat_3"},
+    "feat_4": {"dtype": tf.int32, "shape": [BATCH_SIZE, 8], "name": "feat_4"},
+    "feat_5": {"dtype": tf.float32, "shape": [BATCH_SIZE, 32], "name": "feat_5"},
+    "feat_6": {"dtype": tf.int32, "shape": [BATCH_SIZE, 6], "name": "feat_6"},
+    "feat_7": {"dtype": tf.int32, "shape": [BATCH_SIZE, 40], "name": "feat_7"},
+    "feat_8": {"dtype": tf.int32, "shape": [BATCH_SIZE, 40], "name": "feat_8"},
+    "feat_9": {"dtype": tf.int32, "shape": [BATCH_SIZE, 16], "name": "feat_9"},
+    "feat_10": {"dtype": 
tf.int32, "shape": [BATCH_SIZE, 40], "name": "feat_10"}, + "feat_11": {"dtype": tf.int32, "shape": [BATCH_SIZE, 8], "name": "feat_11"}, + "feat_12": {"dtype": tf.float32, "shape": [BATCH_SIZE, 480], "name": "feat_12"}, + "feat_13": {"dtype": tf.int32, "shape": [BATCH_SIZE, 8], "name": "feat_13"}, + "feat_14": {"dtype": tf.int32, "shape": [BATCH_SIZE, 40], "name": "feat_14"}, + "feat_15": {"dtype": tf.int32, "shape": [BATCH_SIZE, 1], "name": "feat_15"}, + "feat_16": {"dtype": tf.int32, "shape": [BATCH_SIZE, 8], "name": "feat_16"}, + "feat_17": {"dtype": tf.int32, "shape": [BATCH_SIZE, 8], "name": "feat_17"}, + "feat_18": {"dtype": tf.int32, "shape": [BATCH_SIZE, 1], "name": "feat_18"}, + "feat_19": {"dtype": tf.int32, "shape": [BATCH_SIZE, 8], "name": "feat_19"}, + "feat_20": {"dtype": tf.int32, "shape": [BATCH_SIZE, 40], "name": "feat_20"}, + "feat_21": {"dtype": tf.float32, "shape": [BATCH_SIZE, 32], "name": "feat_21"}, + "feat_22": {"dtype": tf.int32, "shape": [BATCH_SIZE, 1], "name": "feat_22"}, + "feat_23": {"dtype": tf.int32, "shape": [BATCH_SIZE, 40], "name": "feat_23"}, + "feat_24": {"dtype": tf.int32, "shape": [BATCH_SIZE, 10], "name": "feat_24"}, + "feat_25": {"dtype": tf.int32, "shape": [BATCH_SIZE, 6], "name": "feat_25"}, + "feat_26": {"dtype": tf.int32, "shape": [BATCH_SIZE, 40], "name": "feat_26"}, + "feat_27": {"dtype": tf.int32, "shape": [BATCH_SIZE, 8], "name": "feat_27"}, + "feat_28": {"dtype": tf.int32, "shape": [BATCH_SIZE, 36], "name": "feat_28"}, + "feat_29": {"dtype": tf.int32, "shape": [BATCH_SIZE, 40], "name": "feat_29"}, + "feat_30": {"dtype": tf.int32, "shape": [BATCH_SIZE, 8], "name": "feat_30"}, + "feat_31": {"dtype": tf.int32, "shape": [BATCH_SIZE, 1], "name": "feat_31"}, + "feat_32": {"dtype": tf.int32, "shape": [BATCH_SIZE, 1], "name": "feat_32"}, + "feat_33": {"dtype": tf.float32, "shape": [BATCH_SIZE, 256], "name": "feat_33"}, + "feat_34": {"dtype": tf.int32, "shape": [BATCH_SIZE, 40], "name": "feat_34"}, + "feat_35": {"dtype": tf.int32, "shape": [BATCH_SIZE, 40], "name": "feat_35"}, + "feat_36": {"dtype": tf.int32, "shape": [BATCH_SIZE, 8], "name": "feat_36"}, + "feat_37": {"dtype": tf.int32, "shape": [BATCH_SIZE, 1], "name": "feat_37"}, + "feat_38": {"dtype": tf.int32, "shape": [BATCH_SIZE, 1], "name": "feat_38"}, + "feat_39": {"dtype": tf.int32, "shape": [BATCH_SIZE, 1], "name": "feat_39"}, + "feat_40": {"dtype": tf.int32, "shape": [BATCH_SIZE, 6], "name": "feat_40"}, + "feat_41": {"dtype": tf.float32, "shape": [BATCH_SIZE, 32], "name": "feat_41"}, + "feat_42": {"dtype": tf.int32, "shape": [BATCH_SIZE, 6], "name": "feat_42"}, + "feat_43": {"dtype": tf.float32, "shape": [BATCH_SIZE, 40], "name": "feat_43"}, + "feat_44": {"dtype": tf.int32, "shape": [BATCH_SIZE, 6], "name": "feat_44"}, + "feat_45": {"dtype": tf.int32, "shape": [BATCH_SIZE, 7], "name": "feat_45"}, + "feat_46": {"dtype": tf.int32, "shape": [BATCH_SIZE, 4], "name": "feat_46"}, + "feat_47": {"dtype": tf.int32, "shape": [BATCH_SIZE, 8], "name": "feat_47"}, + "feat_48": {"dtype": tf.int32, "shape": [BATCH_SIZE, 4], "name": "feat_48"}, + "feat_49": {"dtype": tf.int32, "shape": [BATCH_SIZE, 40], "name": "feat_49"}, + "feat_50": {"dtype": tf.int32, "shape": [BATCH_SIZE, 40], "name": "feat_50"}, + "feat_51": {"dtype": tf.int32, "shape": [BATCH_SIZE, 40], "name": "feat_51"}, + "feat_52": {"dtype": tf.int32, "shape": [BATCH_SIZE, 40], "name": "feat_52"}, + "feat_53": {"dtype": tf.int32, "shape": [BATCH_SIZE, 40], "name": "feat_53"}, + "feat_54": {"dtype": tf.int32, "shape": [BATCH_SIZE, 100], "name": 
"feat_54"}, + "feat_55": {"dtype": tf.int32, "shape": [BATCH_SIZE, 8], "name": "feat_55"}, + "feat_56": {"dtype": tf.int32, "shape": [BATCH_SIZE, 40], "name": "feat_56"}, + "feat_57": {"dtype": tf.float32, "shape": [BATCH_SIZE, 8], "name": "feat_57"}, + "feat_58": {"dtype": tf.int32, "shape": [BATCH_SIZE, 40], "name": "feat_58"}, + "feat_59": {"dtype": tf.int32, "shape": [BATCH_SIZE, 40], "name": "feat_59"}, + "feat_60": {"dtype": tf.int32, "shape": [BATCH_SIZE, 6], "name": "feat_60"}, + "feat_61": {"dtype": tf.float32, "shape": [BATCH_SIZE, 8], "name": "feat_61"}, + "feat_62": {"dtype": tf.int32, "shape": [BATCH_SIZE, 40], "name": "feat_62"}, + "feat_63": {"dtype": tf.int32, "shape": [BATCH_SIZE, 6], "name": "feat_63"}, + "feat_64": {"dtype": tf.int32, "shape": [BATCH_SIZE, 1], "name": "feat_64"}, + "feat_65": {"dtype": tf.int32, "shape": [BATCH_SIZE, 1], "name": "feat_65"}, + "feat_66": {"dtype": tf.int32, "shape": [BATCH_SIZE, 1], "name": "feat_66"}, + "feat_67": {"dtype": tf.float32, "shape": [BATCH_SIZE, 192], "name": "feat_67"}, + "feat_68": {"dtype": tf.int32, "shape": [BATCH_SIZE, 6], "name": "feat_68"}, + "feat_69": {"dtype": tf.float32, "shape": [BATCH_SIZE, 8], "name": "feat_69"}, + "feat_70": {"dtype": tf.float32, "shape": [BATCH_SIZE, 6, 32], "name": "feat_70"}, + "feat_71": {"dtype": tf.int32, "shape": [BATCH_SIZE, 40], "name": "feat_71"}, + "feat_72": {"dtype": tf.int32, "shape": [BATCH_SIZE, 40], "name": "feat_72"}, + "feat_73": {"dtype": tf.int32, "shape": [BATCH_SIZE, 40], "name": "feat_73"}, + "feat_74": {"dtype": tf.int32, "shape": [BATCH_SIZE, 8], "name": "feat_74"}, + "feat_75": {"dtype": tf.int32, "shape": [BATCH_SIZE, 10], "name": "feat_75"}, + "feat_76": {"dtype": tf.int32, "shape": [BATCH_SIZE, 40], "name": "feat_76"}, + "feat_77": {"dtype": tf.int32, "shape": [BATCH_SIZE, 40], "name": "feat_77"}, + "feat_78": {"dtype": tf.int32, "shape": [BATCH_SIZE, 40], "name": "feat_78"}, + "feat_79": {"dtype": tf.int32, "shape": [BATCH_SIZE, 8], "name": "feat_79"}, + "feat_80": {"dtype": tf.int32, "shape": [BATCH_SIZE, 1], "name": "feat_80"}, + "feat_81": {"dtype": tf.int32, "shape": [BATCH_SIZE, 6], "name": "feat_81"}, + "feat_82": {"dtype": tf.int32, "shape": [BATCH_SIZE, 6], "name": "feat_82"}, + "feat_83": {"dtype": tf.int32, "shape": [BATCH_SIZE, 1], "name": "feat_83"}, + "feat_84": {"dtype": tf.float32, "shape": [BATCH_SIZE, 32], "name": "feat_84"}, + "feat_85": {"dtype": tf.int32, "shape": [BATCH_SIZE, 40], "name": "feat_85"}, + "feat_86": {"dtype": tf.int32, "shape": [BATCH_SIZE, 8], "name": "feat_86"}, + "feat_87": {"dtype": tf.float32, "shape": [BATCH_SIZE, 40], "name": "feat_87"}, + "feat_88": {"dtype": tf.int32, "shape": [BATCH_SIZE, 6], "name": "feat_88"}, + "feat_89": {"dtype": tf.int32, "shape": [BATCH_SIZE, 6], "name": "feat_89"}, + "feat_90": {"dtype": tf.int32, "shape": [BATCH_SIZE, 40], "name": "feat_90"}, + "feat_91": {"dtype": tf.float32, "shape": [BATCH_SIZE, 40], "name": "feat_91"}, + "feat_92": {"dtype": tf.int32, "shape": [BATCH_SIZE, 1], "name": "feat_92"}, + "feat_93": {"dtype": tf.int32, "shape": [BATCH_SIZE, 40], "name": "feat_93"}, + "feat_94": {"dtype": tf.int32, "shape": [BATCH_SIZE, 36], "name": "feat_94"}, + "feat_95": {"dtype": tf.int32, "shape": [BATCH_SIZE, 1], "name": "feat_95"}, + "feat_96": {"dtype": tf.int32, "shape": [BATCH_SIZE, 8], "name": "feat_96"}, + "feat_97": {"dtype": tf.float32, "shape": [BATCH_SIZE, 320], "name": "feat_97"}, + "feat_98": {"dtype": tf.float32, "shape": [BATCH_SIZE, 1], "name": "feat_98"}, + "feat_99": 
{"dtype": tf.int32, "shape": [BATCH_SIZE, 1], "name": "feat_99"}, + "feat_100": {"dtype": tf.int32, "shape": [BATCH_SIZE, 1], "name": "feat_100"}, + "feat_101": {"dtype": tf.int32, "shape": [BATCH_SIZE, 1], "name": "feat_101"}, + "feat_102": {"dtype": tf.int32, "shape": [BATCH_SIZE, 40], "name": "feat_102"}, + "feat_103": {"dtype": tf.int32, "shape": [BATCH_SIZE, 1], "name": "feat_103"}, + "feat_104": {"dtype": tf.int32, "shape": [BATCH_SIZE, 1], "name": "feat_104"}, + "feat_105": {"dtype": tf.int32, "shape": [BATCH_SIZE, 40], "name": "feat_105"}, + "feat_106": {"dtype": tf.int32, "shape": [BATCH_SIZE, 8], "name": "feat_106"}, + "feat_107": {"dtype": tf.int32, "shape": [BATCH_SIZE, 6], "name": "feat_107"}, + "feat_108": {"dtype": tf.int32, "shape": [BATCH_SIZE, 6], "name": "feat_108"}, + "feat_109": {"dtype": tf.int32, "shape": [BATCH_SIZE, 8], "name": "feat_109"}, + "feat_110": {"dtype": tf.int32, "shape": [BATCH_SIZE, 8], "name": "feat_110"}, + "feat_111": {"dtype": tf.int32, "shape": [BATCH_SIZE, 36], "name": "feat_111"}, + "feat_112": {"dtype": tf.int32, "shape": [BATCH_SIZE, 10], "name": "feat_112"}, + "feat_113": {"dtype": tf.int32, "shape": [BATCH_SIZE, 8], "name": "feat_113"}, + "feat_114": {"dtype": tf.float32, "shape": [BATCH_SIZE, 8], "name": "feat_114"}, + "feat_115": {"dtype": tf.float32, "shape": [BATCH_SIZE, 60], "name": "feat_115"}, + "feat_116": {"dtype": tf.int32, "shape": [BATCH_SIZE, 40], "name": "feat_116"}, + "feat_117": {"dtype": tf.int32, "shape": [BATCH_SIZE, 40], "name": "feat_117"}, + "feat_118": {"dtype": tf.int32, "shape": [BATCH_SIZE, 6], "name": "feat_118"}, + "feat_119": {"dtype": tf.int32, "shape": [BATCH_SIZE, 40], "name": "feat_119"}, + "feat_120": {"dtype": tf.int32, "shape": [BATCH_SIZE, 13], "name": "feat_120"}, + "feat_121": {"dtype": tf.int32, "shape": [BATCH_SIZE, 3], "name": "feat_121"}, + "feat_122": {"dtype": tf.int32, "shape": [BATCH_SIZE, 9], "name": "feat_122"}, + "feat_123": {"dtype": tf.int32, "shape": [BATCH_SIZE, 6], "name": "feat_123"}, + "feat_124": {"dtype": tf.int32, "shape": [BATCH_SIZE, 40], "name": "feat_124"}, + "feat_125": {"dtype": tf.int32, "shape": [BATCH_SIZE, 40], "name": "feat_125"}, + "feat_126": {"dtype": tf.int32, "shape": [BATCH_SIZE, 6], "name": "feat_126"}, + "feat_127": {"dtype": tf.int32, "shape": [BATCH_SIZE, 8], "name": "feat_127"}, + "feat_128": {"dtype": tf.int32, "shape": [BATCH_SIZE, 1], "name": "feat_128"}, + "feat_129": {"dtype": tf.int32, "shape": [BATCH_SIZE, 40], "name": "feat_129"}, + "feat_130": {"dtype": tf.int32, "shape": [BATCH_SIZE, 40], "name": "feat_130"}, + "feat_131": {"dtype": tf.int32, "shape": [BATCH_SIZE, 8], "name": "feat_131"}, + "feat_132": {"dtype": tf.int32, "shape": [BATCH_SIZE, 1], "name": "feat_132"}, + "feat_133": {"dtype": tf.int32, "shape": [BATCH_SIZE, 40], "name": "feat_133"}, + "feat_134": {"dtype": tf.int32, "shape": [BATCH_SIZE, 10], "name": "feat_134"}, + "feat_135": {"dtype": tf.int32, "shape": [BATCH_SIZE, 1], "name": "feat_135"}, + "feat_136": {"dtype": tf.int32, "shape": [BATCH_SIZE, 33], "name": "feat_136"}, + "feat_137": {"dtype": tf.int32, "shape": [BATCH_SIZE, 40], "name": "feat_137"}, + "feat_138": {"dtype": tf.int32, "shape": [BATCH_SIZE, 36], "name": "feat_138"}, + "feat_139": {"dtype": tf.int32, "shape": [BATCH_SIZE, 6], "name": "feat_139"}, + "feat_140": {"dtype": tf.float32, "shape": [BATCH_SIZE, 40], "name": "feat_140"}, + "feat_141": {"dtype": tf.int32, "shape": [BATCH_SIZE, 8], "name": "feat_141"}, + "feat_142": {"dtype": tf.int32, "shape": [BATCH_SIZE, 26], 
"name": "feat_142"}, + "feat_143": {"dtype": tf.int32, "shape": [BATCH_SIZE, 40], "name": "feat_143"}, + "feat_144": {"dtype": tf.int32, "shape": [BATCH_SIZE, 1], "name": "feat_144"}, + "feat_145": {"dtype": tf.int32, "shape": [BATCH_SIZE, 8], "name": "feat_145"}, + "feat_146": {"dtype": tf.float32, "shape": [BATCH_SIZE, 8], "name": "feat_146"}, + "feat_147": {"dtype": tf.int32, "shape": [BATCH_SIZE, 40], "name": "feat_147"}, + "feat_148": {"dtype": tf.int32, "shape": [BATCH_SIZE, 6], "name": "feat_148"}, + "feat_149": {"dtype": tf.int32, "shape": [BATCH_SIZE, 40], "name": "feat_149"}, + "feat_150": {"dtype": tf.float32, "shape": [BATCH_SIZE, 8], "name": "feat_150"}, + "feat_151": {"dtype": tf.int32, "shape": [BATCH_SIZE, 6], "name": "feat_151"}, + "feat_152": {"dtype": tf.float32, "shape": [BATCH_SIZE, 7], "name": "feat_152"}, + "feat_153": {"dtype": tf.int32, "shape": [BATCH_SIZE, 8], "name": "feat_153"}, + "feat_154": {"dtype": tf.float32, "shape": [BATCH_SIZE, 8], "name": "feat_154"}, + "feat_155": {"dtype": tf.int32, "shape": [BATCH_SIZE, 40], "name": "feat_155"}, + "feat_156": {"dtype": tf.float32, "shape": [BATCH_SIZE, 8], "name": "feat_156"}, + "feat_157": {"dtype": tf.int32, "shape": [BATCH_SIZE, 6], "name": "feat_157"}, + "feat_158": {"dtype": tf.int32, "shape": [BATCH_SIZE, 8], "name": "feat_158"}, + "feat_159": {"dtype": tf.int32, "shape": [BATCH_SIZE, 8], "name": "feat_159"}, + "feat_160": {"dtype": tf.float32, "shape": [BATCH_SIZE, 40], "name": "feat_160"}, + "feat_161": {"dtype": tf.int32, "shape": [BATCH_SIZE, 40], "name": "feat_161"}, + "feat_162": {"dtype": tf.int32, "shape": [BATCH_SIZE, 36], "name": "feat_162"}, + "feat_163": {"dtype": tf.int32, "shape": [BATCH_SIZE, 6], "name": "feat_163"}, + "feat_164": {"dtype": tf.int32, "shape": [BATCH_SIZE, 40], "name": "feat_164"}, + "feat_165": {"dtype": tf.int32, "shape": [BATCH_SIZE, 40], "name": "feat_165"}, + "feat_166": {"dtype": tf.int32, "shape": [BATCH_SIZE, 6], "name": "feat_166"}, + "feat_167": {"dtype": tf.int32, "shape": [BATCH_SIZE, 40], "name": "feat_167"}, + "feat_168": {"dtype": tf.int32, "shape": [BATCH_SIZE, 6], "name": "feat_168"}, + "feat_169": {"dtype": tf.int32, "shape": [BATCH_SIZE, 40], "name": "feat_169"}, + "feat_170": {"dtype": tf.int32, "shape": [BATCH_SIZE, 40], "name": "feat_170"}, + "feat_172": {"dtype": tf.int32, "shape": [BATCH_SIZE, 1], "name": "feat_172"}, + "feat_173": {"dtype": tf.int32, "shape": [BATCH_SIZE, 1], "name": "feat_173"}, +} diff --git a/examples/rec_infer/optimize/0001-Performance-optimization-referrence.patch b/examples/rec_infer/optimize/0001-Performance-optimization-referrence.patch new file mode 100644 index 00000000..a3576055 --- /dev/null +++ b/examples/rec_infer/optimize/0001-Performance-optimization-referrence.patch @@ -0,0 +1,72 @@ +--- + tensorflow_serving/model_servers/BUILD | 1 + + tensorflow_serving/model_servers/main.cc | 8 +++++++- + tensorflow_serving/model_servers/server.cc | 5 +++++ + tensorflow_serving/model_servers/server.h | 6 +++++- + 4 files changed, 18 insertions(+), 2 deletions(-) + +diff --git a/tensorflow_serving/model_servers/BUILD b/tensorflow_serving/model_servers/BUILD +index f60f3d7..e74a514 100644 +--- a/tensorflow_serving/model_servers/BUILD ++++ b/tensorflow_serving/model_servers/BUILD +@@ -373,6 +373,7 @@ cc_binary( + deps = [ + ":tensorflow_model_server_main_lib", + ], ++ linkops = ["-L/usr/local/lib -lstringlib", "-L/usr/local/lib -ljemalloc"] + ) + + py_test( +diff --git a/tensorflow_serving/model_servers/main.cc 
b/tensorflow_serving/model_servers/main.cc +index 2b83500..3a055d0 100644 +--- a/tensorflow_serving/model_servers/main.cc ++++ b/tensorflow_serving/model_servers/main.cc +@@ -192,7 +192,13 @@ int main(int argc, char** argv) { + "EXPERIMENTAL; CAN BE REMOVED ANYTIME! Load and use " + "TensorFlow Lite model from `model.tflite` file in " + "SavedModel directory instead of the TensorFlow model " +- "from `saved_model.pb` file.")}; ++ "from `saved_model.pb` file."), ++ tensorflow::Flag("set_SyncServerOption_flag", &options.set_SyncServerOption_flag, ++ "if true, the server will configure SyncServerOption"), ++ tensorflow::Flag("NUM_CQS", &options.NUM_CQS, "config NUM_CQS"), ++ tensorflow::Flag("MIN_POLLERS", &options.MIN_POLLERS, "config MIN_POLLERS"), ++ tensorflow::Flag("MAX_POLLERS", &options.MAX_POLLERS, "config MAX_POLLERS"), ++ }; + + const auto& usage = tensorflow::Flags::Usage(argv[0], flag_list); + if (!tensorflow::Flags::Parse(&argc, argv, flag_list)) { +diff --git a/tensorflow_serving/model_servers/server.cc b/tensorflow_serving/model_servers/server.cc +index 9808f9a..b5df129 100644 +--- a/tensorflow_serving/model_servers/server.cc ++++ b/tensorflow_serving/model_servers/server.cc +@@ -330,6 +330,11 @@ Status Server::BuildAndStart(const Options& server_options) { + BuildServerCredentialsFromSSLConfigFile( + server_options.ssl_config_file)); + } ++ if (server_options.set_SyncServerOption_flag) { ++ builder.SetSyncServerOption(::grpc::ServerBuilder::SyncServerOption::NUM_CQS, server_options.NUM_CQS); ++ builder.SetSyncServerOption(::grpc::ServerBuilder::SyncServerOption::MIN_POLLERS, server_options.MIN_POLLERS); ++ builder.SetSyncServerOption(::grpc::ServerBuilder::SyncServerOption::MAX_POLLERS, server_options.MAX_POLLERS); ++ } + builder.RegisterService(model_service_.get()); + builder.RegisterService(prediction_service_.get()); + builder.SetMaxMessageSize(tensorflow::kint32max); +diff --git a/tensorflow_serving/model_servers/server.h b/tensorflow_serving/model_servers/server.h +index 7738f29..90a0994 100644 +--- a/tensorflow_serving/model_servers/server.h ++++ b/tensorflow_serving/model_servers/server.h +@@ -83,7 +83,11 @@ class Server { + bool enforce_session_run_timeout = true; + bool remove_unused_fields_from_bundle_metagraph = true; + bool use_tflite_model = false; +- ++ // SyncServerOption config ++ bool set_SyncServerOption_flag = false; ++ tensorflow::int32 NUM_CQS = 3; ++ tensorflow::int32 MIN_POLLERS = 6; ++ tensorflow::int32 MAX_POLLERS = 12; + Options(); + }; + +-- diff --git a/examples/rec_infer/optimize/README.md b/examples/rec_infer/optimize/README.md new file mode 100644 index 00000000..a6d7cd35 --- /dev/null +++ b/examples/rec_infer/optimize/README.md @@ -0,0 +1,51 @@ +# 链接ARM的optimized-routines库 +在memcpy等接口占比较大的模型中,有性能收益,源码路径为(https://github.com/ARM-software/optimized-routines/tree/v23.01) +```shell +unzip optimized-routines-23.01.zip +cd optimized-routines-23.01 +``` + +在源码基础上,修改代码,修改脚本如下: +```shell +for m in memcmp memcpy memset memmove memrchr strcpy strchrnul strchr strcmp stpcpy strncmp strnlen strrchr; do + for f in $(grep __${m}_aarch64 * -r |awk -F ':' '{print $1}'); do + sed_str1="__${m}_aarch64" + sed_str2="${m}" + sed -i 's!'${sed_str1}'!'${sed_str2}'!g' $f + done +done +``` + +编译: +```shell +make ARCH=aarch64 -j 8 +cp build/lib/libstringlib.so /usr/local/lib/ +``` + +在编译tensorflow serving时链接libstringlib.so,相关修改代码参考0001-Performance-optimization-referrence +运行server时,需要配置环境变量: +```shell +export LD_LIBRARY_PATH=/usr/local/lib/:$LD_LIBRARY_PATH +``` +
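+编译完成后,可用如下命令确认serving二进制已实际链接到该库(示意命令,bazel产物路径以实际编译输出为准): +```shell +ldd bazel-bin/tensorflow_serving/model_servers/tensorflow_model_server | grep stringlib +``` +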
+# 链接jemalloc库 +源码下载链接: https://github.com/jemalloc/jemalloc/archive/refs/tags/5.3.0.tar.gz +编译安装命令如下: +```shell +tar -xzvf jemalloc-5.3.0.tar.gz +cd jemalloc-5.3.0 +./autogen.sh +make -j 8 +make install +``` + +安装完成后,默认安装在/usr/local/lib/,在编译tensorflow serving时链接libjemalloc.so,相关修改代码参考0001-Performance-optimization-referrence +运行server时,需要配置环境变量: +```shell +export LD_LIBRARY_PATH=/usr/local/lib/:$LD_LIBRARY_PATH +``` + +# gRPC配置优化 +增加NUM_CQS,MIN_POLLERS,MAX_POLLERS这三个配置项的配置,在多线程请求推理场景可以提升性能 +配置项参考gRPC官网(https://grpc.github.io/grpc/cpp/classgrpc_1_1_server_builder.html) +具体修改参考0001-Performance-optimization-referrence,配置最优值根据不同模型和机器可能有所不同; diff --git a/examples/rec_infer/server.sh b/examples/rec_infer/server.sh new file mode 100644 index 00000000..50735b0f --- /dev/null +++ b/examples/rec_infer/server.sh @@ -0,0 +1,6 @@ +taskset -c 0-32 /home/lmp/serving-1.15.0/bazel-bin/tensorflow_serving/model_servers/tensorflow_model_server \ + --model_name=saved_model \ + --model_base_path=$(pwd)/inference_model/saved_model/ \ + --port=9999 \ + --rest_api_port=9991 \ + --platform_config_file=test.cfg \ No newline at end of file diff --git a/tools/graph_partition/gen_config.py b/tools/graph_partition/gen_config.py new file mode 100644 index 00000000..8e80a182 --- /dev/null +++ b/tools/graph_partition/gen_config.py @@ -0,0 +1,54 @@ +import argparse +import os + +import tensorflow as tf +from graph_partition import GraphPartitioner + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="") + parser.add_argument("--model_path", type=str, default="./") + parser.add_argument("--output_path", type=str, default="./") + parser.add_argument("--output_filename", type=str, default="config.cfg") + args = parser.parse_args() + + signature_def = "serving_default" + + # 模型配置 + embedding_lookup_op_type = ["Sum"] + heavy_load_ops = ["MatMul"] # 必须下沉的算子(暂时没用到) + use_whole_graph = False + partition_to_first_heavy_load = False + ######################################################### + + output_filepath = os.path.join(args.output_path, args.output_filename) + + with tf.compat.v1.Session() as sess: + meta_graph = tf.compat.v1.saved_model.loader.load( + sess, ["serve"], args.model_path + ) + ops = sess.graph.get_operations() + graph_partitioner = GraphPartitioner() + + graph_partitioner.graph = sess.graph + graph_partitioner.signature_def = meta_graph.signature_def.get(signature_def) + graph_partitioner.set_embedding_lookup_op_type(embedding_lookup_op_type) + + inputs, outputs = graph_partitioner.get_sub_graph() + + res_string = "[[" + inputs + "," + outputs + "]]" + + ori_test = open("template.cfg") + template = ori_test.read() + output = template.replace("#value@in_out_pair#", res_string) + if os.path.exists(output_filepath): + os.remove(output_filepath) + + # open text file + text_file = os.fdopen(os.open(output_filepath, os.O_WRONLY | os.O_CREAT, 0o666), "w") + + # write string to file + n = text_file.write(output) + + # close file + text_file.close() + ori_test.close() diff --git a/tools/graph_partition/graph_partition.py b/tools/graph_partition/graph_partition.py new file mode 100644 index 00000000..6e01e6e6 --- /dev/null +++ b/tools/graph_partition/graph_partition.py @@ -0,0 +1,116 @@ +import tensorflow as tf +from tensorflow.contrib import graph_editor as ge + + +class GraphPartitioner: + def __init__(self): + self.signature_def = None + self.graph = None + self.op_node_lookup = dict() + self.input_op_nodes = [] + self.output_op_nodes = [] + self.tensor_node_lookup = dict() + self.heavy_load_ops = [] + 
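# op types that mark the embedding lookup boundary, set via set_embedding_lookup_op_type() (["Sum"] in gen_config.py) +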
self.embedding_lookup_op_type = None + self.first_heavy_load_on_sparse_path = set() + self.first_op_after_lookup = [] + self.seen = set() + self.post_out = set() + self.partition_to_first_heavy_load = False + + self.sparse_lookup_ops = [] + self.sparse_lookup_tensors = [] + self.input_nodes = [] + self.output_nodes = [] + + @staticmethod + def has_gray_downstreams(op): + gray_list = ["DynamicPartition"] + down_ops = ge.get_forward_walk_ops([op]) + for op in down_ops: + if op.type in gray_list: + return True + return False + + def set_embedding_lookup_op_type(self, s): + self.embedding_lookup_op_type = s + + def get_sub_graph(self): + for op in self.graph.get_operations(): + if self._is_embedding_lookup(op): + self.sparse_lookup_ops.append(op) + if not self.sparse_lookup_ops: + for op in self.graph.get_operations(): + is_top_op = True + for op1 in self.graph.get_operations(): + for tensor in op1.outputs: + if tensor in op.inputs: + is_top_op = False + break + if not is_top_op: + break + if is_top_op: + self.sparse_lookup_ops.append(op) + check_ops = self.sparse_lookup_ops + self.sparse_lookup_ops = [] + for op in check_ops: + if not self.has_gray_downstreams(op): + self.sparse_lookup_ops.append(op) + self.sparse_lookup_tensors.extend(op.outputs) + + for op in self.graph.get_operations(): + for tensor in self.sparse_lookup_tensors: + if tensor in op.inputs: + self.input_nodes.append(op) + for k, v in self.signature_def.outputs.items(): + op_name = ( + str(v) + .split("\n")[0] + .replace(" ", "") + .replace('"', "") + .split(":")[1] + .split(":")[0] + ) + for op in self.graph.get_operations(): + if op.name == op_name: + self.output_nodes.append(op) + + float_ups = [] + to_expand = [] + in_str = [] + + for op in self.input_nodes: + if op.type not in float_ups: + if op.name not in in_str: + in_str.append(op.name) + else: + to_expand.append(op) + + while to_expand: + candidates = [] + for top in to_expand: + for op in self.graph.get_operations(): + for tensor in op.inputs: + if tensor in top.outputs: + candidates.append(op) + to_expand = [] + for op in candidates: + if op.type not in float_ups: + if op.name not in in_str: + in_str.append(op.name) + else: + to_expand.append(op) + return str(in_str), str([op.name for op in self.output_nodes]) + + def _is_embedding_lookup(self, op): + if op.type in self.embedding_lookup_op_type: + return True + + return False + + def _check_op_status(self): + unseen_list = [] + for name, op_node in self.op_node_lookup.items(): + if not op_node.seen: + unseen_list.append(name) + return unseen_list diff --git a/tools/graph_partition/template.cfg b/tools/graph_partition/template.cfg new file mode 100644 index 00000000..3227bdea --- /dev/null +++ b/tools/graph_partition/template.cfg @@ -0,0 +1,57 @@ +platform_configs { + key: "tensorflow" + value { + source_adapter_config { + [type.googleapis.com/tensorflow.serving.SaveModelBundleSourceAdapterConfig] { + legacy_config { + session_config { + graph_options { + rewrite_options { + custom_optimizers { + name: "NpuOptimizer" + parameter_map: { + key:"use_off_line" + value:{ + b:true + } + } + parameter_map: { + key:"mix_compile_mode" + value:{ + b:true + } + } + parameter_map: { + key:"variable_placement" + value:{ + s:"Host" + } + } + parameter_map: { + key:"graph_run_mode" + value:{ + i:0 + } + } + parameter_map: { + key:"precision_mode" + value:{ + s:"must_keep_origin_dtype" + } + } + parameter_map: { + key:"in_out_pair" + value:{ + s:"#value@in_out_pair#" + } + } + } + remapping: OFF + } + } + } + } + } + } + } +} \ No 
newline at end of file -- Gitee From 4401dcbce1940cf8c1c5de6afebe23bbe55bb38d Mon Sep 17 00:00:00 2001 From: steepcurve Date: Wed, 19 Jun 2024 07:47:34 +0000 Subject: [PATCH 220/302] bugfix: add `GlobalTensor` cache mode explicitly Signed-off-by: steepcurve --- .../op_kernel/embedding_lookup_by_address.cpp | 5 +++++ .../op_kernel/embedding_update_by_address.cpp | 7 +++++++ 2 files changed, 12 insertions(+) diff --git a/cust_op/cust_op_by_addr/op_kernel/embedding_lookup_by_address.cpp b/cust_op/cust_op_by_addr/op_kernel/embedding_lookup_by_address.cpp index cc45c5be..0d9babc8 100644 --- a/cust_op/cust_op_by_addr/op_kernel/embedding_lookup_by_address.cpp +++ b/cust_op/cust_op_by_addr/op_kernel/embedding_lookup_by_address.cpp @@ -44,6 +44,10 @@ public: pipe.InitBuffer(inQueue, pingpongNum, veclen); pipe.InitBuffer(outQueue, pingpongNum, veclen); + // set `GlobalTensor` cache mode explicitly + srcAddrGlobal.SetL2CacheHint(CacheMode::CACHE_MODE_NORMAL); + dstDataGm.SetL2CacheHint(CacheMode::CACHE_MODE_NORMAL); + // get start index for current core, core parallel block_indx block_dim,即使是最后一个核也应该多初始化一些,并对齐4的倍数 srcAddrGlobal.SetGlobalBuffer((__gm__ int64_t *)(address + block_idx * singleCoreAddrLen), needComputeAddrLen); dstDataGm.SetGlobalBuffer((__gm__ T *)(y)); @@ -111,6 +115,7 @@ private: int64_t address = srcAddrLocal.GetValue(i); if (address != 0) { + srcDataBufferGm.SetL2CacheHint(CacheMode::CACHE_MODE_NORMAL); srcDataBufferGm.SetGlobalBuffer((__gm__ T *)(address), embDimAligned); DataCopy(dataLocal[embDimAligned * nums], srcDataBufferGm, embDimAligned); } else { diff --git a/cust_op/cust_op_by_addr/op_kernel/embedding_update_by_address.cpp b/cust_op/cust_op_by_addr/op_kernel/embedding_update_by_address.cpp index 828d7fbe..cfefb021 100644 --- a/cust_op/cust_op_by_addr/op_kernel/embedding_update_by_address.cpp +++ b/cust_op/cust_op_by_addr/op_kernel/embedding_update_by_address.cpp @@ -40,6 +40,11 @@ public: pipe.InitBuffer(inQueue, pingpongNum, veclen); pipe.InitBuffer(outQueue, pingpongNum, veclen); + // set `GlobalTensor` cache mode explicitly + srcAddrGlobal.SetL2CacheHint(CacheMode::CACHE_MODE_NORMAL); + srcDataBufferGm.SetL2CacheHint(CacheMode::CACHE_MODE_NORMAL); + outDataGm.SetL2CacheHint(CacheMode::CACHE_MODE_NORMAL); + // get start index for current core, core parallel block_indx block_dim srcAddrGlobal.SetGlobalBuffer((__gm__ int64_t *)(address + block_idx * singleCoreAddrLen)); srcDataBufferGm.SetGlobalBuffer((__gm__ T *)(embedding + block_idx * singleCoreAddrLen @@ -112,6 +117,7 @@ private: for (int i = 0; i < addrNum; i++) { address = srcAddrLocal.GetValue(i); if (address != 0) { + dstDataGm.SetL2CacheHint(CacheMode::CACHE_MODE_NORMAL); dstDataGm.SetGlobalBuffer((__gm__ T*)(address)); DataCopy(dstDataGm, dstLocal[i * inputDimAligned], inputDimAligned); } @@ -150,6 +156,7 @@ private: LocalTensor dstLocal = outQueue.DeQue(); if (address != 0) { + dstDataGm.SetL2CacheHint(CacheMode::CACHE_MODE_NORMAL); dstDataGm.SetGlobalBuffer((__gm__ T *)(address)); if (updateType == 0) { -- Gitee From c12cf34f3f53d304d10eb71702650884f9eb4c56 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=BD=97=E5=B9=B8=E8=BF=90?= Date: Wed, 19 Jun 2024 15:53:04 +0800 Subject: [PATCH 221/302] =?UTF-8?q?=E3=80=90=E4=BF=AE=E6=94=B9=E8=AF=B4?= =?UTF-8?q?=E6=98=8E=20Modification=E3=80=91=E6=89=A9=E5=AE=B9=E5=AF=B9?= =?UTF-8?q?=E5=A4=9A=E7=BA=A7=E7=BC=93=E5=AD=98=E7=9A=84=E6=94=AF=E6=8C=81?= =?UTF-8?q?=EF=BC=8C=E4=BF=AE=E5=A4=8Dsave=E6=8A=A5=E9=94=99=E5=92=8Cdestr?= =?UTF-8?q?oy=E5=8D=A1=E9=A1=BF=E9=97=AE=E9=A2=98?= 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mx_rec/constants/constants.py | 2 -- mx_rec/core/embedding.py | 5 ++--- .../src/embedding_cache/offset_mapper/address_mapper.h | 8 ++------ src/core/hybrid_mgmt/hybrid_mgmt.cpp | 2 ++ src/core/l3_storage/preprocess_mapper.h | 2 +- 5 files changed, 7 insertions(+), 12 deletions(-) diff --git a/mx_rec/constants/constants.py b/mx_rec/constants/constants.py index fd27fc27..50c8dd2e 100644 --- a/mx_rec/constants/constants.py +++ b/mx_rec/constants/constants.py @@ -29,8 +29,6 @@ EMPTY_STR = "" # default emb memory size for hbm、ddr、ssd DEFAULT_DEVICE_CACHE_MEMORY_SIZE = 2 * 1024 * 1024 * 1024 DEFAULT_HOST_CACHE_MEMORY_SIZE = 40 * 1024 * 1024 * 1024 -DEFAULT_SSD_CACHE_MEMORY_SIZE = sys.maxsize - # 获取ConfigInitializer对象实例失败提示信息 GET_CONFIG_INSTANCE_ERR_MSG = "Please init the environment for mx_rec at first." diff --git a/mx_rec/core/embedding.py b/mx_rec/core/embedding.py index eaf0c759..23eb86aa 100644 --- a/mx_rec/core/embedding.py +++ b/mx_rec/core/embedding.py @@ -29,8 +29,7 @@ from mx_rec.core.emb.base_sparse_embedding import BaseSparseEmbedding from mx_rec.core.emb.emb_factory import HBMDynamicSparseEmbeddingFactory, HBMSparseEmbeddingFactory, \ ExternalStorageSparseEmbeddingFactory from mx_rec.constants.constants import (MAX_INT32, All2allGradientsOp, MAX_VOCABULARY_SIZE, MAX_DEVICE_VOCABULARY_SIZE, - CacheModeEnum, DEFAULT_DEVICE_CACHE_MEMORY_SIZE, DEFAULT_HOST_CACHE_MEMORY_SIZE, - DEFAULT_SSD_CACHE_MEMORY_SIZE) + CacheModeEnum, DEFAULT_DEVICE_CACHE_MEMORY_SIZE, DEFAULT_HOST_CACHE_MEMORY_SIZE) from mx_rec.graph.constants import AnchorIteratorOp from mx_rec.util.communication.hccl_ops import get_rank_size from mx_rec.util.initialize import ConfigInitializer @@ -240,5 +239,5 @@ def check_and_set_default_voc_size(voc_size_list: List[int], dim_bytes: int): default_host_voc_size = int(DEFAULT_HOST_CACHE_MEMORY_SIZE / dim_bytes) # total 40GB voc_size_list[1] = min(default_host_voc_size, MAX_VOCABULARY_SIZE) if cache_mode == CacheModeEnum.SSD.value and voc_size_list[2] == 0: - voc_size_list[2] = DEFAULT_SSD_CACHE_MEMORY_SIZE + voc_size_list[2] = MAX_VOCABULARY_SIZE return diff --git a/src/AccCTR/src/embedding_cache/offset_mapper/address_mapper.h b/src/AccCTR/src/embedding_cache/offset_mapper/address_mapper.h index 649b2d8a..eac5f46d 100644 --- a/src/AccCTR/src/embedding_cache/offset_mapper/address_mapper.h +++ b/src/AccCTR/src/embedding_cache/offset_mapper/address_mapper.h @@ -92,10 +92,7 @@ public: ~AutoRefillEmbeddingMemoryPool() { - { - std::lock_guard lock(producerMutex); - stop = true; - } + stop = true; producerCv.notify_all(); fullCv.notify_all(); for (auto& t : producerThreads) { @@ -105,7 +102,6 @@ public: void Stop() { - std::lock_guard lock(producerMutex); stop = true; producerCv.notify_all(); fullCv.notify_all(); @@ -141,7 +137,7 @@ private: uint64_t totalLeftVocabSize; uint32_t numThreads; std::atomic currBufferSize{0}; - volatile bool stop = false; + volatile std::atomic stop = false; volatile std::atomic full = false; std::mutex producerMutex; std::mutex getAddrMutex; diff --git a/src/core/hybrid_mgmt/hybrid_mgmt.cpp b/src/core/hybrid_mgmt/hybrid_mgmt.cpp index 30c41e0c..c38aa131 100644 --- a/src/core/hybrid_mgmt/hybrid_mgmt.cpp +++ b/src/core/hybrid_mgmt/hybrid_mgmt.cpp @@ -449,6 +449,8 @@ void HybridMgmt::Destroy() procThreads.clear(); // 停止预处理 KEY_PROCESS_INSTANCE->Destroy(); + // stop embCache, even if the host emb is still allocating + embCache->Destroy(); LOG_DEBUG(MGMT + "Destroy 
hybrid_mgmt module end."); } diff --git a/src/core/l3_storage/preprocess_mapper.h b/src/core/l3_storage/preprocess_mapper.h index fd28677f..0fc8e4d8 100644 --- a/src/core/l3_storage/preprocess_mapper.h +++ b/src/core/l3_storage/preprocess_mapper.h @@ -26,7 +26,7 @@ namespace MxRec { */ class PreProcessMapper { public: - void Initialize(const string& embName, uint32_t ddrVocabSize, uint32_t l3StorageVocabSize) + void Initialize(const string& embName, size_t ddrVocabSize, size_t l3StorageVocabSize) { tableName = embName; lfuCache = LFUCache(embName); -- Gitee From a49ed83833b0259bb5b9a0a8b8fd9d0798092dd9 Mon Sep 17 00:00:00 2001 From: steepcurve Date: Wed, 19 Jun 2024 09:24:19 +0000 Subject: [PATCH 222/302] bugfix: add `GlobalTensor` cache mode explicitly Signed-off-by: steepcurve --- .../cust_op_by_addr/op_kernel/embedding_lookup_by_address.cpp | 2 +- .../cust_op_by_addr/op_kernel/embedding_update_by_address.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cust_op/cust_op_by_addr/op_kernel/embedding_lookup_by_address.cpp b/cust_op/cust_op_by_addr/op_kernel/embedding_lookup_by_address.cpp index 0d9babc8..e198b6c0 100644 --- a/cust_op/cust_op_by_addr/op_kernel/embedding_lookup_by_address.cpp +++ b/cust_op/cust_op_by_addr/op_kernel/embedding_lookup_by_address.cpp @@ -115,7 +115,7 @@ private: int64_t address = srcAddrLocal.GetValue(i); if (address != 0) { - srcDataBufferGm.SetL2CacheHint(CacheMode::CACHE_MODE_NORMAL); + srcDataBufferGm.SetL2CacheHint(CacheMode::CACHE_MODE_NORMAL); srcDataBufferGm.SetGlobalBuffer((__gm__ T *)(address), embDimAligned); DataCopy(dataLocal[embDimAligned * nums], srcDataBufferGm, embDimAligned); } else { diff --git a/cust_op/cust_op_by_addr/op_kernel/embedding_update_by_address.cpp b/cust_op/cust_op_by_addr/op_kernel/embedding_update_by_address.cpp index cfefb021..5d496ee8 100644 --- a/cust_op/cust_op_by_addr/op_kernel/embedding_update_by_address.cpp +++ b/cust_op/cust_op_by_addr/op_kernel/embedding_update_by_address.cpp @@ -42,7 +42,7 @@ public: // set `GlobalTensor` cache mode explicitly srcAddrGlobal.SetL2CacheHint(CacheMode::CACHE_MODE_NORMAL); - srcDataBufferGm.SetL2CacheHint(CacheMode::CACHE_MODE_NORMAL); + srcDataBufferGm.SetL2CacheHint(CacheMode::CACHE_MODE_NORMAL); outDataGm.SetL2CacheHint(CacheMode::CACHE_MODE_NORMAL); // get start index for current core, core parallel block_indx block_dim -- Gitee From c4eb0df086df5aa4ed7de4d7f998492e547696a5 Mon Sep 17 00:00:00 2001 From: penghuiyang <1060916628@qq.com> Date: Wed, 19 Jun 2024 21:41:21 +0800 Subject: [PATCH 223/302] =?UTF-8?q?=E3=80=90bugfix=E3=80=91=E4=BF=AE?= =?UTF-8?q?=E5=A4=8DDDR=E6=A8=A1=E5=BC=8Fdevice=E4=BE=A7=E5=A4=84=E7=90=86?= =?UTF-8?q?=E8=BE=83=E5=BF=AB=E6=97=B6host=E4=BE=A7=E6=8F=90=E5=89=8D?= =?UTF-8?q?=E5=8F=91=E9=80=81eos=E4=BF=A1=E6=81=AF=E5=9C=BA=E6=99=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core/key_process/key_process.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/core/key_process/key_process.cpp b/src/core/key_process/key_process.cpp index 63163453..4207852f 100644 --- a/src/core/key_process/key_process.cpp +++ b/src/core/key_process/key_process.cpp @@ -1270,8 +1270,12 @@ bool KeyProcess::IsGetUniqueKeysEos(const EmbBaseInfo& info, std::chrono::_V2::s lookUpSwapInAddrsPushId[info.name]); startTime = std::chrono::system_clock::now(); } + // hybridMgmtBlock->h2dNextBatchId[info.name] used by postfix increment, the last value will be grater than + // readEmbKeyBatchId and 
equals readEmbKeyBatchId + 1. + // Check '> readEmbKeyBatchId' condition to avoid send eos before handle all batch data from readEmbKey Op. if (isNeedSendEos[info.channelId] && readEmbKeyBatchId < info.batchId && - hybridMgmtBlock->h2dNextBatchId[info.name] == lookUpSwapInAddrsPushId[info.name]) { + hybridMgmtBlock->h2dNextBatchId[info.name] == lookUpSwapInAddrsPushId[info.name] && + hybridMgmtBlock->h2dNextBatchId[info.name] > readEmbKeyBatchId) { LOG_INFO("table:{}, channelId:{} batchId:{}, GetUniqueKeys eos", info.name, info.channelId, info.batchId); return true; -- Gitee From ef5bcd2886b40904f98d90fcee839c2fe4ba03b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=BD=97=E5=B9=B8=E8=BF=90?= Date: Thu, 20 Jun 2024 08:59:19 +0800 Subject: [PATCH 224/302] =?UTF-8?q?=E3=80=90=E4=BF=AE=E6=94=B9=E8=AF=B4?= =?UTF-8?q?=E6=98=8E=20Modification=E3=80=91=E6=89=A9=E5=AE=B9=E5=AF=B9?= =?UTF-8?q?=E5=A4=9A=E7=BA=A7=E7=BC=93=E5=AD=98=E7=9A=84=E6=94=AF=E6=8C=81?= =?UTF-8?q?=EF=BC=8C=E4=BF=AE=E5=A4=8Dsave=E6=8A=A5=E9=94=99=E5=92=8Cdestr?= =?UTF-8?q?oy=E5=8D=A1=E9=A1=BF=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mx_rec/constants/constants.py | 1 - 1 file changed, 1 deletion(-) diff --git a/mx_rec/constants/constants.py b/mx_rec/constants/constants.py index 50c8dd2e..f8558cd9 100644 --- a/mx_rec/constants/constants.py +++ b/mx_rec/constants/constants.py @@ -14,7 +14,6 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -import sys from enum import Enum import numpy as np -- Gitee From f309355edfe4a1771f5c9e70e15f6ba9e97f7932 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=BD=95=E9=9C=96?= Date: Thu, 20 Jun 2024 10:03:53 +0800 Subject: [PATCH 225/302] =?UTF-8?q?=E5=88=A0=E9=99=A4=E6=9E=84=E5=BB=BAmxR?= =?UTF-8?q?ec=E6=97=B6=E4=BC=9A=E7=BC=96=E8=AF=91=E5=AE=89=E8=A3=85?= =?UTF-8?q?=E6=89=A9=E5=AE=B9=E7=AE=97=E5=AD=90=E7=9A=84=E9=80=BB=E8=BE=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- build/gen_mxrec_tar_pkg.sh | 7 ------- 1 file changed, 7 deletions(-) diff --git a/build/gen_mxrec_tar_pkg.sh b/build/gen_mxrec_tar_pkg.sh index 3b6a9713..b5cba7a2 100644 --- a/build/gen_mxrec_tar_pkg.sh +++ b/build/gen_mxrec_tar_pkg.sh @@ -82,10 +82,3 @@ function clean() gen_tar_file clean - -# compile cust op -echo "---------------- start to compile cust op ----------------" -cd "${MxRec_DIR}"/cust_op/cust_op_by_addr -chmod u+x run.sh -./run.sh -echo "---------------- compile cust op success!!!! 
----------------" \ No newline at end of file -- Gitee From 3da1b22d7d3b7cde2ad3caf9aa065fcb3773241b Mon Sep 17 00:00:00 2001 From: penghuiyang <1060916628@qq.com> Date: Thu, 20 Jun 2024 11:22:31 +0800 Subject: [PATCH 226/302] =?UTF-8?q?=E6=96=B0=E5=A2=9EreadEmbKeyBatchId?= =?UTF-8?q?=E8=AE=B0=E5=BD=95readEmbedKey=20Op=E5=A4=84=E7=90=86=E8=BF=87?= =?UTF-8?q?=E7=9A=84batch=E8=AE=A1=E6=95=B0=E7=94=A8=E4=BA=8E=E5=88=A4?= =?UTF-8?q?=E6=96=AD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core/hybrid_mgmt/hybrid_mgmt_block.cpp | 45 +++++++++++++--------- src/core/hybrid_mgmt/hybrid_mgmt_block.h | 9 ++++- src/core/key_process/key_process.cpp | 4 +- src/ops_tf/hybrid_dataset_ops.cpp | 4 +- 4 files changed, 37 insertions(+), 25 deletions(-) diff --git a/src/core/hybrid_mgmt/hybrid_mgmt_block.cpp b/src/core/hybrid_mgmt/hybrid_mgmt_block.cpp index 65235389..e4935166 100644 --- a/src/core/hybrid_mgmt/hybrid_mgmt_block.cpp +++ b/src/core/hybrid_mgmt/hybrid_mgmt_block.cpp @@ -114,7 +114,8 @@ void HybridMgmtBlock::CheckValid(int channelId) // 当python侧第一次调用时,此时跳过参数检查 if (lastRunChannelId == -1) { LOG_DEBUG(HYBRID_BLOCKING + "The data channel was called for the first time, and the parameters were " - "checked to be normal channelId {} hybridBatchId {}", channelId, hybridBatchId[channelId]); + "checked to be normal channelId {} hybridBatchId {}", channelId, + hybridBatchId[channelId]); lastRunChannelId = channelId; return; @@ -122,9 +123,9 @@ void HybridMgmtBlock::CheckValid(int channelId) // 在通道切换时,hybrid预处理的batch与python的一致。 if (pythonBatchId[lastRunChannelId] == hybridBatchId[lastRunChannelId]) { LOG_DEBUG(HYBRID_BLOCKING + - "HybridMgmt is switching data channels and checking for normal parameters. he number of steps " - "in the previous round is lastRunChannelId {} pythonBatchId {} hybridBatchId {}", - lastRunChannelId, pythonBatchId[lastRunChannelId], hybridBatchId[lastRunChannelId]); + "HybridMgmt is switching data channels and checking for normal parameters. 
he number of steps " + "in the previous round is lastRunChannelId {} pythonBatchId {} hybridBatchId {}", + lastRunChannelId, pythonBatchId[lastRunChannelId], hybridBatchId[lastRunChannelId]); } else if (pythonBatchId[lastRunChannelId] < hybridBatchId[lastRunChannelId]) { // 在通道切换时,上一个通道处理的数据超出了python侧的调用 if (rankInfo.isDDR and !WaitValid(lastRunChannelId)) { @@ -133,10 +134,10 @@ void HybridMgmtBlock::CheckValid(int channelId) } else { // 在通道切换时,hybrid处理的数据还没有赶上python侧,此时需要等待hybrid处理完成 LOG_INFO(HYBRID_BLOCKING + - "When switching data channels, it was found that HybridMgmt processed less data than the " - "Python side.In this case, after reading the dataset, the Python side called it again, but it was " - "interrupted midway,which did not affect the subsequent calls lastRunChannelId {} hybridBatchId {}", - lastRunChannelId, hybridBatchId[lastRunChannelId]); + "When switching data channels, it was found that HybridMgmt processed less data than the " + "Python side.In this case, after reading the dataset, the Python side called it again, but it was " + "interrupted midway,which did not affect the subsequent calls lastRunChannelId {} hybridBatchId {}", + lastRunChannelId, hybridBatchId[lastRunChannelId]); } lastRunChannelId = channelId; } @@ -147,7 +148,7 @@ void HybridMgmtBlock::DoBlock(int channelId) { // 通道没有切换,不用处理 LOG_DEBUG(HYBRID_BLOCKING + "HybridMgmt starts blocking channelId {} hybridBatchId {}", - channelId, hybridBatchId[channelId]); + channelId, hybridBatchId[channelId]); while (isBlock[channelId]) { std::this_thread::sleep_for(SLEEP_MS); @@ -156,7 +157,7 @@ void HybridMgmtBlock::DoBlock(int channelId) } } LOG_DEBUG(HYBRID_BLOCKING + "HybridMgmt is starting to wake up channelId {} hybridBatchId {}", - channelId, hybridBatchId[channelId]); + channelId, hybridBatchId[channelId]); } /// 重置所有的步数,主要用于图重构的情况,readembedkey算子重建 @@ -187,24 +188,24 @@ int HybridMgmtBlock::CheckSaveEmbMapValid() // 检查数据通道此时的HashMap是否被提前处理了 if (pythonBatchId[lastRunChannelId] >= hybridBatchId[lastRunChannelId]) { LOG_DEBUG(HYBRID_BLOCKING + - "HybridMgmt is checking the step and checking that the parameters are normal. " - "The number of steps in the previous round is " - "lastRunChannelId {} pythonBatchId {} hybridBatchId {}", - lastRunChannelId, pythonBatchId[lastRunChannelId], hybridBatchId[lastRunChannelId]); + "HybridMgmt is checking the step and checking that the parameters are normal. " + "The number of steps in the previous round is " + "lastRunChannelId {} pythonBatchId {} hybridBatchId {}", + lastRunChannelId, pythonBatchId[lastRunChannelId], hybridBatchId[lastRunChannelId]); return 0; } else if (pythonBatchId[lastRunChannelId] + 1 == hybridBatchId[lastRunChannelId]) { // 在通道切换时,上一个通道处理的数据超出了python侧的调用 LOG_DEBUG(HYBRID_BLOCKING + - "HybridMgmt is checking the step, and the parameters have been processed one step " - "in advance. The number of steps in the previous round was " - "lastRunChannelId {} pythonBatchId {} hybridBatchId {}", - lastRunChannelId, pythonBatchId[lastRunChannelId], hybridBatchId[lastRunChannelId]); + "HybridMgmt is checking the step, and the parameters have been processed one step " + "in advance. 
The number of steps in the previous round was " + "lastRunChannelId {} pythonBatchId {} hybridBatchId {}", + lastRunChannelId, pythonBatchId[lastRunChannelId], hybridBatchId[lastRunChannelId]); return 1; } else { // 在通道切换时,hybrid处理的数据还没有赶上python侧,此时需要等待hybrid处理完成 LOG_DEBUG(HYBRID_BLOCKING + "ERROR FLAG lastRunChannelId {} hybridBatchId {}", - lastRunChannelId, hybridBatchId[lastRunChannelId]); + lastRunChannelId, hybridBatchId[lastRunChannelId]); return -1; } } @@ -267,3 +268,9 @@ void HybridMgmtBlock::FinishSave() { finishSave = true; } + +void HybridMgmtBlock::IncreaseReadEmbBatchId(const int channelId) +{ + this->readEmbedBatchId[channelId] += 1; + this->readEmbedBatchIdAll += 1; +} \ No newline at end of file diff --git a/src/core/hybrid_mgmt/hybrid_mgmt_block.h b/src/core/hybrid_mgmt/hybrid_mgmt_block.h index a969d7a9..78b5260a 100644 --- a/src/core/hybrid_mgmt/hybrid_mgmt_block.h +++ b/src/core/hybrid_mgmt/hybrid_mgmt_block.h @@ -32,6 +32,7 @@ namespace MxRec { class HybridMgmtBlock { public: HybridMgmtBlock() = default; + // 上一次运行的通道ID int lastRunChannelId = -1; // hybrid将要处理的batch id @@ -40,10 +41,12 @@ namespace MxRec { int pythonBatchId[2] = {0, 0}; // readEmbed算子侧将要处理的batch id int readEmbedBatchId[2] = {0, 0}; + // readEmbed算子处理过的batch计数,不区分通道,不会重置;用于判断h2d swap是否需要eos + int readEmbedBatchIdAll = 0; int maxTrainStep = 0; int stepsInterval[2] = {0, 0}; // 通道i运行多少步后切换为通道j - // hybrid已完成H2D的step + // hybrid已完成H2D的step;不区分通道、图,不会重置; map h2dNextBatchId; int loop[2] = {1, 1}; @@ -88,6 +91,8 @@ namespace MxRec { void FinishSave(); + void IncreaseReadEmbBatchId(const int channelId); + private: // 控制通道阻塞的变量 bool isBlock[2] = {true, true}; @@ -101,7 +106,7 @@ namespace MxRec { public: explicit HybridMgmtBlockingException(const string scene) { - HybridMgmtBlock *hybridMgmtBlock = Singleton::GetInstance(); + HybridMgmtBlock* hybridMgmtBlock = Singleton::GetInstance(); int channelId = hybridMgmtBlock->lastRunChannelId; int preprocessBatchNumber = hybridMgmtBlock->hybridBatchId[channelId]; int currentBatchNumber = hybridMgmtBlock->pythonBatchId[channelId]; diff --git a/src/core/key_process/key_process.cpp b/src/core/key_process/key_process.cpp index 4207852f..237e3d2a 100644 --- a/src/core/key_process/key_process.cpp +++ b/src/core/key_process/key_process.cpp @@ -1272,10 +1272,10 @@ bool KeyProcess::IsGetUniqueKeysEos(const EmbBaseInfo& info, std::chrono::_V2::s } // hybridMgmtBlock->h2dNextBatchId[info.name] used by postfix increment, the last value will be grater than // readEmbKeyBatchId and equals readEmbKeyBatchId + 1. - // Check '> readEmbKeyBatchId' condition to avoid send eos before handle all batch data from readEmbKey Op. + // Check '>= readEmbedBatchIdAll' condition to avoid send eos before handle all batch data from readEmbKey Op. 
if (isNeedSendEos[info.channelId] && readEmbKeyBatchId < info.batchId && hybridMgmtBlock->h2dNextBatchId[info.name] == lookUpSwapInAddrsPushId[info.name] && - hybridMgmtBlock->h2dNextBatchId[info.name] > readEmbKeyBatchId) { + hybridMgmtBlock->h2dNextBatchId[info.name] >= hybridMgmtBlock->readEmbedBatchIdAll) { LOG_INFO("table:{}, channelId:{} batchId:{}, GetUniqueKeys eos", info.name, info.channelId, info.batchId); return true; diff --git a/src/ops_tf/hybrid_dataset_ops.cpp b/src/ops_tf/hybrid_dataset_ops.cpp index 2eee8531..1ef52de1 100644 --- a/src/ops_tf/hybrid_dataset_ops.cpp +++ b/src/ops_tf/hybrid_dataset_ops.cpp @@ -214,7 +214,7 @@ namespace MxRec { return; } } - hybridMgmtBlock->readEmbedBatchId[channelId] += 1; + hybridMgmtBlock->IncreaseReadEmbBatchId(channelId); const Tensor& inputTensor = context->input(TensorIndex::TENSOR_INDEX_0); const auto& splits = context->input(TENSOR_INDEX_1).flat(); int fieldNum = 0; @@ -407,7 +407,7 @@ namespace MxRec { return; } } - hybridMgmtBlock->readEmbedBatchId[channelId] += 1; + hybridMgmtBlock->IncreaseReadEmbBatchId(channelId); const Tensor& inputTensor = context->input(TensorIndex::TENSOR_INDEX_0); size_t dataSize = inputTensor.NumElements(); -- Gitee From 7445ebc8b0f19386f1d80c596aaf1ae110223c67 Mon Sep 17 00:00:00 2001 From: penghuiyang <1060916628@qq.com> Date: Thu, 20 Jun 2024 11:35:02 +0800 Subject: [PATCH 227/302] =?UTF-8?q?=E6=B3=A8=E9=87=8A=E6=9B=B4=E6=96=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core/hybrid_mgmt/hybrid_mgmt_block.h | 2 +- src/core/key_process/key_process.cpp | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/src/core/hybrid_mgmt/hybrid_mgmt_block.h b/src/core/hybrid_mgmt/hybrid_mgmt_block.h index 78b5260a..05e60e72 100644 --- a/src/core/hybrid_mgmt/hybrid_mgmt_block.h +++ b/src/core/hybrid_mgmt/hybrid_mgmt_block.h @@ -41,7 +41,7 @@ namespace MxRec { int pythonBatchId[2] = {0, 0}; // readEmbed算子侧将要处理的batch id int readEmbedBatchId[2] = {0, 0}; - // readEmbed算子处理过的batch计数,不区分通道,不会重置;用于判断h2d swap是否需要eos + // readEmbed算子处理过的batch计数,不区分通道、图,不会重置;用于判断h2d swap是否需要eos int readEmbedBatchIdAll = 0; int maxTrainStep = 0; int stepsInterval[2] = {0, 0}; // 通道i运行多少步后切换为通道j diff --git a/src/core/key_process/key_process.cpp b/src/core/key_process/key_process.cpp index 237e3d2a..b5dc962e 100644 --- a/src/core/key_process/key_process.cpp +++ b/src/core/key_process/key_process.cpp @@ -1270,8 +1270,6 @@ bool KeyProcess::IsGetUniqueKeysEos(const EmbBaseInfo& info, std::chrono::_V2::s lookUpSwapInAddrsPushId[info.name]); startTime = std::chrono::system_clock::now(); } - // hybridMgmtBlock->h2dNextBatchId[info.name] used by postfix increment, the last value will be grater than - // readEmbKeyBatchId and equals readEmbKeyBatchId + 1. // Check '>= readEmbedBatchIdAll' condition to avoid send eos before handle all batch data from readEmbKey Op. 
if (isNeedSendEos[info.channelId] && readEmbKeyBatchId < info.batchId && hybridMgmtBlock->h2dNextBatchId[info.name] == lookUpSwapInAddrsPushId[info.name] && -- Gitee From 9c5e09c9c3e43fc5f36d270bfc7349fd8b55b8c2 Mon Sep 17 00:00:00 2001 From: penghuiyang <1060916628@qq.com> Date: Thu, 20 Jun 2024 11:47:51 +0800 Subject: [PATCH 228/302] =?UTF-8?q?=E7=BC=A9=E8=BF=9B=E6=A0=BC=E5=BC=8F?= =?UTF-8?q?=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core/hybrid_mgmt/hybrid_mgmt_block.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/core/hybrid_mgmt/hybrid_mgmt_block.cpp b/src/core/hybrid_mgmt/hybrid_mgmt_block.cpp index e4935166..092cfa7c 100644 --- a/src/core/hybrid_mgmt/hybrid_mgmt_block.cpp +++ b/src/core/hybrid_mgmt/hybrid_mgmt_block.cpp @@ -113,9 +113,10 @@ void HybridMgmtBlock::CheckValid(int channelId) } // 当python侧第一次调用时,此时跳过参数检查 if (lastRunChannelId == -1) { - LOG_DEBUG(HYBRID_BLOCKING + "The data channel was called for the first time, and the parameters were " - "checked to be normal channelId {} hybridBatchId {}", channelId, - hybridBatchId[channelId]); + LOG_DEBUG(HYBRID_BLOCKING + + "The data channel was called for the first time, and the parameters were " + "checked to be normal channelId {} hybridBatchId {}", + channelId, hybridBatchId[channelId]); lastRunChannelId = channelId; return; -- Gitee From e080280e15b7d40fcd728b15024637beb6ca8fd4 Mon Sep 17 00:00:00 2001 From: penghuiyang <1060916628@qq.com> Date: Thu, 20 Jun 2024 11:56:25 +0800 Subject: [PATCH 229/302] =?UTF-8?q?=E6=97=A5=E5=BF=97=E6=89=93=E5=8D=B0?= =?UTF-8?q?=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core/hybrid_mgmt/hybrid_mgmt_block.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/core/hybrid_mgmt/hybrid_mgmt_block.cpp b/src/core/hybrid_mgmt/hybrid_mgmt_block.cpp index 092cfa7c..c3459c77 100644 --- a/src/core/hybrid_mgmt/hybrid_mgmt_block.cpp +++ b/src/core/hybrid_mgmt/hybrid_mgmt_block.cpp @@ -121,10 +121,11 @@ void HybridMgmtBlock::CheckValid(int channelId) lastRunChannelId = channelId; return; } + t // 在通道切换时,hybrid预处理的batch与python的一致。 if (pythonBatchId[lastRunChannelId] == hybridBatchId[lastRunChannelId]) { LOG_DEBUG(HYBRID_BLOCKING + - "HybridMgmt is switching data channels and checking for normal parameters. he number of steps " + "HybridMgmt is switching data channels and checking for normal parameters. 
The number of steps " "in the previous round is lastRunChannelId {} pythonBatchId {} hybridBatchId {}", lastRunChannelId, pythonBatchId[lastRunChannelId], hybridBatchId[lastRunChannelId]); } else if (pythonBatchId[lastRunChannelId] < hybridBatchId[lastRunChannelId]) { -- Gitee From 8adefa043295d5bb6245f215861e22c69220d3aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=BD=97=E5=B9=B8=E8=BF=90?= Date: Thu, 20 Jun 2024 12:02:23 +0800 Subject: [PATCH 230/302] =?UTF-8?q?=E3=80=90=E4=BF=AE=E6=94=B9=E8=AF=B4?= =?UTF-8?q?=E6=98=8E=20Modification=E3=80=91lock=E9=97=AE=E9=A2=98?= =?UTF-8?q?=E5=AF=BC=E8=87=B4=E6=B3=84=E9=9C=B2=E5=86=85=E5=AD=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/AccCTR/src/embedding_cache/offset_mapper/address_mapper.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/AccCTR/src/embedding_cache/offset_mapper/address_mapper.h b/src/AccCTR/src/embedding_cache/offset_mapper/address_mapper.h index eac5f46d..251c185c 100644 --- a/src/AccCTR/src/embedding_cache/offset_mapper/address_mapper.h +++ b/src/AccCTR/src/embedding_cache/offset_mapper/address_mapper.h @@ -103,6 +103,7 @@ public: void Stop() { stop = true; + std::lock_guard lock(producerMutex); producerCv.notify_all(); fullCv.notify_all(); } -- Gitee From 4eea2da6e251acba489adb98648add0eddeba681 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=BD=97=E5=B9=B8=E8=BF=90?= Date: Thu, 20 Jun 2024 12:02:47 +0800 Subject: [PATCH 231/302] =?UTF-8?q?=E3=80=90=E4=BF=AE=E6=94=B9=E8=AF=B4?= =?UTF-8?q?=E6=98=8E=20Modification=E3=80=91lock=E9=97=AE=E9=A2=98?= =?UTF-8?q?=E5=AF=BC=E8=87=B4=E6=B3=84=E9=9C=B2=E5=86=85=E5=AD=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/AccCTR/src/embedding_cache/offset_mapper/address_mapper.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/AccCTR/src/embedding_cache/offset_mapper/address_mapper.h b/src/AccCTR/src/embedding_cache/offset_mapper/address_mapper.h index 251c185c..46daaf29 100644 --- a/src/AccCTR/src/embedding_cache/offset_mapper/address_mapper.h +++ b/src/AccCTR/src/embedding_cache/offset_mapper/address_mapper.h @@ -93,6 +93,7 @@ public: ~AutoRefillEmbeddingMemoryPool() { stop = true; + std::lock_guard lock(producerMutex); producerCv.notify_all(); fullCv.notify_all(); for (auto& t : producerThreads) { -- Gitee From b1a9e5982e7bfa887c0ef8a399e709df2512ad31 Mon Sep 17 00:00:00 2001 From: penghuiyang <1060916628@qq.com> Date: Thu, 20 Jun 2024 15:28:13 +0800 Subject: [PATCH 232/302] =?UTF-8?q?=E6=A3=80=E8=A7=86=E6=84=8F=E8=A7=81?= =?UTF-8?q?=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core/hybrid_mgmt/hybrid_mgmt_block.cpp | 8 ++++---- src/core/hybrid_mgmt/hybrid_mgmt_block.h | 2 +- src/ops_tf/hybrid_dataset_ops.cpp | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/core/hybrid_mgmt/hybrid_mgmt_block.cpp b/src/core/hybrid_mgmt/hybrid_mgmt_block.cpp index c3459c77..fbee8b9a 100644 --- a/src/core/hybrid_mgmt/hybrid_mgmt_block.cpp +++ b/src/core/hybrid_mgmt/hybrid_mgmt_block.cpp @@ -115,18 +115,18 @@ void HybridMgmtBlock::CheckValid(int channelId) if (lastRunChannelId == -1) { LOG_DEBUG(HYBRID_BLOCKING + "The data channel was called for the first time, and the parameters were " - "checked to be normal channelId {} hybridBatchId {}", + "checked to be normal channelId {} hybridBatchId {}.", channelId, hybridBatchId[channelId]); lastRunChannelId = channelId; return; } - t + // 
在通道切换时,hybrid预处理的batch与python的一致。 if (pythonBatchId[lastRunChannelId] == hybridBatchId[lastRunChannelId]) { LOG_DEBUG(HYBRID_BLOCKING + "HybridMgmt is switching data channels and checking for normal parameters. The number of steps " - "in the previous round is lastRunChannelId {} pythonBatchId {} hybridBatchId {}", + "in the previous round is lastRunChannelId {} pythonBatchId {} hybridBatchId {}.", lastRunChannelId, pythonBatchId[lastRunChannelId], hybridBatchId[lastRunChannelId]); } else if (pythonBatchId[lastRunChannelId] < hybridBatchId[lastRunChannelId]) { // 在通道切换时,上一个通道处理的数据超出了python侧的调用 @@ -271,7 +271,7 @@ void HybridMgmtBlock::FinishSave() finishSave = true; } -void HybridMgmtBlock::IncreaseReadEmbBatchId(const int channelId) +void HybridMgmtBlock::IncrementReadEmbBatchId(const int channelId) { this->readEmbedBatchId[channelId] += 1; this->readEmbedBatchIdAll += 1; diff --git a/src/core/hybrid_mgmt/hybrid_mgmt_block.h b/src/core/hybrid_mgmt/hybrid_mgmt_block.h index 05e60e72..a66f9b00 100644 --- a/src/core/hybrid_mgmt/hybrid_mgmt_block.h +++ b/src/core/hybrid_mgmt/hybrid_mgmt_block.h @@ -91,7 +91,7 @@ namespace MxRec { void FinishSave(); - void IncreaseReadEmbBatchId(const int channelId); + void IncrementReadEmbBatchId(const int channelId); private: // 控制通道阻塞的变量 diff --git a/src/ops_tf/hybrid_dataset_ops.cpp b/src/ops_tf/hybrid_dataset_ops.cpp index 1ef52de1..0b192da5 100644 --- a/src/ops_tf/hybrid_dataset_ops.cpp +++ b/src/ops_tf/hybrid_dataset_ops.cpp @@ -214,7 +214,7 @@ namespace MxRec { return; } } - hybridMgmtBlock->IncreaseReadEmbBatchId(channelId); + hybridMgmtBlock->IncrementReadEmbBatchId(channelId); const Tensor& inputTensor = context->input(TensorIndex::TENSOR_INDEX_0); const auto& splits = context->input(TENSOR_INDEX_1).flat(); int fieldNum = 0; @@ -407,7 +407,7 @@ namespace MxRec { return; } } - hybridMgmtBlock->IncreaseReadEmbBatchId(channelId); + hybridMgmtBlock->IncrementReadEmbBatchId(channelId); const Tensor& inputTensor = context->input(TensorIndex::TENSOR_INDEX_0); size_t dataSize = inputTensor.NumElements(); -- Gitee From 58e4da2d272e67e4026fb0829b576604a18850b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=83=AD=E6=9C=9B?= <1244372993@qq.com> Date: Thu, 20 Jun 2024 15:36:33 +0800 Subject: [PATCH 233/302] =?UTF-8?q?=E3=80=90FIX=E3=80=91DCNv2=E6=A8=A1?= =?UTF-8?q?=E5=9E=8Bssd=E6=A8=A1=E5=BC=8F=EF=BC=8CError=E6=89=93=E5=B1=8F?= =?UTF-8?q?=E6=97=A5=E5=BF=97=E5=B1=8F=E8=94=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core/hybrid_mgmt/hybrid_mgmt.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/core/hybrid_mgmt/hybrid_mgmt.cpp b/src/core/hybrid_mgmt/hybrid_mgmt.cpp index 30c41e0c..9ede05c8 100644 --- a/src/core/hybrid_mgmt/hybrid_mgmt.cpp +++ b/src/core/hybrid_mgmt/hybrid_mgmt.cpp @@ -2044,7 +2044,9 @@ void HybridMgmt::SendRestoreVec(const EmbBaseInfo &info, bool &remainBatchOut) info, ProcessedInfo::RESTORE, isEos); if (infoVecs == nullptr) { remainBatchOut = false; - LOG_ERROR("Information vector is nullptr!"); + if (isRunning) { + LOG_ERROR("Information vector is nullptr!"); + } return; } LOG_DEBUG("table:{}, channelId:{}, batchId:{}, get restore end, getRestoreTC(ms):{}", -- Gitee From e38bd5da61f159919aff1111242d1962f2dc1a57 Mon Sep 17 00:00:00 2001 From: liangrenhao Date: Thu, 20 Jun 2024 15:22:45 +0800 Subject: [PATCH 234/302] [FIX]clean code Signed-off-by: liangrenhao --- examples/rec_infer/client.py | 31 ++++++++++++++++++++---- examples/rec_infer/client.sh | 4 +++ 
examples/rec_infer/input_config.py | 17 +++++++++++++ examples/rec_infer/server.sh | 4 +++ tools/graph_partition/gen_config.py | 17 +++++++++++++ tools/graph_partition/graph_partition.py | 17 +++++++++++++ tools/graph_partition/template.cfg | 2 ++ 7 files changed, 87 insertions(+), 5 deletions(-) diff --git a/examples/rec_infer/client.py b/examples/rec_infer/client.py index 62a15882..7c6f1cb1 100644 --- a/examples/rec_infer/client.py +++ b/examples/rec_infer/client.py @@ -1,3 +1,20 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + import os import time @@ -51,15 +68,19 @@ class PredictModelGrpc(): return request, stub +FIELD_TYPE = "dtype" +FIELD_SHAPE = "shape" + + def gen_inputs(): inputs = {} input_types = {} for name in config: - input_types[name] = config[name]["dtype"] - if config[name]["dtype"] == tf.int32: - inputs[name] = np.random.randint(0, 100, size=config[name]["shape"]) - elif config[name]["dtype"] == tf.float32: - inputs[name] = np.random.randint(0, 2, size=config[name]["shape"]) * 1.0 + input_types[name] = config[name][FIELD_TYPE] + if config[name][FIELD_TYPE] == tf.int32: + inputs[name] = np.random.randint(0, 100, size=config[name][FIELD_SHAPE]) + elif config[name][FIELD_TYPE] == tf.float32: + inputs[name] = np.random.randint(0, 2, size=config[name][FIELD_SHAPE]) * 1.0 return inputs, input_types diff --git a/examples/rec_infer/client.sh b/examples/rec_infer/client.sh index fa968858..0d3169c2 100644 --- a/examples/rec_infer/client.sh +++ b/examples/rec_infer/client.sh @@ -1,3 +1,7 @@ +#!/bin/bash +# Copyright (c) Huawei Technologies Co., Ltd. 2024. All rights reserved. +# Description: startup client + source /usr/local/Ascend/ascend-toolkit/set_env.sh unset http_proxy unset https_proxy diff --git a/examples/rec_infer/input_config.py b/examples/rec_infer/input_config.py index 24e28f03..8fff6ceb 100644 --- a/examples/rec_infer/input_config.py +++ b/examples/rec_infer/input_config.py @@ -1,3 +1,20 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + import tensorflow as tf BATCH_SIZE = 9600 diff --git a/examples/rec_infer/server.sh b/examples/rec_infer/server.sh index 50735b0f..67166f61 100644 --- a/examples/rec_infer/server.sh +++ b/examples/rec_infer/server.sh @@ -1,3 +1,7 @@ +#!/bin/bash +# Copyright (c) Huawei Technologies Co., Ltd. 2024. All rights reserved. +# Description: startup server + taskset -c 0-32 /home/lmp/serving-1.15.0/bazel-bin/tensorflow_serving/model_servers/tensorflow_model_server \ --model_name=saved_model \ --model_base_path=$(pwd)/inference_model/saved_model/ \ diff --git a/tools/graph_partition/gen_config.py b/tools/graph_partition/gen_config.py index 8e80a182..7cd69de3 100644 --- a/tools/graph_partition/gen_config.py +++ b/tools/graph_partition/gen_config.py @@ -1,3 +1,20 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + import argparse import os diff --git a/tools/graph_partition/graph_partition.py b/tools/graph_partition/graph_partition.py index 6e01e6e6..8ebfbfba 100644 --- a/tools/graph_partition/graph_partition.py +++ b/tools/graph_partition/graph_partition.py @@ -1,3 +1,20 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + import tensorflow as tf from tensorflow.contrib import graph_editor as ge diff --git a/tools/graph_partition/template.cfg b/tools/graph_partition/template.cfg index 3227bdea..fef30a9b 100644 --- a/tools/graph_partition/template.cfg +++ b/tools/graph_partition/template.cfg @@ -1,3 +1,5 @@ +# Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. 
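+# 本文件为TF Serving platform_config_file的模板,占位符#value@in_out_pair#由tools/graph_partition/gen_config.py替换为实际的输入输出节点对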
+
 platform_configs {
   key: "tensorflow"
   value {
-- 
Gitee


From 20580c9d9970f10764ccc27603c4e21a3ddf36c7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E7=BD=97=E5=B9=B8=E8=BF=90?=
Date: Thu, 20 Jun 2024 20:13:20 +0800
Subject: [PATCH 235/302] =?UTF-8?q?=E3=80=90fix=E3=80=91HBM=E6=A8=A1?=
 =?UTF-8?q?=E5=BC=8F=E4=B8=8D=E5=BA=94=E8=AF=A5=E8=B0=83=E7=94=A8embCache-?=
 =?UTF-8?q?>Destroy()?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/core/hybrid_mgmt/hybrid_mgmt.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/core/hybrid_mgmt/hybrid_mgmt.cpp b/src/core/hybrid_mgmt/hybrid_mgmt.cpp
index c38aa131..8ba1a9cd 100644
--- a/src/core/hybrid_mgmt/hybrid_mgmt.cpp
+++ b/src/core/hybrid_mgmt/hybrid_mgmt.cpp
@@ -450,7 +450,7 @@ void HybridMgmt::Destroy()
     // 停止预处理
    KEY_PROCESS_INSTANCE->Destroy();
     // stop embCache, even if the host emb is still allocating
-    embCache->Destroy();
+    if (embCache != nullptr) { embCache->Destroy(); }
 
     LOG_DEBUG(MGMT + "Destroy hybrid_mgmt module end.");
 }
-- 
Gitee


From 1dafba4fd98cc6d818064b65b6f69a006cebbe10 Mon Sep 17 00:00:00 2001
From: longfeifei <962977793@qq.com>
Date: Thu, 20 Jun 2024 16:22:10 +0800
Subject: [PATCH 236/302] =?UTF-8?q?little=20demo=20estimator=20=E8=B0=83?=
 =?UTF-8?q?=E7=94=A8init=E6=8E=A5=E5=8F=A3=E6=97=B6=E4=BC=A0=E5=85=A5save?=
 =?UTF-8?q?=5Fsteps=E3=80=81max=5Fsteps?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 examples/demo/little_demo_estimator/main.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/examples/demo/little_demo_estimator/main.py b/examples/demo/little_demo_estimator/main.py
index a369abe5..716e40d0 100644
--- a/examples/demo/little_demo_estimator/main.py
+++ b/examples/demo/little_demo_estimator/main.py
@@ -214,6 +214,8 @@ if __name__ == '__main__':
     # set init
     init(train_steps=args.train_steps,
          eval_steps=args.eval_steps,
+         save_steps=args.save_checkpoints_steps,
+         max_steps=args.max_steps,
          use_dynamic=use_dynamic,
          use_dynamic_expansion=use_dynamic_expansion)
 
-- 
Gitee


From 3272d73c1dbd47af85220f95a883f7971b21bb28 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E7=BD=97=E5=B9=B8=E8=BF=90?=
Date: Fri, 21 Jun 2024 11:42:12 +0800
Subject: [PATCH 237/302] =?UTF-8?q?=E3=80=90fix=E3=80=91Acctr=E4=B8=AD?=
 =?UTF-8?q?=E7=9A=84cleancode=EF=BC=8C=E4=B8=8D=E8=83=BD=E5=B0=81=E8=A3=85?=
 =?UTF-8?q?=E5=AE=89=E5=85=A8=E5=87=BD=E6=95=B0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../offset_mapper/mapper_base.h               | 53 +++++++++----------
 1 file changed, 26 insertions(+), 27 deletions(-)

diff --git a/src/AccCTR/src/embedding_cache/offset_mapper/mapper_base.h b/src/AccCTR/src/embedding_cache/offset_mapper/mapper_base.h
index 164daaab..d4e0aaa6 100644
--- a/src/AccCTR/src/embedding_cache/offset_mapper/mapper_base.h
+++ b/src/AccCTR/src/embedding_cache/offset_mapper/mapper_base.h
@@ -304,25 +304,8 @@ public:
 
         /* allocate buckets for sub-maps */
         for (auto &mSubMap : mSubMaps) {
-            auto tmp = new (std::nothrow) NetHashBucket[bucketCount];
-            if (HM_UNLIKELY(tmp == nullptr)) {
-                FreeSubMaps();
-                ock::ExternalLogger::PrintLog(ock::LogLevel::ERROR,
-                    "Failed to new hash bucket, probably out of memory");
-                return false;
-            }
-
-            /* make physical page and set to zero */
-            auto ret = SafeMemset(tmp, 0, sizeof(NetHashBucket) * bucketCount);
-            if (ret != 0) {
-                delete[] tmp;
-                tmp = nullptr;
-                FreeSubMaps();
-                ock::ExternalLogger::PrintLog(ock::LogLevel::ERROR, "memset_s failed...
size: " + - std::to_string(sizeof(NetHashBucket) * bucketCount) + ", error code:" + std::to_string(ret)); - return false; - } - + NetHashBucket* tmp; + if (!NewAndSetBucket(bucketCount, 0, tmp)) { return false;} mSubMap = tmp; } @@ -697,22 +680,38 @@ private: } /* - * Description: SECUREC_MEM_MAX_LEN of memset_s function is 2GB - * Parameter: dest - destination address + * Description: allocate buckets and init it + * Parameter: bucketCount - the bucket counts * Parameter: c - the value to be copied - * Parameter: count - copies count bytes of value to dest + * Parameter: bucketPtr - pointing at the bucket array which is allocated + * NOTES: SECUREC_MEM_MAX_LEN of memset_s function is 2GB */ - int SafeMemset(void* dest, int c, size_t count) + bool NewAndSetBucket(const uint32_t& bucketCount, const int& c, NetHashBucket* &bucketPtr) { + bucketPtr = new (std::nothrow) NetHashBucket[bucketCount]; + if (HM_UNLIKELY(bucketPtr == nullptr)) { + FreeSubMaps(); + ock::ExternalLogger::PrintLog(ock::LogLevel::ERROR, + "Failed to new hash bucket, probably out of memory"); + return false; + } + + /* make physical page and set to zero */ + size_t bucketsBytes = sizeof(NetHashBucket) * bucketCount; char* destBytePtr = reinterpret_cast(dest); - for (size_t i = 0; i < count; i += MEMSET_S_MAX_SIZE) { - size_t bytesOnceSet = (i + MEMSET_S_MAX_SIZE <= count) ? MEMSET_S_MAX_SIZE : (count - i); + for (size_t i = 0; i < bucketsBytes; i += MEMSET_S_MAX_SIZE) { + size_t bytesOnceSet = (i + MEMSET_S_MAX_SIZE <= bucketsBytes) ? MEMSET_S_MAX_SIZE : (bucketsBytes - i); auto ret = memset_s(destBytePtr + i, bytesOnceSet, c, bytesOnceSet); if (ret != 0) { - return ret; + delete[] bucketPtr; + bucketPtr = nullptr; + FreeSubMaps(); + ock::ExternalLogger::PrintLog(ock::LogLevel::ERROR, "memset_s failed... size: " + std::to_string( + bucketsBytes) + ", error code:" + std::to_string(ret)); + return false; } } - return 0; + return true; } void FreeOverFlowedEntries() -- Gitee From 65195772e61067916f37e12d73afbc0cba53bc41 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=BD=97=E5=B9=B8=E8=BF=90?= Date: Fri, 21 Jun 2024 12:02:55 +0800 Subject: [PATCH 238/302] =?UTF-8?q?=E3=80=90fix=E3=80=91Acctr=E4=B8=AD?= =?UTF-8?q?=E7=9A=84cleacode=EF=BC=8C=E4=B8=8D=E8=83=BD=E5=B0=81=E8=A3=85?= =?UTF-8?q?=E5=AE=89=E5=85=A8=E5=87=BD=E6=95=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/AccCTR/src/embedding_cache/offset_mapper/mapper_base.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/AccCTR/src/embedding_cache/offset_mapper/mapper_base.h b/src/AccCTR/src/embedding_cache/offset_mapper/mapper_base.h index d4e0aaa6..78729be3 100644 --- a/src/AccCTR/src/embedding_cache/offset_mapper/mapper_base.h +++ b/src/AccCTR/src/embedding_cache/offset_mapper/mapper_base.h @@ -698,7 +698,7 @@ private: /* make physical page and set to zero */ size_t bucketsBytes = sizeof(NetHashBucket) * bucketCount; - char* destBytePtr = reinterpret_cast(dest); + char* destBytePtr = reinterpret_cast(bucketPtr); for (size_t i = 0; i < bucketsBytes; i += MEMSET_S_MAX_SIZE) { size_t bytesOnceSet = (i + MEMSET_S_MAX_SIZE <= bucketsBytes) ? 
MEMSET_S_MAX_SIZE : (bucketsBytes - i); auto ret = memset_s(destBytePtr + i, bytesOnceSet, c, bytesOnceSet); -- Gitee From 7957f2c72c5fcc353c49ee53b5e2aa4fb8b22df8 Mon Sep 17 00:00:00 2001 From: yangzhen_BIG Date: Fri, 21 Jun 2024 08:03:01 +0000 Subject: [PATCH 239/302] =?UTF-8?q?=E3=80=90FIX=E3=80=91=EF=BC=88=E4=BF=9D?= =?UTF-8?q?=E5=AD=98=E4=B8=8E=E5=8A=A0=E8=BD=BD=EF=BC=89=EF=BC=9Aestimator?= =?UTF-8?q?=E6=A8=A1=E5=BC=8Fhost=E4=BE=A7=E9=81=BF=E5=85=8D=E9=87=8D?= =?UTF-8?q?=E5=A4=8D=E4=BF=9D=E5=AD=98=E5=8A=A0=E8=BD=BD=EF=BC=9B=E4=BF=AE?= =?UTF-8?q?=E5=A4=8D=E5=8D=95=E6=AC=A1=E5=86=99=E8=B6=85=E7=B3=BB=E7=BB=9F?= =?UTF-8?q?=E4=B8=8A=E9=99=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core/emb_table/embedding_ddr.cpp | 11 ++++-- .../local_file_system/local_file_system.cpp | 23 ++++++++++- src/core/hybrid_mgmt/hybrid_mgmt.cpp | 39 +++++++++---------- src/core/hybrid_mgmt/hybrid_mgmt.h | 4 +- src/core/ssd_engine/ssd_engine.cpp | 14 +++++++ src/core/ssd_engine/ssd_engine.h | 3 ++ src/core/utils/common.cpp | 20 ++++++++++ src/core/utils/common.h | 2 + 8 files changed, 87 insertions(+), 29 deletions(-) diff --git a/src/core/emb_table/embedding_ddr.cpp b/src/core/emb_table/embedding_ddr.cpp index 167894e5..ca706c73 100644 --- a/src/core/emb_table/embedding_ddr.cpp +++ b/src/core/emb_table/embedding_ddr.cpp @@ -187,12 +187,15 @@ void EmbeddingDDR::LoadOptimizerSlot(const string &savePath, vector keys; vector> embeddings; vector> optimizerSlots; - embCache->GetEmbTableInfos(name, keys, embeddings, optimizerSlots); + + auto step = GetStepFromPath(savePath); + if (step > 0) { + SyncLatestEmbedding(); + embCache->GetEmbTableInfos(name, keys, embeddings, optimizerSlots); + } SaveKey(savePath, keys); SaveEmbedding(savePath, embeddings); @@ -291,7 +294,7 @@ void EmbeddingDDR::SaveEmbedding(const string& savePath, vector>& ssize_t writeBytesNum = fileSystemPtr_->Write(ss.str(), embeddings, embSize_); ssize_t expectWriteBytes = embeddings.size() * embSize_ * sizeof(float); if (writeBytesNum != expectWriteBytes) { - string errMsg = StringFormat("save embedding failed, write expect:%d, actual:%d, path:%s", + string errMsg = StringFormat("Save embedding failed, write expect:%ld, actual:%ld, path:%s .", expectWriteBytes, writeBytesNum, savePath.c_str()); throw runtime_error(errMsg); } diff --git a/src/core/file_system/local_file_system/local_file_system.cpp b/src/core/file_system/local_file_system/local_file_system.cpp index e9ddb8a4..b0b5c76a 100644 --- a/src/core/file_system/local_file_system/local_file_system.cpp +++ b/src/core/file_system/local_file_system/local_file_system.cpp @@ -124,8 +124,27 @@ ssize_t LocalFileSystem::Write(const string& filePath, vector>& fi flattenContent.insert(flattenContent.cend(), vec.cbegin(), vec.cend()); } - ssize_t writeBytesNum = - write(fd, reinterpret_cast(flattenContent.data()), flattenContent.size() * sizeof(float)); + size_t writeBytesRemain = flattenContent.size() * sizeof(float); + size_t writeSize = 0; + size_t idx = 0; + ssize_t writeBytesNum = 0; + auto dumpPtr = reinterpret_cast(flattenContent.data()); + + while (writeBytesRemain != 0) { + if (writeBytesRemain > oneTimeReadWriteLen) { + writeSize = oneTimeReadWriteLen; + } else { + writeSize = writeBytesRemain; + } + ssize_t res = write(fd, dumpPtr + idx, writeSize); + if (res == -1) { + close(fd); + return res; + } + writeBytesRemain -= res; + idx += res; + writeBytesNum += res; + } close(fd); diff --git a/src/core/hybrid_mgmt/hybrid_mgmt.cpp 
b/src/core/hybrid_mgmt/hybrid_mgmt.cpp
index 8155f1ec..fda54d9d 100644
--- a/src/core/hybrid_mgmt/hybrid_mgmt.cpp
+++ b/src/core/hybrid_mgmt/hybrid_mgmt.cpp
@@ -206,6 +206,12 @@ bool HybridMgmt::Load(const string& loadPath, vector warmStartTables)
         throw runtime_error("HybridMgmt not initialized. Call Initialize first.");
     }
 
+    if (mgmtRankInfo.isDDR && IsTrainAndEvalCase()) {
+        LOG_INFO("estimator train and eval case, skip loading, "
+                 "host will reuse data in memory while evaluating since it's the same as the saved data");
+        return true;
+    }
+
     // 数据处理线程上锁
     KEY_PROCESS_INSTANCE->LoadSaveLock();
 
@@ -821,27 +827,6 @@ void HybridMgmt::EvictL3StorageKeys(const string& embName, const vectorEvictL3StorageEmbedding(embName, keys);
 }
 
-int HybridMgmt::GetStepFromPath(const string& loadPath) const
-{
-    regex pattern(SAVE_SPARSE_PATH_PREFIX + "-.*-(\\d+)");
-    smatch match;
-    if (regex_search(loadPath, match, pattern)) {
-        int res = 0;
-        unsigned int minSize = 2;
-        if (match.size() < minSize) {
-            return res;
-        }
-        try {
-            res = stoi(match[1]);
-        } catch (const std::invalid_argument& e) {
-            LOG_ERROR(e.what());
-        } catch (const std::out_of_range& e) {
-            LOG_ERROR(e.what());
-        }
-        return res;
-    }
-    return 0;
-}
 
 /// 通过pyBind在python侧调用,通知hybridMgmt上层即将进行图的执行,需要进行唤醒
 /// \param channelID 通道id
@@ -2233,3 +2218,15 @@ void HybridMgmt::EnqueueSwapInfo(const EmbBaseInfo &info,
     CheckLookupAddrSuccessDDR();
 }
+
+bool HybridMgmt::IsTrainAndEvalCase()
+{
+    bool isChannelSwitchCase = false;
+    for (auto& i: mgmtEmbInfo) {
+        if (specialProcessStatus[i.name] == ProcessStatus::AFTER_SWITCH_FIRST_BATCH) {
+            isChannelSwitchCase = true;
+            break;
+        }
+    }
+    return alreadyTrainOnce && isChannelSwitchCase;
+}
diff --git a/src/core/hybrid_mgmt/hybrid_mgmt.h b/src/core/hybrid_mgmt/hybrid_mgmt.h
index 4fd2b541..83299da3 100644
--- a/src/core/hybrid_mgmt/hybrid_mgmt.h
+++ b/src/core/hybrid_mgmt/hybrid_mgmt.h
@@ -190,8 +190,6 @@ namespace MxRec {
 
         void EvictL3StorageKeys(const string& embName, const vector& keys) const;
 
-        int GetStepFromPath(const string& loadPath) const;
-
         void LookUpAddrs(const string &embName, int extEmbeddingSize);
 
         void LookUpSwapAddrs(const std::string &embName, const std::string &swapStr);
@@ -323,6 +321,8 @@ namespace MxRec {
 
         void EnqueueSwapInfo(const EmbBaseInfo& info, std::pair, vector>& swapInKoPair,
                              std::pair, vector>& swapOutKoPair);
+
+        bool IsTrainAndEvalCase();
     };
 }
 #endif // MX_REC_EMB_MGMT_H
diff --git a/src/core/ssd_engine/ssd_engine.cpp b/src/core/ssd_engine/ssd_engine.cpp
index e50ad43c..3f0b3a1c 100644
--- a/src/core/ssd_engine/ssd_engine.cpp
+++ b/src/core/ssd_engine/ssd_engine.cpp
@@ -103,9 +103,16 @@ void SSDEngine::Save(int step)
     if (!isRunning) {
         throw runtime_error("SSDEngine not running");
     }
+
+    if (step == loadStep) {
+        LOG_INFO("save step equal to load step, skip saving, step:{}", step);
+        return;
+    }
+
     for (auto item: as_const(tableMap)) {
         item.second->Save(step);
     }
+    saveStep = step;
 }
 
 void SSDEngine::Load(const string &tableName, vector savePaths, uint64_t maxTableSize, int step)
@@ -113,12 +120,19 @@ void SSDEngine::Load(const string &tableName, vector savePaths, uint64_t
     if (!isRunning) {
         throw runtime_error("SSDEngine not running");
     }
+
+    if (step == saveStep) {
+        LOG_INFO("load step equal to save step, skip loading, step:{}", step);
+        return;
+    }
+
     auto it = as_const(tableMap).find(tableName);
     if (it != tableMap.end()) {
         throw invalid_argument("table already exist");
     }
 
     tableMap[tableName] = make_shared(tableName, savePaths, maxTableSize, compactThreshold, step);
+    loadStep = step;
 }
 
 void
SSDEngine::Start() diff --git a/src/core/ssd_engine/ssd_engine.h b/src/core/ssd_engine/ssd_engine.h index 40b65843..942318c4 100644 --- a/src/core/ssd_engine/ssd_engine.h +++ b/src/core/ssd_engine/ssd_engine.h @@ -74,6 +74,9 @@ namespace MxRec { shared_ptr compactThread = nullptr; void CompactMonitor(); + + int loadStep = -1; + int saveStep = -1; }; } diff --git a/src/core/utils/common.cpp b/src/core/utils/common.cpp index 32e32827..1b3edcfd 100644 --- a/src/core/utils/common.cpp +++ b/src/core/utils/common.cpp @@ -20,6 +20,7 @@ See the License for the specific language governing permissions and #include #include #include +#include #include @@ -166,4 +167,23 @@ namespace MxRec { return ss; } + int GetStepFromPath(const string& loadPath) + { + regex pattern(SAVE_SPARSE_PATH_PREFIX + "-.*-(\\d+)"); + smatch match; + if (!regex_search(loadPath, match, pattern)) { + return 0; + } + int res = 0; + unsigned int minSize = 2; + if (match.size() < minSize) { + return res; + } + try { + res = stoi(match[1]); + } catch (const std::invalid_argument& e) { + LOG_ERROR(e.what()); + } + return res; + } } // end namespace MxRec diff --git a/src/core/utils/common.h b/src/core/utils/common.h index 26aad3fe..9a39e7ac 100644 --- a/src/core/utils/common.h +++ b/src/core/utils/common.h @@ -610,6 +610,8 @@ namespace MxRec { ostream& operator<<(ostream& ss, MxRec::CkptDataType type); bool CheckFilePermission(const string& filePath); + + int GetStepFromPath(const string& loadPath); } // end namespace MxRec #define KEY_PROCESS "\033[45m[KeyProcess]\033[0m " -- Gitee From d3a463873ad1777b322627529cc89a9df720757e Mon Sep 17 00:00:00 2001 From: steepcurve Date: Mon, 24 Jun 2024 04:12:47 +0000 Subject: [PATCH 240/302] fix: add compatibility with old cann versions Signed-off-by: steepcurve --- .../op_kernel/embedding_lookup_by_address.cpp | 4 ++++ .../op_kernel/embedding_update_by_address.cpp | 6 ++++++ 2 files changed, 10 insertions(+) diff --git a/cust_op/cust_op_by_addr/op_kernel/embedding_lookup_by_address.cpp b/cust_op/cust_op_by_addr/op_kernel/embedding_lookup_by_address.cpp index e198b6c0..f6a1e656 100644 --- a/cust_op/cust_op_by_addr/op_kernel/embedding_lookup_by_address.cpp +++ b/cust_op/cust_op_by_addr/op_kernel/embedding_lookup_by_address.cpp @@ -44,9 +44,11 @@ public: pipe.InitBuffer(inQueue, pingpongNum, veclen); pipe.InitBuffer(outQueue, pingpongNum, veclen); +#ifdef L2_CACHE_HINT // set `GlobalTensor` cache mode explicitly srcAddrGlobal.SetL2CacheHint(CacheMode::CACHE_MODE_NORMAL); dstDataGm.SetL2CacheHint(CacheMode::CACHE_MODE_NORMAL); +#endif // get start index for current core, core parallel block_indx block_dim,即使是最后一个核也应该多初始化一些,并对齐4的倍数 srcAddrGlobal.SetGlobalBuffer((__gm__ int64_t *)(address + block_idx * singleCoreAddrLen), needComputeAddrLen); @@ -115,7 +117,9 @@ private: int64_t address = srcAddrLocal.GetValue(i); if (address != 0) { +#ifdef L2_CACHE_HINT srcDataBufferGm.SetL2CacheHint(CacheMode::CACHE_MODE_NORMAL); +#endif srcDataBufferGm.SetGlobalBuffer((__gm__ T *)(address), embDimAligned); DataCopy(dataLocal[embDimAligned * nums], srcDataBufferGm, embDimAligned); } else { diff --git a/cust_op/cust_op_by_addr/op_kernel/embedding_update_by_address.cpp b/cust_op/cust_op_by_addr/op_kernel/embedding_update_by_address.cpp index 5d496ee8..50abf83c 100644 --- a/cust_op/cust_op_by_addr/op_kernel/embedding_update_by_address.cpp +++ b/cust_op/cust_op_by_addr/op_kernel/embedding_update_by_address.cpp @@ -40,10 +40,12 @@ public: pipe.InitBuffer(inQueue, pingpongNum, veclen); pipe.InitBuffer(outQueue, 
pingpongNum, veclen);
 
+#ifdef L2_CACHE_HINT
     // set `GlobalTensor` cache mode explicitly
     srcAddrGlobal.SetL2CacheHint(CacheMode::CACHE_MODE_NORMAL);
     srcDataBufferGm.SetL2CacheHint(CacheMode::CACHE_MODE_NORMAL);
     outDataGm.SetL2CacheHint(CacheMode::CACHE_MODE_NORMAL);
+#endif
 
     // get start index for current core, core parallel block_indx block_dim
     srcAddrGlobal.SetGlobalBuffer((__gm__ int64_t *)(address + block_idx * singleCoreAddrLen));
@@ -117,7 +119,9 @@ private:
         for (int i = 0; i < addrNum; i++) {
             address = srcAddrLocal.GetValue(i);
             if (address != 0) {
+#ifdef L2_CACHE_HINT
                 dstDataGm.SetL2CacheHint(CacheMode::CACHE_MODE_NORMAL);
+#endif
                 dstDataGm.SetGlobalBuffer((__gm__ T*)(address));
                 DataCopy(dstDataGm, dstLocal[i * inputDimAligned], inputDimAligned);
             }
@@ -156,7 +160,9 @@ private:
         LocalTensor dstLocal = outQueue.DeQue();
 
         if (address != 0) {
+#ifdef L2_CACHE_HINT
             dstDataGm.SetL2CacheHint(CacheMode::CACHE_MODE_NORMAL);
+#endif
             dstDataGm.SetGlobalBuffer((__gm__ T *)(address));
 
             if (updateType == 0) {
-- 
Gitee


From d304c4b5422e9367410ab1dbd6cff659c852bb9a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E7=BD=97=E5=B9=B8=E8=BF=90?=
Date: Mon, 24 Jun 2024 19:19:39 +0800
Subject: [PATCH 241/302] =?UTF-8?q?=E3=80=90fix=E3=80=91Acctr=E4=B8=AD?=
 =?UTF-8?q?=E7=9A=84cleancode=EF=BC=8C=E4=B8=8D=E8=83=BD=E5=B0=81=E8=A3=85?=
 =?UTF-8?q?=E5=AE=89=E5=85=A8=E5=87=BD=E6=95=B0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/AccCTR/src/embedding_cache/offset_mapper/mapper_base.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/AccCTR/src/embedding_cache/offset_mapper/mapper_base.h b/src/AccCTR/src/embedding_cache/offset_mapper/mapper_base.h
index 78729be3..42d62ca4 100644
--- a/src/AccCTR/src/embedding_cache/offset_mapper/mapper_base.h
+++ b/src/AccCTR/src/embedding_cache/offset_mapper/mapper_base.h
@@ -706,8 +706,8 @@ private:
                 delete[] bucketPtr;
                 bucketPtr = nullptr;
                 FreeSubMaps();
-                ock::ExternalLogger::PrintLog(ock::LogLevel::ERROR, "memset_s failed... size: " + std::to_string(
-                    bucketsBytes) + ", error code:" + std::to_string(ret));
+                ock::ExternalLogger::PrintLog(ock::LogLevel::ERROR,
+                    "memset_s failed...
size: " + std::to_string(bucketsBytes) + ", error code:" + std::to_string(ret)); return false; } } -- Gitee From 277e413ffca6e714cb12acacd997aef153e2623e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=BD=97=E5=B9=B8=E8=BF=90?= Date: Mon, 24 Jun 2024 19:56:58 +0800 Subject: [PATCH 242/302] =?UTF-8?q?=E3=80=90fix=E3=80=91=E3=80=90=E5=8A=A8?= =?UTF-8?q?=E6=80=81=E6=89=A9=E5=AE=B9=E3=80=91=E5=88=A0=E9=99=A4=E6=89=A9?= =?UTF-8?q?=E5=AE=B9=E7=9A=84=E5=86=97=E4=BD=99=E4=BB=A3=E7=A0=81=EF=BC=8C?= =?UTF-8?q?=E5=88=A0=E9=99=A4=E6=89=A9=E5=AE=B9=E9=A2=9D=E5=A4=96=E7=94=B3?= =?UTF-8?q?=E8=AF=B7=E7=A9=BA=E9=97=B4=E7=9A=84=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core/emb_table/emb_table.cpp | 163 ------------------------- src/core/emb_table/emb_table.h | 93 -------------- src/core/key_process/key_process.cpp | 39 ------ src/core/key_process/key_process.h | 4 - src/tests/emb_table/emb_table_test.cpp | 135 -------------------- 5 files changed, 434 deletions(-) delete mode 100644 src/core/emb_table/emb_table.cpp delete mode 100644 src/core/emb_table/emb_table.h delete mode 100644 src/tests/emb_table/emb_table_test.cpp diff --git a/src/core/emb_table/emb_table.cpp b/src/core/emb_table/emb_table.cpp deleted file mode 100644 index 914cf535..00000000 --- a/src/core/emb_table/emb_table.cpp +++ /dev/null @@ -1,163 +0,0 @@ -/* Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and - limitations under the License. 
-==============================================================================*/ - -#include -#include -#include -#include -#include "acl/acl_base.h" -#include "utils/common.h" -#include "initializer/initializer.h" -#include "emb_table/emb_table.h" - - -using namespace std; -using namespace MxRec; -using namespace tensorflow; - -void EmbTable::Init(const EmbInfo& eInfo, const RankInfo& rInfo, int initSeed) -{ -#ifndef GTEST - this->rankInfo = rInfo; - this->seed = initSeed; - this->embInfo = eInfo; - LOG_INFO("EmbTable init, deviceID {}, embSize {} running", rInfo.deviceId, embInfo.extEmbeddingSize); - // 计算embedding table需要分配的内存块数 - auto ret = aclrtSetDevice(static_cast(rInfo.deviceId)); - if (ret != ACL_ERROR_NONE) { - LOG_ERROR("Set device failed, device_id:{}, ret={}", rInfo.deviceId, ret); - throw AclError(); - } - embSize = embInfo.extEmbeddingSize; - blockSize = BLOCK_EMB_COUNT * embSize; - for (int i = 0; i < INIT_BLOCK_COUNT; ++i) { - // 申请新的内存块 - void *newBlock = nullptr; - aclError ec = aclrtMalloc(&newBlock, blockSize * sizeof(float), ACL_MEM_MALLOC_HUGE_FIRST); - if (ec != ACL_SUCCESS) { - LOG_ERROR("aclrtMalloc failed, ret={}", ec); - throw AclError(); - } - // 申请内存初始化 - RandomInit(newBlock); - // 将新的内存块加入内存链表 - memoryList.push_back(newBlock); - SplitMemoryBlock(newBlock); - } - totalCapacity = static_cast(memoryList.size()) * BLOCK_EMB_COUNT; - LOG_INFO("aclrtMalloc success, emb name:{}, total capacity:{}", embInfo.name, totalCapacity); -#endif -} - -EmbTable::~EmbTable() -{ -#ifndef GTEST - for (void *block : memoryList) { - // 释放内存块 - aclError ret = aclrtFree(block); - if (ret != ACL_SUCCESS) { - LOG_ERROR("aclrtFree failed, ret={}", ret); - } - block = nullptr; - } -#endif -} - -// 从embeddingList获取一个可用的emb地址 -int64_t EmbTable::GetEmbAddress() -{ - int64_t ret = -1; -#ifndef GTEST - if (embeddingList.empty()) { - PrintStatus(); - LOG_DEBUG("GetEmbAddress, embedding_list size: empty! Add block!"); - void *addBlock = nullptr; - aclError ret = aclrtMalloc(&addBlock, blockSize * sizeof(float), ACL_MEM_MALLOC_HUGE_FIRST); - if (ret != ACL_SUCCESS) { - LOG_ERROR("aclrtMalloc failed, ret={}", ret); - throw AclError(); - } - RandomInit(addBlock); - // 将新的内存块加入内存list - memoryList.push_back(addBlock); - SplitMemoryBlock(addBlock); - totalCapacity += BLOCK_EMB_COUNT; - } - float *embAddr = embeddingList.front(); - embeddingList.pop_front(); - usedCapacity++; - ret = reinterpret_cast(embAddr); -#endif - return ret; -} - -void EmbTable::RandomInit(void* newBlock) -{ -#ifndef GTEST - LOG_INFO("Device GenerateEmbData Start, seed:{}, initializer num: {}", seed, embInfo.initializeInfos.size()); - vector devEmb(blockSize); - for (const auto& initializeInfo: as_const(embInfo.initializeInfos)) { - LOG_INFO("Device GenerateEmbData ing. 
name {}", initializeInfo.name.c_str()); - for (int i = 0; i < BLOCK_EMB_COUNT; i++) { - initializeInfo.initializer->GenerateData(&devEmb[i * embSize], embSize); - } - } - LOG_INFO("Device GenerateEmbData End, seed:{}", seed); - ExecuteAclMemcpy(newBlock, devEmb); -#endif -} - -void EmbTable::ExecuteAclMemcpy(void* newBlock, vector devEmb) const -{ -#ifndef GTEST - aclError ret = aclrtMemcpy( - newBlock, blockSize * sizeof(float), devEmb.data(), blockSize * sizeof(float), ACL_MEMCPY_HOST_TO_DEVICE); - if (ret != ACL_SUCCESS) { - LOG_ERROR("aclrtMemcpy failed, ret={}", ret); - throw AclError(); - } -#endif -} - - -void EmbTable::SplitMemoryBlock(void *newBlock) -{ -#ifndef GTEST - if (embSize == 0) { - throw std::runtime_error("SplitMemoryBlock by embSize=0!"); - } - for (int i = 0; i < BLOCK_EMB_COUNT; i++) { - float *embPtr = static_cast(newBlock) + i * embSize; - embeddingList.push_back(embPtr); - } -#endif -} - -void EmbTable::PrintStatus() const -{ - // 输出embedding table的总容量和未使用的使用容量 - LOG_INFO("Total capacity:{}, Unused capacity:{}", - totalCapacity * embSize, totalCapacity * embSize - usedCapacity * embSize); -} - -int64_t EmbTable::GetTableSize() const -{ - return static_cast(usedCapacity); -} - -int64_t EmbTable::GetTableCapacity() const -{ - return static_cast(totalCapacity); -} diff --git a/src/core/emb_table/emb_table.h b/src/core/emb_table/emb_table.h deleted file mode 100644 index 2d30818c..00000000 --- a/src/core/emb_table/emb_table.h +++ /dev/null @@ -1,93 +0,0 @@ -/* Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and - limitations under the License. 
-==============================================================================*/ - -#ifndef MX_REC_EMB_TABLE_H -#define MX_REC_EMB_TABLE_H - -#include -#include -#include -#include - -#include "utils/common.h" - -namespace MxRec { - - using namespace std; - - class EmbTable { - public: - EmbTable() = default; - - void Init(const EmbInfo& eInfo, const RankInfo& rInfo, int initSeed = 0); - - ~EmbTable(); - - // 从embeddingList获取获取一个可用的emb地址 - int64_t GetEmbAddress(); - - // 打印emb表使用情况 - void PrintStatus() const; - - int64_t GetTableSize() const; - - int64_t GetTableCapacity() const; - - EmbTable(const EmbTable&) = delete; - - EmbTable(EmbTable&&) = delete; - - EmbTable& operator=(const EmbTable&) = delete; - - EmbTable& operator=(EmbTable&&) = delete; - - void ExecuteAclMemcpy(void* newBlock, vector devEmb) const; - - GTEST_PRIVATE: - constexpr static int BLOCK_EMB_COUNT = 100000; - constexpr static int INIT_BLOCK_COUNT = 5; - constexpr static int TEST_EMB_SIZE = 12; - EmbInfo embInfo; - RankInfo rankInfo; - size_t blockSize = 1; - int embSize = 1; - size_t totalCapacity = 1; - size_t usedCapacity = 0; - int seed = 0; - // embedding地址的列表 - list embeddingList; - // 内存块列表 - vector memoryList; - - void RandomInit(void* newBlock); - - // embSize由embInfo得出 - void SplitMemoryBlock(void* newBlock); - - // 内部类,抛出内存不足异常 - class OutOfMemoryError : public runtime_error { - public: - OutOfMemoryError() : runtime_error("Out of memory!") {} - }; - - // 内部类,抛出acl异常 - class AclError : public runtime_error { - public: - AclError() : runtime_error("Acl failed!") {} - }; - }; -} - -#endif // MX_REC_EMB_TABLE_MANAGER_H \ No newline at end of file diff --git a/src/core/key_process/key_process.cpp b/src/core/key_process/key_process.cpp index b5dc962e..74dfafa5 100644 --- a/src/core/key_process/key_process.cpp +++ b/src/core/key_process/key_process.cpp @@ -57,11 +57,6 @@ bool KeyProcess::Initialize(const RankInfo& rInfo, const vector& eInfos embInfos[info.name] = info; scInfo[info.name] = info.sendCount; InitHotEmbTotCount(info, rInfo); - if (rankInfo.useDynamicExpansion) { - // 动态扩容 - embeddingTableMap[info.name].Init(info, rInfo, seed); - LOG_INFO(KEY_PROCESS "EmbeddingTableMap:{} init success", info.name); - } } LOG_INFO(KEY_PROCESS "hot emb count info:{}", MapToString(hotEmbTotCount)); @@ -1114,40 +1109,6 @@ void KeyProcess::Key2Offset(const EmbNameT& embName, KeysT& splitKey, int channe embName, maxOffsetTmp, embInfos[embName].devVocabSize, key2OffsetTC.ElapsedMS()); } -void KeyProcess::Key2OffsetDynamicExpansion(const EmbNameT& embName, KeysT& splitKey, int channel) -{ - TimeCost key2OffsetTC; - EASY_FUNCTION(profiler::colors::Blue600) - std::lock_guard lk(mut); // lock for PROCESS_THREAD - auto& key2Offset = keyOffsetMap[embName]; - auto& maxOffsetTmp = maxOffset[embName]; - auto& curEmbTable = embeddingTableMap[embName]; // empty when not use dynamic expansion - for (long& key : splitKey) { - if (key == -1) { - key = 0; - continue; - } - const auto& iter = key2Offset.find(key); - if (iter != key2Offset.end()) { - key = iter->second; - } else { - // 新值 - if (channel == TRAIN_CHANNEL_ID) { -#ifndef GTEST - int64_t addr = curEmbTable.GetEmbAddress(); - key2Offset[key] = addr; - key = addr; -#endif - maxOffsetTmp++; - continue; - } - key = 0; - } - } - LOG_DEBUG("current expansion emb:{}, usage:{}/{}, key2OffsetTC({} ms)", - embName, maxOffsetTmp, embInfos[embName].devVocabSize, key2OffsetTC.ElapsedMS()); -} - /* * 构建恢复向量,以便从去重后的emb向量/key恢复回batch对应的emb向量 * 输入接收到emb块的偏移blockOffset,batch内每个key在块内的偏移restoreVec 
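For context on the removal above: Key2OffsetDynamicExpansion was the dynamic-expansion variant of the surviving Key2Offset, and both follow the same lookup contract visible in the deleted body — padding keys (-1) map to offset 0, known keys return their cached offset, unseen keys are admitted only on the train channel, and the eval channel falls back to offset 0. What this patch drops is only the extra step where a newly admitted key also called EmbTable::GetEmbAddress() to back the offset with device memory. A minimal standalone sketch of that shared contract, with hypothetical names (MapKeyToOffset, keyToOffset, nextFreeOffset) standing in for the real keyOffsetMap/maxOffset members in key_process.cpp:

    #include <cstdint>
    #include <unordered_map>

    // Sketch only: resolve one raw feature key to a table offset.
    // trainChannel == true may admit unseen keys; eval never does.
    int64_t MapKeyToOffset(std::unordered_map<int64_t, int64_t>& keyToOffset,
                           int64_t& nextFreeOffset, int64_t key, bool trainChannel)
    {
        if (key == -1) {
            return 0;  // padding / invalid keys share the default slot
        }
        auto it = keyToOffset.find(key);
        if (it != keyToOffset.end()) {
            return it->second;  // key was admitted earlier
        }
        if (trainChannel) {
            keyToOffset[key] = nextFreeOffset;
            return nextFreeOffset++;  // admit the new key at the next free slot
        }
        return 0;  // eval-only keys read the default slot instead
    }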
diff --git a/src/core/key_process/key_process.h b/src/core/key_process/key_process.h index 589fc2a5..82a3205b 100644 --- a/src/core/key_process/key_process.h +++ b/src/core/key_process/key_process.h @@ -28,7 +28,6 @@ See the License for the specific language governing permissions and #include "ock_ctr_common/include/factory.h" #include "utils/common.h" -#include "emb_table/emb_table.h" #include "feature_admit_and_evict.h" #include "hybrid_mgmt/hybrid_mgmt_block.h" #include "utils/singleton.h" @@ -196,7 +195,6 @@ namespace MxRec { map> evictPosMap {}; map> hotKey {}; map hotEmbTotCount; - map embeddingTableMap {}; ock::ctr::FactoryPtr factory {}; int hotEmbUpdateStep = HOT_EMB_UPDATE_STEP_DEFAULT; bool isWithFAAE; @@ -251,8 +249,6 @@ namespace MxRec { void Key2Offset(const EmbNameT& embName, KeysT& splitKey, int channel); - void Key2OffsetDynamicExpansion(const EmbNameT& embName, KeysT& splitKey, int channel); - unique_ptr GetBatchData(int channel, int commId) const; void BuildRestoreVec(const unique_ptr& batch, const vector& blockOffset, diff --git a/src/tests/emb_table/emb_table_test.cpp b/src/tests/emb_table/emb_table_test.cpp deleted file mode 100644 index b26b4487..00000000 --- a/src/tests/emb_table/emb_table_test.cpp +++ /dev/null @@ -1,135 +0,0 @@ -/* Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and - limitations under the License. 
-==============================================================================*/ - -#include -#include -#include -#include -#include -#include -#include -#include "utils/common.h" -#include "emb_table/emb_table.h" - -using namespace std; -using namespace MxRec; -using namespace testing; -using namespace tensorflow; - -class EmbTableTest : public testing::Test { -protected: - void SetUp() - { - // 设置测试用的EmbInfo - embInfo.extEmbeddingSize = embTable.TEST_EMB_SIZE; - LOG_INFO("EmbTable BLOCK_EMB_COUNT {} INIT_BLOCK_COUNT {}", - embTable.BLOCK_EMB_COUNT, embTable.INIT_BLOCK_COUNT); - rankInfo.rankId = 0; - rankInfo.rankSize = 1; - rankInfo.localRankSize = 1; - rankInfo.useStatic = true; - rankInfo.localRankId = 0; - rankInfo.isDDR = true; - rankInfo.ctrlSteps = { 1, -1 }; - rankInfo.deviceId = 0; - // 初始化EmbeddingTable -#ifndef GTEST - LOG_INFO("rank {} running", rankInfo.deviceId); - aclInit(nullptr); -#endif - } - - EmbTable embTable; - EmbInfo embInfo; - RankInfo rankInfo; - aclrtContext context; - - void TearDown() { - } -}; - -// 测试初始化是否正常 -TEST_F(EmbTableTest, Init) -{ -#ifndef GTEST - // 测试初始化是否出现异常 - EXPECT_NO_THROW(embTable.Init(embInfo, rankInfo, 0)); - LOG_INFO("embTable Init succeed!"); - ASSERT_EQ(embTable.rankInfo.g_rankId, rankInfo.g_rankId); - ASSERT_EQ(embTable.rankInfo.rankSize, rankInfo.rankSize); - ASSERT_EQ(embTable.rankInfo.localRankSize, rankInfo.localRankSize); - ASSERT_EQ(embTable.rankInfo.useStatic, rankInfo.useStatic); - ASSERT_EQ(embTable.rankInfo.localRankId, rankInfo.localRankId); - // 测试容量是否正常 - LOG_INFO("totalCapacity {}, INIT_BLOCK_COUNT {}", embTable.totalCapacity, embTable.INIT_BLOCK_COUNT); - EXPECT_EQ(embTable.totalCapacity, embTable.INIT_BLOCK_COUNT * embTable.BLOCK_EMB_COUNT); -#endif -} - -// 测试embedding list为空时的情况 -TEST_F(EmbTableTest, GetEmbAddressEmptyList) -{ -#ifndef GTEST - embTable.Init(embInfo, rankInfo, 0); - while (!embTable.embeddingList.empty()) { - float *embAddr = reinterpret_cast(embTable.GetEmbAddress()); - EXPECT_NE(embAddr, nullptr); - } - ASSERT_EQ(embTable.embeddingList.size(), 0); - - float *curAddr = nullptr; - int usedCapacityBefore = embTable.usedCapacity; - ASSERT_NO_THROW({ - curAddr= reinterpret_cast(embTable.GetEmbAddress()); - }); - EXPECT_NE(curAddr, nullptr); - EXPECT_EQ(embTable.usedCapacity, usedCapacityBefore + 1); -#endif -} - -// 测试正常情况 -TEST_F(EmbTableTest, GetEmbAddressNormal) -{ -#ifndef GTEST - embTable.Init(embInfo, rankInfo, 0); - ASSERT_EQ(embTable.totalCapacity, embTable.INIT_BLOCK_COUNT); - float *curAddr = nullptr; - int totalCapacityBefore = embTable.totalCapacity; - int usedCapacityBefore = embTable.usedCapacity; - ASSERT_NO_THROW({ - curAddr = reinterpret_cast(embTable.GetEmbAddress()); - }); - EXPECT_NE(curAddr, nullptr); - EXPECT_EQ(embTable.totalCapacity, totalCapacityBefore); - EXPECT_EQ(embTable.usedCapacity, usedCapacityBefore + 1); -#endif -} - -// 测试将一个emb地址放入embeddingList中,是否成功 -TEST_F(EmbTableTest, PutEmbAddress) -{ -#ifndef GTEST - embTable.Init(embInfo, rankInfo, 0); - int64_t curAddr; - int usedCapacityBefore = embTable.usedCapacity; - ASSERT_NO_THROW({ - curAddr = embTable.GetEmbAddress(); - }); - EXPECT_EQ(embTable.usedCapacity, usedCapacityBefore + 1); - embTable.PutEmbAddress(curAddr); - EXPECT_EQ(embTable.usedCapacity, usedCapacityBefore); - EXPECT_EQ(curAddr, reinterpret_cast(embTable.embeddingList.back())); -#endif -} -- Gitee From 3b9fbb550f6ca5b78f3e6adfbe4220ea98c7afb4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=BD=97=E5=B9=B8=E8=BF=90?= Date: Mon, 24 Jun 2024 20:24:09 +0800 
Subject: [PATCH 243/302] =?UTF-8?q?=E3=80=90fix=E3=80=91capacity=E6=8E=A5?=
 =?UTF-8?q?=E5=8F=A3=E9=80=82=E9=85=8D=E6=96=B0ddr=E3=80=81ssd?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 mx_rec/core/emb/sparse_embedding.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mx_rec/core/emb/sparse_embedding.py b/mx_rec/core/emb/sparse_embedding.py
index 071f4506..39af9d60 100644
--- a/mx_rec/core/emb/sparse_embedding.py
+++ b/mx_rec/core/emb/sparse_embedding.py
@@ -77,9 +77,9 @@ class ExternalStorageSparseEmbedding(SparseEmbedding):
     def capacity(self) -> int:
         # DDR
         if not self._ssd_vocabulary_size:
-            return self._device_vocabulary_size + self._host_vocabulary_size
+            return self._host_vocabulary_size
         # SSD
-        return self._device_vocabulary_size + self._host_vocabulary_size + self._ssd_vocabulary_size
+        return self._host_vocabulary_size + self._ssd_vocabulary_size
 
 
 def _set_specific_value_for_non_valid_key(id_offsets: Optional[tf.Tensor],
-- 
Gitee


From d54007682b22976f72a62049888e60d6335cf123 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E7=BD=97=E5=B9=B8=E8=BF=90?=
Date: Wed, 26 Jun 2024 17:19:26 +0800
Subject: [PATCH 244/302] =?UTF-8?q?=E3=80=90fix=E3=80=91=E5=A2=9E=E5=8A=A0?=
 =?UTF-8?q?=E5=BC=82=E5=B8=B8=E6=83=85=E5=86=B5=E4=B8=8B=E7=9A=84=E6=97=A5?=
 =?UTF-8?q?=E5=BF=97=E8=AF=B4=E6=98=8E?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 mx_rec/validator/emb_validator.py                       | 8 ++++----
 .../src/embedding_cache/cache_manager/cache_manager.cpp | 4 +++-
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/mx_rec/validator/emb_validator.py b/mx_rec/validator/emb_validator.py
index 0c7d7e81..e4417b6d 100644
--- a/mx_rec/validator/emb_validator.py
+++ b/mx_rec/validator/emb_validator.py
@@ -78,14 +78,14 @@ def check_emb_lookup_params(table_params: dict, feature_spec: Union[tf.Tensor, F
     if slice_device_vocabulary_size < send_count * rank_size:
         raise ValueError(f"Given device_vocabulary_size was too small for table '{table_name}', "
                          f"in which slice_device_vocabulary_size was {slice_device_vocabulary_size} "
-                         f"and send_count({send_count}) * rank_size({rank_size}) was "
-                         f"{send_count * rank_size}.")
+                         f"and it must be at least send_count({send_count}) * rank_size({rank_size}): "
+                         f"{send_count * rank_size}, please increase [device vocabSize] in [create_table] interface")
 
     if slice_host_vocabulary_size < send_count * rank_size:
         raise ValueError(f"Given host_vocabulary_size was too small for table '{table_name}', "
                          f"in which slice_host_vocabulary_size was {slice_host_vocabulary_size} "
-                         f"and send_count({send_count}) * rank_size({rank_size}) was "
-                         f"{send_count * rank_size}.")
+                         f"and it must be at least send_count({send_count}) * rank_size({rank_size}): "
+                         f"{send_count * rank_size}, please increase [host vocabSize] in [create_table] interface")
 
 
 def check_emb_multi_lookup_times(lookup_times: int, table_name: str):
diff --git a/src/AccCTR/src/embedding_cache/cache_manager/cache_manager.cpp b/src/AccCTR/src/embedding_cache/cache_manager/cache_manager.cpp
index 3620c5d0..8a6187a1 100644
--- a/src/AccCTR/src/embedding_cache/cache_manager/cache_manager.cpp
+++ b/src/AccCTR/src/embedding_cache/cache_manager/cache_manager.cpp
@@ -40,7 +40,9 @@ int EmbCacheManagerImpl::CreateCacheForTable(const EmbCacheInfo& embCacheInfo,
     }
 
     if (embCacheInfo.vocabSize < embCacheInfo.maxCacheSize) {
-        ExternalLogger::PrintLog(LogLevel::ERROR, "vocabSize must be greater than or equal to maxCacheSize");
+
ExternalLogger::PrintLog(LogLevel::ERROR, "host vocabSize:" + std::to_string(embCacheInfo.vocabSize) + + " must be greater than or equal to device vocabSize:" + std::to_string(embCacheInfo.maxCacheSize) + + ", please increase [host vocabSize] in [create_table] interface"); return H_HOST_VOCAB_SIZE_TOO_SMALL; } -- Gitee From 2187cc1c56a8e4fb3f90bb051c717f6cb951153d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=BD=97=E5=B9=B8=E8=BF=90?= Date: Thu, 27 Jun 2024 19:19:32 +0800 Subject: [PATCH 245/302] =?UTF-8?q?=E3=80=90fix=E3=80=91ddr=E6=A8=A1?= =?UTF-8?q?=E5=BC=8F=E4=B8=8B=E7=9A=84eos=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core/hybrid_mgmt/hybrid_mgmt_block.cpp | 9 +++------ src/core/hybrid_mgmt/hybrid_mgmt_block.h | 6 ++---- src/core/key_process/key_process.cpp | 20 ++++++++++++++------ src/core/key_process/key_process.h | 2 +- src/ops_tf/hybrid_dataset_ops.cpp | 4 ++-- 5 files changed, 22 insertions(+), 19 deletions(-) diff --git a/src/core/hybrid_mgmt/hybrid_mgmt_block.cpp b/src/core/hybrid_mgmt/hybrid_mgmt_block.cpp index fbee8b9a..04433469 100644 --- a/src/core/hybrid_mgmt/hybrid_mgmt_block.cpp +++ b/src/core/hybrid_mgmt/hybrid_mgmt_block.cpp @@ -174,6 +174,9 @@ void HybridMgmtBlock::ResetAll(int channelId) pythonBatchId[channelId] = 0; hybridBatchId[channelId] = 0; isBlock[channelId] = false; + if (channelId == EVAL_CHANNEL_ID) { + evalBatchIdTotal += readEmbedBatchId[channelId]; + } LOG_DEBUG(HYBRID_BLOCKING + "after reset block status," " channelId:{}, pythonBatchId:{}, readEmbedBatchId:{}, hybridBatchId:{}", @@ -269,10 +272,4 @@ bool HybridMgmtBlock::IsNeedWaitSave() void HybridMgmtBlock::FinishSave() { finishSave = true; -} - -void HybridMgmtBlock::IncrementReadEmbBatchId(const int channelId) -{ - this->readEmbedBatchId[channelId] += 1; - this->readEmbedBatchIdAll += 1; } \ No newline at end of file diff --git a/src/core/hybrid_mgmt/hybrid_mgmt_block.h b/src/core/hybrid_mgmt/hybrid_mgmt_block.h index a66f9b00..f3ee6e8f 100644 --- a/src/core/hybrid_mgmt/hybrid_mgmt_block.h +++ b/src/core/hybrid_mgmt/hybrid_mgmt_block.h @@ -41,8 +41,8 @@ namespace MxRec { int pythonBatchId[2] = {0, 0}; // readEmbed算子侧将要处理的batch id int readEmbedBatchId[2] = {0, 0}; - // readEmbed算子处理过的batch计数,不区分通道、图,不会重置;用于判断h2d swap是否需要eos - int readEmbedBatchIdAll = 0; + // eval通道处理过的batch计数,不区分通道、图,不会重置;用于判断h2d swap是否需要eos + int evalBatchIdTotal = 0; int maxTrainStep = 0; int stepsInterval[2] = {0, 0}; // 通道i运行多少步后切换为通道j @@ -91,8 +91,6 @@ namespace MxRec { void FinishSave(); - void IncrementReadEmbBatchId(const int channelId); - private: // 控制通道阻塞的变量 bool isBlock[2] = {true, true}; diff --git a/src/core/key_process/key_process.cpp b/src/core/key_process/key_process.cpp index b5dc962e..96448c05 100644 --- a/src/core/key_process/key_process.cpp +++ b/src/core/key_process/key_process.cpp @@ -1263,19 +1263,27 @@ bool KeyProcess::IsGetUniqueKeysEos(const EmbBaseInfo& info, std::chrono::_V2::s int readEmbKeyBatchId = hybridMgmtBlock->readEmbedBatchId[info.channelId] - 1; // 避免eos在keyProcess还未处理完数据时插队到通道前面 std::chrono::duration elapsedTime = endTime - startTime; + // train and eval batch total num + int allChannelBatchId = 0; + if (info.channelId == EVAL_CHANNEL_ID) { + allChannelBatchId = hybridMgmtBlock->evalBatchIdTotal + hybridMgmtBlock->hybridBatchId[TRAIN_CHANNEL_ID] + + readEmbKeyBatchId; + } else { + allChannelBatchId = hybridMgmtBlock->evalBatchIdTotal + readEmbKeyBatchId; + } if (info.batchId != 0 && elapsedTime.count() >= 
timeoutGetUniqueKeysEmpty) { LOG_DEBUG("table:{}, channelId:{}, isNeedSendEos:{}, readEmbKeyBatchId:{}, batch:{}, h2dNextBatchId:{}," - " lookUpSwapInAddrsPushId:{}", info.name, info.channelId, isNeedSendEos[info.channelId], - readEmbKeyBatchId, info.batchId, hybridMgmtBlock->h2dNextBatchId[info.name], - lookUpSwapInAddrsPushId[info.name]); + " lookUpSwapInAddrsPushId:{}, allChannelBatchId:{}", info.name, info.channelId, + isNeedSendEos[info.channelId], readEmbKeyBatchId, info.batchId, + hybridMgmtBlock->h2dNextBatchId[info.name], lookUpSwapInAddrsPushId[info.name], allChannelBatchId); startTime = std::chrono::system_clock::now(); } // Check '>= readEmbedBatchIdAll' condition to avoid send eos before handle all batch data from readEmbKey Op. if (isNeedSendEos[info.channelId] && readEmbKeyBatchId < info.batchId && hybridMgmtBlock->h2dNextBatchId[info.name] == lookUpSwapInAddrsPushId[info.name] && - hybridMgmtBlock->h2dNextBatchId[info.name] >= hybridMgmtBlock->readEmbedBatchIdAll) { - LOG_INFO("table:{}, channelId:{} batchId:{}, GetUniqueKeys eos", - info.name, info.channelId, info.batchId); + hybridMgmtBlock->h2dNextBatchId[info.name] >= allChannelBatchId) { + LOG_INFO("table:{}, channelId:{} batchId:{}, GetUniqueKeys eos, h2dNextBatchId:{}, allChannelBatchId:{}", + info.name, info.channelId, info.batchId, hybridMgmtBlock->h2dNextBatchId[info.name], allChannelBatchId); return true; } LOG_TRACE("getting uniqueKeys failed, table:{}, channel:{}, mgmt batchId:{}, readEmbKey batchId:{}, list is empty", diff --git a/src/core/key_process/key_process.h b/src/core/key_process/key_process.h index 589fc2a5..ba24181a 100644 --- a/src/core/key_process/key_process.h +++ b/src/core/key_process/key_process.h @@ -205,7 +205,7 @@ namespace MxRec { bool isNeedSendEos[2] = {false, false}; // 表示各表通道0、1的eos状态 atomic readySendEosCnt[2]; atomic finishSendEosCnt[2]; - const double timeoutGetUniqueKeys = 10.0; // 如果超时仍未获取到数据将触发EOS + const double timeoutGetUniqueKeys = 30.0; // 如果超时仍未获取到数据将触发EOS const double timeoutGetUniqueKeysEmpty = 1.0; // 如果超时仍未获取到数据将打印信息 void InitHotEmbTotCount(const EmbInfo& info, const RankInfo& rInfo); diff --git a/src/ops_tf/hybrid_dataset_ops.cpp b/src/ops_tf/hybrid_dataset_ops.cpp index 0b192da5..2eee8531 100644 --- a/src/ops_tf/hybrid_dataset_ops.cpp +++ b/src/ops_tf/hybrid_dataset_ops.cpp @@ -214,7 +214,7 @@ namespace MxRec { return; } } - hybridMgmtBlock->IncrementReadEmbBatchId(channelId); + hybridMgmtBlock->readEmbedBatchId[channelId] += 1; const Tensor& inputTensor = context->input(TensorIndex::TENSOR_INDEX_0); const auto& splits = context->input(TENSOR_INDEX_1).flat(); int fieldNum = 0; @@ -407,7 +407,7 @@ namespace MxRec { return; } } - hybridMgmtBlock->IncrementReadEmbBatchId(channelId); + hybridMgmtBlock->readEmbedBatchId[channelId] += 1; const Tensor& inputTensor = context->input(TensorIndex::TENSOR_INDEX_0); size_t dataSize = inputTensor.NumElements(); -- Gitee From 1d735d5e609b243da9720f14c164bc29649d6197 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=BD=97=E5=B9=B8=E8=BF=90?= Date: Thu, 27 Jun 2024 20:17:25 +0800 Subject: [PATCH 246/302] =?UTF-8?q?=E3=80=90fix=E3=80=91ddr=E6=A8=A1?= =?UTF-8?q?=E5=BC=8F=E4=B8=8B=E7=9A=84eos=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core/key_process/key_process.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/core/key_process/key_process.cpp b/src/core/key_process/key_process.cpp index 96448c05..4d467153 100644 --- 
a/src/core/key_process/key_process.cpp
+++ b/src/core/key_process/key_process.cpp
@@ -1259,7 +1259,7 @@ bool KeyProcess::IsGetUniqueKeysEos(const EmbBaseInfo& info, std::chrono::_V2::s
     HybridMgmtBlock* hybridMgmtBlock = Singleton::GetInstance();
 
     auto endTime = std::chrono::system_clock::now();
-    // readEmbKey真实的次数是readEmbedBatchId减1
+    // readEmbKey starts from 0
     int readEmbKeyBatchId = hybridMgmtBlock->readEmbedBatchId[info.channelId] - 1;
     // 避免eos在keyProcess还未处理完数据时插队到通道前面
     std::chrono::duration elapsedTime = endTime - startTime;
@@ -1267,9 +1267,9 @@ bool KeyProcess::IsGetUniqueKeysEos(const EmbBaseInfo& info, std::chrono::_V2::s
     int allChannelBatchId = 0;
     if (info.channelId == EVAL_CHANNEL_ID) {
         allChannelBatchId = hybridMgmtBlock->evalBatchIdTotal + hybridMgmtBlock->hybridBatchId[TRAIN_CHANNEL_ID] +
-            readEmbKeyBatchId;
+            hybridMgmtBlock->readEmbedBatchId[info.channelId];
     } else {
-        allChannelBatchId = hybridMgmtBlock->evalBatchIdTotal + readEmbKeyBatchId;
+        allChannelBatchId = hybridMgmtBlock->evalBatchIdTotal + hybridMgmtBlock->readEmbedBatchId[info.channelId];
     }
     if (info.batchId != 0 && elapsedTime.count() >= timeoutGetUniqueKeysEmpty) {
         LOG_DEBUG("table:{}, channelId:{}, isNeedSendEos:{}, readEmbKeyBatchId:{}, batch:{}, h2dNextBatchId:{},"
@@ -1283,7 +1283,8 @@ bool KeyProcess::IsGetUniqueKeysEos(const EmbBaseInfo& info, std::chrono::_V2::s
         hybridMgmtBlock->h2dNextBatchId[info.name] == lookUpSwapInAddrsPushId[info.name] &&
         hybridMgmtBlock->h2dNextBatchId[info.name] >= allChannelBatchId) {
         LOG_INFO("table:{}, channelId:{} batchId:{}, GetUniqueKeys eos, h2dNextBatchId:{}, allChannelBatchId:{}",
-            info.name, info.channelId, info.batchId, hybridMgmtBlock->h2dNextBatchId[info.name], allChannelBatchId);
+            info.name, info.channelId, info.batchId, hybridMgmtBlock->h2dNextBatchId[info.name],
+            allChannelBatchId);
         return true;
     }
     LOG_TRACE("getting uniqueKeys failed, table:{}, channel:{}, mgmt batchId:{}, readEmbKey batchId:{}, list is empty",
-- 
Gitee


From 533bc2be9043d0e29fbf962a5a3f781cea3250f4 Mon Sep 17 00:00:00 2001
From: LiJiang
Date: Fri, 28 Jun 2024 14:31:16 +0800
Subject: [PATCH 247/302] =?UTF-8?q?=E5=88=A0=E9=99=A4=E5=86=97=E4=BD=99?=
 =?UTF-8?q?=E4=BB=A3=E7=A0=81=EF=BC=9B=E4=BF=AE=E6=94=B9=E9=94=99=E8=AF=AF?=
 =?UTF-8?q?log?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/op_runner.cpp | 2 +-
 cust_op/fused_lazy_adam/op_host/lazy_adam.cpp                  | 2 --
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/op_runner.cpp b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/op_runner.cpp
index e9711379..3b9b51fe 100644
--- a/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/op_runner.cpp
+++ b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/src/op_runner.cpp
@@ -322,7 +322,7 @@ namespace AclnnLazyAdam {
             ERROR_LOG("Execute Operator failed.
error code is %d", static_cast(ret)); return false; } - INFO_LOG("Execute aclnnAddCustom success"); + INFO_LOG("Execute aclnnLazyAdam success"); ret = aclrtSynchronizeStreamWithTimeout(stream, STREAM_TIMEOUT); if (ret != SUCCESS) { diff --git a/cust_op/fused_lazy_adam/op_host/lazy_adam.cpp b/cust_op/fused_lazy_adam/op_host/lazy_adam.cpp index fb7f86b3..2c288729 100644 --- a/cust_op/fused_lazy_adam/op_host/lazy_adam.cpp +++ b/cust_op/fused_lazy_adam/op_host/lazy_adam.cpp @@ -54,8 +54,6 @@ static ge::graphStatus LazyAdamTilingFunc(gert::TilingContext* context) ge::DataType indicesDtype = context->GetInputDesc(1)->GetDataType(); int indicesDtypeSize = ge::GetSizeByDataType(indicesDtype); - tiling.SaveToBuffer(context->GetRawTilingData()->GetData(), context->GetRawTilingData()->GetCapacity()); - context->GetRawTilingData()->SetDataSize(tiling.GetDataSize()); auto attrs = context->GetAttrs(); float beta1 = *attrs->GetAttrPointer(0); -- Gitee From 21e75227a44bbfabf1adf7c47a1352843a0e7276 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=BD=97=E5=B9=B8=E8=BF=90?= Date: Sat, 29 Jun 2024 15:24:55 +0800 Subject: [PATCH 248/302] =?UTF-8?q?=E3=80=90fix=E3=80=91stoi=E6=8A=9B?= =?UTF-8?q?=E5=87=BA=E5=BC=82=E5=B8=B8=E7=9A=84cleancode=E4=BF=AE=E5=A4=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core/utils/common.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/utils/common.cpp b/src/core/utils/common.cpp index 1b3edcfd..15aa69bb 100644 --- a/src/core/utils/common.cpp +++ b/src/core/utils/common.cpp @@ -182,7 +182,7 @@ namespace MxRec { try { res = stoi(match[1]); } catch (const std::invalid_argument& e) { - LOG_ERROR(e.what()); + LOG_ERROR("argument is invalid: {}", e.what()); } return res; } -- Gitee From 550a302c91620bbb771ac980ac564aa5c4f467a8 Mon Sep 17 00:00:00 2001 From: steepcurve Date: Mon, 1 Jul 2024 15:14:44 +0800 Subject: [PATCH 249/302] fix: `StringFormat` use cases --- src/core/checkpoint/checkpoint.cpp | 142 ++-- src/core/utils/common.h | 1132 ++++++++++++++-------------- src/ops_tf/hybrid_dataset_ops.cpp | 4 +- 3 files changed, 653 insertions(+), 625 deletions(-) diff --git a/src/core/checkpoint/checkpoint.cpp b/src/core/checkpoint/checkpoint.cpp index abd3a10e..bc7501bb 100644 --- a/src/core/checkpoint/checkpoint.cpp +++ b/src/core/checkpoint/checkpoint.cpp @@ -13,21 +13,22 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include -#include -#include -#include +#include "checkpoint.h" + #include #include +#include +#include +#include + +#include #include "ckpt_data_handler/feat_admit_n_evict_ckpt/feat_admit_n_evict_ckpt.h" -#include "ckpt_data_handler/key_freq_map_ckpt/key_freq_map_ckpt.h" #include "ckpt_data_handler/key_count_map_ckpt/key_count_map_ckpt.h" -#include "utils/time_cost.h" -#include "utils/common.h" +#include "ckpt_data_handler/key_freq_map_ckpt/key_freq_map_ckpt.h" #include "file_system/file_system_handler.h" - -#include "checkpoint.h" +#include "utils/common.h" +#include "utils/time_cost.h" using namespace std; using namespace MxRec; @@ -89,11 +90,17 @@ void Checkpoint::SetDataHandler(CkptData& ckptData) void Checkpoint::SetDataHandler(const vector& featureTypes) { - map> setCkptMap{ - {CkptFeatureType::FEAT_ADMIT_N_EVICT, [this] { dataHandlers.push_back(make_unique()); }}, - {CkptFeatureType::DDR_KEY_FREQ_MAP, [this] { dataHandlers.push_back(make_unique()); }}, - {CkptFeatureType::KEY_COUNT_MAP, [this] { dataHandlers.push_back(make_unique()); }} - }; + map> setCkptMap{{CkptFeatureType::FEAT_ADMIT_N_EVICT, + [this] { + dataHandlers.push_back(make_unique()); + }}, + {CkptFeatureType::DDR_KEY_FREQ_MAP, + [this] { + dataHandlers.push_back(make_unique()); + }}, + {CkptFeatureType::KEY_COUNT_MAP, [this] { + dataHandlers.push_back(make_unique()); + }}}; for (const auto& featureType : featureTypes) { setCkptMap.at(featureType)(); @@ -104,8 +111,8 @@ void Checkpoint::SaveProcess(CkptData& ckptData) { for (const auto& dataHandler : dataHandlers) { dataHandler->SetProcessData(ckptData); - vector embNames { dataHandler->GetEmbNames() }; - vector saveDataTypes { dataHandler->GetDataTypes() }; + vector embNames{dataHandler->GetEmbNames()}; + vector saveDataTypes{dataHandler->GetDataTypes()}; MakeUpperLayerSaveDir(); MakeDataLayerSaveDir(embNames, saveDataTypes, dataHandler); SaveDataset(embNames, saveDataTypes, dataHandler); @@ -118,17 +125,16 @@ void Checkpoint::MakeUpperLayerSaveDir() MakeSaveDir(innerDirPath); } -void Checkpoint::MakeDataLayerSaveDir(const vector& embNames, - const vector& saveDataTypes, +void Checkpoint::MakeDataLayerSaveDir(const vector& embNames, const vector& saveDataTypes, const unique_ptr& dataHandler) { for (const auto& embName : embNames) { - auto dataDir { innerDirPath + dirSeparator + embName }; + auto dataDir{innerDirPath + dirSeparator + embName}; MakeSaveDir(dataDir); for (const auto& saveDataType : saveDataTypes) { - auto dataDirName { dataHandler->GetDataDirName(saveDataType) }; - auto datasetPath { dataDir + dirSeparator + dataDirName }; + auto dataDirName{dataHandler->GetDataDirName(saveDataType)}; + auto datasetPath{dataDir + dirSeparator + dataDirName}; MakeSaveDir(datasetPath); } } @@ -146,7 +152,7 @@ void Checkpoint::MakeSaveDir(const string& dirName) const Checkpoint::EmbSizeInfo Checkpoint::GetEmbeddingSize(const string& embName) { EmbSizeInfo embSizeInfo; - for (const auto &embInfo: mgmtEmbInfo) { + for (const auto& embInfo : mgmtEmbInfo) { if (embInfo.name == embName) { embSizeInfo.embSize = embInfo.embeddingSize; embSizeInfo.extEmbSize = embInfo.extEmbeddingSize; @@ -158,29 +164,28 @@ Checkpoint::EmbSizeInfo Checkpoint::GetEmbeddingSize(const string& embName) bool Checkpoint::CheckEmbNames(const string& embName) { - for (const auto &embInfo: mgmtEmbInfo) { - if (embInfo.name == embName && embInfo.isSave) { + for (const auto& embInfo : mgmtEmbInfo) { + if (embInfo.name == embName 
&& embInfo.isSave) { return true; } } return false; } -void Checkpoint::SaveDataset(const vector& embNames, - const vector& saveDataTypes, +void Checkpoint::SaveDataset(const vector& embNames, const vector& saveDataTypes, const unique_ptr& dataHandler) { - for (const auto& embName: embNames) { + for (const auto& embName : embNames) { if (!CheckEmbNames(embName)) { continue; } auto dataDir{innerDirPath + dirSeparator + embName}; - for (const auto& saveDataType: saveDataTypes) { - auto datasetPath { dataDir + dirSeparator + dataHandler->GetDataDirName(saveDataType) }; - auto datasetDir { datasetPath + dirSeparator + datasetName + to_string(rankId) + dataFileType }; + for (const auto& saveDataType : saveDataTypes) { + auto datasetPath{dataDir + dirSeparator + dataHandler->GetDataDirName(saveDataType)}; + auto datasetDir{datasetPath + dirSeparator + datasetName + to_string(rankId) + dataFileType}; LOG_DEBUG("====Start getting data from handler to: {}", datasetDir); - auto transData { dataHandler->GetDataset(saveDataType, embName) }; + auto transData{dataHandler->GetDataset(saveDataType, embName)}; LOG_DEBUG("====Start saving data to: {}", datasetDir); WriteStream(transData, datasetDir, transData.datasetSize, saveDataType); @@ -197,36 +202,36 @@ void Checkpoint::WriteStream(CkptTransData& transData, const string& dataDir, si ssize_t writeBytesNum; if (int32TransSet.find(dataType) != int32TransSet.end()) { - writeBytesNum = fileSystemPtr->Write(dataDir, - reinterpret_cast(transData.int32Arr.data()), dataSize); + writeBytesNum = + fileSystemPtr->Write(dataDir, reinterpret_cast(transData.int32Arr.data()), dataSize); } else if (int64TransSet.find(dataType) != int64TransSet.end()) { - writeBytesNum = fileSystemPtr->Write(dataDir, - reinterpret_cast(transData.int64Arr.data()), dataSize); + writeBytesNum = + fileSystemPtr->Write(dataDir, reinterpret_cast(transData.int64Arr.data()), dataSize); } else if (dataType == CkptDataType::ATTRIBUTE) { - writeBytesNum = fileSystemPtr->Write(dataDir, - reinterpret_cast(transData.attribute.data()), dataSize); + writeBytesNum = + fileSystemPtr->Write(dataDir, reinterpret_cast(transData.attribute.data()), dataSize); } else { throw runtime_error("unknown CkptDataType"); } if (writeBytesNum == -1) { - throw runtime_error(StringFormat("Error: Save data failed. data type: %d. " - "An error occurred while writing file: %s.", dataType, dataDir.c_str())); + throw runtime_error(StringFormat("Error: Save data failed. data type: %s. " + "An error occurred while writing file: %s.", + CkptDataTypeName(dataType).c_str(), dataDir.c_str())); } if (writeBytesNum != dataSize) { - throw runtime_error(StringFormat("Error: Save data failed. data type: %d ." + throw runtime_error(StringFormat("Error: Save data failed. data type: %s. 
" "Expected to write %d bytes, but actually write %d bytes to file %s.", - dataType, dataSize, writeBytesNum, dataDir.c_str())); + CkptDataTypeName(dataType).c_str(), dataSize, writeBytesNum, dataDir.c_str())); } } - void Checkpoint::LoadProcess(CkptData& ckptData) { for (const auto& dataHandler : dataHandlers) { - vector embNames {}; - vector dirNames { dataHandler->GetDirNames() }; - vector saveDataTypes { dataHandler->GetDataTypes() }; + vector embNames{}; + vector dirNames{dataHandler->GetDirNames()}; + vector saveDataTypes{dataHandler->GetDataTypes()}; innerDirPath = processPath; if (find(dirNames.begin(), dirNames.end(), ssdSymbol) != dirNames.end()) { embNames = GetTableLayerLoadDir(); @@ -238,7 +243,6 @@ void Checkpoint::LoadProcess(CkptData& ckptData) } } - vector Checkpoint::GetEmbedTableNames() { vector loadTableNames; @@ -262,22 +266,20 @@ vector Checkpoint::GetTableLayerLoadDir() return loadTableDir; } -void Checkpoint::LoadDataset(const vector& embNames, - const vector& saveDataTypes, - const unique_ptr& dataHandler, - CkptData& ckptData) +void Checkpoint::LoadDataset(const vector& embNames, const vector& saveDataTypes, + const unique_ptr& dataHandler, CkptData& ckptData) { for (const auto& embName : embNames) { - auto dataDir { innerDirPath + dirSeparator + embName }; + auto dataDir{innerDirPath + dirSeparator + embName}; for (const auto& saveDataType : saveDataTypes) { - auto datasetPath { dataDir + dirSeparator + dataHandler->GetDataDirName(saveDataType) }; + auto datasetPath{dataDir + dirSeparator + dataHandler->GetDataDirName(saveDataType)}; - auto datasetDir { datasetPath + dirSeparator + "slice" + dataFileType }; - auto attributeDir { datasetPath + dirSeparator + "slice" + attribFileType }; + auto datasetDir{datasetPath + dirSeparator + "slice" + dataFileType}; + auto attributeDir{datasetPath + dirSeparator + "slice" + attribFileType}; CkptTransData transData; LOG_DEBUG("====Start reading data from: {}", attributeDir); - auto dataElmtBytes { dataHandler->GetDataElmtBytes(CkptDataType::ATTRIBUTE) }; + auto dataElmtBytes{dataHandler->GetDataElmtBytes(CkptDataType::ATTRIBUTE)}; ReadStream(transData, attributeDir, CkptDataType::ATTRIBUTE, dataElmtBytes); dataElmtBytes = dataHandler->GetDataElmtBytes(saveDataType); @@ -290,7 +292,7 @@ void Checkpoint::LoadDataset(const vector& embNames, } LOG_DEBUG("====Start loading data from: {} to data handler.", attributeDir); - if ((saveDataType == CkptDataType::EMB_INFO)) { + if ((saveDataType == CkptDataType::EMB_INFO)) { dataHandler->SetDatasetForLoadEmb(saveDataType, embName, transData, ckptData); } else { dataHandler->SetDataset(saveDataType, embName, transData); @@ -299,14 +301,12 @@ void Checkpoint::LoadDataset(const vector& embNames, } } -void Checkpoint::ReadStream(CkptTransData& transData, - const string& dataDir, - CkptDataType dataType, +void Checkpoint::ReadStream(CkptTransData& transData, const string& dataDir, CkptDataType dataType, uint32_t dataElmtBytes) { if (dataElmtBytes == 0) { LOG_WARN("dataElmtBytes is 0, don't handle [/ %] operation"); - return ; + return; } if (fileSystemPtr == nullptr) { @@ -315,7 +315,7 @@ void Checkpoint::ReadStream(CkptTransData& transData, } size_t datasetSize = fileSystemPtr->GetFileSize(dataDir); - auto resizeSize { datasetSize / dataElmtBytes }; + auto resizeSize{datasetSize / dataElmtBytes}; SetTransDataSize(transData, resizeSize, dataType); if (datasetSize % dataElmtBytes > 0) { @@ -328,31 +328,29 @@ void Checkpoint::ReadStream(CkptTransData& transData, } else if 
(int64TransSet.find(dataType) != int64TransSet.end()) { readBytesNum = fileSystemPtr->Read(dataDir, reinterpret_cast(transData.int64Arr.data()), datasetSize); } else if (dataType == CkptDataType::ATTRIBUTE) { - readBytesNum = fileSystemPtr->Read(dataDir, reinterpret_cast(transData.attribute.data()), datasetSize); + readBytesNum = fileSystemPtr->Read(dataDir, reinterpret_cast(transData.attribute.data()), datasetSize); } else { throw runtime_error("unknown CkptDataType"); } if (readBytesNum == -1) { - throw runtime_error(StringFormat("Error: Load data failed. data type: %d ." - "An error occurred while reading file: %s.", dataType, dataDir.c_str())); + throw runtime_error(StringFormat("Error: Load data failed. data type: %s. " + "An error occurred while reading file: %s.", + CkptDataTypeName(dataType).c_str(), dataDir.c_str())); } if (readBytesNum != datasetSize) { - throw runtime_error(StringFormat("Error: Load data failed. data type: %d ." + throw runtime_error(StringFormat("Error: Load data failed. data type: %s. " "Expected to read %d bytes, but actually read %d bytes to file %s.", - dataType, datasetSize, readBytesNum, dataDir.c_str())); + CkptDataTypeName(dataType).c_str(), datasetSize, readBytesNum, dataDir.c_str())); } } -void Checkpoint::ReadStreamForEmbData(CkptTransData& transData, - const string& dataDir, - uint32_t dataElmtBytes, - CkptData& ckptData, - string embName) const +void Checkpoint::ReadStreamForEmbData(CkptTransData& transData, const string& dataDir, uint32_t dataElmtBytes, + CkptData& ckptData, string embName) const { if (dataElmtBytes == 0) { LOG_ERROR("dataElmtBytes is 0, don't handle [/ %] operation"); - return ; + return; } if (fileSystemPtr == nullptr) { diff --git a/src/core/utils/common.h b/src/core/utils/common.h index 9a39e7ac..f8ff4565 100644 --- a/src/core/utils/common.h +++ b/src/core/utils/common.h @@ -17,608 +17,638 @@ See the License for the specific language governing permissions and #define COMMON_H #include -#include -#include -#include + +#include #include +#include +#include #include +#include #include -#include -#include -#include "tensorflow/core/framework/tensor.h" -#include "absl/container/flat_hash_map.h" -#include "securec.h" -#include "utils/logger.h" -#include "utils/config.h" +#include -#include "initializer/initializer.h" +#include "absl/container/flat_hash_map.h" #include "initializer/constant_initializer/constant_initializer.h" -#include "initializer/truncated_normal_initializer/truncated_normal_initializer.h" +#include "initializer/initializer.h" #include "initializer/random_normal_initializer/random_normal_initializer.h" -#include "ock_ctr_common/include/factory.h" +#include "initializer/truncated_normal_initializer/truncated_normal_initializer.h" #include "ock_ctr_common/include/embedding_cache.h" +#include "ock_ctr_common/include/factory.h" +#include "securec.h" +#include "tensorflow/core/framework/tensor.h" +#include "utils/config.h" +#include "utils/logger.h" #if defined(BUILD_WITH_EASY_PROFILER) - #include - #include +#include +#include #else - #define EASY_FUNCTION(...) - #define EASY_VALUE(...) - #define EASY_BLOCK(...) - #define EASY_END_BLOCK - #define EASY_PROFILER_ENABLE - #define EASY_PROFILER_DISABLE +#define EASY_FUNCTION(...) +#define EASY_VALUE(...) +#define EASY_BLOCK(...) 
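When BUILD_WITH_EASY_PROFILER is undefined, the EASY_* macros in this block expand to nothing, and because the preprocessor discards the arguments of an empty variadic macro, annotation arguments such as profiler::colors::Magenta never reach the compiler at all. A hypothetical call site illustrating the pattern (a sketch, not code from this repository):

    void LoadProcessSketch()
    {
        EASY_FUNCTION(profiler::colors::Magenta);  // whole-function scope; a no-op when profiling is off
        EASY_BLOCK("read-datasets");               // named timing block when the profiler is built in
        // ... checkpoint work ...
        EASY_END_BLOCK;
    }

Either way the call site stays identical, so profiling can be toggled purely at build time.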
+#define EASY_END_BLOCK +#define EASY_PROFILER_ENABLE +#define EASY_PROFILER_DISABLE #endif namespace MxRec { #define INFO_PTR shared_ptr #define MGMT_CPY_THREADS 4 #define PROFILING - using namespace tensorflow; - extern ock::ctr::FactoryPtr factory; - constexpr int TRAIN_CHANNEL_ID = 0; - constexpr int EVAL_CHANNEL_ID = 1; - - constexpr int MAX_CHANNEL_NUM = 2; - constexpr int MAX_KEY_PROCESS_THREAD = 10; - constexpr int MAX_QUEUE_NUM = MAX_CHANNEL_NUM * MAX_KEY_PROCESS_THREAD; - constexpr int DEFAULT_KEY_PROCESS_THREAD = 6; - constexpr int KEY_PROCESS_THREAD = 6; - constexpr char SUM_SAME_ID[] = "sum_same_id_gradients_and_apply"; - constexpr size_t MAX_VOCABULARY_SIZE = 1e10; - constexpr int SSD_SIZE_INDEX = 2; - constexpr int MAX_FILE_NUM = 1000; - constexpr int EMBEDDING_THREAD_NUM = 2; - // for GLOG - struct GlogConfig { - static bool gStatOn; - static int gGlogLevel; - static string gRankId; - }; - - constexpr int GLOG_MAX_BUF_SIZE = 1024; - constexpr int GLOG_TIME_WIDTH_2 = 2; - constexpr int GLOG_TIME_WIDTH_6 = 6; - constexpr char GLOG_STAT_FLAG[] = "statOn"; - - // unique related config - constexpr int UNIQUE_BUCKET = 6; - constexpr int MIN_UNIQUE_THREAD_NUM = 1; - - // validate file - constexpr long long FILE_MAX_SIZE = 1LL << 40; - constexpr int FILE_MIN_SIZE = 0; - constexpr size_t BUFFER_SIZE{1024 * 1024 * 64}; - constexpr size_t MAP_BYTE_SIZE{static_cast(10) * 1024 * 1024 * 1024}; +using namespace tensorflow; +extern ock::ctr::FactoryPtr factory; +constexpr int TRAIN_CHANNEL_ID = 0; +constexpr int EVAL_CHANNEL_ID = 1; + +constexpr int MAX_CHANNEL_NUM = 2; +constexpr int MAX_KEY_PROCESS_THREAD = 10; +constexpr int MAX_QUEUE_NUM = MAX_CHANNEL_NUM * MAX_KEY_PROCESS_THREAD; +constexpr int DEFAULT_KEY_PROCESS_THREAD = 6; +constexpr int KEY_PROCESS_THREAD = 6; +constexpr char SUM_SAME_ID[] = "sum_same_id_gradients_and_apply"; +constexpr size_t MAX_VOCABULARY_SIZE = 1e10; +constexpr int SSD_SIZE_INDEX = 2; +constexpr int MAX_FILE_NUM = 1000; +constexpr int EMBEDDING_THREAD_NUM = 2; +// for GLOG +struct GlogConfig { + static bool gStatOn; + static int gGlogLevel; + static string gRankId; +}; + +constexpr int GLOG_MAX_BUF_SIZE = 1024; +constexpr int GLOG_TIME_WIDTH_2 = 2; +constexpr int GLOG_TIME_WIDTH_6 = 6; +constexpr char GLOG_STAT_FLAG[] = "statOn"; + +// unique related config +constexpr int UNIQUE_BUCKET = 6; +constexpr int MIN_UNIQUE_THREAD_NUM = 1; + +// validate file +constexpr long long FILE_MAX_SIZE = 1LL << 40; +constexpr int FILE_MIN_SIZE = 0; +constexpr size_t BUFFER_SIZE{1024 * 1024 * 64}; +constexpr size_t MAP_BYTE_SIZE{static_cast(10) * 1024 * 1024 * 1024}; #ifdef GTEST - constexpr int KEY_PROCESS_TIMEOUT = 3; +constexpr int KEY_PROCESS_TIMEOUT = 3; #else - constexpr int KEY_PROCESS_TIMEOUT = 120; +constexpr int KEY_PROCESS_TIMEOUT = 120; #endif - constexpr int GET_BATCH_TIMEOUT = 300; - constexpr int EOS_TIMEOUT = 30; - - constexpr size_t DEFAULT_RANDOM_SEED = 10086; - constexpr int64_t INVALID_KEY_VALUE = -1; - constexpr int32_t INVALID_INDEX_VALUE = -1; - constexpr int ALLTOALLVC_ALIGN = 128; - constexpr int PROFILING_START_BATCH_ID = 100; - constexpr int PROFILING_END_BATCH_ID = 200; - constexpr int MGMT_THREAD_BIND = 48; - constexpr int UNIQUE_MAX_BUCKET_WIDTH = 6; - constexpr int HOT_EMB_UPDATE_STEP_DEFAULT = 1000; - constexpr float HOT_EMB_CACHE_PCT = static_cast(1. 
/ 3); // hot emb cache percent - - const string COMBINE_HISTORY_NAME = "combine_table_history"; - const string SAVE_SPARSE_PATH_PREFIX = "sparse"; - - using emb_key_t = int64_t; - using emb_cache_key_t = uint64_t; - using freq_num_t = int64_t; - using EmbNameT= std::string; - using KeysT = std::vector; - using LookupKeyT = std::tuple; // batch_id quarry_lable keys_vector - using UinqueKeyT = std::tuple>; - using RestoreVecSecT = std::tuple>; - using TensorInfoT = std::tuple>>::iterator>; - - namespace HybridOption { - const unsigned int USE_STATIC = 0x001; - const unsigned int USE_DYNAMIC_EXPANSION = 0x001 << 1; - const unsigned int USE_SUM_SAME_ID_GRADIENTS = 0x001 << 2; - }; - - string GetChipName(int devID); - int GetThreadNumEnv(); - - namespace UBSize { - const int ASCEND910_PREMIUM_A = 262144; - const int ASCEND910_PRO_B = 262144; - const int ASCEND910_B2 = 196608; - const int ASCEND910_B1 = 196608; - const int ASCEND910_B3 = 196608; - const int ASCEND910_B4 = 196608; - const int ASCEND910_C1 = 196608; - const int ASCEND910_C2 = 196608; - const int ASCEND910_C3 = 196608; - const int ASCEND920_A = 196608; - const int ASCEND910_PRO_A = 262144; - const int ASCEND910_B = 262144; - const int ASCEND910_A = 262144; - const int ASCEND910_B2C = 196608; - }; - - inline int GetUBSize(int devID) - { - const std::map chipUbSizeList = {{"910A", UBSize::ASCEND910_A}, - {"910B", UBSize::ASCEND910_B}, - {"920A", UBSize::ASCEND920_A}, - {"910B1", UBSize::ASCEND910_B1}, - {"910B2", UBSize::ASCEND910_B2}, - {"910B3", UBSize::ASCEND910_B3}, - {"910B4", UBSize::ASCEND910_B4}, - {"910B2C", UBSize::ASCEND910_B2C}, - {"910C1", UBSize::ASCEND910_C1}, - {"910C2", UBSize::ASCEND910_C1}, - {"910C3", UBSize::ASCEND910_C3} - }; - auto it = chipUbSizeList.find(GetChipName(devID)); - if (it != chipUbSizeList.end()) { - return it->second; - } - - throw std::runtime_error("unknown chip ub size" + GetChipName(devID)); +constexpr int GET_BATCH_TIMEOUT = 300; +constexpr int EOS_TIMEOUT = 30; + +constexpr size_t DEFAULT_RANDOM_SEED = 10086; +constexpr int64_t INVALID_KEY_VALUE = -1; +constexpr int32_t INVALID_INDEX_VALUE = -1; +constexpr int ALLTOALLVC_ALIGN = 128; +constexpr int PROFILING_START_BATCH_ID = 100; +constexpr int PROFILING_END_BATCH_ID = 200; +constexpr int MGMT_THREAD_BIND = 48; +constexpr int UNIQUE_MAX_BUCKET_WIDTH = 6; +constexpr int HOT_EMB_UPDATE_STEP_DEFAULT = 1000; +constexpr float HOT_EMB_CACHE_PCT = static_cast(1. 
/ 3); // hot emb cache percent + +const string COMBINE_HISTORY_NAME = "combine_table_history"; +const string SAVE_SPARSE_PATH_PREFIX = "sparse"; + +using emb_key_t = int64_t; +using emb_cache_key_t = uint64_t; +using freq_num_t = int64_t; +using EmbNameT = std::string; +using KeysT = std::vector; +using LookupKeyT = std::tuple; // batch_id quarry_lable keys_vector +using UinqueKeyT = std::tuple>; +using RestoreVecSecT = std::tuple>; +using TensorInfoT = std::tuple>>::iterator>; + +namespace HybridOption { +const unsigned int USE_STATIC = 0x001; +const unsigned int USE_DYNAMIC_EXPANSION = 0x001 << 1; +const unsigned int USE_SUM_SAME_ID_GRADIENTS = 0x001 << 2; +}; // namespace HybridOption + +string GetChipName(int devID); +int GetThreadNumEnv(); + +namespace UBSize { +const int ASCEND910_PREMIUM_A = 262144; +const int ASCEND910_PRO_B = 262144; +const int ASCEND910_B2 = 196608; +const int ASCEND910_B1 = 196608; +const int ASCEND910_B3 = 196608; +const int ASCEND910_B4 = 196608; +const int ASCEND910_C1 = 196608; +const int ASCEND910_C2 = 196608; +const int ASCEND910_C3 = 196608; +const int ASCEND920_A = 196608; +const int ASCEND910_PRO_A = 262144; +const int ASCEND910_B = 262144; +const int ASCEND910_A = 262144; +const int ASCEND910_B2C = 196608; +}; // namespace UBSize + +inline int GetUBSize(int devID) +{ + const std::map chipUbSizeList = { + {"910A", UBSize::ASCEND910_A}, {"910B", UBSize::ASCEND910_B}, {"920A", UBSize::ASCEND920_A}, + {"910B1", UBSize::ASCEND910_B1}, {"910B2", UBSize::ASCEND910_B2}, {"910B3", UBSize::ASCEND910_B3}, + {"910B4", UBSize::ASCEND910_B4}, {"910B2C", UBSize::ASCEND910_B2C}, {"910C1", UBSize::ASCEND910_C1}, + {"910C2", UBSize::ASCEND910_C1}, {"910C3", UBSize::ASCEND910_C3}}; + auto it = chipUbSizeList.find(GetChipName(devID)); + if (it != chipUbSizeList.end()) { + return it->second; } - template - struct Batch { - size_t Size() const - { - return sample.size(); - } + throw std::runtime_error("unknown chip ub size" + GetChipName(devID)); +} - std::string UnParse() const - { - std::string s; - constexpr size_t maxDispLen = 20; - int maxLen = static_cast(std::min(sample.size(), maxDispLen)); - for (int i = 0; i < maxLen; i++) { - s += std::to_string(sample[i]) + " "; - } - return s; - } - - std::vector sample; - std::string name; - size_t batchSize; - int batchId; - int channel = 0; - time_t timestamp { -1 }; - }; - - struct BatchTask { - vector splits; - vector embNames; - size_t batchSize; - int batchQueueId; - int batchId; - int channelId; - time_t timestamp { -1 }; - const void *tensor; - }; - - using EmbBatchT = Batch; - using BatchTaskT = BatchTask; - - struct DDRParam { - vector tmpDataOut; - vector offsetsOut; - DDRParam(vector tmpData, vector offset) - { - tmpDataOut = tmpData; - offsetsOut = offset; - } - }; - - struct RankInfo { - RankInfo() = default; - - RankInfo(int rankId, int deviceId, int localRankSize, int option, const std::vector& ctrlSteps); - RankInfo(int localRankSize, int option, const std::vector& maxStep); - - int rankId {}; - int deviceId {}; - int rankSize {}; - int localRankId {}; - int localRankSize {}; - bool useStatic { false }; - uint32_t option {}; - bool isDDR { false }; - bool isSSDEnabled { false }; - bool useDynamicExpansion {false}; - bool useSumSameIdGradients {true}; - std::vector ctrlSteps; // 包含4个步数: train_steps, eval_steps, save_steps, max_train_steps - }; - - struct EmbBaseInfo { - int batchId; - int channelId; - string name; - }; - - enum TensorIndex : uint32_t { - TENSOR_INDEX_0, - TENSOR_INDEX_1, - TENSOR_INDEX_2, - 
TENSOR_INDEX_3, - TENSOR_INDEX_4, - TENSOR_INDEX_5, - TENSOR_INDEX_6, - TENSOR_INDEX_7, - TENSOR_INDEX_8 - }; - - enum TupleIndex : uint32_t { - TUPLE_INDEX_0 = 0, - TUPLE_INDEX_1, - TUPLE_INDEX_2, - TUPLE_INDEX_3, - TUPLE_INDEX_4, - TUPLE_INDEX_5, - TUPLE_INDEX_6, - TUPLE_INDEX_7 - }; - - struct RandomInfo { - RandomInfo() = default; - - RandomInfo(int start, int len, float constantVal, float randomMin, float randomMax); - - int start; - int len; - float constantVal; - float randomMin; - float randomMax; - }; - - struct EmbeddingSizeInfo { - size_t embeddingSize = 0; - size_t extendEmbSize = 0; - EmbeddingSizeInfo() = default; - EmbeddingSizeInfo(size_t embSize, size_t extendSize) - : embeddingSize(embSize), extendEmbSize(extendSize) {} - }; - - struct OptimizerInfo { - OptimizerInfo() = default; - OptimizerInfo(std::string name, vector params) - { - optimName = name; - optimParams = std::move(params); - } - - std::string optimName; - vector optimParams; - }; - - struct ThresholdValue { - ThresholdValue() = default; - ThresholdValue(EmbNameT name, int countThre, int timeThre, int faaeCoef, bool isSum) - { - tableName = name; - countThreshold = countThre; - timeThreshold = timeThre; - faaeCoefficient = faaeCoef; - isEnableSum = isSum; - } - - EmbNameT tableName { "" }; // embName - int countThreshold { -1 }; // 只配置count,即“只有准入、而没有淘汰”功能,对应SingleHostEmbTableStatus::SETS_ONLY_ADMIT状态 - int timeThreshold { -1 }; // 只配置time,配置错误;即准入是淘汰的前提,对应SingleHostEmbTableStatus::SETS_BOTH状态 - int faaeCoefficient { 1 }; // 配置后,该表在准入时,count计数会乘以该系数 - bool isEnableSum {true}; // 配置false,该表在准入时,count计数不会累加 - }; - - struct FeatureItemInfo { - FeatureItemInfo() = default; - FeatureItemInfo(uint32_t cnt, time_t lastT) - : count(cnt), lastTime(lastT) - {} - - uint32_t count { 0 }; - time_t lastTime { 0 }; - }; - - using HistoryRecords = absl::flat_hash_map>; - struct AdmitAndEvictData { - HistoryRecords historyRecords; // embName ---> {id, FeatureItemInfo} 映射 - absl::flat_hash_map timestamps; // 用于特征准入&淘汰的时间戳 - }; - - void SetLog(int rank); - - template - string StringFormat(const string& format, Args ... args) +template +struct Batch { + size_t Size() const { - auto size = static_cast(GLOG_MAX_BUF_SIZE); - auto buf = std::make_unique(size); - memset_s(buf.get(), size, 0, size); - int nChar = snprintf_s(buf.get(), size, size - 1, format.c_str(), args ...); - if (nChar == -1) { - throw invalid_argument("StringFormat failed"); - } - return string(buf.get(), buf.get() + nChar); + return sample.size(); } - // use environment variable GLOG_v to decide if showing debug log. - // default 0, debug message will not display. 
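The comments here describe GLOG_v's contract: unset or 0 hides debug output, while, as the following line notes, 1 enables debug and 2 enables trace. The actual gating lives in utils/logger.h, which this diff does not show; a minimal sketch of how such an environment-driven check can be implemented, with illustrative names:

    #include <cstdlib>

    inline int GlogVerbositySketch()
    {
        // Read GLOG_v once; unset means 0, so debug logging stays silent by default.
        static const int level = [] {
            const char* v = std::getenv("GLOG_v");
            return (v != nullptr) ? std::atoi(v) : 0;
        }();
        return level;  // 1 enables debug logging, 2 additionally enables trace
    }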
- // 1 for debug, 2 for trace - constexpr int GLOG_DEBUG = 1; - constexpr int GLOG_TRACE = 2; - - template - std::string VectorToString(const std::vector& vec) + std::string UnParse() const { - constexpr size_t maxDispLen = 20; // max display number - int maxLen = static_cast(std::min(vec.size(), maxDispLen)); - - std::stringstream ss; - ss << "["; - for (size_t i = 0; i < maxLen; ++i) { - ss << vec[i]; - if (i != vec.size() - 1) { - ss << ", "; - } + std::string s; + constexpr size_t maxDispLen = 20; + int maxLen = static_cast(std::min(sample.size(), maxDispLen)); + for (int i = 0; i < maxLen; i++) { + s += std::to_string(sample[i]) + " "; } - ss << "]"; - return ss.str(); + return s; } - std::string FloatPtrToLimitStr(float* ptr, const size_t& prtSize); - - template - std::string MapToString(const std::map& map) + std::vector sample; + std::string name; + size_t batchSize; + int batchId; + int channel = 0; + time_t timestamp{-1}; +}; + +struct BatchTask { + vector splits; + vector embNames; + size_t batchSize; + int batchQueueId; + int batchId; + int channelId; + time_t timestamp{-1}; + const void* tensor; +}; + +using EmbBatchT = Batch; +using BatchTaskT = BatchTask; + +struct DDRParam { + vector tmpDataOut; + vector offsetsOut; + DDRParam(vector tmpData, vector offset) { - std::stringstream ss; - ss << "{"; - for (auto it = map.begin(); it != map.end(); ++it) { - ss << it->first << ": " << it->second; - if (std::next(it) != map.end()) { - ss << ", "; - } - } - ss << "}"; - return ss.str(); + tmpDataOut = tmpData; + offsetsOut = offset; } - - template - std::string MapToString(const absl::flat_hash_map& map) +}; + +struct RankInfo { + RankInfo() = default; + + RankInfo(int rankId, int deviceId, int localRankSize, int option, const std::vector& ctrlSteps); + RankInfo(int localRankSize, int option, const std::vector& maxStep); + + int rankId{}; + int deviceId{}; + int rankSize{}; + int localRankId{}; + int localRankSize{}; + bool useStatic{false}; + uint32_t option{}; + bool isDDR{false}; + bool isSSDEnabled{false}; + bool useDynamicExpansion{false}; + bool useSumSameIdGradients{true}; + std::vector ctrlSteps; // 包含4个步数: train_steps, eval_steps, save_steps, max_train_steps +}; + +struct EmbBaseInfo { + int batchId; + int channelId; + string name; +}; + +enum TensorIndex : uint32_t { + TENSOR_INDEX_0, + TENSOR_INDEX_1, + TENSOR_INDEX_2, + TENSOR_INDEX_3, + TENSOR_INDEX_4, + TENSOR_INDEX_5, + TENSOR_INDEX_6, + TENSOR_INDEX_7, + TENSOR_INDEX_8 +}; + +enum TupleIndex : uint32_t { + TUPLE_INDEX_0 = 0, + TUPLE_INDEX_1, + TUPLE_INDEX_2, + TUPLE_INDEX_3, + TUPLE_INDEX_4, + TUPLE_INDEX_5, + TUPLE_INDEX_6, + TUPLE_INDEX_7 +}; + +struct RandomInfo { + RandomInfo() = default; + + RandomInfo(int start, int len, float constantVal, float randomMin, float randomMax); + + int start; + int len; + float constantVal; + float randomMin; + float randomMax; +}; + +struct EmbeddingSizeInfo { + size_t embeddingSize = 0; + size_t extendEmbSize = 0; + EmbeddingSizeInfo() = default; + EmbeddingSizeInfo(size_t embSize, size_t extendSize) : embeddingSize(embSize), extendEmbSize(extendSize) {} +}; + +struct OptimizerInfo { + OptimizerInfo() = default; + OptimizerInfo(std::string name, vector params) { - std::stringstream ss; - ss << "{"; - for (auto it = map.begin(); it != map.end(); ++it) { - ss << it->first << ": " << it->second; - if (std::next(it) != map.end()) { - ss << ", "; - } - } - ss << "}"; - return ss.str(); + optimName = name; + optimParams = std::move(params); } - void ValidateReadFile(const string& 
dataDir, size_t datasetSize); + std::string optimName; + vector optimParams; +}; - template - inline Tensor Vec2TensorI32(const std::vector& data) +struct ThresholdValue { + ThresholdValue() = default; + ThresholdValue(EmbNameT name, int countThre, int timeThre, int faaeCoef, bool isSum) { - Tensor tmpTensor(tensorflow::DT_INT32, { static_cast(data.size()) }); - auto tmpData = tmpTensor.flat(); - for (int j = 0; j < static_cast(data.size()); j++) { - tmpData(j) = static_cast(data[j]); - } - return tmpTensor; + tableName = name; + countThreshold = countThre; + timeThreshold = timeThre; + faaeCoefficient = faaeCoef; + isEnableSum = isSum; } - template - inline Tensor Vec2TensorI64(const std::vector& data) - { - Tensor tmpTensor(tensorflow::DT_INT64, { static_cast(data.size()) }); - auto tmpData = tmpTensor.flat(); - for (int j = 0; j < static_cast(data.size()); j++) { - tmpData(j) = static_cast(data[j]); + EmbNameT tableName{""}; // embName + int countThreshold{ + -1}; // 只配置count,即“只有准入、而没有淘汰”功能,对应SingleHostEmbTableStatus::SETS_ONLY_ADMIT状态 + int timeThreshold{-1}; // 只配置time,配置错误;即准入是淘汰的前提,对应SingleHostEmbTableStatus::SETS_BOTH状态 + int faaeCoefficient{1}; // 配置后,该表在准入时,count计数会乘以该系数 + bool isEnableSum{true}; // 配置false,该表在准入时,count计数不会累加 +}; + +struct FeatureItemInfo { + FeatureItemInfo() = default; + FeatureItemInfo(uint32_t cnt, time_t lastT) : count(cnt), lastTime(lastT) {} + + uint32_t count{0}; + time_t lastTime{0}; +}; + +using HistoryRecords = absl::flat_hash_map>; +struct AdmitAndEvictData { + HistoryRecords historyRecords; // embName ---> {id, FeatureItemInfo} 映射 + absl::flat_hash_map timestamps; // 用于特征准入&淘汰的时间戳 +}; + +void SetLog(int rank); + +template +string StringFormat(const string& format, Args... args) +{ + auto size = static_cast(GLOG_MAX_BUF_SIZE); + auto buf = std::make_unique(size); + memset_s(buf.get(), size, 0, size); + int nChar = snprintf_s(buf.get(), size, size - 1, format.c_str(), args...); + if (nChar == -1) { + throw invalid_argument("StringFormat failed"); + } + return string(buf.get(), buf.get() + nChar); +} + +// use environment variable GLOG_v to decide if showing debug log. +// default 0, debug message will not display. 
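StringFormat above renders printf-style messages into a fixed GLOG_MAX_BUF_SIZE (1024-byte) buffer through securec's snprintf_s and throws when formatting fails, so longer messages are clamped. A portable sketch of the same idea using std::snprintf, for readers without the securec dependency (illustrative only; arguments must be printf-compatible scalar types, exactly as with the original):

    #include <cstdio>
    #include <stdexcept>
    #include <string>

    template <typename... Args>
    std::string StringFormatSketch(const std::string& format, Args... args)
    {
        char buf[1024] = {0};  // mirrors GLOG_MAX_BUF_SIZE
        int n = std::snprintf(buf, sizeof(buf), format.c_str(), args...);
        if (n < 0) {
            throw std::invalid_argument("StringFormatSketch failed");
        }
        // snprintf reports the length the full message would have had; clamp on truncation.
        size_t len = (n < static_cast<int>(sizeof(buf))) ? static_cast<size_t>(n) : sizeof(buf) - 1;
        return std::string(buf, buf + len);
    }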
+// 1 for debug, 2 for trace +constexpr int GLOG_DEBUG = 1; +constexpr int GLOG_TRACE = 2; + +template +std::string VectorToString(const std::vector& vec) +{ + constexpr size_t maxDispLen = 20; // max display number + int maxLen = static_cast(std::min(vec.size(), maxDispLen)); + + std::stringstream ss; + ss << "["; + for (size_t i = 0; i < maxLen; ++i) { + ss << vec[i]; + if (i != vec.size() - 1) { + ss << ", "; } - return tmpTensor; } - - struct EmbInfoParams { - std::string name; - int sendCount; - int embeddingSize; - int extEmbeddingSize; - bool isSave; - bool isGrad; - EmbInfoParams() = default; - - EmbInfoParams(const std::string& name, - int sendCount, - int embeddingSize, - int extEmbeddingSize, - bool isSave, - bool isGrad) - : name(name), - sendCount(sendCount), - embeddingSize(embeddingSize), - extEmbeddingSize(extEmbeddingSize), - isSave(isSave), - isGrad(isGrad) - { + ss << "]"; + return ss.str(); +} + +std::string FloatPtrToLimitStr(float* ptr, const size_t& prtSize); + +template +std::string MapToString(const std::map& map) +{ + std::stringstream ss; + ss << "{"; + for (auto it = map.begin(); it != map.end(); ++it) { + ss << it->first << ": " << it->second; + if (std::next(it) != map.end()) { + ss << ", "; } - }; - - struct EmbInfo { - EmbInfo() = default; - - EmbInfo(const EmbInfoParams& embInfoParams, - std::vector vocabsize, - std::vector initializeInfos, - std::vector ssdDataPath) - : name(embInfoParams.name), - sendCount(embInfoParams.sendCount), - embeddingSize(embInfoParams.embeddingSize), - extEmbeddingSize(embInfoParams.extEmbeddingSize), - isSave(embInfoParams.isSave), - isGrad(embInfoParams.isGrad), - devVocabSize(vocabsize[0]), - hostVocabSize(vocabsize[1]), - ssdVocabSize(vocabsize[SSD_SIZE_INDEX]), - initializeInfos(std::move(initializeInfos)), - ssdDataPath(std::move(ssdDataPath)) - { + } + ss << "}"; + return ss.str(); +} + +template +std::string MapToString(const absl::flat_hash_map& map) +{ + std::stringstream ss; + ss << "{"; + for (auto it = map.begin(); it != map.end(); ++it) { + ss << it->first << ": " << it->second; + if (std::next(it) != map.end()) { + ss << ", "; } + } + ss << "}"; + return ss.str(); +} + +void ValidateReadFile(const string& dataDir, size_t datasetSize); + +template +inline Tensor Vec2TensorI32(const std::vector& data) +{ + Tensor tmpTensor(tensorflow::DT_INT32, {static_cast(data.size())}); + auto tmpData = tmpTensor.flat(); + for (int j = 0; j < static_cast(data.size()); j++) { + tmpData(j) = static_cast(data[j]); + } + return tmpTensor; +} + +template +inline Tensor Vec2TensorI64(const std::vector& data) +{ + Tensor tmpTensor(tensorflow::DT_INT64, {static_cast(data.size())}); + auto tmpData = tmpTensor.flat(); + for (int j = 0; j < static_cast(data.size()); j++) { + tmpData(j) = static_cast(data[j]); + } + return tmpTensor; +} + +struct EmbInfoParams { + std::string name; + int sendCount; + int embeddingSize; + int extEmbeddingSize; + bool isSave; + bool isGrad; + EmbInfoParams() = default; + + EmbInfoParams(const std::string& name, int sendCount, int embeddingSize, int extEmbeddingSize, bool isSave, + bool isGrad) + : name(name), + sendCount(sendCount), + embeddingSize(embeddingSize), + extEmbeddingSize(extEmbeddingSize), + isSave(isSave), + isGrad(isGrad) + { + } +}; + +struct EmbInfo { + EmbInfo() = default; + + EmbInfo(const EmbInfoParams& embInfoParams, std::vector vocabsize, + std::vector initializeInfos, std::vector ssdDataPath) + : name(embInfoParams.name), + sendCount(embInfoParams.sendCount), + 
embeddingSize(embInfoParams.embeddingSize), + extEmbeddingSize(embInfoParams.extEmbeddingSize), + isSave(embInfoParams.isSave), + isGrad(embInfoParams.isGrad), + devVocabSize(vocabsize[0]), + hostVocabSize(vocabsize[1]), + ssdVocabSize(vocabsize[SSD_SIZE_INDEX]), + initializeInfos(std::move(initializeInfos)), + ssdDataPath(std::move(ssdDataPath)) + { + } - std::string name; - int sendCount; - int embeddingSize; - int extEmbeddingSize; - bool isSave; - bool isGrad; - size_t devVocabSize; - size_t hostVocabSize; - size_t ssdVocabSize; - std::vector initializeInfos; - std::vector ssdDataPath; - }; - - struct HostEmbTable { - EmbInfo hostEmbInfo; - std::vector> embData; - }; - - struct All2AllInfo { - KeysT keyRecv; - vector scAll; - vector countRecv; - All2AllInfo() = default; - All2AllInfo(KeysT keyRecv, vector scAll, vector countRecv) - : keyRecv(keyRecv), scAll(scAll), countRecv(countRecv) {} - }; - - struct UniqueInfo { - vector restore; - vector hotPos; - All2AllInfo all2AllInfo; - UniqueInfo() = default; - UniqueInfo(vector restore, vector hotPos, All2AllInfo all2AllInfo) - : restore(restore), hotPos(hotPos), all2AllInfo(all2AllInfo) {} - }; - - struct KeySendInfo { - KeysT keySend; - vector keyCount; - }; - - using EmbMemT = absl::flat_hash_map; - using OffsetMemT = std::map; - using KeyOffsetMemT = std::map>; - using KeyCountMemT = std::map>; - using Table2ThreshMemT = absl::flat_hash_map; - using trans_serialize_t = uint8_t; - using OffsetMapT = std::map>; - using OffsetT = std::vector; - using AllKeyOffsetMapT = std::map>; - using KeyFreqMemT = unordered_map>; - using EmbLocalTableT = EmbCache::EmbCacheManager; - - enum class CkptFeatureType { - HOST_EMB = 0, - EMB_HASHMAP = 1, - MAX_OFFSET = 2, - KEY_OFFSET_MAP = 3, - FEAT_ADMIT_N_EVICT = 4, - DDR_KEY_FREQ_MAP = 5, - EXCLUDE_DDR_KEY_FREQ_MAP = 6, - KEY_COUNT_MAP = 7, - EMB_LOCAL_TABLE = 8 - }; - - struct CkptData { - EmbMemT* hostEmbs = nullptr; - OffsetMemT maxOffset; - KeyOffsetMemT keyOffsetMap; - OffsetMapT offsetMap; - OffsetMapT* offsetMapPtr = &offsetMap; - KeyCountMemT keyCountMap; - Table2ThreshMemT table2Thresh; - AdmitAndEvictData histRec; - KeyFreqMemT ddrKeyFreqMaps; - KeyFreqMemT excludeDDRKeyFreqMaps; - }; - - struct CkptTransData { - std::vector int64Arr; - std::vector addressArr; - std::vector int32Arr; - std::vector transDataset; // may all use this to transfer data - std::vector attribute; // may need to use other form for attributes - size_t datasetSize; - size_t attributeSize; - }; - - enum class CkptDataType { - EMB_INFO = 0, - EMB_DATA = 1, - EMB_HASHMAP = 2, - DEV_OFFSET = 3, - EMB_CURR_STAT = 4, - NDDR_OFFSET = 5, - NDDR_FEATMAP = 6, - TABLE_2_THRESH = 7, - HIST_REC = 8, - ATTRIBUTE = 9, - DDR_FREQ_MAP = 10, - EXCLUDE_FREQ_MAP = 11, - EVICT_POS = 12, - KEY_COUNT_MAP = 13 - }; - - enum CTRLogLevel { // can't use enum class due to compatibility for AccCTR - DEBUG = 0, - INFO, - WARN, - ERROR, - }; - - static void CTRLog(int level, const char *msg) + std::string name; + int sendCount; + int embeddingSize; + int extEmbeddingSize; + bool isSave; + bool isGrad; + size_t devVocabSize; + size_t hostVocabSize; + size_t ssdVocabSize; + std::vector initializeInfos; + std::vector ssdDataPath; +}; + +struct HostEmbTable { + EmbInfo hostEmbInfo; + std::vector> embData; +}; + +struct All2AllInfo { + KeysT keyRecv; + vector scAll; + vector countRecv; + All2AllInfo() = default; + All2AllInfo(KeysT keyRecv, vector scAll, vector countRecv) + : keyRecv(keyRecv), + scAll(scAll), + countRecv(countRecv) { - switch (level) { - 
case CTRLogLevel::DEBUG: - LOG_DEBUG(msg); - break; - case CTRLogLevel::INFO: - LOG_INFO(msg); - break; - case CTRLogLevel::WARN: - LOG_WARN(msg); - break; - case CTRLogLevel::ERROR: - LOG_ERROR(msg); - break; - default: - break; - } } +}; + +struct UniqueInfo { + vector restore; + vector hotPos; + All2AllInfo all2AllInfo; + UniqueInfo() = default; + UniqueInfo(vector restore, vector hotPos, All2AllInfo all2AllInfo) + : restore(restore), + hotPos(hotPos), + all2AllInfo(all2AllInfo) + { + } +}; + +struct KeySendInfo { + KeysT keySend; + vector keyCount; +}; + +using EmbMemT = absl::flat_hash_map; +using OffsetMemT = std::map; +using KeyOffsetMemT = std::map>; +using KeyCountMemT = std::map>; +using Table2ThreshMemT = absl::flat_hash_map; +using trans_serialize_t = uint8_t; +using OffsetMapT = std::map>; +using OffsetT = std::vector; +using AllKeyOffsetMapT = std::map>; +using KeyFreqMemT = unordered_map>; +using EmbLocalTableT = EmbCache::EmbCacheManager; + +enum class CkptFeatureType { + HOST_EMB = 0, + EMB_HASHMAP = 1, + MAX_OFFSET = 2, + KEY_OFFSET_MAP = 3, + FEAT_ADMIT_N_EVICT = 4, + DDR_KEY_FREQ_MAP = 5, + EXCLUDE_DDR_KEY_FREQ_MAP = 6, + KEY_COUNT_MAP = 7, + EMB_LOCAL_TABLE = 8 +}; + +struct CkptData { + EmbMemT* hostEmbs = nullptr; + OffsetMemT maxOffset; + KeyOffsetMemT keyOffsetMap; + OffsetMapT offsetMap; + OffsetMapT* offsetMapPtr = &offsetMap; + KeyCountMemT keyCountMap; + Table2ThreshMemT table2Thresh; + AdmitAndEvictData histRec; + KeyFreqMemT ddrKeyFreqMaps; + KeyFreqMemT excludeDDRKeyFreqMaps; +}; + +struct CkptTransData { + std::vector int64Arr; + std::vector addressArr; + std::vector int32Arr; + std::vector transDataset; // may all use this to transfer data + std::vector attribute; // may need to use other form for attributes + size_t datasetSize; + size_t attributeSize; +}; + +enum class CkptDataType { + EMB_INFO = 0, + EMB_DATA = 1, + EMB_HASHMAP = 2, + DEV_OFFSET = 3, + EMB_CURR_STAT = 4, + NDDR_OFFSET = 5, + NDDR_FEATMAP = 6, + TABLE_2_THRESH = 7, + HIST_REC = 8, + ATTRIBUTE = 9, + DDR_FREQ_MAP = 10, + EXCLUDE_FREQ_MAP = 11, + EVICT_POS = 12, + KEY_COUNT_MAP = 13 +}; + +static std::string CkptDataTypeName(CkptDataType type) +{ + switch (type) { + case CkptDataType::EMB_INFO: + return "EMB_INFO"; + case CkptDataType::EMB_DATA: + return "EMB_DATA"; + case CkptDataType::EMB_HASHMAP: + return "EMB_HASHMAP"; + case CkptDataType::DEV_OFFSET: + return "DEV_OFFSET"; + case CkptDataType::EMB_CURR_STAT: + return "EMB_CURR_STAT"; + case CkptDataType::NDDR_OFFSET: + return "NDDR_OFFSET"; + case CkptDataType::NDDR_FEATMAP: + return "NDDR_FEATMAP"; + case CkptDataType::TABLE_2_THRESH: + return "TABLE_2_THRESH"; + case CkptDataType::HIST_REC: + return "HIST_REC"; + case CkptDataType::ATTRIBUTE: + return "ATTRIBUTE"; + case CkptDataType::DDR_FREQ_MAP: + return "DDR_FREQ_MAP"; + case CkptDataType::EXCLUDE_FREQ_MAP: + return "EXCLUDE_FREQ_MAP"; + case CkptDataType::EVICT_POS: + return "EVICT_POS"; + case CkptDataType::KEY_COUNT_MAP: + return "KEY_COUNT_MAP"; + default: + return "UNKNOWN"; + } +} + +enum CTRLogLevel { // can't use enum class due to compatibility for AccCTR + DEBUG = 0, + INFO, + WARN, + ERROR, +}; + +static void CTRLog(int level, const char* msg) +{ + switch (level) { + case CTRLogLevel::DEBUG: + LOG_DEBUG(msg); + break; + case CTRLogLevel::INFO: + LOG_INFO(msg); + break; + case CTRLogLevel::WARN: + LOG_WARN(msg); + break; + case CTRLogLevel::ERROR: + LOG_ERROR(msg); + break; + default: + break; + } +} - ostream& operator<<(ostream& ss, MxRec::CkptDataType type); - bool 
CheckFilePermission(const string& filePath); +ostream& operator<<(ostream& ss, MxRec::CkptDataType type); +bool CheckFilePermission(const string& filePath); - int GetStepFromPath(const string& loadPath); -} // end namespace MxRec +int GetStepFromPath(const string& loadPath); +} // end namespace MxRec #define KEY_PROCESS "\033[45m[KeyProcess]\033[0m " #define STAT_INFO "[StatInfo] " #ifdef GTEST - #define GTEST_PRIVATE public +#define GTEST_PRIVATE public #else - #define GTEST_PRIVATE private +#define GTEST_PRIVATE private #endif #endif diff --git a/src/ops_tf/hybrid_dataset_ops.cpp b/src/ops_tf/hybrid_dataset_ops.cpp index 2eee8531..98fca961 100644 --- a/src/ops_tf/hybrid_dataset_ops.cpp +++ b/src/ops_tf/hybrid_dataset_ops.cpp @@ -403,7 +403,7 @@ namespace MxRec { out(0) = batchId; if (channelId == 1) { if (maxStep != -1 && batchId >= maxStep) { - LOG_DEBUG(StringFormat("skip excess batch after {}/{}", batchId, maxStep)); + LOG_DEBUG(StringFormat("skip excess batch after %d/%d", batchId, maxStep)); return; } } @@ -658,4 +658,4 @@ namespace tensorflow { .SetIsStateful() .SetShapeFn(::tensorflow::shape_inference::UnknownShape); REGISTER_KERNEL_BUILDER(Name("LazyAdam").Device(DEVICE_CPU), MxRec::CustOps); -} \ No newline at end of file +} -- Gitee From f6340067b4f2475582615b8cf77cf38baeabffe3 Mon Sep 17 00:00:00 2001 From: steepcurve Date: Mon, 1 Jul 2024 08:28:14 +0000 Subject: [PATCH 250/302] fix: cleancode. Signed-off-by: steepcurve --- src/core/checkpoint/checkpoint.cpp | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/src/core/checkpoint/checkpoint.cpp b/src/core/checkpoint/checkpoint.cpp index bc7501bb..469e209e 100644 --- a/src/core/checkpoint/checkpoint.cpp +++ b/src/core/checkpoint/checkpoint.cpp @@ -90,17 +90,18 @@ void Checkpoint::SetDataHandler(CkptData& ckptData) void Checkpoint::SetDataHandler(const vector& featureTypes) { - map> setCkptMap{{CkptFeatureType::FEAT_ADMIT_N_EVICT, - [this] { - dataHandlers.push_back(make_unique()); - }}, - {CkptFeatureType::DDR_KEY_FREQ_MAP, - [this] { - dataHandlers.push_back(make_unique()); - }}, - {CkptFeatureType::KEY_COUNT_MAP, [this] { - dataHandlers.push_back(make_unique()); - }}}; + auto featAdmitNEvictHandler = [this] { + dataHandlers.push_back(make_unique()); + }; + auto ddrKeyFreqMapHandler = [this] { + dataHandlers.push_back(make_unique()); + }; + auto keyCountMapHandler = [this] { + dataHandlers.push_back(make_unique()); + }; + map> setCkptMap{{CkptFeatureType::FEAT_ADMIT_N_EVICT, featAdmitNEvictHandler}, + {CkptFeatureType::DDR_KEY_FREQ_MAP, ddrKeyFreqMapHandler}, + {CkptFeatureType::KEY_COUNT_MAP, keyCountMapHandler}}; for (const auto& featureType : featureTypes) { setCkptMap.at(featureType)(); @@ -341,7 +342,8 @@ void Checkpoint::ReadStream(CkptTransData& transData, const string& dataDir, Ckp if (readBytesNum != datasetSize) { throw runtime_error(StringFormat("Error: Load data failed. data type: %s. 
" "Expected to read %d bytes, but actually read %d bytes to file %s.", - CkptDataTypeName(dataType).c_str(), datasetSize, readBytesNum, dataDir.c_str())); + CkptDataTypeName(dataType).c_str(), datasetSize, readBytesNum, + dataDir.c_str())); } } -- Gitee From 90bd4a3705ce1a239178284d001abe91dd05fb91 Mon Sep 17 00:00:00 2001 From: longfeifei <962977793@qq.com> Date: Tue, 2 Jul 2024 17:10:56 +0800 Subject: [PATCH 251/302] =?UTF-8?q?estimator=E4=B8=ADtrain=E5=88=87?= =?UTF-8?q?=E6=8D=A2=E4=B8=BAeval,=E5=A2=9E=E5=8A=A0=E5=8E=9Fhost=E4=BE=A7?= =?UTF-8?q?train=E7=9A=84=E7=9B=B8=E5=85=B3=E7=8A=B6=E6=80=81=E5=A4=87?= =?UTF-8?q?=E4=BB=BD=EF=BC=8C=E5=9C=A8eval=E5=88=87=E6=8D=A2=E4=B8=BAtrain?= =?UTF-8?q?=E5=90=8E=E8=BF=9B=E8=A1=8C=E8=BF=98=E5=8E=9F=E5=A4=87=E4=BB=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../cache_manager/cache_manager.cpp | 27 ++++++++++++++++ .../cache_manager/cache_manager.h | 4 +++ src/AccCTR/src/embedding_cache/limited_set.h | 18 +++++++++++ .../offset_mapper/offset_mapper.h | 32 +++++++++++++++++++ src/AccCTR/src/include/embedding_cache.h | 14 ++++++++ src/core/emb_table/embedding_ddr.cpp | 10 ++++++ src/core/emb_table/embedding_ddr.h | 3 ++ src/core/emb_table/embedding_mgmt.cpp | 14 ++++++++ src/core/emb_table/embedding_mgmt.h | 11 +++++++ src/core/emb_table/embedding_static.cpp | 16 ++++++++-- src/core/emb_table/embedding_static.h | 4 +++ src/core/emb_table/embedding_table.cpp | 8 +++++ src/core/emb_table/embedding_table.h | 5 +++ src/core/hybrid_mgmt/hybrid_mgmt.cpp | 27 ++++++++++++++++ src/core/hybrid_mgmt/hybrid_mgmt.h | 5 +++ .../ock_ctr_common/include/embedding_cache.h | 14 ++++++++ 16 files changed, 210 insertions(+), 2 deletions(-) diff --git a/src/AccCTR/src/embedding_cache/cache_manager/cache_manager.cpp b/src/AccCTR/src/embedding_cache/cache_manager/cache_manager.cpp index 8a6187a1..452e2fd1 100644 --- a/src/AccCTR/src/embedding_cache/cache_manager/cache_manager.cpp +++ b/src/AccCTR/src/embedding_cache/cache_manager/cache_manager.cpp @@ -317,6 +317,33 @@ int EmbCacheManagerImpl::LoadEmbTableInfos(std::string tableName, const std::vec return H_OK; } +int EmbCacheManagerImpl::BackUpTrainStatus(std:string tableName) +{ + int checkTableNameRet = CheckValidTableName(tableName); + if (checkTableNameRet != H_OK) { + return checkTableNameRet; + } + + auto om = offsetMappersBackUp.find(tableName); + if (om != offsetMappersBackUp.end()) { + offsetMappersBackUp[tableName] = offsetMappers[tableName]; + } else{ + offsetMappersBackUp[tableName].Initialize(1000, 1000); + offsetMappersBackUp[tableName] = offsetMappers[tableName]; + } + return H_OK; +} + +int EmbCacheManagerImpl::RecoverTrainStatus(std:string tableName) +{ + int checkTableNameRet = CheckValidTableName(tableName); + if (checkTableNameRet != H_OK) { + return checkTableNameRet; + } + offsetMappers[tableName] = offsetMappersBackUp[tableName]; + return H_OK; +} + void EmbCacheManagerImpl::Destroy() { for (auto it = offsetMappers.begin(); it != offsetMappers.end(); it++) { diff --git a/src/AccCTR/src/embedding_cache/cache_manager/cache_manager.h b/src/AccCTR/src/embedding_cache/cache_manager/cache_manager.h index 80fbcd46..359e88ad 100644 --- a/src/AccCTR/src/embedding_cache/cache_manager/cache_manager.h +++ b/src/AccCTR/src/embedding_cache/cache_manager/cache_manager.h @@ -73,11 +73,15 @@ public: const std::vector>& embeddings, const std::vector>& optimizerSlots) override; + int BackUpTrainStatus(std:string tableName) override; + int RecoverTrainStatus(std::string 
tableName) override; + uint32_t GetUsage(const std::string& tableName) override; private: std::map embCacheInfos; std::map offsetMappers; + std::map offsetMappersBackUp; std::map embTables; int CheckValidTableName(const std::string& tableName); diff --git a/src/AccCTR/src/embedding_cache/limited_set.h b/src/AccCTR/src/embedding_cache/limited_set.h index 036a6477..d44b615a 100644 --- a/src/AccCTR/src/embedding_cache/limited_set.h +++ b/src/AccCTR/src/embedding_cache/limited_set.h @@ -47,6 +47,24 @@ public: delete tail; } + // 拷贝构造函数 + LimitedSet(const LimitedSet& other): head(new Node(-1)), tail(new Node(-1)) + { + nodes.resize(other.nodes.size()); + for (auto &node: nodes) { + node = new Node(-1); + } + + // 初始化头尾节点 + head->next = tail; + tail->prev = head; + + // 遍历原vector的每一个节点并复制 + for (Node* node = other.head->next; node != other.tail; node = node->next) { + insert(node->value); + } + } + void insert(uint64_t value) { if (nodes[value]->value == value) { diff --git a/src/AccCTR/src/embedding_cache/offset_mapper/offset_mapper.h b/src/AccCTR/src/embedding_cache/offset_mapper/offset_mapper.h index f42a0d3f..1ad470c5 100644 --- a/src/AccCTR/src/embedding_cache/offset_mapper/offset_mapper.h +++ b/src/AccCTR/src/embedding_cache/offset_mapper/offset_mapper.h @@ -35,6 +35,38 @@ public: ~OffsetMapper() = default; + OffsetMapper(const OffsetMapper& other): maxCacheSize(other.maxCacheSize), useLength(other.useLength), + validPos(new LimitedSet(*other.validPos)), + evictPos(new LimitedSet(*other.evictPos)), + pos2Key(other.pos2Key), lastBatchPos(other.lastBatchPos), + evictSize(other.evictSize) + { + } + + OffsetMapper& operator=(const OffsetMapper& other) + { + if (this != &other) { + delete validPos; + validPos = nullptr; + delete evictPos; + evictPos = nullptr; + + if (other.validPos != nullptr) { + validPos = new LimitedSet(*other.validPos); + } + if (other.evictPos != nullptr) { + evictPos = new LimitedSet(*other.evictPos); + } + + maxCacheSize = other.maxCacheSize; + useLength = other.useLength; + pos2Key = other.pos2Key; + lastBatchPos = other.lastBatchPos; + evictSize = other.evictSize; + } + return *this; + } + bool Initialize(uint32_t reserve, uint32_t maxSize = 0) { maxCacheSize = maxSize; diff --git a/src/AccCTR/src/include/embedding_cache.h b/src/AccCTR/src/include/embedding_cache.h index 4adf1fbf..40d9dcbe 100644 --- a/src/AccCTR/src/include/embedding_cache.h +++ b/src/AccCTR/src/include/embedding_cache.h @@ -315,6 +315,20 @@ public: virtual int LoadEmbTableInfos(std::string tableName, const std::vector& keys, const std::vector>& embeddings, const std::vector>& optimizerSlots) = 0; + + /* * + * train通道切换为eval, 备份当前表的offsetMapper对象, 存储下当前train对应的devices上key的状态 + * @Param tableName: 需要加载信息的table名字 + * @Return errorCode + */ + virtual int BackUpTrainStatus(std::string tableName) = 0; + + /* * + * eval通道切换为train, 将当前表的offsetMapper对象还原成备份的train对应的的device上key的状态 + * @Param tableName: 需要加载信息的table名字 + * @Return errorCode + */ + virtual int RecoverTrainStatus(std::string tableName) = 0; }; } // namespace EmbCache diff --git a/src/core/emb_table/embedding_ddr.cpp b/src/core/emb_table/embedding_ddr.cpp index ca706c73..e4b96eb6 100644 --- a/src/core/emb_table/embedding_ddr.cpp +++ b/src/core/emb_table/embedding_ddr.cpp @@ -376,3 +376,13 @@ void EmbeddingDDR::SetEmbCache(ock::ctr::EmbCacheManagerPtr embCache) { this->embCache = embCache; } + +void EmbeddingDDR::BackUpTrainStatus() +{ + embCache->BackUpTrainStatus(name); +} + +void EmbeddingDDR::RecoverTrainStatus() +{ + 
+    embCache->RecoverTrainStatus(name);
+}
diff --git a/src/core/emb_table/embedding_ddr.h b/src/core/emb_table/embedding_ddr.h
index ac5c5878..26d85e60 100644
--- a/src/core/emb_table/embedding_ddr.h
+++ b/src/core/emb_table/embedding_ddr.h
@@ -73,6 +73,9 @@ public:
     void SaveEmbAndOptim(const string& savePath);
     void SetEmbCache(ock::ctr::EmbCacheManagerPtr embCache);
 
+    void BackUpTrainStatus();
+    void RecoverTrainStatus();
+
 GTEST_PRIVATE:
     void EvictDeleteEmb(const vector& keys);
 
diff --git a/src/core/emb_table/embedding_mgmt.cpp b/src/core/emb_table/embedding_mgmt.cpp
index 9e7dcbb0..d889cdba 100644
--- a/src/core/emb_table/embedding_mgmt.cpp
+++ b/src/core/emb_table/embedding_mgmt.cpp
@@ -196,3 +196,17 @@ void EmbeddingMgmt::SetEmbCacheForEmbTable(const ock::ctr::EmbCacheManagerPtr& e
         table.second->SetEmbCache(embCache);
     }
 }
+
+void EmbeddingMgmt::BackUpTrainStatusBeforeLoad()
+{
+    for (auto& table: embeddings) {
+        table.second->BackUpTrainStatus();
+    }
+}
+
+void EmbeddingMgmt::RecoverTrainStatus()
+{
+    for (auto& table: embeddings) {
+        table.second->RecoverTrainStatus();
+    }
+}
\ No newline at end of file
diff --git a/src/core/emb_table/embedding_mgmt.h b/src/core/emb_table/embedding_mgmt.h
index ef106786..7cd3f782 100644
--- a/src/core/emb_table/embedding_mgmt.h
+++ b/src/core/emb_table/embedding_mgmt.h
@@ -89,6 +89,17 @@ public:
      */
    void Save(const string& filePath);
 
+    /**
+     * In estimator mode, back up the train state of every table when train switches to eval
+     */
+    void BackUpTrainStatusBeforeLoad();
+
+    /**
+     * In estimator mode, restore the train state of every table when eval switches back to train
+     */
+    void RecoverTrainStatus();
+
+
     /**
      * 获取所有表对应的DeviceOffsets,该偏移用于python侧保存embedding时抽取key对应的embedding
      */
diff --git a/src/core/emb_table/embedding_static.cpp b/src/core/emb_table/embedding_static.cpp
index 61874b1f..0db152ed 100644
--- a/src/core/emb_table/embedding_static.cpp
+++ b/src/core/emb_table/embedding_static.cpp
@@ -160,11 +160,23 @@ void EmbeddingStatic::LoadKey(const string& savePath)
     }
 
     maxOffset = keyOffsetMap.size();
-
     free(static_cast(buf));
 }
 
 vector EmbeddingStatic::GetDeviceOffset()
 {
     return deviceOffset;
-}
\ No newline at end of file
+}
+
+void EmbeddingStatic::BackUpTrainStatus()
+{
+    keyOffsetMapBackUp = keyOffsetMap;
+}
+
+void EmbeddingStatic::RecoverTrainStatus()
+{
+    if (keyOffsetMapBackUp.size() != 0) {
+        keyOffsetMap = keyOffsetMapBackUp;
+        keyOffsetMapBackUp.clear();
+    }
+}
diff --git a/src/core/emb_table/embedding_static.h b/src/core/emb_table/embedding_static.h
index 6515f586..6f772e08 100644
--- a/src/core/emb_table/embedding_static.h
+++ b/src/core/emb_table/embedding_static.h
@@ -39,6 +39,10 @@ public:
 
     void Save(const string& savePath);
 
+    void BackUpTrainStatus();
+
+    void RecoverTrainStatus();
+
     vector GetDeviceOffset();
 
 GTEST_PRIVATE:
diff --git a/src/core/emb_table/embedding_table.cpp b/src/core/emb_table/embedding_table.cpp
index b4eb2379..12b0137a 100644
--- a/src/core/emb_table/embedding_table.cpp
+++ b/src/core/emb_table/embedding_table.cpp
@@ -143,6 +143,14 @@ void EmbeddingTable::Save(const string& filePath)
 {
 }
 
+void EmbeddingTable::BackUpTrainStatus()
+{
+}
+
+void EmbeddingTable::RecoverTrainStatus()
+{
+}
+
 void EmbeddingTable::MakeDir(const string& dirName)
 {
     if (fileSystemPtr_ == nullptr) {
diff --git a/src/core/emb_table/embedding_table.h b/src/core/emb_table/embedding_table.h
index cbf15a7a..174cc0fc 100644
--- a/src/core/emb_table/embedding_table.h
+++ b/src/core/emb_table/embedding_table.h
@@ -76,6 +76,10 @@ public:
 
     void MakeDir(const string& dirName);
 
+    virtual void BackUpTrainStatus();
+
+    virtual void RecoverTrainStatus();
+
     virtual vector GetDeviceOffset();
 
     vector GetLoadOffset();
 
@@ -96,6 +100,7 @@ public:
     size_t ssdVocabSize;
     size_t maxOffset;
     absl::flat_hash_map keyOffsetMap;
+    absl::flat_hash_map keyOffsetMapBackUp;
 
     std::vector evictDevPos; // 记录HBM内被淘汰的key
     std::vector evictHostPos; // 记录Host内淘汰列表
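Each table class above supplies its own notion of train state: EmbeddingStatic snapshots keyOffsetMap, EmbeddingDDR delegates to the cache's offsetMapper backup, and the EmbeddingTable base class deliberately does nothing. A condensed sketch of the static (non-expansion HBM) case, with absl::flat_hash_map simplified to std::unordered_map and the key/offset types assumed:

    #include <cstdint>
    #include <unordered_map>

    class StaticTableSketch {
    public:
        // Called when the train channel switches to eval: snapshot train's key->offset view.
        void BackUpTrainStatus()
        {
            keyOffsetMapBackUp = keyOffsetMap;
        }

        // Called when eval switches back to train: discard eval-side changes.
        void RecoverTrainStatus()
        {
            if (!keyOffsetMapBackUp.empty()) {
                keyOffsetMap = keyOffsetMapBackUp;
                keyOffsetMapBackUp.clear();
            }
        }

    private:
        std::unordered_map<int64_t, size_t> keyOffsetMap;
        std::unordered_map<int64_t, size_t> keyOffsetMapBackUp;
    };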
diff --git a/src/core/hybrid_mgmt/hybrid_mgmt.cpp b/src/core/hybrid_mgmt/hybrid_mgmt.cpp
index fda54d9d..100ed24e 100644
--- a/src/core/hybrid_mgmt/hybrid_mgmt.cpp
+++ b/src/core/hybrid_mgmt/hybrid_mgmt.cpp
@@ -221,6 +221,7 @@ bool HybridMgmt::Load(const string& loadPath, vector warmStartTables)
     Checkpoint loadCkpt;
     vector loadFeatures;
     SetFeatureTypeForLoad(loadFeatures);
+    BackUpTrainStatus();
 
     if (warmStartTables.size() == 0) {
         EmbeddingMgmt::Instance()->Load(loadPath, trainKeysSet);
@@ -499,6 +500,8 @@ void HybridMgmt::EvalTask(TaskType type)
         cvCheckSave.wait(checkSaveLocker, [this] {
             return !hybridMgmtBlock->IsNeedWaitSave() || mutexDestroy;
         });
+        // Restore the backed-up train state before waking train's data-processing thread
+        RecoverTrainStatus();
         hybridMgmtBlock->Wake(TRAIN_CHANNEL_ID);
         LOG_DEBUG("wake TrainTask");
         hybridMgmtBlock->DoBlock(channelId);
@@ -2230,3 +2233,27 @@ bool HybridMgmt::IsTrainAndEvalCase()
     }
     return alreadyTrainOnce && isChannelSwitchCase;
 }
+
+void HybridMgmt::BackUpTrainStatus()
+{
+    int channelID = TRAIN_CHANNEL_ID;
+    int& theTrainBatchId = hybridMgmtBlock->hybridBatchId[channelID];
+    // Loads in resumed-training or predict mode do not need to back up the train state
+    if (theTrainBatchId == 0) {
+        return;
+    }
+    // In train-and-eval mode, once train has switched to eval, the eval-side load must first back up
+    // the existing state: non-expansion HBM mode backs up keyOffsetMap, DDR mode backs up the offsetMapper object
+    LOG_INFO("On Estimator train and eval mode, start to backup train status, "
+             "current train batchId: {}.", theTrainBatchId);
+    EmbeddingMgmt::Instance()->BackUpTrainStatusBeforeLoad();
+    isBackUpTrainStatus = true;
+}
+
+void HybridMgmt::RecoverTrainStatus()
+{
+    if (isBackUpTrainStatus) {
+        EmbeddingMgmt::Instance()->RecoverTrainStatus();
+    }
+    isBackUpTrainStatus = false;
+}
\ No newline at end of file
diff --git a/src/core/hybrid_mgmt/hybrid_mgmt.h b/src/core/hybrid_mgmt/hybrid_mgmt.h
index 83299da3..fb050e70 100644
--- a/src/core/hybrid_mgmt/hybrid_mgmt.h
+++ b/src/core/hybrid_mgmt/hybrid_mgmt.h
@@ -133,6 +133,10 @@ namespace MxRec {
 
         void ProcessEmbInfoL3Storage(const EmbBaseInfo& info, bool& remainBatchOut);
 
+        void BackUpTrainStatus();
+
+        void RecoverTrainStatus();
+
    GTEST_PRIVATE:
         bool mutexDestroy { false };
         std::mutex lookUpAndSendBatchIdMtx;
@@ -225,6 +229,7 @@ namespace MxRec {
         bool isLoad { false };
         bool isInitialized { false };
         bool alreadyTrainOnce = false; // 用于判断是否为predict模式
+        bool isBackUpTrainStatus = false; // whether the train state has currently been backed up
         map lookUpSwapInAddrsPushId; // 用于处理eos场景,当消费者追上生产者且长时间无上游数据,会触发eos
         map specialProcessStatus;
diff --git a/src/core/ock_ctr_common/include/embedding_cache.h b/src/core/ock_ctr_common/include/embedding_cache.h
index f3bc9e23..5e25a718 100644
--- a/src/core/ock_ctr_common/include/embedding_cache.h
+++ b/src/core/ock_ctr_common/include/embedding_cache.h
@@ -315,6 +315,20 @@ public:
     virtual int LoadEmbTableInfos(std::string tableName, const std::vector& keys,
                                   const std::vector>& embeddings,
                                   const std::vector>& optimizerSlots) = 0;
+
+    /* *
+     * When the train channel switches to eval, back up this table's offsetMapper object, i.e. the on-device key state belonging to train
+     * @Param tableName: name of the table to back up
+     * @Return errorCode
+     */
+    virtual int BackUpTrainStatus(std::string tableName) = 0;
+
+    /* *
+     * When the eval channel switches back to train, restore this table's offsetMapper object to the backed-up on-device key state of train
+     * @Param tableName: name of the table to restore
+     * @Return errorCode
+     */
+    virtual int
RecoverTrainStatus(std::string tableName) = 0; }; } // namespace EmbCache -- Gitee From c145cc40abe4a77ab850169ebd09576d27261c21 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=BD=97=E5=B9=B8=E8=BF=90?= Date: Wed, 3 Jul 2024 14:31:25 +0800 Subject: [PATCH 252/302] =?UTF-8?q?=E3=80=90FIX=E3=80=91=E4=BF=AE=E5=A4=8D?= =?UTF-8?q?SSD=E6=A8=A1=E5=BC=8F=E7=B2=BE=E5=BA=A6=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../cache_manager/cache_manager.cpp | 8 +-- src/core/hybrid_mgmt/hybrid_mgmt.cpp | 59 +++++++------------ src/core/hybrid_mgmt/hybrid_mgmt.h | 7 +-- 3 files changed, 28 insertions(+), 46 deletions(-) diff --git a/src/AccCTR/src/embedding_cache/cache_manager/cache_manager.cpp b/src/AccCTR/src/embedding_cache/cache_manager/cache_manager.cpp index 8a6187a1..3017cf8e 100644 --- a/src/AccCTR/src/embedding_cache/cache_manager/cache_manager.cpp +++ b/src/AccCTR/src/embedding_cache/cache_manager/cache_manager.cpp @@ -72,16 +72,16 @@ int EmbCacheManagerImpl::CreateCacheForTable(const EmbCacheInfo& embCacheInfo, return H_THREAD_NUM_ERROR; } - uint32_t reserve = embCacheInfo.vocabSize / VOCAB_CACHE_RATIO; - if (!offsetMappers[embCacheInfo.tableName].Initialize(reserve, embCacheInfo.maxCacheSize)) { + uint32_t reserveDevice = embCacheInfo.maxCacheSize / VOCAB_CACHE_RATIO; + if (!offsetMappers[embCacheInfo.tableName].Initialize(reserveDevice, embCacheInfo.maxCacheSize)) { offsetMappers[embCacheInfo.tableName].UnInitialize(); offsetMappers.erase(embCacheInfo.tableName); return H_MEMORY_ALLOC_ERROR; } EmbPoolParam embPoolParam{prefillBufferSize, refillThreadNum}; - - if (!embTables[embCacheInfo.tableName].Initialize(embCacheInfo, reserve, initializerInfos, embPoolParam)) { + uint32_t reserveHost = embCacheInfo.vocabSize / VOCAB_CACHE_RATIO; + if (!embTables[embCacheInfo.tableName].Initialize(embCacheInfo, reserveHost, initializerInfos, embPoolParam)) { offsetMappers.erase(embCacheInfo.tableName); embTables.erase(embCacheInfo.tableName); return H_MEMORY_ALLOC_ERROR; diff --git a/src/core/hybrid_mgmt/hybrid_mgmt.cpp b/src/core/hybrid_mgmt/hybrid_mgmt.cpp index fda54d9d..9e195419 100644 --- a/src/core/hybrid_mgmt/hybrid_mgmt.cpp +++ b/src/core/hybrid_mgmt/hybrid_mgmt.cpp @@ -918,28 +918,27 @@ void HybridMgmt::SetOptimizerInfo(const string& embName, OptimizerInfo optimInfo EmbeddingMgmt::Instance()->SetOptimizerInfo(embName, optimInfo); } -void HybridMgmt::LookUpAddrs(const string &embName, int extEmbeddingSize) +// L3Storage +void HybridMgmt::LookUpAndRemoveAddrs(const EmbTaskInfo &info) { - int id = 0; - uint64_t memSize = extEmbeddingSize * sizeof(float); + uint64_t memSize = info.extEmbeddingSize * sizeof(float); const std::string hbmSwapKeyQueName = "HBMSwapKeyQue"; const std::string ddrSwapKeyQueName = "DDRSwapKeyQue"; - auto lookUpFunc = [this, memSize, embName, id]( + auto lookUpFunc = [this, memSize, info]( std::map>> &fromQue, std::map>> &toQue, const string &swapStr, const string &fromQueName ) { - std::vector keys = fromQue[embName + swapStr].WaitAndPop(); + std::vector keys = fromQue[info.name + swapStr].WaitAndPop(); if (!isRunning) { return; } std::vector addrs; TimeCost lookupAddrsTC; - int rc = embCache->EmbeddingLookupAddrs(embName, keys, addrs); + int rc = embCache->EmbeddingLookupAddrs(info.name, keys, addrs); if (rc != H_OK) { - lookupAddrSuccess = false; LOG_ERROR("lookUpAddrs, table:{}, fromQue: {}, swapStr:{}, keys.size:{}, addrs.size:{}, pushId:{}", - embName, fromQueName, swapStr, keys.size(), addrs.size(), id); + 
info.name, fromQueName, swapStr, keys.size(), addrs.size(), info.batchId); throw runtime_error("EmbeddingLookupAddrs failed! error code:" + std::to_string(rc)); } if (&fromQue == &DDRSwapKeyQue && swapStr == SWAP_OUT_STR) { @@ -947,31 +946,28 @@ void HybridMgmt::LookUpAddrs(const string &embName, int extEmbeddingSize) auto *newAddr = (float*)malloc(memSize); rc = memcpy_s(newAddr, memSize, addr, memSize); if (rc != 0) { - lookupAddrSuccess = false; throw runtime_error("memcpy_s failed! error code:" + std::to_string(rc)); } addr = newAddr; } - rc = embCache->EmbeddingRemove(embName, keys); + rc = embCache->EmbeddingRemove(info.name, keys); if (rc != H_OK) { - lookupAddrSuccess = false; throw runtime_error("EmbeddingRemove failed! error code:" + std::to_string(rc)); } } LOG_DEBUG("table:{}, fromQue:{}, swapStr:{}, keys.size:{}, addrs.size:{}, pushId:{}, lookupAddrsTC(ms):{}", - embName, fromQueName, swapStr, keys.size(), addrs.size(), id, lookupAddrsTC.ElapsedMS()); - toQue[embName + swapStr].Pushv(addrs); + info.name, fromQueName, swapStr, keys.size(), addrs.size(), info.batchId, lookupAddrsTC.ElapsedMS()); + toQue[info.name + swapStr].Pushv(addrs); }; - while (isRunning && lookupAddrSuccess) { - lookUpFunc(DDRSwapKeyQue, DDRSwapAddrsQue, SWAP_OUT_STR, ddrSwapKeyQueName); - lookUpFunc(DDRSwapKeyQue, DDRSwapAddrsQue, SWAP_IN_STR, ddrSwapKeyQueName); - lookUpFunc(HBMSwapKeyQue, tableToQueueLookup, SWAP_IN_STR, hbmSwapKeyQueName); - lookUpFunc(HBMSwapKeyQue, tableToQueueLookup, SWAP_OUT_STR, hbmSwapKeyQueName); - id++; - lookUpSwapInAddrsPushId[embName]++; - } + + lookUpFunc(DDRSwapKeyQue, DDRSwapAddrsQue, SWAP_OUT_STR, ddrSwapKeyQueName); + lookUpFunc(DDRSwapKeyQue, DDRSwapAddrsQue, SWAP_IN_STR, ddrSwapKeyQueName); + lookUpFunc(HBMSwapKeyQue, tableToQueueLookup, SWAP_IN_STR, hbmSwapKeyQueName); + lookUpFunc(HBMSwapKeyQue, tableToQueueLookup, SWAP_OUT_STR, hbmSwapKeyQueName); + lookUpSwapInAddrsPushId[info.name]++; } +// DDR void HybridMgmt::LookUpSwapAddrs(const string &embName, const string &swapStr) { int id = 0; @@ -1146,6 +1142,9 @@ void HybridMgmt::EmbeddingReceiveAndUpdateL3Storage(int batchId, int index, cons .extEmbeddingSize=embInfo.extEmbeddingSize, .name=embInfo.name }; + // host swap out need to be executed before lookup + LookUpAndRemoveAddrs(info); + float* ptr = nullptr; vector swapOutAddrs; int64_t dims0 = 0; @@ -1226,8 +1225,6 @@ void HybridMgmt::ProcessEmbInfoL3Storage(const EmbBaseInfo& info, bool& remainBa HandleEndBatchCase(info, swapInPos); - CheckLookupAddrSuccessL3Storage(); - if (info.channelId == TRAIN_CHANNEL_ID) { alreadyTrainOnce = true; } @@ -1295,8 +1292,6 @@ void HybridMgmt::InitDataPipelineForL3Storage(const string &embName, int extEmbe DDRSwapAddrsQue[embName + SWAP_IN_STR]; // 初始化lookup线程 - lookUpThreads.emplace_back( - std::async(std::launch::async, [=] { LookUpAddrs(embName, extEmbeddingSize); })); LOG_DEBUG("data pipeline for L3Storage init"); } @@ -1321,8 +1316,9 @@ void HybridMgmt::InitEmbeddingCache(const vector& embInfos) embInfo.name, embInfo.hostVocabSize, embInfo.extEmbeddingSize, embInfo.devVocabSize); EmbCache::EmbCacheInfo embCacheInfo(embInfo.name, embInfo.hostVocabSize, embInfo.embeddingSize, embInfo.extEmbeddingSize, embInfo.devVocabSize); + size_t prefill = std::max(embInfo.hostVocabSize/10, 2 * embInfo.devVocabSize); int ret = embCache->CreateCacheForTable( - embCacheInfo, embInfo.initializeInfos, INVALID_KEY_VALUE, embInfo.hostVocabSize, EMBEDDING_THREAD_NUM); + embCacheInfo, embInfo.initializeInfos, INVALID_KEY_VALUE, prefill, 
EMBEDDING_THREAD_NUM); if (ret != H_OK) { throw runtime_error(embInfo.name + "create cache for table failed, error code: " + std::to_string(ret)); } @@ -1355,9 +1351,6 @@ void HybridMgmt::JoinEmbeddingCacheThread() for (auto& t : EmbeddingReceiveAndUpdateThreadPool) { t.join(); } - for (auto& t : lookUpThreads) { - t.wait(); - } for (auto& t : lookUpSwapInAddrsThreads) { t.wait(); } @@ -2175,14 +2168,6 @@ void HybridMgmt::CheckLookupAddrSuccessDDR() } } -void HybridMgmt::CheckLookupAddrSuccessL3Storage() -{ - if (!lookupAddrSuccess) { - for (auto& t : lookUpThreads) { - t.get(); - } - } -} void HybridMgmt::GetSwapPairsAndKey2Offset(const EmbBaseInfo &info, vector &uniqueKeys, pair, vector> &swapInKoPair, diff --git a/src/core/hybrid_mgmt/hybrid_mgmt.h b/src/core/hybrid_mgmt/hybrid_mgmt.h index 83299da3..0654be91 100644 --- a/src/core/hybrid_mgmt/hybrid_mgmt.h +++ b/src/core/hybrid_mgmt/hybrid_mgmt.h @@ -157,7 +157,6 @@ namespace MxRec { std::vector EmbeddingReceiveAndUpdateThreadPool; std::vector> lookUpSwapOutAddrsThreads; std::vector> lookUpSwapInAddrsThreads; - std::vector> lookUpThreads; std::map>> HBMSwapKeyQue; std::map>> SwapOut2L3StorageKeyQue; @@ -190,9 +189,9 @@ namespace MxRec { void EvictL3StorageKeys(const string& embName, const vector& keys) const; - void LookUpAddrs(const string &embName, int extEmbeddingSize); + void LookUpAndRemoveAddrs(const EmbTaskInfo &info); // L3Storage, synchronous - void LookUpSwapAddrs(const std::string &embName, const std::string &swapStr); + void LookUpSwapAddrs(const std::string &embName, const std::string &swapStr); // DDR, asynchronous void EmbeddingTask(); @@ -312,8 +311,6 @@ namespace MxRec { void CheckLookupAddrSuccessDDR(); - void CheckLookupAddrSuccessL3Storage(); - void GetSwapPairsAndKey2Offset(const EmbBaseInfo& info, vector &uniqueKeys, std::pair, vector>& swapInKoPair, std::pair, vector>& swapOutKoPair); -- Gitee From 12b6f9f608f43a9d3e0f981f531dc2c72021478f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=BD=97=E5=B9=B8=E8=BF=90?= Date: Wed, 3 Jul 2024 14:48:57 +0800 Subject: [PATCH 253/302] =?UTF-8?q?=E3=80=90FIX=E3=80=91=E4=BF=AE=E5=A4=8D?= =?UTF-8?q?SSD=E6=A8=A1=E5=BC=8F=E7=B2=BE=E5=BA=A6=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../cache_manager/cache_manager.cpp | 7 +- src/core/emb_table/embedding_ddr.cpp | 2 +- src/core/hybrid_mgmt/hybrid_mgmt.cpp | 65 +++++++++---------- src/core/hybrid_mgmt/hybrid_mgmt.h | 7 +- src/core/l3_storage/cache_manager.h | 4 +- 5 files changed, 41 insertions(+), 44 deletions(-) diff --git a/src/AccCTR/src/embedding_cache/cache_manager/cache_manager.cpp b/src/AccCTR/src/embedding_cache/cache_manager/cache_manager.cpp index 3017cf8e..c6cc1bbd 100644 --- a/src/AccCTR/src/embedding_cache/cache_manager/cache_manager.cpp +++ b/src/AccCTR/src/embedding_cache/cache_manager/cache_manager.cpp @@ -72,16 +72,15 @@ int EmbCacheManagerImpl::CreateCacheForTable(const EmbCacheInfo& embCacheInfo, return H_THREAD_NUM_ERROR; } - uint32_t reserveDevice = embCacheInfo.maxCacheSize / VOCAB_CACHE_RATIO; - if (!offsetMappers[embCacheInfo.tableName].Initialize(reserveDevice, embCacheInfo.maxCacheSize)) { + uint32_t reserve = embCacheInfo.vocabSize / VOCAB_CACHE_RATIO; + if (!offsetMappers[embCacheInfo.tableName].Initialize(reserve, embCacheInfo.maxCacheSize)) { offsetMappers[embCacheInfo.tableName].UnInitialize(); offsetMappers.erase(embCacheInfo.tableName); return H_MEMORY_ALLOC_ERROR; } EmbPoolParam embPoolParam{prefillBufferSize, refillThreadNum}; - 
uint32_t reserveHost = embCacheInfo.vocabSize / VOCAB_CACHE_RATIO; - if (!embTables[embCacheInfo.tableName].Initialize(embCacheInfo, reserveHost, initializerInfos, embPoolParam)) { + if (!embTables[embCacheInfo.tableName].Initialize(embCacheInfo, reserve, initializerInfos, embPoolParam)) { offsetMappers.erase(embCacheInfo.tableName); embTables.erase(embCacheInfo.tableName); return H_MEMORY_ALLOC_ERROR; diff --git a/src/core/emb_table/embedding_ddr.cpp b/src/core/emb_table/embedding_ddr.cpp index ca706c73..151e372c 100644 --- a/src/core/emb_table/embedding_ddr.cpp +++ b/src/core/emb_table/embedding_ddr.cpp @@ -235,7 +235,7 @@ void EmbeddingDDR::SyncLatestEmbedding() } } else { // 在保存之前先更新ddr和ssd的embedding - SwapOutInfo info; + HBMSwapOutInfo info; cacheManager_->ProcessSwapOutKeys(name, swapOutKeys, info); vector swapOutAddrs; rc = embCache->EmbeddingLookupAddrs(name, info.swapOutDDRKeys, swapOutAddrs); diff --git a/src/core/hybrid_mgmt/hybrid_mgmt.cpp b/src/core/hybrid_mgmt/hybrid_mgmt.cpp index 9e195419..01beb358 100644 --- a/src/core/hybrid_mgmt/hybrid_mgmt.cpp +++ b/src/core/hybrid_mgmt/hybrid_mgmt.cpp @@ -962,8 +962,8 @@ void HybridMgmt::LookUpAndRemoveAddrs(const EmbTaskInfo &info) lookUpFunc(DDRSwapKeyQue, DDRSwapAddrsQue, SWAP_OUT_STR, ddrSwapKeyQueName); lookUpFunc(DDRSwapKeyQue, DDRSwapAddrsQue, SWAP_IN_STR, ddrSwapKeyQueName); - lookUpFunc(HBMSwapKeyQue, tableToQueueLookup, SWAP_IN_STR, hbmSwapKeyQueName); - lookUpFunc(HBMSwapKeyQue, tableToQueueLookup, SWAP_OUT_STR, hbmSwapKeyQueName); + lookUpFunc(HBMSwapKeyQue, HBMSwapAddrsQue, SWAP_IN_STR, hbmSwapKeyQueName); + lookUpFunc(HBMSwapKeyQue, HBMSwapAddrsQue, SWAP_OUT_STR, hbmSwapKeyQueName); lookUpSwapInAddrsPushId[info.name]++; } @@ -987,7 +987,7 @@ void HybridMgmt::LookUpSwapAddrs(const string &embName, const string &swapStr) LOG_DEBUG( "table:{}, swapStr:{}, keys.size:{}, addrs.size:{}, pushId:{}, lookupAddrsTC(ms):{}", embName, swapStr, keys.size(), addrs.size(), id, lookupAddrsTC.ElapsedMS()); - tableToQueueLookup[swapName].Pushv(addrs); + HBMSwapAddrsQue[swapName].Pushv(addrs); if (swapStr==SWAP_IN_STR) { lookUpSwapInAddrsPushId[embName]++; LOG_DEBUG("LookUpSwapAddrs, table:{}, pushId:{}, lookUpSwapInAddrsPushId:{}", @@ -1258,8 +1258,8 @@ void HybridMgmt::InitDataPipelineForDDR(const string &embName) // 初始化公共队列 HBMSwapKeyQue[embName+SWAP_IN_STR]; HBMSwapKeyQue[embName+SWAP_OUT_STR]; - tableToQueueLookup[embName+SWAP_IN_STR]; - tableToQueueLookup[embName+SWAP_OUT_STR]; + HBMSwapAddrsQue[embName + SWAP_IN_STR]; + HBMSwapAddrsQue[embName + SWAP_OUT_STR]; // 初始化lookup线程 lookUpSwapInAddrsPushId[embName]; // 此处初始化,避免多线程竞争导致计数错误 @@ -1276,13 +1276,13 @@ void HybridMgmt::InitDataPipelineForL3Storage(const string &embName, int extEmbe // 初始化公共队列 HBMSwapKeyQue[embName+SWAP_IN_STR]; HBMSwapKeyQue[embName+SWAP_OUT_STR]; - tableToQueueLookup[embName+SWAP_IN_STR]; - tableToQueueLookup[embName+SWAP_OUT_STR]; + HBMSwapAddrsQue[embName + SWAP_IN_STR]; + HBMSwapAddrsQue[embName + SWAP_OUT_STR]; HBMSwapKeyQue[embName + ADDR_STR]; - SwapOut2L3StorageKeyQue[embName + SWAP_IN_STR]; - SwapOut2L3StorageKeyQue[embName + ADDR_STR]; - SwapOut2L3StorageKeyQue[embName + SWAP_OUT_STR]; + HBMSwapKeyForL3StorageQue[embName + SWAP_IN_STR]; + HBMSwapKeyForL3StorageQue[embName + ADDR_STR]; + HBMSwapKeyForL3StorageQue[embName + SWAP_OUT_STR]; DDRSwapKeyQue[embName + SWAP_OUT_STR]; DDRSwapKeyQue[embName + SWAP_IN_STR]; @@ -1316,9 +1316,8 @@ void HybridMgmt::InitEmbeddingCache(const vector& embInfos) embInfo.name, embInfo.hostVocabSize, embInfo.extEmbeddingSize, 
embInfo.devVocabSize); EmbCache::EmbCacheInfo embCacheInfo(embInfo.name, embInfo.hostVocabSize, embInfo.embeddingSize, embInfo.extEmbeddingSize, embInfo.devVocabSize); - size_t prefill = std::max(embInfo.hostVocabSize/10, 2 * embInfo.devVocabSize); int ret = embCache->CreateCacheForTable( - embCacheInfo, embInfo.initializeInfos, INVALID_KEY_VALUE, prefill, EMBEDDING_THREAD_NUM); + embCacheInfo, embInfo.initializeInfos, INVALID_KEY_VALUE, embInfo.hostVocabSize, EMBEDDING_THREAD_NUM); if (ret != H_OK) { throw runtime_error(embInfo.name + "create cache for table failed, error code: " + std::to_string(ret)); } @@ -1327,13 +1326,13 @@ void HybridMgmt::InitEmbeddingCache(const vector& embInfos) void HybridMgmt::JoinEmbeddingCacheThread() { - for (auto &p : tableToQueueLookup) { + for (auto &p : HBMSwapAddrsQue) { p.second.DestroyQueue(); } for (auto &p : HBMSwapKeyQue) { p.second.DestroyQueue(); } - for (auto &p : SwapOut2L3StorageKeyQue) { + for (auto &p : HBMSwapKeyForL3StorageQue) { p.second.DestroyQueue(); } for (auto &p : DDRSwapKeyQue) { @@ -1439,7 +1438,7 @@ bool HybridMgmt::EmbeddingReceiveDDR(const EmbTaskInfo& info, float*& ptr, vecto } TimeCost EmbeddingRecvTC = TimeCost(); - swapOutAddrs = tableToQueueLookup[info.name+SWAP_OUT_STR].WaitAndPop(); + swapOutAddrs = HBMSwapAddrsQue[info.name + SWAP_OUT_STR].WaitAndPop(); if (!isRunning) { return false; } @@ -1617,7 +1616,7 @@ bool HybridMgmt::EmbeddingReceiveL3Storage(const EmbTaskInfo &info, float *&ptr, } TimeCost EmbeddingRecvTC = TimeCost(); // finish时会pop空vector,因此需要额外判定isRunning - swapOutAddrs = tableToQueueLookup[info.name+SWAP_OUT_STR].WaitAndPop(); + swapOutAddrs = HBMSwapAddrsQue[info.name + SWAP_OUT_STR].WaitAndPop(); if (!isRunning) { return false; } @@ -1681,8 +1680,8 @@ void HybridMgmt::EmbeddingUpdateL3Storage(const EmbTaskInfo& info, float *embPtr // L3Storage更新 TimeCost L3StorageUpdateTC = TimeCost(); - std::vector swapOutL3StorageAddrOffs = SwapOut2L3StorageKeyQue[info.name + ADDR_STR].WaitAndPop(); - std::vector swapOutL3StorageKeys = SwapOut2L3StorageKeyQue[info.name + SWAP_OUT_STR].WaitAndPop(); + std::vector swapOutL3StorageAddrOffs = HBMSwapKeyForL3StorageQue[info.name + ADDR_STR].WaitAndPop(); + std::vector swapOutL3StorageKeys = HBMSwapKeyForL3StorageQue[info.name + SWAP_OUT_STR].WaitAndPop(); if (!isRunning) { return; } @@ -1874,8 +1873,8 @@ void HybridMgmt::HandleFirstBatchCaseL3Storage(const EmbBaseInfo& info, HBMSwapKeyQue[info.name + SWAP_IN_STR].Pushv(swapInKoPair.first); // HBM->L3Storage - SwapOut2L3StorageKeyQue[info.name + SWAP_OUT_STR].Pushv(emptySwapOutL3StorageKeys); - SwapOut2L3StorageKeyQue[info.name + ADDR_STR].Pushv(emptySwapOutL3StorageAddrOff); + HBMSwapKeyForL3StorageQue[info.name + SWAP_OUT_STR].Pushv(emptySwapOutL3StorageKeys); + HBMSwapKeyForL3StorageQue[info.name + ADDR_STR].Pushv(emptySwapOutL3StorageAddrOff); } void HybridMgmt::HandleDataSwapForL3Storage(const EmbBaseInfo& info, @@ -1888,18 +1887,18 @@ void HybridMgmt::HandleDataSwapForL3Storage(const EmbBaseInfo& info, LOG_DEBUG("ProcessSwapInKeysTC(ms):{} ", ProcessSwapInKeysTC.ElapsedMS()); TimeCost ProcessSwapOutKeysTC; - SwapOutInfo swapInfo; - cacheManager->ProcessSwapOutKeys(info.name, swapOutKeys, swapInfo); + HBMSwapOutInfo hbmSwapInfo; + cacheManager->ProcessSwapOutKeys(info.name, swapOutKeys, hbmSwapInfo); LOG_DEBUG("ProcessSwapOutKeysTC(ms):{} ", ProcessSwapOutKeysTC.ElapsedMS()); LOG_DEBUG("table:{}, batchId:{}, channelId:{}, swapInSize:{}, swapOutSize:{}", info.name, info.batchId, info.channelId, swapInKeys.size(), 
swapOutKeys.size()); - LOG_DEBUG("table:{}, batchId:{}, channelId:{}, swapOutDDRKeys:{}, swapOutDDRAddrOffs:{}, " - "swapOutL3StorageKeys:{}, swapOutL3StorageAddrOff:{}", - info.name, info.batchId, info.channelId, swapInfo.swapOutDDRKeys.size(), - swapInfo.swapOutDDRAddrOffs.size(), swapInfo.swapOutL3StorageKeys.size(), - swapInfo.swapOutL3StorageAddrOffs.size()); - LOG_DEBUG("table:{}, batchId:{}, channelId:{}, DDRToL3StorageKeys:{}, L3StorageToDDRKeys:{}", + LOG_DEBUG("table:{}, batchId:{}, channelId:{}, swap out, HBM2DDR Keys:{}, HBM2DDR AddrOffs:{}, " + "HBM2L3Storage Keys:{}, HBM2L3Storage AddrOff:{}", + info.name, info.batchId, info.channelId, hbmSwapInfo.swapOutDDRKeys.size(), + hbmSwapInfo.swapOutDDRAddrOffs.size(), hbmSwapInfo.swapOutL3StorageKeys.size(), + hbmSwapInfo.swapOutL3StorageAddrOffs.size()); + LOG_DEBUG("table:{}, batchId:{}, channelId:{}, DDR2L3Storage Keys:{}, L3Storage2DDR Keys:{}", info.name, info.batchId, info.channelId, DDRToL3StorageKeys.size(), L3StorageToDDRKeys.size()); auto DDRToL3StorageKeysForL3S = DDRToL3StorageKeys; @@ -1912,18 +1911,18 @@ void HybridMgmt::HandleDataSwapForL3Storage(const EmbBaseInfo& info, DDRSwapKeyForL3StorageQue[info.name + SWAP_IN_STR].Pushv(L3StorageToDDRKeysForL3S); // HBM<->DDR - HBMSwapKeyQue[info.name + SWAP_OUT_STR].Pushv(swapInfo.swapOutDDRKeys); - HBMSwapKeyQue[info.name + ADDR_STR].Pushv(swapInfo.swapOutDDRAddrOffs); + HBMSwapKeyQue[info.name + SWAP_OUT_STR].Pushv(hbmSwapInfo.swapOutDDRKeys); + HBMSwapKeyQue[info.name + ADDR_STR].Pushv(hbmSwapInfo.swapOutDDRAddrOffs); HBMSwapKeyQue[info.name + SWAP_IN_STR].Pushv(swapInKeys); // HBM->L3Storage - SwapOut2L3StorageKeyQue[info.name + SWAP_OUT_STR].Pushv(swapInfo.swapOutL3StorageKeys); - SwapOut2L3StorageKeyQue[info.name + ADDR_STR].Pushv(swapInfo.swapOutL3StorageAddrOffs); + HBMSwapKeyForL3StorageQue[info.name + SWAP_OUT_STR].Pushv(hbmSwapInfo.swapOutL3StorageKeys); + HBMSwapKeyForL3StorageQue[info.name + ADDR_STR].Pushv(hbmSwapInfo.swapOutL3StorageAddrOffs); } bool HybridMgmt::BuildH2DEmbedding(const EmbTaskInfo &info, vector &h2dEmb) { - std::vector swapInAddrs = tableToQueueLookup[info.name+SWAP_IN_STR].WaitAndPop(); + std::vector swapInAddrs = HBMSwapAddrsQue[info.name + SWAP_IN_STR].WaitAndPop(); if (!isRunning) { return false; } diff --git a/src/core/hybrid_mgmt/hybrid_mgmt.h b/src/core/hybrid_mgmt/hybrid_mgmt.h index 0654be91..f5897861 100644 --- a/src/core/hybrid_mgmt/hybrid_mgmt.h +++ b/src/core/hybrid_mgmt/hybrid_mgmt.h @@ -159,21 +159,20 @@ namespace MxRec { std::vector> lookUpSwapInAddrsThreads; std::map>> HBMSwapKeyQue; - std::map>> SwapOut2L3StorageKeyQue; + std::map>> HBMSwapKeyForL3StorageQue; std::map>> DDRSwapKeyQue; std::map>> DDRSwapKeyForL3StorageQue; + std::map>> HBMSwapAddrsQue; std::map>> DDRSwapAddrsQue; std::mutex evictMut; std::map> trainKeysSet; - const string SWAP_IN_STR = "SwapIn"; const string SWAP_OUT_STR = "SwapOut"; - const string ADDR_STR = "Addr"; + const string ADDR_STR = "Addr"; ock::ctr::EmbCacheManagerPtr embCache = nullptr; - std::map>> tableToQueueLookup; std::map> lastSwapInPosMap {}; std::map>> trainTestSwitchInfoStore {}; std::atomic lookupAddrSuccess {true}; diff --git a/src/core/l3_storage/cache_manager.h b/src/core/l3_storage/cache_manager.h index 3f5b0a22..79335788 100644 --- a/src/core/l3_storage/cache_manager.h +++ b/src/core/l3_storage/cache_manager.h @@ -40,7 +40,7 @@ namespace MxRec { absl::flat_hash_map& keyOffsetMap; }; - struct SwapOutInfo { + struct HBMSwapOutInfo { vector swapOutDDRKeys; vector swapOutDDRAddrOffs; vector 
swapOutL3StorageKeys; @@ -89,7 +89,7 @@ namespace MxRec { void PutKey(const string& embTableName, const emb_key_t& key, RecordType type); void ProcessSwapOutKeys(const string& tableName, const vector& swapOutKeys, - SwapOutInfo& info); + HBMSwapOutInfo& info); void ProcessSwapInKeys(const string& tableName, const vector& swapInKeys, vector& DDRToL3StorageKeys, -- Gitee From ddefbd55694512acf3a2213c94cd491ed3058077 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=BD=97=E5=B9=B8=E8=BF=90?= Date: Wed, 3 Jul 2024 15:03:10 +0800 Subject: [PATCH 254/302] =?UTF-8?q?=E3=80=90FIX=E3=80=91=E4=BF=AE=E5=A4=8D?= =?UTF-8?q?prefill=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core/hybrid_mgmt/hybrid_mgmt.cpp | 3 ++- src/core/utils/common.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/core/hybrid_mgmt/hybrid_mgmt.cpp b/src/core/hybrid_mgmt/hybrid_mgmt.cpp index fda54d9d..e4e30f64 100644 --- a/src/core/hybrid_mgmt/hybrid_mgmt.cpp +++ b/src/core/hybrid_mgmt/hybrid_mgmt.cpp @@ -1321,8 +1321,9 @@ void HybridMgmt::InitEmbeddingCache(const vector& embInfos) embInfo.name, embInfo.hostVocabSize, embInfo.extEmbeddingSize, embInfo.devVocabSize); EmbCache::EmbCacheInfo embCacheInfo(embInfo.name, embInfo.hostVocabSize, embInfo.embeddingSize, embInfo.extEmbeddingSize, embInfo.devVocabSize); + size_t prefill = std::max(embInfo.hostVocabSize/HOST_TO_PREFILL_RATIO, embInfo.devVocabSize); int ret = embCache->CreateCacheForTable( - embCacheInfo, embInfo.initializeInfos, INVALID_KEY_VALUE, embInfo.hostVocabSize, EMBEDDING_THREAD_NUM); + embCacheInfo, embInfo.initializeInfos, INVALID_KEY_VALUE, prefill, EMBEDDING_THREAD_NUM); if (ret != H_OK) { throw runtime_error(embInfo.name + "create cache for table failed, error code: " + std::to_string(ret)); } diff --git a/src/core/utils/common.h b/src/core/utils/common.h index 9a39e7ac..c020bbc5 100644 --- a/src/core/utils/common.h +++ b/src/core/utils/common.h @@ -69,6 +69,7 @@ namespace MxRec { constexpr int SSD_SIZE_INDEX = 2; constexpr int MAX_FILE_NUM = 1000; constexpr int EMBEDDING_THREAD_NUM = 2; + constexpr int HOST_TO_PREFILL_RATIO = 10; // for GLOG struct GlogConfig { static bool gStatOn; -- Gitee From afa1b548e1d91dd6bd5f8b9a2b4e61af04019440 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=BD=97=E5=B9=B8=E8=BF=90?= Date: Wed, 3 Jul 2024 15:09:34 +0800 Subject: [PATCH 255/302] =?UTF-8?q?=E3=80=90FIX=E3=80=91=E4=BF=AE=E5=A4=8D?= =?UTF-8?q?reserve=E5=9C=A8dev=E4=BE=A7=E6=B5=AA=E8=B4=B9=E7=9A=84?= =?UTF-8?q?=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../embedding_cache/cache_manager/cache_manager.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/AccCTR/src/embedding_cache/cache_manager/cache_manager.cpp b/src/AccCTR/src/embedding_cache/cache_manager/cache_manager.cpp index 8a6187a1..a9fac9f6 100644 --- a/src/AccCTR/src/embedding_cache/cache_manager/cache_manager.cpp +++ b/src/AccCTR/src/embedding_cache/cache_manager/cache_manager.cpp @@ -64,7 +64,8 @@ int EmbCacheManagerImpl::CreateCacheForTable(const EmbCacheInfo& embCacheInfo, } if ((prefillBufferSize < 1) || (prefillBufferSize > embCacheInfo.vocabSize)) { - ExternalLogger::PrintLog(LogLevel::ERROR, "prefillBufferSize has to be between [1, hostVocabSize]"); + ExternalLogger::PrintLog(LogLevel::ERROR, "prefillBufferSize: " + std::to_string(prefillBufferSize) + + "has to be between [1, hostVocabSize]"); return 
H_PREFILL_BUFFER_SIZE_INVALID; } @@ -72,16 +73,16 @@ int EmbCacheManagerImpl::CreateCacheForTable(const EmbCacheInfo& embCacheInfo, return H_THREAD_NUM_ERROR; } - uint32_t reserve = embCacheInfo.vocabSize / VOCAB_CACHE_RATIO; - if (!offsetMappers[embCacheInfo.tableName].Initialize(reserve, embCacheInfo.maxCacheSize)) { + uint32_t reserveDevice = embCacheInfo.maxCacheSize / VOCAB_CACHE_RATIO; + if (!offsetMappers[embCacheInfo.tableName].Initialize(reserveDevice, embCacheInfo.maxCacheSize)) { offsetMappers[embCacheInfo.tableName].UnInitialize(); offsetMappers.erase(embCacheInfo.tableName); return H_MEMORY_ALLOC_ERROR; } EmbPoolParam embPoolParam{prefillBufferSize, refillThreadNum}; - - if (!embTables[embCacheInfo.tableName].Initialize(embCacheInfo, reserve, initializerInfos, embPoolParam)) { + uint32_t reserveHost = embCacheInfo.vocabSize / VOCAB_CACHE_RATIO; + if (!embTables[embCacheInfo.tableName].Initialize(embCacheInfo, reserveHost, initializerInfos, embPoolParam)) { offsetMappers.erase(embCacheInfo.tableName); embTables.erase(embCacheInfo.tableName); return H_MEMORY_ALLOC_ERROR; -- Gitee From fe3982f8995eb03e428f03866c941b8569f86785 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=BD=97=E5=B9=B8=E8=BF=90?= Date: Wed, 3 Jul 2024 15:19:09 +0800 Subject: [PATCH 256/302] =?UTF-8?q?=E3=80=90FIX=E3=80=91=E4=BF=AE=E5=A4=8D?= =?UTF-8?q?reserve=E5=9C=A8dev=E4=BE=A7=E6=B5=AA=E8=B4=B9=E7=9A=84?= =?UTF-8?q?=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core/l3_storage/cache_manager.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/l3_storage/cache_manager.cpp b/src/core/l3_storage/cache_manager.cpp index 75d73b2d..ee3d7bc5 100644 --- a/src/core/l3_storage/cache_manager.cpp +++ b/src/core/l3_storage/cache_manager.cpp @@ -181,7 +181,7 @@ int64_t CacheManager::GetTableUsage(const string& tableName) } void CacheManager::ProcessSwapOutKeys(const string& tableName, const vector& swapOutKeys, - SwapOutInfo& info) + HBMSwapOutInfo& info) { auto& swapOutDDRKeys = info.swapOutDDRKeys; auto& swapOutDDRAddrOffs = info.swapOutDDRAddrOffs; -- Gitee From 486e3e9f7159de52b78bb4313d027e7d48d6918b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=BD=97=E5=B9=B8=E8=BF=90?= Date: Wed, 3 Jul 2024 15:37:52 +0800 Subject: [PATCH 257/302] =?UTF-8?q?=E3=80=90FIX=E3=80=91=E4=BF=AE=E5=A4=8D?= =?UTF-8?q?SSD=E7=B2=BE=E5=BA=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core/hybrid_mgmt/hybrid_mgmt.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/core/hybrid_mgmt/hybrid_mgmt.cpp b/src/core/hybrid_mgmt/hybrid_mgmt.cpp index 01beb358..6969c27d 100644 --- a/src/core/hybrid_mgmt/hybrid_mgmt.cpp +++ b/src/core/hybrid_mgmt/hybrid_mgmt.cpp @@ -1142,8 +1142,6 @@ void HybridMgmt::EmbeddingReceiveAndUpdateL3Storage(int batchId, int index, cons .extEmbeddingSize=embInfo.extEmbeddingSize, .name=embInfo.name }; - // host swap out need to be executed before lookup - LookUpAndRemoveAddrs(info); float* ptr = nullptr; vector swapOutAddrs; @@ -1614,6 +1612,9 @@ bool HybridMgmt::EmbeddingReceiveL3Storage(const EmbTaskInfo &info, float *&ptr, if (!isRunning) { return false; } + // DDR swap out key need to be removed + LookUpAndRemoveAddrs(info); + TimeCost EmbeddingRecvTC = TimeCost(); // finish时会pop空vector,因此需要额外判定isRunning swapOutAddrs = HBMSwapAddrsQue[info.name + SWAP_OUT_STR].WaitAndPop(); -- Gitee From 474ca30dc51783a59d16803324e25cf8aa2d0395 Mon Sep 17 00:00:00 2001 
From: =?UTF-8?q?=E7=BD=97=E5=B9=B8=E8=BF=90?= Date: Wed, 3 Jul 2024 16:04:13 +0800 Subject: [PATCH 258/302] =?UTF-8?q?=E3=80=90FIX=E3=80=91=E5=88=A0=E9=99=A4?= =?UTF-8?q?=E5=8A=A8=E6=80=81=E6=89=A9=E5=AE=B9=E5=86=97=E4=BD=99=E4=BB=A3?= =?UTF-8?q?=E7=A0=81=EF=BC=8C=E4=BF=AE=E5=A4=8Dtest?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/tests/emb_table/embedding_ddr_test.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/tests/emb_table/embedding_ddr_test.cpp b/src/tests/emb_table/embedding_ddr_test.cpp index 60ec5af6..097167f6 100644 --- a/src/tests/emb_table/embedding_ddr_test.cpp +++ b/src/tests/emb_table/embedding_ddr_test.cpp @@ -22,7 +22,6 @@ See the License for the specific language governing permissions and #include #include #include "utils/common.h" -#include "emb_table/emb_table.h" #include "emb_table/embedding_ddr.h" using namespace std; -- Gitee From e6c501c12489146c7887c52519f739922367f780 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=BD=97=E5=B9=B8=E8=BF=90?= Date: Thu, 4 Jul 2024 09:26:28 +0800 Subject: [PATCH 259/302] =?UTF-8?q?=E3=80=90FIX=E3=80=91issue?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../src/embedding_cache/cache_manager/cache_manager.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/AccCTR/src/embedding_cache/cache_manager/cache_manager.cpp b/src/AccCTR/src/embedding_cache/cache_manager/cache_manager.cpp index a9fac9f6..76e90abc 100644 --- a/src/AccCTR/src/embedding_cache/cache_manager/cache_manager.cpp +++ b/src/AccCTR/src/embedding_cache/cache_manager/cache_manager.cpp @@ -64,8 +64,8 @@ int EmbCacheManagerImpl::CreateCacheForTable(const EmbCacheInfo& embCacheInfo, } if ((prefillBufferSize < 1) || (prefillBufferSize > embCacheInfo.vocabSize)) { - ExternalLogger::PrintLog(LogLevel::ERROR, "prefillBufferSize: " + std::to_string(prefillBufferSize) + - "has to be between [1, hostVocabSize]"); + ExternalLogger::PrintLog(LogLevel::ERROR, "PrefillBufferSize: " + std::to_string(prefillBufferSize) + + " has to be between [1, hostVocabSize]."); return H_PREFILL_BUFFER_SIZE_INVALID; } -- Gitee From 637ef26d445886b98aee01f64b13a40081c60c04 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=BD=97=E5=B9=B8=E8=BF=90?= Date: Thu, 4 Jul 2024 09:54:03 +0800 Subject: [PATCH 260/302] =?UTF-8?q?=E3=80=90FIX=E3=80=91delete?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/tests/emb_table/embedding_mgmt_test.cpp | 1 - src/tests/emb_table/embedding_static_test.cpp | 1 - 2 files changed, 2 deletions(-) diff --git a/src/tests/emb_table/embedding_mgmt_test.cpp b/src/tests/emb_table/embedding_mgmt_test.cpp index 055cf5c5..81a354bf 100644 --- a/src/tests/emb_table/embedding_mgmt_test.cpp +++ b/src/tests/emb_table/embedding_mgmt_test.cpp @@ -22,7 +22,6 @@ See the License for the specific language governing permissions and #include #include #include "utils/common.h" -#include "emb_table/emb_table.h" #include "emb_table/embedding_mgmt.h" using namespace std; diff --git a/src/tests/emb_table/embedding_static_test.cpp b/src/tests/emb_table/embedding_static_test.cpp index a08569b3..5d1f0ab7 100644 --- a/src/tests/emb_table/embedding_static_test.cpp +++ b/src/tests/emb_table/embedding_static_test.cpp @@ -21,7 +21,6 @@ See the License for the specific language governing permissions and #include #include #include "utils/common.h" -#include "emb_table/emb_table.h" #include "emb_table/embedding_static.h" using namespace
std; -- Gitee From c9a321e908b0290b60b1776b595e38b37d2698ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=BD=97=E5=B9=B8=E8=BF=90?= Date: Thu, 4 Jul 2024 16:08:15 +0800 Subject: [PATCH 261/302] =?UTF-8?q?=E3=80=90FIX=E3=80=91=E6=89=A9=E5=AE=B9?= =?UTF-8?q?=E6=A8=A1=E5=BC=8F=E4=B8=8B=EF=BC=8Ctable.capacity=E5=87=BA?= =?UTF-8?q?=E7=8E=B0=E5=81=B6=E5=8F=91=E8=B4=9F=E5=80=BC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core/emb_table/embedding_ddr.cpp | 2 +- src/core/emb_table/embedding_dynamic.cpp | 4 ++-- src/core/emb_table/embedding_table.h | 3 ++- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/core/emb_table/embedding_ddr.cpp b/src/core/emb_table/embedding_ddr.cpp index ca706c73..b9ca70dc 100644 --- a/src/core/emb_table/embedding_ddr.cpp +++ b/src/core/emb_table/embedding_ddr.cpp @@ -45,7 +45,7 @@ void EmbeddingDDR::Key2Offset(std::vector& splitKey, int channel) int64_t EmbeddingDDR::capacity() const { - return capacity_; + return capacity_.load(); } /* diff --git a/src/core/emb_table/embedding_dynamic.cpp b/src/core/emb_table/embedding_dynamic.cpp index 7f8cd7e5..703d08ad 100644 --- a/src/core/emb_table/embedding_dynamic.cpp +++ b/src/core/emb_table/embedding_dynamic.cpp @@ -77,7 +77,7 @@ void EmbeddingDynamic::Key2Offset(std::vector& keys, int channel) int64_t EmbeddingDynamic::capacity() const { - return capacity_; + return capacity_.load(); } int64_t EmbeddingDynamic::GetEmptyEmbeddingAddress() @@ -103,7 +103,7 @@ void EmbeddingDynamic::MallocEmbeddingBlock(int embNum) float *embAddr = static_cast(block) + (i * extEmbSize_); embeddingList_.push_back(embAddr); } - capacity_ += embNum; + capacity_.fetch_add(embNum); } void EmbeddingDynamic::RandomInit(void* addr, size_t embNum) diff --git a/src/core/emb_table/embedding_table.h b/src/core/emb_table/embedding_table.h index cbf15a7a..3396a8a0 100644 --- a/src/core/emb_table/embedding_table.h +++ b/src/core/emb_table/embedding_table.h @@ -15,6 +15,7 @@ See the License for the specific language governing permissions and #ifndef MX_REC_EMBEDDING_TABLE_H #define MX_REC_EMBEDDING_TABLE_H +#include #include #include #include @@ -113,7 +114,7 @@ protected: size_t embSize_; size_t extEmbSize_; int seed_; - int64_t capacity_; + std::atomic capacity_; size_t rankId_; size_t rankSize_; vector loadOffset; -- Gitee From a55ed2e12febf7542083651f8ed95ffbd3894e90 Mon Sep 17 00:00:00 2001 From: steepcurve Date: Fri, 5 Jul 2024 08:01:45 +0000 Subject: [PATCH 262/302] fix: conflict in src/core/utils/common.h. 
Signed-off-by: steepcurve --- src/core/utils/common.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/core/utils/common.h b/src/core/utils/common.h index f8ff4565..8c7528f4 100644 --- a/src/core/utils/common.h +++ b/src/core/utils/common.h @@ -70,6 +70,7 @@ constexpr size_t MAX_VOCABULARY_SIZE = 1e10; constexpr int SSD_SIZE_INDEX = 2; constexpr int MAX_FILE_NUM = 1000; constexpr int EMBEDDING_THREAD_NUM = 2; +constexpr int HOST_TO_PREFILL_RATIO = 10; // for GLOG struct GlogConfig { static bool gStatOn; -- Gitee From 03a664699c4aa5fd7bd6e8353327181dcd677aec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=BD=97=E5=B9=B8=E8=BF=90?= Date: Fri, 5 Jul 2024 16:38:57 +0800 Subject: [PATCH 263/302] =?UTF-8?q?=E3=80=90FIX=E3=80=91=E6=89=A9=E5=AE=B9?= =?UTF-8?q?=E6=A8=A1=E5=BC=8F=E4=B8=8B=EF=BC=8Ctable.capacity=E5=87=BA?= =?UTF-8?q?=E7=8E=B0=E5=81=B6=E5=8F=91=E8=B4=9F=E5=80=BC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core/hybrid_mgmt/hybrid_mgmt.cpp | 591 +++++++++++++-------------- src/core/hybrid_mgmt/hybrid_mgmt.h | 408 +++++++++--------- 2 files changed, 485 insertions(+), 514 deletions(-) diff --git a/src/core/hybrid_mgmt/hybrid_mgmt.cpp b/src/core/hybrid_mgmt/hybrid_mgmt.cpp index 61064fb4..3eb99685 100644 --- a/src/core/hybrid_mgmt/hybrid_mgmt.cpp +++ b/src/core/hybrid_mgmt/hybrid_mgmt.cpp @@ -15,23 +15,23 @@ See the License for the specific language governing permissions and #include "hybrid_mgmt.h" +#include + #include +#include #include -#include #include #include -#include +#include "checkpoint/checkpoint.h" +#include "emb_table/embedding_mgmt.h" #include "hd_transfer/hd_transfer.h" #include "hybrid_mgmt/hybrid_mgmt_block.h" -#include "utils/time_cost.h" -#include "utils/logger.h" -#include "utils/common.h" -#include "checkpoint/checkpoint.h" -#include "key_process/key_process.h" #include "key_process/feature_admit_and_evict.h" -#include "emb_table/embedding_mgmt.h" - +#include "key_process/key_process.h" +#include "utils/common.h" +#include "utils/logger.h" +#include "utils/time_cost.h" using namespace MxRec; using namespace std; @@ -98,8 +98,8 @@ bool HybridMgmt::Initialize(RankInfo rankInfo, const vector& embInfos, InitRankInfo(rankInfo, embInfos); GlogConfig::gStatOn = GlobalEnv::statOn; - LOG_INFO(MGMT + "begin initialize, localRankSize:{}, localRankId:{}, rank:{}", - rankInfo.localRankSize, rankInfo.localRankId, rankInfo.rankId); + LOG_INFO(MGMT + "begin initialize, localRankSize:{}, localRankId:{}, rank:{}", rankInfo.localRankSize, + rankInfo.localRankId, rankInfo.rankId); mgmtRankInfo = rankInfo; mgmtEmbInfo = embInfos; @@ -134,15 +134,15 @@ bool HybridMgmt::Initialize(RankInfo rankInfo, const vector& embInfos, Start(); } - for (const auto& info: embInfos) { - LOG_INFO(MGMT + "table:{}, vocab size dev+host:{}+{}, send count:{}", - info.name, info.devVocabSize, info.hostVocabSize, info.sendCount); + for (const auto& info : embInfos) { + LOG_INFO(MGMT + "table:{}, vocab size dev+host:{}+{}, send count:{}", info.name, info.devVocabSize, + info.hostVocabSize, info.sendCount); } LOG_INFO(MGMT + "end initialize, rankId:{}, isDDR:{}, " "step[train_interval, eval_interval, save_interval, max_train_step]:[{}, {}, {}, {}]", - rankInfo.rankId, rankInfo.isDDR, - rankInfo.ctrlSteps.at(TRAIN_CHANNEL_ID), rankInfo.ctrlSteps.at(EVAL_CHANNEL_ID), - rankInfo.ctrlSteps.at(SAVE_STEP_INDEX), rankInfo.ctrlSteps.at(MAX_TRAIN_STEP_INDEX)); + rankInfo.rankId, rankInfo.isDDR, rankInfo.ctrlSteps.at(TRAIN_CHANNEL_ID), + 
rankInfo.ctrlSteps.at(EVAL_CHANNEL_ID), rankInfo.ctrlSteps.at(SAVE_STEP_INDEX), + rankInfo.ctrlSteps.at(MAX_TRAIN_STEP_INDEX)); #endif isInitialized = true; @@ -225,7 +225,7 @@ bool HybridMgmt::Load(const string& loadPath, vector warmStartTables) if (warmStartTables.size() == 0) { EmbeddingMgmt::Instance()->Load(loadPath, trainKeysSet); } else { - for (auto& tableName: warmStartTables) { + for (auto& tableName : warmStartTables) { EmbeddingMgmt::Instance()->Load(tableName, loadPath, trainKeysSet); } } @@ -373,17 +373,17 @@ void HybridMgmt::Start() void HybridMgmt::StartThreadForHBM() { #ifndef GTEST - auto parseKeysTaskForHBMTrain = [this]() { - TrainTask(TaskType::HBM); - LOG_INFO("parseKeysTaskForHBMTrain done"); - }; - procThreads.emplace_back(std::make_unique(parseKeysTaskForHBMTrain)); - - auto parseKeysTaskForHBMEval = [this]() { - EvalTask(TaskType::HBM); - LOG_INFO("parseKeysTaskForHBMEval done"); - }; - procThreads.emplace_back(std::make_unique(parseKeysTaskForHBMEval)); + auto parseKeysTaskForHBMTrain = [this]() { + TrainTask(TaskType::HBM); + LOG_INFO("parseKeysTaskForHBMTrain done"); + }; + procThreads.emplace_back(std::make_unique(parseKeysTaskForHBMTrain)); + + auto parseKeysTaskForHBMEval = [this]() { + EvalTask(TaskType::HBM); + LOG_INFO("parseKeysTaskForHBMEval done"); + }; + procThreads.emplace_back(std::make_unique(parseKeysTaskForHBMEval)); #endif } @@ -424,7 +424,7 @@ void HybridMgmt::Destroy() isRunning = false; mutexDestroy = true; - for (const auto& embInfo: mgmtEmbInfo) { + for (const auto& embInfo : mgmtEmbInfo) { for (int index = 0; index < EMBEDDING_THREAD_NUM; index++) { cvLastUpdateFinishMap[embInfo.name][index].notify_all(); cvLastLookUpFinishMap[embInfo.name][index].notify_all(); @@ -456,7 +456,9 @@ void HybridMgmt::Destroy() // 停止预处理 KEY_PROCESS_INSTANCE->Destroy(); // stop embCache, even if the host emb is still allocating - if (embCache != nullptr) { embCache->Destroy(); } + if (embCache != nullptr) { + embCache->Destroy(); + } LOG_DEBUG(MGMT + "Destroy hybrid_mgmt module end."); } @@ -493,12 +495,10 @@ void HybridMgmt::EvalTask(TaskType type) do { hybridMgmtBlock->CheckAndSetBlock(channelId); if (hybridMgmtBlock->GetBlockStatus(channelId)) { - LOG_DEBUG("eval channel block at batchId:{}, needWaitSave:{}", - evalBatchId, hybridMgmtBlock->IsNeedWaitSave()); + LOG_DEBUG("eval channel block at batchId:{}, needWaitSave:{}", evalBatchId, + hybridMgmtBlock->IsNeedWaitSave()); std::unique_lock checkSaveLocker(saveMutex); - cvCheckSave.wait(checkSaveLocker, [this] { - return !hybridMgmtBlock->IsNeedWaitSave() || mutexDestroy; - }); + cvCheckSave.wait(checkSaveLocker, [this] { return !hybridMgmtBlock->IsNeedWaitSave() || mutexDestroy; }); hybridMgmtBlock->Wake(TRAIN_CHANNEL_ID); LOG_DEBUG("wake TrainTask"); hybridMgmtBlock->DoBlock(channelId); @@ -513,29 +513,28 @@ void HybridMgmt::EvalTask(TaskType type) #endif } -void HybridMgmt::SendUniqKeysAndRestoreVecHBM(const EmbBaseInfo &info, - const unique_ptr> &infoVecs, bool isGrad) const +void HybridMgmt::SendUniqKeysAndRestoreVecHBM(const EmbBaseInfo& info, const unique_ptr>& infoVecs, + bool isGrad) const { TimeCost sendUniqueKeysSyncTC; - LOG_DEBUG("channelId:{} batchId:{}, global unique, table name: {}, is grad: {}", - info.channelId, info.batchId, info.name, isGrad); + LOG_DEBUG("channelId:{} batchId:{}, global unique, table name: {}, is grad: {}", info.channelId, info.batchId, + info.name, isGrad); if (isGrad) { hdTransfer->Send(TransferChannel::UNIQKEYS, {infoVecs->back()}, info.channelId, info.name); } 
infoVecs->pop_back(); - LOG_DEBUG("channelId:{} batchId:{}, sendUniqueKeysSyncTC(ms):{}", - info.channelId, info.batchId, sendUniqueKeysSyncTC.ElapsedMS()); + LOG_DEBUG("channelId:{} batchId:{}, sendUniqueKeysSyncTC(ms):{}", info.channelId, info.batchId, + sendUniqueKeysSyncTC.ElapsedMS()); TimeCost sendUniqueRestoreVecSyncTC; if (isGrad) { hdTransfer->Send(TransferChannel::RESTORE_SECOND, {infoVecs->back()}, info.channelId, info.name); } infoVecs->pop_back(); - LOG_DEBUG("channelId:{} batchId:{}, sendUniqueRestoreVecSyncTC(ms):{}", - info.channelId, info.batchId, sendUniqueRestoreVecSyncTC.ElapsedMS()); + LOG_DEBUG("channelId:{} batchId:{}, sendUniqueRestoreVecSyncTC(ms):{}", info.channelId, info.batchId, + sendUniqueRestoreVecSyncTC.ElapsedMS()); } - /// 当前处理的batch是否是最后一个batch,涵盖train切换eval、save场景 /// \param batchId 已处理的batch数 /// \return @@ -544,13 +543,12 @@ bool HybridMgmt::IsTrainEndBatch(int batchId) const // case 1:需要切eval // case 2:需要save时,补发pos后被阻塞,等待save完成,避免embCache状态发送变化 // batchId是从0开始的,所以要+1对上step - bool isNeedSwitchToEval = mgmtRankInfo.ctrlSteps[TRAIN_CHANNEL_ID] != -1 && - (batchId + 1) % mgmtRankInfo.ctrlSteps[TRAIN_CHANNEL_ID] == 0; - bool isNeedSave = mgmtRankInfo.ctrlSteps[SAVE_STEP_INDEX] != -1 && - mgmtRankInfo.ctrlSteps[SAVE_STEP_INDEX] != 0 && + bool isNeedSwitchToEval = + mgmtRankInfo.ctrlSteps[TRAIN_CHANNEL_ID] != -1 && (batchId + 1) % mgmtRankInfo.ctrlSteps[TRAIN_CHANNEL_ID] == 0; + bool isNeedSave = mgmtRankInfo.ctrlSteps[SAVE_STEP_INDEX] != -1 && mgmtRankInfo.ctrlSteps[SAVE_STEP_INDEX] != 0 && (batchId + 1) % mgmtRankInfo.ctrlSteps[SAVE_STEP_INDEX] == 0; - LOG_DEBUG("mgmtRankInfo.ctrlSteps[TRAIN_CHANNEL_ID]:{}, batchId:{}", - mgmtRankInfo.ctrlSteps[TRAIN_CHANNEL_ID], batchId); + LOG_DEBUG("mgmtRankInfo.ctrlSteps[TRAIN_CHANNEL_ID]:{}, batchId:{}", mgmtRankInfo.ctrlSteps[TRAIN_CHANNEL_ID], + batchId); LOG_DEBUG("isNeedSwitchToEval:{}, isNeedSave:{}", isNeedSwitchToEval, isNeedSave); return isNeedSwitchToEval || isNeedSave; } @@ -570,26 +568,23 @@ bool HybridMgmt::ParseKeys(int channelId, int& batchId, TaskType type) #ifndef GTEST LOG_INFO(MGMT + "channelId:{} batchId:{}, ParseKeys start.", channelId, batchId); TimeCost parseKeyTC; - bool remainBatch = true; // 是否从通道获取了数据 + bool remainBatch = true; // 是否从通道获取了数据 vector parseKeyThreadPool; for (const auto& embInfo : mgmtEmbInfo) { - EmbBaseInfo info = {.batchId=batchId, .channelId=channelId, .name=embInfo.name}; + EmbBaseInfo info = {.batchId = batchId, .channelId = channelId, .name = embInfo.name}; switch (type) { case TaskType::HBM: - parseKeyThreadPool.emplace_back([this, info, &remainBatch, embInfo]() { - ProcessEmbInfoHBM(info, remainBatch, embInfo.isGrad); - }); + parseKeyThreadPool.emplace_back( + [this, info, &remainBatch, embInfo]() { ProcessEmbInfoHBM(info, remainBatch, embInfo.isGrad); }); break; case TaskType::DDR: if (!isL3StorageEnabled) { - parseKeyThreadPool.emplace_back([this, info, &remainBatch, embInfo]() { - ProcessEmbInfoDDR(info, remainBatch); - }); + parseKeyThreadPool.emplace_back( + [this, info, &remainBatch, embInfo]() { ProcessEmbInfoDDR(info, remainBatch); }); } else { - parseKeyThreadPool.emplace_back([this, info, &remainBatch, embInfo]() { - ProcessEmbInfoL3Storage(info, remainBatch); - }); + parseKeyThreadPool.emplace_back( + [this, info, &remainBatch, embInfo]() { ProcessEmbInfoL3Storage(info, remainBatch); }); } break; default: @@ -608,14 +603,14 @@ bool HybridMgmt::ParseKeys(int channelId, int& batchId, TaskType type) if (!isRunning) { return false; } - LOG_DEBUG(MGMT + 
"channelId:{} batchId:{}, ParseKeys end, parseKeyTC(ms):{}", - channelId, batchId, parseKeyTC.ElapsedMS()); + LOG_DEBUG(MGMT + "channelId:{} batchId:{}, ParseKeys end, parseKeyTC(ms):{}", channelId, batchId, + parseKeyTC.ElapsedMS()); batchId++; #endif return true; } -void HybridMgmt::ProcessEmbInfoHBM(const EmbBaseInfo &info, bool& remainBatchOut, bool isGrad) +void HybridMgmt::ProcessEmbInfoHBM(const EmbBaseInfo& info, bool& remainBatchOut, bool isGrad) { TimeCost parseKeysTc; LOG_DEBUG("ProcessEmbInfoHBM table:{}, batchId:{}, channel:{}", info.name, info.batchId, info.channelId); @@ -628,13 +623,13 @@ void HybridMgmt::ProcessEmbInfoHBM(const EmbBaseInfo &info, bool& remainBatchOut return; } if (infoVecs == nullptr) { - LOG_INFO(MGMT + "table:{}, channelId:{} batchId:{}, ParseKeys infoVecs empty !", - info.name, info.channelId, info.batchId); + LOG_INFO(MGMT + "table:{}, channelId:{} batchId:{}, ParseKeys infoVecs empty !", info.name, info.channelId, + info.batchId); remainBatchOut = false; return; } - LOG_DEBUG("table:{}, channelId:{} batchId:{}, ParseKeysHBM GetInfoVec end", - info.name, info.channelId, info.batchId); + LOG_DEBUG("table:{}, channelId:{} batchId:{}, ParseKeysHBM GetInfoVec end", info.name, info.channelId, + info.batchId); // 动态shape场景下,获取all2all向量(通信量矩阵) SendAll2AllVec(info, remainBatchOut); @@ -644,10 +639,10 @@ void HybridMgmt::ProcessEmbInfoHBM(const EmbBaseInfo &info, bool& remainBatchOut // 发送查询向量 TimeCost sendLookupSyncTC; - hdTransfer->Send(TransferChannel::LOOKUP, { infoVecs->back() }, info.channelId, info.name); + hdTransfer->Send(TransferChannel::LOOKUP, {infoVecs->back()}, info.channelId, info.name); infoVecs->pop_back(); - LOG_DEBUG("table:{}, channelId:{} batchId:{}, sendLookupSyncTC(ms):{}", - info.name, info.channelId, info.batchId, sendLookupSyncTC.ElapsedMS()); + LOG_DEBUG("table:{}, channelId:{} batchId:{}, sendLookupSyncTC(ms):{}", info.name, info.channelId, info.batchId, + sendLookupSyncTC.ElapsedMS()); // 训练时,使用全局去重聚合梯度,发送全局去重的key和对应的恢复向量 if (mgmtRankInfo.useSumSameIdGradients && info.channelId == TRAIN_CHANNEL_ID) { @@ -657,18 +652,17 @@ void HybridMgmt::ProcessEmbInfoHBM(const EmbBaseInfo &info, bool& remainBatchOut // 发送恢复向量 TimeCost sendRestoreSyncTC; hdTransfer->Send(TransferChannel::RESTORE, *infoVecs, info.channelId, info.name); - LOG_DEBUG("table:{}, sendRestoreSyncTC(ms):{}, parseKeysTc HBM mode (ms):{}", - info.name, sendRestoreSyncTC.ElapsedMS(), parseKeysTc.ElapsedMS()); + LOG_DEBUG("table:{}, sendRestoreSyncTC(ms):{}, parseKeysTc HBM mode (ms):{}", info.name, + sendRestoreSyncTC.ElapsedMS(), parseKeysTc.ElapsedMS()); - LOG_INFO(MGMT + "table:{}, channelId:{} batchId:{}, embName:{}, ParseKeys with HBM mode end.", - info.name, info.channelId, info.batchId, info.name); + LOG_INFO(MGMT + "table:{}, channelId:{} batchId:{}, embName:{}, ParseKeys with HBM mode end.", info.name, + info.channelId, info.batchId, info.name); if (info.channelId == TRAIN_CHANNEL_ID) { alreadyTrainOnce = true; } } - /// 构造训练所需的各种向量数据 /// \param embName 表名 /// \param batchId 已处理的batch数 @@ -680,7 +674,7 @@ void HybridMgmt::ProcessEmbInfoDDR(const EmbBaseInfo& info, bool& remainBatchOut TimeCost getAndSendTensorsTC; LOG_DEBUG("ProcessEmbInfoDDR start, table:{}, channel:{}, batchId:{}", info.name, info.channelId, info.batchId); - if (info.channelId == TRAIN_CHANNEL_ID && info.batchId == hybridMgmtBlock->maxTrainStep) { + if (info.channelId == TRAIN_CHANNEL_ID && info.batchId == hybridMgmtBlock->maxTrainStep) { HandleReachMaxStepCase(info, remainBatchOut); return; } @@ 
-718,10 +712,10 @@ void HybridMgmt::ProcessEmbInfoDDR(const EmbBaseInfo& info, bool& remainBatchOut SendGlobalUniqueVec(info, uniqueKeys, restoreVecSec); TimeCost swapProcessTC; - auto &swapInPos = swapInKoPair.second; - auto &swapOutPos = swapOutKoPair.second; + auto& swapInPos = swapInKoPair.second; + auto& swapOutPos = swapOutKoPair.second; auto lastSwapInPos = lastSwapInPosMap[info.name]; - lastSwapInPosMap[info.name] = swapInPos; // 暂存待下一步发送 + lastSwapInPosMap[info.name] = swapInPos; // 暂存待下一步发送 auto isNeedReturn = HandleSpecialProcessStatusDDR(info, getAndSendTensorsTC, swapInKoPair, swapOutKoPair); if (isNeedReturn) { @@ -827,7 +821,6 @@ void HybridMgmt::EvictL3StorageKeys(const string& embName, const vectorEvictL3StorageEmbedding(embName, keys); } - /// 通过pyBind在python侧调用,通知hybridMgmt上层即将进行图的执行,需要进行唤醒 /// \param channelID 通道id /// \param steps 运行的步数,由于可能存在循环下沉,所以1个session run 对应N步 @@ -919,16 +912,14 @@ void HybridMgmt::SetOptimizerInfo(const string& embName, OptimizerInfo optimInfo } // L3Storage -void HybridMgmt::LookUpAndRemoveAddrs(const EmbTaskInfo &info) +void HybridMgmt::LookUpAndRemoveAddrs(const EmbTaskInfo& info) { uint64_t memSize = info.extEmbeddingSize * sizeof(float); const std::string hbmSwapKeyQueName = "HBMSwapKeyQue"; const std::string ddrSwapKeyQueName = "DDRSwapKeyQue"; - auto lookUpFunc = [this, memSize, info]( - std::map>> &fromQue, - std::map>> &toQue, - const string &swapStr, const string &fromQueName - ) { + auto lookUpFunc = [this, memSize, info](std::map>>& fromQue, + std::map>>& toQue, + const string& swapStr, const string& fromQueName) { std::vector keys = fromQue[info.name + swapStr].WaitAndPop(); if (!isRunning) { return; @@ -942,8 +933,8 @@ void HybridMgmt::LookUpAndRemoveAddrs(const EmbTaskInfo &info) throw runtime_error("EmbeddingLookupAddrs failed! error code:" + std::to_string(rc)); } if (&fromQue == &DDRSwapKeyQue && swapStr == SWAP_OUT_STR) { - for (auto &addr : addrs) { - auto *newAddr = (float*)malloc(memSize); + for (auto& addr : addrs) { + auto* newAddr = (float*)malloc(memSize); rc = memcpy_s(newAddr, memSize, addr, memSize); if (rc != 0) { throw runtime_error("memcpy_s failed! error code:" + std::to_string(rc)); @@ -968,7 +959,7 @@ void HybridMgmt::LookUpAndRemoveAddrs(const EmbTaskInfo &info) } // DDR -void HybridMgmt::LookUpSwapAddrs(const string &embName, const string &swapStr) +void HybridMgmt::LookUpSwapAddrs(const string& embName, const string& swapStr) { int id = 0; std::string swapName = embName + swapStr; @@ -977,21 +968,20 @@ void HybridMgmt::LookUpSwapAddrs(const string &embName, const string &swapStr) if (!isRunning) { return; } - vector addrs; + vector addrs; TimeCost lookupAddrsTC; int rc = embCache->EmbeddingLookupAddrs(embName, keys, addrs); if (rc != H_OK) { lookupAddrSuccess = false; throw runtime_error("EmbeddingLookupAddrs failed! 
error code: " + std::to_string(rc)); } - LOG_DEBUG( - "table:{}, swapStr:{}, keys.size:{}, addrs.size:{}, pushId:{}, lookupAddrsTC(ms):{}", - embName, swapStr, keys.size(), addrs.size(), id, lookupAddrsTC.ElapsedMS()); + LOG_DEBUG("table:{}, swapStr:{}, keys.size:{}, addrs.size:{}, pushId:{}, lookupAddrsTC(ms):{}", embName, + swapStr, keys.size(), addrs.size(), id, lookupAddrsTC.ElapsedMS()); HBMSwapAddrsQue[swapName].Pushv(addrs); - if (swapStr==SWAP_IN_STR) { + if (swapStr == SWAP_IN_STR) { lookUpSwapInAddrsPushId[embName]++; - LOG_DEBUG("LookUpSwapAddrs, table:{}, pushId:{}, lookUpSwapInAddrsPushId:{}", - embName, id, lookUpSwapInAddrsPushId[embName]); + LOG_DEBUG("LookUpSwapAddrs, table:{}, pushId:{}, lookUpSwapInAddrsPushId:{}", embName, id, + lookUpSwapInAddrsPushId[embName]); } id++; } @@ -1006,15 +996,15 @@ void HybridMgmt::FetchDeviceEmb() if (mgmtRankInfo.isDDR) { // DDR模式保存host的emb表以及hashmap LOG_DEBUG(MGMT + "start host side save: ddr mode"); - for (const auto &embInfo: mgmtEmbInfo) { + for (const auto& embInfo : mgmtEmbInfo) { std::vector> koVec; embCache->ExportDeviceKeyOffsetPairs(embInfo.name, koVec); std::vector swapOutPos; - for (const auto &p : koVec) { + for (const auto& p : koVec) { swapOutPos.push_back(p.second); } - vector swapTensor; + vector swapTensor; swapTensor.emplace_back(Vec2TensorI32(swapOutPos)); swapTensor.emplace_back(Tensor(tensorflow::DT_INT32, {1})); auto swapOutLen = swapTensor.back().flat(); @@ -1030,7 +1020,7 @@ void HybridMgmt::FetchDeviceEmb() // 这里就是新增的embedding处理线程 void HybridMgmt::EmbeddingTask() { - for (const auto& embInfo: mgmtEmbInfo) { + for (const auto& embInfo : mgmtEmbInfo) { lastUpdateFinishStepMap[embInfo.name] = 0; lastLookUpFinishStepMap[embInfo.name] = 0; lastSendFinishStepMap[embInfo.name] = 0; @@ -1045,7 +1035,7 @@ void HybridMgmt::EmbeddingTask() void HybridMgmt::MultiThreadEmbHDTransWrap() { for (int index = 0; index < EMBEDDING_THREAD_NUM; index++) { - for (const auto& embInfo: mgmtEmbInfo) { + for (const auto& embInfo : mgmtEmbInfo) { CreateEmbeddingLookUpAndSendThread(index, embInfo); CreateEmbeddingReceiveAndUpdateThread(index, embInfo); } @@ -1059,13 +1049,11 @@ void HybridMgmt::EmbeddingLookUpAndSendDDR(int batchId, int index, const EmbInfo cvNotifyIndex = index + 1; } - EmbTaskInfo info = { - .batchId=batchId, - .threadIdx=index, - .cvNotifyIndex=cvNotifyIndex, - .extEmbeddingSize=embInfo.extEmbeddingSize, - .name=embInfo.name - }; + EmbTaskInfo info = {.batchId = batchId, + .threadIdx = index, + .cvNotifyIndex = cvNotifyIndex, + .extEmbeddingSize = embInfo.extEmbeddingSize, + .name = embInfo.name}; vector h2dEmb; auto isSuccess = EmbeddingLookUpDDR(info, h2dEmb); @@ -1084,13 +1072,11 @@ void HybridMgmt::EmbeddingReceiveAndUpdateDDR(int batchId, int index, const EmbI cvNotifyIndex = index + 1; } - EmbTaskInfo info = { - .batchId=batchId, - .threadIdx=index, - .cvNotifyIndex=cvNotifyIndex, - .extEmbeddingSize=embInfo.extEmbeddingSize, - .name=embInfo.name - }; + EmbTaskInfo info = {.batchId = batchId, + .threadIdx = index, + .cvNotifyIndex = cvNotifyIndex, + .extEmbeddingSize = embInfo.extEmbeddingSize, + .name = embInfo.name}; float* ptr = nullptr; vector swapOutAddrs; @@ -1110,13 +1096,11 @@ void HybridMgmt::EmbeddingLookUpAndSendL3Storage(int batchId, int index, const E cvNotifyIndex = index + 1; } - EmbTaskInfo info = { - .batchId=batchId, - .threadIdx=index, - .cvNotifyIndex=cvNotifyIndex, - .extEmbeddingSize=embInfo.extEmbeddingSize, - .name=embInfo.name - }; + EmbTaskInfo info = {.batchId = batchId, + .threadIdx = 
index, + .cvNotifyIndex = cvNotifyIndex, + .extEmbeddingSize = embInfo.extEmbeddingSize, + .name = embInfo.name}; vector h2dEmb; auto isSuccess = EmbeddingLookUpL3Storage(info, h2dEmb); @@ -1135,13 +1119,11 @@ void HybridMgmt::EmbeddingReceiveAndUpdateL3Storage(int batchId, int index, cons cvNotifyIndex = index + 1; } - EmbTaskInfo info = { - .batchId=batchId, - .threadIdx=index, - .cvNotifyIndex=cvNotifyIndex, - .extEmbeddingSize=embInfo.extEmbeddingSize, - .name=embInfo.name - }; + EmbTaskInfo info = {.batchId = batchId, + .threadIdx = index, + .cvNotifyIndex = cvNotifyIndex, + .extEmbeddingSize = embInfo.extEmbeddingSize, + .name = embInfo.name}; float* ptr = nullptr; vector swapOutAddrs; @@ -1151,7 +1133,6 @@ void HybridMgmt::EmbeddingReceiveAndUpdateL3Storage(int batchId, int index, cons EmbeddingUpdateL3Storage(info, ptr, swapOutAddrs, dims0); } - /// 构造训练所需的各种向量数据 /// \param embName 表名 /// \param batchId 已处理的batch数 @@ -1164,7 +1145,7 @@ void HybridMgmt::ProcessEmbInfoL3Storage(const EmbBaseInfo& info, bool& remainBa TimeCost getAndSendTensorsTC; LOG_DEBUG("ProcessEmbInfoL3Storage table:{}, channel:{}, batchId:{}", info.name, info.channelId, info.batchId); - if (info.channelId == TRAIN_CHANNEL_ID && info.batchId == hybridMgmtBlock->maxTrainStep) { + if (info.channelId == TRAIN_CHANNEL_ID && info.batchId == hybridMgmtBlock->maxTrainStep) { HandleReachMaxStepCase(info, remainBatchOut); return; } @@ -1202,12 +1183,12 @@ void HybridMgmt::ProcessEmbInfoL3Storage(const EmbBaseInfo& info, bool& remainBa SendGlobalUniqueVec(info, uniqueKeys, restoreVecSec); TimeCost swapProcessTC; - auto &swapInKeys = swapInKoPair.first; - auto &swapInPos = swapInKoPair.second; - auto &swapOutKeys = swapOutKoPair.first; - auto &swapOutPos = swapOutKoPair.second; + auto& swapInKeys = swapInKoPair.first; + auto& swapInPos = swapInKoPair.second; + auto& swapOutKeys = swapOutKoPair.first; + auto& swapOutPos = swapOutKoPair.second; auto lastSwapInPos = lastSwapInPosMap[info.name]; - lastSwapInPosMap[info.name] = swapInPos; // 暂存待下一步发送 + lastSwapInPosMap[info.name] = swapInPos; // 暂存待下一步发送 auto isNeedReturn = HandleSpecialProcessStatusL3Storage(info, getAndSendTensorsTC, swapInKoPair, swapOutKoPair); if (isNeedReturn) { @@ -1232,18 +1213,17 @@ void HybridMgmt::ProcessEmbInfoL3Storage(const EmbBaseInfo& info, bool& remainBa #endif } -void HybridMgmt::SendTensorForSwap(const EmbBaseInfo& info, - const vector &swapInPosUint, - const vector &swapOutPosUint) +void HybridMgmt::SendTensorForSwap(const EmbBaseInfo& info, const vector& swapInPosUint, + const vector& swapOutPosUint) { #ifndef GTEST vector swapTensor; swapTensor.emplace_back(Vec2TensorI32(swapInPosUint)); swapTensor.emplace_back(Vec2TensorI32(swapOutPosUint)); - swapTensor.emplace_back(Tensor(tensorflow::DT_INT32, { 1 })); + swapTensor.emplace_back(Tensor(tensorflow::DT_INT32, {1})); auto swapInLen = swapTensor.back().flat(); swapInLen(0) = swapInPosUint.size(); - swapTensor.emplace_back(Tensor(tensorflow::DT_INT32, { 1 })); + swapTensor.emplace_back(Tensor(tensorflow::DT_INT32, {1})); auto swapOutLen = swapTensor.back().flat(); swapOutLen(0) = swapOutPosUint.size(); @@ -1251,11 +1231,11 @@ void HybridMgmt::SendTensorForSwap(const EmbBaseInfo& info, #endif } -void HybridMgmt::InitDataPipelineForDDR(const string &embName) +void HybridMgmt::InitDataPipelineForDDR(const string& embName) { // 初始化公共队列 - HBMSwapKeyQue[embName+SWAP_IN_STR]; - HBMSwapKeyQue[embName+SWAP_OUT_STR]; + HBMSwapKeyQue[embName + SWAP_IN_STR]; + HBMSwapKeyQue[embName + SWAP_OUT_STR]; 
HBMSwapAddrsQue[embName + SWAP_IN_STR]; HBMSwapAddrsQue[embName + SWAP_OUT_STR]; @@ -1269,11 +1249,11 @@ void HybridMgmt::InitDataPipelineForDDR(const string &embName) LOG_DEBUG("data pipeline for ddr init"); } -void HybridMgmt::InitDataPipelineForL3Storage(const string &embName, int extEmbeddingSize) +void HybridMgmt::InitDataPipelineForL3Storage(const string& embName, int extEmbeddingSize) { // 初始化公共队列 - HBMSwapKeyQue[embName+SWAP_IN_STR]; - HBMSwapKeyQue[embName+SWAP_OUT_STR]; + HBMSwapKeyQue[embName + SWAP_IN_STR]; + HBMSwapKeyQue[embName + SWAP_OUT_STR]; HBMSwapAddrsQue[embName + SWAP_IN_STR]; HBMSwapAddrsQue[embName + SWAP_OUT_STR]; @@ -1300,7 +1280,7 @@ void HybridMgmt::InitEmbeddingCache(const vector& embInfos) EmbeddingMgmt::Instance()->SetEmbCacheForEmbTable(embCache); EmbeddingMgmt::Instance()->SetHDTransferForEmbTable(hdTransfer); - for (auto embInfo: embInfos) { + for (auto embInfo : embInfos) { if (isL3StorageEnabled) { InitDataPipelineForL3Storage(embInfo.name, embInfo.extEmbeddingSize); } else { @@ -1314,9 +1294,9 @@ void HybridMgmt::InitEmbeddingCache(const vector& embInfos) embInfo.name, embInfo.hostVocabSize, embInfo.extEmbeddingSize, embInfo.devVocabSize); EmbCache::EmbCacheInfo embCacheInfo(embInfo.name, embInfo.hostVocabSize, embInfo.embeddingSize, embInfo.extEmbeddingSize, embInfo.devVocabSize); - size_t prefill = std::max(embInfo.hostVocabSize/HOST_TO_PREFILL_RATIO, embInfo.devVocabSize); - int ret = embCache->CreateCacheForTable( - embCacheInfo, embInfo.initializeInfos, INVALID_KEY_VALUE, prefill, EMBEDDING_THREAD_NUM); + size_t prefill = std::max(embInfo.hostVocabSize / HOST_TO_PREFILL_RATIO, embInfo.devVocabSize); + int ret = embCache->CreateCacheForTable(embCacheInfo, embInfo.initializeInfos, INVALID_KEY_VALUE, prefill, + EMBEDDING_THREAD_NUM); if (ret != H_OK) { throw runtime_error(embInfo.name + "create cache for table failed, error code: " + std::to_string(ret)); } @@ -1325,22 +1305,22 @@ void HybridMgmt::InitEmbeddingCache(const vector& embInfos) void HybridMgmt::JoinEmbeddingCacheThread() { - for (auto &p : HBMSwapAddrsQue) { + for (auto& p : HBMSwapAddrsQue) { p.second.DestroyQueue(); } - for (auto &p : HBMSwapKeyQue) { + for (auto& p : HBMSwapKeyQue) { p.second.DestroyQueue(); } - for (auto &p : HBMSwapKeyForL3StorageQue) { + for (auto& p : HBMSwapKeyForL3StorageQue) { p.second.DestroyQueue(); } - for (auto &p : DDRSwapKeyQue) { + for (auto& p : DDRSwapKeyQue) { p.second.DestroyQueue(); } - for (auto &p : DDRSwapKeyForL3StorageQue) { + for (auto& p : DDRSwapKeyForL3StorageQue) { p.second.DestroyQueue(); } - for (auto &p : DDRSwapAddrsQue) { + for (auto& p : DDRSwapAddrsQue) { p.second.DestroyQueue(); } for (auto& t : EmbeddingLookUpAndSendThreadPool) { @@ -1363,25 +1343,26 @@ void HybridMgmt::HandleReachMaxStepCase(const EmbBaseInfo& info, bool& remainBat // 2. 如果切换过: // a. eval场景跑完,不用send,外面自然退出 // b. 
save场景,能触发,说明期望的train step已经跑完(由IsTrainEndBatch判定send),当前step也不用send - LOG_DEBUG("table:{}, batchId:{}, ProcessStatus:{}, reach maxTrainStep", - info.name, info.batchId, ProcessStatus2Str(ProcessStatus::NORMAL)); + LOG_DEBUG("table:{}, batchId:{}, ProcessStatus:{}, reach maxTrainStep", info.name, info.batchId, + ProcessStatus2Str(ProcessStatus::NORMAL)); if (specialProcessStatus[info.name] == ProcessStatus::NORMAL) { LOG_DEBUG("table:{}, batchId:{}, need send swap tensor" - " for last step to finish train", info.name, info.batchId); + " for last step to finish train", + info.name, info.batchId); std::vector emptySwapOutPos; SendTensorForSwap(info, lastSwapInPosMap[info.name], emptySwapOutPos); } else { - LOG_DEBUG("table:{}, batchId:{}, switch from eval or save, unnecessary to send emptySwapOutPos", - info.name, info.batchId); + LOG_DEBUG("table:{}, batchId:{}, switch from eval or save, unnecessary to send emptySwapOutPos", info.name, + info.batchId); } remainBatchOut = false; hybridMgmtBlock->SetBlockStatus(TRAIN_CHANNEL_ID, true); } -void HybridMgmt::HandleEosCase(const EmbBaseInfo& info, bool &remainBatchOut) +void HybridMgmt::HandleEosCase(const EmbBaseInfo& info, bool& remainBatchOut) { - LOG_INFO("GetUniqueKeys get eos, handle final batch for current epoch, table:{}, channel:{}, batchId:{}", - info.name, info.channelId, info.batchId); + LOG_INFO("GetUniqueKeys get eos, handle final batch for current epoch, table:{}, channel:{}, batchId:{}", info.name, + info.channelId, info.batchId); bool sendAllChannel = false; if (info.channelId == TRAIN_CHANNEL_ID) { vector emptySwapOutPos; @@ -1418,8 +1399,8 @@ void HybridMgmt::HandleEosCase(const EmbBaseInfo& info, bool &remainBatchOut) // train+eval+train场景 // 交给train的ProcessEmbInfoDDR启动最后n-1步eval // train发送pos让eval step n-1跑完,到eval step n时各channel遇到eos后结束(train、eval共享的channel除外) - LOG_INFO("GetUniqueKeys get eos, skip send pos for eval channel, table:{}, batchId:{}", - info.name, info.batchId); + LOG_INFO("GetUniqueKeys get eos, skip send pos for eval channel, table:{}, batchId:{}", info.name, + info.batchId); } } KEY_PROCESS_INSTANCE->SendEos(info.name, info.batchId, info.channelId, sendAllChannel); @@ -1454,22 +1435,22 @@ bool HybridMgmt::EmbeddingReceiveDDR(const EmbTaskInfo& info, float*& ptr, vecto if (aclData == nullptr) { throw runtime_error("Acl get tensor data from dataset failed."); } - ptr = reinterpret_cast(acltdtGetDataAddrFromItem(aclData)); + ptr = reinterpret_cast(acltdtGetDataAddrFromItem(aclData)); // 判断拿到的embedding个数是否与swapOutKeys个数相等 size_t dimNum = acltdtGetDimNumFromItem(aclData); int64_t dims[dimNum]; acltdtGetDimsFromItem(aclData, dims, dimNum); - LOG_DEBUG("table:{}, batchId:{}, dims[0]:{}, swapOutAddrs size:{}", - info.name, info.batchId, dims[0], swapOutAddrs.size()); + LOG_DEBUG("table:{}, batchId:{}, dims[0]:{}, swapOutAddrs size:{}", info.name, info.batchId, dims[0], + swapOutAddrs.size()); if (dims[0] != static_cast(swapOutAddrs.size())) { throw runtime_error("data dims[0] != swapOutKeys.size()"); } } - LOG_DEBUG("table:{}, batchId:{}, thread:{}, EmbeddingRecvTC(ms):{}", - info.name, info.batchId, info.threadIdx, EmbeddingRecvTC.ElapsedMS()); + LOG_DEBUG("table:{}, batchId:{}, thread:{}, EmbeddingRecvTC(ms):{}", info.name, info.batchId, info.threadIdx, + EmbeddingRecvTC.ElapsedMS()); lastRecvFinishStepMap[info.name]++; cvLastRecvFinishMap[info.name][info.cvNotifyIndex].notify_all(); @@ -1486,8 +1467,8 @@ void HybridMgmt::EmbeddingUpdateDDR(const EmbTaskInfo& info, const float* embPtr uint64_t memSize = 
info.extEmbeddingSize * sizeof(float); uint64_t extEmbeddingSize = info.extEmbeddingSize; -# pragma omp parallel for num_threads(MGMT_CPY_THREADS) default(none) \ - shared(swapOutAddrs, embPtr, extEmbeddingSize, memSize) +#pragma omp parallel for num_threads(MGMT_CPY_THREADS) default(none) \ + shared(swapOutAddrs, embPtr, extEmbeddingSize, memSize) for (uint64_t i = 0; i < swapOutAddrs.size(); i++) { auto rc = memcpy_s(swapOutAddrs[i], memSize, embPtr + i * extEmbeddingSize, memSize); if (rc != 0) { @@ -1497,18 +1478,19 @@ void HybridMgmt::EmbeddingUpdateDDR(const EmbTaskInfo& info, const float* embPtr if (MxRec::Logger::GetLevel() <= MxRec::Logger::DEBUG) { string sample; if (!swapOutAddrs.empty()) { - sample = FloatPtrToLimitStr(swapOutAddrs.front(), info.extEmbeddingSize); // print first element + sample = FloatPtrToLimitStr(swapOutAddrs.front(), info.extEmbeddingSize); // print first element } LOG_DEBUG("table:{}, batchId:{}, thread:{}, receive d2hEmb, ext emb:{}, emb size:{}, emb samples:{}, " - "EmbeddingUpdateTC(ms):{}", info.name.c_str(), info.batchId, info.threadIdx, - info.extEmbeddingSize, swapOutAddrs.size(), sample, EmbeddingUpdateTC.ElapsedMS()); + "EmbeddingUpdateTC(ms):{}", + info.name.c_str(), info.batchId, info.threadIdx, info.extEmbeddingSize, swapOutAddrs.size(), sample, + EmbeddingUpdateTC.ElapsedMS()); } lastUpdateFinishStepMap[info.name]++; cvLastUpdateFinishMap[info.name][info.cvNotifyIndex].notify_all(); } -bool HybridMgmt::EmbeddingLookUpDDR(const EmbTaskInfo &info, vector& h2dEmb) +bool HybridMgmt::EmbeddingLookUpDDR(const EmbTaskInfo& info, vector& h2dEmb) { std::unique_lock lastUpdateFinishLocker(lastUpdateFinishMutexMap[info.name][info.threadIdx]); cvLastUpdateFinishMap[info.name][info.threadIdx].wait(lastUpdateFinishLocker, [info, this] { @@ -1537,7 +1519,7 @@ bool HybridMgmt::EmbeddingLookUpDDR(const EmbTaskInfo &info, vector& h2d return true; } -void HybridMgmt::EmbeddingSendDDR(const EmbTaskInfo &info, vector& h2dEmb) +void HybridMgmt::EmbeddingSendDDR(const EmbTaskInfo& info, vector& h2dEmb) { std::unique_lock lastSendFinishLocker(lastSendFinishMutexMap[info.name][info.threadIdx]); cvLastSendFinishMap[info.name][info.threadIdx].wait(lastSendFinishLocker, [info, this] { @@ -1547,10 +1529,11 @@ void HybridMgmt::EmbeddingSendDDR(const EmbTaskInfo &info, vector& h2dEm hdTransfer->Send(TransferChannel::H2D, h2dEmb, TRAIN_CHANNEL_ID, info.name, info.batchId); lastSendFinishStepMap[info.name]++; cvLastSendFinishMap[info.name][info.cvNotifyIndex].notify_all(); - LOG_DEBUG("table:{}, batchId:{}, thread:{}, SendH2DEmbTC(ms):{}", - info.name, info.batchId, info.threadIdx, SendTC.ElapsedMS()); + LOG_DEBUG("table:{}, batchId:{}, thread:{}, SendH2DEmbTC(ms):{}", info.name, info.batchId, info.threadIdx, + SendTC.ElapsedMS()); - // 对于end of sequence场景,key process需要基于h2dNextBatchId等待每个table都完成了最后1个step发送,才能发EOS至各channel + // 对于end of sequence场景,key + // process需要基于h2dNextBatchId等待每个table都完成了最后1个step发送,才能发EOS至各channel hybridMgmtBlock->h2dNextBatchId[info.name]++; LOG_DEBUG("h2dNextBatchId, table:{}, next batchId:{}", info.name, hybridMgmtBlock->h2dNextBatchId[info.name]); } @@ -1603,8 +1586,8 @@ void HybridMgmt::CreateEmbeddingReceiveAndUpdateThread(int index, const EmbInfo& }); } -bool HybridMgmt::EmbeddingReceiveL3Storage(const EmbTaskInfo &info, float *&ptr, - vector &swapOutAddrs, int64_t& dims0) +bool HybridMgmt::EmbeddingReceiveL3Storage(const EmbTaskInfo& info, float*& ptr, vector& swapOutAddrs, + int64_t& dims0) { std::unique_lock 
lastRecvFinishLocker(lastRecvFinishMutexMap[info.name][info.threadIdx]); cvLastRecvFinishMap[info.name][info.threadIdx].wait(lastRecvFinishLocker, [info, this] { @@ -1635,26 +1618,26 @@ bool HybridMgmt::EmbeddingReceiveL3Storage(const EmbTaskInfo &info, float *&ptr, if (aclData == nullptr) { throw runtime_error("Acl get tensor data from dataset failed."); } - ptr = reinterpret_cast(acltdtGetDataAddrFromItem(aclData)); + ptr = reinterpret_cast(acltdtGetDataAddrFromItem(aclData)); // 判断拿到的embedding个数是否与swapOutKeys个数相等 size_t dimNum = acltdtGetDimNumFromItem(aclData); int64_t dims[dimNum]; acltdtGetDimsFromItem(aclData, dims, dimNum); - LOG_DEBUG("table:{}, batchId:{}, recv d2h, dims[0]:{}, swapOutAddrs.size:{}", - info.name, info.batchId, dims[0], swapOutAddrs.size()); + LOG_DEBUG("table:{}, batchId:{}, recv d2h, dims[0]:{}, swapOutAddrs.size:{}", info.name, info.batchId, dims[0], + swapOutAddrs.size()); dims0 = dims[0]; } - LOG_DEBUG("table:{}, batchId:{}, thread:{}, EmbeddingRecvTC(ms):{}", - info.name.c_str(), info.batchId, info.threadIdx, EmbeddingRecvTC.ElapsedMS()); + LOG_DEBUG("table:{}, batchId:{}, thread:{}, EmbeddingRecvTC(ms):{}", info.name.c_str(), info.batchId, + info.threadIdx, EmbeddingRecvTC.ElapsedMS()); lastRecvFinishStepMap[info.name]++; cvLastRecvFinishMap[info.name][info.cvNotifyIndex].notify_all(); return true; } -void HybridMgmt::EmbeddingUpdateL3Storage(const EmbTaskInfo& info, float *embPtr, - vector& swapOutAddrs, int64_t& dims0) +void HybridMgmt::EmbeddingUpdateL3Storage(const EmbTaskInfo& info, float* embPtr, vector& swapOutAddrs, + int64_t& dims0) { std::unique_lock lastUpdateFinishLocker(lastUpdateFinishMutexMap[info.name][info.threadIdx]); cvLastUpdateFinishMap[info.name][info.threadIdx].wait(lastUpdateFinishLocker, [info, this] { @@ -1669,16 +1652,16 @@ void HybridMgmt::EmbeddingUpdateL3Storage(const EmbTaskInfo& info, float *embPtr uint64_t memSize = info.extEmbeddingSize * sizeof(float); uint64_t extEmbeddingSize = info.extEmbeddingSize; // DDR更新 -# pragma omp parallel for num_threads(MGMT_CPY_THREADS) default(none) \ - shared(swapOutAddrs, swapOutDDRAddrOffs, embPtr, extEmbeddingSize, memSize) +#pragma omp parallel for num_threads(MGMT_CPY_THREADS) default(none) \ + shared(swapOutAddrs, swapOutDDRAddrOffs, embPtr, extEmbeddingSize, memSize) for (uint64_t i = 0; i < swapOutAddrs.size(); i++) { auto rc = memcpy_s(swapOutAddrs[i], memSize, embPtr + swapOutDDRAddrOffs[i] * extEmbeddingSize, memSize); if (rc != 0) { throw runtime_error("memcpy_s failed, error code:" + to_string(rc)); } } - LOG_DEBUG("table:{}, batchId:{}, thread:{}, EmbeddingUpdateTC(ms):{}", - info.name.c_str(), info.batchId, info.threadIdx, EmbeddingUpdateTC.ElapsedMS()); + LOG_DEBUG("table:{}, batchId:{}, thread:{}, EmbeddingUpdateTC(ms):{}", info.name.c_str(), info.batchId, + info.threadIdx, EmbeddingUpdateTC.ElapsedMS()); // L3Storage更新 TimeCost L3StorageUpdateTC = TimeCost(); @@ -1693,8 +1676,8 @@ void HybridMgmt::EmbeddingUpdateL3Storage(const EmbTaskInfo& info, float *embPtr } cacheManager->UpdateL3StorageEmb(info.name, embPtr, extEmbeddingSize, swapOutL3StorageKeys, swapOutL3StorageAddrOffs); - LOG_DEBUG("table:{}, batchId:{}, thread{}, L3StorageUpdateTC(ms):{}", - info.name.c_str(), info.batchId, info.threadIdx, L3StorageUpdateTC.ElapsedMS()); + LOG_DEBUG("table:{}, batchId:{}, thread{}, L3StorageUpdateTC(ms):{}", info.name.c_str(), info.batchId, + info.threadIdx, L3StorageUpdateTC.ElapsedMS()); lastUpdateFinishStepMap[info.name]++; 
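    // Advance the per-table update counter before waking waiters: the EmbeddingLookUp* threads block
    // on cvLastUpdateFinishMap with a predicate that re-checks lastUpdateFinishStepMap, so they may
    // proceed only once this batch's update has been recorded.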
cvLastUpdateFinishMap[info.name][info.cvNotifyIndex].notify_all(); @@ -1726,8 +1709,8 @@ bool HybridMgmt::EmbeddingLookUpL3Storage(const EmbTaskInfo& info, vectorTransferDDR2L3Storage(info.name, info.extEmbeddingSize, DDR2L3StorageKeys, DDR2L3StorageAddrs); - LOG_DEBUG("table:{}, thread:{}, transferDDR2L3StorageTC(ms):{}", - info.name.c_str(), info.threadIdx, transferDDR2L3StorageTC.ElapsedMS()); + LOG_DEBUG("table:{}, thread:{}, transferDDR2L3StorageTC(ms):{}", info.name.c_str(), info.threadIdx, + transferDDR2L3StorageTC.ElapsedMS()); TimeCost fetchL3StorageEmb2DDRTC = TimeCost(); // swapInKeys中在L3Storage的挪到DDR @@ -1737,8 +1720,8 @@ bool HybridMgmt::EmbeddingLookUpL3Storage(const EmbTaskInfo& info, vectorFetchL3StorageEmb2DDR(info.name, info.extEmbeddingSize, L3Storage2DDRKeys, L3Storage2DDRAddrs); - LOG_DEBUG("table:{}, thread:{}, fetchL3StorageEmb2DDRTC(ms):{}", - info.name.c_str(), info.threadIdx, fetchL3StorageEmb2DDRTC.ElapsedMS()); + LOG_DEBUG("table:{}, thread:{}, fetchL3StorageEmb2DDRTC(ms):{}", info.name.c_str(), info.threadIdx, + fetchL3StorageEmb2DDRTC.ElapsedMS()); bool isSuccess = BuildH2DEmbedding(info, h2dEmb); if (!isSuccess) { @@ -1763,12 +1746,13 @@ void HybridMgmt::EmbeddingSendL3Storage(const EmbTaskInfo& info, vector& cvLastSendFinishMap[info.name][info.cvNotifyIndex].notify_all(); LOG_DEBUG("table:{}, thread:{}, SendH2DEmbTC(ms):{}", info.name.c_str(), info.threadIdx, SendTC.ElapsedMS()); - // 对于end of sequence场景,key process需要基于h2dNextBatchId等待每个table都完成了最后1个step发送,才能发EOS至各channel + // 对于end of sequence场景,key + // process需要基于h2dNextBatchId等待每个table都完成了最后1个step发送,才能发EOS至各channel hybridMgmtBlock->h2dNextBatchId[info.name]++; LOG_DEBUG("h2dNextBatchId, table:{}, next batchId:{}", info.name, hybridMgmtBlock->h2dNextBatchId[info.name]); } -void HybridMgmt::HandleEosCaseHBM(const string &embName, int batchId, int channelId, bool &remainBatchOut) +void HybridMgmt::HandleEosCaseHBM(const string& embName, int batchId, int channelId, bool& remainBatchOut) { bool sendAllChannel = false; if (channelId == EVAL_CHANNEL_ID) { @@ -1813,19 +1797,19 @@ void HybridMgmt::HandleFirstBatchCaseDDR(const EmbBaseInfo& info, pair, vector>& swapOutKoPair) { TimeCost swapProcessTC; - auto &swapInKeys = swapInKoPair.first; - auto &swapInPos = swapInKoPair.second; - auto &swapOutKeys = swapOutKoPair.first; - auto &swapOutPos = swapOutKoPair.second; + auto& swapInKeys = swapInKoPair.first; + auto& swapInPos = swapInKoPair.second; + auto& swapOutKeys = swapOutKoPair.first; + auto& swapOutPos = swapOutKoPair.second; vector emptySwapOutKeys; - LOG_DEBUG("table:{}, batchId:{}, channelId:{}, swapInSize:{}, swapOutSize:{}", - info.name, info.batchId, info.channelId, swapInKoPair.first.size(), emptySwapOutKeys.size()); + LOG_DEBUG("table:{}, batchId:{}, channelId:{}, swapInSize:{}, swapOutSize:{}", info.name, info.batchId, + info.channelId, swapInKoPair.first.size(), emptySwapOutKeys.size()); trainTestSwitchInfoStore[info.name] = {swapOutKeys, swapOutPos}; LOG_DEBUG("handle first batch case, delay sending swapInPos, table:{}", info.name); - LOG_DEBUG("enqueue HBMSwapKeyQue table:{}, batchId:{}, channelId:{}, swapInSize:{}, swapOutSize:{}", - info.name, info.batchId, info.channelId, swapInKeys.size(), emptySwapOutKeys.size()); + LOG_DEBUG("enqueue HBMSwapKeyQue table:{}, batchId:{}, channelId:{}, swapInSize:{}, swapOutSize:{}", info.name, + info.batchId, info.channelId, swapInKeys.size(), emptySwapOutKeys.size()); HBMSwapKeyQue[info.name + SWAP_OUT_STR].Pushv(emptySwapOutKeys); HBMSwapKeyQue[info.name + 
SWAP_IN_STR].Pushv(swapInKeys); } @@ -1836,8 +1820,8 @@ void HybridMgmt::HandleFirstBatchCaseL3Storage(const EmbBaseInfo& info, { // 发现train、save、eval切换,先保存状态,发emptySwapOutKeys以对应上一步的emptySwapOutPos vector emptySwapOutKeys; - LOG_DEBUG("table:{}, batchId:{}, channelId:{}, swapInSize:{}, swapOutSize:{}", - info.name, info.batchId, info.channelId, swapInKoPair.first.size(), emptySwapOutKeys.size()); + LOG_DEBUG("table:{}, batchId:{}, channelId:{}, swapInSize:{}, swapOutSize:{}", info.name, info.batchId, + info.channelId, swapInKoPair.first.size(), emptySwapOutKeys.size()); trainTestSwitchInfoStore[info.name] = {swapOutKoPair.first, swapOutKoPair.second}; TimeCost ProcessSwapInKeysTC = TimeCost(); @@ -1851,14 +1835,14 @@ void HybridMgmt::HandleFirstBatchCaseL3Storage(const EmbBaseInfo& info, vector emptySwapOutL3StorageKeys; vector emptySwapOutL3StorageAddrOff; - LOG_DEBUG("table:{}, batchId:{}, channelId:{}, swapInSize:{}, swapOutSize:{}", - info.name, info.batchId, info.channelId, swapInKoPair.first.size(), swapOutKoPair.first.size()); + LOG_DEBUG("table:{}, batchId:{}, channelId:{}, swapInSize:{}, swapOutSize:{}", info.name, info.batchId, + info.channelId, swapInKoPair.first.size(), swapOutKoPair.first.size()); LOG_DEBUG("table:{}, batchId:{}, channelId:{}, swapOutDDRKeys.size:{}, swapOutDDRAddrOffs.size:{}, " "swapOutL3StorageKeys.size:{}, swapOutL3StorageAddrOff.size:{}", info.name, info.batchId, info.channelId, emptySwapOutDDRKeys.size(), emptySwapOutDDRAddrOffs.size(), emptySwapOutL3StorageKeys.size(), emptySwapOutL3StorageAddrOff.size()); - LOG_DEBUG("table:{}, batchId:{}, channelId:{}, DDRToL3StorageKeys.size:{}, L3StorageToDDRKeys.size:{}", - info.name, info.batchId, info.channelId, DDRToL3StorageKeys.size(), L3StorageToDDRKeys.size()); + LOG_DEBUG("table:{}, batchId:{}, channelId:{}, DDRToL3StorageKeys.size:{}, L3StorageToDDRKeys.size:{}", info.name, + info.batchId, info.channelId, DDRToL3StorageKeys.size(), L3StorageToDDRKeys.size()); auto DDRToL3StorageKeysForL3S = DDRToL3StorageKeys; auto L3StorageToDDRKeysForL3S = L3StorageToDDRKeys; @@ -1879,8 +1863,8 @@ void HybridMgmt::HandleFirstBatchCaseL3Storage(const EmbBaseInfo& info, HBMSwapKeyForL3StorageQue[info.name + ADDR_STR].Pushv(emptySwapOutL3StorageAddrOff); } -void HybridMgmt::HandleDataSwapForL3Storage(const EmbBaseInfo& info, - vector &swapInKeys, vector &swapOutKeys) +void HybridMgmt::HandleDataSwapForL3Storage(const EmbBaseInfo& info, vector& swapInKeys, + vector& swapOutKeys) { TimeCost ProcessSwapInKeysTC; vector L3StorageToDDRKeys; @@ -1893,15 +1877,15 @@ void HybridMgmt::HandleDataSwapForL3Storage(const EmbBaseInfo& info, cacheManager->ProcessSwapOutKeys(info.name, swapOutKeys, hbmSwapInfo); LOG_DEBUG("ProcessSwapOutKeysTC(ms):{} ", ProcessSwapOutKeysTC.ElapsedMS()); - LOG_DEBUG("table:{}, batchId:{}, channelId:{}, swapInSize:{}, swapOutSize:{}", - info.name, info.batchId, info.channelId, swapInKeys.size(), swapOutKeys.size()); + LOG_DEBUG("table:{}, batchId:{}, channelId:{}, swapInSize:{}, swapOutSize:{}", info.name, info.batchId, + info.channelId, swapInKeys.size(), swapOutKeys.size()); LOG_DEBUG("table:{}, batchId:{}, channelId:{}, swap out, HBM2DDR Keys:{}, HBM2DDR AddrOffs:{}, " "HBM2L3Storage Keys:{}, HBM2L3Storage AddrOff:{}", info.name, info.batchId, info.channelId, hbmSwapInfo.swapOutDDRKeys.size(), hbmSwapInfo.swapOutDDRAddrOffs.size(), hbmSwapInfo.swapOutL3StorageKeys.size(), hbmSwapInfo.swapOutL3StorageAddrOffs.size()); - LOG_DEBUG("table:{}, batchId:{}, channelId:{}, DDR2L3Storage Keys:{}, L3Storage2DDR 
Keys:{}", - info.name, info.batchId, info.channelId, DDRToL3StorageKeys.size(), L3StorageToDDRKeys.size()); + LOG_DEBUG("table:{}, batchId:{}, channelId:{}, DDR2L3Storage Keys:{}, L3Storage2DDR Keys:{}", info.name, + info.batchId, info.channelId, DDRToL3StorageKeys.size(), L3StorageToDDRKeys.size()); auto DDRToL3StorageKeysForL3S = DDRToL3StorageKeys; auto L3StorageToDDRKeysForL3S = L3StorageToDDRKeys; @@ -1922,22 +1906,20 @@ void HybridMgmt::HandleDataSwapForL3Storage(const EmbBaseInfo& info, HBMSwapKeyForL3StorageQue[info.name + ADDR_STR].Pushv(hbmSwapInfo.swapOutL3StorageAddrOffs); } -bool HybridMgmt::BuildH2DEmbedding(const EmbTaskInfo &info, vector &h2dEmb) +bool HybridMgmt::BuildH2DEmbedding(const EmbTaskInfo& info, vector& h2dEmb) { std::vector swapInAddrs = HBMSwapAddrsQue[info.name + SWAP_IN_STR].WaitAndPop(); if (!isRunning) { return false; } - h2dEmb.emplace_back(Tensor(tensorflow::DT_FLOAT, { - int(swapInAddrs.size()), static_cast(info.extEmbeddingSize) - })); - auto &tmpTensor = h2dEmb.back(); - float *h2dEmbAddr = tmpTensor.flat().data(); + h2dEmb.emplace_back( + Tensor(tensorflow::DT_FLOAT, {int(swapInAddrs.size()), static_cast(info.extEmbeddingSize)})); + auto& tmpTensor = h2dEmb.back(); + float* h2dEmbAddr = tmpTensor.flat().data(); TimeCost embeddingLookupTC = TimeCost(); uint64_t memSize = info.extEmbeddingSize * sizeof(float); -# pragma omp parallel for num_threads(MGMT_CPY_THREADS) default(none) \ - shared(swapInAddrs, h2dEmbAddr, info, memSize) +#pragma omp parallel for num_threads(MGMT_CPY_THREADS) default(none) shared(swapInAddrs, h2dEmbAddr, info, memSize) for (uint64_t i = 0; i < swapInAddrs.size(); i++) { auto rc = memcpy_s(h2dEmbAddr + i * info.extEmbeddingSize, memSize, swapInAddrs[i], memSize); if (rc != 0) { @@ -1951,7 +1933,7 @@ bool HybridMgmt::BuildH2DEmbedding(const EmbTaskInfo &info, vector &h2dE return true; } -vector HybridMgmt::GetUniqueKeys(const EmbBaseInfo &info, bool &remainBatchOut) +vector HybridMgmt::GetUniqueKeys(const EmbBaseInfo& info, bool& remainBatchOut) { bool isEos = false; auto uniqueKeys = KEY_PROCESS_INSTANCE->GetUniqueKeys(info, isEos, lookUpSwapInAddrsPushId); @@ -1961,8 +1943,8 @@ vector HybridMgmt::GetUniqueKeys(const EmbBaseInfo &info, bool &remain } if (uniqueKeys.empty()) { remainBatchOut = false; - LOG_WARN("table:{}, channelId:{} batchId:{}, UniqueKeys result is empty", - info.name, info.channelId, info.batchId); + LOG_WARN("table:{}, channelId:{} batchId:{}, UniqueKeys result is empty", info.name, info.channelId, + info.batchId); return uniqueKeys; } @@ -1971,7 +1953,7 @@ vector HybridMgmt::GetUniqueKeys(const EmbBaseInfo &info, bool &remain trainKeysSet[info.name].insert(uniqueKeys.begin(), uniqueKeys.end()); LOG_DEBUG("table:{}, batchId:{}, KeyMaintainTC(ms):{}", info.name, info.batchId, KeyMaintainTC.ElapsedMS()); } else { - for (auto &key : uniqueKeys) { + for (auto& key : uniqueKeys) { if (trainKeysSet[info.name].find(key) == trainKeysSet[info.name].end()) { key = INVALID_KEY_VALUE; LOG_TRACE("find key not train before, set as invalid key"); @@ -1983,28 +1965,27 @@ vector HybridMgmt::GetUniqueKeys(const EmbBaseInfo &info, bool &remain return uniqueKeys; } -vector HybridMgmt::GetRestoreVecSec(const EmbBaseInfo &info, bool &remainBatchOut) +vector HybridMgmt::GetRestoreVecSec(const EmbBaseInfo& info, bool& remainBatchOut) { auto restoreVecSec = KEY_PROCESS_INSTANCE->GetRestoreVecSec(info); if (restoreVecSec.empty()) { remainBatchOut = false; - LOG_WARN("table:{}, channelId:{} batchId:{}, restoreVecSec result is empty", - 
info.name, info.channelId, info.batchId); + LOG_WARN("table:{}, channelId:{} batchId:{}, restoreVecSec result is empty", info.name, info.channelId, + info.batchId); return restoreVecSec; } LOG_DEBUG("table:{}, channelId:{} batchId:{}, GetRestoreVecSec end", info.name, info.channelId, info.batchId); return restoreVecSec; } -void HybridMgmt::SendAll2AllVec(const EmbBaseInfo &info, bool &remainBatchOut) +void HybridMgmt::SendAll2AllVec(const EmbBaseInfo& info, bool& remainBatchOut) { if (!mgmtRankInfo.useStatic) { bool isEos = false; // useless, adapt to HBM mode TimeCost getAll2AllTC; - unique_ptr> all2all = KEY_PROCESS_INSTANCE->GetInfoVec( - info, ProcessedInfo::ALL2ALL, isEos); - LOG_DEBUG("table:{}, channelId:{}, batchId:{}, GetInfoVec all2all end, GetAll2AllTC(ms):{}", - info.name, info.channelId, info.batchId, getAll2AllTC.ElapsedMS()); + unique_ptr> all2all = KEY_PROCESS_INSTANCE->GetInfoVec(info, ProcessedInfo::ALL2ALL, isEos); + LOG_DEBUG("table:{}, channelId:{}, batchId:{}, GetInfoVec all2all end, GetAll2AllTC(ms):{}", info.name, + info.channelId, info.batchId, getAll2AllTC.ElapsedMS()); if (all2all == nullptr) { remainBatchOut = false; LOG_WARN("Information vector is nullptr!"); @@ -2012,17 +1993,16 @@ void HybridMgmt::SendAll2AllVec(const EmbBaseInfo &info, bool &remainBatchOut) } TimeCost sendAll2AllTC; hdTransfer->Send(TransferChannel::ALL2ALL, *all2all, info.channelId, info.name); - LOG_DEBUG("table:{}, channelId:{}, batchId:{}, send all2all end, sendAll2AllTC(ms):{}", - info.name, info.channelId, info.batchId, sendAll2AllTC.ElapsedMS()); + LOG_DEBUG("table:{}, channelId:{}, batchId:{}, send all2all end, sendAll2AllTC(ms):{}", info.name, + info.channelId, info.batchId, sendAll2AllTC.ElapsedMS()); } } -void HybridMgmt::SendRestoreVec(const EmbBaseInfo &info, bool &remainBatchOut) +void HybridMgmt::SendRestoreVec(const EmbBaseInfo& info, bool& remainBatchOut) { bool isEos = false; // useless, adapt to HBM mode TimeCost getRestoreTC; - unique_ptr> infoVecs = KEY_PROCESS_INSTANCE->GetInfoVec( - info, ProcessedInfo::RESTORE, isEos); + unique_ptr> infoVecs = KEY_PROCESS_INSTANCE->GetInfoVec(info, ProcessedInfo::RESTORE, isEos); if (infoVecs == nullptr) { remainBatchOut = false; if (isRunning) { @@ -2030,66 +2010,67 @@ void HybridMgmt::SendRestoreVec(const EmbBaseInfo &info, bool &remainBatchOut) } return; } - LOG_DEBUG("table:{}, channelId:{}, batchId:{}, get restore end, getRestoreTC(ms):{}", - info.name, info.channelId, info.batchId, getRestoreTC.ElapsedMS()); + LOG_DEBUG("table:{}, channelId:{}, batchId:{}, get restore end, getRestoreTC(ms):{}", info.name, info.channelId, + info.batchId, getRestoreTC.ElapsedMS()); TimeCost sendRestoreSyncTC; hdTransfer->Send(TransferChannel::RESTORE, *infoVecs, info.channelId, info.name); - LOG_DEBUG("table:{}, channelId:{}, batchId:{}, send restore end, sendRestoreSyncTC(ms):{}", - info.name, info.channelId, info.batchId, sendRestoreSyncTC.ElapsedMS()); + LOG_DEBUG("table:{}, channelId:{}, batchId:{}, send restore end, sendRestoreSyncTC(ms):{}", info.name, + info.channelId, info.batchId, sendRestoreSyncTC.ElapsedMS()); } -void HybridMgmt::SendLookupOffsets(const EmbBaseInfo &info, - vector &uniqueKeys, vector &restoreVecSec) +void HybridMgmt::SendLookupOffsets(const EmbBaseInfo& info, vector& uniqueKeys, + vector& restoreVecSec) { // uniqueKeys already transfer to offset in GetSwapPairsAndKey2Offset // graph will filter out invalid offset(-1). 
see function _set_specific_value_for_non_valid_key TimeCost sendLookupOffsetsTC; std::vector lookupOffsets; - for (const auto &index : restoreVecSec) { + for (const auto& index : restoreVecSec) { if (index == INVALID_INDEX_VALUE) { lookupOffsets.emplace_back(static_cast(INVALID_KEY_VALUE)); continue; } lookupOffsets.emplace_back(uniqueKeys[index]); } - hdTransfer->Send(TransferChannel::LOOKUP, { Vec2TensorI32(lookupOffsets) }, info.channelId, info.name); - LOG_DEBUG("table:{}, channelId:{}, batchId:{}, send lookupOffset, sendLookupOffsetsTC(ms):{}", - info.name, info.channelId, info.batchId, sendLookupOffsetsTC.ElapsedMS()); + hdTransfer->Send(TransferChannel::LOOKUP, {Vec2TensorI32(lookupOffsets)}, info.channelId, info.name); + LOG_DEBUG("table:{}, channelId:{}, batchId:{}, send lookupOffset, sendLookupOffsetsTC(ms):{}", info.name, + info.channelId, info.batchId, sendLookupOffsetsTC.ElapsedMS()); } -void HybridMgmt::SendGlobalUniqueVec(const EmbBaseInfo &info, - vector &uniqueKeys, vector &restoreVecSec) +void HybridMgmt::SendGlobalUniqueVec(const EmbBaseInfo& info, vector& uniqueKeys, + vector& restoreVecSec) { if (!(info.channelId == TRAIN_CHANNEL_ID && mgmtRankInfo.useSumSameIdGradients)) { return; } TimeCost sendUniqueKeysSyncTC; - hdTransfer->Send(TransferChannel::UNIQKEYS, {mgmtRankInfo.useDynamicExpansion ? Vec2TensorI64(uniqueKeys) : - Vec2TensorI32(uniqueKeys) }, info.channelId, info.name); - LOG_DEBUG("table:{}, channelId:{}, batchId:{}, sendUniqueKeysSyncTC(ms):{}", - info.name, info.channelId, info.batchId, sendUniqueKeysSyncTC.ElapsedMS()); + hdTransfer->Send(TransferChannel::UNIQKEYS, + {mgmtRankInfo.useDynamicExpansion ? Vec2TensorI64(uniqueKeys) : Vec2TensorI32(uniqueKeys)}, + info.channelId, info.name); + LOG_DEBUG("table:{}, channelId:{}, batchId:{}, sendUniqueKeysSyncTC(ms):{}", info.name, info.channelId, + info.batchId, sendUniqueKeysSyncTC.ElapsedMS()); TimeCost sendRestoreVecSecSyncTC; - hdTransfer->Send(TransferChannel::RESTORE_SECOND, {Vec2TensorI32(restoreVecSec) }, info.channelId, info.name); - LOG_DEBUG("table:{}, channelId:{}, batchId:{}, sendRestoreVecSecSyncTC(ms):{}", - info.name, info.channelId, info.batchId, sendRestoreVecSecSyncTC.ElapsedMS()); + hdTransfer->Send(TransferChannel::RESTORE_SECOND, {Vec2TensorI32(restoreVecSec)}, info.channelId, info.name); + LOG_DEBUG("table:{}, channelId:{}, batchId:{}, sendRestoreVecSecSyncTC(ms):{}", info.name, info.channelId, + info.batchId, sendRestoreVecSecSyncTC.ElapsedMS()); } -bool HybridMgmt::HandleSpecialProcessStatusDDR(const EmbBaseInfo &info, TimeCost& getAndSendTensorsTC, - pair, vector> &swapInKoPair, - pair, vector> &swapOutKoPair) +bool HybridMgmt::HandleSpecialProcessStatusDDR(const EmbBaseInfo& info, TimeCost& getAndSendTensorsTC, + pair, vector>& swapInKoPair, + pair, vector>& swapOutKoPair) { TimeCost swapProcessTC; - auto &swapInPos = swapInKoPair.second; - auto &swapOutKeys = swapOutKoPair.first; - auto &swapOutPos = swapOutKoPair.second; + auto& swapInPos = swapInKoPair.second; + auto& swapOutKeys = swapOutKoPair.first; + auto& swapOutPos = swapOutKoPair.second; if (specialProcessStatus[info.name] == ProcessStatus::AFTER_SWITCH_FIRST_BATCH) { // 发现train、save、eval切换,先保存状态,发emptySwapOutKeys以对应上一步的emptySwapOutPos HandleFirstBatchCaseDDR(info, swapInKoPair, swapOutKoPair); - LOG_DEBUG("handle channel switch case:afterSwitchFirstBatch, table:{}, channelId:{}, batchId:{}", - info.name, info.channelId, info.batchId); + LOG_DEBUG("handle channel switch case:afterSwitchFirstBatch, table:{}, channelId:{}, 
batchId:{}", info.name, + info.channelId, info.batchId); if (mgmtRankInfo.ctrlSteps[info.channelId] == 1) { vector emptySwapOutPos; @@ -2110,32 +2091,33 @@ bool HybridMgmt::HandleSpecialProcessStatusDDR(const EmbBaseInfo &info, TimeCost swapOutKeys.insert(swapOutKeys.end(), tempStore[0].begin(), tempStore[0].end()); swapOutPos.insert(swapOutPos.end(), tempStore[1].begin(), tempStore[1].end()); specialProcessStatus[info.name] = ProcessStatus::NORMAL; - LOG_DEBUG("handle channel switch case:afterSwitchSecondBatch, table:{}, channelId:{}, batchId:{}", - info.name, info.channelId, info.batchId); + LOG_DEBUG("handle channel switch case:afterSwitchSecondBatch, table:{}, channelId:{}, batchId:{}", info.name, + info.channelId, info.batchId); } return false; } -bool HybridMgmt::HandleSpecialProcessStatusL3Storage(const EmbBaseInfo &info, TimeCost &getAndSendTensorsTC, - pair, vector> &swapInKoPair, - pair, vector> &swapOutKoPair) +bool HybridMgmt::HandleSpecialProcessStatusL3Storage(const EmbBaseInfo& info, TimeCost& getAndSendTensorsTC, + pair, vector>& swapInKoPair, + pair, vector>& swapOutKoPair) { TimeCost swapProcessTC; - auto &swapInPos = swapInKoPair.second; - auto &swapOutKeys = swapOutKoPair.first; - auto &swapOutPos = swapOutKoPair.second; + auto& swapInPos = swapInKoPair.second; + auto& swapOutKeys = swapOutKoPair.first; + auto& swapOutPos = swapOutKoPair.second; if (specialProcessStatus[info.name] == ProcessStatus::AFTER_SWITCH_FIRST_BATCH) { // 发现train、save、eval切换,先保存状态,发emptySwapOutKeys以对应上一步的emptySwapOutPos HandleFirstBatchCaseL3Storage(info, swapInKoPair, swapOutKoPair); - LOG_DEBUG("handle channel switch case:afterSwitchFirstBatch, table:{}, channelId:{}, batchId:{}", - info.name, info.channelId, info.batchId); + LOG_DEBUG("handle channel switch case:afterSwitchFirstBatch, table:{}, channelId:{}, batchId:{}", info.name, + info.channelId, info.batchId); if (mgmtRankInfo.ctrlSteps[info.channelId] == 1) { vector emptySwapOutPos; SendTensorForSwap(info, swapInPos, emptySwapOutPos); LOG_DEBUG("ProcessEmbInfoL3Storage special case, user only run one step, " - "table:{}, channelId:{}, batchId:{}", info.name, info.channelId, info.batchId); + "table:{}, channelId:{}, batchId:{}", + info.name, info.channelId, info.batchId); } specialProcessStatus[info.name] = ProcessStatus::AFTER_SWITCH_SECOND_BATCH; @@ -2149,13 +2131,12 @@ bool HybridMgmt::HandleSpecialProcessStatusL3Storage(const EmbBaseInfo &info, Ti swapOutKeys.insert(swapOutKeys.end(), tempStore[0].begin(), tempStore[0].end()); swapOutPos.insert(swapOutPos.end(), tempStore[1].begin(), tempStore[1].end()); specialProcessStatus[info.name] = ProcessStatus::NORMAL; - LOG_DEBUG("handle channel switch case:afterSwitchSecondBatch, table:{}, channelId:{}, batchId:{}", - info.name, info.channelId, info.batchId); + LOG_DEBUG("handle channel switch case:afterSwitchSecondBatch, table:{}, channelId:{}, batchId:{}", info.name, + info.channelId, info.batchId); } return false; } - void HybridMgmt::CheckLookupAddrSuccessDDR() { if (!lookupAddrSuccess) { @@ -2169,20 +2150,19 @@ void HybridMgmt::CheckLookupAddrSuccessDDR() } } - -void HybridMgmt::GetSwapPairsAndKey2Offset(const EmbBaseInfo &info, vector &uniqueKeys, - pair, vector> &swapInKoPair, - pair, vector> &swapOutKoPair) +void HybridMgmt::GetSwapPairsAndKey2Offset(const EmbBaseInfo& info, vector& uniqueKeys, + pair, vector>& swapInKoPair, + pair, vector>& swapOutKoPair) { TimeCost GetSwapPairsAndKey2OffsetTC; int swapInCode = embCache->GetSwapPairsAndKey2Offset(info.name, uniqueKeys, swapInKoPair, 
swapOutKoPair); if (swapInCode != H_OK) { - string errMsg = StringFormat("table:%s, GetSwapPairsAndKey2Offset failed! error code:%d", - info.name.c_str(), swapInCode); + string errMsg = + StringFormat("table:%s, GetSwapPairsAndKey2Offset failed! error code:%d", info.name.c_str(), swapInCode); throw runtime_error(errMsg); } - LOG_DEBUG("table:{}, channel:{}, batchId:{}, GetSwapPairsAndKey2OffsetTC(ms):{}", - info.name, info.channelId, info.batchId, GetSwapPairsAndKey2OffsetTC.ElapsedMS()); + LOG_DEBUG("table:{}, channel:{}, batchId:{}, GetSwapPairsAndKey2OffsetTC(ms):{}", info.name, info.channelId, + info.batchId, GetSwapPairsAndKey2OffsetTC.ElapsedMS()); LOG_DEBUG("table:{}, channel:{}, batchId:{}, swapIn keys:{}, swapIn pos:{}, swapOut keys:{}, swapOut pos:{}", info.name, info.channelId, info.batchId, VectorToString(swapInKoPair.first), @@ -2190,15 +2170,14 @@ void HybridMgmt::GetSwapPairsAndKey2Offset(const EmbBaseInfo &info, vector, vector>& swapInKoPair, +void HybridMgmt::EnqueueSwapInfo(const EmbBaseInfo& info, pair, vector>& swapInKoPair, pair, vector>& swapOutKoPair) { - auto &swapInKeys = swapInKoPair.first; - auto &swapOutKeys = swapOutKoPair.first; + auto& swapInKeys = swapInKoPair.first; + auto& swapOutKeys = swapOutKoPair.first; - LOG_DEBUG("enqueue HBMSwapKeyQue table:{}, batchId:{}, channelId:{}, swapInSize:{}, swapOutSize:{}", - info.name, info.batchId, info.channelId, swapInKeys.size(), swapOutKeys.size()); + LOG_DEBUG("enqueue HBMSwapKeyQue table:{}, batchId:{}, channelId:{}, swapInSize:{}, swapOutSize:{}", info.name, + info.batchId, info.channelId, swapInKeys.size(), swapOutKeys.size()); HBMSwapKeyQue[info.name + SWAP_OUT_STR].Pushv(swapOutKeys); HBMSwapKeyQue[info.name + SWAP_IN_STR].Pushv(swapInKeys); @@ -2208,7 +2187,7 @@ void HybridMgmt::EnqueueSwapInfo(const EmbBaseInfo &info, bool HybridMgmt::IsTrainAndEvalCase() { bool isChannelSwitchCase = false; - for (auto& i: mgmtEmbInfo) { + for (auto& i : mgmtEmbInfo) { if (specialProcessStatus[i.name] == ProcessStatus::AFTER_SWITCH_FIRST_BATCH) { isChannelSwitchCase = true; break; diff --git a/src/core/hybrid_mgmt/hybrid_mgmt.h b/src/core/hybrid_mgmt/hybrid_mgmt.h index f5897861..ab34b19f 100644 --- a/src/core/hybrid_mgmt/hybrid_mgmt.h +++ b/src/core/hybrid_mgmt/hybrid_mgmt.h @@ -17,308 +17,300 @@ See the License for the specific language governing permissions and #define MX_REC_EMB_MGMT_H #include -#include #include #include +#include #include "absl/container/flat_hash_map.h" - +#include "emb_table/embedding_table.h" +#include "hd_transfer/hd_transfer.h" +#include "hybrid_mgmt_block.h" +#include "l3_storage/cache_manager.h" +#include "ock_ctr_common/include/embedding_cache.h" +#include "ock_ctr_common/include/error_code.h" +#include "ock_ctr_common/include/factory.h" #include "utils/common.h" #include "utils/config.h" #include "utils/singleton.h" #include "utils/task_queue.h" #include "utils/time_cost.h" -#include "ock_ctr_common/include/factory.h" -#include "ock_ctr_common/include/embedding_cache.h" -#include "ock_ctr_common/include/error_code.h" - -#include "hd_transfer/hd_transfer.h" -#include "l3_storage/cache_manager.h" -#include "hybrid_mgmt_block.h" -#include "emb_table/embedding_table.h" namespace MxRec { - using namespace std; - using namespace tensorflow; - using namespace Common; - - enum class TaskType { - HBM, - DDR - }; - - enum class ProcessStatus { - NORMAL, - AFTER_SWITCH_FIRST_BATCH, - AFTER_SWITCH_SECOND_BATCH - }; - - inline string ProcessStatus2Str(ProcessStatus s) +using namespace std; +using namespace 
tensorflow; +using namespace Common; + +enum class TaskType { + HBM, + DDR +}; + +enum class ProcessStatus { + NORMAL, + AFTER_SWITCH_FIRST_BATCH, + AFTER_SWITCH_SECOND_BATCH +}; + +inline string ProcessStatus2Str(ProcessStatus s) +{ + switch (s) { + case ProcessStatus::NORMAL: + return "normal"; + case ProcessStatus::AFTER_SWITCH_FIRST_BATCH: + return "afterSwitchFirstBatch"; + case ProcessStatus::AFTER_SWITCH_SECOND_BATCH: + return "afterSwitchSecondBatch"; + default: + throw std::invalid_argument("Invalid ProcessStatus"); + } +}; + +struct EmbTaskInfo { + int batchId; + int threadIdx; + int cvNotifyIndex; + int extEmbeddingSize; + string name; +}; + +class HybridMgmt { +public: + HybridMgmt() = default; + + ~HybridMgmt() { - switch (s) { - case ProcessStatus::NORMAL: - return "normal"; - case ProcessStatus::AFTER_SWITCH_FIRST_BATCH: - return "afterSwitchFirstBatch"; - case ProcessStatus::AFTER_SWITCH_SECOND_BATCH: - return "afterSwitchSecondBatch"; - default: - throw std::invalid_argument("Invalid ProcessStatus"); - } - }; - - struct EmbTaskInfo { - int batchId; - int threadIdx; - int cvNotifyIndex; - int extEmbeddingSize; - string name; - }; - - class HybridMgmt { - public: - HybridMgmt() = default; - - ~HybridMgmt() - { - if (isRunning) { - Destroy(); - } + if (isRunning) { + Destroy(); } + } - HybridMgmt(const HybridMgmt&) = delete; + HybridMgmt(const HybridMgmt&) = delete; - HybridMgmt& operator=(const HybridMgmt&) = delete; + HybridMgmt& operator=(const HybridMgmt&) = delete; - bool Initialize(RankInfo rankInfo, const vector& embInfos, int seed, - const vector& thresholdValues, bool ifLoad); + bool Initialize(RankInfo rankInfo, const vector& embInfos, int seed, + const vector& thresholdValues, bool ifLoad); - void Save(const string& savePath); + void Save(const string& savePath); - bool Load(const string& loadPath, vector warmStartTables); + bool Load(const string& loadPath, vector warmStartTables); - OffsetT SendHostMap(const string tableName); + OffsetT SendHostMap(const string tableName); - OffsetT SendLoadMap(const string tableName); + OffsetT SendLoadMap(const string tableName); - void ReceiveHostMap(AllKeyOffsetMapT receiveKeyOffsetMap); + void ReceiveHostMap(AllKeyOffsetMapT receiveKeyOffsetMap); - void Start(); + void Start(); - void StartThreadForHBM(); + void StartThreadForHBM(); - void StartThreadForDDR(); + void StartThreadForDDR(); - void Destroy(); + void Destroy(); - bool ParseKeys(int channelId, int& batchId, TaskType type); + bool ParseKeys(int channelId, int& batchId, TaskType type); - bool Evict(); + bool Evict(); - void NotifyBySessionRun(int channelID) const; + void NotifyBySessionRun(int channelID) const; - void CountStepBySessionRun(int channelID, int steps) const; + void CountStepBySessionRun(int channelID, int steps) const; - int64_t GetTableSize(const string& embName) const; + int64_t GetTableSize(const string& embName) const; - int64_t GetTableCapacity(const string& embName) const; + int64_t GetTableCapacity(const string& embName) const; - void SetOptimizerInfo(const string& embName, OptimizerInfo optimInfo) const; + void SetOptimizerInfo(const string& embName, OptimizerInfo optimInfo) const; - void FetchDeviceEmb(); + void FetchDeviceEmb(); - void ProcessEmbInfoHBM(const EmbBaseInfo& info, bool& remainBatchOut, bool isGrad); + void ProcessEmbInfoHBM(const EmbBaseInfo& info, bool& remainBatchOut, bool isGrad); - void ProcessEmbInfoDDR(const EmbBaseInfo& info, bool& remainBatchOut); + void ProcessEmbInfoDDR(const EmbBaseInfo& info, bool& remainBatchOut); - 
void ProcessEmbInfoL3Storage(const EmbBaseInfo& info, bool& remainBatchOut); + void ProcessEmbInfoL3Storage(const EmbBaseInfo& info, bool& remainBatchOut); - GTEST_PRIVATE: - bool mutexDestroy { false }; - std::mutex lookUpAndSendBatchIdMtx; - std::mutex receiveAndUpdateBatchIdMtx; - std::map lookUpAndSendTableBatchMap; - std::map receiveAndUpdateTableBatchMap; + GTEST_PRIVATE : bool mutexDestroy{false}; + std::mutex lookUpAndSendBatchIdMtx; + std::mutex receiveAndUpdateBatchIdMtx; + std::map lookUpAndSendTableBatchMap; + std::map receiveAndUpdateTableBatchMap; - std::map> lastUpdateFinishMutexMap; - std::map> cvLastUpdateFinishMap; - std::map lastUpdateFinishStepMap; - std::map> lastLookUpFinishMutexMap; - std::map> cvLastLookUpFinishMap; - std::map lastLookUpFinishStepMap; - std::map> lastSendFinishMutexMap; - std::map> cvLastSendFinishMap; - std::map lastSendFinishStepMap; - std::map> lastRecvFinishMutexMap; - std::map> cvLastRecvFinishMap; - std::map lastRecvFinishStepMap; + std::map> lastUpdateFinishMutexMap; + std::map> cvLastUpdateFinishMap; + std::map lastUpdateFinishStepMap; + std::map> lastLookUpFinishMutexMap; + std::map> cvLastLookUpFinishMap; + std::map lastLookUpFinishStepMap; + std::map> lastSendFinishMutexMap; + std::map> cvLastSendFinishMap; + std::map lastSendFinishStepMap; + std::map> lastRecvFinishMutexMap; + std::map> cvLastRecvFinishMap; + std::map lastRecvFinishStepMap; - std::vector EmbeddingLookUpAndSendThreadPool; - std::vector EmbeddingReceiveAndUpdateThreadPool; - std::vector> lookUpSwapOutAddrsThreads; - std::vector> lookUpSwapInAddrsThreads; + std::vector EmbeddingLookUpAndSendThreadPool; + std::vector EmbeddingReceiveAndUpdateThreadPool; + std::vector> lookUpSwapOutAddrsThreads; + std::vector> lookUpSwapInAddrsThreads; - std::map>> HBMSwapKeyQue; - std::map>> HBMSwapKeyForL3StorageQue; - std::map>> DDRSwapKeyQue; - std::map>> DDRSwapKeyForL3StorageQue; - std::map>> HBMSwapAddrsQue; - std::map>> DDRSwapAddrsQue; + std::map>> HBMSwapKeyQue; + std::map>> HBMSwapKeyForL3StorageQue; + std::map>> DDRSwapKeyQue; + std::map>> DDRSwapKeyForL3StorageQue; + std::map>> HBMSwapAddrsQue; + std::map>> DDRSwapAddrsQue; - std::mutex evictMut; + std::mutex evictMut; - std::map> trainKeysSet; - const string SWAP_IN_STR = "SwapIn"; - const string SWAP_OUT_STR = "SwapOut"; + std::map> trainKeysSet; + const string SWAP_IN_STR = "SwapIn"; + const string SWAP_OUT_STR = "SwapOut"; - const string ADDR_STR = "Addr"; - ock::ctr::EmbCacheManagerPtr embCache = nullptr; - std::map> lastSwapInPosMap {}; - std::map>> trainTestSwitchInfoStore {}; - std::atomic lookupAddrSuccess {true}; + const string ADDR_STR = "Addr"; + ock::ctr::EmbCacheManagerPtr embCache = nullptr; + std::map> lastSwapInPosMap{}; + std::map>> trainTestSwitchInfoStore{}; + std::atomic lookupAddrSuccess{true}; - std::mutex saveMutex; - std::condition_variable cvCheckSave; + std::mutex saveMutex; + std::condition_variable cvCheckSave; - void SetFeatureTypeForLoad(vector& loadFeatures); + void SetFeatureTypeForLoad(vector& loadFeatures); - void EvictKeys(const string& embName, const vector& keys); + void EvictKeys(const string& embName, const vector& keys); - void InitRankInfo(RankInfo& rankInfo, const vector& embInfos) const; + void InitRankInfo(RankInfo& rankInfo, const vector& embInfos) const; - void EvictL3StorageKeys(const string& embName, const vector& keys) const; + void EvictL3StorageKeys(const string& embName, const vector& keys) const; - void LookUpAndRemoveAddrs(const EmbTaskInfo &info); // L3Storage, synchronous + 
void LookUpAndRemoveAddrs(const EmbTaskInfo& info); // L3Storage, synchronous - void LookUpSwapAddrs(const std::string &embName, const std::string &swapStr); // DDR, asynchronous + void LookUpSwapAddrs(const std::string& embName, const std::string& swapStr); // DDR, asynchronous - void EmbeddingTask(); + void EmbeddingTask(); - void MultiThreadEmbHDTransWrap(); + void MultiThreadEmbHDTransWrap(); - void EmbeddingLookUpAndSendDDR(int batchId, int index, const EmbInfo& embInfo); + void EmbeddingLookUpAndSendDDR(int batchId, int index, const EmbInfo& embInfo); - void EmbeddingReceiveAndUpdateDDR(int batchId, int index, const EmbInfo& embInfo); + void EmbeddingReceiveAndUpdateDDR(int batchId, int index, const EmbInfo& embInfo); - void EmbeddingLookUpAndSendL3Storage(int batchId, int index, const EmbInfo& embInfo); + void EmbeddingLookUpAndSendL3Storage(int batchId, int index, const EmbInfo& embInfo); - void EmbeddingReceiveAndUpdateL3Storage(int batchId, int index, const EmbInfo& embInfo); + void EmbeddingReceiveAndUpdateL3Storage(int batchId, int index, const EmbInfo& embInfo); - void SendTensorForSwap(const EmbBaseInfo& info, - const vector &swapInPosUint, - const vector &swapOutPosUint); + void SendTensorForSwap(const EmbBaseInfo& info, const vector& swapInPosUint, + const vector& swapOutPosUint); - private: - HybridMgmtBlock* hybridMgmtBlock; - vector mgmtEmbInfo; - RankInfo mgmtRankInfo; - CacheManager* cacheManager; - vector> procThreads {}; - map> evictKeyMap {}; - HDTransfer *hdTransfer; - OffsetMapT offsetMapToSend; - OffsetMapT loadOffsetToSend; - bool isL3StorageEnabled { false }; - bool isRunning; - bool isLoad { false }; - bool isInitialized { false }; - bool alreadyTrainOnce = false; // 用于判断是否为predict模式 - map lookUpSwapInAddrsPushId; // 用于处理eos场景,当消费者追上生产者且长时间无上游数据,会触发eos - map specialProcessStatus; +private: + HybridMgmtBlock* hybridMgmtBlock; + vector mgmtEmbInfo; + RankInfo mgmtRankInfo; + CacheManager* cacheManager; + vector> procThreads{}; + map> evictKeyMap{}; + HDTransfer* hdTransfer; + OffsetMapT offsetMapToSend; + OffsetMapT loadOffsetToSend; + bool isL3StorageEnabled{false}; + bool isRunning; + bool isLoad{false}; + bool isInitialized{false}; + bool alreadyTrainOnce = false; // 用于判断是否为predict模式 + map lookUpSwapInAddrsPushId; // 用于处理eos场景,当消费者追上生产者且长时间无上游数据,会触发eos + map specialProcessStatus; - void TrainTask(TaskType type); + void TrainTask(TaskType type); - void EvalTask(TaskType type); + void EvalTask(TaskType type); - void SendUniqKeysAndRestoreVecHBM(const EmbBaseInfo &info, - const unique_ptr> &infoVecs, bool isGrad) const; + void SendUniqKeysAndRestoreVecHBM(const EmbBaseInfo& info, const unique_ptr>& infoVecs, + bool isGrad) const; - void HandleEndBatchCase(const EmbBaseInfo& info, vector& swapInPos); + void HandleEndBatchCase(const EmbBaseInfo& info, vector& swapInPos); - bool IsTrainEndBatch(int batchId) const; + bool IsTrainEndBatch(int batchId) const; - bool IsEvalEndBatch(int batchId) const; + bool IsEvalEndBatch(int batchId) const; - void InitEmbeddingCache(const vector& embInfos); + void InitEmbeddingCache(const vector& embInfos); - void InitDataPipelineForDDR(const string &embName); + void InitDataPipelineForDDR(const string& embName); - void InitDataPipelineForL3Storage(const string &embName, int extEmbeddingSize); + void InitDataPipelineForL3Storage(const string& embName, int extEmbeddingSize); - void JoinEmbeddingCacheThread(); + void JoinEmbeddingCacheThread(); - void HandleReachMaxStepCase(const EmbBaseInfo& info, bool& remainBatchOut); + void 
HandleReachMaxStepCase(const EmbBaseInfo& info, bool& remainBatchOut); - void HandleEosCase(const EmbBaseInfo& info, bool& remainBatchOut); + void HandleEosCase(const EmbBaseInfo& info, bool& remainBatchOut); - void HandleEosCaseHBM(const string& embName, int batchId, int channelId, bool& remainBatchOut); + void HandleEosCaseHBM(const string& embName, int batchId, int channelId, bool& remainBatchOut); - bool EmbeddingReceiveDDR(const EmbTaskInfo& info, float*& ptr, vector& swapOutAddrs); + bool EmbeddingReceiveDDR(const EmbTaskInfo& info, float*& ptr, vector& swapOutAddrs); - void EmbeddingUpdateDDR(const EmbTaskInfo& info, const float* embPtr, vector& swapOutAddrs); + void EmbeddingUpdateDDR(const EmbTaskInfo& info, const float* embPtr, vector& swapOutAddrs); - bool EmbeddingLookUpDDR(const EmbTaskInfo& info, vector& h2dEmb); + bool EmbeddingLookUpDDR(const EmbTaskInfo& info, vector& h2dEmb); - void EmbeddingSendDDR(const EmbTaskInfo& info, vector& h2dEmb); + void EmbeddingSendDDR(const EmbTaskInfo& info, vector& h2dEmb); - bool EmbeddingReceiveL3Storage(const EmbTaskInfo& info, float*& ptr, vector& swapOutAddrs, - int64_t& dims0); + bool EmbeddingReceiveL3Storage(const EmbTaskInfo& info, float*& ptr, vector& swapOutAddrs, int64_t& dims0); - void EmbeddingUpdateL3Storage(const EmbTaskInfo& info, float* embPtr, vector& swapOutAddrs, - int64_t& dims0); + void EmbeddingUpdateL3Storage(const EmbTaskInfo& info, float* embPtr, vector& swapOutAddrs, int64_t& dims0); - bool EmbeddingLookUpL3Storage(const EmbTaskInfo& info, vector& h2dEmb); + bool EmbeddingLookUpL3Storage(const EmbTaskInfo& info, vector& h2dEmb); - void EmbeddingSendL3Storage(const EmbTaskInfo& info, vector& h2dEmb); + void EmbeddingSendL3Storage(const EmbTaskInfo& info, vector& h2dEmb); - void CreateEmbeddingLookUpAndSendThread(int index, const EmbInfo& embInfo); + void CreateEmbeddingLookUpAndSendThread(int index, const EmbInfo& embInfo); - void CreateEmbeddingReceiveAndUpdateThread(int index, const EmbInfo& embInfo); + void CreateEmbeddingReceiveAndUpdateThread(int index, const EmbInfo& embInfo); - void HandleFirstBatchCaseDDR(const EmbBaseInfo& info, - std::pair, vector>& swapInKoPair, - std::pair, vector>& swapOutKoPair); + void HandleFirstBatchCaseDDR(const EmbBaseInfo& info, std::pair, vector>& swapInKoPair, + std::pair, vector>& swapOutKoPair); - void HandleFirstBatchCaseL3Storage(const EmbBaseInfo& info, - std::pair, vector>& swapInKoPair, - std::pair, vector>& swapOutKoPair); + void HandleFirstBatchCaseL3Storage(const EmbBaseInfo& info, + std::pair, vector>& swapInKoPair, + std::pair, vector>& swapOutKoPair); - void HandleDataSwapForL3Storage(const EmbBaseInfo& info, - vector &swapInKeys, vector &swapOutKeys); + void HandleDataSwapForL3Storage(const EmbBaseInfo& info, vector& swapInKeys, + vector& swapOutKeys); - bool BuildH2DEmbedding(const EmbTaskInfo& info, vector& h2dEmb); + bool BuildH2DEmbedding(const EmbTaskInfo& info, vector& h2dEmb); - vector GetUniqueKeys(const EmbBaseInfo& info, bool& remainBatchOut); + vector GetUniqueKeys(const EmbBaseInfo& info, bool& remainBatchOut); - vector GetRestoreVecSec(const EmbBaseInfo& info, bool& remainBatchOut); + vector GetRestoreVecSec(const EmbBaseInfo& info, bool& remainBatchOut); - void SendAll2AllVec(const EmbBaseInfo& info, bool& remainBatchOut); + void SendAll2AllVec(const EmbBaseInfo& info, bool& remainBatchOut); - void SendRestoreVec(const EmbBaseInfo& info, bool& remainBatchOut); + void SendRestoreVec(const EmbBaseInfo& info, bool& remainBatchOut); - void 
SendLookupOffsets(const EmbBaseInfo& info, vector& uniqueKeys, vector& restoreVecSec); + void SendLookupOffsets(const EmbBaseInfo& info, vector& uniqueKeys, vector& restoreVecSec); - void SendGlobalUniqueVec(const EmbBaseInfo& info, vector& uniqueKeys, vector& restoreVecSec); + void SendGlobalUniqueVec(const EmbBaseInfo& info, vector& uniqueKeys, vector& restoreVecSec); - bool HandleSpecialProcessStatusDDR(const EmbBaseInfo& info, TimeCost& getAndSendTensorsTC, - std::pair, vector>& swapInKoPair, - std::pair, vector>& swapOutKoPair); + bool HandleSpecialProcessStatusDDR(const EmbBaseInfo& info, TimeCost& getAndSendTensorsTC, + std::pair, vector>& swapInKoPair, + std::pair, vector>& swapOutKoPair); - bool HandleSpecialProcessStatusL3Storage(const EmbBaseInfo& info, TimeCost& getAndSendTensorsTC, - std::pair, vector>& swapInKoPair, - std::pair, vector>& swapOutKoPair); + bool HandleSpecialProcessStatusL3Storage(const EmbBaseInfo& info, TimeCost& getAndSendTensorsTC, + std::pair, vector>& swapInKoPair, + std::pair, vector>& swapOutKoPair); - void CheckLookupAddrSuccessDDR(); + void CheckLookupAddrSuccessDDR(); - void GetSwapPairsAndKey2Offset(const EmbBaseInfo& info, vector &uniqueKeys, - std::pair, vector>& swapInKoPair, - std::pair, vector>& swapOutKoPair); + void GetSwapPairsAndKey2Offset(const EmbBaseInfo& info, vector& uniqueKeys, + std::pair, vector>& swapInKoPair, + std::pair, vector>& swapOutKoPair); - void EnqueueSwapInfo(const EmbBaseInfo& info, - std::pair, vector>& swapInKoPair, - std::pair, vector>& swapOutKoPair); + void EnqueueSwapInfo(const EmbBaseInfo& info, std::pair, vector>& swapInKoPair, + std::pair, vector>& swapOutKoPair); - bool IsTrainAndEvalCase(); - }; -} -#endif // MX_REC_EMB_MGMT_H + bool IsTrainAndEvalCase(); +}; +} // namespace MxRec +#endif // MX_REC_EMB_MGMT_H -- Gitee From ade2d1089abee6af3a3e4ef09313c5b1d5522bd7 Mon Sep 17 00:00:00 2001 From: penghuiyang <1060916628@qq.com> Date: Fri, 5 Jul 2024 13:21:35 +0000 Subject: [PATCH 264/302] =?UTF-8?q?!209=20=E3=80=90FIX=E3=80=91=E5=A4=9A?= =?UTF-8?q?=E6=9C=BA=E8=AE=AD=E7=BB=83=E6=95=B0=E6=8D=AE=E4=BF=9D=E5=AD=98?= =?UTF-8?q?=E5=8A=A0=E8=BD=BD=E9=80=82=E9=85=8D=20*=20=E3=80=90FIX?= =?UTF-8?q?=E3=80=91=E5=A4=9A=E6=9C=BA=E8=AE=AD=E7=BB=83=E6=95=B0=E6=8D=AE?= =?UTF-8?q?=E4=BF=9D=E5=AD=98=E5=8A=A0=E8=BD=BD=E9=80=82=E9=85=8D+hdfs=20*?= =?UTF-8?q?=20=E3=80=90FIX=E3=80=91=E5=A4=9A=E6=9C=BA=E8=AE=AD=E7=BB=83?= =?UTF-8?q?=E6=95=B0=E6=8D=AE=E4=BF=9D=E5=AD=98=E5=8A=A0=E8=BD=BD=E9=80=82?= =?UTF-8?q?=E9=85=8D+hdfs=20*=20=E3=80=90FIX=E3=80=91=E5=A4=9A=E6=9C=BA?= =?UTF-8?q?=E8=AE=AD=E7=BB=83=E6=95=B0=E6=8D=AE=E4=BF=9D=E5=AD=98=E5=8A=A0?= =?UTF-8?q?=E8=BD=BD=E9=80=82=E9=85=8D=20*=20=E3=80=90FIX=E3=80=91?= =?UTF-8?q?=E5=A4=9A=E6=9C=BA=E8=AE=AD=E7=BB=83=E6=95=B0=E6=8D=AE=E4=BF=9D?= =?UTF-8?q?=E5=AD=98=E5=8A=A0=E8=BD=BD=E9=80=82=E9=85=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mx_rec/saver/patch.py | 8 +++++--- mx_rec/saver/saver.py | 27 ++++++++++++++++++--------- 2 files changed, 23 insertions(+), 12 deletions(-) diff --git a/mx_rec/saver/patch.py b/mx_rec/saver/patch.py index dcdf95ca..0f3a237b 100644 --- a/mx_rec/saver/patch.py +++ b/mx_rec/saver/patch.py @@ -44,7 +44,8 @@ from tensorflow.python.training.saving import saveable_object_util import numpy as np from mpi4py import MPI -from mx_rec.saver.saver import Saver as SparseSaver, check_file_system_is_valid +from mx_rec.saver.saver import Saver as SparseSaver, check_file_system_is_valid, should_write_data +from 
mx_rec.util.communication.hccl_ops import get_local_rank_size from mx_rec.util.initialize import ConfigInitializer from mx_rec.validator.validator import para_checker_decorator, ClassValidator, StringValidator, OptionalIntValidator, \ OptionalStringValidator, DirectoryValidator @@ -253,7 +254,7 @@ def save(self, sess, save_path, global_step=None, latest_filename=None, meta_gra comm = MPI.COMM_WORLD rank = comm.Get_rank() comm.Barrier() - if rank == 0: + if should_write_data(rank, save_path): model_checkpoint_path = compat.as_str(get_model_checkpoint_path(self, checkpoint_file, sess)) if write_state: update_checkpoint_state(self, model_checkpoint_path, save_path_parent, latest_filename, meta_graph_suffix, @@ -453,10 +454,11 @@ def patch_for_write_graph_func(func): comm = MPI.COMM_WORLD rank = comm.Get_rank() # In the case of multiple processes, choose one process to write graph. - if rank == 0: + if len(args) > 1 and should_write_data(rank, args[1]): return func(*args, **kwargs) else: return None + return wrapper diff --git a/mx_rec/saver/saver.py b/mx_rec/saver/saver.py index 9e0e1d29..a6362506 100644 --- a/mx_rec/saver/saver.py +++ b/mx_rec/saver/saver.py @@ -35,7 +35,6 @@ from mx_rec.util.log import logger from mx_rec.optimizers.base import CustomizedOptimizer from mx_rec.util.tf_version_adapter import npu_ops - SAVE_SPARSE_PATH_PREFIX = "sparse" @@ -171,7 +170,7 @@ class Saver(object): comm = MPI.COMM_WORLD rank = comm.Get_rank() comm.Barrier() - if rank == 0: + if should_write_data(rank, saving_path): table_list = self.save_op_dict.keys() for table_name in table_list: self.merge_sparse_file(saving_path, table_name) @@ -267,7 +266,7 @@ class Saver(object): else: self._save_ddr(sess, root_dir) logger.debug(f"Host data was saved.") - + def _save_hbm(self, sess, root_dir): self.config_instance.hybrid_manager_config.save_host_data(root_dir) if self.config_instance.use_dynamic_expansion: @@ -285,7 +284,7 @@ class Saver(object): for thread in threads: thread.join() - + def _save_ddr(self, sess, root_dir): # 接受host侧传来的需要swap_out的offset用于更新host侧并保存 self.config_instance.hybrid_manager_config.fetch_device_emb() @@ -306,7 +305,7 @@ class Saver(object): channel_name=f'{table_name}_save_h2d_{TRAIN_CHANNEL_ID}') if use_static: swap_out_pos = swap_out_pos[:swap_out_len] - + table = [var] optimizer = ConfigInitializer.get_instance().optimizer_config.get_optimizer_by_table_name(table_name) if optimizer is not None: @@ -382,7 +381,6 @@ class Saver(object): else: placeholder_dict, restore_fetch_list = self.placeholder_dict, self.restore_fetch_dict - for table_name in placeholder_dict: optimizer_instance = ConfigInitializer.get_instance().optimizer_config.optimizer_instance if optimizer_instance: @@ -395,7 +393,7 @@ class Saver(object): table_instance0 = self.config_instance.sparse_embed_config.get_table_instance(self.var_list[0]) if not table_instance0.is_hbm: return - + if self.config_instance.use_dynamic_expansion: # Data related to dynamic expansion needs to be restored only on the host side. 
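            # (Hedged note: returning here skips the device-side restore feeds built below; with
            # dynamic expansion the device cache is presumably refilled through the normal swap-in
            # path once training resumes.)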
            return
@@ -405,7 +403,7 @@
         for table_name, sub_placeholder_dict in placeholder_dict.items():
             load_offset = self.config_instance.hybrid_manager_config.get_load_offset(table_name)
             fill_placeholder(reading_path, sub_placeholder_dict, restore_feed_dict,
-                             NameDescriptor(table_name, DataName.EMBEDDING.value), load_offset)
+                             NameDescriptor(table_name, DataName.EMBEDDING.value), load_offset)

         if "optimizer" in sub_placeholder_dict:
             optimizer_state_placeholder_dict_group = sub_placeholder_dict.get("optimizer")
@@ -698,4 +696,15 @@ def set_optimizer_info(optimizer: CustomizedOptimizer, table_name: str):
     """
     from mxrec_pybind import OptimizerInfo
     optim_info = OptimizerInfo(optimizer.optimizer_type, optimizer.optim_param_list)
-    ConfigInitializer.get_instance().hybrid_manager_config.set_optim_info(table_name, optim_info)
\ No newline at end of file
+    ConfigInitializer.get_instance().hybrid_manager_config.set_optim_info(table_name, optim_info)
+
+
+def should_write_data(rank_id: int, save_path: str) -> bool:
+    # When using an hdfs filesystem, only the rank0 process executes the write, assuming the same
+    # hdfs path is used by every machine.
+    # When using a local filesystem, each process with `rank_id % local_rank_size == 0` executes the write.
+    # When using an hdfs filesystem with a different hdfs path per machine, this check condition
+    # should be modified to match the local filesystem case.
+    is_hdfs = check_file_system_is_hdfs(save_path)
+    local_rank_size = get_local_rank_size()
+    return rank_id == 0 if is_hdfs else rank_id % local_rank_size == 0
-- 
Gitee


From f07efc133ddc6a416d6e7f5fec9e7bb2fddacd21 Mon Sep 17 00:00:00 2001
From: penghuiyang <1060916628@qq.com>
Date: Mon, 8 Jul 2024 11:46:03 +0800
Subject: [PATCH 265/302] =?UTF-8?q?=E3=80=90FIX=E3=80=91criteo=E6=95=B0?=
 =?UTF-8?q?=E6=8D=AE=E5=A4=84=E7=90=86=E8=84=9A=E6=9C=AC=E5=88=A4=E6=96=AD?=
 =?UTF-8?q?=E6=9D=A1=E4=BB=B6=E4=BF=AE=E6=94=B9?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 examples/dlrm/criteo_tb/gen_ttf.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/examples/dlrm/criteo_tb/gen_ttf.py b/examples/dlrm/criteo_tb/gen_ttf.py
index 8715f048..986bc6df 100644
--- a/examples/dlrm/criteo_tb/gen_ttf.py
+++ b/examples/dlrm/criteo_tb/gen_ttf.py
@@ -224,9 +224,9 @@ def make_example(label_list, dense_feat_list, sparse_feat_list):
     sparse_feature = np.array(sparse_feat_list, dtype=np.int64).reshape(-1)
     label = np.array(label_list, dtype=np.int64).reshape(-1)
     feature_dict = {"dense_feature": tf.train.Feature(float_list=tf.train.FloatList(value=dense_feature)),
-                    "sparse_feature": tf.train.Feature(int64_list=tf.train.Int64List(value=sparse_feature)),
-                    "label": tf.train.Feature(int64_list=tf.train.Int64List(value=label))
-                    }
+                    "sparse_feature": tf.train.Feature(int64_list=tf.train.Int64List(value=sparse_feature)),
+                    "label": tf.train.Feature(int64_list=tf.train.Int64List(value=label))
+                    }
     example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
     return example

@@ -273,10 +273,10 @@ def convert_input2tfrd_multiprocess(proc_num, proc_id, in_file_path, output_file
             label = int(items[0])
             values = items[1:14]
             cats = items[14:]
-            if len(values) == 13:
-                raise ValueError("values.size: {}".format(len(values)))
-            if len(cats) == 26:
-                raise ValueError("cats.size: {}".format(len(cats)))
+            if len(values) != 13:
+                raise ValueError("dense feature length must be 13, current values.size: {}".format(len(values)))
+            if len(cats) != 26:
+                raise ValueError("sparse feature length must be 26, current cats.size: {}".format(len(cats)))
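            # Recap of the schema enforced above: each Criteo line carries a label, 13 integer
            # (dense) features and 26 categorical features, i.e. 40 fields in total.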
ValueError("sparse feature length must be 26, current cats.size: {}".format(len(cats))) val_list, cat_list = criteo_stats_dict.map_cat2id(values, cats) dense_res_list.append(val_list) cat_res_list.append(cat_list) @@ -363,7 +363,7 @@ if __name__ == "__main__": process_num = args.train_process_num if len(train_data_files) == 0: raise ValueError(f'file not exist in train_data_dir:{train_data_dir}') - if process_num % len(train_data_files) == 0: + if process_num % len(train_data_files) != 0: raise ValueError(f'process_num {process_num} must exact div length of train_data_files {len(train_data_files)}') for process_id in range(process_num): @@ -387,7 +387,7 @@ if __name__ == "__main__": process_num = args.test_process_num if len(test_data_files) == 0: raise ValueError(f'file not exist in test_data_dir:{test_data_dir}') - if process_num % len(test_data_files) == 0: + if process_num % len(test_data_files) != 0: raise ValueError(f'process_num {process_num} must exact div length of test_data_files {len(test_data_files)}') for process_id in range(process_num): -- Gitee From 33991245ee3d8f68cccb5d18b5ae6a20fab07014 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=BD=97=E5=B9=B8=E8=BF=90?= Date: Tue, 9 Jul 2024 10:49:44 +0800 Subject: [PATCH 266/302] =?UTF-8?q?=E3=80=90FIX=E3=80=91=E6=89=A9=E5=AE=B9?= =?UTF-8?q?=E6=A8=A1=E5=BC=8F=E4=B8=8B=EF=BC=8Ctable.capacity=E5=87=BA?= =?UTF-8?q?=E7=8E=B0=E5=81=B6=E5=8F=91=E8=B4=9F=E5=80=BC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core/emb_table/embedding_table.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/emb_table/embedding_table.h b/src/core/emb_table/embedding_table.h index 3396a8a0..ef741887 100644 --- a/src/core/emb_table/embedding_table.h +++ b/src/core/emb_table/embedding_table.h @@ -114,7 +114,7 @@ protected: size_t embSize_; size_t extEmbSize_; int seed_; - std::atomic capacity_; + std::atomic capacity_{0}; size_t rankId_; size_t rankSize_; vector loadOffset; -- Gitee From 1b81040851f1bf326983ce1e4e6589c0c4a5986d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=BD=95=E9=9C=96?= Date: Tue, 9 Jul 2024 15:45:28 +0800 Subject: [PATCH 267/302] =?UTF-8?q?=E4=BF=AE=E6=94=B9mxRec=E9=95=9C?= =?UTF-8?q?=E5=83=8F=E4=BB=93=E7=9A=84=E9=93=BE=E6=8E=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 17d38fcd..f6bfb828 100644 --- a/README.md +++ b/README.md @@ -125,6 +125,6 @@ mxRec所支持的使用环境、功能特性、API接口与使用样例请参考 mxRec框架基础镜像,基于TensorFlow 1.15.0、tensorflow2.6.5制作的基础镜像,安装mxRec后即可开始训练,以及样例使用介绍。 -1. https://ascendhub.huawei.com/#/detail/mxrec-tf1 +1. https://www.hiascend.com/developer/ascendhub/detail/mxrec-tf1 -2. https://ascendhub.huawei.com/#/detail/mxrec-tf2 +2. 
https://www.hiascend.com/developer/ascendhub/detail/mxrec-tf2 -- Gitee From 42ac8e68ecab452042587c8fe7bac19c7abca82c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=BD=95=E9=9C=96?= Date: Tue, 9 Jul 2024 15:46:14 +0800 Subject: [PATCH 268/302] =?UTF-8?q?=E6=B7=BB=E5=8A=A0dlrm=E6=A8=A1?= =?UTF-8?q?=E5=9E=8B=E8=BF=90=E8=A1=8C=E8=AF=B4=E6=98=8E?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/dlrm/README.md | 60 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 examples/dlrm/README.md diff --git a/examples/dlrm/README.md b/examples/dlrm/README.md new file mode 100644 index 00000000..85293c0c --- /dev/null +++ b/examples/dlrm/README.md @@ -0,0 +1,60 @@ +# DLRM模型运行说明 + +## 代码结构 +```shell +. +├── criteo_tb +│ ├── gen_ttf.py # criteo_tb原始数据转换成tfrecord格式的脚本 +│ └── README.md # 数据格式转换脚本说明 +├── model +│ ├── config.py # 模型配置文件 +│ ├── delay_loss_scale.py # loss缩放函数 +│ ├── gradient_descent_w.py # 自定义SGD优化器 +│ ├── main_mxrec.py # 主函数 +│ ├── mean_auc.py # 计算acu的脚本 +│ ├── model.py # DLRM模型 +│ ├── op_impl_mode.ini # 算子执行模式配置 +│ ├── optimizer.py # 优化器 +│ └── run.sh # 运行DLRM模型的脚本 +└── README.md # DLRM模型运行说明 +``` + +## 1.准备数据 +参考criteo_tb目录下的说明文档准备好模型所需要的数据集,放在一个目录下,比如:/data/criteo_tb/。 + +## 2.准备运行环境 +运行环境可以参考[mxRec用户指南](https://www.hiascend.com/document/detail/zh/mind-sdk/60rc1/mxRec/mxrecug/mxrecug_0007.html) +“安装部署”章节进行准备。 + +## 3.安装mxRec +mxRec软件包可以通过[mxRec用户指南](https://www.hiascend.com/document/detail/zh/mind-sdk/60rc1/mxRec/mxrecug/mxrecug_0007.html) +“安装部署”>“环境准备”>“获取软件包”章节提供的链接进行下载,选择自己需要的架构(x86或者arm)的mxRec包。下载完成之后,将mxRec包解压,进入解压后的目录(mindxsdk-mxrec) +如下: +```shell +. +├── cust_op +│ └── cust_op_by_addr +├── examples +│ ├── DCNv2 +│ ├── demo +│ └── dlrm +├── tf1_whl +│ └── mx_rec-{version}-py3-none-linux_x86_64.whl # version为版本号 +├── tf2_whl +│ └── mx_rec-{version}-py3-none-linux_x86_64.whl # version为版本号 +└── version.info +``` +其中,tf1_whl和tf2_whl目录下分别是适配tf1和tf2的mxRec软件包,按照自己需要选择其中一个进行安装即可(用pip/pip3 install 软件包这种方式进行安装)。 +确认安装mxRec的目录,比如mxRec安装在 /usr/local/python3.7.5/lib/python3.7/site-packages/mx_rec这个目录下。 + +## 4.运行DLRM模型 +执行完以上步骤之后,接下来就可以运行DLRM模型,其中run.sh就是运行的脚本,默认是8张卡。其中需要传入5个参数,分别对应:so_path、mx_rec_package_path、hccl_cfg_json、 +dlrm_criteo_data_path和ip。运行命令如: +```shell +bash run.sh {so_path} {mx_rec_package_path} {hccl_cfg_json} {dlrm_criteo_data_path} {ip} +``` +* so_path:so_path是mxRec中动态库的目录,一般在mxRec的安装目录下的libasc目录,比如:/usr/local/python3.7.5/lib/python3.7/site-packages/mx_rec/libasc。 +* mx_rec_package_path:mx_rec_package_path是mxRec的安装目录,比如:/usr/local/python3.7.5/lib/python3.7/site-packages/mx_rec。 +* hccl_cfg_json:hccl_cfg_json是hccl通信配置文件,如果配置了ip参数,这个参数就不用了,直接给一个""空字符串即可。 +* dlrm_criteo_data_path:dlrm_criteo_data_path是数据集所在的目录,比如/data/criteo_tb/。 +* ip:ip是运行模型的机器所在的ip,建议配置。 -- Gitee From 31aa8b6db348a4e8dd2688b1331559eb20264aa1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=BD=95=E9=9C=96?= Date: Tue, 9 Jul 2024 15:54:15 +0800 Subject: [PATCH 269/302] =?UTF-8?q?=E6=B7=BB=E5=8A=A0DCNv2=E6=A8=A1?= =?UTF-8?q?=E5=9E=8B=E8=BF=90=E8=A1=8C=E8=AF=B4=E6=98=8E?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/DCNv2/README.md | 54 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 examples/DCNv2/README.md diff --git a/examples/DCNv2/README.md b/examples/DCNv2/README.md new file mode 100644 index 00000000..e9b8a75f --- /dev/null +++ b/examples/DCNv2/README.md @@ -0,0 +1,54 @@ +# DCNv2模型运行说明 + +## 代码结构 +```shell +. 
+├── config.py # 模型配置文件 +├── delay_loss_scale.py # loss缩放函数 +├── main_mxrec.py # 主函数 +├── model.py # DCNv2模型 +├── op_impl_mode.ini # 算子执行模式配置 +├── optimizer.py # 优化器 +├── README.md # DCNv2模型运行说明 +└── run.sh # 运行DCNv2模型的脚本 +``` + +## 1.准备数据 +参考dlrm模型中criteo_tb目录下的说明文档准备好模型所需要的数据集,放在一个目录下,比如:/data/criteo_tb/。 + +## 2.准备运行环境 +运行环境可以参考[mxRec用户指南](https://www.hiascend.com/document/detail/zh/mind-sdk/60rc1/mxRec/mxrecug/mxrecug_0007.html) +“安装部署”章节进行准备。 + +## 3.安装mxRec +mxRec软件包可以通过[mxRec用户指南](https://www.hiascend.com/document/detail/zh/mind-sdk/60rc1/mxRec/mxrecug/mxrecug_0007.html) +“安装部署”>“环境准备”>“获取软件包”章节提供的链接进行下载,选择自己需要的架构(x86或者arm)的mxRec包。下载完成之后,将mxRec包解压,进入解压后的目录(mindxsdk-mxrec) +如下: +```shell +. +├── cust_op +│ └── cust_op_by_addr +├── examples +│ ├── DCNv2 +│ ├── demo +│ └── dlrm +├── tf1_whl +│ └── mx_rec-{version}-py3-none-linux_x86_64.whl # version为版本号 +├── tf2_whl +│ └── mx_rec-{version}-py3-none-linux_x86_64.whl # version为版本号 +└── version.info +``` +其中,tf1_whl和tf2_whl目录下分别是适配tf1和tf2的mxRec软件包,按照自己需要选择其中一个进行安装即可(用pip/pip3 install 软件包这种方式进行安装)。 +确认安装mxRec的目录,比如mxRec安装在 /usr/local/python3.7.5/lib/python3.7/site-packages/mx_rec这个目录下。 + +## 4.运行DLRM模型 +执行完以上步骤之后,接下来就可以运行DLRM模型,其中run.sh就是运行的脚本,默认是8张卡。其中需要传入5个参数,分别对应:so_path、mx_rec_package_path、hccl_cfg_json、 +dlrm_criteo_data_path和ip。运行命令如: +```shell +bash run.sh {so_path} {mx_rec_package_path} {hccl_cfg_json} {dlrm_criteo_data_path} {ip} +``` +* so_path:so_path是mxRec中动态库的目录,一般在mxRec的安装目录下的libasc目录,比如:/usr/local/python3.7.5/lib/python3.7/site-packages/mx_rec/libasc。 +* mx_rec_package_path:mx_rec_package_path是mxRec的安装目录,比如:/usr/local/python3.7.5/lib/python3.7/site-packages/mx_rec。 +* hccl_cfg_json:hccl_cfg_json是hccl通信配置文件,如果配置了ip参数,这个参数就不用了,直接给一个""空字符串即可。 +* dlrm_criteo_data_path:dlrm_criteo_data_path是数据集所在的目录,比如/data/criteo_tb/。 +* ip:ip是运行模型的机器所在的ip,建议配置。 -- Gitee From 39fa9310b122431bffc75204d7fb8d18343db93f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=83=AD=E6=9C=9B?= <1244372993@qq.com> Date: Tue, 9 Jul 2024 16:10:33 +0800 Subject: [PATCH 270/302] =?UTF-8?q?WideDeep=E6=A0=B7=E4=BE=8B=20README?= =?UTF-8?q?=E6=96=87=E6=A1=A3=E5=AE=8C=E5=96=84?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/WideDeep/{README_WD.md => README.md} | 45 +++++++++++++------ 1 file changed, 31 insertions(+), 14 deletions(-) rename examples/WideDeep/{README_WD.md => README.md} (89%) diff --git a/examples/WideDeep/README_WD.md b/examples/WideDeep/README.md similarity index 89% rename from examples/WideDeep/README_WD.md rename to examples/WideDeep/README.md index beb592c9..aef2379f 100644 --- a/examples/WideDeep/README_WD.md +++ b/examples/WideDeep/README.md @@ -4,7 +4,7 @@ *** ## 开源项目链接 - +Commits on Apr 29, 2022, 提交的SHA-1 hash值(提交ID):4bbfb492b872c5a3290a2bce1ed5c160162558a3 ```shell https://github.com/ZiyaoGeng/RecLearn ``` @@ -41,7 +41,7 @@ python critro.py --data_path data_path --output_path output_path ```python # get txt_list -split_file_list = get_split_file_path(dataset_path = dataset_path) +file_split_list = get_split_file_path(dataset_path=data_path) ``` *** #### 2. 建立特征映射 @@ -49,7 +49,7 @@ split_file_list = get_split_file_path(dataset_path = dataset_path) ```python # get feature_map -fea_map = get_fea_map(split_file_list=split_file_list) +feature_map = get_fea_map(split_file_list=file_split_list) ``` *** #### 3. dense_feature分桶离散化 @@ -57,7 +57,7 @@ fea_map = get_fea_map(split_file_list=split_file_list) ```python # dense feature: Bin continuous data into intervals. 
-data_df[dense_features] = rec_kbins_discretizer(data_df[dense_features], 1000, fea_map) +data_df[dense_features] = rec_kbins_discretizer(data_df[dense_features], 1000, feature_map) ``` *** #### 4. sparse_feature特征映射 @@ -66,7 +66,10 @@ data_df[dense_features] = rec_kbins_discretizer(data_df[dense_features], 1000, f ```python # sparse feature: mapping for col in sparse_features: - data_df[col] = data_df[col].map(lambda x: fea_map[col][x]) + try: + data_df[col] = data_df[col].map(lambda x: feature_map[col][x]) + except KeyError as er: + raise KeyError("Feature {} not found in dataset".format(col)) from er ``` *** #### 5. 39个特征增加偏移项 @@ -74,12 +77,14 @@ for col in sparse_features: ```python # add offsets -slot_size_array = [1001, 1001, 1001, 1001, 1001, 1001, 1001, 1001, 1001, 1001, 1001, 1001, 1001, - 1462, 585, 10131228, 2202609, 307, 25, 12519, 635, 5, 93147, 5685, 8351594, 3196, - 29, 14994, 5461307, 12, 5654, 2174, 5, 7046548, 19, 17, 286182, 106, 142573] +slot_size_array = [ + 1001, 1001, 1001, 1001, 1001, 1001, 1001, 1001, 1001, 1001, 1001, 1001, 1001, + 1462, 585, 10131228, 2202609, 307, 25, 12519, 635, 5, 93147, 5685, 8351594, 3196, + 29, 14994, 5461307, 12, 5654, 2174, 5, 7046548, 19, 17, 286182, 106, 142573 +] offset_size_list = np.cumsum([0] + slot_size_array[:-1]) -for j in range(1,len(offset_size_list)+1): - data_df.iloc[:, j] += offset_size_list[j-1] +for col_index in range(1, len(offset_size_list) + 1): + data_df.iloc[:, col_index] += offset_size_list[col_index - 1] ``` *** #### 6. 数据集格式转换:txt >> tfrecord @@ -93,13 +98,25 @@ convert_input2tfrd(in_file_path=file, out_file_path=output_path) ## 模型运行 -参考mxrec的`README.md`文件在NPU服务器上配置环境后,可按照[mxrec-tf1](https://ascendhub.huawei.com/#/detail/mxrec-tf1)中DLRM模型运行命令启动模型训练。`so_path`、`mx_rec_package_path`、`hccl_cfg_json`配置不变,根据实际数据集路径配置`dlrm_criteo_data_path`。 +参考mxrec的`README.md`文件在NPU服务器上配置环境并安装镜像创建容器后,可参考DLRM模型运行命令启动模型训练。模型运行脚本是run.sh,运行此脚本需要四个参数:so_path、mx_rec_package_path、hccl_cfg_json以及dlrm_criteo_data_path。其中, +- so_path: mxrec中libasc所在路径,在镜像中已经安装过mxrec,所以so_path是:/usr/local/python3.7.5/lib/python3.7/site-packages/mx_rec/libasc/ +- mx_rec_package_path: mxrec这个包的安装路径,镜像中是:/usr/local/python3.7.5/lib/python3.7/site-packages/mx_rec/ +- hccl_cfg_json: hccl配置文件所在路径,一般是当前路径下的hccl文件 +- dlrm_criteo_data_path: Wide&Deep模型需要的数据所在路径,根据实际情况进行配置 +运行mxRec有两种方式,一种是使用hccl配置文件(rank table方案),一种是不使用hccl配置文件(去rank table方案)。 +- 使用hccl配置文件(rank table方案) ```shell -# 运行命令 bash run.sh {so_path} {mx_rec_package_path} {hccl_cfg_json} {dlrm_criteo_data_path} ``` *** +- 不使用hccl配置文件(去rank table方案) +```shell +bash run.sh {so_path} {mx_rec_package_path} {hccl_cfg_json} {dlrm_criteo_data_path} {IP} +``` +如:bash run.sh /usr/local/python3.7.5/lib/python3.7/site-packages/mx_rec/libasc/ /usr/local/python3.7.5/lib/python3.7/site-packages/mx_rec/ hccl_json_8p.json /dataset 10.10.10.10。 +**注意:** 去rank table方案,当前路径下不存在hccl文件,模型仍可正常运行。 + ## 模型结果 [开源项目](https://github.com/ZiyaoGeng/RecLearn)使用Criteo4500W数据集在GPU上训练模型,结果为`Log Loss=0.4692`、`AUC=0.7930`。适配完成模型后,固定`CACHE_MODE="HBM"`、`USE_FAAE=0`,在`run.sh`中配置其他选项卡,运行结果如下。 @@ -135,8 +152,8 @@ bash run.sh {so_path} {mx_rec_package_path} {hccl_cfg_json} {dlrm_criteo_data_pa *** ## 模型迁移 -**迁移思路:** 在现有已适配好的dlrm模型框架下,改动相关代码逻辑,完成Wide&deep模型的适配。**核心:根据开源项目model代码修改`model.py`;数据处理操作一部分放入`criteo.py`,一部分放入`main_mxrec.py`中`make_batch_and_iterator()`内;`main_mxrec.py`中其他相关代码改动主要是为了适配mxrec提供的相关特性。** - +**迁移思路:** 
在现有已适配好的dlrm模型框架下,改动相关代码逻辑,完成Wide&deep模型的适配。**核心:根据开源项目model代码修改`model.py`;数据处理操作一部分放入`criteo.py`,一部分放入`main_mxrec.py`中`make_batch_and_iterator()`内;`main_mxrec.py`中其他相关代码改动主要是为了适配mxrec提供的相关特性。** +详细改动见https://gitee.com/ascend/mxrec/pulls/171/commits,Commits ID:7a05b033d41af51df9aed7414ad04216dff821cc。 下文所提到的`动态扩容`、`动态shape`、`自动改图`、`一表多查`是mxrec提供的相关特性,开关选项见`run.sh`。 ```shell -- Gitee From 909ace13858217f2812884cd13d0ad8aeaaf7d19 Mon Sep 17 00:00:00 2001 From: penghuiyang <1060916628@qq.com> Date: Tue, 9 Jul 2024 16:23:46 +0800 Subject: [PATCH 271/302] =?UTF-8?q?=E3=80=90FIX=E3=80=91LazyAdam=E8=9E=8D?= =?UTF-8?q?=E5=90=88=E7=AE=97=E5=AD=90=E6=8F=8F=E8=BF=B0=E4=BF=A1=E6=81=AF?= =?UTF-8?q?=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cust_op/fused_lazy_adam/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cust_op/fused_lazy_adam/README.md b/cust_op/fused_lazy_adam/README.md index 13ed6994..3cb69f2d 100644 --- a/cust_op/fused_lazy_adam/README.md +++ b/cust_op/fused_lazy_adam/README.md @@ -6,7 +6,7 @@ ├── aclnn_lazy_adam_test # 单算子测试用例 ├── lazy_adam.json # 算子原型配置 ├── op_host # LazyAdam融合算子Host侧实现 -├── op_kernel # LazyAdam融合算子Kernel测实现 +├── op_kernel # LazyAdam融合算子Kernel侧实现 ├── README.md # LazyAdam融合算子说明文档 └── run.sh # LazyAdam融合算子安装脚本 ``` -- Gitee From 14ac6e7f2f7d5b62f9ba4aaae3e57c2082ea036a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=83=AD=E6=9C=9B?= <1244372993@qq.com> Date: Tue, 9 Jul 2024 17:14:57 +0800 Subject: [PATCH 272/302] =?UTF-8?q?=E6=A3=80=E8=A7=86=E6=84=8F=E8=A7=81?= =?UTF-8?q?=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/WideDeep/README.md | 5 +++-- examples/WideDeep/criteo.py | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/examples/WideDeep/README.md b/examples/WideDeep/README.md index aef2379f..f4815cd9 100644 --- a/examples/WideDeep/README.md +++ b/examples/WideDeep/README.md @@ -5,6 +5,7 @@ *** ## 开源项目链接 Commits on Apr 29, 2022, 提交的SHA-1 hash值(提交ID):4bbfb492b872c5a3290a2bce1ed5c160162558a3 +commit的链接: https://github.com/ZiyaoGeng/RecLearn/tree/4bbfb492b872c5a3290a2bce1ed5c160162558a3 ```shell https://github.com/ZiyaoGeng/RecLearn ``` @@ -68,8 +69,8 @@ data_df[dense_features] = rec_kbins_discretizer(data_df[dense_features], 1000, f for col in sparse_features: try: data_df[col] = data_df[col].map(lambda x: feature_map[col][x]) - except KeyError as er: - raise KeyError("Feature {} not found in dataset".format(col)) from er + except KeyError as e: + raise KeyError("Feature {} not found in dataset".format(col)) from e ``` *** #### 5. 39个特征增加偏移项 diff --git a/examples/WideDeep/criteo.py b/examples/WideDeep/criteo.py index 617c76f6..3c8ea430 100644 --- a/examples/WideDeep/criteo.py +++ b/examples/WideDeep/criteo.py @@ -248,8 +248,8 @@ if __name__ == '__main__': for col in sparse_features: try: data_df[col] = data_df[col].map(lambda x: feature_map[col][x]) - except KeyError as er: - raise KeyError("Feature {} not found in dataset".format(col)) from er + except KeyError as e: + raise KeyError("Feature {} not found in dataset".format(col)) from e # dense feature: Bin continuous data into intervals. 
data_df[dense_features] = rec_kbins_discretizer(data_df[dense_features], 1000, feature_map) # add offsets -- Gitee From 30d416ea128496119c1e95ed43240628727c7ca3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=BD=95=E9=9C=96?= Date: Tue, 9 Jul 2024 20:18:51 +0800 Subject: [PATCH 273/302] =?UTF-8?q?=E6=B7=BB=E5=8A=A0demo=E6=A8=A1?= =?UTF-8?q?=E5=9E=8B=E8=BF=90=E8=A1=8C=E8=AF=B4=E6=98=8E?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/DCNv2/README.md | 6 +- examples/demo/README.md | 13 +++++ examples/demo/little_demo/README.md | 56 ++++++++++++++++++ examples/demo/little_demo_estimator/README.md | 57 +++++++++++++++++++ 4 files changed, 129 insertions(+), 3 deletions(-) create mode 100644 examples/demo/README.md create mode 100644 examples/demo/little_demo/README.md create mode 100644 examples/demo/little_demo_estimator/README.md diff --git a/examples/DCNv2/README.md b/examples/DCNv2/README.md index e9b8a75f..f1940ebe 100644 --- a/examples/DCNv2/README.md +++ b/examples/DCNv2/README.md @@ -14,7 +14,7 @@ ``` ## 1.准备数据 -参考dlrm模型中criteo_tb目录下的说明文档准备好模型所需要的数据集,放在一个目录下,比如:/data/criteo_tb/。 +参考DLRM模型中criteo_tb目录下的说明文档准备好模型所需要的数据集,放在一个目录下,比如:/data/criteo_tb/。 ## 2.准备运行环境 运行环境可以参考[mxRec用户指南](https://www.hiascend.com/document/detail/zh/mind-sdk/60rc1/mxRec/mxrecug/mxrecug_0007.html) @@ -41,8 +41,8 @@ mxRec软件包可以通过[mxRec用户指南](https://www.hiascend.com/document/ 其中,tf1_whl和tf2_whl目录下分别是适配tf1和tf2的mxRec软件包,按照自己需要选择其中一个进行安装即可(用pip/pip3 install 软件包这种方式进行安装)。 确认安装mxRec的目录,比如mxRec安装在 /usr/local/python3.7.5/lib/python3.7/site-packages/mx_rec这个目录下。 -## 4.运行DLRM模型 -执行完以上步骤之后,接下来就可以运行DLRM模型,其中run.sh就是运行的脚本,默认是8张卡。其中需要传入5个参数,分别对应:so_path、mx_rec_package_path、hccl_cfg_json、 +## 4.运行DCNv2模型 +执行完以上步骤之后,接下来就可以运行DCNv2模型,其中run.sh就是运行的脚本,默认是8张卡。其中需要传入5个参数,分别对应:so_path、mx_rec_package_path、hccl_cfg_json、 dlrm_criteo_data_path和ip。运行命令如: ```shell bash run.sh {so_path} {mx_rec_package_path} {hccl_cfg_json} {dlrm_criteo_data_path} {ip} diff --git a/examples/demo/README.md b/examples/demo/README.md new file mode 100644 index 00000000..931f8c26 --- /dev/null +++ b/examples/demo/README.md @@ -0,0 +1,13 @@ +# demo样例说明 + +## 代码结构 +```shell +. +├── little_demo # sess.run模式的demo +├── little_demo_estimator # estimator模式的demo +└── README.md # demo样例说明 +``` + +mxRec提供了一个非常简单的样例模型demo,用于快速体验mxRec。在TensorFlow中,运行模型有sess.run和estimator两种模式。因此,mxRec也提供了两种 +模式下的样例。其中little_demo是sess.run模式的样例;little_demo_estimator是estimator模式的样例。用户可以选择自己需要或者感兴趣的模式进行 +体验,各个模式的样例的说明见对应目录下的README文档。 \ No newline at end of file diff --git a/examples/demo/little_demo/README.md b/examples/demo/little_demo/README.md new file mode 100644 index 00000000..dabe105b --- /dev/null +++ b/examples/demo/little_demo/README.md @@ -0,0 +1,56 @@ +# sess.run模式下demo模型运行说明 + +## 代码结构 +```shell +. 
+├── config.py # 模型配置文件 +├── dataset.py # 生成数据集的脚本 +├── deterministic_loss # 确定性计算loss样例 +├── main.py # 主函数 +├── model.py # demo模型 +├── op_impl_mode.ini # 算子执行模式配置 +├── optimizer.py # 优化器 +├── random_data_generator.py # 数据生成器 +├── README.md # demo模型运行说明 +├── run_deterministic.sh # 运行确定性计算的脚本 +├── run_mode.py # 执行模型train、evaluate和predict的脚本 +└── run.sh # demo运行脚本 +``` + +## 1.准备数据 +demo样例无需从其他地方下载数据集,在demo样例中mxRec会自动生成数据集,详情见dataset.py和random_data_generator.py。 + +## 2.准备运行环境 +运行环境可以参考[mxRec用户指南](https://www.hiascend.com/document/detail/zh/mind-sdk/60rc1/mxRec/mxrecug/mxrecug_0007.html) +“安装部署”章节进行准备。 + +## 3.安装mxRec +mxRec软件包可以通过[mxRec用户指南](https://www.hiascend.com/document/detail/zh/mind-sdk/60rc1/mxRec/mxrecug/mxrecug_0007.html) +“安装部署”>“环境准备”>“获取软件包”章节提供的链接进行下载,选择自己需要的架构(x86或者arm)的mxRec包。下载完成之后,将mxRec包解压,进入解压后的目录(mindxsdk-mxrec) +如下: +```shell +. +├── cust_op +│ └── cust_op_by_addr +├── examples +│ ├── DCNv2 +│ ├── demo +│ └── dlrm +├── tf1_whl +│ └── mx_rec-{version}-py3-none-linux_x86_64.whl # version为版本号 +├── tf2_whl +│ └── mx_rec-{version}-py3-none-linux_x86_64.whl # version为版本号 +└── version.info +``` +其中,tf1_whl和tf2_whl目录下分别是适配tf1和tf2的mxRec软件包,按照自己需要选择其中一个进行安装即可(用pip/pip3 install 软件包这种方式进行安装)。 +确认安装mxRec的目录,比如mxRec安装在 /usr/local/python3.7.5/lib/python3.7/site-packages/mx_rec这个目录下。 + +## 4.运行demo模型 +执行完以上步骤之后,接下来就可以运行demo模型,其中run.sh就是运行的脚本,默认是8张卡。其中需要传入ip这个参数,运行命令如: +```shell +bash run.sh main.py {ip} +``` +* ip:ip是运行模型的机器所在的ip。 + +**Tips**:run.sh脚本中有一个参数是mx_rec_package_path,mx_rec_package_path是mxRec的安装目录,比如:/usr/local/python3.7.5/lib/python3.7/site-packages/mx_rec。 +这个参数在脚本是默认的,用户需要根据自己环境中mxRec实际安装的路径进行配置。 \ No newline at end of file diff --git a/examples/demo/little_demo_estimator/README.md b/examples/demo/little_demo_estimator/README.md new file mode 100644 index 00000000..aca25a34 --- /dev/null +++ b/examples/demo/little_demo_estimator/README.md @@ -0,0 +1,57 @@ +# estimator模式下demo模型运行说明 + +## 代码结构 +```shell +. +├── config.py # 模型配置文件 +├── dataset.py # 生成数据集的脚本 +├── main.py # 主函数 +├── nn_model_build.py # demo模型 +├── nn_model_input.py # 定义model_fn +├── nn_optim.py # 定义train的各个op +├── nn_reader.py # 定义input_fn +├── op_precision.ini # 算子执行模式配置 +├── random_data_generator.py # 数据生成器 +├── README.md # demo模型运行说明 +├── run.sh # demo运行脚本 +├── tf_adapter.py # 导入tf adapter +└── utils.py # 公共函数 +``` + +## 1.准备数据 +demo样例无需从其他地方下载数据集,在demo样例中mxRec会自动生成数据集,详情见dataset.py和random_data_generator.py。 + +## 2.准备运行环境 +运行环境可以参考[mxRec用户指南](https://www.hiascend.com/document/detail/zh/mind-sdk/60rc1/mxRec/mxrecug/mxrecug_0007.html) +“安装部署”章节进行准备。 + +## 3.安装mxRec +mxRec软件包可以通过[mxRec用户指南](https://www.hiascend.com/document/detail/zh/mind-sdk/60rc1/mxRec/mxrecug/mxrecug_0007.html) +“安装部署”>“环境准备”>“获取软件包”章节提供的链接进行下载,选择自己需要的架构(x86或者arm)的mxRec包。下载完成之后,将mxRec包解压,进入解压后的目录(mindxsdk-mxrec) +如下: +```shell +. 
+├── cust_op +│ └── cust_op_by_addr +├── examples +│ ├── DCNv2 +│ ├── demo +│ └── dlrm +├── tf1_whl +│ └── mx_rec-{version}-py3-none-linux_x86_64.whl # version为版本号 +├── tf2_whl +│ └── mx_rec-{version}-py3-none-linux_x86_64.whl # version为版本号 +└── version.info +``` +其中,tf1_whl和tf2_whl目录下分别是适配tf1和tf2的mxRec软件包,按照自己需要选择其中一个进行安装即可(用pip/pip3 install 软件包这种方式进行安装)。 +确认安装mxRec的目录,比如mxRec安装在 /usr/local/python3.7.5/lib/python3.7/site-packages/mx_rec这个目录下。 + +## 4.运行demo模型 +执行完以上步骤之后,接下来就可以运行demo模型,其中run.sh就是运行的脚本,默认是8张卡。其中需要传入ip这个参数,运行命令如: +```shell +bash run.sh main.py {ip} +``` +* ip:ip是运行模型的机器所在的ip。 + +**Tips**:run.sh脚本中有一个参数是mx_rec_package_path,mx_rec_package_path是mxRec的安装目录,比如:/usr/local/python3.7.5/lib/python3.7/site-packages/mx_rec。 +这个参数在脚本是默认的,用户需要根据自己环境中mxRec实际安装的路径进行配置。 \ No newline at end of file -- Gitee From 38866e896710f3ff873083c02216bb638672aecd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=BD=97=E5=B9=B8=E8=BF=90?= Date: Wed, 10 Jul 2024 16:16:29 +0800 Subject: [PATCH 274/302] =?UTF-8?q?=E3=80=90FIX=E3=80=91DDR=E6=A8=A1?= =?UTF-8?q?=E5=BC=8F=E5=9C=A8device=E6=B5=8B=E8=BF=90=E8=A1=8C=E8=BE=83?= =?UTF-8?q?=E5=BF=AB=E7=9A=84=E6=83=85=E5=86=B5=E4=B8=8B=EF=BC=8Chost?= =?UTF-8?q?=E6=B5=8B=E7=94=B3=E8=AF=B7=E5=86=85=E5=AD=98=E5=92=8C=E5=88=9D?= =?UTF-8?q?=E5=A7=8B=E5=8C=96=E6=85=A2=EF=BC=8C=E5=AF=BC=E8=87=B4=E6=8A=A5?= =?UTF-8?q?=E9=94=99=E9=80=80=E5=87=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/AccCTR/src/embedding_cache/offset_mapper/address_mapper.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/AccCTR/src/embedding_cache/offset_mapper/address_mapper.h b/src/AccCTR/src/embedding_cache/offset_mapper/address_mapper.h index 46daaf29..3b87e6e6 100644 --- a/src/AccCTR/src/embedding_cache/offset_mapper/address_mapper.h +++ b/src/AccCTR/src/embedding_cache/offset_mapper/address_mapper.h @@ -109,7 +109,7 @@ public: fullCv.notify_all(); } - BeforePutFuncState GetNewValueToBeInserted(uint64_t& value, uint32_t maxRetry = 1000) + BeforePutFuncState GetNewValueToBeInserted(uint64_t& value, uint32_t maxRetry = 10000) { for (uint32_t i = 0; i < maxRetry; i++) { if (BufferBin.pop(value)) { @@ -252,7 +252,7 @@ public: FkvState FindAndPutIfNotFound(uint64_t key, uint64_t& value) { FkvState ret = MapperBase::FindAndPutIfNotFound(key, value, [&]() { - if (HM_UNLIKELY(current_size.load() >= hostVocabSize)) { + if (HM_UNLIKELY(current_size.load() > hostVocabSize)) { ock::ExternalLogger::PrintLog(ock::LogLevel::ERROR, "host does not have enough space"); return BeforePutFuncState::BEFORE_NO_SPACE; } -- Gitee From d076f903d3669c8194312b793de1349586f9f1b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=BD=97=E5=B9=B8=E8=BF=90?= Date: Wed, 10 Jul 2024 16:55:41 +0800 Subject: [PATCH 275/302] =?UTF-8?q?=E3=80=90FIX=E3=80=91DDR=E6=A8=A1?= =?UTF-8?q?=E5=BC=8F=E6=8A=A5=E9=94=99host=E7=A9=BA=E9=97=B4=E4=B8=8D?= =?UTF-8?q?=E8=B6=B3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../src/embedding_cache/offset_mapper/address_mapper.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/AccCTR/src/embedding_cache/offset_mapper/address_mapper.h b/src/AccCTR/src/embedding_cache/offset_mapper/address_mapper.h index 3b87e6e6..8b7e4e67 100644 --- a/src/AccCTR/src/embedding_cache/offset_mapper/address_mapper.h +++ b/src/AccCTR/src/embedding_cache/offset_mapper/address_mapper.h @@ -109,7 +109,7 @@ public: fullCv.notify_all(); } - BeforePutFuncState 
GetNewValueToBeInserted(uint64_t& value, uint32_t maxRetry = 10000) + BeforePutFuncState GetNewValueToBeInserted(uint64_t& value, uint32_t maxRetry = 1000) { for (uint32_t i = 0; i < maxRetry; i++) { if (BufferBin.pop(value)) { @@ -252,8 +252,11 @@ public: FkvState FindAndPutIfNotFound(uint64_t key, uint64_t& value) { FkvState ret = MapperBase::FindAndPutIfNotFound(key, value, [&]() { - if (HM_UNLIKELY(current_size.load() > hostVocabSize)) { - ock::ExternalLogger::PrintLog(ock::LogLevel::ERROR, "host does not have enough space"); + if (HM_UNLIKELY(current_size.load() >= hostVocabSize)) { + ock::ExternalLogger::PrintLog( + ock::LogLevel::ERROR, + "host does not have enough space, current: " + std::to_string(current_size.load()) + + ", host max size: " + std::to_string(hostVocabSize)); return BeforePutFuncState::BEFORE_NO_SPACE; } return emExpendMemInfoPtr->GetNewValueToBeInserted(value); -- Gitee From db89f0016478fcf3f9bde8481d0ae8ad4a1cb934 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=BD=95=E9=9C=96?= Date: Fri, 12 Jul 2024 18:05:31 +0800 Subject: [PATCH 276/302] =?UTF-8?q?=E4=BF=AE=E6=94=B9mxRec=20README?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index f6bfb828..5a2d9c03 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ mxRec作为面向互联网市场搜索推荐广告的应用使能SDK产品,对 ## 安装方式 -安装前,请参考《CANN 软件安装指南》安装CANN开发套件软件包和TensorFlow适配昇腾插件。 +安装前,请参考[CANN 软件安装指南](https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/80RC2alpha003/softwareinst/instg/instg_0022.html)安装CANN开发套件软件包和TensorFlow适配昇腾插件。 CANN软件提供进程级环境变量设置脚本,供用户在进程中引用,以自动完成环境变量设置。用户进程结束后自动失效。可在程序启动的Shell脚本中使用如下命令设置CANN的相关环境变量,也可通过命令行执行如下命令(以root用户默认安装路径“/usr/local/Ascend”为例): ```shell @@ -65,12 +65,34 @@ bash run.sh 将pybind11和securec的压缩包放在与mxRec代码同级的opensource目录下,并且将其分别更名为pybind11-2.10.3.zip、huaweicloud-sdk-c-obs-3.23.9.zip。如果没有opensource目录,则需要在mxRec同级的目录下手动创建opensource目录,然后将pybind11和securec的压缩包放在opensource目录下。 -为了构建多个版本的whl包,编译脚本在python虚拟环境完成对应tensorflow版本的安装。用户可以根据实际情况调整编译脚本,指定tensorflow的安装路径。编译方法: +由于构建脚本需要适配内部构建工程,所以在脚本中存在适配代码,但是这些代码可能对于用户来说不需要,所以在编译之前需要做如下处理: + +在build目录中存在build_tf1.sh和build_tf2.sh,其中分别存在如下代码: +```shell +# 配置tf1路径 +source /opt/buildtools/tf1_env/bin/activate +tf1_path=$(dirname "$(dirname "$(which python3.7)")")/lib/python3.7/site-packages/tensorflow_core +deactivate tf1_env +``` +```shell +# 配置tf2路径 +source /opt/buildtools/tf2_env/bin/activate +tf2_path=$(dirname "$(dirname "$(which python3.7)")")/lib/python3.7/site-packages/tensorflow +deactivate tf2_env +``` + +可以看到,上述代码中都有激活Python虚拟环境的步骤,因此用户有两种选择: + +1. 根据需要在/opt/buildtools/目录下(没有此目录需要先创建)创建tf1_env和tf2_env两个Python虚拟环境,并在虚拟环境中安装对应版本的Tensorflow +2. 
将 build_tf1.sh 中的 source /opt/buildtools/tf1_env/bin/activate 和 deactivate tf1_env 两行注释掉或者删除(build_tf2.sh 中对应的 source /opt/buildtools/tf2_env/bin/activate 和 deactivate tf2_env 同理)


编译方法:

进入mxRec代码目录:

- setup.py:此脚本供内部使用,用于同时构建tf1和tf2的mxRec包,用户通常只需要其中一个,所以建议使用下面两个脚本构建。
- setup_tf1.py:执行脚本setup_tf1.py,比如:**python3.7 setup_tf1.py bdist_wheel**完成tf1版本whl包的构建,构建成功后,whl包在build/mindxsdk-mxrec/tf1_whl子目录下。
- setup_tf2.py:执行脚本setup_tf2.py,比如:**python3.7 setup_tf2.py bdist_wheel**完成tf2版本whl包的构建,构建成功后,whl包在build/mindxsdk-mxrec/tf2_whl子目录下。

如需使用动态扩容功能,进入“./cust_op/cust_op_by_addr”目录中。参考以下命令编译并安装动态扩容算子包。
```shell
-- 
Gitee

From c2d469d400a520846808d08ad7cb1016c0e462ab Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=83=AD=E6=9C=9B?= <1244372993@qq.com>
Date: Fri, 12 Jul 2024 18:20:22 +0800
Subject: [PATCH 277/302] =?UTF-8?q?Little=20demo=E6=A8=A1=E5=9E=8Bestimato?=
 =?UTF-8?q?r=E6=A8=A1=E5=BC=8FDDR=E4=BF=9D=E5=AD=98=E9=97=AE=E9=A2=98?=
 =?UTF-8?q?=E4=BF=AE=E6=94=B9=EF=BC=9B=E9=97=A8=E7=A6=81=E6=B5=8B=E8=AF=95?=
 =?UTF-8?q?=E7=94=A8=E4=BE=8B=E4=BF=AE=E6=94=B9=EF=BC=9B?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 mx_rec/saver/saver.py                |  9 +++++++++
 src/core/hybrid_mgmt/hybrid_mgmt.cpp | 10 +++++++++-
 tests/mx_rec/saver/test_saver.py     |  6 +++---
 3 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/mx_rec/saver/saver.py b/mx_rec/saver/saver.py
index a6362506..f7ba8f03 100644
--- a/mx_rec/saver/saver.py
+++ b/mx_rec/saver/saver.py
@@ -127,6 +127,15 @@ class Saver(object):
         save_path = save_path if save_path else self._prefix_name
         directory, base_name = os.path.split(save_path)
 
+        # Skip saving at step 0, because the host side also skips step 0 in EmbeddingDDR::Save (SyncLatestEmbedding)
+        try:
+            step_in_name = int(base_name.split("-")[-1])
+            if step_in_name == 0:
+                return
+        except ValueError as err:
+            raise ValueError(f"The base_name {base_name} needs to end with the save step, "
+                             f"e.g. model-100") from err
+
         if global_step:
             if not isinstance(global_step, compat.integral_types):
                 global_step = int(sess.run(global_step))
diff --git a/src/core/hybrid_mgmt/hybrid_mgmt.cpp b/src/core/hybrid_mgmt/hybrid_mgmt.cpp
index 3eb99685..bcc3a2a5 100644
--- a/src/core/hybrid_mgmt/hybrid_mgmt.cpp
+++ b/src/core/hybrid_mgmt/hybrid_mgmt.cpp
@@ -499,7 +499,15 @@ void HybridMgmt::EvalTask(TaskType type)
                   hybridMgmtBlock->IsNeedWaitSave());
         std::unique_lock<std::mutex> checkSaveLocker(saveMutex);
         cvCheckSave.wait(checkSaveLocker, [this] { return !hybridMgmtBlock->IsNeedWaitSave() || mutexDestroy; });
-        hybridMgmtBlock->Wake(TRAIN_CHANNEL_ID);
+
+        if (hybridMgmtBlock->pythonBatchID[EVAL_CHANNEL_ID] >= hybridMgmtBlock->hybridBatchId[EVAL_CHANNEL_ID])
+        {
+            hybridMgmtBlock->Wake(TRAIN_CHANNEL_ID);
+        } else {
+            std::this_thread::sleep_for(SLEEP_MS);
+            continue;
+        }
+
         LOG_DEBUG("wake TrainTask");
         hybridMgmtBlock->DoBlock(channelId);
     }
diff --git a/tests/mx_rec/saver/test_saver.py 
b/tests/mx_rec/saver/test_saver.py index bcfa0948..53066038 100644 --- a/tests/mx_rec/saver/test_saver.py +++ b/tests/mx_rec/saver/test_saver.py @@ -61,18 +61,18 @@ class TestSaver(unittest.TestCase): self.saver = Saver() with tf.compat.v1.Session(graph=self.graph) as sess: - embedding_directory = "./sparse-model/test_table/embedding" + embedding_directory = "./sparse-model-1/test_table/embedding" data_file = os.path.join(embedding_directory, "slice.data") attribute_file = os.path.join(embedding_directory, "slice.attribute") sess.run(tf.global_variables_initializer()) origin_embedding = sess.run(self.var)[[0, 1, 4, 6, 8], :] - self.saver.save(sess) + self.saver.save(sess, save_path="model-1") self.assertTrue(os.path.exists(embedding_directory), "embedding目录已创建") self.assertTrue(os.path.exists(data_file), "embedding的data文件存储成功") self.assertTrue(os.path.exists(attribute_file), "embedding的attribute文件存储成功") - tf.io.gfile.rmtree("./sparse-model") + tf.io.gfile.rmtree("./sparse-model-1") def build_graph(self): self.graph = tf.compat.v1.Graph() -- Gitee From 6775ab93f0b004a6bbe15ce0d56f58da5df35745 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=83=AD=E6=9C=9B?= <1244372993@qq.com> Date: Fri, 12 Jul 2024 18:35:54 +0800 Subject: [PATCH 278/302] =?UTF-8?q?=E6=8B=BC=E5=86=99=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core/hybrid_mgmt/hybrid_mgmt.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/hybrid_mgmt/hybrid_mgmt.cpp b/src/core/hybrid_mgmt/hybrid_mgmt.cpp index bcc3a2a5..737cdb1d 100644 --- a/src/core/hybrid_mgmt/hybrid_mgmt.cpp +++ b/src/core/hybrid_mgmt/hybrid_mgmt.cpp @@ -500,7 +500,7 @@ void HybridMgmt::EvalTask(TaskType type) std::unique_lock checkSaveLocker(saveMutex); cvCheckSave.wait(checkSaveLocker, [this] { return !hybridMgmtBlock->IsNeedWaitSave() || mutexDestroy; }); - if (hybridMgmtBlock->pythonBatchID[EVAL_CHANNEL_ID] >= hybridMgmtBlock->hybridBatchId[EVAL_CHANNEL_ID]) + if (hybridMgmtBlock->pythonBatchId[EVAL_CHANNEL_ID] >= hybridMgmtBlock->hybridBatchId[EVAL_CHANNEL_ID]) { hybridMgmtBlock->Wake(TRAIN_CHANNEL_ID); } else { -- Gitee From cb43c6a8da89f2a25118df6d68631eec9549998d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=83=AD=E6=9C=9B?= <1244372993@qq.com> Date: Fri, 12 Jul 2024 19:05:56 +0800 Subject: [PATCH 279/302] =?UTF-8?q?cleancode=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core/hybrid_mgmt/hybrid_mgmt.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/core/hybrid_mgmt/hybrid_mgmt.cpp b/src/core/hybrid_mgmt/hybrid_mgmt.cpp index 737cdb1d..93954401 100644 --- a/src/core/hybrid_mgmt/hybrid_mgmt.cpp +++ b/src/core/hybrid_mgmt/hybrid_mgmt.cpp @@ -500,9 +500,8 @@ void HybridMgmt::EvalTask(TaskType type) std::unique_lock checkSaveLocker(saveMutex); cvCheckSave.wait(checkSaveLocker, [this] { return !hybridMgmtBlock->IsNeedWaitSave() || mutexDestroy; }); - if (hybridMgmtBlock->pythonBatchId[EVAL_CHANNEL_ID] >= hybridMgmtBlock->hybridBatchId[EVAL_CHANNEL_ID]) - { - hybridMgmtBlock->Wake(TRAIN_CHANNEL_ID); + if (hybridMgmtBlock->pythonBatchId[EVAL_CHANNEL_ID] >= hybridMgmtBlock->hybridBatchId[EVAL_CHANNEL_ID]) { + hybridMgmtBlockgi->Wake(TRAIN_CHANNEL_ID); } else { std::this_thread::sleep_for(SLEEP_MS); continue; -- Gitee From d2ba56b47391194d99c807df5bd8879437cb6418 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=83=AD=E6=9C=9B?= <1244372993@qq.com> Date: Fri, 12 Jul 2024 19:20:39 
+0800 Subject: [PATCH 280/302] =?UTF-8?q?=E6=8B=BC=E5=86=99=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core/hybrid_mgmt/hybrid_mgmt.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/hybrid_mgmt/hybrid_mgmt.cpp b/src/core/hybrid_mgmt/hybrid_mgmt.cpp index 93954401..cab348ba 100644 --- a/src/core/hybrid_mgmt/hybrid_mgmt.cpp +++ b/src/core/hybrid_mgmt/hybrid_mgmt.cpp @@ -501,7 +501,7 @@ void HybridMgmt::EvalTask(TaskType type) cvCheckSave.wait(checkSaveLocker, [this] { return !hybridMgmtBlock->IsNeedWaitSave() || mutexDestroy; }); if (hybridMgmtBlock->pythonBatchId[EVAL_CHANNEL_ID] >= hybridMgmtBlock->hybridBatchId[EVAL_CHANNEL_ID]) { - hybridMgmtBlockgi->Wake(TRAIN_CHANNEL_ID); + hybridMgmtBlock->Wake(TRAIN_CHANNEL_ID); } else { std::this_thread::sleep_for(SLEEP_MS); continue; -- Gitee From 33c03cadd19b48fb11daaac4045925bd13a4236f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=BD=97=E5=B9=B8=E8=BF=90?= Date: Mon, 15 Jul 2024 09:49:14 +0800 Subject: [PATCH 281/302] =?UTF-8?q?=E3=80=90FIX=E3=80=91DDR=E6=A8=A1?= =?UTF-8?q?=E5=BC=8F=E5=81=B6=E5=8F=91=E6=8A=A5=E9=94=99=E7=A9=BA=E9=97=B4?= =?UTF-8?q?=E4=B8=8D=E8=B6=B3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core/hybrid_mgmt/hybrid_mgmt.cpp | 41 ++++++++++++++++++---------- src/core/hybrid_mgmt/hybrid_mgmt.h | 2 +- 2 files changed, 27 insertions(+), 16 deletions(-) diff --git a/src/core/hybrid_mgmt/hybrid_mgmt.cpp b/src/core/hybrid_mgmt/hybrid_mgmt.cpp index 3eb99685..73c30e13 100644 --- a/src/core/hybrid_mgmt/hybrid_mgmt.cpp +++ b/src/core/hybrid_mgmt/hybrid_mgmt.cpp @@ -959,30 +959,43 @@ void HybridMgmt::LookUpAndRemoveAddrs(const EmbTaskInfo& info) } // DDR -void HybridMgmt::LookUpSwapAddrs(const string& embName, const string& swapStr) +void HybridMgmt::LookUpSwapAddrs(const string& embName) { int id = 0; - std::string swapName = embName + swapStr; + std::string swapInName = embName + SWAP_IN_STR; + std::string swapOutName = embName + SWAP_OUT_STR; + vector addrs; while (isRunning && lookupAddrSuccess) { - std::vector keys = HBMSwapKeyQue[swapName].WaitAndPop(); if (!isRunning) { return; } - vector addrs; - TimeCost lookupAddrsTC; + // swap in + std::vector keys = HBMSwapKeyQue[swapInName].WaitAndPop(); + TimeCost lookupAddrsInTC; int rc = embCache->EmbeddingLookupAddrs(embName, keys, addrs); if (rc != H_OK) { lookupAddrSuccess = false; throw runtime_error("EmbeddingLookupAddrs failed! 
error code: " + std::to_string(rc)); } - LOG_DEBUG("table:{}, swapStr:{}, keys.size:{}, addrs.size:{}, pushId:{}, lookupAddrsTC(ms):{}", embName, - swapStr, keys.size(), addrs.size(), id, lookupAddrsTC.ElapsedMS()); - HBMSwapAddrsQue[swapName].Pushv(addrs); - if (swapStr == SWAP_IN_STR) { - lookUpSwapInAddrsPushId[embName]++; - LOG_DEBUG("LookUpSwapAddrs, table:{}, pushId:{}, lookUpSwapInAddrsPushId:{}", embName, id, - lookUpSwapInAddrsPushId[embName]); + LOG_DEBUG("table:{}, swapStr:{}, keys.size:{}, addrs.size:{}, pushId:{}, lookupAddrsInTC(ms):{}", embName, + SWAP_IN_STR, keys.size(), addrs.size(), id, lookupAddrsInTC.ElapsedMS()); + HBMSwapAddrsQue[swapInName].Pushv(addrs); + + lookUpSwapInAddrsPushId[embName]++; + LOG_DEBUG("LookUpSwapAddrs, table:{}, pushId:{}, lookUpSwapInAddrsPushId:{}", embName, id, + lookUpSwapInAddrsPushId[embName]); + + // swap out + keys = HBMSwapKeyQue[swapOutName].WaitAndPop(); + TimeCost lookupAddrsOutTC; + rc = embCache->EmbeddingLookupAddrs(embName, keys, addrs); + if (rc != H_OK) { + lookupAddrSuccess = false; + throw runtime_error("EmbeddingLookupAddrs failed! error code: " + std::to_string(rc)); } + LOG_DEBUG("table:{}, swapStr:{}, keys.size:{}, addrs.size:{}, pushId:{}, lookupAddrsOutTC(ms):{}", embName, + SWAP_OUT_STR, keys.size(), addrs.size(), id, lookupAddrsOutTC.ElapsedMS()); + HBMSwapAddrsQue[swapOutName].Pushv(addrs); id++; } } @@ -1242,9 +1255,7 @@ void HybridMgmt::InitDataPipelineForDDR(const string& embName) // 初始化lookup线程 lookUpSwapInAddrsPushId[embName]; // 此处初始化,避免多线程竞争导致计数错误 lookUpSwapInAddrsThreads.emplace_back( - std::async(std::launch::async, [=] { LookUpSwapAddrs(embName, SWAP_IN_STR); })); - lookUpSwapOutAddrsThreads.emplace_back( - std::async(std::launch::async, [=] { LookUpSwapAddrs(embName, SWAP_OUT_STR); })); + std::async(std::launch::async, [=] { LookUpSwapAddrs(embName); })); LOG_DEBUG("data pipeline for ddr init"); } diff --git a/src/core/hybrid_mgmt/hybrid_mgmt.h b/src/core/hybrid_mgmt/hybrid_mgmt.h index ab34b19f..57a7ddd1 100644 --- a/src/core/hybrid_mgmt/hybrid_mgmt.h +++ b/src/core/hybrid_mgmt/hybrid_mgmt.h @@ -187,7 +187,7 @@ public: void LookUpAndRemoveAddrs(const EmbTaskInfo& info); // L3Storage, synchronous - void LookUpSwapAddrs(const std::string& embName, const std::string& swapStr); // DDR, asynchronous + void LookUpSwapAddrs(const std::string& embName); // DDR, asynchronous void EmbeddingTask(); -- Gitee From 5592a8e616f1ca0e98873b2ac84ec94fdeb20fc5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=BD=97=E5=B9=B8=E8=BF=90?= Date: Mon, 15 Jul 2024 17:19:33 +0800 Subject: [PATCH 282/302] =?UTF-8?q?=E3=80=90FIX=E3=80=91DDR=E6=A8=A1?= =?UTF-8?q?=E5=BC=8F=E5=81=B6=E5=8F=91=E6=8A=A5=E9=94=99=E7=A9=BA=E9=97=B4?= =?UTF-8?q?=E4=B8=8D=E8=B6=B3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core/hybrid_mgmt/hybrid_mgmt.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/hybrid_mgmt/hybrid_mgmt.cpp b/src/core/hybrid_mgmt/hybrid_mgmt.cpp index 9921fe27..f8ad9216 100644 --- a/src/core/hybrid_mgmt/hybrid_mgmt.cpp +++ b/src/core/hybrid_mgmt/hybrid_mgmt.cpp @@ -971,7 +971,7 @@ void HybridMgmt::LookUpSwapAddrs(const string& embName) int id = 0; std::string swapInName = embName + SWAP_IN_STR; std::string swapOutName = embName + SWAP_OUT_STR; - vector addrs; + std::vector addrs; while (isRunning && lookupAddrSuccess) { if (!isRunning) { return; -- Gitee From 45f3fe4365341c6024d52c589c77bb9af41e5248 Mon Sep 17 00:00:00 2001 From: penghuiyang 
<1060916628@qq.com> Date: Mon, 15 Jul 2024 22:20:31 +0800 Subject: [PATCH 283/302] =?UTF-8?q?=E3=80=90FIX=E3=80=91LazyAdam=E8=9E=8D?= =?UTF-8?q?=E5=90=88=E7=AE=97=E5=AD=90=E6=96=B0=E7=89=88=E6=9C=ACCANN?= =?UTF-8?q?=E7=BC=96=E8=AF=91=E5=A4=B1=E8=B4=A5=E4=BF=AE=E6=94=B9=EF=BC=9B?= =?UTF-8?q?=E8=AE=A1=E7=AE=97=E9=80=BB=E8=BE=91=E5=90=8C=E6=AD=A5py?= =?UTF-8?q?=E8=84=9A=E6=9C=AC=EF=BC=9B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../aclnn_lazy_adam_test/scripts/gen_data.py | 2 +- cust_op/fused_lazy_adam/op_kernel/lazy_adam.cpp | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/scripts/gen_data.py b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/scripts/gen_data.py index 6e07f836..6e8c9251 100644 --- a/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/scripts/gen_data.py +++ b/cust_op/fused_lazy_adam/aclnn_lazy_adam_test/scripts/gen_data.py @@ -121,7 +121,7 @@ def _gen_golden_data(): update_v = beta2 * old_v_slice + (1 - beta2) * np.square(gradient) out_v = _scatter_nd_update(input_v, indices, update_v) - denominator_slice = np.sqrt(update_v) + epsilon + denominator_slice = np.sqrt(np.abs(update_v)) + epsilon update_var = np.divide(-lr * update_m, denominator_slice) out_var = _scatter_nd_add(input_var, indices, update_var) diff --git a/cust_op/fused_lazy_adam/op_kernel/lazy_adam.cpp b/cust_op/fused_lazy_adam/op_kernel/lazy_adam.cpp index 76164e50..e0ad8e45 100644 --- a/cust_op/fused_lazy_adam/op_kernel/lazy_adam.cpp +++ b/cust_op/fused_lazy_adam/op_kernel/lazy_adam.cpp @@ -176,6 +176,7 @@ private: this->updateV = localVSlice + this->updateV; // 计算Var + Abs(this->updateV, this->updateV, row * this->dim2); Sqrt(this->updateVar, this->updateV, row * this->dim2); Adds(this->updateVar, this->updateVar, this->epsilon, row * this->dim2); Muls(this->temp, this->updateM, -this->lr, row * this->dim2); @@ -233,5 +234,10 @@ extern "C" __global__ __aicore__ void lazy_adam(GM_ADDR gradient, GM_ADDR indice tiling_data.row, tiling_data.indicesAllocSize, tiling_data.otherAllocSize, tiling_data.batch, tiling_data.loopCount, tiling_data.rowLeft, tiling_data.loopCountTail, tiling_data.rowLeftTail, tiling_data.coreNum); +#ifdef KERNEL_TASK_TYPE_DEFAULT + // Set kernel type with new versions of CANN to avoid matmul error during compiling. + // In previous versions of CANN, avoid matmul error by using '#ifndef __GET_CODE_CHANNEL__'. 
+ KERNEL_TASK_TYPE_DEFAULT(KERNEL_TYPE_AIV_ONLY); +#endif op32.Process(); } \ No newline at end of file -- Gitee From 1e9e773c32f67ff466893976f5b748ac217947c0 Mon Sep 17 00:00:00 2001 From: longfeifei <962977793@qq.com> Date: Mon, 15 Jul 2024 14:20:33 +0800 Subject: [PATCH 284/302] =?UTF-8?q?estimator=E4=B8=ADtrain=E5=88=87?= =?UTF-8?q?=E6=8D=A2=E4=B8=BAeval,=E5=A2=9E=E5=8A=A0=E5=8E=9Fhost=E4=BE=A7?= =?UTF-8?q?train=E7=9A=84=E7=9B=B8=E5=85=B3=E7=8A=B6=E6=80=81=E5=A4=87?= =?UTF-8?q?=E4=BB=BD=EF=BC=8C=E5=9C=A8eval=E5=88=87=E6=8D=A2=E4=B8=BAtrain?= =?UTF-8?q?=E5=90=8E=E8=BF=9B=E8=A1=8C=E8=BF=98=E5=8E=9F=E5=A4=87=E4=BB=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/AccCTR/src/common/util/error_code.h | 1 + .../cache_manager/cache_manager.cpp | 57 ++++++++++++-- .../cache_manager/cache_manager.h | 8 +- src/AccCTR/src/embedding_cache/limited_set.h | 19 +++-- src/AccCTR/src/include/embedding_cache.h | 18 +++-- src/core/emb_table/embedding_ddr.cpp | 11 ++- src/core/emb_table/embedding_mgmt.h | 5 +- src/core/hybrid_mgmt/hybrid_mgmt.cpp | 30 +++++--- src/core/hybrid_mgmt/hybrid_mgmt.h | 2 +- src/core/l3_storage/cache_manager.cpp | 74 ++++++++++++++++++- src/core/l3_storage/cache_manager.h | 11 +++ .../ock_ctr_common/include/embedding_cache.h | 18 +++-- 12 files changed, 200 insertions(+), 54 deletions(-) diff --git a/src/AccCTR/src/common/util/error_code.h b/src/AccCTR/src/common/util/error_code.h index b30bfd83..87c8ffe6 100644 --- a/src/AccCTR/src/common/util/error_code.h +++ b/src/AccCTR/src/common/util/error_code.h @@ -43,6 +43,7 @@ using CTRCode = enum : int { H_TABLE_NAME_EMPTY = 22, H_PREFILL_BUFFER_SIZE_INVALID = 23, H_TABLE_NAME_TOO_LONG = 24, + H_EMB_CACHE_INFO_LOST = 25 }; } } diff --git a/src/AccCTR/src/embedding_cache/cache_manager/cache_manager.cpp b/src/AccCTR/src/embedding_cache/cache_manager/cache_manager.cpp index 68351328..52578820 100644 --- a/src/AccCTR/src/embedding_cache/cache_manager/cache_manager.cpp +++ b/src/AccCTR/src/embedding_cache/cache_manager/cache_manager.cpp @@ -253,8 +253,7 @@ int EmbCacheManagerImpl::ExportDeviceKeyOffsetPairs(const std::string& tableName if (checkTableNameRet != H_OK) { return checkTableNameRet; } - OffsetMapper& om = offsetMappers[tableName]; - koVec = om.ExportSortedKVPairs(); + koVec = offsetMappers[tableName].ExportSortedKVPairs(); return H_OK; } @@ -318,30 +317,58 @@ int EmbCacheManagerImpl::LoadEmbTableInfos(std::string tableName, const std::vec return H_OK; } -int EmbCacheManagerImpl::BackUpTrainStatus(std:string tableName) +int EmbCacheManagerImpl::BackUpTrainStatus(const std::string& tableName) { int checkTableNameRet = CheckValidTableName(tableName); if (checkTableNameRet != H_OK) { return checkTableNameRet; } + // Back up the key-offset correspondence on the device + kvVecsBackUp[tableName] = offsetMappers[tableName].ExportVec(); + + auto embInfo = embCacheInfos.find(tableName); + if (embInfo == embCacheInfos.end()) { + return H_EMB_CACHE_INFO_LOST; + } + uint32_t reserve = embInfo->second.maxCacheSize / VOCAB_CACHE_RATIO; + uint32_t maxCacheSize = embInfo->second.maxCacheSize; + auto om = offsetMappersBackUp.find(tableName); if (om != offsetMappersBackUp.end()) { - offsetMappersBackUp[tableName] = offsetMappers[tableName]; - } else{ - offsetMappersBackUp[tableName].Initialize(1000, 1000); - offsetMappersBackUp[tableName] = offsetMappers[tableName]; + offsetMappersBackUp[tableName].UnInitialize(); } + offsetMappersBackUp[tableName].Initialize(reserve, maxCacheSize); + 
offsetMappersBackUp[tableName] = offsetMappers[tableName]; + return H_OK; } -int EmbCacheManagerImpl::RecoverTrainStatus(std:string tableName) +int EmbCacheManagerImpl::RecoverTrainStatus(const std::string& tableName) { int checkTableNameRet = CheckValidTableName(tableName); if (checkTableNameRet != H_OK) { return checkTableNameRet; } + + auto embInfo = embCacheInfos.find(tableName); + if (embInfo == embCacheInfos.end()) { + return H_EMB_CACHE_INFO_LOST; + } + uint32_t reserve = embInfo->second.maxCacheSize / VOCAB_CACHE_RATIO; + uint32_t maxCacheSize = embInfo->second.maxCacheSize; + + offsetMappers[tableName].UnInitialize(); + offsetMappers[tableName].Initialize(reserve, maxCacheSize); offsetMappers[tableName] = offsetMappersBackUp[tableName]; + + // Recover the key-offset correspondence on the device + auto kvVecBackUp = kvVecsBackUp[tableName]; + for (const auto& kvPair: kvVecBackUp) { + offsetMappers[tableName].Put(kvPair.first, kvPair.second); + } + + kvVecBackUp.clear(); return H_OK; } @@ -449,3 +476,17 @@ uint32_t EmbCacheManagerImpl::GetUsage(const std::string& tableName) { return embTables[tableName].GetUsage(); } + +int EmbCacheManagerImpl::ResetOffsetMappers() +{ + for (auto it = offsetMappers.begin(); it != offsetMappers.end(); it++) { + auto embInfo = embCacheInfos.find(it->first); + if (embInfo == embCacheInfos.end()) { + return H_EMB_CACHE_INFO_LOST; + } + it->second.UnInitialize(); + uint32_t reserve = embInfo->second.maxCacheSize / VOCAB_CACHE_RATIO; + it->second.Initialize(reserve, embInfo->second.maxCacheSize); + } + return H_OK; +} diff --git a/src/AccCTR/src/embedding_cache/cache_manager/cache_manager.h b/src/AccCTR/src/embedding_cache/cache_manager/cache_manager.h index 359e88ad..e4a240ae 100644 --- a/src/AccCTR/src/embedding_cache/cache_manager/cache_manager.h +++ b/src/AccCTR/src/embedding_cache/cache_manager/cache_manager.h @@ -73,8 +73,11 @@ public: const std::vector>& embeddings, const std::vector>& optimizerSlots) override; - int BackUpTrainStatus(std:string tableName) override; - int RecoverTrainStatus(std::string tableName) override; + int BackUpTrainStatus(const std::string& tableName) override; + + int RecoverTrainStatus(const std::string& tableName) override; + + int ResetOffsetMappers() override; uint32_t GetUsage(const std::string& tableName) override; @@ -83,6 +86,7 @@ private: std::map offsetMappers; std::map offsetMappersBackUp; std::map embTables; + std::map>> kvVecsBackUp; int CheckValidTableName(const std::string& tableName); diff --git a/src/AccCTR/src/embedding_cache/limited_set.h b/src/AccCTR/src/embedding_cache/limited_set.h index d44b615a..f7bc2e1e 100644 --- a/src/AccCTR/src/embedding_cache/limited_set.h +++ b/src/AccCTR/src/embedding_cache/limited_set.h @@ -20,19 +20,21 @@ limitations under the License. 
namespace EmbCache { +static constexpr int64_t NODE_DEFAULT_VALUE = -1; + class LimitedSet { public: struct Node { uint64_t value; Node *prev, *next; - Node(uint64_t val = -1) : value(val), prev(nullptr), next(nullptr) {} + Node(uint64_t val = NODE_DEFAULT_VALUE) : value(val), prev(nullptr), next(nullptr) {} }; - LimitedSet(uint64_t maxRange) : head(new Node(-1)), tail(new Node(-1)) + LimitedSet(uint64_t maxRange) : head(new Node(NODE_DEFAULT_VALUE)), tail(new Node(NODE_DEFAULT_VALUE)) { nodes.resize(maxRange); for (auto &node : nodes) { - node = new Node(-1); + node = new Node(NODE_DEFAULT_VALUE); } head->next = tail; tail->prev = head; @@ -47,19 +49,16 @@ public: delete tail; } - // 拷贝构造函数 - LimitedSet(const LimitedSet& other): head(new Node(-1)), tail(new Node(-1)) + LimitedSet(const LimitedSet& other): head(new Node(NODE_DEFAULT_VALUE)), tail(new Node(NODE_DEFAULT_VALUE)) { nodes.resize(other.nodes.size()); - for (auto &node: nodes) { - node = new Node(-1); + for (auto& node: nodes) { + node = new Node(NODE_DEFAULT_VALUE); } - // 初始化头尾节点 head->next = tail; tail->prev = head; - // 遍历原vector的每一个节点并复制 for (Node* node = other.head->next; node != other.tail; node = node->next) { insert(node->value); } @@ -87,7 +86,7 @@ public: Node *node = nodes[value]; node->prev->next = node->next; node->next->prev = node->prev; - node->value = -1; + node->value = NODE_DEFAULT_VALUE; } bool find(uint64_t value) diff --git a/src/AccCTR/src/include/embedding_cache.h b/src/AccCTR/src/include/embedding_cache.h index 40d9dcbe..c0468549 100644 --- a/src/AccCTR/src/include/embedding_cache.h +++ b/src/AccCTR/src/include/embedding_cache.h @@ -317,18 +317,24 @@ public: const std::vector>& optimizerSlots) = 0; /* * - * train通道切换为eval, 备份当前表的offsetMapper对象, 存储下当前train对应的devices上key的状态 - * @Param tableName: 需要加载信息的table名字 + * When switch the channel to eval, backup the current table's offsetMapper object. + * @Param tableName: embedding table name * @Return errorCode */ - virtual int BackUpTrainStatus(std::string tableName) = 0; + virtual int BackUpTrainStatus(const std::string& tableName) = 0; /* * - * eval通道切换为train, 将当前表的offsetMapper对象还原成备份的train对应的的device上key的状态 - * @Param tableName: 需要加载信息的table名字 + * When switch the eval channel back to train, Recover the current table's offsetMapper object to the backup state. + * @Param tableName: embedding table name + * @Return errorCode + */ + virtual int RecoverTrainStatus(const std::string& tableName) = 0; + + /* * + * Reset the offsetMapper object to revert to its initialized state after loading. 
* @Return errorCode */ - virtual int RecoverTrainStatus(std::string tableName) = 0; + virtual int ResetOffsetMappers() = 0; }; } // namespace EmbCache diff --git a/src/core/emb_table/embedding_ddr.cpp b/src/core/emb_table/embedding_ddr.cpp index 82ca0b73..d05b3501 100644 --- a/src/core/emb_table/embedding_ddr.cpp +++ b/src/core/emb_table/embedding_ddr.cpp @@ -78,6 +78,11 @@ void EmbeddingDDR::Load(const string& savePath, mapResetOffsetMappers(); + if (rs != 0) { + throw runtime_error("embCache->ResetOffsetMappers failed, err code: " + to_string(rc)); + } } void EmbeddingDDR::LoadKey(const string &savePath, vector &keys) @@ -187,15 +192,13 @@ void EmbeddingDDR::LoadOptimizerSlot(const string &savePath, vector keys; vector> embeddings; vector> optimizerSlots; auto step = GetStepFromPath(savePath); - if (step > 0) { - SyncLatestEmbedding(); - embCache->GetEmbTableInfos(name, keys, embeddings, optimizerSlots); - } + embCache->GetEmbTableInfos(name, keys, embeddings, optimizerSlots); SaveKey(savePath, keys); SaveEmbedding(savePath, embeddings); diff --git a/src/core/emb_table/embedding_mgmt.h b/src/core/emb_table/embedding_mgmt.h index 7cd3f782..9dd0e363 100644 --- a/src/core/emb_table/embedding_mgmt.h +++ b/src/core/emb_table/embedding_mgmt.h @@ -90,16 +90,15 @@ public: void Save(const string& filePath); /** - * estimator模式下train切换为eval时, 备份所有表train的状态 + * In estimator mode, when switching from train to eval, backup the training state of all tables. */ void BackUpTrainStatusBeforeLoad(); /** - * estimator模式下eval切换为train时, 还原所有表train的状态 + * In estimator mode, when switching from eval to train, recover the training state of all tables. */ void RecoverTrainStatus(); - /** * 获取所有表对应的DeviceOffsets,该偏移用于python侧保存embedding时抽取key对应的embedding */ diff --git a/src/core/hybrid_mgmt/hybrid_mgmt.cpp b/src/core/hybrid_mgmt/hybrid_mgmt.cpp index 84195a3c..91750b65 100644 --- a/src/core/hybrid_mgmt/hybrid_mgmt.cpp +++ b/src/core/hybrid_mgmt/hybrid_mgmt.cpp @@ -206,12 +206,6 @@ bool HybridMgmt::Load(const string& loadPath, vector warmStartTables) throw runtime_error("HybridMgmt not initialized. 
Call Initialize first."); } - if (mgmtRankInfo.isDDR && IsTrainAndEvalCase()) { - LOG_INFO("estimator train and eval case, skip loading, " - "host will reuse data in memory while evaluating since is's same as saved data"); - return true; - } - // 数据处理线程上锁 KEY_PROCESS_INSTANCE->LoadSaveLock(); @@ -257,10 +251,15 @@ bool HybridMgmt::Load(const string& loadPath, vector warmStartTables) featAdmitNEvict.LoadHistoryRecords(loadData.histRec); } + int& theTrainBatchId = hybridMgmtBlock->hybridBatchId[TRAIN_CHANNEL_ID]; if (isL3StorageEnabled) { LOG_DEBUG(MGMT + "Start host side load: L3Storage key freq map"); auto step = GetStepFromPath(loadPath); - cacheManager->Load(mgmtEmbInfo, step, trainKeysSet); + // When in load and train mode or predict mode, SSD needs to actually execute loading + // When in the train and eval modes, loading before eval should be directly skipped + if (theTrainBatchId == 0) { + cacheManager->Load(mgmtEmbInfo, step, trainKeysSet); + } } LOG_DEBUG(MGMT + "Finish host side load process"); @@ -502,7 +501,7 @@ void HybridMgmt::EvalTask(TaskType type) cvCheckSave.wait(checkSaveLocker, [this] { return !hybridMgmtBlock->IsNeedWaitSave() || mutexDestroy; }); if (hybridMgmtBlock->pythonBatchId[EVAL_CHANNEL_ID] >= hybridMgmtBlock->hybridBatchId[EVAL_CHANNEL_ID]) { - // 在唤醒train的数据处理进程之前,需要将备份的train状态还原 + // Before waking the data process for training, Recover the backed-up training state RecoverTrainStatus(); hybridMgmtBlock->Wake(TRAIN_CHANNEL_ID); } else { @@ -2210,15 +2209,18 @@ void HybridMgmt::BackUpTrainStatus() { int channelID = TRAIN_CHANNEL_ID; int& theTrainBatchId = hybridMgmtBlock->hybridBatchId[channelID]; - //续训load、predict模式下的load不需要对train的状态进行备份 - if (theTrainBatchId==0) { + if (theTrainBatchId == 0) { return; } - // train and eval模式下,train切换为eval之后 - // eval的load需要线备份原有的相关状态, HBM非扩容模式需要备份keyOffsetMap, DDR模式需要备份offsetMapper对象 + LOG_INFO("On Estimator train and eval mode, start to backup train status, " "current train batchId: {} .", theTrainBatchId); + // When in the train and eval mode of estimator, backup training states before loading. 
EmbeddingMgmt::Instance()->BackUpTrainStatusBeforeLoad(); + + if (isL3StorageEnabled) { + cacheManager->BackUpTrainStatus(); + } isBackUpTrainStatus = true; } @@ -2227,5 +2229,9 @@ void HybridMgmt::RecoverTrainStatus() if (isBackUpTrainStatus) { EmbeddingMgmt::Instance()->RecoverTrainStatus(); } + + if (isL3StorageEnabled) { + cacheManager->RecoverTrainStatus(); + } isBackUpTrainStatus = false; } \ No newline at end of file diff --git a/src/core/hybrid_mgmt/hybrid_mgmt.h b/src/core/hybrid_mgmt/hybrid_mgmt.h index f845efb1..233030b9 100644 --- a/src/core/hybrid_mgmt/hybrid_mgmt.h +++ b/src/core/hybrid_mgmt/hybrid_mgmt.h @@ -223,7 +223,7 @@ private: bool isLoad{false}; bool isInitialized{false}; bool alreadyTrainOnce = false; // 用于判断是否为predict模式 - bool isBackUpTrainStatus = false; // 用于判断当前是否已经备份了train的状态 + bool isBackUpTrainStatus = false; // whether the train state has been backed up map lookUpSwapInAddrsPushId; // 用于处理eos场景,当消费者追上生产者且长时间无上游数据,会触发eos map specialProcessStatus; diff --git a/src/core/l3_storage/cache_manager.cpp b/src/core/l3_storage/cache_manager.cpp index ee3d7bc5..7ea68e14 100644 --- a/src/core/l3_storage/cache_manager.cpp +++ b/src/core/l3_storage/cache_manager.cpp @@ -32,10 +32,10 @@ void CacheManager::Init(ock::ctr::EmbCacheManagerPtr embCachePtr, vectorembCache = std::move(embCachePtr); for (auto& emb : mgmtEmbInfo) { - EmbBaseInfo baseInfo {emb.ssdVocabSize, emb.ssdDataPath, false}; + EmbBaseInfo baseInfo {emb.ssdVocabSize, emb.ssdDataPath, false, emb.extEmbeddingSize}; embBaseInfos.emplace(emb.name, baseInfo); preProcessMapper[emb.name].Initialize(emb.name, emb.hostVocabSize, emb.ssdVocabSize); } @@ -293,3 +293,73 @@ void CacheManager::FetchL3StorageEmb2DDR(string tableName, uint32_t extEmbedding embeddingTaskStep++; evictWaitCond.notify_all(); } + +void CacheManager::BackUpTrainStatus() +{ + ddrKeyFreqMapBackUp = ddrKeyFreqMap; + excludeDDRKeyCountMapBackUp = excludeDDRKeyCountMap; +} + +void CacheManager::RecoverTrainStatus() +{ + for (const auto& pair: excludeDDRKeyCountMapBackUp) { + auto tableName = pair.first; + + std::vector ssdKeysBeforeEval; + std::vector ssdKeysAfterEval; + std::vector swapInKeys; + std::vector swapOutKeys; + + for (const auto& keyMap : pair.second) { + ssdKeysBeforeEval.push_back(keyMap.first); + } + for (const auto& keyMap : excludeDDRKeyCountMap[tableName]) { + ssdKeysAfterEval.push_back(keyMap.first); + } + + GetSwapInAndSwapOutKeys(ssdKeysBeforeEval, ssdKeysAfterEval, swapInKeys, swapOutKeys); + + // ddr <-> ssd + // ddr-> lookup address, ssd->insert embedding , ddr->remove embedding + vector swapInKeysAddr; + int rc = embCache->EmbeddingLookupAddrs(tableName, swapInKeys, swapInKeysAddr); + if (rc != 0) { + throw runtime_error("EmbeddingLookUpAddrs failed! error code: " + std::to_string(rc)); + } + auto extEmbeddingSize = embBaseInfos[tableName].extEmbeddingSize; + l3Storage->InsertEmbeddingsByAddr(tableName, swapInKeys, swapInKeysAddr, extEmbeddingSize); + rc = embCache->EmbeddingRemove(tableName, swapInKeys); + if (rc != 0) { + throw runtime_error("EmbeddingRemove failed! 
error code: " + std::to_string(rc)); + } + + // ssd->fetch embedding, ddr->EmbeddingUpdate, ssd->delete embedding + auto swapOutEmbeddings = l3Storage->FetchEmbeddings(tableName, swapOutKeys); + vector swapOutFlattenEmbeddings; + for (auto& emb : swapOutEmbeddings) { + swapOutFlattenEmbeddings.insert(swapOutFlattenEmbeddings.cend(), emb.cbegin(), emb.cend()); + } + rc = embCache->EmbeddingUpdate(tableName, swapOutKeys, swapOutFlattenEmbeddings.data()); + l3Storage->DeleteEmbeddings(tableName, swapOutKeys); + } + + ddrKeyFreqMap = ddrKeyFreqMapBackUp; + excludeDDRKeyCountMap = excludeDDRKeyCountMapBackUp; +} + +void CacheManager::GetSwapInAndSwapOutKeys(vector& ssdKeysBeforeEval, + vector& ssdKeysAfterEval, + vector& swapInKeys, vector& swapOutKeys) +{ + std::sort(ssdKeysBeforeEval.begin(), ssdKeysBeforeEval.end()); + std::sort(ssdKeysAfterEval.begin(), ssdKeysAfterEval.end()); + vector intersectionKeys; + std::set_intersection(ssdKeysBeforeEval.begin(), ssdKeysBeforeEval.end(), ssdKeysAfterEval.begin(), + ssdKeysAfterEval.end(), std::back_inserter(intersectionKeys)); + + std::set_difference(ssdKeysBeforeEval.begin(), ssdKeysBeforeEval.end(), intersectionKeys.begin(), + intersectionKeys.end(), std::back_inserter(swapInKeys)); + std::set_difference(ssdKeysAfterEval.begin(), ssdKeysAfterEval.end(), intersectionKeys.begin(), + intersectionKeys.end(), std::back_inserter(swapOutKeys)); +} + diff --git a/src/core/l3_storage/cache_manager.h b/src/core/l3_storage/cache_manager.h index 79335788..34e7f0c2 100644 --- a/src/core/l3_storage/cache_manager.h +++ b/src/core/l3_storage/cache_manager.h @@ -107,10 +107,20 @@ namespace MxRec { int64_t GetTableUsage(const string& tableName); + void BackUpTrainStatus(); + + void RecoverTrainStatus(); + + void GetSwapInAndSwapOutKeys(vector& ssdKeysBeforeEval, + vector& ssdKeysAfterEval, + vector& swapInKeys, vector& swapOutKeys); + // DDR内每个表中emb数据频次缓存;map unordered_map ddrKeyFreqMap; + unordered_map ddrKeyFreqMapBackUp; // 每张表中非DDR内key的出现次数 unordered_map> excludeDDRKeyCountMap; + unordered_map> excludeDDRKeyCountMapBackUp; // 每一个table对应一个PreProcessMapper,预先推演HBM->DDR的情况 std::unordered_map preProcessMapper; @@ -125,6 +135,7 @@ namespace MxRec { uint64_t maxTableSize; vector savePath; bool isExist; + int extEmbeddingSize; }; void CreateL3StorageTableIfNotExist(const std::string& embTableName); diff --git a/src/core/ock_ctr_common/include/embedding_cache.h b/src/core/ock_ctr_common/include/embedding_cache.h index 5e25a718..ce807f16 100644 --- a/src/core/ock_ctr_common/include/embedding_cache.h +++ b/src/core/ock_ctr_common/include/embedding_cache.h @@ -317,18 +317,24 @@ public: const std::vector>& optimizerSlots) = 0; /* * - * train通道切换为eval, 备份当前表的offsetMapper对象, 存储下当前train对应的devices上key的状态 - * @Param tableName: 需要加载信息的table名字 + * When switch the channel to eval, backup the current table's offsetMapper object. + * @Param tableName: embedding table name * @Return errorCode */ - virtual int BackUpTrainStatus(std::string tableName) = 0; + virtual int BackUpTrainStatus(const std::string& tableName) = 0; /* * - * eval通道切换为train, 将当前表的offsetMapper对象还原成备份的train对应的的device上key的状态 - * @Param tableName: 需要加载信息的table名字 + * When switch the eval channel back to train, Recover the current table's offsetMapper object to the backup state. + * @Param tableName: embedding table name + * @Return errorCode + */ + virtual int RecoverTrainStatus(const std::string& tableName) = 0; + + /* * + * Reset the offsetMapper object to revert to its initialized state after loading. 
* @Return errorCode */ - virtual int RecoverTrainStatus(std::string tableName) = 0; + virtual int ResetOffsetMappers() = 0; }; } // namespace EmbCache -- Gitee From 0c6d360bf5aa30853d4c4da9bc913d3100137f87 Mon Sep 17 00:00:00 2001 From: penghuiyang <1060916628@qq.com> Date: Fri, 19 Jul 2024 15:36:31 +0800 Subject: [PATCH 285/302] =?UTF-8?q?=E3=80=90FEAT=E3=80=91=E7=BB=99FileWrit?= =?UTF-8?q?er=E6=B7=BB=E5=8A=A0patch=E9=98=B2=E6=AD=A2=E5=86=99summary?= =?UTF-8?q?=E6=96=87=E4=BB=B6=E5=86=B2=E7=AA=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mx_rec/__init__.py | 3 ++- mx_rec/saver/patch.py | 22 +++++++++++++++++++++- 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/mx_rec/__init__.py b/mx_rec/__init__.py index 4f82c3ed..618d802e 100644 --- a/mx_rec/__init__.py +++ b/mx_rec/__init__.py @@ -20,7 +20,7 @@ __all__ = ["version", "__version__"] from mx_rec.constants.constants import ASCEND_GLOBAL_HASHTABLE_COLLECTION from mx_rec.util.tf_version_adapter import npu_ops, hccl_ops, NPUCheckpointSaverHook -from mx_rec.saver.patch import patch_for_saver +from mx_rec.saver.patch import patch_for_saver, patch_for_summary_writer from mx_rec.graph.patch import patch_for_dataset, patch_for_chief_session_creator, patch_for_bool_gauge, \ patch_for_assert_eval_spec, patch_for_scale_loss, patch_for_session from mx_rec.data.patch import patch_for_dataset_eos_map @@ -28,6 +28,7 @@ from mx_rec.optimizers.base import patch_for_optimizer from mx_rec.saver.warm_start import patch_for_warm_start patch_for_saver() +patch_for_summary_writer() patch_for_dataset() patch_for_dataset_eos_map() patch_for_scale_loss() diff --git a/mx_rec/saver/patch.py b/mx_rec/saver/patch.py index 0f3a237b..d5071d5c 100644 --- a/mx_rec/saver/patch.py +++ b/mx_rec/saver/patch.py @@ -23,6 +23,7 @@ import os import time import tensorflow as tf +from tensorflow.compat.v1.summary import FileWriter from tensorflow.core.protobuf import saver_pb2 from tensorflow.core.protobuf import trackable_object_graph_pb2 from tensorflow.python import pywrap_tensorflow @@ -45,13 +46,15 @@ import numpy as np from mpi4py import MPI from mx_rec.saver.saver import Saver as SparseSaver, check_file_system_is_valid, should_write_data -from mx_rec.util.communication.hccl_ops import get_local_rank_size +from mx_rec.util.communication.hccl_ops import get_rank_id from mx_rec.util.initialize import ConfigInitializer from mx_rec.validator.validator import para_checker_decorator, ClassValidator, StringValidator, OptionalIntValidator, \ OptionalStringValidator, DirectoryValidator from mx_rec.util.log import logger from mx_rec.constants.constants import MAX_INT32, INVALID_CHARS +_FILENAME_SUFFIX = "filename_suffix" + def get_sparse_vars(var_list): sparse_var_list = [] @@ -470,3 +473,20 @@ def patch_for_saver(): dense_saver.build = build logger.debug("Class tf.train.Saver has been patched.") training_util.write_graph = patch_for_write_graph_func(graph_io.write_graph) + + +def _patch_for_summary_writer(func): + def wrapper(*args, **kwargs): + filename_suffix = kwargs.get(_FILENAME_SUFFIX, "") + filename_suffix = filename_suffix or "" + rank_suffix = "_rank" + str(get_rank_id()) + if rank_suffix not in filename_suffix: + filename_suffix = rank_suffix + "_" + filename_suffix if filename_suffix else rank_suffix + kwargs[_FILENAME_SUFFIX] = filename_suffix + return func(*args, **kwargs) + + return wrapper + + +def patch_for_summary_writer(): + FileWriter.__init__ = _patch_for_summary_writer(FileWriter.__init__) -- 
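For context on the FileWriter patch above: tf.compat.v1.summary.FileWriter derives its event-file name from the log directory plus filename_suffix, so several ranks writing into one shared directory can clobber the same events file; forcing a unique per-rank suffix keeps one file per rank. The following is a minimal, self-contained sketch of the same keyword-wrapping technique; DummyWriter and the get_rank_id stub are illustrative stand-ins, not part of the patch.

def get_rank_id():
    # Stand-in for mx_rec.util.communication.hccl_ops.get_rank_id;
    # a real job would query the collective library for its rank.
    return 0


def with_rank_suffix(init):
    # Wrap an __init__ so that kwargs["filename_suffix"] always carries a
    # "_rank<N>" marker; the membership check keeps the wrapper idempotent.
    def wrapper(*args, **kwargs):
        suffix = kwargs.get("filename_suffix") or ""
        rank_suffix = "_rank" + str(get_rank_id())
        if rank_suffix not in suffix:
            suffix = rank_suffix + "_" + suffix if suffix else rank_suffix
        kwargs["filename_suffix"] = suffix
        return init(*args, **kwargs)
    return wrapper


class DummyWriter:
    # Illustrative stand-in for tensorflow.compat.v1.summary.FileWriter.
    def __init__(self, logdir, filename_suffix=None):
        self.logdir = logdir
        self.filename_suffix = filename_suffix


DummyWriter.__init__ = with_rank_suffix(DummyWriter.__init__)

assert DummyWriter("/tmp/logs", filename_suffix="loss").filename_suffix == "_rank0_loss"
assert DummyWriter("/tmp/logs").filename_suffix == "_rank0"

Note that the wrapper only inspects keyword arguments; a suffix passed positionally would bypass it, which matches the behavior of the actual patch, where FileWriter callers normally pass filename_suffix by keyword.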
Gitee From fd3f91d2cd874d683753469fce9d9bffe05f63b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=83=AD=E6=9C=9B?= <1244372993@qq.com> Date: Mon, 22 Jul 2024 16:19:16 +0800 Subject: [PATCH 286/302] =?UTF-8?q?=E3=80=90FIX=E3=80=91=E4=BF=9D=E5=AD=98?= =?UTF-8?q?=E5=8A=A0=E8=BD=BD=EF=BC=8C=E5=9B=9E=E9=80=80Python=E4=BE=A7?= =?UTF-8?q?=E8=B7=B3=E8=BF=87=E7=AC=AC0=E6=AD=A5=E4=BF=9D=E5=AD=98?= =?UTF-8?q?=E7=9A=84=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mx_rec/saver/saver.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/mx_rec/saver/saver.py b/mx_rec/saver/saver.py index f7ba8f03..a6362506 100644 --- a/mx_rec/saver/saver.py +++ b/mx_rec/saver/saver.py @@ -127,15 +127,6 @@ class Saver(object): save_path = save_path if save_path else self._prefix_name directory, base_name = os.path.split(save_path) - # skip save in step-0, cause host skip save in step-0 EmbeddingDDR::Save SyncLatestEmbedding - try: - step_in_name = int(base_name.split("-")[-1]) - if step_in_name == 0: - return - except ValueError as err: - raise ValueError(f"The base_name {base_name} needs to include save_step message " - f"eg: mode-100") from err - if global_step: if not isinstance(global_step, compat.integral_types): global_step = int(sess.run(global_step)) -- Gitee From aaabe4aa37ef1b188d5e112c3a7c99040579c92f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=97=A0=E6=82=94?= <1187940490@qq.com> Date: Mon, 22 Jul 2024 22:08:22 +0800 Subject: [PATCH 287/302] =?UTF-8?q?mmoe=20=E6=A8=A1=E5=9E=8B=E6=A1=86?= =?UTF-8?q?=E6=9E=B6=E6=8F=90=E4=BA=A4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/mmoe/model.py | 136 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 136 insertions(+) create mode 100644 examples/mmoe/model.py diff --git a/examples/mmoe/model.py b/examples/mmoe/model.py new file mode 100644 index 00000000..0046d2fd --- /dev/null +++ b/examples/mmoe/model.py @@ -0,0 +1,136 @@ +# coding=utf-8 +# Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ==============================================================================
+
+import time
+from easydict import EasyDict as edict
+
+import tensorflow as tf
+
+
+model_cfg = edict()
+model_cfg.loss_mode = "batch"
+LOSS_OP_NAME = "loss"
+LABEL_OP_NAME = "label"
+VAR_LIST = "variable"
+PRED_OP_NAME = "pred"
+
+
+class MyModel:
+    def __init__(self, expert_num =8, expert_size=16, tower_size=8, gate_num = 2):
+
+        self.expert_num = expert_num
+        self.expert_size = expert_size
+        self.tower_size = tower_size
+        self.gate_num = gate_num
+
+
+    def expert_layer(self, input):
+        param_expert = []
+        for i in range(0, self.expert_num):
+            expert_linear = tf.layers.dense(input, units=self.expert_size, activation=None, name = f'expert_payer_{i}',
+                                            kernel_initializer = tf.constant_initializer(value=0.1),
+                                            bias_initializer = tf.constant_initializer(value=0.1))
+
+            param_expert.append(expert_linear)
+        return param_expert
+
+
+    def gate_layer(self, input):
+        param_gate = []
+        for i in range(0, self.gate_num):
+            gate_linear = tf.layers.dense(input, units=self.expert_num, activation=None, name = f'gate_payer_{i}',
+                                          kernel_initializer = tf.constant_initializer(value=0.1),
+                                          bias_initializer = tf.constant_initializer(value=0.1))
+
+            param_gate.append(gate_linear)
+        return param_gate
+
+
+    def tower_layer(self, input, layer_name):
+        tower_linear = tf.layers.dense(input, units=self.tower_size, activation=None, name = f'tower_payer_{layer_name}',
+                                       kernel_initializer = tf.constant_initializer(value=0.1),
+                                       bias_initializer = tf.constant_initializer(value=0.1))
+
+        tower_linear_out = tf.layers.dense(tower_linear, units=self.tower_size, activation=None, name = f'tower_payer_out_{layer_name}',
+                                           kernel_initializer = tf.constant_initializer(value=0.1),
+                                           bias_initializer = tf.constant_initializer(value=0.1))
+
+        return tower_linear_out
+
+
+    def build_model(self,
+                    embedding=None,
+                    dense_feature=None,
+                    label=None,
+                    is_training=True,
+                    seed=None):
+
+        with tf.variable_scope("mmoe", reuse=tf.AUTO_REUSE):
+
+            dense_expert = self.expert_layer(dense_feature)
+            dense_gate = self.gate_layer(dense_feature)
+
+            all_expert = []
+            _slice_num = 0
+            for i in range(0, self.expert_num):
+                slice_num_end = _slice_num + self.expert_size
+                cur_expert = tf.add(dense_expert[i], embedding[:, _slice_num:slice_num_end])
+                cur_expert = tf.nn.relu(cur_expert)
+                all_expert.append(cur_expert)
+                _slice_num = slice_num_end
+
+            expert_concat = tf.concat(all_expert, axis=1)
+            expert_concat = tf.reshape(expert_concat, [-1, self.expert_num, self.expert_size])
+
+            output_layers = []
+            out_pred = []
+            for i in range(0, self.gate_num):
+                slice_gate_end = _slice_num + self.expert_num
+                cur_gate = tf.add(dense_gate[i], embedding[:, _slice_num:slice_gate_end])
+                cur_gate = tf.nn.softmax(cur_gate)
+
+                cur_gate = tf.reshape(cur_gate, [-1, self.expert_num, 1])
+
+                cur_gate_expert = tf.multiply(x=expert_concat, y=cur_gate)
+                cur_gate_expert = tf.reduce_sum(cur_gate_expert, axis=1)
+                out = self.tower_layer(cur_gate_expert, i)
+                output_layers.append(out)
+                out_pred.append(tf.nn.softmax(out[:, 1]))
+                _slice_num = slice_gate_end
+            trainable_variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='mmoe')
+
+            label_income = label[:, 0:1]
+            label_mat = label[:, 1:]
+
+            pred_income_1 = tf.slice(output_layers[0], [0, 1], [-1, 1])
+            pred_marital_1 = tf.slice(output_layers[1], [0, 1], [-1, 1])
+
+            cost_income = tf.losses.log_loss(labels=tf.cast(label_income, tf.float32), predictions=pred_income_1,
+                                             epsilon=1e-4)
+            cost_marital =
tf.losses.log_loss(labels=tf.cast(label_mat, tf.float32), predictions=pred_marital_1, + epsilon=1e-4) + + avg_cost_income = tf.reduce_mean(cost_income) + avg_cost_marital = tf.reduce_mean(cost_marital) + + loss = 0.5 * (avg_cost_income + avg_cost_marital) + + return {LOSS_OP_NAME: loss, + PRED_OP_NAME: out_pred, + LABEL_OP_NAME: label, + VAR_LIST: trainable_variables} -- Gitee From f17973de35900ab90455e1933717c21161fe2a62 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=97=A0=E6=82=94?= <1187940490@qq.com> Date: Mon, 22 Jul 2024 22:39:52 +0800 Subject: [PATCH 288/302] cleancode --- examples/mmoe/config.py | 197 ++++++++++++++++++++ examples/mmoe/criteo.py | 273 ++++++++++++++++++++++++++++ examples/mmoe/delay_loss_scale.py | 64 +++++++ examples/mmoe/gradient_descent_w.py | 71 ++++++++ examples/mmoe/mean_auc.py | 40 ++++ examples/mmoe/model.py | 27 +-- examples/mmoe/op_impl_mode.ini | 1 + examples/mmoe/optimizer.py | 35 ++++ 8 files changed, 695 insertions(+), 13 deletions(-) create mode 100644 examples/mmoe/config.py create mode 100644 examples/mmoe/criteo.py create mode 100644 examples/mmoe/delay_loss_scale.py create mode 100644 examples/mmoe/gradient_descent_w.py create mode 100644 examples/mmoe/mean_auc.py create mode 100644 examples/mmoe/op_impl_mode.ini create mode 100644 examples/mmoe/optimizer.py diff --git a/examples/mmoe/config.py b/examples/mmoe/config.py new file mode 100644 index 00000000..d5540908 --- /dev/null +++ b/examples/mmoe/config.py @@ -0,0 +1,197 @@ +# coding=utf-8 +# Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +import enum +import os + +import tensorflow as tf +from tensorflow.core.protobuf.rewriter_config_pb2 import RewriterConfig +from npu_bridge.estimator.npu.npu_config import NPURunConfig + +from mx_rec.constants.constants import CacheModeEnum + +SSD_DATA_PATH = ["ssd_data"] + + +class LearningRateScheduler: + """ + LR Scheduler combining Polynomial Decay with Warmup at the beginning. + TF-based cond operations necessary for performance in graph mode. 
+ """ + + def __init__(self, base_lr_dense, base_lr_sparse, warmup_steps, decay_start_step, decay_steps): + self.warmup_steps = tf.constant(warmup_steps, dtype=tf.int32) + self.decay_start_step = tf.constant(decay_start_step, dtype=tf.int32) + self.decay_steps = tf.constant(decay_steps) + self.decay_end_step = decay_start_step + decay_steps # 65041 + self.poly_power = 2.0 + self.base_lr_dense = base_lr_dense + self.base_lr_sparse = base_lr_sparse + + def calc(self, global_step): + # used for the warmup stage + warmup_step = tf.cast(1 / self.warmup_steps, tf.float32) + lr_factor_warmup = 1 - tf.cast(self.warmup_steps - global_step, tf.float32) * warmup_step + lr_factor_warmup = tf.cast(lr_factor_warmup, tf.float32) + # used for the constant stage + lr_factor_constant = tf.cast(1.0, tf.float32) + + lr_sparse = self.base_lr_sparse * lr_factor_constant + lr_dense = self.base_lr_dense * lr_factor_constant + return lr_dense, lr_sparse + + +class Config: + def __init__(self, ): + self.rank_id = int(os.getenv("OMPI_COMM_WORLD_RANK")) if os.getenv("OMPI_COMM_WORLD_RANK") else None + tmp = os.getenv("TRAIN_RANK_SIZE") + if tmp is None: + raise ValueError("please export TRAIN_RANK_SIZE") + self.rank_size = int(tmp) + + self.data_path = os.getenv("DLRM_CRITEO_DATA_PATH") + self.train_file_pattern = "train" + self.test_file_pattern = "test" + + self.batch_size = 4096 + self.line_per_sample = 1 + self.train_epoch = 1 + self.test_epoch = 9 + self.perform_shuffle = False + + self.key_type = tf.int64 + self.label_type = tf.float32 + self.value_type = tf.int64 + + self.feat_cnt = 26 + self.__set_emb_table_size() + + self.field_num = 26 + self.send_count = 46000 // self.rank_size + + self.emb_dim = 8 + self.hashtable_threshold = 1 + + self.USE_PIPELINE_TEST = False + + # 动态学习率 + GLOBAL_BATCH_SIZE = 8192 * 8 + LR_SCHEDULE_STEPS = [ + int(2750 * 55296 / GLOBAL_BATCH_SIZE), + int(49315 * 55296 / GLOBAL_BATCH_SIZE), + int(27772 * 55296 / GLOBAL_BATCH_SIZE), + ] + self.global_step = tf.Variable(0, trainable=False) + _lr_scheduler = LearningRateScheduler( + 0.001, + 0.001, + LR_SCHEDULE_STEPS[0], + LR_SCHEDULE_STEPS[1], + LR_SCHEDULE_STEPS[2], + ) + self.learning_rate = _lr_scheduler.calc(self.global_step) + + def __set_emb_table_size(self): + self.cache_mode = os.getenv("CACHE_MODE") + if self.cache_mode is None: + raise ValueError("please export CACHE_MODE environment variable, support:[HBM, DDR, SSD]") + + if self.cache_mode == CacheModeEnum.HBM.value: + self.dev_vocab_size = 14_000_000 * self.rank_size + self.host_vocab_size = 0 + elif self.cache_mode == CacheModeEnum.DDR.value: + self.dev_vocab_size = 500_000 * self.rank_size + self.host_vocab_size = 24_000_000 * self.rank_size + elif self.cache_mode == CacheModeEnum.SSD.value: + self.dev_vocab_size = 100_000 * self.rank_size + self.host_vocab_size = 2_000_000 * self.rank_size + self.ssd_vocab_size = 24_000_000 * self.rank_size + else: + raise ValueError(f"get CACHE_MODE:{self.cache_mode}, expect in [HBM, DDR, SSD]") + + def get_emb_table_cfg(self): + if self.cache_mode == CacheModeEnum.HBM.value: + return {"device_vocabulary_size": self.dev_vocab_size} + elif self.cache_mode == CacheModeEnum.DDR.value: + return {"device_vocabulary_size": self.dev_vocab_size, + "host_vocabulary_size": self.host_vocab_size} + elif self.cache_mode == CacheModeEnum.SSD.value: + return {"device_vocabulary_size": self.dev_vocab_size, + "host_vocabulary_size": self.host_vocab_size, + "ssd_vocabulary_size": self.ssd_vocab_size, + "ssd_data_path": SSD_DATA_PATH} + else: + raise 
RuntimeError(f"get CACHE_MODE:{self.cache_mode}, check Config.__set_emb_table_size implementation") + + +def sess_config(dump_data=False, dump_path="./dump_output", dump_steps="0|1|2"): + session_config = tf.ConfigProto(allow_soft_placement=False, + log_device_placement=False) + session_config.gpu_options.allow_growth = True + custom_op = session_config.graph_options.rewrite_options.custom_optimizers.add() + custom_op.name = "NpuOptimizer" + custom_op.parameter_map["mix_compile_mode"].b = False + custom_op.parameter_map["use_off_line"].b = True + custom_op.parameter_map["min_group_size"].b = 1 + # 可选配置level0:pairwise;level1:pairwise + custom_op.parameter_map["HCCL_algorithm"].s = tf.compat.as_bytes("level0:fullmesh;level1:fullmesh") + custom_op.parameter_map["enable_data_pre_proc"].b = True + custom_op.parameter_map["iterations_per_loop"].i = 10 + custom_op.parameter_map["precision_mode"].s = tf.compat.as_bytes("allow_mix_precision") + custom_op.parameter_map["hcom_parallel"].b = False + custom_op.parameter_map["op_precision_mode"].s = tf.compat.as_bytes("op_impl_mode.ini") + custom_op.parameter_map["op_execute_timeout"].i = 2000 + custom_op.parameter_map["variable_memory_max_size"].s = tf.compat.as_bytes( + str(13 * 1024 * 1024 * 1024)) # total 31 need 13; + custom_op.parameter_map["graph_memory_max_size"].s = tf.compat.as_bytes(str(18 * 1024 * 1024 * 1024)) # need 25 + custom_op.parameter_map["stream_max_parallel_num"].s = tf.compat.as_bytes("DNN_VM_AICPU:3,AIcoreEngine:3") + + if dump_data: + custom_op.parameter_map["enable_dump"].b = True + custom_op.parameter_map["dump_path"].s = tf.compat.as_bytes(dump_path) + custom_op.parameter_map["dump_step"].s = tf.compat.as_bytes(dump_steps) + custom_op.parameter_map["dump_mode"].s = tf.compat.as_bytes("all") + + session_config.graph_options.rewrite_options.remapping = RewriterConfig.OFF + session_config.graph_options.rewrite_options.memory_optimization = RewriterConfig.OFF + + return session_config + + +def get_npu_run_config(): + session_config = tf.ConfigProto(allow_soft_placement=False, + log_device_placement=False) + + session_config.gpu_options.allow_growth = True + custom_op = session_config.graph_options.rewrite_options.custom_optimizers.add() + custom_op.name = "NpuOptimizer" + session_config.graph_options.rewrite_options.remapping = RewriterConfig.OFF + session_config.graph_options.rewrite_options.memory_optimization = RewriterConfig.OFF + + run_config = NPURunConfig( + save_summary_steps=1000, + save_checkpoints_steps=100, + keep_checkpoint_max=5, + session_config=session_config, + log_step_count_steps=20, + precision_mode='allow_mix_precision', + enable_data_pre_proc=True, + iterations_per_loop=1, + jit_compile=False, + op_compiler_cache_mode="enable", + HCCL_algorithm="level0:fullmesh;level1:fullmesh" # 可选配置:level0:pairwise;level1:pairwise + ) + return run_config diff --git a/examples/mmoe/criteo.py b/examples/mmoe/criteo.py new file mode 100644 index 00000000..25f1d869 --- /dev/null +++ b/examples/mmoe/criteo.py @@ -0,0 +1,273 @@ +# coding=utf-8 +# Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +import os +import stat +import pickle +import argparse +import pandas as pd +import numpy as np +import tensorflow as tf +from tqdm import tqdm + +NAMES = ['label'] + [f'I{i}' for i in range(1, 14)] + [f'C{i}' for i in range(1, 27)] + + +def make_sub_file(lines, head, src_name, sub_dir_name, sub): + """Write sub-data. + + Args: + :param lines: A list. Several pieces of data. + :param head: A string. ['label', 'I1', 'I2', ...]. + :param src_name: A string. The name of data. + :param sub_dir_name: A string. + :param sub: A scalar(Int). Record the current number of sub file. + :return: sub + 1. + """ + root_path, file_path = os.path.split(src_name) + file_name, suffix = file_path.split('.') + split_file_name = file_name + "_" + str(sub).zfill(2) + "." + suffix + split_file = os.path.join(root_path, sub_dir_name, split_file_name) + if not os.path.exists(os.path.join(root_path, sub_dir_name)): + os.mkdir(os.path.join(root_path, sub_dir_name)) + + modes = stat.S_IWUSR | stat.S_IRUSR + flags = os.O_WRONLY | os.O_TRUNC | os.O_CREAT + f = os.fdopen(os.open(split_file, flags, modes), 'w') + try: + f.writelines([head]) + f.writelines(lines) + return sub + 1 + finally: + f.close() + + +def split_byline_count(filename, count, sub_dir_name): + """Split File. + Note: You can specify how many rows of data each sub file contains. + Args: + :param filename: A string. + :param count: A scalar(int). + :param sub_dir_name: A string. + :return: + """ + f = open(filename, 'r') + try: + head = f.readline() + buf = [] + sub = 1 + for line in f: + buf.append(line) + if len(buf) == count: + sub = make_sub_file(buf, head, filename, sub_dir_name, sub) + buf = [] + if len(buf) != 0: + try: + make_sub_file(buf, head, filename, sub_dir_name, sub) + except FileNotFoundError as err: + raise FileNotFoundError("please check the filename of data") from err + finally: + f.close() + + +def get_split_file_path(parent_path=None, dataset_path=None, sample_num=4600000): + """Get the list of split file path. + Note: Either parent_path or dataset_path must be valid. + If exists dataset_path + "/split", parent_path = dataset_path + "/split". + Args: + :param parent_path: A string. split file's parent path. + :param dataset_path: A string. + :param sample_num: A int. The sample number of every split file. + :return: A list. [file1_path, file2_path, ...] 
+ """ + sub_dir_name = 'split' + if parent_path is None and dataset_path is None: + raise ValueError('Please give parent path or file path.') + if parent_path is None and os.path.exists(os.path.join(os.path.dirname(dataset_path), sub_dir_name)): + parent_path = os.path.join(os.path.dirname(dataset_path), sub_dir_name) + elif parent_path is None or not os.path.exists(parent_path): + split_byline_count(dataset_path, sample_num, sub_dir_name) + parent_path = os.path.join(os.path.dirname(dataset_path), sub_dir_name) + split_file_name = os.listdir(parent_path) + split_file_name.sort() + split_file_list = [parent_path + "/" + file_name for file_name in split_file_name if file_name[-3:] == 'txt'] + return split_file_list + + +def get_fea_map(fea_map_path=None, split_file_list=None): + """Get feature map. + Note: Either parent_path or dataset_path must be valid. + If exists dir(split_file_list[0]) + "/fea_map.pkl", fea_map_path is valid. + If fea_map_path is None and you want to build the feature map, + the default file path is the parent directory of split file + "fea_map.pkl". + Args: + :param fea_map_path: A string. + :param split_file_list: A list. [file1_path, file2_path, ...] + :return: A dict. {'C1':{}, 'C2':{}, ...} + """ + if fea_map_path is None and split_file_list is None: + raise ValueError('Please give feature map path or split file list.') + if fea_map_path is None and split_file_list is not None: + fea_map_path = os.path.join(os.path.dirname(split_file_list[0]), "fea_map.pkl") + if os.path.exists(fea_map_path) and fea_map_path[-3:] == 'pkl': + with open(fea_map_path, 'rb') as f: + fea_map = pickle.load(f) + return fea_map + fea_map = {} + for file_open in tqdm(split_file_list): + f = open(file_open) + for line in f: + row = line.strip('\n').split('\t') + for i in range(14, 40): + if row[i] == '': + continue + name = NAMES[i] + fea_map.setdefault(name, {}) + if fea_map[name].get(row[i]) is None: + fea_map[name][row[i]] = len(fea_map[name]) + for j in range(1, 14): + if row[j] == '': + continue + name = NAMES[j] + fea_map.setdefault(name, {}) + fea_map[name].setdefault('min', float(row[j])) + fea_map[name].setdefault('max', float(row[j])) + fea_map[name]['min'] = min(fea_map[name]['min'], float(row[j])) + fea_map[name]['max'] = max(fea_map[name]['max'], float(row[j])) + f.close() + for i in range(14, 40): + fea_map[NAMES[i]]['-1'] = len(fea_map[NAMES[i]]) + fea_map_path = os.path.join(os.path.dirname(split_file_list[0]), "fea_map.pkl") + + + modes = stat.S_IWUSR | stat.S_IRUSR + flags = os.O_WRONLY | os.O_TRUNC | os.O_CREAT + with os.fdopen(os.open(fea_map_path, flags, modes), 'wb') as fd: + pickle.dump(fea_map, fd, pickle.HIGHEST_PROTOCOL) + + return fea_map + + +def rec_kbins_discretizer(dat, n_bins, min_max_dict): + """Bin continuous data into intervals. + Note: The strategy is "uniform". + Args: + :param dat: A dataframe. + :param n_bins: A scalar(int). + :param min_max_dict: A dict such as {'min': , 'max': }. + :return: The new dataframe. 
+ """ + features = dat.columns + n_features = len(features) + bin_edges = np.zeros(n_features, dtype=object) + for idx, feature in enumerate(features): + bin_edges[idx] = np.linspace(min_max_dict[feature]['min'], min_max_dict[feature]['max'], n_bins + 1) + rtol = 1.e-5 + atol = 1.e-8 + eps = atol + rtol * np.abs(dat[feature]) + dat[feature] = np.digitize(dat[feature] + eps, bin_edges[idx][1:]) + return dat + + +def convert_input2tfrd(in_file_path, out_file_path): + """ + txt to tfrecords + """ + def make_example(label_list, dense_feat_list, sparse_feat_list): + dense_feature = np.array(dense_feat_list, dtype=np.int64).reshape(-1) + sparse_feature = np.array(sparse_feat_list, dtype=np.int64).reshape(-1) + label = np.array(label_list, dtype=np.int64).reshape(-1) + feature_dict = { + "dense_feature": tf.train.Feature(int64_list=tf.train.Int64List(value=dense_feature)), + "sparse_feature": tf.train.Feature(int64_list=tf.train.Int64List(value=sparse_feature)), + "label": tf.train.Feature(int64_list=tf.train.Int64List(value=label)) + } + example = tf.train.Example(features=tf.train.Features(feature=feature_dict)) + + return example + + file_name = out_file_path + in_file_path[-12:-4] + '.tfrecord' + file_writer = tf.io.TFRecordWriter(file_name) + + with open(in_file_path, encoding='utf-8') as file_in: + + for _, line in tqdm(enumerate(file_in)): + + line = line.strip('\n') + items = line.split('\t') + if len(items) != 40: + continue + label = int(items[0]) + dense = items[1:14] + sparse = items[14:] + + ex = make_example(label, dense, sparse) + serialized = ex.SerializeToString() + file_writer.write(serialized) + + file_writer.close() + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Get datasets') + parser.add_argument('--data_path') + parser.add_argument('--output_path') + + args, _ = parser.parse_known_args() + data_path = args.data_path + output_path = args.output_path + + # get txt_list + file_split_list = get_split_file_path(dataset_path=data_path) + # get feature_map + feature_map = get_fea_map(split_file_list=file_split_list) + + for file in tqdm(file_split_list): + + # read data + data_df = pd.read_csv(file, sep='\t', header=None, names=NAMES) + # name feature + sparse_features = ['C' + str(i) for i in range(1, 27)] + dense_features = ['I' + str(i) for i in range(1, 14)] + # data processing + data_df[sparse_features] = data_df[sparse_features].fillna('-1') + data_df[dense_features] = data_df[dense_features].fillna(0) + # sparse feature: mapping + for col in sparse_features: + try: + data_df[col] = data_df[col].map(lambda x: feature_map[col][x]) + except KeyError as e: + raise KeyError("Feature {} not found in dataset".format(col)) from e + # dense feature: Bin continuous data into intervals. 
+ data_df[dense_features] = rec_kbins_discretizer(data_df[dense_features], 1000, feature_map) + # add offsets + slot_size_array = [ + 1001, 1001, 1001, 1001, 1001, 1001, 1001, 1001, 1001, 1001, 1001, 1001, 1001, + 1462, 585, 10131228, 2202609, 307, 25, 12519, 635, 5, 93147, 5685, 8351594, 3196, + 29, 14994, 5461307, 12, 5654, 2174, 5, 7046548, 19, 17, 286182, 106, 142573 + ] + offset_size_list = np.cumsum([0] + slot_size_array[:-1]) + for col_index in range(1, len(offset_size_list) + 1): + data_df.iloc[:, col_index] += offset_size_list[col_index - 1] + # save to txt + data_df.to_csv(file, sep='\t', index=False, header=False) + # txt to tfrecords + convert_input2tfrd(in_file_path=file, out_file_path=output_path) + + + + + diff --git a/examples/mmoe/delay_loss_scale.py b/examples/mmoe/delay_loss_scale.py new file mode 100644 index 00000000..f73baf68 --- /dev/null +++ b/examples/mmoe/delay_loss_scale.py @@ -0,0 +1,64 @@ +# coding=utf-8 +# Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +import tensorflow as tf +from tensorflow.python.training import optimizer + +from config import Config + + +class DenseLossScaleOptimizer: + def __init__(self, opt: optimizer.Optimizer, loss_scale: int) -> None: + if not isinstance(opt, optimizer.Optimizer): + raise ValueError('"opt" must be an instance of Optimizer, but got: %s' % type(opt)) + self._optimizer = opt + self._loss_scale = tf.convert_to_tensor(loss_scale, tf.float32) + _update_lr_loss_scale(self._optimizer, loss_scale) + + def compute_gradients(self, loss, var_list=None): + return self._optimizer.compute_gradients(loss * self._loss_scale, var_list=var_list) + + def apply_gradients(self, avg_grads): + return self._optimizer.apply_gradients(avg_grads) + + +class SparseLossScaleOptimizer: + def __init__(self, opt: optimizer.Optimizer, loss_scale: int) -> None: + if not isinstance(opt, optimizer.Optimizer): + raise ValueError('"opt" must be an instance of Optimizer, but got: %s' % type(opt)) + self._optimizer = opt + self._loss_scale = tf.convert_to_tensor(loss_scale, tf.float32) + _update_lr_loss_scale(self._optimizer, loss_scale) + + def compute_gradients(self, loss, var_list=None): + return tf.gradients(loss * self._loss_scale, var_list) + + def apply_gradients(self, grads_and_vars): + return self._optimizer.apply_gradients(grads_and_vars) + + +def _update_lr_loss_scale(opt, loss_scale): + if loss_scale <= 0: + raise RuntimeError("the loss_scale must be greater than zero.") + loss_scale = tf.convert_to_tensor(loss_scale, tf.float32) + if hasattr(opt, "_lr"): + # LazyAdam or Adam optimizer + opt._lr = opt._lr / loss_scale + elif hasattr(opt, "_learning_rate"): + # SGD optimizer + opt._learning_rate = opt._learning_rate / loss_scale + else: + raise RuntimeError("`opt` should have a `_learning_rate` or `_lr` named field.") \ No newline at end of file diff --git a/examples/mmoe/gradient_descent_w.py 
b/examples/mmoe/gradient_descent_w.py
new file mode 100644
index 00000000..53adb996
--- /dev/null
+++ b/examples/mmoe/gradient_descent_w.py
@@ -0,0 +1,71 @@
+# coding=utf-8
+# Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from collections import defaultdict
+
+import tensorflow as tf
+from tensorflow.python.ops import math_ops
+from tensorflow.python.training import gradient_descent
+from mx_rec.optimizers.base import CustomizedOptimizer
+from mx_rec.util.log import logger
+from mx_rec.util.initialize import ConfigInitializer
+
+
+def create_hash_optimizer(learning_rate, weight_decay=0.0001, use_locking=False, name="GradientDescent"):
+    optimizer = CustomizedGradientDescentWithWeightDecay(learning_rate=learning_rate,
+                                                         weight_decay=weight_decay,
+                                                         use_locking=use_locking,
+                                                         name=name)
+    ConfigInitializer.get_instance().optimizer_config.optimizer_instance = optimizer
+    return optimizer
+
+
+class CustomizedGradientDescentWithWeightDecay(gradient_descent.GradientDescentOptimizer, CustomizedOptimizer):
+    name_counter = defaultdict(int)
+
+    def __init__(self, learning_rate, weight_decay, use_locking=False, name="GradientDescent"):
+        self.optimizer_type = "gradient_descent_with_weight_decay"
+        self.weight_decay = weight_decay
+        super(CustomizedGradientDescentWithWeightDecay, self)._get_name(name=name)
+        super(CustomizedGradientDescentWithWeightDecay, self).__init__(
+            learning_rate=learning_rate, use_locking=use_locking, name=self.unique_name
+        )
+        self._slot_num = 0
+        self._derivative = 1
+
+    def get_slot_init_values(self):
+        logger.info("no slot for gradient descent")
+        return []
+
+    def _apply_sparse_duplicate_indices(self, grad, var):
+        logger.debug(">>>> Enter _apply_sparse_duplicate_indices")
+        nd_indices = tf.expand_dims(grad.indices, 1)
+        logger.info(f"weight_decay={self.weight_decay}")
+        if self.weight_decay is None:
+            nd_value = grad.values * math_ops.cast(self._learning_rate_tensor, var.dtype.base_dtype)
+        else:
+            nd_value = (grad.values + math_ops.cast(self.weight_decay, var.dtype.base_dtype) *
+                        tf.gather(var, grad.indices)) * math_ops.cast(self._learning_rate_tensor, var.dtype.base_dtype)
+        var_update_op = tf.scatter_nd_add(var, nd_indices, -nd_value, use_locking=self._use_locking)
+        return var_update_op
+
+    def _apply_dense(self, grad, var):
+        logger.debug(">>>> Enter _apply_dense")
+        raise NotImplementedError("You are using a wrong type of variable.")
diff --git a/examples/mmoe/mean_auc.py b/examples/mmoe/mean_auc.py
new file mode 100644
index 00000000..ff57df00
--- /dev/null
+++ b/examples/mmoe/mean_auc.py
@@ -0,0 +1,40 @@
+# coding=utf-8
+# Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+import os
+from glob import glob
+import numpy as np
+
+
+def split_auc(log_input):
+    with open(log_input, 'r') as log:
+        all_auc = []
+        for line in log.readlines():
+            if 'Test' in line:
+                all_auc.append(float(line.split(';')[0].split(':')[-1].strip()))
+    all_auc_len = len(all_auc)
+    all_auc_arr = np.array(all_auc)[:all_auc_len - all_auc_len % 8]
+    test_auc = np.mean(all_auc_arr.reshape(-1, 8), axis=-1)
+    return test_auc
+
+
+log_path_all = 'latest_*.log'
+log_path_list = glob(log_path_all)
+
+for log_path in log_path_list:
+    print(os.path.basename(log_path))
+    print(split_auc(log_path))
+    print('*'*20)
\ No newline at end of file
diff --git a/examples/mmoe/model.py b/examples/mmoe/model.py
index 0046d2fd..5b1917a3 100644
--- a/examples/mmoe/model.py
+++ b/examples/mmoe/model.py
@@ -29,7 +29,7 @@ PRED_OP_NAME = "pred"
 
 
 class MyModel:
-    def __init__(self, expert_num =8, expert_size=16, tower_size=8, gate_num = 2):
+    def __init__(self, expert_num=8, expert_size=16, tower_size=8, gate_num=2):
 
         self.expert_num = expert_num
@@ -40,9 +40,9 @@ class MyModel:
     def expert_layer(self, input):
         param_expert = []
         for i in range(0, self.expert_num):
-            expert_linear = tf.layers.dense(input, units=self.expert_size, activation=None, name = f'expert_payer_{i}',
-                                            kernel_initializer = tf.constant_initializer(value=0.1),
-                                            bias_initializer = tf.constant_initializer(value=0.1))
+            expert_linear = tf.layers.dense(input, units=self.expert_size, activation=None, name=f'expert_payer_{i}',
+                                            kernel_initializer=tf.constant_initializer(value=0.1),
+                                            bias_initializer=tf.constant_initializer(value=0.1))
 
             param_expert.append(expert_linear)
@@ -51,22 +51,23 @@ class MyModel:
     def gate_layer(self, input):
         param_gate = []
         for i in range(0, self.gate_num):
-            gate_linear = tf.layers.dense(input, units=self.expert_num, activation=None, name = f'gate_payer_{i}',
-                                          kernel_initializer = tf.constant_initializer(value=0.1),
-                                          bias_initializer = tf.constant_initializer(value=0.1))
+            gate_linear = tf.layers.dense(input, units=self.expert_num, activation=None, name=f'gate_payer_{i}',
+                                          kernel_initializer=tf.constant_initializer(value=0.1),
+                                          bias_initializer=tf.constant_initializer(value=0.1))
 
             param_gate.append(gate_linear)
         return param_gate
 
 
     def tower_layer(self, input, layer_name):
-        tower_linear = tf.layers.dense(input, units=self.tower_size, activation=None, name = f'tower_payer_{layer_name}',
-                                       kernel_initializer = tf.constant_initializer(value=0.1),
-                                       bias_initializer = tf.constant_initializer(value=0.1))
+        tower_linear = tf.layers.dense(input, units=self.tower_size, activation=None, name=f'tower_payer_{layer_name}',
+                                       kernel_initializer=tf.constant_initializer(value=0.1),
+                                       bias_initializer=tf.constant_initializer(value=0.1))
 
-        tower_linear_out = tf.layers.dense(tower_linear, units=self.tower_size, activation=None, name = f'tower_payer_out_{layer_name}',
-                                           kernel_initializer = tf.constant_initializer(value=0.1),
-                                           bias_initializer = tf.constant_initializer(value=0.1))
+        tower_linear_out = tf.layers.dense(tower_linear, units=self.tower_size, activation=None,
+                                           name=f'tower_payer_out_{layer_name}',
+                                           kernel_initializer=tf.constant_initializer(value=0.1),
+                                           bias_initializer=tf.constant_initializer(value=0.1))
 
         return tower_linear_out
diff --git a/examples/mmoe/op_impl_mode.ini b/examples/mmoe/op_impl_mode.ini
new file mode 100644
index 00000000..579dea43
--- /dev/null
+++ b/examples/mmoe/op_impl_mode.ini
@@ -0,0 +1 @@
+ScatterNdAdd=support_out_of_bound_index
\ No newline at end of file
diff --git a/examples/mmoe/optimizer.py b/examples/mmoe/optimizer.py
new file mode 100644
index 00000000..2c7685bb
--- /dev/null
+++ b/examples/mmoe/optimizer.py
@@ -0,0 +1,35 @@
+# coding=utf-8
+# Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+import tensorflow as tf
+from delay_loss_scale import DenseLossScaleOptimizer, SparseLossScaleOptimizer
+from mx_rec.util.initialize import ConfigInitializer
+from mx_rec.optimizers.lazy_adam import create_hash_optimizer
+from mx_rec.optimizers.lazy_adam_by_addr import create_hash_optimizer_by_address
+
+
+def get_dense_and_sparse_optimizer(cfg):
+    dense_optimizer = tf.train.AdamOptimizer(learning_rate=cfg.learning_rate[0])
+    use_dynamic_expansion = ConfigInitializer.get_instance().use_dynamic_expansion
+    if use_dynamic_expansion:
+        sparse_optimizer = create_hash_optimizer_by_address(learning_rate=cfg.learning_rate[1])
+    else:
+        sparse_optimizer = create_hash_optimizer(learning_rate=cfg.learning_rate[1])
+    loss_scale = 1
+    sparse_optimizer = SparseLossScaleOptimizer(sparse_optimizer, loss_scale)
+    dense_optimizer = DenseLossScaleOptimizer(dense_optimizer, loss_scale)
+
+    return dense_optimizer, sparse_optimizer
-- 
Gitee
From fe7073494d499d161e16ce826175f744a17336eb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=97=A0=E6=82=94?= <1187940490@qq.com>
Date: Mon, 22 Jul 2024 22:50:39 +0800
Subject: [PATCH 289/302] =?UTF-8?q?mmoe=20=E5=90=8A=E8=B5=B7=E4=BB=A3?=
 =?UTF-8?q?=E7=A0=81?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 examples/mmoe/main_mxrec.py | 469 ++++++++++++++++++++++++++++++++++++
 1 file changed, 469 insertions(+)
 create mode 100644 examples/mmoe/main_mxrec.py

diff --git a/examples/mmoe/main_mxrec.py b/examples/mmoe/main_mxrec.py
new file mode 100644
index 00000000..51ed7c4a
--- /dev/null
+++ b/examples/mmoe/main_mxrec.py
@@ -0,0 +1,469 @@
+# coding=utf-8
+# Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +import os +import shutil +import time +import warnings +import random +from glob import glob + +import tensorflow as tf +from sklearn.metrics import roc_auc_score +import numpy as np + +from optimizer import get_dense_and_sparse_optimizer +from config import sess_config, Config, SSD_DATA_PATH, CacheModeEnum +from model import MyModel +from mx_rec.constants.constants import ASCEND_SPARSE_LOOKUP_LOCAL_EMB, ASCEND_SPARSE_LOOKUP_ID_OFFSET +from mx_rec.core.asc.helper import FeatureSpec, get_asc_insert_func +from mx_rec.core.asc.manager import start_asc_pipeline +from mx_rec.core.embedding import create_table, sparse_lookup +from mx_rec.core.feature_process import EvictHook +from mx_rec.graph.modifier import modify_graph_and_start_emb_cache, GraphModifierHook +from mx_rec.constants.constants import ASCEND_TIMESTAMP +from mx_rec.util.initialize import ConfigInitializer, init, terminate_config_initializer +from mx_rec.util.ops import import_host_pipeline_ops +import mx_rec.util as mxrec_util +from mx_rec.util.variable import get_dense_and_sparse_variable +from mx_rec.util.log import logger +from npu_bridge.npu_init import * + +npu_plugin.set_device_sat_mode(0) + +dense_hashtable_seed = 128 +sparse_hashtable_seed = 128 +shuffle_seed = 128 +random.seed(shuffle_seed) + + +def add_timestamp_func(batch): + timestamp = import_host_pipeline_ops().return_timestamp(tf.cast(batch['label'], dtype=tf.int64)) + # tf.constant(np.random.randint(1,1688109060,1)), tf.int64)) + batch["timestamp"] = timestamp + return batch + + +def make_batch_and_iterator(config, feature_spec_list, is_training, dump_graph, is_use_faae=False): + if config.USE_PIPELINE_TEST: + num_parallel = 1 + else: + num_parallel = 8 + + def extract_fn(data_record): + features = { + # Extract features using the keys set during creation + 'label': tf.compat.v1.FixedLenFeature(shape=(config.line_per_sample,), dtype=tf.int64), + 'sparse_feature': tf.compat.v1.FixedLenFeature(shape=(26 * config.line_per_sample,), dtype=tf.int64), + 'dense_feature': tf.compat.v1.FixedLenFeature(shape=(13 * config.line_per_sample,), dtype=tf.float32), + } + sample = tf.compat.v1.parse_single_example(data_record, features) + return sample + + def reshape_fn(batch): + batch['label'] = tf.reshape(batch['label'], [-1, 1]) + batch['dense_feature'] = tf.reshape(batch['dense_feature'], [-1, 13]) + batch['dense_feature'] = tf.math.log(batch['dense_feature'] + 3.0) + batch['sparse_feature'] = tf.reshape(batch['sparse_feature'], [-1, 26]) + return batch + + if is_training: + files_list = glob(os.path.join(config.data_path, config.train_file_pattern) + '/*.tfrecord') + else: + files_list = glob(os.path.join(config.data_path, config.test_file_pattern) + '/*.tfrecord') + dataset = tf.data.TFRecordDataset(files_list, num_parallel_reads=num_parallel) + batch_size = config.batch_size // config.line_per_sample + + dataset = dataset.shard(config.rank_size, config.rank_id) + if is_training: + dataset = dataset.shuffle(batch_size * 1000, seed=shuffle_seed) + if is_training: + dataset = 
dataset.repeat(config.train_epoch) + else: + dataset = dataset.repeat(config.test_epoch) + dataset = dataset.map(extract_fn, num_parallel_calls=num_parallel).batch(batch_size, + drop_remainder=True) + dataset = dataset.map(reshape_fn, num_parallel_calls=num_parallel) + if is_use_faae: + dataset = dataset.map(add_timestamp_func) + + if not MODIFY_GRAPH_FLAG: + insert_fn = get_asc_insert_func(tgt_key_specs=feature_spec_list, is_training=is_training, dump_graph=dump_graph) + dataset = dataset.map(insert_fn) + + dataset = dataset.prefetch(100) + + iterator = dataset.make_initializable_iterator() + batch = iterator.get_next() + return batch, iterator + + +def model_forward(feature_list, hash_table_list, batch, is_train, modify_graph): + embedding_list = [] + logger.debug(f"In model_forward function, is_train: {is_train}, feature_list: {len(feature_list)}, " + f"hash_table_list: {len(hash_table_list)}") + for feature, hash_table in zip(feature_list, hash_table_list): + if MODIFY_GRAPH_FLAG: + feature = batch["sparse_feature"] + embedding = sparse_lookup(hash_table, feature, cfg.send_count, dim=None, is_train=is_train, + name="user_embedding_lookup", modify_graph=modify_graph, batch=batch, + access_and_evict_config=None) + embedding_list.append(embedding) + + if len(embedding_list) == 1: + emb = embedding_list[0] + elif len(embedding_list) > 1: + emb = tf.reduce_sum(embedding_list, axis=0, keepdims=False) + else: + raise ValueError("the length of embedding_list must be greater than or equal to 1.") + my_model = MyModel() + model_output = my_model.build_model(embedding=emb, + dense_feature=batch["dense_feature"], + label=batch["label"], + is_training=is_train, + seed=dense_hashtable_seed) + return model_output + + +def evaluate(): + print("read_test dataset") + if not MODIFY_GRAPH_FLAG: + eval_label = eval_model.get("label") + sess.run([eval_iterator.initializer]) + else: + # 在sess run模式下,若还是使用原来batch中的label去sess run,则会出现getnext超时报错,需要使用新数据集中的batch + eval_label = ConfigInitializer.get_instance().train_params_config.get_target_batch(False).get("label") + sess.run([ConfigInitializer.get_instance().train_params_config.get_initializer(False)]) + log_loss_list = [] + pred_list = [] + label_list = [] + eval_current_steps = 0 + finished = False + print("eval begin") + + while not finished: + try: + eval_current_steps += 1 + eval_start = time.time() + eval_loss, pred, label = sess.run([eval_model.get("loss"), eval_model.get("pred"), eval_label]) + eval_cost = time.time() - eval_start + qps_eval = (1 / eval_cost) * rank_size * cfg.batch_size + log_loss_list += list(eval_loss.reshape(-1)) + pred_list += list(pred.reshape(-1)) + label_list += list(label.reshape(-1)) + print(f"eval current_steps: {eval_current_steps}, qps: {qps_eval}") + if eval_current_steps == eval_steps: + finished = True + except tf.errors.OutOfRangeError: + finished = True + auc = roc_auc_score(label_list, pred_list) + mean_log_loss = np.mean(log_loss_list) + return auc, mean_log_loss + + +def evaluate_fix(step): + print("read_test dataset evaluate_fix") + if not MODIFY_GRAPH_FLAG: + sess.run([eval_iterator.initializer]) + else: + sess.run([ConfigInitializer.get_instance().train_params_config.get_initializer(False)]) + log_loss_list = [] + pred_list = [] + label_list = [] + eval_current_steps = 0 + finished = False + print("eval begin") + while not finished: + try: + eval_current_steps += 1 + eval_loss, pred, label = sess.run([eval_model.get("loss"), eval_model.get("pred"), eval_model.get("label")]) + log_loss_list += 
list(eval_loss.reshape(-1)) + pred_list += list(pred.reshape(-1)) + label_list += list(label.reshape(-1)) + print(f"eval current_steps: {eval_current_steps}") + + if eval_current_steps == eval_steps: + finished = True + except tf.errors.OutOfRangeError: + finished = True + + label_numpy = np.array(label_list) + pred_numpy = np.array(pred_list) + if not os.path.exists(os.path.abspath(".") + f"/interval_{interval}/numpy_{step}"): + os.makedirs(os.path.abspath(".") + f"/interval_{interval}/numpy_{step}") + + if os.path.exists(os.path.abspath(".") + f"/interval_{interval}/numpy_{step}/label_{rank_id}.npy"): + os.remove(os.path.abspath(".") + f"/interval_{interval}/numpy_{step}/label_{rank_id}.npy") + if os.path.exists(os.path.abspath(".") + f"/interval_{interval}/numpy_{step}/pred_{rank_id}.npy"): + os.remove(os.path.abspath(".") + f"/interval_{interval}/numpy_{step}/pred_{rank_id}.npy") + if os.path.exists(f"flag_{rank_id}.txt"): + os.remove(f"flag_{rank_id}.txt") + np.save(os.path.abspath(".") + f"/interval_{interval}/numpy_{step}/label_{rank_id}.npy", label_numpy) + np.save(os.path.abspath(".") + f"/interval_{interval}/numpy_{step}/pred_{rank_id}.npy", pred_numpy) + os.mknod(f"flag_{rank_id}.txt") + while True: + file_exists_list = [os.path.exists(f"flag_{i}.txt") for i in range(rank_size)] + if sum(file_exists_list) == rank_size: + print("All saved!!!!!!!!!!") + break + else: + print("Waitting for saving numpy!!!!!!!!") + time.sleep(1) + continue + + auc = roc_auc_score(label_list, pred_list) + mean_log_loss = np.mean(log_loss_list) + return auc, mean_log_loss + + +def create_feature_spec_list(use_timestamp=False): + access_threshold = None + eviction_threshold = None + if use_timestamp: + access_threshold = 1000 + eviction_threshold = 180 + + feature_spec_list = [FeatureSpec("sparse_feature", table_name="sparse_embeddings", batch_size=cfg.batch_size, + access_threshold=access_threshold, eviction_threshold=eviction_threshold)] + if use_multi_lookup: + feature_spec_list.append(FeatureSpec("sparse_feature", table_name="sparse_embeddings", + batch_size=cfg.batch_size, + access_threshold=access_threshold, + eviction_threshold=eviction_threshold)) + if use_timestamp: + feature_spec_list.append(FeatureSpec("timestamp", is_timestamp=True)) + return feature_spec_list + + +def _del_related_dir(del_path: str) -> None: + if not os.path.isabs(del_path): + del_path = os.path.join(os.getcwd(), del_path) + dirs = glob(del_path) + for sub_dir in dirs: + shutil.rmtree(sub_dir, ignore_errors=True) + logger.info(f"Delete dir:{sub_dir}") + + +def _clear_saved_model() -> None: + _del_related_dir("/root/ascend/log/*") + _del_related_dir("kernel*") + _del_related_dir("model_dir_rank*") + _del_related_dir("op_cache") + + if os.getenv("CACHE_MODE", "") != CacheModeEnum.SSD.value: + return + logger.info("Current cache mode is SSD, and file overwrite is not allowed in SSD mode, deleting exist directory" + " then create empty directory for this use case.") + for sub_path in SSD_DATA_PATH: + _del_related_dir(sub_path) + os.makedirs(sub_path, mode=0o550, exist_ok=True) + logger.info(f"Create dir:{sub_path}") + + +if __name__ == "__main__": + tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) + warnings.filterwarnings("ignore") + _clear_saved_model() + + rank_id = int(os.getenv("RANK_ID")) if os.getenv("RANK_ID") else None + rank_size = int(os.getenv("TRAIN_RANK_SIZE")) if os.getenv("TRAIN_RANK_SIZE") else None + interval = int(os.getenv("INTERVAL")) if os.getenv("INTERVAL") else None + train_steps = 10000 + 
eval_steps = 1360
+
+    try:
+        use_dynamic_expansion = bool(int(os.getenv("USE_DYNAMIC_EXPANSION", 0)))
+        use_multi_lookup = bool(int(os.getenv("USE_MULTI_LOOKUP", 0)))
+        MODIFY_GRAPH_FLAG = bool(int(os.getenv("USE_MODIFY_GRAPH", 0)))
+        use_faae = bool(int(os.getenv("USE_FAAE", 0)))
+    except ValueError as err:
+        raise ValueError("please correctly configure USE_DYNAMIC_EXPANSION, USE_MULTI_LOOKUP, USE_FAAE "
+                         "or USE_MODIFY_GRAPH; only 0 or 1 is supported.") from err
+
+    use_dynamic = bool(int(os.getenv("USE_DYNAMIC", 0)))
+    logger.info(f"USE_DYNAMIC:{use_dynamic}")
+    init(train_steps=train_steps, eval_steps=eval_steps,
+         use_dynamic=use_dynamic, use_dynamic_expansion=use_dynamic_expansion)
+    IF_LOAD = False
+    rank_id = mxrec_util.communication.hccl_ops.get_rank_id()
+    filelist = glob("./saved-model/sparse-model-0")
+    if filelist:
+        IF_LOAD = True
+    ConfigInitializer.get_instance().if_load = IF_LOAD
+
+    cfg = Config()
+    feature_spec_list_train = None
+    feature_spec_list_eval = None
+    if use_faae:
+        feature_spec_list_train = create_feature_spec_list(use_timestamp=True)
+        feature_spec_list_eval = create_feature_spec_list(use_timestamp=True)
+    else:
+        feature_spec_list_train = create_feature_spec_list(use_timestamp=False)
+        feature_spec_list_eval = create_feature_spec_list(use_timestamp=False)
+
+    train_batch, train_iterator = make_batch_and_iterator(cfg, feature_spec_list_train, is_training=True,
+                                                          dump_graph=True, is_use_faae=use_faae)
+    eval_batch, eval_iterator = make_batch_and_iterator(cfg, feature_spec_list_eval, is_training=False,
+                                                        dump_graph=False, is_use_faae=use_faae)
+    logger.info(f"train_batch: {train_batch}")
+
+    if use_faae:
+        cfg.dev_vocab_size = cfg.dev_vocab_size // 2
+
+    optimizer_list = [get_dense_and_sparse_optimizer(cfg)]
+
+    # note: variance_scaling_initializer only supports HBM mode
+    emb_initializer = tf.compat.v1.truncated_normal_initializer(stddev=0.05, seed=sparse_hashtable_seed) \
+        if cfg.cache_mode != "HBM" or use_dynamic_expansion else \
+        tf.compat.v1.variance_scaling_initializer(mode="fan_avg", distribution='normal', seed=sparse_hashtable_seed)
+    sparse_hashtable = create_table(
+        key_dtype=cfg.key_type,
+        dim=tf.TensorShape([cfg.emb_dim]),
+        name="sparse_embeddings",
+        emb_initializer=emb_initializer,
+        **cfg.get_emb_table_cfg()
+    )
+    if use_faae:
+        tf.compat.v1.add_to_collection(ASCEND_TIMESTAMP, train_batch["timestamp"])
+
+    sparse_hashtable_list = [sparse_hashtable, sparse_hashtable] if use_multi_lookup else [sparse_hashtable]
+    train_model = model_forward(feature_spec_list_train, sparse_hashtable_list, train_batch,
+                                is_train=True, modify_graph=MODIFY_GRAPH_FLAG)
+    eval_model = model_forward(feature_spec_list_eval, sparse_hashtable_list, eval_batch,
+                               is_train=False, modify_graph=MODIFY_GRAPH_FLAG)
+
+    dense_variables, sparse_variables = get_dense_and_sparse_variable()
+    trainable_variables = []
+    trainable_variables.extend(dense_variables)
+    if use_dynamic_expansion:
+        trainable_variables.append(tf.compat.v1.get_collection(ASCEND_SPARSE_LOOKUP_LOCAL_EMB)[0])
+    else:
+        trainable_variables.extend(sparse_variables)
+    rank_size = mxrec_util.communication.hccl_ops.get_rank_size()
+    train_ops = []
+    # multi task training
+    for loss, (dense_optimizer, sparse_optimizer) in zip([train_model.get("loss")], optimizer_list):
+        # do dense optimization
+        grads = dense_optimizer.compute_gradients(loss, var_list=trainable_variables)
+        avg_grads = []
+        for grad, var in grads[:-1]:
+            if rank_size > 1:
+                grad = hccl_ops.allreduce(grad, "sum") if grad is not None else None
+            if grad is not None:
+                avg_grads.append((grad / 8.0, var))
+        # apply gradients: update variables
+        train_ops.append(dense_optimizer.apply_gradients(avg_grads))
+
+        if use_dynamic_expansion:
+            train_address_list = tf.compat.v1.get_collection(ASCEND_SPARSE_LOOKUP_ID_OFFSET)
+            # do sparse optimization by addr
+            sparse_grads = list(grads[-1])  # local_embedding
+            grads_and_vars = [(grad, address) for grad, address in zip(sparse_grads, train_address_list)]
+            train_ops.append(sparse_optimizer.apply_gradients(grads_and_vars))
+        else:
+            # do sparse optimization
+            sparse_grads = list(grads[-1])
+            print("sparse_grads_tensor:", sparse_grads)
+            grads_and_vars = [(grad, variable) for grad, variable in zip(sparse_grads, sparse_variables)]
+            train_ops.append(sparse_optimizer.apply_gradients(grads_and_vars))
+
+    # dynamic learning-rate update
+    train_ops.extend([cfg.global_step.assign(cfg.global_step + 1), cfg.learning_rate[0], cfg.learning_rate[1]])
+
+    with tf.control_dependencies(train_ops):
+        train_ops = tf.no_op()
+    cfg.learning_rate = [cfg.learning_rate[0], cfg.learning_rate[1]]
+
+    saver = tf.train.Saver()
+    if MODIFY_GRAPH_FLAG:
+        modify_graph_and_start_emb_cache(dump_graph=True)
+    else:
+        start_asc_pipeline()
+
+    hook_list = []
+    if use_faae:
+        hook_evict = EvictHook(evict_enable=True, evict_time_interval=120)
+        hook_list.append(hook_evict)
+    if MODIFY_GRAPH_FLAG:  # add a hook in this scenario to handle graph-validation issues
+        hook_list.append(GraphModifierHook(modify_graph=False))
+
+    # with tf.compat.v1.Session(config=sess_config(dump_data=False)) as sess:
+    if use_faae:
+        sess = tf.compat.v1.train.MonitoredTrainingSession(
+            hooks=hook_list,
+            config=sess_config(dump_data=False)
+        )
+        sess.graph._unsafe_unfinalize()
+        if not MODIFY_GRAPH_FLAG:
+            sess.run(train_iterator.initializer)
+        else:
+            sess.run(ConfigInitializer.get_instance().train_params_config.get_initializer(True))
+    else:
+        sess = tf.compat.v1.Session(config=sess_config(dump_data=False))
+        sess.run(tf.compat.v1.global_variables_initializer())
+        if not MODIFY_GRAPH_FLAG:
+            sess.run(train_iterator.initializer)
+        else:
+            sess.run(ConfigInitializer.get_instance().train_params_config.get_initializer(True))
+
+    epoch = 0
+    cost_sum = 0
+    qps_sum = 0
+    best_auc = 0
+    iteration_per_loop = 10
+
+    train_ops = util.set_iteration_per_loop(sess, train_ops, iteration_per_loop)
+
+    # for i in range(1, TRAIN_STEPS):
+    i = 0
+    while True:
+        i += 1
+        logger.info(f"################ training at step {i * iteration_per_loop} ################")
+        start_time = time.time()
+
+        try:
+            grad, loss = sess.run([train_ops, train_model.get("loss")])
+            lr = sess.run(cfg.learning_rate)
+            global_step = sess.run(cfg.global_step)
+        except tf.errors.OutOfRangeError:
+            logger.info("Encountered the end of sequence for training.")
+            break
+
+        end_time = time.time()
+        cost_time = end_time - start_time
+        qps = (1 / cost_time) * rank_size * cfg.batch_size * iteration_per_loop
+        cost_sum += cost_time
+        logger.info(f"step: {i * iteration_per_loop}; training loss: {loss}")
+        logger.info(f"step: {i * iteration_per_loop}; grad: {grad}")
+        logger.info(f"step: {i * iteration_per_loop}; lr: {lr}")
+        logger.info(f"global step: {global_step}")
+        logger.info(f"step: {i * iteration_per_loop}; current sess cost time: {cost_time:.10f}; current QPS: {qps}")
+        logger.info(f"training at step:{i * iteration_per_loop}, table[{sparse_hashtable.table_name}], "
+                    f"table size:{sparse_hashtable.size()}, table capacity:{sparse_hashtable.capacity()}")
+
+        if i % (train_steps // iteration_per_loop) == 0:
+            if interval is not None:
+                test_auc, test_mean_log_loss = evaluate_fix(i * iteration_per_loop)
+            else:
+                test_auc, test_mean_log_loss = evaluate()
+            print("Test auc: {}; log_loss: {} ".format(test_auc, test_mean_log_loss))
+            best_auc = max(best_auc, test_auc)
+            logger.info(f"training step: {i * iteration_per_loop}, best auc: {best_auc}")
+
+    sess.close()
+
+    terminate_config_initializer()
+    logger.info("Demo done!")
-- 
Gitee
From 769164b3b7aff7766e4ffbec81e4766b13d75032 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=97=A0=E6=82=94?= <1187940490@qq.com>
Date: Mon, 22 Jul 2024 23:38:14 +0800
Subject: [PATCH 290/302] =?UTF-8?q?=E9=85=8D=E7=BD=AE=E6=96=87=E4=BB=B6?=
 =?UTF-8?q?=E4=BF=AE=E6=94=B9=EF=BC=8C=E5=85=A5=E5=8F=A3=E5=87=BD=E6=95=B0?=
 =?UTF-8?q?=E9=80=82=E9=85=8D?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 examples/mmoe/config.py     |  31 ++--
 examples/mmoe/criteo.py     | 273 ------------------------------------
 examples/mmoe/main_mxrec.py |  59 ++++----
 3 files changed, 51 insertions(+), 312 deletions(-)
 delete mode 100644 examples/mmoe/criteo.py

diff --git a/examples/mmoe/config.py b/examples/mmoe/config.py
index d5540908..b87bc11b 100644
--- a/examples/mmoe/config.py
+++ b/examples/mmoe/config.py
@@ -42,10 +42,6 @@ class LearningRateScheduler:
         self.base_lr_sparse = base_lr_sparse
 
-    def calc(self, global_step):
-        # used for the warmup stage
-        warmup_step = tf.cast(1 / self.warmup_steps, tf.float32)
-        lr_factor_warmup = 1 - tf.cast(self.warmup_steps - global_step, tf.float32) * warmup_step
-        lr_factor_warmup = tf.cast(lr_factor_warmup, tf.float32)
+    def calc(self):
         # used for the constant stage
         lr_factor_constant = tf.cast(1.0, tf.float32)
 
@@ -66,10 +62,15 @@ class Config:
         self.train_file_pattern = "train"
         self.test_file_pattern = "test"
 
-        self.batch_size = 4096
+        self.batch_size = 32
         self.line_per_sample = 1
-        self.train_epoch = 1
-        self.test_epoch = 9
+        self.train_epoch = 100
+        self.test_epoch = 100
+        self.expert_num = 8
+        self.gate_num = 2
+        self.expert_size = 16
+        self.tower_size = 8
+
         self.perform_shuffle = False
 
         self.key_type = tf.int64
@@ -82,7 +83,7 @@ class Config:
         self.field_num = 26
         self.send_count = 46000 // self.rank_size
 
-        self.emb_dim = 8
+        self.emb_dim = self.expert_num * self.expert_size + self.gate_num * self.expert_num
         self.hashtable_threshold = 1
 
         self.USE_PIPELINE_TEST = False
@@ -102,7 +103,7 @@ class Config:
             LR_SCHEDULE_STEPS[1],
             LR_SCHEDULE_STEPS[2],
         )
-        self.learning_rate = _lr_scheduler.calc(self.global_step)
+        self.learning_rate = _lr_scheduler.calc()
 
     def __set_emb_table_size(self):
         self.cache_mode = os.getenv("CACHE_MODE")
@@ -110,15 +111,15 @@ class Config:
             raise ValueError("please export CACHE_MODE environment variable, support:[HBM, DDR, SSD]")
 
         if self.cache_mode == CacheModeEnum.HBM.value:
-            self.dev_vocab_size = 14_000_000 * self.rank_size
+            self.dev_vocab_size = 1000 * self.rank_size
             self.host_vocab_size = 0
         elif self.cache_mode == CacheModeEnum.DDR.value:
-            self.dev_vocab_size = 500_000 * self.rank_size
-            self.host_vocab_size = 24_000_000 * self.rank_size
+            self.dev_vocab_size = 1000 * self.rank_size
+            self.host_vocab_size = 1000 * self.rank_size
         elif self.cache_mode == CacheModeEnum.SSD.value:
-            self.dev_vocab_size = 100_000 * self.rank_size
-            self.host_vocab_size = 2_000_000 * self.rank_size
-            self.ssd_vocab_size = 24_000_000 * self.rank_size
+            self.dev_vocab_size = 1000 * self.rank_size
+            self.host_vocab_size = 1000 * self.rank_size
+            self.ssd_vocab_size = 1000 * self.rank_size
         else:
            raise ValueError(f"get CACHE_MODE:{self.cache_mode}, expect in [HBM, DDR, SSD]")
 
diff --git a/examples/mmoe/criteo.py
b/examples/mmoe/criteo.py deleted file mode 100644 index 25f1d869..00000000 --- a/examples/mmoe/criteo.py +++ /dev/null @@ -1,273 +0,0 @@ -# coding=utf-8 -# Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -import os -import stat -import pickle -import argparse -import pandas as pd -import numpy as np -import tensorflow as tf -from tqdm import tqdm - -NAMES = ['label'] + [f'I{i}' for i in range(1, 14)] + [f'C{i}' for i in range(1, 27)] - - -def make_sub_file(lines, head, src_name, sub_dir_name, sub): - """Write sub-data. - - Args: - :param lines: A list. Several pieces of data. - :param head: A string. ['label', 'I1', 'I2', ...]. - :param src_name: A string. The name of data. - :param sub_dir_name: A string. - :param sub: A scalar(Int). Record the current number of sub file. - :return: sub + 1. - """ - root_path, file_path = os.path.split(src_name) - file_name, suffix = file_path.split('.') - split_file_name = file_name + "_" + str(sub).zfill(2) + "." + suffix - split_file = os.path.join(root_path, sub_dir_name, split_file_name) - if not os.path.exists(os.path.join(root_path, sub_dir_name)): - os.mkdir(os.path.join(root_path, sub_dir_name)) - - modes = stat.S_IWUSR | stat.S_IRUSR - flags = os.O_WRONLY | os.O_TRUNC | os.O_CREAT - f = os.fdopen(os.open(split_file, flags, modes), 'w') - try: - f.writelines([head]) - f.writelines(lines) - return sub + 1 - finally: - f.close() - - -def split_byline_count(filename, count, sub_dir_name): - """Split File. - Note: You can specify how many rows of data each sub file contains. - Args: - :param filename: A string. - :param count: A scalar(int). - :param sub_dir_name: A string. - :return: - """ - f = open(filename, 'r') - try: - head = f.readline() - buf = [] - sub = 1 - for line in f: - buf.append(line) - if len(buf) == count: - sub = make_sub_file(buf, head, filename, sub_dir_name, sub) - buf = [] - if len(buf) != 0: - try: - make_sub_file(buf, head, filename, sub_dir_name, sub) - except FileNotFoundError as err: - raise FileNotFoundError("please check the filename of data") from err - finally: - f.close() - - -def get_split_file_path(parent_path=None, dataset_path=None, sample_num=4600000): - """Get the list of split file path. - Note: Either parent_path or dataset_path must be valid. - If exists dataset_path + "/split", parent_path = dataset_path + "/split". - Args: - :param parent_path: A string. split file's parent path. - :param dataset_path: A string. - :param sample_num: A int. The sample number of every split file. - :return: A list. [file1_path, file2_path, ...] 
- """ - sub_dir_name = 'split' - if parent_path is None and dataset_path is None: - raise ValueError('Please give parent path or file path.') - if parent_path is None and os.path.exists(os.path.join(os.path.dirname(dataset_path), sub_dir_name)): - parent_path = os.path.join(os.path.dirname(dataset_path), sub_dir_name) - elif parent_path is None or not os.path.exists(parent_path): - split_byline_count(dataset_path, sample_num, sub_dir_name) - parent_path = os.path.join(os.path.dirname(dataset_path), sub_dir_name) - split_file_name = os.listdir(parent_path) - split_file_name.sort() - split_file_list = [parent_path + "/" + file_name for file_name in split_file_name if file_name[-3:] == 'txt'] - return split_file_list - - -def get_fea_map(fea_map_path=None, split_file_list=None): - """Get feature map. - Note: Either parent_path or dataset_path must be valid. - If exists dir(split_file_list[0]) + "/fea_map.pkl", fea_map_path is valid. - If fea_map_path is None and you want to build the feature map, - the default file path is the parent directory of split file + "fea_map.pkl". - Args: - :param fea_map_path: A string. - :param split_file_list: A list. [file1_path, file2_path, ...] - :return: A dict. {'C1':{}, 'C2':{}, ...} - """ - if fea_map_path is None and split_file_list is None: - raise ValueError('Please give feature map path or split file list.') - if fea_map_path is None and split_file_list is not None: - fea_map_path = os.path.join(os.path.dirname(split_file_list[0]), "fea_map.pkl") - if os.path.exists(fea_map_path) and fea_map_path[-3:] == 'pkl': - with open(fea_map_path, 'rb') as f: - fea_map = pickle.load(f) - return fea_map - fea_map = {} - for file_open in tqdm(split_file_list): - f = open(file_open) - for line in f: - row = line.strip('\n').split('\t') - for i in range(14, 40): - if row[i] == '': - continue - name = NAMES[i] - fea_map.setdefault(name, {}) - if fea_map[name].get(row[i]) is None: - fea_map[name][row[i]] = len(fea_map[name]) - for j in range(1, 14): - if row[j] == '': - continue - name = NAMES[j] - fea_map.setdefault(name, {}) - fea_map[name].setdefault('min', float(row[j])) - fea_map[name].setdefault('max', float(row[j])) - fea_map[name]['min'] = min(fea_map[name]['min'], float(row[j])) - fea_map[name]['max'] = max(fea_map[name]['max'], float(row[j])) - f.close() - for i in range(14, 40): - fea_map[NAMES[i]]['-1'] = len(fea_map[NAMES[i]]) - fea_map_path = os.path.join(os.path.dirname(split_file_list[0]), "fea_map.pkl") - - - modes = stat.S_IWUSR | stat.S_IRUSR - flags = os.O_WRONLY | os.O_TRUNC | os.O_CREAT - with os.fdopen(os.open(fea_map_path, flags, modes), 'wb') as fd: - pickle.dump(fea_map, fd, pickle.HIGHEST_PROTOCOL) - - return fea_map - - -def rec_kbins_discretizer(dat, n_bins, min_max_dict): - """Bin continuous data into intervals. - Note: The strategy is "uniform". - Args: - :param dat: A dataframe. - :param n_bins: A scalar(int). - :param min_max_dict: A dict such as {'min': , 'max': }. - :return: The new dataframe. 
- """ - features = dat.columns - n_features = len(features) - bin_edges = np.zeros(n_features, dtype=object) - for idx, feature in enumerate(features): - bin_edges[idx] = np.linspace(min_max_dict[feature]['min'], min_max_dict[feature]['max'], n_bins + 1) - rtol = 1.e-5 - atol = 1.e-8 - eps = atol + rtol * np.abs(dat[feature]) - dat[feature] = np.digitize(dat[feature] + eps, bin_edges[idx][1:]) - return dat - - -def convert_input2tfrd(in_file_path, out_file_path): - """ - txt to tfrecords - """ - def make_example(label_list, dense_feat_list, sparse_feat_list): - dense_feature = np.array(dense_feat_list, dtype=np.int64).reshape(-1) - sparse_feature = np.array(sparse_feat_list, dtype=np.int64).reshape(-1) - label = np.array(label_list, dtype=np.int64).reshape(-1) - feature_dict = { - "dense_feature": tf.train.Feature(int64_list=tf.train.Int64List(value=dense_feature)), - "sparse_feature": tf.train.Feature(int64_list=tf.train.Int64List(value=sparse_feature)), - "label": tf.train.Feature(int64_list=tf.train.Int64List(value=label)) - } - example = tf.train.Example(features=tf.train.Features(feature=feature_dict)) - - return example - - file_name = out_file_path + in_file_path[-12:-4] + '.tfrecord' - file_writer = tf.io.TFRecordWriter(file_name) - - with open(in_file_path, encoding='utf-8') as file_in: - - for _, line in tqdm(enumerate(file_in)): - - line = line.strip('\n') - items = line.split('\t') - if len(items) != 40: - continue - label = int(items[0]) - dense = items[1:14] - sparse = items[14:] - - ex = make_example(label, dense, sparse) - serialized = ex.SerializeToString() - file_writer.write(serialized) - - file_writer.close() - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='Get datasets') - parser.add_argument('--data_path') - parser.add_argument('--output_path') - - args, _ = parser.parse_known_args() - data_path = args.data_path - output_path = args.output_path - - # get txt_list - file_split_list = get_split_file_path(dataset_path=data_path) - # get feature_map - feature_map = get_fea_map(split_file_list=file_split_list) - - for file in tqdm(file_split_list): - - # read data - data_df = pd.read_csv(file, sep='\t', header=None, names=NAMES) - # name feature - sparse_features = ['C' + str(i) for i in range(1, 27)] - dense_features = ['I' + str(i) for i in range(1, 14)] - # data processing - data_df[sparse_features] = data_df[sparse_features].fillna('-1') - data_df[dense_features] = data_df[dense_features].fillna(0) - # sparse feature: mapping - for col in sparse_features: - try: - data_df[col] = data_df[col].map(lambda x: feature_map[col][x]) - except KeyError as e: - raise KeyError("Feature {} not found in dataset".format(col)) from e - # dense feature: Bin continuous data into intervals. 
- data_df[dense_features] = rec_kbins_discretizer(data_df[dense_features], 1000, feature_map) - # add offsets - slot_size_array = [ - 1001, 1001, 1001, 1001, 1001, 1001, 1001, 1001, 1001, 1001, 1001, 1001, 1001, - 1462, 585, 10131228, 2202609, 307, 25, 12519, 635, 5, 93147, 5685, 8351594, 3196, - 29, 14994, 5461307, 12, 5654, 2174, 5, 7046548, 19, 17, 286182, 106, 142573 - ] - offset_size_list = np.cumsum([0] + slot_size_array[:-1]) - for col_index in range(1, len(offset_size_list) + 1): - data_df.iloc[:, col_index] += offset_size_list[col_index - 1] - # save to txt - data_df.to_csv(file, sep='\t', index=False, header=False) - # txt to tfrecords - convert_input2tfrd(in_file_path=file, out_file_path=output_path) - - - - - diff --git a/examples/mmoe/main_mxrec.py b/examples/mmoe/main_mxrec.py index 51ed7c4a..e236cd2f 100644 --- a/examples/mmoe/main_mxrec.py +++ b/examples/mmoe/main_mxrec.py @@ -66,18 +66,17 @@ def make_batch_and_iterator(config, feature_spec_list, is_training, dump_graph, def extract_fn(data_record): features = { # Extract features using the keys set during creation - 'label': tf.compat.v1.FixedLenFeature(shape=(config.line_per_sample,), dtype=tf.int64), - 'sparse_feature': tf.compat.v1.FixedLenFeature(shape=(26 * config.line_per_sample,), dtype=tf.int64), - 'dense_feature': tf.compat.v1.FixedLenFeature(shape=(13 * config.line_per_sample,), dtype=tf.float32), + 'label': tf.compat.v1.FixedLenFeature(shape=(2 * config.line_per_sample,), dtype=tf.int64), + 'sparse_feature': tf.compat.v1.FixedLenFeature(shape=(29 * config.line_per_sample,), dtype=tf.int64), + 'dense_feature': tf.compat.v1.FixedLenFeature(shape=(11 * config.line_per_sample,), dtype=tf.float32), } sample = tf.compat.v1.parse_single_example(data_record, features) return sample def reshape_fn(batch): - batch['label'] = tf.reshape(batch['label'], [-1, 1]) - batch['dense_feature'] = tf.reshape(batch['dense_feature'], [-1, 13]) - batch['dense_feature'] = tf.math.log(batch['dense_feature'] + 3.0) - batch['sparse_feature'] = tf.reshape(batch['sparse_feature'], [-1, 26]) + batch['label'] = tf.reshape(batch['label'], [-1, 2]) + batch['dense_feature'] = tf.reshape(batch['dense_feature'], [-1, 11]) + batch['sparse_feature'] = tf.reshape(batch['sparse_feature'], [-1, 29]) return batch if is_training: @@ -129,6 +128,7 @@ def model_forward(feature_list, hash_table_list, batch, is_train, modify_graph): emb = tf.reduce_sum(embedding_list, axis=0, keepdims=False) else: raise ValueError("the length of embedding_list must be greater than or equal to 1.") + emb = tf.reduce_sum(emb, axis=1) my_model = MyModel() model_output = my_model.build_model(embedding=emb, dense_feature=batch["dense_feature"], @@ -148,8 +148,10 @@ def evaluate(): eval_label = ConfigInitializer.get_instance().train_params_config.get_target_batch(False).get("label") sess.run([ConfigInitializer.get_instance().train_params_config.get_initializer(False)]) log_loss_list = [] - pred_list = [] - label_list = [] + pred_income_list = [] + pred_mat_list = [] + label_income_list = [] + label_mat_list = [] eval_current_steps = 0 finished = False print("eval begin") @@ -162,16 +164,21 @@ def evaluate(): eval_cost = time.time() - eval_start qps_eval = (1 / eval_cost) * rank_size * cfg.batch_size log_loss_list += list(eval_loss.reshape(-1)) - pred_list += list(pred.reshape(-1)) - label_list += list(label.reshape(-1)) + pred_income = pred[0] + pred_mat = pred[1] + pred_income_list += list(pred_income.reshape(-1)) + pred_mat_list += list(pred_mat.reshape(-1)) + label_income_list 
+= list(label[:, 0].reshape(-1)) + label_mat_list += list(label[:, 1].reshape(-1)) print(f"eval current_steps: {eval_current_steps}, qps: {qps_eval}") if eval_current_steps == eval_steps: finished = True except tf.errors.OutOfRangeError: finished = True - auc = roc_auc_score(label_list, pred_list) + auc_income = roc_auc_score(label_income_list, pred_income_list) + auc_mat = roc_auc_score(label_mat_list, pred_mat_list) mean_log_loss = np.mean(log_loss_list) - return auc, mean_log_loss + return auc_income, auc_mat, mean_log_loss def evaluate_fix(step): @@ -281,8 +288,8 @@ if __name__ == "__main__": rank_id = int(os.getenv("RANK_ID")) if os.getenv("RANK_ID") else None rank_size = int(os.getenv("TRAIN_RANK_SIZE")) if os.getenv("TRAIN_RANK_SIZE") else None interval = int(os.getenv("INTERVAL")) if os.getenv("INTERVAL") else None - train_steps = 10000 - eval_steps = 1360 + train_steps = 1000 + eval_steps = 1000 try: use_dynamic_expansion = bool(int(os.getenv("USE_DYNAMIC_EXPANSION", 0))) @@ -326,9 +333,7 @@ if __name__ == "__main__": optimizer_list = [get_dense_and_sparse_optimizer(cfg)] # note: variance_scaling_initializer only support HBM mode - emb_initializer = tf.compat.v1.truncated_normal_initializer(stddev=0.05, seed=sparse_hashtable_seed) \ - if cfg.cache_mode != "HBM" or use_dynamic_expansion else \ - tf.compat.v1.variance_scaling_initializer(mode="fan_avg", distribution='normal', seed=sparse_hashtable_seed) + emb_initializer = tf.constant_initializer(value = 0.1) sparse_hashtable = create_table( key_dtype=cfg.key_type, dim=tf.TensorShape([cfg.emb_dim]), @@ -422,7 +427,8 @@ if __name__ == "__main__": epoch = 0 cost_sum = 0 qps_sum = 0 - best_auc = 0 + best_income_auc = 0 + best_auc_mat = 0 iteration_per_loop = 10 train_ops = util.set_iteration_per_loop(sess, train_ops, 10) @@ -456,12 +462,17 @@ if __name__ == "__main__": if i % (train_steps // iteration_per_loop) == 0: if interval is not None: - test_auc, test_mean_log_loss = evaluate_fix(i * iteration_per_loop) + test_auc_income, test_auc_mat, test_mean_log_loss = evaluate_fix(i * iteration_per_loop) else: - test_auc, test_mean_log_loss = evaluate() - print("Test auc: {}; log_loss: {} ".format(test_auc, test_mean_log_loss)) - best_auc = max(best_auc, test_auc) - logger.info(f"training step: {i * iteration_per_loop}, best auc: {best_auc}") + test_auc_income, test_auc_mat, test_mean_log_loss = evaluate() + print("Test auc income: {};Test auc mat: {} ;log_loss: {} ".format(test_auc_income, + test_auc_mat,test_mean_log_loss)) + best_auc_income = max(best_auc_income, test_auc_income) + best_auc_mat = max(best_auc_mat, test_auc_mat) + logger.info(f"training step: {i * iteration_per_loop}, + best auc income: {best_auc_income} , + best auc mat: {best_auc_mat}") + sess.close() -- Gitee From c70d9eebb72a4f818b88d1ea19cb1ba9d172d197 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=97=A0=E6=82=94?= <1187940490@qq.com> Date: Tue, 23 Jul 2024 00:09:26 +0800 Subject: [PATCH 291/302] =?UTF-8?q?bug=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/mmoe/model.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/mmoe/model.py b/examples/mmoe/model.py index 5b1917a3..cf8ca108 100644 --- a/examples/mmoe/model.py +++ b/examples/mmoe/model.py @@ -42,7 +42,7 @@ class MyModel: for i in range(0, self.expert_num): expert_linear = tf.layers.dense(input, units=self.expert_size, activation=None, name=f'expert_payer_{i}', 
kernel_initializer=tf.constant_initializer(value=0.1), - bias_initializer=tf.constant_initializer(values = 0.1)) + bias_initializer=tf.constant_initializer(value=0.1)) param_expert.append(expert_linear) return param_expert @@ -53,7 +53,7 @@ class MyModel: for i in range(0, self.gate_num): gate_linear = tf.layers.dense(input, units=self.gate_size, activation=None, name=f'gate_payer_{i}', kernel_initializer=tf.constant_initializer(value=0.1), - bias_initializer=tf.constant_initializer(values = 0.1)) + bias_initializer=tf.constant_initializer(value=0.1)) param_gate.append(gate_linear) return param_gate @@ -62,12 +62,12 @@ class MyModel: def tower_layer(self, input, layer_name): tower_linear = tf.layers.dense(input, units=self.tower_size, activation=None, name=f'tower_payer_{layer_name}', kernel_initializer=tf.constant_initializer(value=0.1), - bias_initializer=tf.constant_initializer(values = 0.1)) + bias_initializer=tf.constant_initializer(value=0.1)) tower_linear_out = tf.layers.dense(tower_linear, units=self.tower_size, activation=None, name=f'tower_payer_out_{layer_name}', kernel_initializer=tf.constant_initializer(value=0.1), - bias_initializer=tf.constant_initializer(values=0.1)) + bias_initializer=tf.constant_initializer(value=0.1)) return tower_linear_out -- Gitee From 66a629d05eafaeadb807d25b077e41cf5936f1c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=97=A0=E6=82=94?= <1187940490@qq.com> Date: Tue, 23 Jul 2024 14:28:16 +0800 Subject: [PATCH 292/302] =?UTF-8?q?=E6=97=A0=E7=94=A8=E6=96=87=E4=BB=B6?= =?UTF-8?q?=E5=88=A0=E9=99=A4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/mmoe/gradient_descent_w.py | 71 ----------------------------- 1 file changed, 71 deletions(-) delete mode 100644 examples/mmoe/gradient_descent_w.py diff --git a/examples/mmoe/gradient_descent_w.py b/examples/mmoe/gradient_descent_w.py deleted file mode 100644 index 53adb996..00000000 --- a/examples/mmoe/gradient_descent_w.py +++ /dev/null @@ -1,71 +0,0 @@ -# coding=utf-8 -# Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from collections import defaultdict - -import tensorflow as tf -from tensorflow.python.ops import math_ops -from tensorflow.python.training import gradient_descent -from mx_rec.optimizers.base import CustomizedOptimizer -from mx_rec.util.log import logger -from mx_rec.util.initialize import ConfigInitializer - - -def create_hash_optimizer(learning_rate, weight_decay=0.0001, use_locking=False, name="GradientDescent"): - optimizer = CustomizedGradientDescentWithWeighDecay(learning_rate=learning_rate, - weight_decay=weight_decay, - use_locking=use_locking, - name=name) - ConfigInitializer.get_instance().optimizer_config.optimizer_instance = optimizer - return optimizer - - -class CustomizedGradientDescentWithWeighDecay(gradient_descent.GradientDescentOptimizer, CustomizedOptimizer): - name_counter = defaultdict(int) - - def __init__(self, learning_rate, weight_decay, use_locking=False, name="GradientDescent"): - self.optimizer_type = "gradient_descent_with_weight_decay" - self.weight_decay = weight_decay - super(CustomizedGradientDescentWithWeighDecay, self)._get_name(name=name) - super(CustomizedGradientDescentWithWeighDecay, self).__init__( - learning_rate=learning_rate, use_locking=use_locking, name=self.unique_name - ) - self._slot_num = 0 - self._derivative = 1 - - def get_slot_init_values(self): - logger.info("no slot for gradient descent") - return [] - - def _apply_sparse_duplicate_indices(self, grad, var): - logger.debug(">>>> Enter _apply_sparse_duplicate_indices") - nd_indices = tf.expand_dims(grad.indices, 1) - logger.info(f"weigh_decay={self.weight_decay}") - if self.weight_decay is None: - nd_value = grad.values * math_ops.cast(self._learning_rate_tensor, var.dtype.base_dtype) - else: - nd_value = (grad.values + math_ops.cast(self.weight_decay, var.dtype.base_dtype) * - tf.gather(var, grad.indices)) * math_ops.cast(self._learning_rate_tensor, var.dtype.base_dtype) - var_update_op = tf.scatter_nd_add(var, nd_indices, -nd_value, use_locking=self._use_locking) - return var_update_op - - def _apply_dense(self, grad, var): - logger.debug(">>>> Enter _apply_dense") - raise NotImplementedError("You are using a wrong type of variable.") -- Gitee From 74f39df93b60f70f7ac236a4a236d351eb230c8e Mon Sep 17 00:00:00 2001 From: penghuiyang <1060916628@qq.com> Date: Tue, 23 Jul 2024 17:06:43 +0800 Subject: [PATCH 293/302] =?UTF-8?q?=E3=80=90FEAT=E3=80=91=E7=BB=99FileWrit?= =?UTF-8?q?er=E6=B7=BB=E5=8A=A0patch=E9=98=B2=E6=AD=A2=E5=86=99summary?= =?UTF-8?q?=E6=96=87=E4=BB=B6=E5=86=B2=E7=AA=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mx_rec/saver/patch.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mx_rec/saver/patch.py b/mx_rec/saver/patch.py index d5071d5c..f57e8ce0 100644 --- a/mx_rec/saver/patch.py +++ b/mx_rec/saver/patch.py @@ -489,4 +489,8 @@ def _patch_for_summary_writer(func): def patch_for_summary_writer(): + """ + Patch for `tf.summary.FileWriter.__init__` method, add rankId to init param `filename_suffix`. 
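+
+    A minimal sketch of the wrapping idea (illustrative only: the real
+    decorator is `_patch_for_summary_writer` above, and `get_rank_id()`
+    stands in for however the rank id is actually obtained):
+
+        def _patch_for_summary_writer(origin_init):
+            def wrapper(self, *args, **kwargs):
+                # suffix event files with the rank id so that ranks writing
+                # summaries into one directory never clobber each other
+                suffix = kwargs.get("filename_suffix") or ""
+                kwargs["filename_suffix"] = f"{suffix}.rank_{get_rank_id()}"
+                return origin_init(self, *args, **kwargs)
+            return wrapper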
+ """ FileWriter.__init__ = _patch_for_summary_writer(FileWriter.__init__) + logger.debug("Method `tf.summary.FileWriter.__init__` has been patched.") -- Gitee From aac7a3b3f4d613aea3c303d0266987e023daf62c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=97=A0=E6=82=94?= <1187940490@qq.com> Date: Tue, 23 Jul 2024 17:20:36 +0800 Subject: [PATCH 294/302] =?UTF-8?q?=E5=90=8A=E8=B5=B7shell=E6=8F=90?= =?UTF-8?q?=E4=BA=A4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/mmoe/run.sh | 99 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 examples/mmoe/run.sh diff --git a/examples/mmoe/run.sh b/examples/mmoe/run.sh new file mode 100644 index 00000000..6c142443 --- /dev/null +++ b/examples/mmoe/run.sh @@ -0,0 +1,99 @@ +#!/bin/bash +# Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +cur_path=$(dirname "$(readlink -f "$0")") + +so_path=$1 +mx_rec_package_path=$2 +hccl_cfg_json=$3 +dlrm_criteo_data_path=$4 +ip=$5 # no ranktable时传入该参数 + +interface="lo" +num_server=1 +local_rank_size=8 +num_process=$((num_server * local_rank_size)) +export TRAIN_RANK_SIZE=$num_process + +################# 参数配置 ###################### +export USE_DYNAMIC=0 # 0:静态shape;1:动态shape +export CACHE_MODE="HBM" # HBM;DDR;SSD +export USE_FAAE=0 # 0:关闭准入淘汰;1:开启准入淘汰 +export USE_DYNAMIC_EXPANSION=0 # 0:关闭动态扩容;1: 开启动态扩容 +export USE_MULTI_LOOKUP=0 # 0:一表一查;1:一表多查 +export USE_MODIFY_GRAPH=0 # 0:feature spec模式;1:自动改图模式 +################################################ +echo "CACHE_MODE:${CACHE_MODE}" + +export HCCL_CONNECT_TIMEOUT=1200 +export DLRM_CRITEO_DATA_PATH=${dlrm_criteo_data_path} +export PYTHONPATH=${mx_rec_package_path}:${so_path}:$PYTHONPATH +export LD_PRELOAD=/usr/lib64/libgomp.so.1 +export LD_LIBRARY_PATH=${so_path}:/usr/local/lib:$LD_LIBRARY_PATH +export ASCEND_DEVICE_ID=0 +export RANK_ID_START=0 +export JOB_ID=10086 +export CUSTOMIZED_OPS_LIB_PATH=${so_path}/libcust_ops.so # Todo: please config +export MXREC_LOG_LEVEL="INFO" +export TF_CPP_MIN_LOG_LEVEL=3 +export ASCEND_GLOBAL_LOG_LEVEL=3 +#export USE_FAAE=1 +export ENABLE_FORCE_V2_CONTROL=1 + +export PROFILING_OPTIONS='{"output":"/home/yz/profiling", + "training_trace":"on", + "task_trace":"on", + "aicpu":"on", + "fp_point":"", + "bp_point":"", + "aic_metrics":"PipeUtilization"}' + +RANK_ID_START=0 + +export MXREC_MODE="ASC" +echo "MXREC_MODE is $MXREC_MODE" +export py=main_mxrec.py +echo "py is $py" + +# 区分ranktable和no ranktable +if [ -n "$ip" ]; then + # no ranktable分支 + echo "Current is no ranktable solution." + echo "Input node ip: $ip, please make sure this ip is available." 
+ export CM_CHIEF_IP=$ip # chief node ip + export CM_CHIEF_PORT=60001 # chief node listen port + export CM_CHIEF_DEVICE=0 # chief node device id + export CM_WORKER_IP=$ip # current node ip + export CM_WORKER_SIZE=$num_process # number of devices participating in cluster training + echo "CM_CHIEF_IP=$CM_CHIEF_IP" + echo "CM_CHIEF_PORT=$CM_CHIEF_PORT" + echo "CM_CHIEF_DEVICE=$CM_CHIEF_DEVICE" + echo "CM_WORKER_IP=$CM_WORKER_IP" + echo "CM_WORKER_SIZE=$CM_WORKER_SIZE" +else + # ranktable branch + echo "Current is ranktable solution, hccl json file:${hccl_cfg_json}" + export RANK_SIZE=$num_process + echo "RANK_SIZE=${RANK_SIZE}, please make sure the hccl configuration json file matches this parameter" + export RANK_TABLE_FILE=${hccl_cfg_json} +fi + +echo "use horovod to start tasks" +# GLOG_stderrthreshold -2:TRACE -1:DEBUG 0:INFO 1:WARN 2:ERROR, default is INFO +mpi_args='-x BIND_INFO="0:12 12:48 60:48" -x GLOG_stderrthreshold=2 -x GLOG_logtostderr=true -bind-to none -x NCCL_SOCKET_IFNAME=docker0 -mca btl_tcp_if_exclude docker0' + +horovodrun --network-interface ${interface} -np ${num_process} --mpi-args "${mpi_args}" --mpi -H localhost:${local_rank_size} \ +python3.7 ${py} 2>&1 | tee temp_${CACHE_MODE}_${num_process}p.log -- Gitee From 2bad2444eb05428de24da9a99e9f52496fcb4c67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=97=A0=E6=82=94?= <1187940490@qq.com> Date: Tue, 23 Jul 2024 17:21:16 +0800 Subject: [PATCH 295/302] =?UTF-8?q?=E6=97=A0=E9=9C=80loss=5Fscale=E5=8A=9F?= =?UTF-8?q?=E8=83=BD=EF=BC=8C=E5=8E=BB=E9=99=A4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/mmoe/delay_loss_scale.py | 64 ------------------------------- 1 file changed, 64 deletions(-) delete mode 100644 examples/mmoe/delay_loss_scale.py diff --git a/examples/mmoe/delay_loss_scale.py b/examples/mmoe/delay_loss_scale.py deleted file mode 100644 index f73baf68..00000000 --- a/examples/mmoe/delay_loss_scale.py +++ /dev/null @@ -1,64 +0,0 @@ -# coding=utf-8 -# Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License.
-# ============================================================================== - -import tensorflow as tf -from tensorflow.python.training import optimizer - -from config import Config - - -class DenseLossScaleOptimizer: - def __init__(self, opt: optimizer.Optimizer, loss_scale: int) -> None: - if not isinstance(opt, optimizer.Optimizer): - raise ValueError('"opt" must be an instance of Optimizer, but got: %s' % type(opt)) - self._optimizer = opt - self._loss_scale = tf.convert_to_tensor(loss_scale, tf.float32) - _update_lr_loss_scale(self._optimizer, loss_scale) - - def compute_gradients(self, loss, var_list=None): - return self._optimizer.compute_gradients(loss * self._loss_scale, var_list=var_list) - - def apply_gradients(self, avg_grads): - return self._optimizer.apply_gradients(avg_grads) - - -class SparseLossScaleOptimizer: - def __init__(self, opt: optimizer.Optimizer, loss_scale: int) -> None: - if not isinstance(opt, optimizer.Optimizer): - raise ValueError('"opt" must be an instance of Optimizer, but got: %s' % type(opt)) - self._optimizer = opt - self._loss_scale = tf.convert_to_tensor(loss_scale, tf.float32) - _update_lr_loss_scale(self._optimizer, loss_scale) - - def compute_gradients(self, loss, var_list=None): - return tf.gradients(loss * self._loss_scale, var_list) - - def apply_gradients(self, grads_and_vars): - return self._optimizer.apply_gradients(grads_and_vars) - - -def _update_lr_loss_scale(opt, loss_scale): - if loss_scale <= 0: - raise RuntimeError("the loss_scale must be greater than zero.") - loss_scale = tf.convert_to_tensor(loss_scale, tf.float32) - if hasattr(opt, "_lr"): - # LazyAdam or Adam optimizer - opt._lr = opt._lr / loss_scale - elif hasattr(opt, "_learning_rate"): - # SGD optimizer - opt._learning_rate = opt._learning_rate / loss_scale - else: - raise RuntimeError("`opt` should have a `_learning_rate` or `_lr` named field.") \ No newline at end of file -- Gitee From 6d08cf2ecb0290eafae0c4639c86f8eb85c43e47 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=97=A0=E6=82=94?= <1187940490@qq.com> Date: Tue, 23 Jul 2024 17:25:42 +0800 Subject: [PATCH 296/302] =?UTF-8?q?=E6=97=A0=E7=94=A8=E8=84=9A=E6=9C=AC?= =?UTF-8?q?=E5=88=A0=E9=99=A4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/mmoe/mean_auc.py | 40 --------------------------------------- 1 file changed, 40 deletions(-) delete mode 100644 examples/mmoe/mean_auc.py diff --git a/examples/mmoe/mean_auc.py b/examples/mmoe/mean_auc.py deleted file mode 100644 index ff57df00..00000000 --- a/examples/mmoe/mean_auc.py +++ /dev/null @@ -1,40 +0,0 @@ -# coding=utf-8 -# Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -import os -from glob import glob -import numpy as np - - -def split_auc(log_input): - with open(log_input, 'r') as log: - all_auc = [] - for line in log.readlines(): - if 'Test' in line: - all_auc.append(float(line.split(';')[0].split(':')[-1].strip())) - all_auc_len = len(all_auc) - all_auc_arr = np.array(all_auc)[:all_auc_len - all_auc_len % 8] - test_auc = np.mean(all_auc_arr.reshape(-1, 8), axis=-1) - return test_auc - - -log_path_all = 'latest_*.log' -log_path_list = glob(log_path_all) - -for log_path in log_path_list: - print(os.path.basename(log_path)) - print(split_auc(log_path)) - print('*'*20) \ No newline at end of file -- Gitee From 9845d170e50cd3087b4869fe070308230967e364 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=97=A0=E6=82=94?= <1187940490@qq.com> Date: Tue, 23 Jul 2024 17:27:02 +0800 Subject: [PATCH 297/302] =?UTF-8?q?=E6=A3=80=E8=A7=86=E6=84=8F=E8=A7=81?= =?UTF-8?q?=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/mmoe/config.py | 38 ++++++++----------- examples/mmoe/main_mxrec.py | 67 ++++++++++++++-------------------- examples/mmoe/model.py | 15 +++++--- examples/mmoe/op_impl_mode.ini | 1 - examples/mmoe/optimizer.py | 6 +-- 5 files changed, 54 insertions(+), 73 deletions(-) diff --git a/examples/mmoe/config.py b/examples/mmoe/config.py index b87bc11b..67ed7a20 100644 --- a/examples/mmoe/config.py +++ b/examples/mmoe/config.py @@ -32,16 +32,11 @@ class LearningRateScheduler: TF-based cond operations necessary for performance in graph mode. """ - def __init__(self, base_lr_dense, base_lr_sparse, warmup_steps, decay_start_step, decay_steps): - self.warmup_steps = tf.constant(warmup_steps, dtype=tf.int32) - self.decay_start_step = tf.constant(decay_start_step, dtype=tf.int32) - self.decay_steps = tf.constant(decay_steps) - self.decay_end_step = decay_start_step + decay_steps # 65041 - self.poly_power = 2.0 + def __init__(self, base_lr_dense, base_lr_sparse): self.base_lr_dense = base_lr_dense self.base_lr_sparse = base_lr_sparse - def calc(self, global_step): + def calc(self): # used for the constant stage lr_factor_constant = tf.cast(1.0, tf.float32) @@ -51,7 +46,7 @@ class LearningRateScheduler: class Config: - def __init__(self, ): + def __init__(self, ) -> None: self.rank_id = int(os.getenv("OMPI_COMM_WORLD_RANK")) if os.getenv("OMPI_COMM_WORLD_RANK") else None tmp = os.getenv("TRAIN_RANK_SIZE") if tmp is None: @@ -81,31 +76,30 @@ class Config: self.__set_emb_table_size() self.field_num = 26 - self.send_count = 46000 // self.rank_size + self.send_count = self.get_send_count(self.rank_size) self.emb_dim = self.expert_num * self.expert_size + self.gate_num * self.expert_num self.hashtable_threshold = 1 self.USE_PIPELINE_TEST = False - # 动态学习率 - GLOBAL_BATCH_SIZE = 8192 * 8 - LR_SCHEDULE_STEPS = [ - int(2750 * 55296 / GLOBAL_BATCH_SIZE), - int(49315 * 55296 / GLOBAL_BATCH_SIZE), - int(27772 * 55296 / GLOBAL_BATCH_SIZE), - ] self.global_step = tf.Variable(0, trainable=False) _lr_scheduler = LearningRateScheduler( 0.001, - 0.001, - LR_SCHEDULE_STEPS[0], - LR_SCHEDULE_STEPS[1], - LR_SCHEDULE_STEPS[2], + 0.001 ) self.learning_rate = _lr_scheduler.calc() + + def get_send_count(self, rank_size): + try: + return 46000 // rank_size + except ZeroDivisionError as exp: + raise ZeroDivisionError('Rank size can not be zero.') from exp + + + - def __set_emb_table_size(self): + def __set_emb_table_size(self) -> None: 
self.cache_mode = os.getenv("CACHE_MODE") if self.cache_mode is None: raise ValueError("please export CACHE_MODE environment variable, support:[HBM, DDR, SSD]") @@ -123,7 +117,7 @@ class Config: else: raise ValueError(f"get CACHE_MODE:{self.cache_mode}, expect in [HBM, DDR, SSD]") - def get_emb_table_cfg(self): + def get_emb_table_cfg(self) -> None: if self.cache_mode == CacheModeEnum.HBM.value: return {"device_vocabulary_size": self.dev_vocab_size} elif self.cache_mode == CacheModeEnum.DDR.value: diff --git a/examples/mmoe/main_mxrec.py b/examples/mmoe/main_mxrec.py index e236cd2f..0eb127dd 100644 --- a/examples/mmoe/main_mxrec.py +++ b/examples/mmoe/main_mxrec.py @@ -24,10 +24,7 @@ from glob import glob import tensorflow as tf from sklearn.metrics import roc_auc_score import numpy as np - -from optimizer import get_dense_and_sparse_optimizer -from config import sess_config, Config, SSD_DATA_PATH, CacheModeEnum -from model import MyModel +from npu_bridge.npu_init import * from mx_rec.constants.constants import ASCEND_SPARSE_LOOKUP_LOCAL_EMB, ASCEND_SPARSE_LOOKUP_ID_OFFSET from mx_rec.core.asc.helper import FeatureSpec, get_asc_insert_func from mx_rec.core.asc.manager import start_asc_pipeline @@ -40,7 +37,9 @@ from mx_rec.util.ops import import_host_pipeline_ops import mx_rec.util as mxrec_util from mx_rec.util.variable import get_dense_and_sparse_variable from mx_rec.util.log import logger -from npu_bridge.npu_init import * +from optimizer import get_dense_and_sparse_optimizer +from config import sess_config, Config, SSD_DATA_PATH, CacheModeEnum +from model import MyModel npu_plugin.set_device_sat_mode(0) @@ -52,7 +51,6 @@ random.seed(shuffle_seed) def add_timestamp_func(batch): timestamp = import_host_pipeline_ops().return_timestamp(tf.cast(batch['label'], dtype=tf.int64)) - # tf.constant(np.random.randint(1,1688109060,1)), tf.int64)) batch["timestamp"] = timestamp return batch @@ -144,7 +142,8 @@ def evaluate(): eval_label = eval_model.get("label") sess.run([eval_iterator.initializer]) else: - # 在sess run模式下,若还是使用原来batch中的label去sess run,则会出现getnext超时报错,需要使用新数据集中的batch + # In sess run mode, if the label from the original batch is still used for sess run, + # a getnext timeout error will occur, and a new batch from the new dataset needs to be used eval_label = ConfigInitializer.get_instance().train_params_config.get_target_batch(False).get("label") sess.run([ConfigInitializer.get_instance().train_params_config.get_initializer(False)]) log_loss_list = [] @@ -157,24 +156,26 @@ def evaluate(): print("eval begin") while not finished: + + eval_current_steps += 1 + eval_start = time.time() try: - eval_current_steps += 1 - eval_start = time.time() eval_loss, pred, label = sess.run([eval_model.get("loss"), eval_model.get("pred"), eval_label]) - eval_cost = time.time() - eval_start - qps_eval = (1 / eval_cost) * rank_size * cfg.batch_size - log_loss_list += list(eval_loss.reshape(-1)) - pred_income = pred[0] - pred_mat = pred[1] - pred_income_list += list(pred_income.reshape(-1)) - pred_mat_list += list(pred_mat.reshape(-1)) - label_income_list += list(label[:, 0].reshape(-1)) - label_mat_list += list(label[:, 1].reshape(-1)) - print(f"eval current_steps: {eval_current_steps}, qps: {qps_eval}") - if eval_current_steps == eval_steps: - finished = True except tf.errors.OutOfRangeError: + break + eval_cost = time.time() - eval_start + qps_eval = (1 / eval_cost) * rank_size * cfg.batch_size + log_loss_list += list(eval_loss.reshape(-1)) + pred_income = pred[0] + pred_mat = pred[1] + pred_income_list 
+= list(pred_income.reshape(-1)) + pred_mat_list += list(pred_mat.reshape(-1)) + label_income_list += list(label[:, 0].reshape(-1)) + label_mat_list += list(label[:, 1].reshape(-1)) + print(f"eval current_steps: {eval_current_steps}, qps: {qps_eval}") + if eval_current_steps == eval_steps: finished = True + auc_income = roc_auc_score(label_income_list, pred_income_list) auc_mat = roc_auc_score(label_mat_list, pred_mat_list) mean_log_loss = np.mean(log_loss_list) @@ -285,7 +286,6 @@ if __name__ == "__main__": warnings.filterwarnings("ignore") _clear_saved_model() - rank_id = int(os.getenv("RANK_ID")) if os.getenv("RANK_ID") else None rank_size = int(os.getenv("TRAIN_RANK_SIZE")) if os.getenv("TRAIN_RANK_SIZE") else None interval = int(os.getenv("INTERVAL")) if os.getenv("INTERVAL") else None train_steps = 1000 @@ -304,13 +304,8 @@ if __name__ == "__main__": logger.info(f"USE_DYNAMIC:{use_dynamic}") init(train_steps=train_steps, eval_steps=eval_steps, use_dynamic=use_dynamic, use_dynamic_expansion=use_dynamic_expansion) - IF_LOAD = False + rank_id = mxrec_util.communication.hccl_ops.get_rank_id() - filelist = glob(f"./saved-model/sparse-model-0") - if filelist: - IF_LOAD = True - ConfigInitializer.get_instance().if_load = IF_LOAD - cfg = Config() feature_spec_list_train = None feature_spec_list_eval = None @@ -385,14 +380,11 @@ if __name__ == "__main__": grads_and_vars = [(grad, variable) for grad, variable in zip(sparse_grads, sparse_variables)] train_ops.append(sparse_optimizer.apply_gradients(grads_and_vars)) - # 动态学习率更新 - train_ops.extend([cfg.global_step.assign(cfg.global_step + 1), cfg.learning_rate[0], cfg.learning_rate[1]]) with tf.control_dependencies(train_ops): train_ops = tf.no_op() cfg.learning_rate = [cfg.learning_rate[0], cfg.learning_rate[1]] - saver = tf.train.Saver() if MODIFY_GRAPH_FLAG: modify_graph_and_start_emb_cache(dump_graph=True) else: @@ -405,7 +397,6 @@ if __name__ == "__main__": if MODIFY_GRAPH_FLAG: # 该场景添加hook处理校验问题 hook_list.append(GraphModifierHook(modify_graph=False)) - # with tf.compat.v1.Session(config=sess_config(dump_data=False)) as sess: if use_faae: sess = tf.compat.v1.train.MonitoredTrainingSession( hooks=hook_list, @@ -427,13 +418,12 @@ if __name__ == "__main__": epoch = 0 cost_sum = 0 qps_sum = 0 - best_income_auc = 0 + best_auc_income= 0 best_auc_mat = 0 iteration_per_loop = 10 train_ops = util.set_iteration_per_loop(sess, train_ops, 10) - # for i in range(1, TRAIN_STEPS): i = 0 while True: i += 1 @@ -441,9 +431,8 @@ if __name__ == "__main__": start_time = time.time() try: - grad, loss = sess.run([train_ops, train_model.get("loss")]) - lr = sess.run(cfg.learning_rate) - global_step = sess.run(cfg.global_step) + grad, loss, lr, global_step = sess.run([train_ops, train_model.get("loss"), + cfg.learning_rate, cfg.global_step]) except tf.errors.OutOfRangeError: logger.info(f"Encounter the end of Sequence for training.") break @@ -469,9 +458,7 @@ if __name__ == "__main__": test_auc_mat,test_mean_log_loss)) best_auc_income = max(best_auc_income, test_auc_income) best_auc_mat = max(best_auc_mat, test_auc_mat) - logger.info(f"training step: {i * iteration_per_loop}, - best auc income: {best_auc_income} , - best auc mat: {best_auc_mat}") + logger.info(f"training step: {i * iteration_per_loop}, best auc income: {best_auc_income} , best auc mat: {best_auc_mat}") sess.close() diff --git a/examples/mmoe/model.py b/examples/mmoe/model.py index cf8ca108..224e8d6d 100644 --- a/examples/mmoe/model.py +++ b/examples/mmoe/model.py @@ -37,10 +37,10 @@ class MyModel: 
self.gate_num = gate_num - def expert_layer(self, input): + def expert_layer(self, _input): param_expert = [] for i in range(0, self.expert_num): - expert_linear = tf.layers.dense(input, units=self.expert_size, activation=None, name=f'expert_payer_{i}', + expert_linear = tf.layers.dense(_input, units=self.expert_size, activation=None, name=f'expert_layer_{i}', kernel_initializer=tf.constant_initializer(value=0.1), bias_initializer=tf.constant_initializer(value=0.1)) @@ -48,10 +48,10 @@ class MyModel: return param_expert - def gate_layer(self, input): + def gate_layer(self, _input): param_gate = [] for i in range(0, self.gate_num): - gate_linear = tf.layers.dense(input, units=self.gate_size, activation=None, name=f'gate_payer_{i}', + gate_linear = tf.layers.dense(_input, units=self.expert_num, activation=None, name=f'gate_layer_{i}', kernel_initializer=tf.constant_initializer(value=0.1), bias_initializer=tf.constant_initializer(value=0.1)) @@ -59,8 +59,8 @@ class MyModel: return param_gate - def tower_layer(self, input, layer_name): - tower_linear = tf.layers.dense(input, units=self.tower_size, activation=None, name=f'tower_payer_{layer_name}', + def tower_layer(self, _input, layer_name): + tower_linear = tf.layers.dense(_input, units=self.tower_size, activation=None, name=f'tower_layer_{layer_name}', kernel_initializer=tf.constant_initializer(value=0.1), bias_initializer=tf.constant_initializer(value=0.1)) @@ -109,7 +109,10 @@ class MyModel: cur_gate_expert = tf.multiply(x=expert_concat, y=cur_gate) cur_gate_expert = tf.reduce_sum(cur_gate_expert, axis=1) + out = self.tower_layer(cur_gate_expert, i) + out = tf.nn.softmax(out) + out = tf.clip_by_value(out, clip_value_min=1e-15, clip_value_max=1.0-1e-15) output_layers.append(out) out_pred.append(tf.nn.softmax(out[:, 1])) _slice_num = slice_num_end diff --git a/examples/mmoe/op_impl_mode.ini b/examples/mmoe/op_impl_mode.ini index 579dea43..e69de29b 100644 --- a/examples/mmoe/op_impl_mode.ini +++ b/examples/mmoe/op_impl_mode.ini @@ -1 +0,0 @@ -ScatterNdAdd=support_out_of_bound_index \ No newline at end of file diff --git a/examples/mmoe/optimizer.py b/examples/mmoe/optimizer.py index 2c7685bb..5469c705 100644 --- a/examples/mmoe/optimizer.py +++ b/examples/mmoe/optimizer.py @@ -15,12 +15,13 @@ # ============================================================================== import tensorflow as tf -from delay_loss_scale import DenseLossScaleOptimizer, SparseLossScaleOptimizer + from mx_rec.util.initialize import ConfigInitializer from mx_rec.optimizers.lazy_adam import create_hash_optimizer from mx_rec.optimizers.lazy_adam_by_addr import create_hash_optimizer_by_address + def get_dense_and_sparse_optimizer(cfg): dense_optimizer = tf.train.AdamOptimizer(learning_rate=cfg.learning_rate[0]) use_dynamic_expansion = ConfigInitializer.get_instance().use_dynamic_expansion @@ -28,8 +29,5 @@ def get_dense_and_sparse_optimizer(cfg): sparse_optimizer = create_hash_optimizer_by_address(learning_rate=cfg.learning_rate[1]) else: sparse_optimizer = create_hash_optimizer(learning_rate=cfg.learning_rate[1]) - loss_scale = 1 - sparse_optimizer = SparseLossScaleOptimizer(sparse_optimizer, loss_scale) - dense_optimizer = DenseLossScaleOptimizer(dense_optimizer, loss_scale) return dense_optimizer, sparse_optimizer -- Gitee From ca2e82248c638e21066a3c6ae779d9409724d122 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=97=A0=E6=82=94?= <1187940490@qq.com> Date: Tue, 23 Jul 2024 17:28:49 +0800 Subject: [PATCH 298/302] =?UTF-8?q?bug=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/mmoe/model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/mmoe/model.py b/examples/mmoe/model.py index 224e8d6d..f18dbff0 100644 --- a/examples/mmoe/model.py +++ b/examples/mmoe/model.py @@ -60,11 +60,11 @@ class MyModel: def tower_layer(self, _input, layer_name): - tower_linear = tf.layers.dense(_input, units=self.tower_size, activation=None, name=f'tower_layer_{layer_name}', + tower_linear = tf.layers.dense(_input, units=self.tower_size, activation='relu', name=f'tower_layer_{layer_name}', kernel_initializer=tf.constant_initializer(value=0.1), bias_initializer=tf.constant_initializer(value=0.1)) - tower_linear_out = tf.layers.dense(tower_linear, units=self.tower_size, activation=None, + tower_linear_out = tf.layers.dense(tower_linear, units=2, activation=None, name=f'tower_payer_out_{layer_name}', kernel_initializer=tf.constant_initializer(value=0.1), bias_initializer=tf.constant_initializer(value=0.1)) -- Gitee From 13f3618364bae56befe067d91b75603f3bae4624 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=97=A0=E6=82=94?= <1187940490@qq.com> Date: Tue, 23 Jul 2024 19:29:53 +0800 Subject: [PATCH 299/302] codecheck --- examples/mmoe/config.py | 8 ++++---- examples/mmoe/main_mxrec.py | 12 +++++++----- examples/mmoe/model.py | 5 +++-- 3 files changed, 14 insertions(+), 11 deletions(-) diff --git a/examples/mmoe/config.py b/examples/mmoe/config.py index 67ed7a20..b6a83582 100644 --- a/examples/mmoe/config.py +++ b/examples/mmoe/config.py @@ -90,14 +90,14 @@ class Config: ) self.learning_rate = _lr_scheduler.calc() + + @staticmethod def get_send_count(self, rank_size): try: - return 46000 // rank_size + return 46000 // rank_size except ZeroDivisionError as exp: raise ZeroDivisionError('Rank size can not be zero.') from exp - - - + def __set_emb_table_size(self) -> None: self.cache_mode = os.getenv("CACHE_MODE") diff --git a/examples/mmoe/main_mxrec.py b/examples/mmoe/main_mxrec.py index 0eb127dd..d02566aa 100644 --- a/examples/mmoe/main_mxrec.py +++ b/examples/mmoe/main_mxrec.py @@ -25,6 +25,7 @@ import tensorflow as tf from sklearn.metrics import roc_auc_score import numpy as np from npu_bridge.npu_init import * +from config import sess_config, Config, SSD_DATA_PATH, CacheModeEnum from mx_rec.constants.constants import ASCEND_SPARSE_LOOKUP_LOCAL_EMB, ASCEND_SPARSE_LOOKUP_ID_OFFSET from mx_rec.core.asc.helper import FeatureSpec, get_asc_insert_func from mx_rec.core.asc.manager import start_asc_pipeline @@ -38,7 +39,7 @@ import mx_rec.util as mxrec_util from mx_rec.util.variable import get_dense_and_sparse_variable from mx_rec.util.log import logger from optimizer import get_dense_and_sparse_optimizer -from config import sess_config, Config, SSD_DATA_PATH, CacheModeEnum + from model import MyModel npu_plugin.set_device_sat_mode(0) @@ -328,7 +329,7 @@ if __name__ == "__main__": optimizer_list = [get_dense_and_sparse_optimizer(cfg)] # note: variance_scaling_initializer only support HBM mode - emb_initializer = tf.constant_initializer(value = 0.1) + emb_initializer = tf.constant_initializer(value=0.1) sparse_hashtable = create_table( key_dtype=cfg.key_type, dim=tf.TensorShape([cfg.emb_dim]), @@ -418,7 +419,7 @@ if __name__ == "__main__": epoch = 0 cost_sum = 0 qps_sum = 0 - best_auc_income= 0 + best_auc_income = 0 best_auc_mat = 0 iteration_per_loop = 10 @@ -455,10 +456,11 @@ if __name__ == "__main__": else: test_auc_income, test_auc_mat, test_mean_log_loss = evaluate() print("Test 
auc income: {};Test auc mat: {} ;log_loss: {} ".format(test_auc_income, - test_auc_mat,test_mean_log_loss)) + test_auc_mat, test_mean_log_loss)) best_auc_income = max(best_auc_income, test_auc_income) best_auc_mat = max(best_auc_mat, test_auc_mat) - logger.info(f"training step: {i * iteration_per_loop}, best auc income: {best_auc_income} , best auc mat: {best_auc_mat}") + logger.info(f"training step: {i * iteration_per_loop}, best auc income: " + f"{best_auc_income} , best auc mat: {best_auc_mat}") sess.close() diff --git a/examples/mmoe/model.py b/examples/mmoe/model.py index f18dbff0..f8090373 100644 --- a/examples/mmoe/model.py +++ b/examples/mmoe/model.py @@ -60,7 +60,8 @@ class MyModel: def tower_layer(self, _input, layer_name): - tower_linear = tf.layers.dense(_input, units=self.tower_size, activation='relu', name=f'tower_layer_{layer_name}', + tower_linear = tf.layers.dense(_input, units=self.tower_size, activation='relu', + name=f'tower_layer_{layer_name}', kernel_initializer=tf.constant_initializer(value=0.1), bias_initializer=tf.constant_initializer(value=0.1)) @@ -112,7 +113,7 @@ class MyModel: out = self.tower_layer(cur_gate_expert, i) out = tf.nn.softmax(out) - out = tf.clip_by_value(out, clip_value_min=1e-15, clip_value_max=1.0-1e-15) + out = tf.clip_by_value(out, clip_value_min=1e-15, clip_value_max=1.0 - 1e-15) output_layers.append(out) out_pred.append(tf.nn.softmax(out[:, 1])) _slice_num = slice_num_end -- Gitee From e3ffcd9bffabc259852c0af58f43273272d655c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=97=A0=E6=82=94?= <1187940490@qq.com> Date: Tue, 23 Jul 2024 21:59:32 +0800 Subject: [PATCH 300/302] =?UTF-8?q?bug=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/mmoe/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/mmoe/model.py b/examples/mmoe/model.py index f8090373..8cbb7ba8 100644 --- a/examples/mmoe/model.py +++ b/examples/mmoe/model.py @@ -116,7 +116,7 @@ class MyModel: out = tf.clip_by_value(out, clip_value_min=1e-15, clip_value_max=1.0 - 1e-15) output_layers.append(out) out_pred.append(tf.nn.softmax(out[:, 1])) - _slice_num = slice_num_end + _slice_num = slice_gate_end trainable_variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='mmoe') label_income = label[:, 0:1] -- Gitee From aa1c87126e4b84f6a58253932138d21860f1313b Mon Sep 17 00:00:00 2001 From: steepcurve Date: Tue, 23 Jul 2024 14:12:51 +0000 Subject: [PATCH 301/302] =?UTF-8?q?!228=20=E3=80=90FEAT=E3=80=91`PerfRec`?= =?UTF-8?q?=E6=80=A7=E8=83=BD=E5=B7=A5=E5=85=B7=20*=20cleancode=20*=20clea?= =?UTF-8?q?ncode=20*=20cleancode=20*=20add=20shell=3DFalse=20*=20add=20REA?= =?UTF-8?q?DME=20*=20cleancode=20*=20fix=20bug=20and=20cleancode=20*=20add?= =?UTF-8?q?=20README.md=20*=20cleancode=20*=20cleancode=20*=20cleancode=20?= =?UTF-8?q?*=20add=20comment=20*=20add=20comment=20*=20write=20call=20stac?= =?UTF-8?q?k=20to=20file=20*=20write=20call=20stack=20to=20file=20*=20writ?= =?UTF-8?q?e=20call=20stack=20to=20file=20*=20npu=20optional=20*=20npu=20o?= =?UTF-8?q?ptional=20*=20fix=20bug=20*=20fix=20bug=20*=20add=20comments=20?= =?UTF-8?q?*=20add=20fusion=20tracing=20*=20add=20fusion=20tracing=20*=20a?= =?UTF-8?q?dd=20fusion=20tracing=20*=20add=20fusion=20tracing=20*=20add=20?= =?UTF-8?q?fusion=20tracing=20*=20add=20fusion=20tracing=20*=20add=20fusio?= =?UTF-8?q?n=20tracing=20*=20add=20fusion=20tracing=20*=20feat:=20`flamegr?= =?UTF-8?q?aph`=20wrapper=20*=20feat:=20`flamegraph`=20wrapper=20*=20feat:?= 
=?UTF-8?q?=20`flamegraph`=20wrapper=20*=20feat:=20`flamegraph`=20wrapper?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 tools/perfrec-python/README.md         |  81 +++++
 tools/perfrec-python/config.toml       |  27 ++
 tools/perfrec-python/fusion_tracing.py | 425 +++++++++++++++++++++++++
 tools/perfrec-python/perf.py           | 251 +++++++++++++++
 4 files changed, 784 insertions(+)
 create mode 100644 tools/perfrec-python/README.md
 create mode 100644 tools/perfrec-python/config.toml
 create mode 100644 tools/perfrec-python/fusion_tracing.py
 create mode 100644 tools/perfrec-python/perf.py

diff --git a/tools/perfrec-python/README.md b/tools/perfrec-python/README.md
new file mode 100644
index 00000000..ddc7e114
--- /dev/null
+++ b/tools/perfrec-python/README.md
@@ -0,0 +1,81 @@
+## perf.py
+```
+usage: perf.py [-h] --perf_data PERF_DATA --flamegraph_path FLAMEGRAPH_PATH
+               [--perf_bin PERF_BIN] [--output_svg OUTPUT_SVG]
+
+Generate a Flamegraph from perf.data.
+
+optional arguments:
+  -h, --help            show this help message and exit
+  --perf_data PERF_DATA
+                        Path to the perf.data file.
+  --flamegraph_path FLAMEGRAPH_PATH
+                        Path to the Flamegraph Perl scripts directory.
+  --perf_bin PERF_BIN   Path to perf executable binary file. (default: perf)
+  --output_svg OUTPUT_SVG
+                        Path to the output SVG file. (default: flamegraph.svg)
+```
+#### Usage Example
+
+Collect data with `perf` using a script like the following.
+```bash
+pid=$(top -b -n 1 | head -n 8 | tail -n 1 | awk '{print $1}')
+if [ -z "$pid" ];then
+    echo "Failed to get the process ID"
+    exit 1
+fi
+perf record -F 99 -p $pid -a -g -- sleep 60
+if [ $? -ne 0 ]; then
+    echo "perf record failed"
+    exit 1
+fi
+echo "perf.data collection finished"
+```
+
+Use this tool to generate the flamegraph and the hot-function analysis.
+```bash
+python perf.py --perf_data perf.data --flamegraph_path /ws/FlameGraph
+```
+#### Optional Configuration
+```toml
+# config.toml
+
+[perf]
+# Filter percentage of time cost
+threshold = 0.05
+# Ignore function list
+ignores = ["[libc.so.6]"]
+```
+
+## fusion_tracing.py
+```
+usage: fusion_tracing.py [-h] --debug_log DEBUG_LOG
+                         [--msprof_output MSPROF_OUTPUT]
+
+Generate CPU/NPU fusion tracing json.
+
+optional arguments:
+  -h, --help            show this help message and exit
+  --debug_log DEBUG_LOG
+                        MxRec DEBUG level log file path.
+  --msprof_output MSPROF_OUTPUT
+                        msprof output path.
+```
+#### Usage Example
+```bash
+# CPU only
+python fusion_tracing.py --debug_log ../../example/demo/little_demo/temp.log
+# CPU + NPU
+python fusion_tracing.py --debug_log ../../example/demo/little_demo/temp.log --msprof_output ../../example/demo/little_demo/msprof
+```
+#### Optional Configuration
+```toml
+# config.toml
+
+[mxrec]
+# Pipe name and time cost name
+key_process = ["getBatchData", "getAndProcess"]
+process_emb_info = ["getAndSendTensors"]
+lookup_swap_addr = ["lookupAddrs"]
+embedding_recv = ["EmbeddingRecv", "EmbeddingUpdate", "SendH2DEmb"]
+```
diff --git a/tools/perfrec-python/config.toml b/tools/perfrec-python/config.toml
new file mode 100644
index 00000000..8e15fd1d
--- /dev/null
+++ b/tools/perfrec-python/config.toml
@@ -0,0 +1,27 @@
+# Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +[mxrec] +# Pipe name and time cost name +key_process = ["getBatchData", "getAndProcess"] +process_emb_info = ["getAndSendTensors"] +lookup_swap_addr = ["lookupAddrs"] +embedding_recv = ["EmbeddingRecv", "EmbeddingUpdate", "SendH2DEmb"] + +[perf] +# Filter percentage of time cost +threshold = 0.05 +# Ignore function list +ignores = ["[libc.so.6]"] diff --git a/tools/perfrec-python/fusion_tracing.py b/tools/perfrec-python/fusion_tracing.py new file mode 100644 index 00000000..49900004 --- /dev/null +++ b/tools/perfrec-python/fusion_tracing.py @@ -0,0 +1,425 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +import argparse +import json +import logging +import os +import re +from collections import defaultdict +from dataclasses import dataclass +from datetime import datetime +from typing import Any, Dict, List, Tuple + +import pandas as pd +import toml + + +class MxRecConfig: + """ + Configuration from `config.toml`. + """ + + def __init__(self, pipes: Dict[str, List[str]]): + self.pipes = pipes + self.func_to_pipe = defaultdict(str) + for pipe_name, event_list in self.pipes.items(): + for event in event_list: + self.func_to_pipe[event] = pipe_name + self.pipe_names = [name for name in pipes.keys()] + + +class MxRecEvent: + """ + Class to represent an MxRec event. + """ + + def __init__(self, log_line: str, event_name: str, pipe_id: int): + timestamp_s = get_timestamp(log_line) + duration_ms = get_duration(log_line, event_name) + process_id = get_process_id(log_line) + self.timestamp_start_us = timestamp_s * 1e6 - duration_ms * 1e3 + self.duration_us = duration_ms * 1e3 + self.timestamp_end_us = timestamp_s * 1e6 + self.process_id = process_id + self.name = event_name + self.pipe_id = pipe_id + + +@dataclass +class OpEvent: + """ + Class to represent an Op event. + """ + + device_id: int + op_name: str + op_type: str + task_type: str + start_timestamp: float + duration: float + + +def extract_mxrec_events( + log_path: str, config: MxRecConfig +) -> Dict[int, Dict[str, List[MxRecEvent]]]: + """ + Extracts MxRec events from the log file. + + Args: + log_path (str): Path to the log file. + config (MxRecConfig): Dictionary mapping event names to pipe names and other configs. + + Returns: + Dict[int, Dict[str, List[MxRecEvent]]]: Extracted MxRec events grouped by process ID and pipe. 
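+
+    Note:
+        The returned mapping is pid -> pipe name -> events, e.g.
+        {12345: {"key_process": [event, ...]}} (illustrative values
+        only); the pid is parsed from each log line by get_process_id().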
+ """ + events: Dict[int, Dict[str, List[MxRecEvent]]] = defaultdict( + lambda: defaultdict(list) + ) + broken_lines = list() + event_names = config.func_to_pipe + pipe_names = config.pipe_names + pipe_ids = defaultdict(int) + for i, pipe in enumerate(pipe_names): + pipe_ids[pipe] = i + with open(log_path) as log: + for line in log: + for name, pipe in filter(lambda item: item[0] in line, event_names.items()): + try: + event = MxRecEvent(line, name, pipe_ids[pipe]) + events[event.process_id][pipe].append(event) + except RuntimeError: + broken_lines.append(line) + if broken_lines: + logging.warning("There are %d broken log lines", len(broken_lines)) + for line in broken_lines: + logging.warning(line) + return events + + +def extract_op_events(op_summary_path: str) -> List[OpEvent]: + """ + Extracts Op events from the CSV file. + + Args: + op_summary_path (str): Path to the op summary CSV file. + + Returns: + List[OpEvent]: List of extracted Op events. + """ + df = pd.read_csv(op_summary_path) + return [ + OpEvent( + row["Device_id"], + row["Op Name"], + row["OP Type"], + row["Task Type"], + row["Task Start Time(us)"], + row["Task Duration(us)"], + ) + for _, row in df.iterrows() + ] + + +def get_timestamp(log_line: str) -> float: + """ + Extracts the timestamp from a log line. + + Args: + log_line (str): A line from the log file. + + Returns: + float: The extracted timestamp as a float. + """ + pattern = r"\[(\d{4}/\d{1,2}/\d{1,2} \d{1,2}:\d{1,2}:\d{1,2}\.\d+)\]" + match = re.search(pattern, log_line) + if not match: + raise RuntimeError(f"there is no time in log: {log_line}") + date_time_str = match.group(1) + date_time_format = "%Y/%m/%d %H:%M:%S.%f" + # Parse the date-time string into a datetime object + date_time_obj = datetime.strptime(date_time_str, date_time_format) + # Convert the datetime object to a timestamp + return date_time_obj.timestamp() + + +def get_duration(log_line: str, event_name: str) -> float: + """ + Extracts the duration of an event from a log line. + + Args: + log_line (str): A line from the log file. + event_name (str): The name of the event. + + Returns: + int: The extracted duration in milliseconds. + """ + pattern = event_name + r".*:\s*(\d+)" + match = re.search(pattern, log_line) + if not match: + raise RuntimeError(f"there is no event: {event_name}, log: {log_line}") + duration_ms = match.group(1) + return float(duration_ms) + + +def get_process_id(log_line: str) -> int: + """ + Extracts the process ID from a log line. + + Args: + log_line (str): A line from the log file. + + Returns: + int: The extracted process ID. + """ + pattern = r"\[(\d+)\]" + match = re.search(pattern, log_line) + if not match: + raise RuntimeError(f"there is no process_id in log: {log_line}") + process_id = match.group(1) + return int(process_id) + + +def read_mxrec_config() -> MxRecConfig: + """ + Reads the MxRec configuration from a TOML file. + + Returns: + MxRecCofig: Configuration class. + """ + try: + config = toml.load("config.toml") + return MxRecConfig(config["mxrec"]) + except toml.TomlDecodeError as e: + raise RuntimeError("can not load config.toml") from e + + +@dataclass +class TracingMetaData: + """ + Class to represent metadata for tracing. + """ + + name: str + pid: int + tid: int + ph: str + args: Dict[str, Any] + + +class TracingMxRecEvent: + """ + Class to represent a traced MxRec event. 
+ """ + + def __init__(self, mxrec_event: MxRecEvent): + self.name = mxrec_event.name + self.pid = mxrec_event.process_id + self.tid = get_fake_tid(self.pid, mxrec_event.pipe_id) + self.ts = mxrec_event.timestamp_start_us + self.dur = mxrec_event.duration_us + self.ph = "X" + self.args = {} + + +class TracingOpEvent: + """ + Class to represent a traced Op event. + """ + + def __init__(self, op_event: OpEvent, tid: int): + self.name = op_event.op_type + self.pid = get_op_pid(op_event) + self.tid = tid + self.ts = op_event.start_timestamp + self.dur = op_event.duration + self.ph = "X" + self.args = {"Op Name": op_event.op_name} + + +def get_metadata(processes: List[int], config: MxRecConfig) -> List[TracingMetaData]: + """ + Generates metadata for tracing processes and threads. + + Args: + processes (List[int]): List of process IDs. + config (MxRecConfig): Configuration class. + + Returns: + List[TracingMetaData]: List of tracing metadata. + """ + metadata = list() + pipes = config.pipe_names + for i, pid in enumerate(processes): + metadata1 = TracingMetaData( + "process_name", pid, 0, "M", {"name": f"MxRec process {i}"} + ) + metadata2 = TracingMetaData( + "process_sort_index", pid, 0, "M", {"sort_index": i} + ) + metadata.append(metadata1) + metadata.append(metadata2) + for pipe_i, pipe in enumerate(pipes): + pipe_metadata1 = TracingMetaData( + "thread_name", + pid, + get_fake_tid(pid, pipe_i), + "M", + {"name": f"{pipe} {pid}"}, + ) + pipe_metadata2 = TracingMetaData( + "thread_sort_index", + pid, + get_fake_tid(pid, pipe_i), + "M", + {"sort_index": pipe_i}, + ) + metadata.append(pipe_metadata1) + metadata.append(pipe_metadata2) + return metadata + + +def get_fake_tid(pid: int, pipe_id: int) -> int: + """ + Generates a fake thread ID based on process ID and pipe ID. + + Args: + pid (int): Process ID. + pipe_id (int): Pipe ID. + + Returns: + int: Fake thread ID. + """ + return pid * 10 + pipe_id + + +def get_op_pid(op_event: OpEvent) -> int: + """ + Gets the process ID for an Op event. + + Args: + op_event (OpEvent): An Op event. + + Returns: + int: Process ID. + """ + # add 100 avoiding confict with cpu pid(rand_id) + return 100 + op_event.device_id + + +def get_op_tracing(path: str) -> Tuple[List[TracingMetaData], List[TracingOpEvent]]: + """ + Generates tracing data for Op events. + + Args: + path (str): Path to the directory containing Op event summaries. + + Returns: + Tuple[List[TracingMetaData], List[TracingOpEvent]]: Metadata and tracing events. 
+ """ + task_types = defaultdict(int) + pids = set() + tids = set() + metadata = list() + op_tracing = list() + + def new_process_metadata(pid, device_id): + metadata1 = TracingMetaData( + "process_name", pid, 0, "M", {"name": f"NPU {device_id}"} + ) + metadata2 = TracingMetaData( + "process_sort_index", pid, 0, "M", {"sort_index": pid} + ) + return [metadata1, metadata2] + + def new_thread_metadata(pid, tid, name): + metadata1 = TracingMetaData("thread_name", pid, tid, "M", {"name": f"{name}"}) + metadata2 = TracingMetaData( + "thread_sort_index", pid, tid, "M", {"sort_index": tid} + ) + return [metadata1, metadata2] + + for root, _, files in os.walk(path): + for file in files: + if ( + root.endswith("mindstudio_profiler_output") + and file.startswith("op_summary") + and file.endswith(".csv") + ): + file_path = os.path.join(root, file) + op_events = extract_op_events(file_path) + for event in op_events: + process_id = get_op_pid(event) + if process_id not in pids: + pids.add(process_id) + metadata.extend( + new_process_metadata(process_id, event.device_id) + ) + if event.task_type not in task_types: + task_id = len(task_types) + task_types[event.task_type] = task_id + tid = get_fake_tid(process_id, task_types[event.task_type]) + if tid not in tids: + tids.add(tid) + metadata.extend( + new_thread_metadata(process_id, tid, event.task_type) + ) + op_tracing.append(TracingOpEvent(event, tid)) + return metadata, op_tracing + + +def main(): + """ + Main function to parse arguments and generate tracing JSON. + """ + logging.basicConfig(level=logging.INFO) + parser = argparse.ArgumentParser( + description="Generate CPU/NPU fusion tracing json." + ) + parser.add_argument( + "--debug_log", help="MxRec DEBUG level log file path.", required=True + ) + parser.add_argument("--msprof_output", help="msprof output path.", required=False) + args = parser.parse_args() + + log_path = args.debug_log + tracing = list() + try: + config = read_mxrec_config() + mxrec_events = extract_mxrec_events(log_path, config) + tracing.extend(get_metadata(list(mxrec_events.keys()), config)) + except RuntimeError: + logging.error("Can not read config.toml, it will exit unsuccessfully.") + exit(1) + + for process in mxrec_events.values(): + for events in process.values(): + tracing.extend([TracingMxRecEvent(event) for event in events]) + + msprof_output_path = args.msprof_output + if msprof_output_path: + op_metadata, op_tracing = get_op_tracing(msprof_output_path) + tracing.extend(op_metadata) + tracing.extend(op_tracing) + + fd = os.open("mxrec_tracing.json", os.O_WRONLY | os.O_CREAT, 0o640) + with os.fdopen(fd, "w") as file: + json.dump(tracing, file, indent=4, default=lambda obj: obj.__dict__) + + +if __name__ == "__main__": + main() diff --git a/tools/perfrec-python/perf.py b/tools/perfrec-python/perf.py new file mode 100644 index 00000000..34f688e9 --- /dev/null +++ b/tools/perfrec-python/perf.py @@ -0,0 +1,251 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +import argparse +import logging +import os +import subprocess +from collections import defaultdict +from typing import List + +import toml +from tabulate import tabulate + + +def generate_flamegraph( + perf_bin: str, perf_data: str, output_svg: str, flamegraph_path: str +) -> None: + """ + Generate a flamegraph from perf data. + + Args: + perf_data (str): Path to the perf.data file. + output_svg (str): Path to the output SVG file. + flamegraph_path (str): Path to the Flamegraph scripts directory. + """ + # Ensure perf script is available + try: + subprocess.run([perf_bin, "--version"], shell=False, check=True) + except subprocess.CalledProcessError: + logging.error("perf is not installed or not in PATH.") + return + + # Ensure Flamegraph scripts are available + stackcollapse_path = os.path.join(flamegraph_path, "stackcollapse-perf.pl") + flamegraph_script_path = os.path.join(flamegraph_path, "flamegraph.pl") + + if not os.path.isfile(stackcollapse_path) or not os.path.isfile( + flamegraph_script_path + ): + logging.error( + "Flamegraph scripts not found in the provided directory %s.", + flamegraph_path, + ) + return + + # Generate the folded stack output + folded_output = perf_data + ".folded" + fd = os.open(folded_output, os.O_WRONLY | os.O_CREAT, 0o640) + with os.fdopen(fd, "w") as f: + script_output = subprocess.run( + [perf_bin, "script", "-i", perf_data], + shell=False, + check=True, + stdout=subprocess.PIPE, + ) + subprocess.run( + [stackcollapse_path], + shell=False, + check=True, + input=script_output.stdout, + stdout=f, + ) + + # Generate the flamegraph + fd_svg = os.open(output_svg, os.O_WRONLY | os.O_CREAT, 0o640) + with os.fdopen(fd_svg, "w") as f: + subprocess.run( + [flamegraph_script_path, folded_output], shell=False, check=True, stdout=f + ) + + logging.info("Flamegraph generated at %s", output_svg) + + # Analyze the folded stack output + analyze_folded_stack(folded_output) + + +class CallStack: + def __init__(self): + self.count = 0 + self.call_stacks = [] + + def add_call_stacks(self, count: int, call_stack: str): + self.count += count + self.call_stacks.append(call_stack) + + +def analyze_folded_stack(folded_output: str) -> None: + """ + Analyzes the folded stack output to find functions with significant sample counts. + + Args: + folded_output (str): Path to the folded stack output file. 
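+
+    Note:
+        Functions whose sample share is below the configured threshold,
+        or that appear in the `ignores` list of config.toml, are
+        filtered out; the kept call stacks are also written to
+        call_stacks.txt.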
+ """ + + function_counts = defaultdict(CallStack) + total_count = 0 + + # Read the folded stack output + # Line of folded stack example: + # python3.7;[libascendalog.so];access;__sys_trace_return;prepare_creds 10101010 + with open(folded_output, "r") as f: + for line in f: + parts = line.strip().rsplit( + " ", 1 + ) # Use rsplit to handle function names with spaces + count = int(parts[-1]) + call_stack_str = parts[0] + stack = parts[0].split(";") + function_counts[stack[-1]].add_call_stacks(count, call_stack_str) + total_count += count + + config = read_config() + + # Filter and display functions with more than 5% total count + threshold = total_count * config.threshold + results = [ + (func, call_stack) + for func, call_stack in function_counts.items() + if call_stack.count >= threshold and func not in config.ignores + ] + + # Sort results by count in descending order + results.sort(key=lambda x: x[1].count, reverse=True) + + # Prepare data for tabulate + # Write call stacks to file + table_data = [] + fd_call_stacks = os.open("call_stacks.txt", os.O_WRONLY | os.O_CREAT, 0o640) + with os.fdopen(fd_call_stacks, "w") as f: + for func, call_stack in results: + percentage = ( + (call_stack.count / total_count) * 100 if total_count != 0 else 0 + ) + table_data.append( + [limit_line(func, 50), call_stack.count, f"{percentage:.2f}%"] + ) + stacks = [stk + "\n" for stk in call_stack.call_stacks] + f.writelines( + [ + f"func_name: {func}\n", + f"percentage: {percentage:.2f}%\n", + "call_stacks:\n", + ] + + stacks + + ["\n\n"] + ) + + # Print the results using tabulate + logging.info("\nFunctions with more than 5% of total samples:") + headers = ["Function", "Count", "Percentage"] + logging.info("\n%s", tabulate(table_data, headers=headers, tablefmt="grid")) + + +def limit_line(input_content: str, line_length: int) -> str: + """ + Limits the length of a line to a specified number of characters, adding line breaks if necessary. + + Args: + input_content (str): The input string. + line_length (int): The maximum line length. + + Returns: + str: The formatted string with line breaks. + """ + if line_length >= len(input_content): + return input_content + limited_str = "" + if line_length > 0: + limited_str = "\n".join( + input_content[i : i + line_length] + for i in range(len(input_content), line_length) + ) + return limited_str + + +class PerfConfig: + """ + Configuration from `config.toml`. + """ + + def __init__(self, ignores: List[str], threshold: float = 0.05): + self.ignores = set(ignores) + self.threshold = threshold + + +def read_config() -> PerfConfig: + """ + Reads configs related to `perf` from the configuration file. + + Returns: + PerfConfig: Configuration class. + """ + try: + config = toml.load("config.toml") + perf_config = config["perf"] + return PerfConfig(perf_config["ignores"], perf_config["threshold"]) + except toml.TomlDecodeError: + return PerfConfig(ignores=[]) + + +def main(): + """ + Main function to parse arguments and generate a flamegraph. + """ + logging.basicConfig(level=logging.INFO) + parser = argparse.ArgumentParser( + description="Generate a Flamegraph from perf.data." + ) + parser.add_argument( + "--perf_data", help="Path to the perf.data file.", required=True + ) + parser.add_argument( + "--flamegraph_path", + help="Path to the Flamegraph Perl scripts directory.", + required=True, + ) + parser.add_argument( + "--perf_bin", + help="Path to perf exacutable binary file. 
(default: perf)", + required=False, + default="perf", + ) + parser.add_argument( + "--output_svg", + help="Path to the output SVG file. (default: flamegraph.svg)", + required=False, + default="flamegraph.svg", + ) + args = parser.parse_args() + + generate_flamegraph( + args.perf_bin, args.perf_data, args.output_svg, args.flamegraph_path + ) + + +if __name__ == "__main__": + main() -- Gitee From af1a01a3eb99e9a2d59f22e7e43e7cba414624de Mon Sep 17 00:00:00 2001 From: wuhongfa <1660398197@qq.com> Date: Wed, 24 Jul 2024 05:51:12 +0000 Subject: [PATCH 302/302] =?UTF-8?q?=E3=80=90FEAT=E3=80=91=E6=96=B0?= =?UTF-8?q?=E5=A2=9Eattention=20grad=E8=9E=8D=E5=90=88=E7=AE=97=E5=AD=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../aclnn_attention_fusion_grad/inc/common.h | 45 ++ .../inc/op_runner.h | 182 +++++++ .../inc/operator_desc.h | 57 +++ .../aclnn_attention_fusion_grad/run.sh | 91 ++++ .../scripts/gen_data.py | 47 ++ .../scripts/verify_result.py | 34 ++ .../src/CMakeLists.txt | 68 +++ .../src/common.cpp | 79 +++ .../aclnn_attention_fusion_grad/src/main.cpp | 182 +++++++ .../src/op_runner.cpp | 464 ++++++++++++++++++ .../src/operator_desc.cpp | 56 +++ 11 files changed, 1305 insertions(+) create mode 100644 cust_op/attention_fusion_grad/aclnn_attention_fusion_grad/inc/common.h create mode 100644 cust_op/attention_fusion_grad/aclnn_attention_fusion_grad/inc/op_runner.h create mode 100644 cust_op/attention_fusion_grad/aclnn_attention_fusion_grad/inc/operator_desc.h create mode 100755 cust_op/attention_fusion_grad/aclnn_attention_fusion_grad/run.sh create mode 100644 cust_op/attention_fusion_grad/aclnn_attention_fusion_grad/scripts/gen_data.py create mode 100644 cust_op/attention_fusion_grad/aclnn_attention_fusion_grad/scripts/verify_result.py create mode 100644 cust_op/attention_fusion_grad/aclnn_attention_fusion_grad/src/CMakeLists.txt create mode 100644 cust_op/attention_fusion_grad/aclnn_attention_fusion_grad/src/common.cpp create mode 100644 cust_op/attention_fusion_grad/aclnn_attention_fusion_grad/src/main.cpp create mode 100644 cust_op/attention_fusion_grad/aclnn_attention_fusion_grad/src/op_runner.cpp create mode 100644 cust_op/attention_fusion_grad/aclnn_attention_fusion_grad/src/operator_desc.cpp diff --git a/cust_op/attention_fusion_grad/aclnn_attention_fusion_grad/inc/common.h b/cust_op/attention_fusion_grad/aclnn_attention_fusion_grad/inc/common.h new file mode 100644 index 00000000..954f3f33 --- /dev/null +++ b/cust_op/attention_fusion_grad/aclnn_attention_fusion_grad/inc/common.h @@ -0,0 +1,45 @@ +/** +* @file common.h +* +* Copyright (C) 2024. Huawei Technologies Co., Ltd. All rights reserved. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +*/ +#ifndef COMMON_H +#define COMMON_H + +#include +#include +#include +#include +#include + +#include "acl/acl.h" + +#define SUCCESS 0 +#define FAILED 1 + +#define INFO_LOG(fmt, args...) fprintf(stdout, "[INFO] " fmt "\n", ##args) +#define WARN_LOG(fmt, args...) fprintf(stdout, "[WARN] " fmt "\n", ##args) +#define ERROR_LOG(fmt, args...) 
fprintf(stderr, "[ERROR] " fmt "\n", ##args) + +/** + * @brief Read data from file + * @param [in] filePath: file path + * @param [out] fileSize: file size + * @return read result + */ +bool ReadFile(const std::string &filePath, size_t fileSize, void *buffer, size_t bufferSize); + +/** + * @brief Write data to file + * @param [in] filePath: file path + * @param [in] buffer: data to write to file + * @param [in] size: size to write + * @return write result + */ +bool WriteFile(const std::string &filePath, const void *buffer, size_t size); + +#endif // COMMON_H diff --git a/cust_op/attention_fusion_grad/aclnn_attention_fusion_grad/inc/op_runner.h b/cust_op/attention_fusion_grad/aclnn_attention_fusion_grad/inc/op_runner.h new file mode 100644 index 00000000..03d0aff4 --- /dev/null +++ b/cust_op/attention_fusion_grad/aclnn_attention_fusion_grad/inc/op_runner.h @@ -0,0 +1,182 @@ +/** +* @file op_runner.h +* +* Copyright (C) 2024. Huawei Technologies Co., Ltd. All rights reserved. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +*/ +#ifndef OP_RUNNER_H +#define OP_RUNNER_H + +#include "aclnn/acl_meta.h" +#include "acl/acl.h" +#include "common.h" +#include "operator_desc.h" + +/** + * Op Runner + */ +class OpRunner { +public: + /** + * @brief Constructor + * @param [in] opDesc: op description + */ + explicit OpRunner(OperatorDesc *opDesc); + + /** + * @brief Destructor + */ + virtual ~OpRunner(); + + /** + * @brief Init op runner + */ + bool Init(); + + /** + * @brief Get number of inputs + * @return number of inputs + */ + const size_t NumInputs(); + + /** + * @brief Get number of outputs + * @return number of outputs + */ + const size_t NumOutputs(); + + /** + * @brief Get input size by index + * @param [in] index: input index + * @return size of the input + */ + const size_t GetInputSize(size_t index) const; + const size_t GetInputNumDims(size_t index) const; + aclDataType GetInputDataType(size_t index) const; + aclFormat GetInputFormat(size_t index) const; + + /** + * @brief Get output size by index + * @param [in] index: output index + * @return size of the output + */ + size_t GetOutputSize(size_t index) const; + const size_t GetOutputNumDims(size_t index) const; + aclDataType GetOutputDataType(size_t index) const; + aclFormat GetOutputFormat(size_t index) const; + + /** + * @brief Get input element count by index + * @param i[in] ndex: input index + * @return element count of the input + */ + size_t GetInputElementCount(size_t index) const; + + /** + * @brief Get output element count by index + * @param [in] index: output index + * @return element count of the output + */ + size_t GetOutputElementCount(size_t index) const; + + /** + * @brief Get input shape by index + * @param [in] index: input index + * @return shape of the output + */ + std::vector GetInputShape(size_t index) const; + + /** + * @brief Get output shape by index + * @param [in] index: output index + * @return shape of the output + */ + std::vector GetOutputShape(size_t index) const; + + /** + * @brief Get input buffer(host memory) by index + * @tparam T: data type + * @param [in] index: input index + * @return host address of the input + */ + template + T *GetInputBuffer(size_t index) + { + if (index >= numInputs_) { + ERROR_LOG("index out of range. 
index = %zu, numInputs = %zu", index, numInputs_); + return nullptr; + } + return reinterpret_cast(hostInputs_[index]); + } + + /** + * @brief Get output buffer(host memory) by index + * @tparam T: data type + * @param [in] index: output index + * @return host address of the output + */ + template + const T *GetOutputBuffer(size_t index) + { + if (index >= numOutputs_) { + ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); + return nullptr; + } + + return reinterpret_cast(hostOutputs_[index]); + } + + /** + * @brief Print readable input by index + * @param [in] index: input index + * @param [in] elementsPerRow: number of elements per row + */ + void PrintInput(size_t index, size_t elementsPerRow = 16); + + /** + * @brief Print readable output by index + * @param [in] index: output index + * @param [in] elementsPerRow: number of elements per row + */ + void PrintOutput(size_t index, size_t elementsPerRow = 16); + + /** + * @brief Compile static op + * @return compile result + */ + bool CompileStaticOp(); + + /** + * @brief Compile dynamic op + * @return compile result + */ + bool CompileDynamicOp(); + + /** + * @brief Run op + * @return run result + */ + bool RunOp(); + +private: + size_t numInputs_; + size_t numOutputs_; + + std::vector inputBuffers_; + std::vector outputBuffers_; + + std::vector devInputs_; + std::vector devOutputs_; + + std::vector hostInputs_; + std::vector hostOutputs_; + + std::vector inputTensor_; + std::vector outputTensor_; + OperatorDesc *opDesc_; +}; + +#endif // OP_RUNNER_H diff --git a/cust_op/attention_fusion_grad/aclnn_attention_fusion_grad/inc/operator_desc.h b/cust_op/attention_fusion_grad/aclnn_attention_fusion_grad/inc/operator_desc.h new file mode 100644 index 00000000..da719849 --- /dev/null +++ b/cust_op/attention_fusion_grad/aclnn_attention_fusion_grad/inc/operator_desc.h @@ -0,0 +1,57 @@ +/** +* @file operator_desc.h +* +* Copyright (C) 2024. Huawei Technologies Co., Ltd. All rights reserved. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+*/ +#ifndef OPERATOR_DESC_H +#define OPERATOR_DESC_H + +#include +#include + +#include "acl/acl.h" + +/** + * Op description + */ +struct OperatorDesc { + /** + * Constructor + */ + explicit OperatorDesc(); + + /** + * Destructor + */ + virtual ~OperatorDesc(); + + /** + * Add an input tensor description + * @param [in] dataType: data type + * @param [in] numDims: number of dims + * @param [in] dims: dims + * @param [in] format: format + * @return OperatorDesc + */ + OperatorDesc &AddInputTensorDesc(aclDataType dataType, int numDims, const int64_t *dims, aclFormat format); + + /** + * Add an output tensor description + * @param [in] dataType: data type + * @param [in] numDims: number of dims + * @param [in] dims: dims + * @param [in] format: format + * @return OperatorDesc + */ + OperatorDesc &AddOutputTensorDesc(aclDataType dataType, int numDims, const int64_t *dims, aclFormat format); + + std::string opType; + std::vector inputDesc; + std::vector outputDesc; +}; + +#endif // OPERATOR_DESC_H diff --git a/cust_op/attention_fusion_grad/aclnn_attention_fusion_grad/run.sh b/cust_op/attention_fusion_grad/aclnn_attention_fusion_grad/run.sh new file mode 100755 index 00000000..6793de82 --- /dev/null +++ b/cust_op/attention_fusion_grad/aclnn_attention_fusion_grad/run.sh @@ -0,0 +1,91 @@ +#!/bin/bash +export ASCEND_SLOG_PRINT_TO_STDOUT=0 +export ASCEND_GLOBAL_LOG_LEVEL=0 + +CURRENT_DIR=$( + cd $(dirname ${BASH_SOURCE:-$0}) + pwd +) +cd $CURRENT_DIR + +SHORT=v:, +LONG=dtype:, +OPTS=$(getopt -a --options $SHORT --longoptions $LONG -- "$@") +eval set -- "$OPTS" +while : +do + case "$1" in + # float16, float, int32 + (-v | --dtype) + DTYPE="$2" + shift 2;; + (--) + shift; + break;; + (*) + echo "[ERROR] Unexpected option: $1"; + break;; + esac +done + +if [ ! $ASCEND_HOME_DIR ]; then + if [ -d "$HOME/Ascend/ascend-toolkit/latest" ]; then + export ASCEND_HOME_DIR=$HOME/Ascend/ascend-toolkit/latest + else + export ASCEND_HOME_DIR=/usr/local/Ascend/ascend-toolkit/latest + fi +fi + +export DDK_PATH=$ASCEND_HOME_DIR +arch=$(uname -m) +export NPU_HOST_LIB=$ASCEND_HOME_DIR/${arch}-linux/lib64 + +function main { + rm -rf $HOME/ascend/log/* + rm ./input/*.bin + rm ./output/*.bin + + cd $CURRENT_DIR + python3 scripts/gen_data.py + if [ $? -ne 0 ]; then + echo "ERROR: generate input data failed!" + return 1 + fi + echo "INFO: generate input data success!" + + cd $CURRENT_DIR; rm -rf build; mkdir -p build; cd build + cmake ../src + if [ $? -ne 0 ]; then + echo "ERROR: cmake failed!" + return 1 + fi + echo "INFO: cmake success!" + make + if [ $? -ne 0 ]; then + echo "ERROR: make failed!" + return 1 + fi + echo "INFO: make success!" + + cd $CURRENT_DIR/output + echo "INFO: execute op!" + ./execute_attention_fusion_grad_op + + if [ $? -ne 0 ]; then + echo "ERROR: acl executable run failed! please check your project!" + return 1 + fi + echo "INFO: acl executable run success!" + cd $CURRENT_DIR + ret=`python3 scripts/verify_result.py output/grad_query.bin output/grad_key.bin output/grad_value.bin output/golden_grad_query.bin output/golden_grad_key.bin output/golden_grad_value.bin ` + echo $ret + if [ "x$ret" == "xtest pass" ]; then + echo "" + echo "#####################################" + echo "INFO: you have passed the Precision!" 
+ echo "#####################################" + echo "" + fi +} + +main diff --git a/cust_op/attention_fusion_grad/aclnn_attention_fusion_grad/scripts/gen_data.py b/cust_op/attention_fusion_grad/aclnn_attention_fusion_grad/scripts/gen_data.py new file mode 100644 index 00000000..69077ee3 --- /dev/null +++ b/cust_op/attention_fusion_grad/aclnn_attention_fusion_grad/scripts/gen_data.py @@ -0,0 +1,47 @@ +#!/usr/bin/python3 +# -*- coding:utf-8 -*- +# Copyright 2024 Huawei Technologies Co., Ltd +import numpy as np +import os +import math + +def softmax_grad(grad, src): + dst = grad * src + dst = np.sum(dst, axis=-1, keepdims=True) + dst = (grad - dst) * src + return dst + +def param_attn_layer_grad(dout, softmax_out, query, key, value): + # Dv and dS + dv = np.matmul(np.transpose(softmax_out, (0, 2, 1)), dout) + dS = np.matmul(dout, np.transpose(value, (0, 2, 1))) + dS = softmax_grad(dS, softmax_out)/math.sqrt(query.shape[2]) + # Atten + dQ = np.matmul(dS, key) + dK = np.matmul(np.transpose(dS, (0, 2, 1)), query) + return dQ, dK, dv + +def gen_golden_data_simple(): + + dout = np.random.uniform(-1, 1,[1024, 1000, 80]).astype(np.float32) + softmax_out = np.random.uniform(-1, 1,[1024, 1000, 50]).astype(np.float32) + query = np.random.uniform(-1, 1,[1024, 1000, 80]).astype(np.float32) + key = np.random.uniform(-1, 1,[1024, 50, 80]).astype(np.float32) + value = np.random.uniform(-1, 1,[1024, 50, 80]).astype(np.float32) + + grad_query, grad_key, grad_value = param_attn_layer_grad(dout, softmax_out, query, key, value) + + os.system("mkdir -p input") + os.system("mkdir -p output") + dout.tofile("./input/dout.bin") + softmax_out.tofile("./input/softmax_out.bin") + query.tofile("./input/query.bin") + key.tofile("./input/key.bin") + value.tofile("./input/value.bin") + + grad_query.tofile("./output/golden_grad_query.bin") + grad_key.tofile("./output/golden_grad_key.bin") + grad_value.tofile("./output/golden_grad_value.bin") + +if __name__ == "__main__": + gen_golden_data_simple() diff --git a/cust_op/attention_fusion_grad/aclnn_attention_fusion_grad/scripts/verify_result.py b/cust_op/attention_fusion_grad/aclnn_attention_fusion_grad/scripts/verify_result.py new file mode 100644 index 00000000..7781d41f --- /dev/null +++ b/cust_op/attention_fusion_grad/aclnn_attention_fusion_grad/scripts/verify_result.py @@ -0,0 +1,34 @@ +#!/usr/bin/python3 +# -*- coding:utf-8 -*- +# Copyright 2024 Huawei Technologies Co., Ltd +import os +import sys +import numpy as np + +loss = 1e-3 +minimum = 10e-10 + +def verify_result(real_result, golden): + real_result = np.fromfile(real_result, dtype=np.float32) + golden = np.fromfile(golden, dtype=np.float32) + real_result = real_result[:golden.size] + print(real_result[:32]) + print(golden[:32]) + result = np.abs(real_result - golden) + deno = np.maximum(np.abs(real_result), np.abs(golden)) + result_atol = np.less_equal(result, loss) + result_rtol = np.less_equal(result / np.add(deno, minimum), loss) + if not result_rtol.all() and not result_atol.all(): + if np.sum(result_rtol == False) > real_result.size * loss and np.sum(result_atol == False) > real_result.size * loss: + print("[ERROR] result error") + return False + print("test pass") + return True + +if __name__ == '__main__': + print("=============================grad query============") + verify_result(sys.argv[1], sys.argv[4]) + print("=============================grad key============") + verify_result(sys.argv[2], sys.argv[5]) + print("=============================grad value============") + verify_result(sys.argv[3], 
sys.argv[6]) \ No newline at end of file diff --git a/cust_op/attention_fusion_grad/aclnn_attention_fusion_grad/src/CMakeLists.txt b/cust_op/attention_fusion_grad/aclnn_attention_fusion_grad/src/CMakeLists.txt new file mode 100644 index 00000000..f1459958 --- /dev/null +++ b/cust_op/attention_fusion_grad/aclnn_attention_fusion_grad/src/CMakeLists.txt @@ -0,0 +1,68 @@ +# Copyright (c) Huawei Technologies Co., Ltd. 2024. All rights reserved. + +# CMake lowest version requirement +cmake_minimum_required(VERSION 3.5.1) + +# project information +project(acl_execute_attention_fusion_grad) + +# Compile options +add_compile_options(-std=c++11) + +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "../output") +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "../output") + +set(INC_PATH $ENV{DDK_PATH}) + +if (NOT DEFINED ENV{DDK_PATH}) + set(INC_PATH "/usr/local/Ascend/ascend-toolkit/latest") + message(STATUS "set default INC_PATH: ${INC_PATH}") +else () + message(STATUS "env INC_PATH: ${INC_PATH}") +endif() + +set(CUST_PKG_PATH "${INC_PATH}/opp/vendors/attention_fusion_grad/op_api") + +set(LIB_PATH $ENV{NPU_HOST_LIB}) + +# Dynamic libraries in the stub directory can only be used for compilation +if (NOT DEFINED ENV{NPU_HOST_LIB}) + set(LIB_PATH "/usr/local/Ascend/ascend-toolkit/latest/acllib/lib64/stub/") + set(LIB_PATH1 "/usr/local/Ascend/ascend-toolkit/latest/atc/lib64/stub/") + message(STATUS "set default LIB_PATH: ${LIB_PATH}") +else () + message(STATUS "env LIB_PATH: ${LIB_PATH}") +endif() + +# Header path +include_directories( + ${INC_PATH}/runtime/include + ${INC_PATH}/atc/include + ../inc + ${CUST_PKG_PATH}/include +) + +# add host lib path +link_directories( + ${LIB_PATH} + ${LIB_PATH1} + ${CUST_PKG_PATH}/lib +) + +add_executable(execute_attention_fusion_grad_op + operator_desc.cpp + op_runner.cpp + main.cpp + op_runner.cpp + common.cpp +) + +target_link_libraries(execute_attention_fusion_grad_op + ascendcl + cust_opapi + acl_op_compiler + nnopbase + stdc++ +) + +install(TARGETS execute_attention_fusion_grad_op DESTINATION ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}) diff --git a/cust_op/attention_fusion_grad/aclnn_attention_fusion_grad/src/common.cpp b/cust_op/attention_fusion_grad/aclnn_attention_fusion_grad/src/common.cpp new file mode 100644 index 00000000..02eac9b4 --- /dev/null +++ b/cust_op/attention_fusion_grad/aclnn_attention_fusion_grad/src/common.cpp @@ -0,0 +1,79 @@ +/** +* @file common.cpp +* +* Copyright (C) 2024. Huawei Technologies Co., Ltd. All rights reserved. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +*/ +#include "common.h" + +#include +#include +#include +#include + +extern bool g_isDevice; + +bool ReadFile(const std::string &filePath, size_t fileSize, void *buffer, size_t bufferSize) +{ + struct stat sBuf; + int fileStatus = stat(filePath.data(), &sBuf); + if (fileStatus == -1) { + ERROR_LOG("failed to get file %s", filePath.c_str()); + return false; + } + if (S_ISREG(sBuf.st_mode) == 0) { + ERROR_LOG("%s is not a file, please enter a file", filePath.c_str()); + return false; + } + + std::ifstream file; + file.open(filePath, std::ios::binary); + if (!file.is_open()) { + ERROR_LOG("Open file failed. 
path = %s", filePath.c_str()); + return false; + } + + std::filebuf *buf = file.rdbuf(); + size_t size = buf->pubseekoff(0, std::ios::end, std::ios::in); + if (size == 0) { + ERROR_LOG("file size is 0"); + file.close(); + return false; + } + if (size > bufferSize) { + ERROR_LOG("file size is larger than buffer size"); + file.close(); + return false; + } + buf->pubseekpos(0, std::ios::in); + buf->sgetn(static_cast(buffer), size); + fileSize = size; + file.close(); + return true; +} + +bool WriteFile(const std::string &filePath, const void *buffer, size_t size) +{ + if (buffer == nullptr) { + ERROR_LOG("Write file failed. buffer is nullptr"); + return false; + } + + int fd = open(filePath.c_str(), O_RDWR | O_CREAT | O_TRUNC, S_IRUSR | S_IWRITE); + if (fd < 0) { + ERROR_LOG("Open file failed. path = %s", filePath.c_str()); + return false; + } + + auto writeSize = write(fd, buffer, size); + (void) close(fd); + if (writeSize != size) { + ERROR_LOG("Write file Failed."); + return false; + } + + return true; +} diff --git a/cust_op/attention_fusion_grad/aclnn_attention_fusion_grad/src/main.cpp b/cust_op/attention_fusion_grad/aclnn_attention_fusion_grad/src/main.cpp new file mode 100644 index 00000000..e6aa8340 --- /dev/null +++ b/cust_op/attention_fusion_grad/aclnn_attention_fusion_grad/src/main.cpp @@ -0,0 +1,182 @@ +/** +* @file main.cpp +* +* Copyright (C) 2024. Huawei Technologies Co., Ltd. All rights reserved. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +*/ +#include +#include +#include +#include +#include + +#include "acl/acl.h" +#include "op_runner.h" + +#include "common.h" + +bool g_isDevice = false; +int deviceId = 15; + +OperatorDesc CreateOpDesc() +{ + // define operator + std::vector dout { 1024, 1000, 80 }; + std::vector softmax_out { 1024, 1000, 50 }; + std::vector query { 1024, 1000, 80}; + std::vector key { 1024, 50, 80 }; + std::vector value { 1024, 50, 80 }; + + std::vector grad_query { 1024, 1000, 80}; + std::vector grad_key { 1024, 50, 80 }; + std::vector grad_value { 1024, 50, 80 }; + + aclFormat format = ACL_FORMAT_ND; + OperatorDesc opDesc; + opDesc.AddInputTensorDesc(ACL_FLOAT, dout.size(), dout.data(), format); + opDesc.AddInputTensorDesc(ACL_FLOAT, softmax_out.size(), softmax_out.data(), format); + opDesc.AddInputTensorDesc(ACL_FLOAT, query.size(), query.data(), format); + opDesc.AddInputTensorDesc(ACL_FLOAT, key.size(), key.data(), format); + opDesc.AddInputTensorDesc(ACL_FLOAT, value.size(), value.data(), format); + + opDesc.AddOutputTensorDesc(ACL_FLOAT, grad_query.size(), grad_query.data(), format); + opDesc.AddOutputTensorDesc(ACL_FLOAT, grad_key.size(), grad_key.data(), format); + opDesc.AddOutputTensorDesc(ACL_FLOAT, grad_value.size(), grad_value.data(), format); + return opDesc; +} + +bool SetInputData(OpRunner &runner) +{ + size_t fileSize = 0; + ReadFile("../input/dout.bin", fileSize, runner.GetInputBuffer(0), runner.GetInputSize(0)); + ReadFile("../input/softmax_out.bin", fileSize, runner.GetInputBuffer(1), runner.GetInputSize(1)); + ReadFile("../input/query.bin", fileSize, runner.GetInputBuffer(2), runner.GetInputSize(2)); + ReadFile("../input/key.bin", fileSize, runner.GetInputBuffer(3), runner.GetInputSize(3)); + ReadFile("../input/value.bin", fileSize, runner.GetInputBuffer(4), runner.GetInputSize(4)); + INFO_LOG("Set input success"); + return true; +} + +bool ProcessOutputData(OpRunner &runner) +{ + 
WriteFile("../output/grad_query.bin", runner.GetOutputBuffer(0), runner.GetOutputSize(0)); + WriteFile("../output/grad_key.bin", runner.GetOutputBuffer(1), runner.GetOutputSize(1)); + WriteFile("../output/grad_value.bin", runner.GetOutputBuffer(2), runner.GetOutputSize(2)); + INFO_LOG("Write output success"); + return true; +} + +void DestoryResource() +{ + bool flag = false; + if (aclrtResetDevice(deviceId) != ACL_SUCCESS) { + ERROR_LOG("Reset device %d failed", deviceId); + flag = true; + } + INFO_LOG("Reset Device success"); + if (aclFinalize() != ACL_SUCCESS) { + ERROR_LOG("Finalize acl failed"); + flag = true; + } + if (flag) { + ERROR_LOG("Destory resource failed"); + } else { + INFO_LOG("Destory resource success"); + } +} + +bool InitResource() +{ + std::string output = "../output"; + if (access(output.c_str(), 0) == -1) { + int ret = mkdir(output.c_str(), 0700); + if (ret == 0) { + INFO_LOG("Make output directory successfully"); + } + else { + ERROR_LOG("Make output directory fail"); + return false; + } + } + + // acl.json is dump or profiling config file + if (aclInit(NULL) != ACL_SUCCESS) { + ERROR_LOG("acl init failed"); + return false; + } + + if (aclrtSetDevice(deviceId) != ACL_SUCCESS) { + ERROR_LOG("Set device failed. deviceId is %d", deviceId); + (void)aclFinalize(); + return false; + } + INFO_LOG("Set device[%d] success", deviceId); + + // runMode is ACL_HOST which represents app is running in host + // runMode is ACL_DEVICE which represents app is running in device + aclrtRunMode runMode; + if (aclrtGetRunMode(&runMode) != ACL_SUCCESS) { + ERROR_LOG("Get run mode failed"); + DestoryResource(); + return false; + } + g_isDevice = (runMode == ACL_DEVICE); + INFO_LOG("Get RunMode[%d] success", runMode); + + return true; +} + +bool RunOp() +{ + // create op desc + OperatorDesc opDesc = CreateOpDesc(); + + // create Runner + OpRunner opRunner(&opDesc); + if (!opRunner.Init()) { + ERROR_LOG("Init OpRunner failed"); + return false; + } + + // Load inputs + if (!SetInputData(opRunner)) { + ERROR_LOG("Set input data failed"); + return false; + } + + // Run op + if (!opRunner.RunOp()) { + ERROR_LOG("Run op failed"); + return false; + } + + // process output data + if (!ProcessOutputData(opRunner)) { + ERROR_LOG("Process output data failed"); + return false; + } + + INFO_LOG("Run op success"); + return true; +} + +int main(int argc, char **argv) +{ + if (!InitResource()) { + ERROR_LOG("Init resource failed"); + return FAILED; + } + INFO_LOG("Init resource success"); + + if (!RunOp()) { + DestoryResource(); + return FAILED; + } + + DestoryResource(); + + return SUCCESS; +} diff --git a/cust_op/attention_fusion_grad/aclnn_attention_fusion_grad/src/op_runner.cpp b/cust_op/attention_fusion_grad/aclnn_attention_fusion_grad/src/op_runner.cpp new file mode 100644 index 00000000..4df5eea5 --- /dev/null +++ b/cust_op/attention_fusion_grad/aclnn_attention_fusion_grad/src/op_runner.cpp @@ -0,0 +1,464 @@ +/** +* @file op_runner.cpp +* +* Copyright (C) 2024. Huawei Technologies Co., Ltd. All rights reserved. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+*/ +#include "op_runner.h" +#include "aclnn_attention_fusion_grad.h" +#include +#include +#include +#include "acl/acl_op_compiler.h" +#include "common.h" + +using namespace std; + +extern bool g_isDevice; + +OpRunner::OpRunner(OperatorDesc *opDesc) : opDesc_(opDesc) +{ + numInputs_ = opDesc->inputDesc.size(); + numOutputs_ = opDesc->outputDesc.size(); +} + +OpRunner::~OpRunner() +{ + for (size_t i = 0; i < numInputs_; ++i) { + (void)aclDestroyTensor(inputTensor_[i]); + (void)aclDestroyDataBuffer(inputBuffers_[i]); + (void)aclrtFree(devInputs_[i]); + if (g_isDevice) { + (void)aclrtFree(hostInputs_[i]); + } else { + (void)aclrtFreeHost(hostInputs_[i]); + } + } + + for (size_t i = 0; i < numOutputs_; ++i) { + (void)aclDestroyTensor(outputTensor_[i]); + (void)aclDestroyDataBuffer(outputBuffers_[i]); + (void)aclrtFree(devOutputs_[i]); + if (g_isDevice) { + (void)aclrtFree(hostOutputs_[i]); + } else { + (void)aclrtFreeHost(hostOutputs_[i]); + } + } +} + +bool OpRunner::Init() +{ + for (size_t i = 0; i < numInputs_; ++i) { + auto size = GetInputSize(i); + void *devMem = nullptr; + if (aclrtMalloc(&devMem, size, ACL_MEM_MALLOC_HUGE_FIRST) != ACL_SUCCESS) { + ERROR_LOG("Malloc device memory for input[%zu] failed", i); + return false; + } + devInputs_.emplace_back(devMem); + inputBuffers_.emplace_back(aclCreateDataBuffer(devMem, size)); + + void *hostInput = nullptr; + if (g_isDevice) { + if (aclrtMalloc(&hostInput, size, ACL_MEM_MALLOC_HUGE_FIRST) != ACL_SUCCESS) { + ERROR_LOG("Malloc device memory for input[%zu] failed", i); + return false; + } + } else { + if (aclrtMallocHost(&hostInput, size) != ACL_SUCCESS) { + ERROR_LOG("Malloc device memory for input[%zu] failed", i); + return false; + } + } + if (hostInput == nullptr) { + ERROR_LOG("Malloc memory for input[%zu] failed", i); + return false; + } + hostInputs_.emplace_back(hostInput); + + aclTensor *inputTensor = aclCreateTensor(GetInputShape(i).data(), GetInputNumDims(i), GetInputDataType(i), + nullptr, 0, GetInputFormat(i), GetInputShape(i).data(), GetInputNumDims(i), devInputs_[i]); + if (inputTensor == nullptr) { + ERROR_LOG("Create Tensor for input[%zu] failed", i); + return false; + } + inputTensor_.emplace_back(inputTensor); + } + + for (size_t i = 0; i < numOutputs_; ++i) { + auto size = GetOutputSize(i); + void *devMem = nullptr; + if (aclrtMalloc(&devMem, size, ACL_MEM_MALLOC_HUGE_FIRST) != ACL_SUCCESS) { + ERROR_LOG("Malloc device memory for output[%zu] failed", i); + return false; + } + devOutputs_.emplace_back(devMem); + outputBuffers_.emplace_back(aclCreateDataBuffer(devMem, size)); + + void *hostOutput = nullptr; + if (g_isDevice) { + if (aclrtMalloc(&hostOutput, size, ACL_MEM_MALLOC_HUGE_FIRST) != ACL_SUCCESS) { + ERROR_LOG("Malloc device memory for output[%zu] failed", i); + return false; + } + } else { + if (aclrtMallocHost(&hostOutput, size) != ACL_SUCCESS) { + ERROR_LOG("Malloc device memory for output[%zu] failed", i); + return false; + } + } + if (hostOutput == nullptr) { + ERROR_LOG("Malloc host memory for output[%zu] failed", i); + return false; + } + hostOutputs_.emplace_back(hostOutput); + + aclTensor *outputTensor = aclCreateTensor(GetOutputShape(i).data(), GetOutputNumDims(i), GetOutputDataType(i), + nullptr, 0, GetOutputFormat(i), GetOutputShape(i).data(), GetOutputNumDims(i), devOutputs_[i]); + if (outputTensor == nullptr) { + ERROR_LOG("Create Tensor for output[%zu] failed", i); + return false; + } + outputTensor_.emplace_back(outputTensor); + } + + return true; +} + +const size_t OpRunner::NumInputs() +{ + 
return numInputs_; +} + +const size_t OpRunner::NumOutputs() +{ + return numOutputs_; +} + +const size_t OpRunner::GetInputSize(size_t index) const +{ + if (index >= numInputs_) { + ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); + return 0; + } + + return aclGetTensorDescSize(opDesc_->inputDesc[index]); +} + +const size_t OpRunner::GetInputNumDims(size_t index) const +{ + if (index >= numInputs_) { + ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); + return 0; + } + + return aclGetTensorDescNumDims(opDesc_->inputDesc[index]); +} + +aclDataType OpRunner::GetInputDataType(size_t index) const +{ + if (index >= numInputs_) { + ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); + return ACL_DT_UNDEFINED; + } + + return aclGetTensorDescType(opDesc_->inputDesc[index]); +} + +aclFormat OpRunner::GetInputFormat(size_t index) const +{ + if (index >= numInputs_) { + ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); + return ACL_FORMAT_UNDEFINED; + } + + return aclGetTensorDescFormat(opDesc_->inputDesc[index]); +} + +std::vector OpRunner::GetInputShape(size_t index) const +{ + std::vector ret; + if (index >= numInputs_) { + ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); + return ret; + } + + auto desc = opDesc_->inputDesc[index]; + for (size_t i = 0; i < aclGetTensorDescNumDims(desc); ++i) { + int64_t dimSize; + if (aclGetTensorDescDimV2(desc, i, &dimSize) != ACL_SUCCESS) { + ERROR_LOG("get dims from tensor desc failed. dims index = %zu", i); + ret.clear(); + return ret; + } + ret.emplace_back(dimSize); + } + + return ret; +} + +size_t OpRunner::GetOutputSize(size_t index) const +{ + if (index >= numOutputs_) { + ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); + return 0; + } + + return aclGetTensorDescSize(opDesc_->outputDesc[index]); +} + +const size_t OpRunner::GetOutputNumDims(size_t index) const +{ + if (index >= numOutputs_) { + ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); + return 0; + } + + return aclGetTensorDescNumDims(opDesc_->outputDesc[index]); +} + +aclDataType OpRunner::GetOutputDataType(size_t index) const +{ + if (index >= numOutputs_) { + ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); + return ACL_DT_UNDEFINED; + } + + return aclGetTensorDescType(opDesc_->outputDesc[index]); +} + + +aclFormat OpRunner::GetOutputFormat(size_t index) const +{ + if (index >= numOutputs_) { + ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); + return ACL_FORMAT_UNDEFINED; + } + + return aclGetTensorDescFormat(opDesc_->outputDesc[index]); +} + +std::vector OpRunner::GetOutputShape(size_t index) const +{ + std::vector ret; + if (index >= numOutputs_) { + ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); + return ret; + } + + auto desc = opDesc_->outputDesc[index]; + for (size_t i = 0; i < aclGetTensorDescNumDims(desc); ++i) { + int64_t dimSize; + if (aclGetTensorDescDimV2(desc, i, &dimSize) != ACL_SUCCESS) { + ERROR_LOG("get dims from tensor desc failed. dims index = %zu", i); + ret.clear(); + return ret; + } + ret.emplace_back(dimSize); + } + return ret; +} + +size_t OpRunner::GetInputElementCount(size_t index) const +{ + if (index >= opDesc_->inputDesc.size()) { + ERROR_LOG("index out of range. 
index = %zu, numInputs = %zu", index, numInputs_); + return 0; + } + + return aclGetTensorDescElementCount(opDesc_->inputDesc[index]); +} + +size_t OpRunner::GetOutputElementCount(size_t index) const +{ + if (index >= opDesc_->outputDesc.size()) { + ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); + return 0; + } + + return aclGetTensorDescElementCount(opDesc_->outputDesc[index]); +} + +bool OpRunner::RunOp() +{ + for (size_t i = 0; i < numInputs_; ++i) { + auto size = GetInputSize(i); + aclrtMemcpyKind kind = ACL_MEMCPY_HOST_TO_DEVICE; + if (g_isDevice) { + kind = ACL_MEMCPY_DEVICE_TO_DEVICE; + } + if (aclrtMemcpy(devInputs_[i], size, hostInputs_[i], size, kind) != ACL_SUCCESS) { + ERROR_LOG("Copy input[%zu] failed", i); + return false; + } + INFO_LOG("Copy input[%zu] success", i); + } + + aclrtStream stream = nullptr; + if (aclrtCreateStream(&stream) != ACL_SUCCESS) { + ERROR_LOG("Create stream failed"); + return false; + } + INFO_LOG("Create stream success"); + + size_t workspaceSize = 0; + aclOpExecutor *handle = nullptr; + auto ret = aclnnAttentionFusionGradGetWorkspaceSize(inputTensor_[0], inputTensor_[1], inputTensor_[2], inputTensor_[3], inputTensor_[4], outputTensor_[0], outputTensor_[1], outputTensor_[2], + &workspaceSize, &handle); + if (ret != ACL_SUCCESS) { + (void)aclrtDestroyStream(stream); + ERROR_LOG("Get Operator Workspace failed. error code is %d", static_cast(ret)); + return false; + } + INFO_LOG("Execute aclnnAttentionFusionGradGetWorkspaceSize success, workspace size %lu", workspaceSize); + + void *workspace = nullptr; + if (workspaceSize != 0) { + if (aclrtMalloc(&workspace, workspaceSize, ACL_MEM_MALLOC_HUGE_FIRST) != ACL_SUCCESS) { + ERROR_LOG("Malloc device memory failed"); + } + } + + ret = aclnnAttentionFusionGrad(workspace, workspaceSize, handle, stream); + if (ret != ACL_SUCCESS) { + (void)aclrtDestroyStream(stream); + ERROR_LOG("Execute Operator failed. error code is %d", static_cast(ret)); + return false; + } + INFO_LOG("Execute aclnnAttentionFusionGrad success"); + + ret = aclrtSynchronizeStreamWithTimeout(stream, 5000); + if (ret != SUCCESS) { + ERROR_LOG("Synchronize stream failed. 
error code is %d", static_cast(ret)); + (void)aclrtDestroyStream(stream); + return false; + } + INFO_LOG("Synchronize stream success"); + + auto beforeTime = std::chrono::steady_clock::now(); + for (int i = 0; i<100; i++) { + ret = aclnnAttentionFusionGradGetWorkspaceSize(inputTensor_[0], inputTensor_[1], inputTensor_[2], inputTensor_[3], inputTensor_[4], outputTensor_[0], outputTensor_[1], outputTensor_[2], + &workspaceSize, &handle); + ret = aclnnAttentionFusionGrad(workspace, workspaceSize, handle, stream); + } + ret = aclrtSynchronizeStreamWithTimeout(stream, 5000); + auto afterTime = std::chrono::steady_clock::now(); + double duration_microsecond = std::chrono::duration(afterTime - beforeTime).count(); + std::cout << "time cost " << duration_microsecond/100 << " us" << std::endl; + + for (size_t i = 0; i < numOutputs_; ++i) { + auto size = GetOutputSize(i); + aclrtMemcpyKind kind = ACL_MEMCPY_DEVICE_TO_HOST; + if (g_isDevice) { + kind = ACL_MEMCPY_DEVICE_TO_DEVICE; + } + if (aclrtMemcpy(hostOutputs_[i], size, devOutputs_[i], size, kind) != ACL_SUCCESS) { + INFO_LOG("Copy output[%zu] success", i); + (void)aclrtDestroyStream(stream); + return false; + } + INFO_LOG("Copy output[%zu] success", i); + } + + (void)aclrtDestroyStream(stream); + return true; +} + + +template +void DoPrintData(const T *data, size_t count, size_t elementsPerRow) +{ + assert(elementsPerRow != 0); + for (size_t i = 0; i < count; ++i) { + std::cout << std::setw(10) << data[i]; + if (i % elementsPerRow == elementsPerRow - 1) { + std::cout << std::endl; + } + } +} + +void DoPrintFp16Data(const aclFloat16 *data, size_t count, size_t elementsPerRow) +{ + assert(elementsPerRow != 0); + for (size_t i = 0; i < count; ++i) { + std::cout << std::setw(10) << std::setprecision(4) << aclFloat16ToFloat(data[i]); + if (i % elementsPerRow == elementsPerRow - 1) { + std::cout << std::endl; + } + } +} + +void PrintData(const void *data, size_t count, aclDataType dataType, size_t elementsPerRow) +{ + if (data == nullptr) { + ERROR_LOG("Print data failed. data is nullptr"); + return; + } + + switch (dataType) { + case ACL_BOOL: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case ACL_INT8: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case ACL_UINT8: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case ACL_INT16: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case ACL_UINT16: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case ACL_INT32: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case ACL_UINT32: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case ACL_INT64: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case ACL_UINT64: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case ACL_FLOAT16: + DoPrintFp16Data(reinterpret_cast(data), count, elementsPerRow); + break; + case ACL_FLOAT: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case ACL_DOUBLE: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + default: + ERROR_LOG("Unsupported type: %d", dataType); + } +} + +void OpRunner::PrintInput(size_t index, size_t numElementsPerRow) +{ + if (index >= numInputs_) { + ERROR_LOG("index out of range. 
+void OpRunner::PrintInput(size_t index, size_t numElementsPerRow)
+{
+    if (index >= numInputs_) {
+        ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_);
+        return;
+    }
+
+    auto desc = opDesc_->inputDesc[index];
+    PrintData(hostInputs_[index], GetInputElementCount(index), aclGetTensorDescType(desc), numElementsPerRow);
+}
+
+void OpRunner::PrintOutput(size_t index, size_t numElementsPerRow)
+{
+    if (index >= numOutputs_) {
+        ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_);
+        return;
+    }
+
+    auto desc = opDesc_->outputDesc[index];
+    PrintData(hostOutputs_[index], GetOutputElementCount(index), aclGetTensorDescType(desc), numElementsPerRow);
+}
diff --git a/cust_op/attention_fusion_grad/aclnn_attention_fusion_grad/src/operator_desc.cpp b/cust_op/attention_fusion_grad/aclnn_attention_fusion_grad/src/operator_desc.cpp
new file mode 100644
index 00000000..1928103c
--- /dev/null
+++ b/cust_op/attention_fusion_grad/aclnn_attention_fusion_grad/src/operator_desc.cpp
@@ -0,0 +1,56 @@
+/**
+* @file operator_desc.cpp
+*
+* Copyright (C) 2024. Huawei Technologies Co., Ltd. All rights reserved.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+*/
+#include "common.h"
+#include "operator_desc.h"
+
+using namespace std;
+
+OperatorDesc::OperatorDesc() {}
+
+OperatorDesc::~OperatorDesc()
+{
+    for (auto *desc : inputDesc) {
+        aclDestroyTensorDesc(desc);
+    }
+
+    for (auto *desc : outputDesc) {
+        aclDestroyTensorDesc(desc);
+    }
+
+}
+
+OperatorDesc &OperatorDesc::AddInputTensorDesc(aclDataType dataType,
+                                               int numDims,
+                                               const int64_t *dims,
+                                               aclFormat format)
+{
+    aclTensorDesc *desc = aclCreateTensorDesc(dataType, numDims, dims, format);
+    if (desc == nullptr) {
+        ERROR_LOG("create tensor failed");
+        return *this;
+    }
+    inputDesc.emplace_back(desc);
+    return *this;
+}
+
+OperatorDesc &OperatorDesc::AddOutputTensorDesc(aclDataType dataType,
+                                                int numDims,
+                                                const int64_t *dims,
+                                                aclFormat format)
+{
+    aclTensorDesc *desc = aclCreateTensorDesc(dataType, numDims, dims, format);
+    if (desc == nullptr) {
+        ERROR_LOG("create tensor failed");
+        return *this;
+    }
+
+    outputDesc.emplace_back(desc);
+    return *this;
+}
-- 
Gitee